| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1446, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002074688796680498, | |
| "grad_norm": 5.411880763762789, | |
| "learning_rate": 3.4482758620689654e-09, | |
| "logits/chosen": 1.0625, | |
| "logits/rejected": 1.078125, | |
| "logps/chosen": -310.0, | |
| "logps/rejected": -220.0, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.02074688796680498, | |
| "grad_norm": 5.356267802595583, | |
| "learning_rate": 3.448275862068965e-08, | |
| "logits/chosen": -0.0576171875, | |
| "logits/rejected": 0.01141357421875, | |
| "logps/chosen": -394.0, | |
| "logps/rejected": -374.0, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.2222222238779068, | |
| "rewards/chosen": -0.00445556640625, | |
| "rewards/margins": -0.00445556640625, | |
| "rewards/rejected": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04149377593360996, | |
| "grad_norm": 8.221470216280487, | |
| "learning_rate": 6.89655172413793e-08, | |
| "logits/chosen": -0.057373046875, | |
| "logits/rejected": -0.1484375, | |
| "logps/chosen": -304.0, | |
| "logps/rejected": -364.0, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.0030059814453125, | |
| "rewards/margins": -0.000499725341796875, | |
| "rewards/rejected": 0.003509521484375, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06224066390041494, | |
| "grad_norm": 4.685647512300122, | |
| "learning_rate": 1.0344827586206897e-07, | |
| "logits/chosen": 0.11767578125, | |
| "logits/rejected": 0.09228515625, | |
| "logps/chosen": -350.0, | |
| "logps/rejected": -338.0, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.0019989013671875, | |
| "rewards/margins": 0.0030059814453125, | |
| "rewards/rejected": -0.00099945068359375, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08298755186721991, | |
| "grad_norm": 5.392440324607591, | |
| "learning_rate": 1.379310344827586e-07, | |
| "logits/chosen": -0.1611328125, | |
| "logits/rejected": -0.1640625, | |
| "logps/chosen": -342.0, | |
| "logps/rejected": -372.0, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.00099945068359375, | |
| "rewards/margins": -0.0030059814453125, | |
| "rewards/rejected": 0.003997802734375, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1037344398340249, | |
| "grad_norm": 5.066645187212037, | |
| "learning_rate": 1.7241379310344828e-07, | |
| "logits/chosen": 0.138671875, | |
| "logits/rejected": 0.2412109375, | |
| "logps/chosen": -262.0, | |
| "logps/rejected": -184.0, | |
| "loss": 0.6906, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.0019989013671875, | |
| "rewards/margins": -0.000499725341796875, | |
| "rewards/rejected": 0.00250244140625, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12448132780082988, | |
| "grad_norm": 5.592678715959439, | |
| "learning_rate": 2.0689655172413793e-07, | |
| "logits/chosen": -0.08349609375, | |
| "logits/rejected": -0.0849609375, | |
| "logps/chosen": -512.0, | |
| "logps/rejected": -452.0, | |
| "loss": 0.6884, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.0240478515625, | |
| "rewards/margins": 0.006500244140625, | |
| "rewards/rejected": 0.017578125, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.14522821576763487, | |
| "grad_norm": 7.364077184884452, | |
| "learning_rate": 2.413793103448276e-07, | |
| "logits/chosen": 0.1728515625, | |
| "logits/rejected": 0.11865234375, | |
| "logps/chosen": -214.0, | |
| "logps/rejected": -152.0, | |
| "loss": 0.6847, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0240478515625, | |
| "rewards/margins": 0.01904296875, | |
| "rewards/rejected": 0.0050048828125, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.16597510373443983, | |
| "grad_norm": 4.583578088481169, | |
| "learning_rate": 2.758620689655172e-07, | |
| "logits/chosen": 0.111328125, | |
| "logits/rejected": -0.031494140625, | |
| "logps/chosen": -288.0, | |
| "logps/rejected": -284.0, | |
| "loss": 0.6819, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0279541015625, | |
| "rewards/margins": 0.0029754638671875, | |
| "rewards/rejected": 0.0250244140625, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18672199170124482, | |
| "grad_norm": 4.763001538688379, | |
| "learning_rate": 3.103448275862069e-07, | |
| "logits/chosen": 0.006011962890625, | |
| "logits/rejected": 0.0810546875, | |
| "logps/chosen": -414.0, | |
| "logps/rejected": -404.0, | |
| "loss": 0.6792, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.1044921875, | |
| "rewards/margins": 0.052978515625, | |
| "rewards/rejected": 0.051513671875, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2074688796680498, | |
| "grad_norm": 4.896926355089114, | |
| "learning_rate": 3.4482758620689656e-07, | |
| "logits/chosen": -0.0257568359375, | |
| "logits/rejected": 0.05859375, | |
| "logps/chosen": -390.0, | |
| "logps/rejected": -324.0, | |
| "loss": 0.6671, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0966796875, | |
| "rewards/margins": 0.09765625, | |
| "rewards/rejected": -0.000537872314453125, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22821576763485477, | |
| "grad_norm": 4.96333643751103, | |
| "learning_rate": 3.793103448275862e-07, | |
| "logits/chosen": 0.0032958984375, | |
| "logits/rejected": 0.003082275390625, | |
| "logps/chosen": -286.0, | |
| "logps/rejected": -296.0, | |
| "loss": 0.6556, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.06494140625, | |
| "rewards/margins": 0.05908203125, | |
| "rewards/rejected": 0.006011962890625, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24896265560165975, | |
| "grad_norm": 5.0505635416993035, | |
| "learning_rate": 4.1379310344827586e-07, | |
| "logits/chosen": -0.1982421875, | |
| "logits/rejected": -0.1962890625, | |
| "logps/chosen": -432.0, | |
| "logps/rejected": -312.0, | |
| "loss": 0.6511, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.07080078125, | |
| "rewards/margins": 0.06884765625, | |
| "rewards/rejected": -0.1396484375, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2697095435684647, | |
| "grad_norm": 5.108647391907642, | |
| "learning_rate": 4.482758620689655e-07, | |
| "logits/chosen": -0.2236328125, | |
| "logits/rejected": -0.1015625, | |
| "logps/chosen": -356.0, | |
| "logps/rejected": -358.0, | |
| "loss": 0.6199, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.0269775390625, | |
| "rewards/margins": 0.2109375, | |
| "rewards/rejected": -0.23828125, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.29045643153526973, | |
| "grad_norm": 6.872964653821398, | |
| "learning_rate": 4.827586206896552e-07, | |
| "logits/chosen": -0.228515625, | |
| "logits/rejected": -0.236328125, | |
| "logps/chosen": -376.0, | |
| "logps/rejected": -290.0, | |
| "loss": 0.6187, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.138671875, | |
| "rewards/margins": 0.205078125, | |
| "rewards/rejected": -0.34375, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3112033195020747, | |
| "grad_norm": 6.529806229977509, | |
| "learning_rate": 4.99981778257793e-07, | |
| "logits/chosen": -0.453125, | |
| "logits/rejected": -0.330078125, | |
| "logps/chosen": -456.0, | |
| "logps/rejected": -454.0, | |
| "loss": 0.5776, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.5234375, | |
| "rewards/margins": 0.19140625, | |
| "rewards/rejected": -0.71484375, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.33195020746887965, | |
| "grad_norm": 10.04226648908633, | |
| "learning_rate": 4.998360202572815e-07, | |
| "logits/chosen": -0.353515625, | |
| "logits/rejected": -0.486328125, | |
| "logps/chosen": -424.0, | |
| "logps/rejected": -430.0, | |
| "loss": 0.53, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1328125, | |
| "rewards/margins": 0.29296875, | |
| "rewards/rejected": -1.4296875, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.35269709543568467, | |
| "grad_norm": 10.68261151488953, | |
| "learning_rate": 4.995445892440316e-07, | |
| "logits/chosen": -0.435546875, | |
| "logits/rejected": -0.32421875, | |
| "logps/chosen": -412.0, | |
| "logps/rejected": -524.0, | |
| "loss": 0.5588, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.9140625, | |
| "rewards/margins": 0.6640625, | |
| "rewards/rejected": -1.578125, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37344398340248963, | |
| "grad_norm": 6.73033593795782, | |
| "learning_rate": 4.991076551440359e-07, | |
| "logits/chosen": -0.5, | |
| "logits/rejected": -0.57421875, | |
| "logps/chosen": -632.0, | |
| "logps/rejected": -648.0, | |
| "loss": 0.554, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.1015625, | |
| "rewards/margins": 0.86328125, | |
| "rewards/rejected": -1.96875, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3941908713692946, | |
| "grad_norm": 9.816239750114915, | |
| "learning_rate": 4.985254727224266e-07, | |
| "logits/chosen": -0.2412109375, | |
| "logits/rejected": -0.1767578125, | |
| "logps/chosen": -416.0, | |
| "logps/rejected": -476.0, | |
| "loss": 0.5251, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.66015625, | |
| "rewards/margins": 0.55859375, | |
| "rewards/rejected": -1.21875, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4149377593360996, | |
| "grad_norm": 7.484451121465821, | |
| "learning_rate": 4.977983814349285e-07, | |
| "logits/chosen": -0.345703125, | |
| "logits/rejected": -0.353515625, | |
| "logps/chosen": -432.0, | |
| "logps/rejected": -498.0, | |
| "loss": 0.5354, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.94921875, | |
| "rewards/margins": 0.71484375, | |
| "rewards/rejected": -1.6640625, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43568464730290457, | |
| "grad_norm": 11.768546958658863, | |
| "learning_rate": 4.969268052299307e-07, | |
| "logits/chosen": -0.373046875, | |
| "logits/rejected": -0.384765625, | |
| "logps/chosen": -362.0, | |
| "logps/rejected": -466.0, | |
| "loss": 0.5133, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.92578125, | |
| "rewards/margins": 1.046875, | |
| "rewards/rejected": -1.9765625, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.45643153526970953, | |
| "grad_norm": 11.283753967080404, | |
| "learning_rate": 4.959112523012938e-07, | |
| "logits/chosen": -0.58203125, | |
| "logits/rejected": -0.5859375, | |
| "logps/chosen": -660.0, | |
| "logps/rejected": -700.0, | |
| "loss": 0.4936, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.8828125, | |
| "rewards/margins": 0.82421875, | |
| "rewards/rejected": -2.703125, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.47717842323651455, | |
| "grad_norm": 8.364732476956826, | |
| "learning_rate": 4.947523147920345e-07, | |
| "logits/chosen": -0.5, | |
| "logits/rejected": -0.4375, | |
| "logps/chosen": -532.0, | |
| "logps/rejected": -470.0, | |
| "loss": 0.4925, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.109375, | |
| "rewards/margins": 0.4140625, | |
| "rewards/rejected": -1.5234375, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4979253112033195, | |
| "grad_norm": 11.692843266160883, | |
| "learning_rate": 4.934506684490621e-07, | |
| "logits/chosen": -0.4921875, | |
| "logits/rejected": -0.498046875, | |
| "logps/chosen": -448.0, | |
| "logps/rejected": -510.0, | |
| "loss": 0.4734, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.94921875, | |
| "rewards/margins": 0.89453125, | |
| "rewards/rejected": -1.84375, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5186721991701245, | |
| "grad_norm": 9.860975187266469, | |
| "learning_rate": 4.920070722291682e-07, | |
| "logits/chosen": -0.640625, | |
| "logits/rejected": -0.65625, | |
| "logps/chosen": -422.0, | |
| "logps/rejected": -572.0, | |
| "loss": 0.4902, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2109375, | |
| "rewards/margins": 1.8984375, | |
| "rewards/rejected": -3.109375, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5394190871369294, | |
| "grad_norm": 13.494804689613495, | |
| "learning_rate": 4.904223678564975e-07, | |
| "logits/chosen": -0.53515625, | |
| "logits/rejected": -0.455078125, | |
| "logps/chosen": -482.0, | |
| "logps/rejected": -494.0, | |
| "loss": 0.4797, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.1015625, | |
| "rewards/margins": 0.9296875, | |
| "rewards/rejected": -2.03125, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5601659751037344, | |
| "grad_norm": 12.703890931757634, | |
| "learning_rate": 4.886974793317607e-07, | |
| "logits/chosen": -0.376953125, | |
| "logits/rejected": -0.46875, | |
| "logps/chosen": -580.0, | |
| "logps/rejected": -676.0, | |
| "loss": 0.4202, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.4453125, | |
| "rewards/margins": 1.28125, | |
| "rewards/rejected": -2.71875, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5809128630705395, | |
| "grad_norm": 13.36542419547552, | |
| "learning_rate": 4.86833412393473e-07, | |
| "logits/chosen": -0.40234375, | |
| "logits/rejected": -0.44921875, | |
| "logps/chosen": -436.0, | |
| "logps/rejected": -470.0, | |
| "loss": 0.4683, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.671875, | |
| "rewards/margins": 0.65625, | |
| "rewards/rejected": -2.328125, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6016597510373444, | |
| "grad_norm": 18.998627170239676, | |
| "learning_rate": 4.848312539315334e-07, | |
| "logits/chosen": -0.7578125, | |
| "logits/rejected": -0.78515625, | |
| "logps/chosen": -528.0, | |
| "logps/rejected": -620.0, | |
| "loss": 0.4245, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.4453125, | |
| "rewards/margins": 1.6640625, | |
| "rewards/rejected": -3.109375, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6224066390041494, | |
| "grad_norm": 16.88135073283805, | |
| "learning_rate": 4.826921713534873e-07, | |
| "logits/chosen": -0.3515625, | |
| "logits/rejected": -0.53515625, | |
| "logps/chosen": -552.0, | |
| "logps/rejected": -640.0, | |
| "loss": 0.4323, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.984375, | |
| "rewards/margins": 0.6796875, | |
| "rewards/rejected": -2.65625, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6431535269709544, | |
| "grad_norm": 17.945027682392745, | |
| "learning_rate": 4.804174119038404e-07, | |
| "logits/chosen": -0.4140625, | |
| "logits/rejected": -0.40234375, | |
| "logps/chosen": -486.0, | |
| "logps/rejected": -592.0, | |
| "loss": 0.4525, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.2890625, | |
| "rewards/margins": 1.515625, | |
| "rewards/rejected": -2.8125, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6639004149377593, | |
| "grad_norm": 9.847511875674074, | |
| "learning_rate": 4.78008301936823e-07, | |
| "logits/chosen": -0.67578125, | |
| "logits/rejected": -0.5390625, | |
| "logps/chosen": -548.0, | |
| "logps/rejected": -676.0, | |
| "loss": 0.4356, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.234375, | |
| "rewards/margins": 1.734375, | |
| "rewards/rejected": -2.96875, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6846473029045643, | |
| "grad_norm": 11.752705584029137, | |
| "learning_rate": 4.754662461430258e-07, | |
| "logits/chosen": -0.578125, | |
| "logits/rejected": -0.6328125, | |
| "logps/chosen": -576.0, | |
| "logps/rejected": -588.0, | |
| "loss": 0.418, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.140625, | |
| "rewards/margins": 1.34375, | |
| "rewards/rejected": -3.484375, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7053941908713693, | |
| "grad_norm": 16.053432840092594, | |
| "learning_rate": 4.727927267303612e-07, | |
| "logits/chosen": -0.5390625, | |
| "logits/rejected": -0.423828125, | |
| "logps/chosen": -494.0, | |
| "logps/rejected": -636.0, | |
| "loss": 0.4206, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.109375, | |
| "rewards/margins": 2.0, | |
| "rewards/rejected": -4.09375, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7261410788381742, | |
| "grad_norm": 11.842580622475824, | |
| "learning_rate": 4.699893025598255e-07, | |
| "logits/chosen": -0.66796875, | |
| "logits/rejected": -0.671875, | |
| "logps/chosen": -588.0, | |
| "logps/rejected": -660.0, | |
| "loss": 0.4345, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.28125, | |
| "rewards/margins": 0.435546875, | |
| "rewards/rejected": -2.703125, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7468879668049793, | |
| "grad_norm": 14.819519415062294, | |
| "learning_rate": 4.67057608236567e-07, | |
| "logits/chosen": -0.75390625, | |
| "logits/rejected": -0.69140625, | |
| "logps/chosen": -536.0, | |
| "logps/rejected": -696.0, | |
| "loss": 0.3681, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.609375, | |
| "rewards/margins": 1.7109375, | |
| "rewards/rejected": -4.3125, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7676348547717843, | |
| "grad_norm": 10.366281203736689, | |
| "learning_rate": 4.6399935315678893e-07, | |
| "logits/chosen": -0.408203125, | |
| "logits/rejected": -0.57421875, | |
| "logps/chosen": -660.0, | |
| "logps/rejected": -700.0, | |
| "loss": 0.3893, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.4375, | |
| "rewards/margins": 1.2578125, | |
| "rewards/rejected": -4.6875, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7883817427385892, | |
| "grad_norm": 17.40885052546667, | |
| "learning_rate": 4.608163205110447e-07, | |
| "logits/chosen": -0.546875, | |
| "logits/rejected": -0.515625, | |
| "logps/chosen": -620.0, | |
| "logps/rejected": -592.0, | |
| "loss": 0.4214, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.90625, | |
| "rewards/margins": 0.26171875, | |
| "rewards/rejected": -3.15625, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8091286307053942, | |
| "grad_norm": 12.056546567751306, | |
| "learning_rate": 4.5751036624450445e-07, | |
| "logits/chosen": -0.8515625, | |
| "logits/rejected": -0.7890625, | |
| "logps/chosen": -636.0, | |
| "logps/rejected": -660.0, | |
| "loss": 0.4062, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.125, | |
| "rewards/margins": 0.84375, | |
| "rewards/rejected": -2.96875, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8298755186721992, | |
| "grad_norm": 19.517095745745852, | |
| "learning_rate": 4.540834179748012e-07, | |
| "logits/chosen": -0.41015625, | |
| "logits/rejected": -0.439453125, | |
| "logps/chosen": -376.0, | |
| "logps/rejected": -588.0, | |
| "loss": 0.3903, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.25, | |
| "rewards/margins": 1.5859375, | |
| "rewards/rejected": -2.84375, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8506224066390041, | |
| "grad_norm": 11.340627516184107, | |
| "learning_rate": 4.5053747386808564e-07, | |
| "logits/chosen": -0.5703125, | |
| "logits/rejected": -0.61328125, | |
| "logps/chosen": -556.0, | |
| "logps/rejected": -668.0, | |
| "loss": 0.3894, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.171875, | |
| "rewards/margins": 1.09375, | |
| "rewards/rejected": -3.265625, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8713692946058091, | |
| "grad_norm": 21.853682876132318, | |
| "learning_rate": 4.4687460147394706e-07, | |
| "logits/chosen": -0.73828125, | |
| "logits/rejected": -0.76953125, | |
| "logps/chosen": -612.0, | |
| "logps/rejected": -820.0, | |
| "loss": 0.3944, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.765625, | |
| "rewards/margins": 2.0, | |
| "rewards/rejected": -3.78125, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8921161825726142, | |
| "grad_norm": 9.993671586744828, | |
| "learning_rate": 4.4309693651987726e-07, | |
| "logits/chosen": -0.87109375, | |
| "logits/rejected": -0.796875, | |
| "logps/chosen": -776.0, | |
| "logps/rejected": -804.0, | |
| "loss": 0.3534, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.640625, | |
| "rewards/margins": 1.65625, | |
| "rewards/rejected": -4.28125, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9128630705394191, | |
| "grad_norm": 10.792350406013442, | |
| "learning_rate": 4.3920668166598273e-07, | |
| "logits/chosen": -0.7421875, | |
| "logits/rejected": -0.9140625, | |
| "logps/chosen": -612.0, | |
| "logps/rejected": -804.0, | |
| "loss": 0.321, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.625, | |
| "rewards/margins": 2.328125, | |
| "rewards/rejected": -4.9375, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9336099585062241, | |
| "grad_norm": 17.038293447631034, | |
| "learning_rate": 4.352061052206695e-07, | |
| "logits/chosen": -0.828125, | |
| "logits/rejected": -0.859375, | |
| "logps/chosen": -488.0, | |
| "logps/rejected": -680.0, | |
| "loss": 0.3409, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.3125, | |
| "rewards/margins": 1.78125, | |
| "rewards/rejected": -4.09375, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9543568464730291, | |
| "grad_norm": 13.168279519384841, | |
| "learning_rate": 4.3109753981805045e-07, | |
| "logits/chosen": -0.6171875, | |
| "logits/rejected": -0.62890625, | |
| "logps/chosen": -660.0, | |
| "logps/rejected": -760.0, | |
| "loss": 0.3341, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.8828125, | |
| "rewards/margins": 2.6875, | |
| "rewards/rejected": -4.5625, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.975103734439834, | |
| "grad_norm": 36.8656206464395, | |
| "learning_rate": 4.2688338105784584e-07, | |
| "logits/chosen": -0.55078125, | |
| "logits/rejected": -0.66796875, | |
| "logps/chosen": -736.0, | |
| "logps/rejected": -764.0, | |
| "loss": 0.339, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.515625, | |
| "rewards/margins": 2.234375, | |
| "rewards/rejected": -4.75, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.995850622406639, | |
| "grad_norm": 14.773211954922196, | |
| "learning_rate": 4.2256608610857014e-07, | |
| "logits/chosen": -0.86328125, | |
| "logits/rejected": -0.78515625, | |
| "logps/chosen": -564.0, | |
| "logps/rejected": -780.0, | |
| "loss": 0.3697, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.03125, | |
| "rewards/margins": 2.671875, | |
| "rewards/rejected": -4.71875, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.016597510373444, | |
| "grad_norm": 15.162967960468519, | |
| "learning_rate": 4.181481722748197e-07, | |
| "logits/chosen": -0.78125, | |
| "logits/rejected": -0.84765625, | |
| "logps/chosen": -476.0, | |
| "logps/rejected": -656.0, | |
| "loss": 0.2396, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.921875, | |
| "rewards/margins": 2.109375, | |
| "rewards/rejected": -4.03125, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.037344398340249, | |
| "grad_norm": 19.132102905117456, | |
| "learning_rate": 4.136322155294968e-07, | |
| "logits/chosen": -0.6171875, | |
| "logits/rejected": -0.80859375, | |
| "logps/chosen": -688.0, | |
| "logps/rejected": -896.0, | |
| "loss": 0.1539, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.65625, | |
| "rewards/margins": 2.796875, | |
| "rewards/rejected": -5.4375, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.058091286307054, | |
| "grad_norm": 8.59971354606536, | |
| "learning_rate": 4.090208490118253e-07, | |
| "logits/chosen": -0.73828125, | |
| "logits/rejected": -0.8046875, | |
| "logps/chosen": -796.0, | |
| "logps/rejected": -920.0, | |
| "loss": 0.1643, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.609375, | |
| "rewards/margins": 2.78125, | |
| "rewards/rejected": -5.40625, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0788381742738589, | |
| "grad_norm": 16.6448813170998, | |
| "learning_rate": 4.0431676149203457e-07, | |
| "logits/chosen": -0.4375, | |
| "logits/rejected": -0.49609375, | |
| "logps/chosen": -454.0, | |
| "logps/rejected": -912.0, | |
| "loss": 0.1488, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.390625, | |
| "rewards/margins": 4.53125, | |
| "rewards/rejected": -6.90625, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.099585062240664, | |
| "grad_norm": 9.144636292899186, | |
| "learning_rate": 3.995226958036058e-07, | |
| "logits/chosen": -0.42578125, | |
| "logits/rejected": -0.56640625, | |
| "logps/chosen": -712.0, | |
| "logps/rejected": -1012.0, | |
| "loss": 0.1644, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.859375, | |
| "rewards/margins": 3.53125, | |
| "rewards/rejected": -7.375, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.120331950207469, | |
| "grad_norm": 14.589338073597446, | |
| "learning_rate": 3.9464144724399605e-07, | |
| "logits/chosen": -0.51171875, | |
| "logits/rejected": -0.421875, | |
| "logps/chosen": -680.0, | |
| "logps/rejected": -1040.0, | |
| "loss": 0.1382, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.5625, | |
| "rewards/margins": 3.6875, | |
| "rewards/rejected": -6.25, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1410788381742738, | |
| "grad_norm": 7.897607849867475, | |
| "learning_rate": 3.896758619447714e-07, | |
| "logits/chosen": -0.75, | |
| "logits/rejected": -0.6796875, | |
| "logps/chosen": -552.0, | |
| "logps/rejected": -888.0, | |
| "loss": 0.1505, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.4375, | |
| "rewards/margins": 3.96875, | |
| "rewards/rejected": -6.40625, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.161825726141079, | |
| "grad_norm": 10.375941833434622, | |
| "learning_rate": 3.846288352121003e-07, | |
| "logits/chosen": -0.76953125, | |
| "logits/rejected": -0.75390625, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -892.0, | |
| "loss": 0.1659, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.484375, | |
| "rewards/margins": 2.515625, | |
| "rewards/rejected": -6.0, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1825726141078838, | |
| "grad_norm": 9.709492497216598, | |
| "learning_rate": 3.795033098385744e-07, | |
| "logits/chosen": -0.62109375, | |
| "logits/rejected": -0.61328125, | |
| "logps/chosen": -612.0, | |
| "logps/rejected": -952.0, | |
| "loss": 0.1486, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -3.1875, | |
| "rewards/margins": 2.953125, | |
| "rewards/rejected": -6.125, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2033195020746887, | |
| "grad_norm": 18.021248024950815, | |
| "learning_rate": 3.7430227438734086e-07, | |
| "logits/chosen": -0.76171875, | |
| "logits/rejected": -0.7109375, | |
| "logps/chosen": -872.0, | |
| "logps/rejected": -1168.0, | |
| "loss": 0.128, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -4.28125, | |
| "rewards/margins": 3.375, | |
| "rewards/rejected": -7.625, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2240663900414939, | |
| "grad_norm": 10.12277474430289, | |
| "learning_rate": 3.690287614495481e-07, | |
| "logits/chosen": -0.875, | |
| "logits/rejected": -0.83984375, | |
| "logps/chosen": -760.0, | |
| "logps/rejected": -1216.0, | |
| "loss": 0.1163, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.0625, | |
| "rewards/margins": 4.375, | |
| "rewards/rejected": -8.4375, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2448132780082988, | |
| "grad_norm": 16.358228454815244, | |
| "learning_rate": 3.6368584587611854e-07, | |
| "logits/chosen": -0.70703125, | |
| "logits/rejected": -0.75390625, | |
| "logps/chosen": -596.0, | |
| "logps/rejected": -1012.0, | |
| "loss": 0.1356, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.84375, | |
| "rewards/margins": 4.1875, | |
| "rewards/rejected": -7.03125, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2655601659751037, | |
| "grad_norm": 13.263894273990404, | |
| "learning_rate": 3.582766429848818e-07, | |
| "logits/chosen": -0.91796875, | |
| "logits/rejected": -0.8671875, | |
| "logps/chosen": -740.0, | |
| "logps/rejected": -952.0, | |
| "loss": 0.1465, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.75, | |
| "rewards/margins": 2.828125, | |
| "rewards/rejected": -6.5625, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2863070539419086, | |
| "grad_norm": 13.824768954350477, | |
| "learning_rate": 3.528043067441123e-07, | |
| "logits/chosen": -0.5234375, | |
| "logits/rejected": -0.5703125, | |
| "logps/chosen": -470.0, | |
| "logps/rejected": -772.0, | |
| "loss": 0.1661, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.265625, | |
| "rewards/margins": 2.6875, | |
| "rewards/rejected": -4.96875, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3070539419087137, | |
| "grad_norm": 11.669758162071892, | |
| "learning_rate": 3.472720279335305e-07, | |
| "logits/chosen": -0.8828125, | |
| "logits/rejected": -0.8359375, | |
| "logps/chosen": -756.0, | |
| "logps/rejected": -1056.0, | |
| "loss": 0.1428, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.578125, | |
| "rewards/margins": 3.46875, | |
| "rewards/rejected": -7.03125, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3278008298755186, | |
| "grad_norm": 11.36461562294381, | |
| "learning_rate": 3.4168303228384097e-07, | |
| "logits/chosen": -0.73046875, | |
| "logits/rejected": -0.82421875, | |
| "logps/chosen": -740.0, | |
| "logps/rejected": -1056.0, | |
| "loss": 0.1497, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.625, | |
| "rewards/margins": 2.90625, | |
| "rewards/rejected": -6.53125, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3485477178423237, | |
| "grad_norm": 10.722964833155572, | |
| "learning_rate": 3.36040578595891e-07, | |
| "logits/chosen": -0.8046875, | |
| "logits/rejected": -0.8828125, | |
| "logps/chosen": -696.0, | |
| "logps/rejected": -948.0, | |
| "loss": 0.1362, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.828125, | |
| "rewards/margins": 2.828125, | |
| "rewards/rejected": -5.65625, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3692946058091287, | |
| "grad_norm": 9.800531956736956, | |
| "learning_rate": 3.303479568405467e-07, | |
| "logits/chosen": -0.7734375, | |
| "logits/rejected": -0.86328125, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -848.0, | |
| "loss": 0.1266, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.140625, | |
| "rewards/margins": 2.1875, | |
| "rewards/rejected": -5.34375, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.3900414937759336, | |
| "grad_norm": 10.691535240032913, | |
| "learning_rate": 3.246084862403949e-07, | |
| "logits/chosen": -0.65625, | |
| "logits/rejected": -0.6875, | |
| "logps/chosen": -800.0, | |
| "logps/rejected": -1224.0, | |
| "loss": 0.1275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.421875, | |
| "rewards/margins": 5.125, | |
| "rewards/rejected": -8.5, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4107883817427385, | |
| "grad_norm": 19.2843124932583, | |
| "learning_rate": 3.188255133343896e-07, | |
| "logits/chosen": -0.87109375, | |
| "logits/rejected": -0.8203125, | |
| "logps/chosen": -804.0, | |
| "logps/rejected": -1120.0, | |
| "loss": 0.1339, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.59375, | |
| "rewards/margins": 2.859375, | |
| "rewards/rejected": -6.4375, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4315352697095436, | |
| "grad_norm": 13.391310848340655, | |
| "learning_rate": 3.1300241002656964e-07, | |
| "logits/chosen": -0.7578125, | |
| "logits/rejected": -0.75390625, | |
| "logps/chosen": -768.0, | |
| "logps/rejected": -1200.0, | |
| "loss": 0.1261, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.15625, | |
| "rewards/margins": 5.0, | |
| "rewards/rejected": -8.1875, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.4522821576763485, | |
| "grad_norm": 12.375177940845006, | |
| "learning_rate": 3.071425716199882e-07, | |
| "logits/chosen": -0.92578125, | |
| "logits/rejected": -0.9765625, | |
| "logps/chosen": -624.0, | |
| "logps/rejected": -1128.0, | |
| "loss": 0.1246, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.84375, | |
| "rewards/margins": 4.96875, | |
| "rewards/rejected": -7.8125, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.4730290456431536, | |
| "grad_norm": 14.850198748586445, | |
| "learning_rate": 3.0124941483699753e-07, | |
| "logits/chosen": -0.8671875, | |
| "logits/rejected": -0.87109375, | |
| "logps/chosen": -804.0, | |
| "logps/rejected": -1096.0, | |
| "loss": 0.1065, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.21875, | |
| "rewards/margins": 3.734375, | |
| "rewards/rejected": -7.9375, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.4937759336099585, | |
| "grad_norm": 12.507368273272412, | |
| "learning_rate": 2.953263758270459e-07, | |
| "logits/chosen": -0.66796875, | |
| "logits/rejected": -0.796875, | |
| "logps/chosen": -564.0, | |
| "logps/rejected": -788.0, | |
| "loss": 0.1412, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.5625, | |
| "rewards/margins": 2.84375, | |
| "rewards/rejected": -5.375, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5145228215767634, | |
| "grad_norm": 12.416603553859566, | |
| "learning_rate": 2.8937690816314577e-07, | |
| "logits/chosen": -0.91015625, | |
| "logits/rejected": -0.953125, | |
| "logps/chosen": -648.0, | |
| "logps/rejected": -996.0, | |
| "loss": 0.0877, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.25, | |
| "rewards/margins": 3.46875, | |
| "rewards/rejected": -6.71875, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5352697095435683, | |
| "grad_norm": 30.476655601465435, | |
| "learning_rate": 2.834044808281841e-07, | |
| "logits/chosen": -0.76953125, | |
| "logits/rejected": -0.8515625, | |
| "logps/chosen": -748.0, | |
| "logps/rejected": -1168.0, | |
| "loss": 0.1211, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.609375, | |
| "rewards/margins": 4.78125, | |
| "rewards/rejected": -8.375, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5560165975103735, | |
| "grad_norm": 16.239887747462344, | |
| "learning_rate": 2.774125761922463e-07, | |
| "logits/chosen": -0.7734375, | |
| "logits/rejected": -0.78515625, | |
| "logps/chosen": -588.0, | |
| "logps/rejected": -972.0, | |
| "loss": 0.1176, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.9375, | |
| "rewards/margins": 4.15625, | |
| "rewards/rejected": -7.09375, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.5767634854771784, | |
| "grad_norm": 17.482193111874572, | |
| "learning_rate": 2.714046879821358e-07, | |
| "logits/chosen": -0.73046875, | |
| "logits/rejected": -0.8515625, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -1224.0, | |
| "loss": 0.1128, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.359375, | |
| "rewards/margins": 4.78125, | |
| "rewards/rejected": -8.125, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.5975103734439835, | |
| "grad_norm": 8.31205194284139, | |
| "learning_rate": 2.653843192442699e-07, | |
| "logits/chosen": -0.70703125, | |
| "logits/rejected": -0.7578125, | |
| "logps/chosen": -724.0, | |
| "logps/rejected": -1176.0, | |
| "loss": 0.1338, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.609375, | |
| "rewards/margins": 5.15625, | |
| "rewards/rejected": -8.75, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.6182572614107884, | |
| "grad_norm": 10.622352189872272, | |
| "learning_rate": 2.5935498030214397e-07, | |
| "logits/chosen": -0.81640625, | |
| "logits/rejected": -0.8125, | |
| "logps/chosen": -756.0, | |
| "logps/rejected": -1080.0, | |
| "loss": 0.0991, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -3.609375, | |
| "rewards/margins": 3.21875, | |
| "rewards/rejected": -6.8125, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.6390041493775933, | |
| "grad_norm": 9.417824682877646, | |
| "learning_rate": 2.533201867095504e-07, | |
| "logits/chosen": -0.6953125, | |
| "logits/rejected": -0.76171875, | |
| "logps/chosen": -840.0, | |
| "logps/rejected": -1312.0, | |
| "loss": 0.1458, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.625, | |
| "rewards/margins": 5.59375, | |
| "rewards/rejected": -10.25, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6597510373443982, | |
| "grad_norm": 11.234878509540781, | |
| "learning_rate": 2.472834572007493e-07, | |
| "logits/chosen": -0.921875, | |
| "logits/rejected": -0.8671875, | |
| "logps/chosen": -608.0, | |
| "logps/rejected": -1048.0, | |
| "loss": 0.1348, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.296875, | |
| "rewards/margins": 4.5625, | |
| "rewards/rejected": -7.84375, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6804979253112033, | |
| "grad_norm": 14.091480064608236, | |
| "learning_rate": 2.4124831163878427e-07, | |
| "logits/chosen": -0.81640625, | |
| "logits/rejected": -0.8515625, | |
| "logps/chosen": -764.0, | |
| "logps/rejected": -1240.0, | |
| "loss": 0.1137, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.0, | |
| "rewards/margins": 4.59375, | |
| "rewards/rejected": -8.625, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7012448132780082, | |
| "grad_norm": 6.963974139105738, | |
| "learning_rate": 2.3521826896313965e-07, | |
| "logits/chosen": -0.9375, | |
| "logits/rejected": -0.9609375, | |
| "logps/chosen": -840.0, | |
| "logps/rejected": -1384.0, | |
| "loss": 0.1082, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.90625, | |
| "rewards/margins": 5.5, | |
| "rewards/rejected": -10.375, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7219917012448134, | |
| "grad_norm": 15.006582270008211, | |
| "learning_rate": 2.2919684513793704e-07, | |
| "logits/chosen": -0.78515625, | |
| "logits/rejected": -0.8515625, | |
| "logps/chosen": -764.0, | |
| "logps/rejected": -1240.0, | |
| "loss": 0.0973, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.3125, | |
| "rewards/margins": 4.875, | |
| "rewards/rejected": -9.1875, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7427385892116183, | |
| "grad_norm": 7.046277080292156, | |
| "learning_rate": 2.2318755110186602e-07, | |
| "logits/chosen": -0.69140625, | |
| "logits/rejected": -0.77734375, | |
| "logps/chosen": -604.0, | |
| "logps/rejected": -996.0, | |
| "loss": 0.1284, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.03125, | |
| "rewards/margins": 3.765625, | |
| "rewards/rejected": -6.8125, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7634854771784232, | |
| "grad_norm": 11.157911173958768, | |
| "learning_rate": 2.171938907210457e-07, | |
| "logits/chosen": -0.77734375, | |
| "logits/rejected": -0.72265625, | |
| "logps/chosen": -712.0, | |
| "logps/rejected": -1120.0, | |
| "loss": 0.0941, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.0625, | |
| "rewards/margins": 4.21875, | |
| "rewards/rejected": -8.25, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.784232365145228, | |
| "grad_norm": 7.821478727431665, | |
| "learning_rate": 2.1121935874600914e-07, | |
| "logits/chosen": -0.58203125, | |
| "logits/rejected": -0.63671875, | |
| "logps/chosen": -712.0, | |
| "logps/rejected": -1064.0, | |
| "loss": 0.0788, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.765625, | |
| "rewards/margins": 3.90625, | |
| "rewards/rejected": -7.65625, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8049792531120332, | |
| "grad_norm": 19.017867203624398, | |
| "learning_rate": 2.052674387740039e-07, | |
| "logits/chosen": -0.71875, | |
| "logits/rejected": -0.80859375, | |
| "logps/chosen": -732.0, | |
| "logps/rejected": -1200.0, | |
| "loss": 0.1237, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.46875, | |
| "rewards/margins": 4.875, | |
| "rewards/rejected": -9.375, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.8257261410788381, | |
| "grad_norm": 8.469359704887415, | |
| "learning_rate": 1.9934160121779511e-07, | |
| "logits/chosen": -0.6484375, | |
| "logits/rejected": -0.64453125, | |
| "logps/chosen": -768.0, | |
| "logps/rejected": -1264.0, | |
| "loss": 0.095, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.703125, | |
| "rewards/margins": 4.9375, | |
| "rewards/rejected": -8.625, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8464730290456433, | |
| "grad_norm": 14.658113601459338, | |
| "learning_rate": 1.9344530128215644e-07, | |
| "logits/chosen": -0.78515625, | |
| "logits/rejected": -0.890625, | |
| "logps/chosen": -816.0, | |
| "logps/rejected": -1224.0, | |
| "loss": 0.116, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.546875, | |
| "rewards/margins": 5.09375, | |
| "rewards/rejected": -8.625, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8672199170124482, | |
| "grad_norm": 26.19755259819255, | |
| "learning_rate": 1.8758197694922812e-07, | |
| "logits/chosen": -0.609375, | |
| "logits/rejected": -0.69140625, | |
| "logps/chosen": -816.0, | |
| "logps/rejected": -1184.0, | |
| "loss": 0.1326, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.71875, | |
| "rewards/margins": 4.0625, | |
| "rewards/rejected": -8.75, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.887966804979253, | |
| "grad_norm": 14.081930494469324, | |
| "learning_rate": 1.8175504697391728e-07, | |
| "logits/chosen": -0.79296875, | |
| "logits/rejected": -0.76171875, | |
| "logps/chosen": -804.0, | |
| "logps/rejected": -1168.0, | |
| "loss": 0.0909, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.34375, | |
| "rewards/margins": 4.65625, | |
| "rewards/rejected": -9.0, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.908713692946058, | |
| "grad_norm": 8.117586964004555, | |
| "learning_rate": 1.7596790889050907e-07, | |
| "logits/chosen": -0.8671875, | |
| "logits/rejected": -0.8671875, | |
| "logps/chosen": -652.0, | |
| "logps/rejected": -1056.0, | |
| "loss": 0.1183, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.34375, | |
| "rewards/margins": 4.15625, | |
| "rewards/rejected": -7.5, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.929460580912863, | |
| "grad_norm": 14.392383053670414, | |
| "learning_rate": 1.702239370316515e-07, | |
| "logits/chosen": -0.78515625, | |
| "logits/rejected": -0.8671875, | |
| "logps/chosen": -768.0, | |
| "logps/rejected": -1200.0, | |
| "loss": 0.1086, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.78125, | |
| "rewards/margins": 4.78125, | |
| "rewards/rejected": -8.5625, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.950207468879668, | |
| "grad_norm": 11.827232109289305, | |
| "learning_rate": 1.645264805608674e-07, | |
| "logits/chosen": -0.83984375, | |
| "logits/rejected": -0.80078125, | |
| "logps/chosen": -872.0, | |
| "logps/rejected": -1304.0, | |
| "loss": 0.0874, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -5.125, | |
| "rewards/margins": 4.34375, | |
| "rewards/rejected": -9.5, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9709543568464731, | |
| "grad_norm": 17.849319216864608, | |
| "learning_rate": 1.58878861519743e-07, | |
| "logits/chosen": -0.84375, | |
| "logits/rejected": -0.8828125, | |
| "logps/chosen": -768.0, | |
| "logps/rejected": -1144.0, | |
| "loss": 0.1201, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.46875, | |
| "rewards/margins": 3.640625, | |
| "rewards/rejected": -8.125, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.991701244813278, | |
| "grad_norm": 17.73316536429117, | |
| "learning_rate": 1.5328437289093015e-07, | |
| "logits/chosen": -0.90234375, | |
| "logits/rejected": -0.87890625, | |
| "logps/chosen": -772.0, | |
| "logps/rejected": -1336.0, | |
| "loss": 0.0932, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.90625, | |
| "rewards/margins": 5.0, | |
| "rewards/rejected": -9.875, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.012448132780083, | |
| "grad_norm": 1.7808893883270032, | |
| "learning_rate": 1.4774627667809223e-07, | |
| "logits/chosen": -1.1015625, | |
| "logits/rejected": -1.09375, | |
| "logps/chosen": -808.0, | |
| "logps/rejected": -1184.0, | |
| "loss": 0.0532, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.40625, | |
| "rewards/margins": 4.5625, | |
| "rewards/rejected": -9.0, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.033195020746888, | |
| "grad_norm": 13.54516810123686, | |
| "learning_rate": 1.4226780200391267e-07, | |
| "logits/chosen": -0.330078125, | |
| "logits/rejected": -0.474609375, | |
| "logps/chosen": -884.0, | |
| "logps/rejected": -1552.0, | |
| "loss": 0.026, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -5.96875, | |
| "rewards/margins": 5.53125, | |
| "rewards/rejected": -11.5, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.0539419087136928, | |
| "grad_norm": 1.8805874765997683, | |
| "learning_rate": 1.3685214322727596e-07, | |
| "logits/chosen": -0.7109375, | |
| "logits/rejected": -0.84765625, | |
| "logps/chosen": -948.0, | |
| "logps/rejected": -1584.0, | |
| "loss": 0.0162, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.40625, | |
| "rewards/margins": 6.46875, | |
| "rewards/rejected": -12.875, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.074688796680498, | |
| "grad_norm": 1.3911921785236845, | |
| "learning_rate": 1.3150245808071854e-07, | |
| "logits/chosen": -0.75390625, | |
| "logits/rejected": -0.80078125, | |
| "logps/chosen": -956.0, | |
| "logps/rejected": -1600.0, | |
| "loss": 0.0125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.53125, | |
| "rewards/margins": 6.53125, | |
| "rewards/rejected": -13.0625, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.095435684647303, | |
| "grad_norm": 2.437460163435022, | |
| "learning_rate": 1.2622186582923566e-07, | |
| "logits/chosen": -0.578125, | |
| "logits/rejected": -0.62109375, | |
| "logps/chosen": -1136.0, | |
| "logps/rejected": -1608.0, | |
| "loss": 0.0135, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.40625, | |
| "rewards/margins": 5.46875, | |
| "rewards/rejected": -11.875, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.116182572614108, | |
| "grad_norm": 3.5023691171107245, | |
| "learning_rate": 1.2101344545151713e-07, | |
| "logits/chosen": -0.63671875, | |
| "logits/rejected": -0.609375, | |
| "logps/chosen": -844.0, | |
| "logps/rejected": -1552.0, | |
| "loss": 0.0142, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -5.34375, | |
| "rewards/margins": 7.15625, | |
| "rewards/rejected": -12.5, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.136929460580913, | |
| "grad_norm": 4.086843498233057, | |
| "learning_rate": 1.1588023384467335e-07, | |
| "logits/chosen": -0.7578125, | |
| "logits/rejected": -0.81640625, | |
| "logps/chosen": -1112.0, | |
| "logps/rejected": -1848.0, | |
| "loss": 0.0212, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.6875, | |
| "rewards/margins": 7.84375, | |
| "rewards/rejected": -15.5625, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.1576763485477177, | |
| "grad_norm": 6.577269547407749, | |
| "learning_rate": 1.1082522405349834e-07, | |
| "logits/chosen": -0.61328125, | |
| "logits/rejected": -0.61328125, | |
| "logps/chosen": -1080.0, | |
| "logps/rejected": -1760.0, | |
| "loss": 0.0151, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.125, | |
| "rewards/margins": 7.53125, | |
| "rewards/rejected": -14.625, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.1784232365145226, | |
| "grad_norm": 3.091476943089419, | |
| "learning_rate": 1.0585136352530172e-07, | |
| "logits/chosen": -0.92578125, | |
| "logits/rejected": -0.9375, | |
| "logps/chosen": -1080.0, | |
| "logps/rejected": -1600.0, | |
| "loss": 0.0155, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.03125, | |
| "rewards/margins": 5.28125, | |
| "rewards/rejected": -12.3125, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.199170124481328, | |
| "grad_norm": 6.810330147221256, | |
| "learning_rate": 1.0096155239132675e-07, | |
| "logits/chosen": -0.61328125, | |
| "logits/rejected": -0.63671875, | |
| "logps/chosen": -776.0, | |
| "logps/rejected": -1272.0, | |
| "loss": 0.0206, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -5.3125, | |
| "rewards/margins": 5.5, | |
| "rewards/rejected": -10.875, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.219917012448133, | |
| "grad_norm": 1.5020933483656527, | |
| "learning_rate": 9.615864177575836e-08, | |
| "logits/chosen": -0.7734375, | |
| "logits/rejected": -0.81640625, | |
| "logps/chosen": -1384.0, | |
| "logps/rejected": -1984.0, | |
| "loss": 0.0131, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.78125, | |
| "rewards/margins": 8.5, | |
| "rewards/rejected": -16.25, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.240663900414938, | |
| "grad_norm": 1.8136977524511886, | |
| "learning_rate": 9.144543213330493e-08, | |
| "logits/chosen": -0.85546875, | |
| "logits/rejected": -0.8984375, | |
| "logps/chosen": -1072.0, | |
| "logps/rejected": -1904.0, | |
| "loss": 0.0115, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.90625, | |
| "rewards/margins": 8.5625, | |
| "rewards/rejected": -15.5, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.2614107883817427, | |
| "grad_norm": 1.1956894404186822, | |
| "learning_rate": 8.682467161632508e-08, | |
| "logits/chosen": -0.7734375, | |
| "logits/rejected": -0.859375, | |
| "logps/chosen": -956.0, | |
| "logps/rejected": -1688.0, | |
| "loss": 0.0129, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.0625, | |
| "rewards/margins": 7.3125, | |
| "rewards/rejected": -14.375, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.2821576763485476, | |
| "grad_norm": 13.368765403015455, | |
| "learning_rate": 8.229905447244942e-08, | |
| "logits/chosen": -0.76171875, | |
| "logits/rejected": -0.796875, | |
| "logps/chosen": -1304.0, | |
| "logps/rejected": -1776.0, | |
| "loss": 0.0196, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.59375, | |
| "rewards/margins": 6.78125, | |
| "rewards/rejected": -14.375, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.3029045643153525, | |
| "grad_norm": 2.690805031469103, | |
| "learning_rate": 7.787121947363393e-08, | |
| "logits/chosen": -0.66796875, | |
| "logits/rejected": -0.7109375, | |
| "logps/chosen": -1224.0, | |
| "logps/rejected": -1952.0, | |
| "loss": 0.0219, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.25, | |
| "rewards/margins": 7.59375, | |
| "rewards/rejected": -15.875, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.323651452282158, | |
| "grad_norm": 2.4290925851945926, | |
| "learning_rate": 7.354374837755919e-08, | |
| "logits/chosen": -0.69921875, | |
| "logits/rejected": -0.7578125, | |
| "logps/chosen": -1320.0, | |
| "logps/rejected": -2080.0, | |
| "loss": 0.0158, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.0625, | |
| "rewards/margins": 8.375, | |
| "rewards/rejected": -17.5, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.3443983402489628, | |
| "grad_norm": 1.5008186930985454, | |
| "learning_rate": 6.931916442227335e-08, | |
| "logits/chosen": -0.78125, | |
| "logits/rejected": -0.77734375, | |
| "logps/chosen": -1104.0, | |
| "logps/rejected": -1664.0, | |
| "loss": 0.0111, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.8125, | |
| "rewards/margins": 6.125, | |
| "rewards/rejected": -13.9375, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.3651452282157677, | |
| "grad_norm": 0.4121106095749944, | |
| "learning_rate": 6.519993085495622e-08, | |
| "logits/chosen": -0.6171875, | |
| "logits/rejected": -0.68359375, | |
| "logps/chosen": -1048.0, | |
| "logps/rejected": -1720.0, | |
| "loss": 0.0166, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.28125, | |
| "rewards/margins": 6.78125, | |
| "rewards/rejected": -13.0625, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.3858921161825726, | |
| "grad_norm": 2.9431081086738957, | |
| "learning_rate": 6.118844949566293e-08, | |
| "logits/chosen": -0.66015625, | |
| "logits/rejected": -0.8046875, | |
| "logps/chosen": -1012.0, | |
| "logps/rejected": -1800.0, | |
| "loss": 0.0097, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.78125, | |
| "rewards/margins": 8.5, | |
| "rewards/rejected": -15.25, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.4066390041493775, | |
| "grad_norm": 3.2000573542150286, | |
| "learning_rate": 5.728705933688349e-08, | |
| "logits/chosen": -0.71875, | |
| "logits/rejected": -0.69140625, | |
| "logps/chosen": -1088.0, | |
| "logps/rejected": -1936.0, | |
| "loss": 0.0179, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.71875, | |
| "rewards/margins": 8.6875, | |
| "rewards/rejected": -16.375, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.4273858921161824, | |
| "grad_norm": 1.0558871880279557, | |
| "learning_rate": 5.3498035179736475e-08, | |
| "logits/chosen": -0.546875, | |
| "logits/rejected": -0.64453125, | |
| "logps/chosen": -856.0, | |
| "logps/rejected": -1600.0, | |
| "loss": 0.0119, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.4375, | |
| "rewards/margins": 7.3125, | |
| "rewards/rejected": -13.75, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.4481327800829877, | |
| "grad_norm": 10.613994074318533, | |
| "learning_rate": 4.98235863075899e-08, | |
| "logits/chosen": -0.6796875, | |
| "logits/rejected": -0.70703125, | |
| "logps/chosen": -1024.0, | |
| "logps/rejected": -1768.0, | |
| "loss": 0.0164, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.125, | |
| "rewards/margins": 7.59375, | |
| "rewards/rejected": -14.6875, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.4688796680497926, | |
| "grad_norm": 1.5107625022692397, | |
| "learning_rate": 4.626585519788476e-08, | |
| "logits/chosen": -0.71484375, | |
| "logits/rejected": -0.72265625, | |
| "logps/chosen": -1040.0, | |
| "logps/rejected": -2040.0, | |
| "loss": 0.0124, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.75, | |
| "rewards/margins": 10.0, | |
| "rewards/rejected": -16.75, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.4896265560165975, | |
| "grad_norm": 0.6379884298790988, | |
| "learning_rate": 4.2826916272911154e-08, | |
| "logits/chosen": -0.490234375, | |
| "logits/rejected": -0.58203125, | |
| "logps/chosen": -1136.0, | |
| "logps/rejected": -1672.0, | |
| "loss": 0.0143, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -7.625, | |
| "rewards/margins": 6.5, | |
| "rewards/rejected": -14.125, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.5103734439834025, | |
| "grad_norm": 1.1839781270784333, | |
| "learning_rate": 3.950877469026523e-08, | |
| "logits/chosen": -0.55859375, | |
| "logits/rejected": -0.6953125, | |
| "logps/chosen": -1248.0, | |
| "logps/rejected": -2144.0, | |
| "loss": 0.0132, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.6875, | |
| "rewards/margins": 10.0, | |
| "rewards/rejected": -18.625, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.5311203319502074, | |
| "grad_norm": 14.154020819707466, | |
| "learning_rate": 3.631336517369313e-08, | |
| "logits/chosen": -0.75390625, | |
| "logits/rejected": -0.86328125, | |
| "logps/chosen": -968.0, | |
| "logps/rejected": -1704.0, | |
| "loss": 0.0174, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.65625, | |
| "rewards/margins": 7.3125, | |
| "rewards/rejected": -14.0, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.5518672199170123, | |
| "grad_norm": 4.473861321502488, | |
| "learning_rate": 3.3242550885002805e-08, | |
| "logits/chosen": -0.71875, | |
| "logits/rejected": -0.8046875, | |
| "logps/chosen": -1336.0, | |
| "logps/rejected": -1856.0, | |
| "loss": 0.0088, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.96875, | |
| "rewards/margins": 7.25, | |
| "rewards/rejected": -15.1875, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.572614107883817, | |
| "grad_norm": 0.8764104869307379, | |
| "learning_rate": 3.029812233770215e-08, | |
| "logits/chosen": -0.71484375, | |
| "logits/rejected": -0.765625, | |
| "logps/chosen": -836.0, | |
| "logps/rejected": -1504.0, | |
| "loss": 0.0114, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -5.84375, | |
| "rewards/margins": 6.90625, | |
| "rewards/rejected": -12.75, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.5933609958506225, | |
| "grad_norm": 0.8441509449443175, | |
| "learning_rate": 2.74817963529958e-08, | |
| "logits/chosen": -0.490234375, | |
| "logits/rejected": -0.52734375, | |
| "logps/chosen": -1072.0, | |
| "logps/rejected": -1880.0, | |
| "loss": 0.0117, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.75, | |
| "rewards/margins": 7.9375, | |
| "rewards/rejected": -15.6875, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.6141078838174274, | |
| "grad_norm": 2.8897995526214415, | |
| "learning_rate": 2.479521505875079e-08, | |
| "logits/chosen": -0.71484375, | |
| "logits/rejected": -0.7734375, | |
| "logps/chosen": -1192.0, | |
| "logps/rejected": -1808.0, | |
| "loss": 0.0095, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.0, | |
| "rewards/margins": 7.0, | |
| "rewards/rejected": -15.0, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.6348547717842323, | |
| "grad_norm": 2.0317827036066234, | |
| "learning_rate": 2.223994493201342e-08, | |
| "logits/chosen": -0.79296875, | |
| "logits/rejected": -0.80078125, | |
| "logps/chosen": -1144.0, | |
| "logps/rejected": -1856.0, | |
| "loss": 0.0083, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.0625, | |
| "rewards/margins": 7.71875, | |
| "rewards/rejected": -15.75, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.6556016597510372, | |
| "grad_norm": 3.365990349119932, | |
| "learning_rate": 1.9817475885636868e-08, | |
| "logits/chosen": -0.70703125, | |
| "logits/rejected": -0.78125, | |
| "logps/chosen": -1064.0, | |
| "logps/rejected": -1824.0, | |
| "loss": 0.0137, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.875, | |
| "rewards/margins": 7.75, | |
| "rewards/rejected": -14.625, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.6763485477178426, | |
| "grad_norm": 1.6615356719262115, | |
| "learning_rate": 1.7529220399550376e-08, | |
| "logits/chosen": -0.58984375, | |
| "logits/rejected": -0.7734375, | |
| "logps/chosen": -1280.0, | |
| "logps/rejected": -2336.0, | |
| "loss": 0.0101, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.4375, | |
| "rewards/margins": 10.1875, | |
| "rewards/rejected": -19.625, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.6970954356846475, | |
| "grad_norm": 1.1273270140301521, | |
| "learning_rate": 1.5376512697178713e-08, | |
| "logits/chosen": -0.58203125, | |
| "logits/rejected": -0.6875, | |
| "logps/chosen": -1056.0, | |
| "logps/rejected": -1752.0, | |
| "loss": 0.0156, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.34375, | |
| "rewards/margins": 7.75, | |
| "rewards/rejected": -15.125, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.7178423236514524, | |
| "grad_norm": 7.344349531123166, | |
| "learning_rate": 1.3360607967490307e-08, | |
| "logits/chosen": -0.81640625, | |
| "logits/rejected": -0.83203125, | |
| "logps/chosen": -1416.0, | |
| "logps/rejected": -2192.0, | |
| "loss": 0.0125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.25, | |
| "rewards/margins": 8.625, | |
| "rewards/rejected": -17.875, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.7385892116182573, | |
| "grad_norm": 0.7630154530022901, | |
| "learning_rate": 1.1482681633128738e-08, | |
| "logits/chosen": -0.58984375, | |
| "logits/rejected": -0.71875, | |
| "logps/chosen": -1008.0, | |
| "logps/rejected": -1920.0, | |
| "loss": 0.0145, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.3125, | |
| "rewards/margins": 9.5, | |
| "rewards/rejected": -16.75, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.759336099585062, | |
| "grad_norm": 1.063282018361314, | |
| "learning_rate": 9.743828665053466e-09, | |
| "logits/chosen": -0.39453125, | |
| "logits/rejected": -0.54296875, | |
| "logps/chosen": -1088.0, | |
| "logps/rejected": -1976.0, | |
| "loss": 0.0086, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.40625, | |
| "rewards/margins": 9.5, | |
| "rewards/rejected": -16.875, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.780082987551867, | |
| "grad_norm": 1.2970387106157468, | |
| "learning_rate": 8.145062944090425e-09, | |
| "logits/chosen": -0.6015625, | |
| "logits/rejected": -0.6484375, | |
| "logps/chosen": -952.0, | |
| "logps/rejected": -1704.0, | |
| "loss": 0.0222, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.28125, | |
| "rewards/margins": 7.34375, | |
| "rewards/rejected": -13.625, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.800829875518672, | |
| "grad_norm": 2.5481269848678614, | |
| "learning_rate": 6.687316669763937e-09, | |
| "logits/chosen": -0.90234375, | |
| "logits/rejected": -0.97265625, | |
| "logps/chosen": -1080.0, | |
| "logps/rejected": -1720.0, | |
| "loss": 0.0177, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.96875, | |
| "rewards/margins": 7.0625, | |
| "rewards/rejected": -14.0, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.821576763485477, | |
| "grad_norm": 2.514879468782849, | |
| "learning_rate": 5.371439816754892e-09, | |
| "logits/chosen": -0.61328125, | |
| "logits/rejected": -0.71875, | |
| "logps/chosen": -1048.0, | |
| "logps/rejected": -1512.0, | |
| "loss": 0.0179, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -7.0625, | |
| "rewards/margins": 5.21875, | |
| "rewards/rejected": -12.25, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.8423236514522823, | |
| "grad_norm": 1.413587585861375, | |
| "learning_rate": 4.198199639302152e-09, | |
| "logits/chosen": -0.6171875, | |
| "logits/rejected": -0.671875, | |
| "logps/chosen": -1032.0, | |
| "logps/rejected": -1888.0, | |
| "loss": 0.0164, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.375, | |
| "rewards/margins": 8.375, | |
| "rewards/rejected": -15.75, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.863070539419087, | |
| "grad_norm": 2.2560390766041793, | |
| "learning_rate": 3.1682802238362506e-09, | |
| "logits/chosen": -0.625, | |
| "logits/rejected": -0.69921875, | |
| "logps/chosen": -1104.0, | |
| "logps/rejected": -1992.0, | |
| "loss": 0.0077, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.75, | |
| "rewards/margins": 9.0, | |
| "rewards/rejected": -16.75, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.883817427385892, | |
| "grad_norm": 2.6052476361718147, | |
| "learning_rate": 2.2822820901060025e-09, | |
| "logits/chosen": -0.76953125, | |
| "logits/rejected": -0.69140625, | |
| "logps/chosen": -1224.0, | |
| "logps/rejected": -1768.0, | |
| "loss": 0.0111, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.3125, | |
| "rewards/margins": 7.46875, | |
| "rewards/rejected": -14.8125, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.904564315352697, | |
| "grad_norm": 0.8393040331914429, | |
| "learning_rate": 1.5407218410307398e-09, | |
| "logits/chosen": -0.765625, | |
| "logits/rejected": -0.79296875, | |
| "logps/chosen": -1136.0, | |
| "logps/rejected": -1656.0, | |
| "loss": 0.0104, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.375, | |
| "rewards/margins": 4.84375, | |
| "rewards/rejected": -13.1875, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.9253112033195023, | |
| "grad_norm": 3.1380931604937596, | |
| "learning_rate": 9.440318614823417e-10, | |
| "logits/chosen": -0.62109375, | |
| "logits/rejected": -0.62109375, | |
| "logps/chosen": -972.0, | |
| "logps/rejected": -1560.0, | |
| "loss": 0.0148, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -6.96875, | |
| "rewards/margins": 6.21875, | |
| "rewards/rejected": -13.1875, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.9460580912863072, | |
| "grad_norm": 1.4055560802364486, | |
| "learning_rate": 4.925600661726537e-10, | |
| "logits/chosen": -0.5859375, | |
| "logits/rejected": -0.734375, | |
| "logps/chosen": -1096.0, | |
| "logps/rejected": -1984.0, | |
| "loss": 0.013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.21875, | |
| "rewards/margins": 9.1875, | |
| "rewards/rejected": -16.375, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.966804979253112, | |
| "grad_norm": 1.2654931032152357, | |
| "learning_rate": 1.8656969679323176e-10, | |
| "logits/chosen": -0.78515625, | |
| "logits/rejected": -0.84375, | |
| "logps/chosen": -1208.0, | |
| "logps/rejected": -1864.0, | |
| "loss": 0.0111, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.53125, | |
| "rewards/margins": 8.0, | |
| "rewards/rejected": -15.5, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.987551867219917, | |
| "grad_norm": 1.055552542271011, | |
| "learning_rate": 2.6239168525898915e-11, | |
| "logits/chosen": -0.73828125, | |
| "logits/rejected": -0.59765625, | |
| "logps/chosen": -1224.0, | |
| "logps/rejected": -1792.0, | |
| "loss": 0.0109, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.125, | |
| "rewards/margins": 7.1875, | |
| "rewards/rejected": -15.3125, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1446, | |
| "total_flos": 0.0, | |
| "train_loss": 0.21590606526138048, | |
| "train_runtime": 112848.5516, | |
| "train_samples_per_second": 0.819, | |
| "train_steps_per_second": 0.013 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1446, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |