| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9973828840617638, | |
| "eval_steps": 500, | |
| "global_step": 954, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "diff_generated": 0.0, | |
| "epoch": 0.002093692750588851, | |
| "grad_norm": 4027.4986845337753, | |
| "learning_rate": 2.083333333333333e-08, | |
| "logits/chosen": -2.1441590785980225, | |
| "logits/rejected": -2.0543735027313232, | |
| "logps/chosen": -276.82366943359375, | |
| "logps/rejected": -131.32485961914062, | |
| "loss": 140.2437, | |
| "losses_ref": -131.32485961914062, | |
| "ref_logps/chosen": -276.82366943359375, | |
| "ref_logps/rejected": -131.32485961914062, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1, | |
| "u": 1.4901161193847656e-08, | |
| "weight": 1.0 | |
| }, | |
| { | |
| "diff_generated": 0.004567362368106842, | |
| "epoch": 0.010468463752944255, | |
| "grad_norm": 4012.8373505662616, | |
| "learning_rate": 1.0416666666666667e-07, | |
| "logits/chosen": -2.2097952365875244, | |
| "logits/rejected": -2.1078758239746094, | |
| "logps/chosen": -280.6259460449219, | |
| "logps/rejected": -162.3510284423828, | |
| "loss": 129.4337, | |
| "losses_ref": -163.54556274414062, | |
| "ref_logps/chosen": -280.68133544921875, | |
| "ref_logps/rejected": -162.3555908203125, | |
| "rewards/accuracies": 0.43359375, | |
| "rewards/chosen": 0.000553958467207849, | |
| "rewards/margins": 0.0005082848947495222, | |
| "rewards/rejected": 4.567361975205131e-05, | |
| "step": 5, | |
| "u": 0.01998738758265972, | |
| "weight": 1.0011132955551147 | |
| }, | |
| { | |
| "diff_generated": -0.883712887763977, | |
| "epoch": 0.02093692750588851, | |
| "grad_norm": 3617.405413942307, | |
| "learning_rate": 2.0833333333333333e-07, | |
| "logits/chosen": -2.355677843093872, | |
| "logits/rejected": -2.1583828926086426, | |
| "logps/chosen": -302.09747314453125, | |
| "logps/rejected": -169.69467163085938, | |
| "loss": 157.3847, | |
| "losses_ref": -137.87350463867188, | |
| "ref_logps/chosen": -302.58917236328125, | |
| "ref_logps/rejected": -168.81094360351562, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 0.004917326383292675, | |
| "rewards/margins": 0.01375445444136858, | |
| "rewards/rejected": -0.00883712898939848, | |
| "step": 10, | |
| "u": -0.573723316192627, | |
| "weight": 0.8237913250923157 | |
| }, | |
| { | |
| "diff_generated": -3.757080078125, | |
| "epoch": 0.031405391258832765, | |
| "grad_norm": 3487.9086553330885, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "logits/chosen": -2.285557270050049, | |
| "logits/rejected": -2.1396851539611816, | |
| "logps/chosen": -299.9487609863281, | |
| "logps/rejected": -166.72817993164062, | |
| "loss": 215.9423, | |
| "losses_ref": -61.32612991333008, | |
| "ref_logps/chosen": -304.54766845703125, | |
| "ref_logps/rejected": -162.97108459472656, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 0.0459887757897377, | |
| "rewards/margins": 0.08355957269668579, | |
| "rewards/rejected": -0.03757079690694809, | |
| "step": 15, | |
| "u": -1.074953317642212, | |
| "weight": 0.4649723172187805 | |
| }, | |
| { | |
| "diff_generated": -13.927907943725586, | |
| "epoch": 0.04187385501177702, | |
| "grad_norm": 2892.3287332168984, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "logits/chosen": -2.246702194213867, | |
| "logits/rejected": -2.1279449462890625, | |
| "logps/chosen": -267.1871337890625, | |
| "logps/rejected": -170.03897094726562, | |
| "loss": 233.0012, | |
| "losses_ref": -32.27024459838867, | |
| "ref_logps/chosen": -283.3597106933594, | |
| "ref_logps/rejected": -156.11105346679688, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.16172581911087036, | |
| "rewards/margins": 0.30100491642951965, | |
| "rewards/rejected": -0.1392790973186493, | |
| "step": 20, | |
| "u": -0.5478723049163818, | |
| "weight": 0.3134520649909973 | |
| }, | |
| { | |
| "diff_generated": -26.503625869750977, | |
| "epoch": 0.05234231876472128, | |
| "grad_norm": 2024.707131865886, | |
| "learning_rate": 5.208333333333334e-07, | |
| "logits/chosen": -2.209564447402954, | |
| "logits/rejected": -2.0659689903259277, | |
| "logps/chosen": -255.67092895507812, | |
| "logps/rejected": -183.784423828125, | |
| "loss": 225.1278, | |
| "losses_ref": -30.188289642333984, | |
| "ref_logps/chosen": -280.2396545410156, | |
| "ref_logps/rejected": -157.2808074951172, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.245687335729599, | |
| "rewards/margins": 0.5107235908508301, | |
| "rewards/rejected": -0.2650362551212311, | |
| "step": 25, | |
| "u": 0.18020522594451904, | |
| "weight": 0.2832922041416168 | |
| }, | |
| { | |
| "diff_generated": -51.14558792114258, | |
| "epoch": 0.06281078251766553, | |
| "grad_norm": 1518.1810592262389, | |
| "learning_rate": 6.249999999999999e-07, | |
| "logits/chosen": -2.2818737030029297, | |
| "logits/rejected": -2.199028968811035, | |
| "logps/chosen": -243.2410888671875, | |
| "logps/rejected": -215.5212860107422, | |
| "loss": 229.1218, | |
| "losses_ref": -20.79702377319336, | |
| "ref_logps/chosen": -273.4181823730469, | |
| "ref_logps/rejected": -164.37570190429688, | |
| "rewards/accuracies": 0.9906250238418579, | |
| "rewards/chosen": 0.30177104473114014, | |
| "rewards/margins": 0.8132268786430359, | |
| "rewards/rejected": -0.5114558935165405, | |
| "step": 30, | |
| "u": -0.08460383862257004, | |
| "weight": 0.19152367115020752 | |
| }, | |
| { | |
| "diff_generated": -66.31632995605469, | |
| "epoch": 0.07327924627060979, | |
| "grad_norm": 1482.6172349050332, | |
| "learning_rate": 7.291666666666666e-07, | |
| "logits/chosen": -2.2653889656066895, | |
| "logits/rejected": -2.1242835521698, | |
| "logps/chosen": -249.3292999267578, | |
| "logps/rejected": -223.139892578125, | |
| "loss": 228.9043, | |
| "losses_ref": -19.583892822265625, | |
| "ref_logps/chosen": -282.82373046875, | |
| "ref_logps/rejected": -156.8235626220703, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.3349445164203644, | |
| "rewards/margins": 0.9981077909469604, | |
| "rewards/rejected": -0.6631633043289185, | |
| "step": 35, | |
| "u": 0.06723131239414215, | |
| "weight": 0.2029893398284912 | |
| }, | |
| { | |
| "diff_generated": -101.70452880859375, | |
| "epoch": 0.08374771002355404, | |
| "grad_norm": 1747.512023088969, | |
| "learning_rate": 8.333333333333333e-07, | |
| "logits/chosen": -2.109070062637329, | |
| "logits/rejected": -2.079871654510498, | |
| "logps/chosen": -237.7236328125, | |
| "logps/rejected": -262.9115905761719, | |
| "loss": 238.8995, | |
| "losses_ref": -15.8267822265625, | |
| "ref_logps/chosen": -272.7063903808594, | |
| "ref_logps/rejected": -161.20706176757812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.34982770681381226, | |
| "rewards/margins": 1.366873025894165, | |
| "rewards/rejected": -1.017045259475708, | |
| "step": 40, | |
| "u": -1.1587042808532715, | |
| "weight": 0.09851591289043427 | |
| }, | |
| { | |
| "diff_generated": -117.0851058959961, | |
| "epoch": 0.0942161737764983, | |
| "grad_norm": 1667.7557707134451, | |
| "learning_rate": 9.374999999999999e-07, | |
| "logits/chosen": -2.20316219329834, | |
| "logits/rejected": -2.008223295211792, | |
| "logps/chosen": -257.76983642578125, | |
| "logps/rejected": -278.8745422363281, | |
| "loss": 239.9967, | |
| "losses_ref": -20.097864151000977, | |
| "ref_logps/chosen": -293.736083984375, | |
| "ref_logps/rejected": -161.78945922851562, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.3596626818180084, | |
| "rewards/margins": 1.530513882637024, | |
| "rewards/rejected": -1.1708511114120483, | |
| "step": 45, | |
| "u": -0.12792688608169556, | |
| "weight": 0.16130205988883972 | |
| }, | |
| { | |
| "diff_generated": -126.9466781616211, | |
| "epoch": 0.10468463752944256, | |
| "grad_norm": 1521.2094097818665, | |
| "learning_rate": 1.0416666666666667e-06, | |
| "logits/chosen": -2.1982452869415283, | |
| "logits/rejected": -2.1284544467926025, | |
| "logps/chosen": -232.5095977783203, | |
| "logps/rejected": -295.5307922363281, | |
| "loss": 224.3866, | |
| "losses_ref": -21.150318145751953, | |
| "ref_logps/chosen": -270.96405029296875, | |
| "ref_logps/rejected": -168.58413696289062, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.38454434275627136, | |
| "rewards/margins": 1.6540111303329468, | |
| "rewards/rejected": -1.2694666385650635, | |
| "step": 50, | |
| "u": 0.001223707222379744, | |
| "weight": 0.18796880543231964 | |
| }, | |
| { | |
| "diff_generated": -141.50799560546875, | |
| "epoch": 0.11515310128238682, | |
| "grad_norm": 1612.8192197434123, | |
| "learning_rate": 1.1458333333333333e-06, | |
| "logits/chosen": -2.0737013816833496, | |
| "logits/rejected": -1.9873807430267334, | |
| "logps/chosen": -239.891357421875, | |
| "logps/rejected": -311.09619140625, | |
| "loss": 220.8677, | |
| "losses_ref": -7.660050392150879, | |
| "ref_logps/chosen": -280.08502197265625, | |
| "ref_logps/rejected": -169.58819580078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.40193670988082886, | |
| "rewards/margins": 1.8170166015625, | |
| "rewards/rejected": -1.4150798320770264, | |
| "step": 55, | |
| "u": -0.9630683660507202, | |
| "weight": 0.08691856265068054 | |
| }, | |
| { | |
| "diff_generated": -137.93148803710938, | |
| "epoch": 0.12562156503533106, | |
| "grad_norm": 1372.8553226775107, | |
| "learning_rate": 1.2499999999999999e-06, | |
| "logits/chosen": -1.9770643711090088, | |
| "logits/rejected": -1.8704265356063843, | |
| "logps/chosen": -242.3487091064453, | |
| "logps/rejected": -295.7236633300781, | |
| "loss": 226.417, | |
| "losses_ref": -8.987265586853027, | |
| "ref_logps/chosen": -281.4112548828125, | |
| "ref_logps/rejected": -157.79214477539062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.39062565565109253, | |
| "rewards/margins": 1.7699406147003174, | |
| "rewards/rejected": -1.37931489944458, | |
| "step": 60, | |
| "u": -0.9851242303848267, | |
| "weight": 0.08782722800970078 | |
| }, | |
| { | |
| "diff_generated": -155.2223358154297, | |
| "epoch": 0.1360900287882753, | |
| "grad_norm": 1255.5204766616016, | |
| "learning_rate": 1.3541666666666667e-06, | |
| "logits/chosen": -1.9109680652618408, | |
| "logits/rejected": -1.800903081893921, | |
| "logps/chosen": -251.7116241455078, | |
| "logps/rejected": -313.6351318359375, | |
| "loss": 226.6359, | |
| "losses_ref": -6.898039817810059, | |
| "ref_logps/chosen": -291.105224609375, | |
| "ref_logps/rejected": -158.41278076171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3939359784126282, | |
| "rewards/margins": 1.9461593627929688, | |
| "rewards/rejected": -1.5522234439849854, | |
| "step": 65, | |
| "u": -1.2434440851211548, | |
| "weight": 0.07695779949426651 | |
| }, | |
| { | |
| "diff_generated": -131.47259521484375, | |
| "epoch": 0.14655849254121958, | |
| "grad_norm": 1343.9563405956512, | |
| "learning_rate": 1.4583333333333333e-06, | |
| "logits/chosen": -1.8604061603546143, | |
| "logits/rejected": -1.8694736957550049, | |
| "logps/chosen": -233.1003875732422, | |
| "logps/rejected": -294.2840881347656, | |
| "loss": 225.3943, | |
| "losses_ref": -10.19434642791748, | |
| "ref_logps/chosen": -274.62811279296875, | |
| "ref_logps/rejected": -162.8114776611328, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.41527730226516724, | |
| "rewards/margins": 1.7300033569335938, | |
| "rewards/rejected": -1.3147261142730713, | |
| "step": 70, | |
| "u": -1.253035545349121, | |
| "weight": 0.09121803939342499 | |
| }, | |
| { | |
| "diff_generated": -137.2784423828125, | |
| "epoch": 0.15702695629416383, | |
| "grad_norm": 1322.5353000176865, | |
| "learning_rate": 1.5624999999999999e-06, | |
| "logits/chosen": -1.800450086593628, | |
| "logits/rejected": -1.649074912071228, | |
| "logps/chosen": -263.3216247558594, | |
| "logps/rejected": -309.0770568847656, | |
| "loss": 233.1299, | |
| "losses_ref": -10.170949935913086, | |
| "ref_logps/chosen": -306.6936950683594, | |
| "ref_logps/rejected": -171.79859924316406, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4337209165096283, | |
| "rewards/margins": 1.8065054416656494, | |
| "rewards/rejected": -1.3727843761444092, | |
| "step": 75, | |
| "u": -1.3111217021942139, | |
| "weight": 0.08406667411327362 | |
| }, | |
| { | |
| "diff_generated": -128.09861755371094, | |
| "epoch": 0.16749542004710807, | |
| "grad_norm": 1343.7990825981688, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "logits/chosen": -1.6293904781341553, | |
| "logits/rejected": -1.653552770614624, | |
| "logps/chosen": -211.403564453125, | |
| "logps/rejected": -288.49102783203125, | |
| "loss": 223.2768, | |
| "losses_ref": -5.209358215332031, | |
| "ref_logps/chosen": -253.810302734375, | |
| "ref_logps/rejected": -160.39239501953125, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.42406734824180603, | |
| "rewards/margins": 1.7050535678863525, | |
| "rewards/rejected": -1.2809861898422241, | |
| "step": 80, | |
| "u": -1.129665732383728, | |
| "weight": 0.05663755536079407 | |
| }, | |
| { | |
| "diff_generated": -135.20687866210938, | |
| "epoch": 0.17796388380005235, | |
| "grad_norm": 1192.5534502285198, | |
| "learning_rate": 1.7708333333333332e-06, | |
| "logits/chosen": -1.573900818824768, | |
| "logits/rejected": -1.4756534099578857, | |
| "logps/chosen": -239.03305053710938, | |
| "logps/rejected": -300.70184326171875, | |
| "loss": 223.0021, | |
| "losses_ref": -7.026658535003662, | |
| "ref_logps/chosen": -282.1534423828125, | |
| "ref_logps/rejected": -165.49496459960938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.43120384216308594, | |
| "rewards/margins": 1.7832725048065186, | |
| "rewards/rejected": -1.3520687818527222, | |
| "step": 85, | |
| "u": -0.8892000317573547, | |
| "weight": 0.074161596596241 | |
| }, | |
| { | |
| "diff_generated": -148.8050994873047, | |
| "epoch": 0.1884323475529966, | |
| "grad_norm": 1417.0310516206605, | |
| "learning_rate": 1.8749999999999998e-06, | |
| "logits/chosen": -1.3538436889648438, | |
| "logits/rejected": -1.2718507051467896, | |
| "logps/chosen": -234.74856567382812, | |
| "logps/rejected": -304.4095458984375, | |
| "loss": 232.0073, | |
| "losses_ref": -11.248689651489258, | |
| "ref_logps/chosen": -279.6741638183594, | |
| "ref_logps/rejected": -155.60443115234375, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.4492563307285309, | |
| "rewards/margins": 1.937307596206665, | |
| "rewards/rejected": -1.488051176071167, | |
| "step": 90, | |
| "u": -1.1423507928848267, | |
| "weight": 0.08043224364519119 | |
| }, | |
| { | |
| "diff_generated": -148.7802276611328, | |
| "epoch": 0.19890081130594087, | |
| "grad_norm": 1432.3504082681623, | |
| "learning_rate": 1.9791666666666666e-06, | |
| "logits/chosen": -1.1082611083984375, | |
| "logits/rejected": -1.0555765628814697, | |
| "logps/chosen": -235.3373565673828, | |
| "logps/rejected": -309.65771484375, | |
| "loss": 219.1082, | |
| "losses_ref": -13.706560134887695, | |
| "ref_logps/chosen": -277.9019470214844, | |
| "ref_logps/rejected": -160.87747192382812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4256461262702942, | |
| "rewards/margins": 1.9134483337402344, | |
| "rewards/rejected": -1.487802267074585, | |
| "step": 95, | |
| "u": -1.0568161010742188, | |
| "weight": 0.09002764523029327 | |
| }, | |
| { | |
| "diff_generated": -158.8511962890625, | |
| "epoch": 0.2093692750588851, | |
| "grad_norm": 1374.8147610024293, | |
| "learning_rate": 1.9998927475076105e-06, | |
| "logits/chosen": -0.9869598150253296, | |
| "logits/rejected": -0.8535524606704712, | |
| "logps/chosen": -238.96426391601562, | |
| "logps/rejected": -322.4727783203125, | |
| "loss": 236.8658, | |
| "losses_ref": -5.802731513977051, | |
| "ref_logps/chosen": -282.0462951660156, | |
| "ref_logps/rejected": -163.62156677246094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.43082040548324585, | |
| "rewards/margins": 2.0193324089050293, | |
| "rewards/rejected": -1.5885119438171387, | |
| "step": 100, | |
| "u": -1.2527328729629517, | |
| "weight": 0.06292165815830231 | |
| }, | |
| { | |
| "diff_generated": -147.18008422851562, | |
| "epoch": 0.21983773881182936, | |
| "grad_norm": 1625.9248559762682, | |
| "learning_rate": 1.9994570736865402e-06, | |
| "logits/chosen": -1.07206392288208, | |
| "logits/rejected": -0.9393303990364075, | |
| "logps/chosen": -232.5029296875, | |
| "logps/rejected": -308.7837829589844, | |
| "loss": 213.8591, | |
| "losses_ref": -10.191104888916016, | |
| "ref_logps/chosen": -275.3525390625, | |
| "ref_logps/rejected": -161.60366821289062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4284963011741638, | |
| "rewards/margins": 1.9002971649169922, | |
| "rewards/rejected": -1.4718010425567627, | |
| "step": 105, | |
| "u": -1.0033910274505615, | |
| "weight": 0.10204311460256577 | |
| }, | |
| { | |
| "diff_generated": -128.29922485351562, | |
| "epoch": 0.23030620256477363, | |
| "grad_norm": 1231.2639002533556, | |
| "learning_rate": 1.9986864211644068e-06, | |
| "logits/chosen": -1.1658036708831787, | |
| "logits/rejected": -1.0709865093231201, | |
| "logps/chosen": -231.3977813720703, | |
| "logps/rejected": -283.1410217285156, | |
| "loss": 246.1861, | |
| "losses_ref": -6.052565574645996, | |
| "ref_logps/chosen": -272.9906921386719, | |
| "ref_logps/rejected": -154.841796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.41592931747436523, | |
| "rewards/margins": 1.6989214420318604, | |
| "rewards/rejected": -1.2829921245574951, | |
| "step": 110, | |
| "u": -1.3139088153839111, | |
| "weight": 0.07522980868816376 | |
| }, | |
| { | |
| "diff_generated": -133.98553466796875, | |
| "epoch": 0.24077466631771788, | |
| "grad_norm": 1343.0801296451152, | |
| "learning_rate": 1.997581048233623e-06, | |
| "logits/chosen": -1.1396609544754028, | |
| "logits/rejected": -1.1306806802749634, | |
| "logps/chosen": -226.9049835205078, | |
| "logps/rejected": -293.1982421875, | |
| "loss": 230.2171, | |
| "losses_ref": -5.637959957122803, | |
| "ref_logps/chosen": -269.8221130371094, | |
| "ref_logps/rejected": -159.2126922607422, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4291713833808899, | |
| "rewards/margins": 1.769026756286621, | |
| "rewards/rejected": -1.3398553133010864, | |
| "step": 115, | |
| "u": -1.168405294418335, | |
| "weight": 0.05913761258125305 | |
| }, | |
| { | |
| "diff_generated": -123.33839416503906, | |
| "epoch": 0.2512431300706621, | |
| "grad_norm": 1434.3497350520097, | |
| "learning_rate": 1.9961413253717214e-06, | |
| "logits/chosen": -1.5746419429779053, | |
| "logits/rejected": -1.518913984298706, | |
| "logps/chosen": -228.5142822265625, | |
| "logps/rejected": -284.6359558105469, | |
| "loss": 234.8627, | |
| "losses_ref": -9.012969017028809, | |
| "ref_logps/chosen": -274.33917236328125, | |
| "ref_logps/rejected": -161.29759216308594, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.4582485258579254, | |
| "rewards/margins": 1.6916322708129883, | |
| "rewards/rejected": -1.2333838939666748, | |
| "step": 120, | |
| "u": -0.9588969349861145, | |
| "weight": 0.08884967118501663 | |
| }, | |
| { | |
| "diff_generated": -151.09429931640625, | |
| "epoch": 0.26171159382360637, | |
| "grad_norm": 1274.213985322993, | |
| "learning_rate": 1.994367735117177e-06, | |
| "logits/chosen": -1.6689637899398804, | |
| "logits/rejected": -1.6743271350860596, | |
| "logps/chosen": -216.779541015625, | |
| "logps/rejected": -306.51861572265625, | |
| "loss": 226.4779, | |
| "losses_ref": -6.019095420837402, | |
| "ref_logps/chosen": -259.2029724121094, | |
| "ref_logps/rejected": -155.42433166503906, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.42423415184020996, | |
| "rewards/margins": 1.9351768493652344, | |
| "rewards/rejected": -1.5109429359436035, | |
| "step": 125, | |
| "u": -1.222081184387207, | |
| "weight": 0.08297105878591537 | |
| }, | |
| { | |
| "diff_generated": -161.22811889648438, | |
| "epoch": 0.2721800575765506, | |
| "grad_norm": 1337.1173679216238, | |
| "learning_rate": 1.992260871907687e-06, | |
| "logits/chosen": -1.5299973487854004, | |
| "logits/rejected": -1.4785773754119873, | |
| "logps/chosen": -239.4655303955078, | |
| "logps/rejected": -327.7781982421875, | |
| "loss": 242.8888, | |
| "losses_ref": -7.182534694671631, | |
| "ref_logps/chosen": -280.188720703125, | |
| "ref_logps/rejected": -166.550048828125, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.407231867313385, | |
| "rewards/margins": 2.019512891769409, | |
| "rewards/rejected": -1.612281084060669, | |
| "step": 130, | |
| "u": -1.2559138536453247, | |
| "weight": 0.05781525373458862 | |
| }, | |
| { | |
| "diff_generated": -169.7267303466797, | |
| "epoch": 0.2826485213294949, | |
| "grad_norm": 1374.1488593321894, | |
| "learning_rate": 1.9898214418809326e-06, | |
| "logits/chosen": -1.3805739879608154, | |
| "logits/rejected": -1.3600701093673706, | |
| "logps/chosen": -238.9783935546875, | |
| "logps/rejected": -343.4627380371094, | |
| "loss": 242.9051, | |
| "losses_ref": -2.127274990081787, | |
| "ref_logps/chosen": -281.3921203613281, | |
| "ref_logps/rejected": -173.73602294921875, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.4241371750831604, | |
| "rewards/margins": 2.1214041709899902, | |
| "rewards/rejected": -1.6972671747207642, | |
| "step": 135, | |
| "u": -1.7065389156341553, | |
| "weight": 0.033993639051914215 | |
| }, | |
| { | |
| "diff_generated": -151.85092163085938, | |
| "epoch": 0.29311698508243916, | |
| "grad_norm": 1370.477984750469, | |
| "learning_rate": 1.9870502626379126e-06, | |
| "logits/chosen": -1.3134925365447998, | |
| "logits/rejected": -1.3758270740509033, | |
| "logps/chosen": -227.9882049560547, | |
| "logps/rejected": -322.3777770996094, | |
| "loss": 229.547, | |
| "losses_ref": -4.158343315124512, | |
| "ref_logps/chosen": -270.9952392578125, | |
| "ref_logps/rejected": -170.52687072753906, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.43007057905197144, | |
| "rewards/margins": 1.9485795497894287, | |
| "rewards/rejected": -1.518509030342102, | |
| "step": 140, | |
| "u": -1.3956022262573242, | |
| "weight": 0.05143100023269653 | |
| }, | |
| { | |
| "diff_generated": -146.50155639648438, | |
| "epoch": 0.3035854488353834, | |
| "grad_norm": 1794.900701079277, | |
| "learning_rate": 1.983948262968915e-06, | |
| "logits/chosen": -1.5504910945892334, | |
| "logits/rejected": -1.4326040744781494, | |
| "logps/chosen": -259.777587890625, | |
| "logps/rejected": -307.3033752441406, | |
| "loss": 242.1811, | |
| "losses_ref": -2.1557910442352295, | |
| "ref_logps/chosen": -302.7044982910156, | |
| "ref_logps/rejected": -160.8018035888672, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.42926883697509766, | |
| "rewards/margins": 1.8942844867706299, | |
| "rewards/rejected": -1.4650156497955322, | |
| "step": 145, | |
| "u": -1.3577892780303955, | |
| "weight": 0.044694624841213226 | |
| }, | |
| { | |
| "diff_generated": -155.41860961914062, | |
| "epoch": 0.31405391258832765, | |
| "grad_norm": 1420.5558411185323, | |
| "learning_rate": 1.9805164825422237e-06, | |
| "logits/chosen": -2.0522618293762207, | |
| "logits/rejected": -1.9478759765625, | |
| "logps/chosen": -238.4119873046875, | |
| "logps/rejected": -314.91790771484375, | |
| "loss": 224.1883, | |
| "losses_ref": -3.6840145587921143, | |
| "ref_logps/chosen": -281.19158935546875, | |
| "ref_logps/rejected": -159.49932861328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4277961254119873, | |
| "rewards/margins": 1.9819822311401367, | |
| "rewards/rejected": -1.5541859865188599, | |
| "step": 150, | |
| "u": -1.3957250118255615, | |
| "weight": 0.05671170353889465 | |
| }, | |
| { | |
| "diff_generated": -151.29141235351562, | |
| "epoch": 0.3245223763412719, | |
| "grad_norm": 1339.4660772749999, | |
| "learning_rate": 1.9767560715556594e-06, | |
| "logits/chosen": -2.201369524002075, | |
| "logits/rejected": -2.1122801303863525, | |
| "logps/chosen": -232.8695831298828, | |
| "logps/rejected": -321.6642150878906, | |
| "loss": 230.8218, | |
| "losses_ref": -4.063229084014893, | |
| "ref_logps/chosen": -279.747314453125, | |
| "ref_logps/rejected": -170.372802734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.46877723932266235, | |
| "rewards/margins": 1.9816913604736328, | |
| "rewards/rejected": -1.5129140615463257, | |
| "step": 155, | |
| "u": -1.4928115606307983, | |
| "weight": 0.05359172821044922 | |
| }, | |
| { | |
| "diff_generated": -154.98220825195312, | |
| "epoch": 0.33499084009421615, | |
| "grad_norm": 1436.3409054374235, | |
| "learning_rate": 1.972668290351084e-06, | |
| "logits/chosen": -2.1720938682556152, | |
| "logits/rejected": -2.0600266456604004, | |
| "logps/chosen": -240.95022583007812, | |
| "logps/rejected": -311.90997314453125, | |
| "loss": 234.915, | |
| "losses_ref": -4.4140777587890625, | |
| "ref_logps/chosen": -289.99774169921875, | |
| "ref_logps/rejected": -156.92776489257812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4904751777648926, | |
| "rewards/margins": 2.040297031402588, | |
| "rewards/rejected": -1.5498219728469849, | |
| "step": 160, | |
| "u": -1.4394853115081787, | |
| "weight": 0.04004598781466484 | |
| }, | |
| { | |
| "diff_generated": -144.861572265625, | |
| "epoch": 0.34545930384716045, | |
| "grad_norm": 1297.629892424431, | |
| "learning_rate": 1.968254508991978e-06, | |
| "logits/chosen": -2.255429267883301, | |
| "logits/rejected": -2.142435073852539, | |
| "logps/chosen": -243.08935546875, | |
| "logps/rejected": -304.804443359375, | |
| "loss": 237.5995, | |
| "losses_ref": -2.3130009174346924, | |
| "ref_logps/chosen": -284.68487548828125, | |
| "ref_logps/rejected": -159.94287109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4159550666809082, | |
| "rewards/margins": 1.8645708560943604, | |
| "rewards/rejected": -1.4486157894134521, | |
| "step": 165, | |
| "u": -1.5542036294937134, | |
| "weight": 0.030019784346222878 | |
| }, | |
| { | |
| "diff_generated": -151.61795043945312, | |
| "epoch": 0.3559277676001047, | |
| "grad_norm": 1297.3953865872961, | |
| "learning_rate": 1.9635162068042544e-06, | |
| "logits/chosen": -2.119171380996704, | |
| "logits/rejected": -2.017618417739868, | |
| "logps/chosen": -247.02041625976562, | |
| "logps/rejected": -313.6037292480469, | |
| "loss": 237.275, | |
| "losses_ref": -6.966467380523682, | |
| "ref_logps/chosen": -288.6535949707031, | |
| "ref_logps/rejected": -161.9857940673828, | |
| "rewards/accuracies": 0.9906250238418579, | |
| "rewards/chosen": 0.41633161902427673, | |
| "rewards/margins": 1.9325110912322998, | |
| "rewards/rejected": -1.5161794424057007, | |
| "step": 170, | |
| "u": -1.2121031284332275, | |
| "weight": 0.07038909941911697 | |
| }, | |
| { | |
| "diff_generated": -144.2270050048828, | |
| "epoch": 0.36639623135304894, | |
| "grad_norm": 1438.8100283748447, | |
| "learning_rate": 1.958454971880441e-06, | |
| "logits/chosen": -2.147486686706543, | |
| "logits/rejected": -2.0490543842315674, | |
| "logps/chosen": -268.3631591796875, | |
| "logps/rejected": -305.03021240234375, | |
| "loss": 251.9562, | |
| "losses_ref": -5.818743705749512, | |
| "ref_logps/chosen": -313.4308776855469, | |
| "ref_logps/rejected": -160.80323791503906, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4506770670413971, | |
| "rewards/margins": 1.8929469585418701, | |
| "rewards/rejected": -1.442270040512085, | |
| "step": 175, | |
| "u": -1.2561050653457642, | |
| "weight": 0.0653764009475708 | |
| }, | |
| { | |
| "diff_generated": -141.9085693359375, | |
| "epoch": 0.3768646951059932, | |
| "grad_norm": 1207.513295077982, | |
| "learning_rate": 1.9530725005474194e-06, | |
| "logits/chosen": -2.267883539199829, | |
| "logits/rejected": -2.218174457550049, | |
| "logps/chosen": -221.9941864013672, | |
| "logps/rejected": -298.5855407714844, | |
| "loss": 221.5628, | |
| "losses_ref": -3.0411601066589355, | |
| "ref_logps/chosen": -264.38067626953125, | |
| "ref_logps/rejected": -156.677001953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4238646924495697, | |
| "rewards/margins": 1.8429502248764038, | |
| "rewards/rejected": -1.4190856218338013, | |
| "step": 180, | |
| "u": -1.1714732646942139, | |
| "weight": 0.05968625098466873 | |
| }, | |
| { | |
| "diff_generated": -150.76657104492188, | |
| "epoch": 0.38733315885893743, | |
| "grad_norm": 1307.7780975566222, | |
| "learning_rate": 1.9473705967978807e-06, | |
| "logits/chosen": -2.420961856842041, | |
| "logits/rejected": -2.327650547027588, | |
| "logps/chosen": -227.6046600341797, | |
| "logps/rejected": -303.7978210449219, | |
| "loss": 229.0799, | |
| "losses_ref": -15.570757865905762, | |
| "ref_logps/chosen": -272.23333740234375, | |
| "ref_logps/rejected": -153.03126525878906, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4462866187095642, | |
| "rewards/margins": 1.9539520740509033, | |
| "rewards/rejected": -1.5076655149459839, | |
| "step": 185, | |
| "u": -0.8297923803329468, | |
| "weight": 0.09269951283931732 | |
| }, | |
| { | |
| "diff_generated": -147.6534423828125, | |
| "epoch": 0.39780162261188173, | |
| "grad_norm": 1169.0067686339887, | |
| "learning_rate": 1.941351171685697e-06, | |
| "logits/chosen": -2.2705044746398926, | |
| "logits/rejected": -2.2303287982940674, | |
| "logps/chosen": -229.6949920654297, | |
| "logps/rejected": -316.17437744140625, | |
| "loss": 234.7021, | |
| "losses_ref": -5.174070835113525, | |
| "ref_logps/chosen": -274.26959228515625, | |
| "ref_logps/rejected": -168.52093505859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4457460343837738, | |
| "rewards/margins": 1.9222803115844727, | |
| "rewards/rejected": -1.4765344858169556, | |
| "step": 190, | |
| "u": -1.7719621658325195, | |
| "weight": 0.03358909860253334 | |
| }, | |
| { | |
| "diff_generated": -159.57711791992188, | |
| "epoch": 0.408270086364826, | |
| "grad_norm": 1222.7096009577886, | |
| "learning_rate": 1.9350162426854148e-06, | |
| "logits/chosen": -2.1345176696777344, | |
| "logits/rejected": -2.1815943717956543, | |
| "logps/chosen": -195.1034393310547, | |
| "logps/rejected": -316.82177734375, | |
| "loss": 220.9707, | |
| "losses_ref": -4.031326770782471, | |
| "ref_logps/chosen": -238.08377075195312, | |
| "ref_logps/rejected": -157.2446746826172, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.42980343103408813, | |
| "rewards/margins": 2.025574207305908, | |
| "rewards/rejected": -1.595771074295044, | |
| "step": 195, | |
| "u": -1.546870231628418, | |
| "weight": 0.03703851252794266 | |
| }, | |
| { | |
| "diff_generated": -167.23892211914062, | |
| "epoch": 0.4187385501177702, | |
| "grad_norm": 1368.307859885155, | |
| "learning_rate": 1.9283679330160725e-06, | |
| "logits/chosen": -2.1258459091186523, | |
| "logits/rejected": -2.004584789276123, | |
| "logps/chosen": -238.9210205078125, | |
| "logps/rejected": -331.30718994140625, | |
| "loss": 244.6853, | |
| "losses_ref": -4.569379806518555, | |
| "ref_logps/chosen": -285.3875732421875, | |
| "ref_logps/rejected": -164.0682830810547, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4646654725074768, | |
| "rewards/margins": 2.137054681777954, | |
| "rewards/rejected": -1.6723893880844116, | |
| "step": 200, | |
| "u": -1.6067603826522827, | |
| "weight": 0.04548769071698189 | |
| }, | |
| { | |
| "diff_generated": -156.21780395507812, | |
| "epoch": 0.42920701387071447, | |
| "grad_norm": 1208.8530669692416, | |
| "learning_rate": 1.9214084709295847e-06, | |
| "logits/chosen": -2.0831170082092285, | |
| "logits/rejected": -1.964040756225586, | |
| "logps/chosen": -255.9301300048828, | |
| "logps/rejected": -318.99798583984375, | |
| "loss": 233.3463, | |
| "losses_ref": -5.610936641693115, | |
| "ref_logps/chosen": -300.7832946777344, | |
| "ref_logps/rejected": -162.78021240234375, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.44853147864341736, | |
| "rewards/margins": 2.010709524154663, | |
| "rewards/rejected": -1.5621780157089233, | |
| "step": 205, | |
| "u": -1.3661489486694336, | |
| "weight": 0.06516700237989426 | |
| }, | |
| { | |
| "diff_generated": -171.98703002929688, | |
| "epoch": 0.4396754776236587, | |
| "grad_norm": 1215.8559114876498, | |
| "learning_rate": 1.9141401889639164e-06, | |
| "logits/chosen": -1.9906151294708252, | |
| "logits/rejected": -1.9088771343231201, | |
| "logps/chosen": -235.02249145507812, | |
| "logps/rejected": -345.1544494628906, | |
| "loss": 234.6928, | |
| "losses_ref": -2.863798141479492, | |
| "ref_logps/chosen": -280.8175048828125, | |
| "ref_logps/rejected": -173.16738891601562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.45795029401779175, | |
| "rewards/margins": 2.1778206825256348, | |
| "rewards/rejected": -1.7198702096939087, | |
| "step": 210, | |
| "u": -1.4222519397735596, | |
| "weight": 0.044259898364543915 | |
| }, | |
| { | |
| "diff_generated": -168.92660522460938, | |
| "epoch": 0.45014394137660296, | |
| "grad_norm": 1266.497741976898, | |
| "learning_rate": 1.906565523161312e-06, | |
| "logits/chosen": -1.9987051486968994, | |
| "logits/rejected": -1.9987319707870483, | |
| "logps/chosen": -227.54159545898438, | |
| "logps/rejected": -331.20281982421875, | |
| "loss": 227.5447, | |
| "losses_ref": -2.0428645610809326, | |
| "ref_logps/chosen": -272.03076171875, | |
| "ref_logps/rejected": -162.27622985839844, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4448915421962738, | |
| "rewards/margins": 2.134157657623291, | |
| "rewards/rejected": -1.6892658472061157, | |
| "step": 215, | |
| "u": -1.699721097946167, | |
| "weight": 0.028461579233407974 | |
| }, | |
| { | |
| "diff_generated": -181.3323211669922, | |
| "epoch": 0.46061240512954726, | |
| "grad_norm": 1409.5627230630107, | |
| "learning_rate": 1.8986870122518259e-06, | |
| "logits/chosen": -1.996578574180603, | |
| "logits/rejected": -1.9339357614517212, | |
| "logps/chosen": -241.12069702148438, | |
| "logps/rejected": -345.39239501953125, | |
| "loss": 250.5986, | |
| "losses_ref": -13.413454055786133, | |
| "ref_logps/chosen": -284.3638610839844, | |
| "ref_logps/rejected": -164.06004333496094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.43243154883384705, | |
| "rewards/margins": 2.2457549571990967, | |
| "rewards/rejected": -1.8133233785629272, | |
| "step": 220, | |
| "u": -1.559470295906067, | |
| "weight": 0.03921313211321831 | |
| }, | |
| { | |
| "diff_generated": -167.23196411132812, | |
| "epoch": 0.4710808688824915, | |
| "grad_norm": 1439.3131066005014, | |
| "learning_rate": 1.8905072968024423e-06, | |
| "logits/chosen": -2.0085692405700684, | |
| "logits/rejected": -1.9212806224822998, | |
| "logps/chosen": -240.53793334960938, | |
| "logps/rejected": -324.13519287109375, | |
| "loss": 229.6424, | |
| "losses_ref": -2.6123085021972656, | |
| "ref_logps/chosen": -288.477783203125, | |
| "ref_logps/rejected": -156.90321350097656, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.47939810156822205, | |
| "rewards/margins": 2.1517176628112793, | |
| "rewards/rejected": -1.6723196506500244, | |
| "step": 225, | |
| "u": -1.7230523824691772, | |
| "weight": 0.04574074223637581 | |
| }, | |
| { | |
| "diff_generated": -159.5584259033203, | |
| "epoch": 0.48154933263543576, | |
| "grad_norm": 1268.731805706848, | |
| "learning_rate": 1.88202911833206e-06, | |
| "logits/chosen": -2.006537914276123, | |
| "logits/rejected": -2.0306971073150635, | |
| "logps/chosen": -209.113037109375, | |
| "logps/rejected": -324.5091552734375, | |
| "loss": 221.1728, | |
| "losses_ref": -2.3901400566101074, | |
| "ref_logps/chosen": -255.0234832763672, | |
| "ref_logps/rejected": -164.95074462890625, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.459104061126709, | |
| "rewards/margins": 2.0546882152557373, | |
| "rewards/rejected": -1.5955842733383179, | |
| "step": 230, | |
| "u": -1.3925855159759521, | |
| "weight": 0.045756690204143524 | |
| }, | |
| { | |
| "diff_generated": -170.58221435546875, | |
| "epoch": 0.49201779638838, | |
| "grad_norm": 1315.789025978012, | |
| "learning_rate": 1.873255318392644e-06, | |
| "logits/chosen": -1.9995191097259521, | |
| "logits/rejected": -1.8898826837539673, | |
| "logps/chosen": -234.0719757080078, | |
| "logps/rejected": -327.0367736816406, | |
| "loss": 242.3326, | |
| "losses_ref": -4.473931312561035, | |
| "ref_logps/chosen": -280.68048095703125, | |
| "ref_logps/rejected": -156.4545440673828, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.46608513593673706, | |
| "rewards/margins": 2.1719069480895996, | |
| "rewards/rejected": -1.7058223485946655, | |
| "step": 235, | |
| "u": -1.6257721185684204, | |
| "weight": 0.034325193613767624 | |
| }, | |
| { | |
| "diff_generated": -163.37722778320312, | |
| "epoch": 0.5024862601413242, | |
| "grad_norm": 1285.4823648929914, | |
| "learning_rate": 1.8641888376168483e-06, | |
| "logits/chosen": -1.9665982723236084, | |
| "logits/rejected": -1.9548044204711914, | |
| "logps/chosen": -215.7754669189453, | |
| "logps/rejected": -326.5556335449219, | |
| "loss": 231.7613, | |
| "losses_ref": -5.584181308746338, | |
| "ref_logps/chosen": -260.7419128417969, | |
| "ref_logps/rejected": -163.17840576171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.44966477155685425, | |
| "rewards/margins": 2.083436965942383, | |
| "rewards/rejected": -1.6337722539901733, | |
| "step": 240, | |
| "u": -1.2691129446029663, | |
| "weight": 0.0609821155667305 | |
| }, | |
| { | |
| "diff_generated": -147.24386596679688, | |
| "epoch": 0.5129547238942685, | |
| "grad_norm": 1347.3156065591786, | |
| "learning_rate": 1.8548327147324312e-06, | |
| "logits/chosen": -1.9906165599822998, | |
| "logits/rejected": -1.872373342514038, | |
| "logps/chosen": -243.5879364013672, | |
| "logps/rejected": -304.78204345703125, | |
| "loss": 236.4194, | |
| "losses_ref": -7.212074279785156, | |
| "ref_logps/chosen": -291.9618835449219, | |
| "ref_logps/rejected": -157.53817749023438, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": 0.48373931646347046, | |
| "rewards/margins": 1.9561779499053955, | |
| "rewards/rejected": -1.4724384546279907, | |
| "step": 245, | |
| "u": -0.7504249811172485, | |
| "weight": 0.08246179670095444 | |
| }, | |
| { | |
| "diff_generated": -136.68235778808594, | |
| "epoch": 0.5234231876472127, | |
| "grad_norm": 1280.557570592857, | |
| "learning_rate": 1.8451900855437948e-06, | |
| "logits/chosen": -2.0444495677948, | |
| "logits/rejected": -1.9412866830825806, | |
| "logps/chosen": -237.24496459960938, | |
| "logps/rejected": -305.5830078125, | |
| "loss": 231.6959, | |
| "losses_ref": -4.014006614685059, | |
| "ref_logps/chosen": -285.0312805175781, | |
| "ref_logps/rejected": -168.90065002441406, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.47786277532577515, | |
| "rewards/margins": 1.8446861505508423, | |
| "rewards/rejected": -1.3668235540390015, | |
| "step": 250, | |
| "u": -1.4464961290359497, | |
| "weight": 0.045917607843875885 | |
| }, | |
| { | |
| "diff_generated": -147.02664184570312, | |
| "epoch": 0.533891651400157, | |
| "grad_norm": 1271.960313695608, | |
| "learning_rate": 1.8352641818809846e-06, | |
| "logits/chosen": -2.012394428253174, | |
| "logits/rejected": -1.9293123483657837, | |
| "logps/chosen": -255.23617553710938, | |
| "logps/rejected": -305.11065673828125, | |
| "loss": 237.2504, | |
| "losses_ref": -3.9721827507019043, | |
| "ref_logps/chosen": -298.58929443359375, | |
| "ref_logps/rejected": -158.0840606689453, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.43353086709976196, | |
| "rewards/margins": 1.9037971496582031, | |
| "rewards/rejected": -1.470266342163086, | |
| "step": 255, | |
| "u": -1.2067726850509644, | |
| "weight": 0.04464394599199295 | |
| }, | |
| { | |
| "diff_generated": -150.324462890625, | |
| "epoch": 0.5443601151531012, | |
| "grad_norm": 1323.4761845101339, | |
| "learning_rate": 1.8250583305165094e-06, | |
| "logits/chosen": -1.7699302434921265, | |
| "logits/rejected": -1.7340294122695923, | |
| "logps/chosen": -232.5556640625, | |
| "logps/rejected": -303.0191650390625, | |
| "loss": 236.4857, | |
| "losses_ref": -3.8249027729034424, | |
| "ref_logps/chosen": -277.13360595703125, | |
| "ref_logps/rejected": -152.69473266601562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4457794725894928, | |
| "rewards/margins": 1.949023962020874, | |
| "rewards/rejected": -1.5032446384429932, | |
| "step": 260, | |
| "u": -1.4394437074661255, | |
| "weight": 0.06012386828660965 | |
| }, | |
| { | |
| "diff_generated": -146.3737335205078, | |
| "epoch": 0.5548285789060455, | |
| "grad_norm": 1232.2132266823505, | |
| "learning_rate": 1.8145759520503357e-06, | |
| "logits/chosen": -1.836775541305542, | |
| "logits/rejected": -1.7096904516220093, | |
| "logps/chosen": -242.7677764892578, | |
| "logps/rejected": -308.00592041015625, | |
| "loss": 219.0433, | |
| "losses_ref": -2.2338509559631348, | |
| "ref_logps/chosen": -290.8897705078125, | |
| "ref_logps/rejected": -161.63217163085938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.48121970891952515, | |
| "rewards/margins": 1.9449567794799805, | |
| "rewards/rejected": -1.4637373685836792, | |
| "step": 265, | |
| "u": -1.7323522567749023, | |
| "weight": 0.03296298533678055 | |
| }, | |
| { | |
| "diff_generated": -160.1627960205078, | |
| "epoch": 0.5652970426589898, | |
| "grad_norm": 1351.7338122517372, | |
| "learning_rate": 1.803820559763439e-06, | |
| "logits/chosen": -1.7946879863739014, | |
| "logits/rejected": -1.7407840490341187, | |
| "logps/chosen": -215.82290649414062, | |
| "logps/rejected": -316.18743896484375, | |
| "loss": 232.6284, | |
| "losses_ref": -3.786867618560791, | |
| "ref_logps/chosen": -261.61407470703125, | |
| "ref_logps/rejected": -156.02464294433594, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.45791149139404297, | |
| "rewards/margins": 2.059539318084717, | |
| "rewards/rejected": -1.6016279458999634, | |
| "step": 270, | |
| "u": -1.651993751525879, | |
| "weight": 0.04032987728714943 | |
| }, | |
| { | |
| "diff_generated": -142.9796905517578, | |
| "epoch": 0.575765506411934, | |
| "grad_norm": 1181.4870635863472, | |
| "learning_rate": 1.7927957584402895e-06, | |
| "logits/chosen": -1.875299096107483, | |
| "logits/rejected": -1.8068253993988037, | |
| "logps/chosen": -228.66781616210938, | |
| "logps/rejected": -303.5104064941406, | |
| "loss": 224.2237, | |
| "losses_ref": -4.741028308868408, | |
| "ref_logps/chosen": -272.44915771484375, | |
| "ref_logps/rejected": -160.53070068359375, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": 0.43781352043151855, | |
| "rewards/margins": 1.8676105737686157, | |
| "rewards/rejected": -1.4297969341278076, | |
| "step": 275, | |
| "u": -1.267141580581665, | |
| "weight": 0.07081650197505951 | |
| }, | |
| { | |
| "diff_generated": -147.29513549804688, | |
| "epoch": 0.5862339701648783, | |
| "grad_norm": 1311.3976945524007, | |
| "learning_rate": 1.78150524316067e-06, | |
| "logits/chosen": -1.9360460042953491, | |
| "logits/rejected": -1.8399826288223267, | |
| "logps/chosen": -244.2842559814453, | |
| "logps/rejected": -319.99603271484375, | |
| "loss": 221.9428, | |
| "losses_ref": -5.114128112792969, | |
| "ref_logps/chosen": -288.6471252441406, | |
| "ref_logps/rejected": -172.7008819580078, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4436289668083191, | |
| "rewards/margins": 1.9165802001953125, | |
| "rewards/rejected": -1.4729512929916382, | |
| "step": 280, | |
| "u": -1.33490788936615, | |
| "weight": 0.05145906284451485 | |
| }, | |
| { | |
| "diff_generated": -165.53073120117188, | |
| "epoch": 0.5967024339178225, | |
| "grad_norm": 1217.7737895640616, | |
| "learning_rate": 1.7699527980612304e-06, | |
| "logits/chosen": -2.008852243423462, | |
| "logits/rejected": -1.865282416343689, | |
| "logps/chosen": -235.48495483398438, | |
| "logps/rejected": -324.86236572265625, | |
| "loss": 237.0448, | |
| "losses_ref": -3.6097474098205566, | |
| "ref_logps/chosen": -281.65557861328125, | |
| "ref_logps/rejected": -159.3316650390625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4617062509059906, | |
| "rewards/margins": 2.117013454437256, | |
| "rewards/rejected": -1.655307412147522, | |
| "step": 285, | |
| "u": -1.5152809619903564, | |
| "weight": 0.03953182324767113 | |
| }, | |
| { | |
| "diff_generated": -152.0204315185547, | |
| "epoch": 0.6071708976707668, | |
| "grad_norm": 1365.967274184474, | |
| "learning_rate": 1.758142295067194e-06, | |
| "logits/chosen": -1.9733747243881226, | |
| "logits/rejected": -1.8123550415039062, | |
| "logps/chosen": -253.77774047851562, | |
| "logps/rejected": -316.69073486328125, | |
| "loss": 236.0956, | |
| "losses_ref": -9.393682479858398, | |
| "ref_logps/chosen": -299.4283142089844, | |
| "ref_logps/rejected": -164.6702880859375, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.45650559663772583, | |
| "rewards/margins": 1.9767096042633057, | |
| "rewards/rejected": -1.5202041864395142, | |
| "step": 290, | |
| "u": -1.0198547840118408, | |
| "weight": 0.07342410832643509 | |
| }, | |
| { | |
| "diff_generated": -156.43539428710938, | |
| "epoch": 0.6176393614237111, | |
| "grad_norm": 1274.7347074994798, | |
| "learning_rate": 1.7460776925946416e-06, | |
| "logits/chosen": -2.04952335357666, | |
| "logits/rejected": -1.9772619009017944, | |
| "logps/chosen": -231.12759399414062, | |
| "logps/rejected": -324.58734130859375, | |
| "loss": 216.7738, | |
| "losses_ref": -3.1922709941864014, | |
| "ref_logps/chosen": -275.5738525390625, | |
| "ref_logps/rejected": -168.1519317626953, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4444626271724701, | |
| "rewards/margins": 2.0088164806365967, | |
| "rewards/rejected": -1.5643537044525146, | |
| "step": 295, | |
| "u": -1.7094459533691406, | |
| "weight": 0.027651017531752586 | |
| }, | |
| { | |
| "diff_generated": -166.10739135742188, | |
| "epoch": 0.6281078251766553, | |
| "grad_norm": 1279.289070746857, | |
| "learning_rate": 1.7337630342238039e-06, | |
| "logits/chosen": -2.0860671997070312, | |
| "logits/rejected": -1.9944241046905518, | |
| "logps/chosen": -226.953125, | |
| "logps/rejected": -329.9337158203125, | |
| "loss": 245.769, | |
| "losses_ref": -2.491637706756592, | |
| "ref_logps/chosen": -276.3335266113281, | |
| "ref_logps/rejected": -163.8263397216797, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.49380379915237427, | |
| "rewards/margins": 2.1548776626586914, | |
| "rewards/rejected": -1.6610740423202515, | |
| "step": 300, | |
| "u": -1.8025985956192017, | |
| "weight": 0.020761026069521904 | |
| }, | |
| { | |
| "diff_generated": -160.49281311035156, | |
| "epoch": 0.6385762889295996, | |
| "grad_norm": 1143.8594113545453, | |
| "learning_rate": 1.7212024473438145e-06, | |
| "logits/chosen": -2.1227848529815674, | |
| "logits/rejected": -2.037874698638916, | |
| "logps/chosen": -227.2042694091797, | |
| "logps/rejected": -324.0436096191406, | |
| "loss": 218.3608, | |
| "losses_ref": -5.721261978149414, | |
| "ref_logps/chosen": -275.447265625, | |
| "ref_logps/rejected": -163.55076599121094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4824300706386566, | |
| "rewards/margins": 2.0873584747314453, | |
| "rewards/rejected": -1.6049282550811768, | |
| "step": 305, | |
| "u": -1.5689971446990967, | |
| "weight": 0.03765694424510002 | |
| }, | |
| { | |
| "diff_generated": -165.9134979248047, | |
| "epoch": 0.6490447526825438, | |
| "grad_norm": 1216.910104164249, | |
| "learning_rate": 1.70840014176937e-06, | |
| "logits/chosen": -2.148029327392578, | |
| "logits/rejected": -1.9548304080963135, | |
| "logps/chosen": -259.4276123046875, | |
| "logps/rejected": -335.60723876953125, | |
| "loss": 237.5431, | |
| "losses_ref": -6.571761131286621, | |
| "ref_logps/chosen": -307.9371643066406, | |
| "ref_logps/rejected": -169.69369506835938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.48509567975997925, | |
| "rewards/margins": 2.144230604171753, | |
| "rewards/rejected": -1.659135103225708, | |
| "step": 310, | |
| "u": -1.2953577041625977, | |
| "weight": 0.06081492453813553 | |
| }, | |
| { | |
| "diff_generated": -150.7538299560547, | |
| "epoch": 0.6595132164354881, | |
| "grad_norm": 1285.8252216937017, | |
| "learning_rate": 1.6953604083297663e-06, | |
| "logits/chosen": -2.0963034629821777, | |
| "logits/rejected": -2.005828619003296, | |
| "logps/chosen": -238.0185089111328, | |
| "logps/rejected": -313.0700988769531, | |
| "loss": 232.0059, | |
| "losses_ref": -5.998663425445557, | |
| "ref_logps/chosen": -286.41973876953125, | |
| "ref_logps/rejected": -162.3162841796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4840126633644104, | |
| "rewards/margins": 1.9915508031845093, | |
| "rewards/rejected": -1.5075383186340332, | |
| "step": 315, | |
| "u": -1.022328495979309, | |
| "weight": 0.07499580085277557 | |
| }, | |
| { | |
| "diff_generated": -167.22291564941406, | |
| "epoch": 0.6699816801884323, | |
| "grad_norm": 1393.3558242713107, | |
| "learning_rate": 1.6820876174307821e-06, | |
| "logits/chosen": -2.0343525409698486, | |
| "logits/rejected": -1.9958488941192627, | |
| "logps/chosen": -220.11959838867188, | |
| "logps/rejected": -324.1341857910156, | |
| "loss": 235.1374, | |
| "losses_ref": -3.5960795879364014, | |
| "ref_logps/chosen": -265.8931579589844, | |
| "ref_logps/rejected": -156.91128540039062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.45773547887802124, | |
| "rewards/margins": 2.129964590072632, | |
| "rewards/rejected": -1.6722290515899658, | |
| "step": 320, | |
| "u": -1.116194486618042, | |
| "weight": 0.04208649322390556 | |
| }, | |
| { | |
| "diff_generated": -152.4580078125, | |
| "epoch": 0.6804501439413766, | |
| "grad_norm": 1260.574816635609, | |
| "learning_rate": 1.668586217589889e-06, | |
| "logits/chosen": -2.028233051300049, | |
| "logits/rejected": -1.943868637084961, | |
| "logps/chosen": -252.96224975585938, | |
| "logps/rejected": -314.017578125, | |
| "loss": 228.4758, | |
| "losses_ref": -1.9372276067733765, | |
| "ref_logps/chosen": -299.65130615234375, | |
| "ref_logps/rejected": -161.55958557128906, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.46689024567604065, | |
| "rewards/margins": 1.9914703369140625, | |
| "rewards/rejected": -1.5245802402496338, | |
| "step": 325, | |
| "u": -1.6558067798614502, | |
| "weight": 0.03156626224517822 | |
| }, | |
| { | |
| "diff_generated": -140.8079376220703, | |
| "epoch": 0.6909186076943209, | |
| "grad_norm": 1201.438854630279, | |
| "learning_rate": 1.6548607339452852e-06, | |
| "logits/chosen": -2.0895023345947266, | |
| "logits/rejected": -2.036318778991699, | |
| "logps/chosen": -216.3995361328125, | |
| "logps/rejected": -303.2993469238281, | |
| "loss": 233.4191, | |
| "losses_ref": -2.161651134490967, | |
| "ref_logps/chosen": -261.6273498535156, | |
| "ref_logps/rejected": -162.49142456054688, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4522779583930969, | |
| "rewards/margins": 1.8603572845458984, | |
| "rewards/rejected": -1.4080793857574463, | |
| "step": 330, | |
| "u": -1.8709716796875, | |
| "weight": 0.017609911039471626 | |
| }, | |
| { | |
| "diff_generated": -143.80978393554688, | |
| "epoch": 0.7013870714472651, | |
| "grad_norm": 1237.6054714094937, | |
| "learning_rate": 1.6409157667392455e-06, | |
| "logits/chosen": -2.059278964996338, | |
| "logits/rejected": -1.9892032146453857, | |
| "logps/chosen": -235.5959930419922, | |
| "logps/rejected": -307.551513671875, | |
| "loss": 229.4944, | |
| "losses_ref": -6.860163688659668, | |
| "ref_logps/chosen": -283.805908203125, | |
| "ref_logps/rejected": -163.74172973632812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.48209866881370544, | |
| "rewards/margins": 1.920196771621704, | |
| "rewards/rejected": -1.4380979537963867, | |
| "step": 335, | |
| "u": -0.9188238382339478, | |
| "weight": 0.07267802953720093 | |
| }, | |
| { | |
| "diff_generated": -160.1986846923828, | |
| "epoch": 0.7118555352002094, | |
| "grad_norm": 1143.246546308752, | |
| "learning_rate": 1.6267559897763027e-06, | |
| "logits/chosen": -1.8168014287948608, | |
| "logits/rejected": -1.863437294960022, | |
| "logps/chosen": -188.27635192871094, | |
| "logps/rejected": -314.9437561035156, | |
| "loss": 216.8938, | |
| "losses_ref": -1.3188815116882324, | |
| "ref_logps/chosen": -237.00216674804688, | |
| "ref_logps/rejected": -154.74508666992188, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.4872584939002991, | |
| "rewards/margins": 2.089244842529297, | |
| "rewards/rejected": -1.6019866466522217, | |
| "step": 340, | |
| "u": -1.303836703300476, | |
| "weight": 0.029538637027144432 | |
| }, | |
| { | |
| "diff_generated": -151.67230224609375, | |
| "epoch": 0.7223239989531536, | |
| "grad_norm": 1267.0562713440388, | |
| "learning_rate": 1.6123861488567708e-06, | |
| "logits/chosen": -1.9331505298614502, | |
| "logits/rejected": -1.7450395822525024, | |
| "logps/chosen": -256.15277099609375, | |
| "logps/rejected": -316.7372131347656, | |
| "loss": 244.0877, | |
| "losses_ref": -2.1836702823638916, | |
| "ref_logps/chosen": -306.53680419921875, | |
| "ref_logps/rejected": -165.06492614746094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5038406848907471, | |
| "rewards/margins": 2.0205636024475098, | |
| "rewards/rejected": -1.5167229175567627, | |
| "step": 345, | |
| "u": -1.485855221748352, | |
| "weight": 0.0375472754240036 | |
| }, | |
| { | |
| "diff_generated": -147.20364379882812, | |
| "epoch": 0.7327924627060979, | |
| "grad_norm": 1350.564919328469, | |
| "learning_rate": 1.5978110601861409e-06, | |
| "logits/chosen": -1.9117012023925781, | |
| "logits/rejected": -1.8668915033340454, | |
| "logps/chosen": -253.0355224609375, | |
| "logps/rejected": -311.43927001953125, | |
| "loss": 240.3254, | |
| "losses_ref": -2.832030773162842, | |
| "ref_logps/chosen": -299.90985107421875, | |
| "ref_logps/rejected": -164.23562622070312, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.46874284744262695, | |
| "rewards/margins": 1.9407793283462524, | |
| "rewards/rejected": -1.472036361694336, | |
| "step": 350, | |
| "u": -1.429086685180664, | |
| "weight": 0.04381849616765976 | |
| }, | |
| { | |
| "diff_generated": -152.60690307617188, | |
| "epoch": 0.7432609264590422, | |
| "grad_norm": 1367.6005309235504, | |
| "learning_rate": 1.5830356087608763e-06, | |
| "logits/chosen": -1.887460708618164, | |
| "logits/rejected": -1.8180389404296875, | |
| "logps/chosen": -214.82699584960938, | |
| "logps/rejected": -321.7936096191406, | |
| "loss": 228.0585, | |
| "losses_ref": -1.8199619054794312, | |
| "ref_logps/chosen": -263.9666748046875, | |
| "ref_logps/rejected": -169.18673706054688, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.49139684438705444, | |
| "rewards/margins": 2.017465829849243, | |
| "rewards/rejected": -1.526068925857544, | |
| "step": 355, | |
| "u": -1.6218674182891846, | |
| "weight": 0.02579430676996708 | |
| }, | |
| { | |
| "diff_generated": -148.06683349609375, | |
| "epoch": 0.7537293902119864, | |
| "grad_norm": 1346.2814229526575, | |
| "learning_rate": 1.5680647467311555e-06, | |
| "logits/chosen": -1.8571285009384155, | |
| "logits/rejected": -1.7857725620269775, | |
| "logps/chosen": -244.458251953125, | |
| "logps/rejected": -319.65484619140625, | |
| "loss": 223.1362, | |
| "losses_ref": -2.564044237136841, | |
| "ref_logps/chosen": -293.27410888671875, | |
| "ref_logps/rejected": -171.58799743652344, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4881584644317627, | |
| "rewards/margins": 1.9688268899917603, | |
| "rewards/rejected": -1.480668306350708, | |
| "step": 360, | |
| "u": -1.7225738763809204, | |
| "weight": 0.03375329077243805 | |
| }, | |
| { | |
| "diff_generated": -158.3987579345703, | |
| "epoch": 0.7641978539649307, | |
| "grad_norm": 1338.295994157702, | |
| "learning_rate": 1.552903491741107e-06, | |
| "logits/chosen": -1.837961196899414, | |
| "logits/rejected": -1.839646577835083, | |
| "logps/chosen": -230.9562530517578, | |
| "logps/rejected": -320.73455810546875, | |
| "loss": 230.7235, | |
| "losses_ref": -2.363715648651123, | |
| "ref_logps/chosen": -276.13995361328125, | |
| "ref_logps/rejected": -162.33580017089844, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.45183688402175903, | |
| "rewards/margins": 2.0358242988586426, | |
| "rewards/rejected": -1.5839874744415283, | |
| "step": 365, | |
| "u": -1.599321722984314, | |
| "weight": 0.03038620948791504 | |
| }, | |
| { | |
| "diff_generated": -143.7940216064453, | |
| "epoch": 0.7746663177178749, | |
| "grad_norm": 1122.4826063930961, | |
| "learning_rate": 1.5375569252470895e-06, | |
| "logits/chosen": -1.994361162185669, | |
| "logits/rejected": -1.8850581645965576, | |
| "logps/chosen": -266.71722412109375, | |
| "logps/rejected": -306.846923828125, | |
| "loss": 232.9005, | |
| "losses_ref": -7.7454657554626465, | |
| "ref_logps/chosen": -315.1695251464844, | |
| "ref_logps/rejected": -163.05288696289062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.48452290892601013, | |
| "rewards/margins": 1.922463059425354, | |
| "rewards/rejected": -1.437940239906311, | |
| "step": 370, | |
| "u": -1.2242950201034546, | |
| "weight": 0.05640628933906555 | |
| }, | |
| { | |
| "diff_generated": -147.28756713867188, | |
| "epoch": 0.7851347814708192, | |
| "grad_norm": 1267.5426171485876, | |
| "learning_rate": 1.5220301908145903e-06, | |
| "logits/chosen": -1.984815001487732, | |
| "logits/rejected": -1.8735277652740479, | |
| "logps/chosen": -236.86972045898438, | |
| "logps/rejected": -316.1020812988281, | |
| "loss": 254.4526, | |
| "losses_ref": -1.4826844930648804, | |
| "ref_logps/chosen": -283.3154296875, | |
| "ref_logps/rejected": -168.81448364257812, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.4644569754600525, | |
| "rewards/margins": 1.9373327493667603, | |
| "rewards/rejected": -1.472875714302063, | |
| "step": 375, | |
| "u": -1.3915516138076782, | |
| "weight": 0.03981015831232071 | |
| }, | |
| { | |
| "diff_generated": -141.51336669921875, | |
| "epoch": 0.7956032452237635, | |
| "grad_norm": 1213.313777968865, | |
| "learning_rate": 1.5063284923943028e-06, | |
| "logits/chosen": -1.9686000347137451, | |
| "logits/rejected": -1.856993317604065, | |
| "logps/chosen": -250.8971710205078, | |
| "logps/rejected": -304.9432067871094, | |
| "loss": 236.0771, | |
| "losses_ref": -2.1682116985321045, | |
| "ref_logps/chosen": -298.9543762207031, | |
| "ref_logps/rejected": -163.42984008789062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4805716872215271, | |
| "rewards/margins": 1.895705223083496, | |
| "rewards/rejected": -1.4151335954666138, | |
| "step": 380, | |
| "u": -1.7837505340576172, | |
| "weight": 0.02852563187479973 | |
| }, | |
| { | |
| "diff_generated": -156.00753784179688, | |
| "epoch": 0.8060717089767077, | |
| "grad_norm": 1211.688665180567, | |
| "learning_rate": 1.490457092577968e-06, | |
| "logits/chosen": -1.9195213317871094, | |
| "logits/rejected": -1.8409401178359985, | |
| "logps/chosen": -229.5646209716797, | |
| "logps/rejected": -317.98406982421875, | |
| "loss": 227.1155, | |
| "losses_ref": -1.2010728120803833, | |
| "ref_logps/chosen": -279.9380798339844, | |
| "ref_logps/rejected": -161.97653198242188, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.5037345290184021, | |
| "rewards/margins": 2.063809871673584, | |
| "rewards/rejected": -1.5600755214691162, | |
| "step": 385, | |
| "u": -1.6141672134399414, | |
| "weight": 0.025375287979841232 | |
| }, | |
| { | |
| "diff_generated": -152.0254669189453, | |
| "epoch": 0.816540172729652, | |
| "grad_norm": 1239.5744414457495, | |
| "learning_rate": 1.4744213108345602e-06, | |
| "logits/chosen": -2.0957484245300293, | |
| "logits/rejected": -1.9671990871429443, | |
| "logps/chosen": -254.6474151611328, | |
| "logps/rejected": -313.9129333496094, | |
| "loss": 233.3016, | |
| "losses_ref": -4.944865703582764, | |
| "ref_logps/chosen": -304.72125244140625, | |
| "ref_logps/rejected": -161.88751220703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5007385015487671, | |
| "rewards/margins": 2.0209929943084717, | |
| "rewards/rejected": -1.5202546119689941, | |
| "step": 390, | |
| "u": -1.7007286548614502, | |
| "weight": 0.05083342641592026 | |
| }, | |
| { | |
| "diff_generated": -139.01864624023438, | |
| "epoch": 0.8270086364825961, | |
| "grad_norm": 1205.2400489160098, | |
| "learning_rate": 1.4582265217274103e-06, | |
| "logits/chosen": -1.9418761730194092, | |
| "logits/rejected": -1.8380733728408813, | |
| "logps/chosen": -247.5355682373047, | |
| "logps/rejected": -302.6370849609375, | |
| "loss": 239.286, | |
| "losses_ref": -1.7620617151260376, | |
| "ref_logps/chosen": -293.9803161621094, | |
| "ref_logps/rejected": -163.61843872070312, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.464447557926178, | |
| "rewards/margins": 1.8546336889266968, | |
| "rewards/rejected": -1.3901864290237427, | |
| "step": 395, | |
| "u": -1.7652490139007568, | |
| "weight": 0.021758217364549637 | |
| }, | |
| { | |
| "diff_generated": -157.04232788085938, | |
| "epoch": 0.8374771002355405, | |
| "grad_norm": 1205.8199372142892, | |
| "learning_rate": 1.4418781531128635e-06, | |
| "logits/chosen": -2.0544238090515137, | |
| "logits/rejected": -2.0346767902374268, | |
| "logps/chosen": -234.49368286132812, | |
| "logps/rejected": -326.8393249511719, | |
| "loss": 233.9242, | |
| "losses_ref": -1.8244788646697998, | |
| "ref_logps/chosen": -282.6474609375, | |
| "ref_logps/rejected": -169.79696655273438, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.48153790831565857, | |
| "rewards/margins": 2.0519611835479736, | |
| "rewards/rejected": -1.5704233646392822, | |
| "step": 400, | |
| "u": -1.6305999755859375, | |
| "weight": 0.024077033624053 | |
| }, | |
| { | |
| "diff_generated": -152.27468872070312, | |
| "epoch": 0.8479455639884846, | |
| "grad_norm": 1197.3605051527013, | |
| "learning_rate": 1.4253816843210748e-06, | |
| "logits/chosen": -1.9861503839492798, | |
| "logits/rejected": -1.8832927942276, | |
| "logps/chosen": -244.0829315185547, | |
| "logps/rejected": -317.6984558105469, | |
| "loss": 237.8302, | |
| "losses_ref": -3.3451290130615234, | |
| "ref_logps/chosen": -295.3381652832031, | |
| "ref_logps/rejected": -165.42379760742188, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.51255202293396, | |
| "rewards/margins": 2.035299062728882, | |
| "rewards/rejected": -1.5227469205856323, | |
| "step": 405, | |
| "u": -1.4654412269592285, | |
| "weight": 0.036893170326948166 | |
| }, | |
| { | |
| "diff_generated": -154.22146606445312, | |
| "epoch": 0.8584140277414289, | |
| "grad_norm": 1201.6414819745964, | |
| "learning_rate": 1.4087426443195547e-06, | |
| "logits/chosen": -1.9021320343017578, | |
| "logits/rejected": -1.8548545837402344, | |
| "logps/chosen": -212.048583984375, | |
| "logps/rejected": -310.93731689453125, | |
| "loss": 223.4945, | |
| "losses_ref": -1.363377571105957, | |
| "ref_logps/chosen": -261.7601013183594, | |
| "ref_logps/rejected": -156.71588134765625, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.4971153736114502, | |
| "rewards/margins": 2.039330005645752, | |
| "rewards/rejected": -1.5422146320343018, | |
| "step": 410, | |
| "u": -1.499205470085144, | |
| "weight": 0.03244508430361748 | |
| }, | |
| { | |
| "diff_generated": -152.22821044921875, | |
| "epoch": 0.8688824914943732, | |
| "grad_norm": 1267.1758549974509, | |
| "learning_rate": 1.391966609860075e-06, | |
| "logits/chosen": -1.9990746974945068, | |
| "logits/rejected": -1.9241716861724854, | |
| "logps/chosen": -235.38150024414062, | |
| "logps/rejected": -307.2711181640625, | |
| "loss": 229.5926, | |
| "losses_ref": -3.3139452934265137, | |
| "ref_logps/chosen": -284.34393310546875, | |
| "ref_logps/rejected": -155.04290771484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.48962411284446716, | |
| "rewards/margins": 2.011906147003174, | |
| "rewards/rejected": -1.5222820043563843, | |
| "step": 415, | |
| "u": -1.4567835330963135, | |
| "weight": 0.04380001127719879 | |
| }, | |
| { | |
| "diff_generated": -142.56979370117188, | |
| "epoch": 0.8793509552473174, | |
| "grad_norm": 1188.388834303343, | |
| "learning_rate": 1.3750592036095619e-06, | |
| "logits/chosen": -2.0134921073913574, | |
| "logits/rejected": -1.8790652751922607, | |
| "logps/chosen": -250.85546875, | |
| "logps/rejected": -295.076416015625, | |
| "loss": 235.3638, | |
| "losses_ref": -3.0703201293945312, | |
| "ref_logps/chosen": -298.8680725097656, | |
| "ref_logps/rejected": -152.5066375732422, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4801257252693176, | |
| "rewards/margins": 1.9058234691619873, | |
| "rewards/rejected": -1.4256978034973145, | |
| "step": 420, | |
| "u": -1.4335013628005981, | |
| "weight": 0.04213564842939377 | |
| }, | |
| { | |
| "diff_generated": -147.99075317382812, | |
| "epoch": 0.8898194190002617, | |
| "grad_norm": 1470.580405072441, | |
| "learning_rate": 1.3580260922655984e-06, | |
| "logits/chosen": -1.9547443389892578, | |
| "logits/rejected": -1.8864132165908813, | |
| "logps/chosen": -229.1260223388672, | |
| "logps/rejected": -308.96728515625, | |
| "loss": 230.7763, | |
| "losses_ref": -7.961075782775879, | |
| "ref_logps/chosen": -278.4296875, | |
| "ref_logps/rejected": -160.97654724121094, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 0.4930366575717926, | |
| "rewards/margins": 1.9729440212249756, | |
| "rewards/rejected": -1.4799073934555054, | |
| "step": 425, | |
| "u": -0.9531173706054688, | |
| "weight": 0.06897237151861191 | |
| }, | |
| { | |
| "diff_generated": -150.89932250976562, | |
| "epoch": 0.9002878827532059, | |
| "grad_norm": 1247.106031502474, | |
| "learning_rate": 1.3408729846571713e-06, | |
| "logits/chosen": -1.9829527139663696, | |
| "logits/rejected": -1.7790740728378296, | |
| "logps/chosen": -250.89053344726562, | |
| "logps/rejected": -306.49493408203125, | |
| "loss": 227.0822, | |
| "losses_ref": -3.353726625442505, | |
| "ref_logps/chosen": -299.95831298828125, | |
| "ref_logps/rejected": -155.59561157226562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.49067792296409607, | |
| "rewards/margins": 1.9996709823608398, | |
| "rewards/rejected": -1.5089929103851318, | |
| "step": 430, | |
| "u": -1.7451012134552002, | |
| "weight": 0.029034754261374474 | |
| }, | |
| { | |
| "diff_generated": -161.92088317871094, | |
| "epoch": 0.9107563465061502, | |
| "grad_norm": 1215.392487626507, | |
| "learning_rate": 1.3236056298312956e-06, | |
| "logits/chosen": -1.8760721683502197, | |
| "logits/rejected": -1.7741060256958008, | |
| "logps/chosen": -230.2984161376953, | |
| "logps/rejected": -322.80450439453125, | |
| "loss": 219.6414, | |
| "losses_ref": -2.6977756023406982, | |
| "ref_logps/chosen": -276.49066162109375, | |
| "ref_logps/rejected": -160.88360595703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4619222581386566, | |
| "rewards/margins": 2.0811312198638916, | |
| "rewards/rejected": -1.6192089319229126, | |
| "step": 435, | |
| "u": -1.3300695419311523, | |
| "weight": 0.049107056111097336 | |
| }, | |
| { | |
| "diff_generated": -169.64144897460938, | |
| "epoch": 0.9212248102590945, | |
| "grad_norm": 1199.549953331359, | |
| "learning_rate": 1.3062298151261591e-06, | |
| "logits/chosen": -1.8538296222686768, | |
| "logits/rejected": -1.7674894332885742, | |
| "logps/chosen": -247.5723114013672, | |
| "logps/rejected": -334.99432373046875, | |
| "loss": 228.2011, | |
| "losses_ref": -3.9634671211242676, | |
| "ref_logps/chosen": -293.4337463378906, | |
| "ref_logps/rejected": -165.35289001464844, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4586148262023926, | |
| "rewards/margins": 2.155029296875, | |
| "rewards/rejected": -1.696414589881897, | |
| "step": 440, | |
| "u": -0.911568284034729, | |
| "weight": 0.05750606581568718 | |
| }, | |
| { | |
| "diff_generated": -176.77850341796875, | |
| "epoch": 0.9316932740120387, | |
| "grad_norm": 1134.6331161044943, | |
| "learning_rate": 1.2887513642314372e-06, | |
| "logits/chosen": -1.7472941875457764, | |
| "logits/rejected": -1.6525627374649048, | |
| "logps/chosen": -229.4337921142578, | |
| "logps/rejected": -337.2396545410156, | |
| "loss": 225.3431, | |
| "losses_ref": -0.7648504376411438, | |
| "ref_logps/chosen": -279.60003662109375, | |
| "ref_logps/rejected": -160.4611358642578, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5016621947288513, | |
| "rewards/margins": 2.2694473266601562, | |
| "rewards/rejected": -1.7677850723266602, | |
| "step": 445, | |
| "u": -1.5227153301239014, | |
| "weight": 0.024677513167262077 | |
| }, | |
| { | |
| "diff_generated": -180.25607299804688, | |
| "epoch": 0.942161737764983, | |
| "grad_norm": 1254.0549089198466, | |
| "learning_rate": 1.271176135236417e-06, | |
| "logits/chosen": -1.8400166034698486, | |
| "logits/rejected": -1.6989673376083374, | |
| "logps/chosen": -255.73233032226562, | |
| "logps/rejected": -341.2875671386719, | |
| "loss": 233.639, | |
| "losses_ref": -4.4961042404174805, | |
| "ref_logps/chosen": -307.17620849609375, | |
| "ref_logps/rejected": -161.03147888183594, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5144392251968384, | |
| "rewards/margins": 2.317000150680542, | |
| "rewards/rejected": -1.8025610446929932, | |
| "step": 450, | |
| "u": -1.3673655986785889, | |
| "weight": 0.044325508177280426 | |
| }, | |
| { | |
| "diff_generated": -192.91705322265625, | |
| "epoch": 0.9526302015179272, | |
| "grad_norm": 1206.737012082796, | |
| "learning_rate": 1.2535100186666e-06, | |
| "logits/chosen": -1.808547019958496, | |
| "logits/rejected": -1.6920995712280273, | |
| "logps/chosen": -254.8017578125, | |
| "logps/rejected": -351.9469909667969, | |
| "loss": 245.5463, | |
| "losses_ref": -0.9527796506881714, | |
| "ref_logps/chosen": -304.09619140625, | |
| "ref_logps/rejected": -159.02993774414062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4929441809654236, | |
| "rewards/margins": 2.422114610671997, | |
| "rewards/rejected": -1.9291703701019287, | |
| "step": 455, | |
| "u": -1.6784477233886719, | |
| "weight": 0.02164948359131813 | |
| }, | |
| { | |
| "diff_generated": -185.23989868164062, | |
| "epoch": 0.9630986652708715, | |
| "grad_norm": 1270.8466354695972, | |
| "learning_rate": 1.2357589355094273e-06, | |
| "logits/chosen": -1.8315858840942383, | |
| "logits/rejected": -1.7088918685913086, | |
| "logps/chosen": -269.20538330078125, | |
| "logps/rejected": -338.2021179199219, | |
| "loss": 246.9693, | |
| "losses_ref": -3.263090133666992, | |
| "ref_logps/chosen": -319.02618408203125, | |
| "ref_logps/rejected": -152.9622039794922, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4982084631919861, | |
| "rewards/margins": 2.350607395172119, | |
| "rewards/rejected": -1.8523988723754883, | |
| "step": 460, | |
| "u": -1.5151453018188477, | |
| "weight": 0.04330545663833618 | |
| }, | |
| { | |
| "diff_generated": -187.97279357910156, | |
| "epoch": 0.9735671290238157, | |
| "grad_norm": 1262.5411111889684, | |
| "learning_rate": 1.2179288352297982e-06, | |
| "logits/chosen": -1.7451597452163696, | |
| "logits/rejected": -1.6632684469223022, | |
| "logps/chosen": -227.63937377929688, | |
| "logps/rejected": -355.5631103515625, | |
| "loss": 232.7903, | |
| "losses_ref": -1.6858165264129639, | |
| "ref_logps/chosen": -279.9383544921875, | |
| "ref_logps/rejected": -167.59031677246094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5229896903038025, | |
| "rewards/margins": 2.4027175903320312, | |
| "rewards/rejected": -1.8797279596328735, | |
| "step": 465, | |
| "u": -1.672014594078064, | |
| "weight": 0.022105634212493896 | |
| }, | |
| { | |
| "diff_generated": -206.70443725585938, | |
| "epoch": 0.98403559277676, | |
| "grad_norm": 1278.739837777923, | |
| "learning_rate": 1.2000256937760445e-06, | |
| "logits/chosen": -1.570615291595459, | |
| "logits/rejected": -1.4970500469207764, | |
| "logps/chosen": -237.1439208984375, | |
| "logps/rejected": -359.52886962890625, | |
| "loss": 239.3887, | |
| "losses_ref": -2.77233624458313, | |
| "ref_logps/chosen": -285.7524719238281, | |
| "ref_logps/rejected": -152.82440185546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4860858917236328, | |
| "rewards/margins": 2.5531301498413086, | |
| "rewards/rejected": -2.067044496536255, | |
| "step": 470, | |
| "u": -1.4401319026947021, | |
| "weight": 0.04976705089211464 | |
| }, | |
| { | |
| "diff_generated": -199.87838745117188, | |
| "epoch": 0.9945040565297043, | |
| "grad_norm": 1152.4542486150297, | |
| "learning_rate": 1.1820555115770255e-06, | |
| "logits/chosen": -1.4883148670196533, | |
| "logits/rejected": -1.505014419555664, | |
| "logps/chosen": -225.6768798828125, | |
| "logps/rejected": -358.5788269042969, | |
| "loss": 226.472, | |
| "losses_ref": -4.080103874206543, | |
| "ref_logps/chosen": -273.79522705078125, | |
| "ref_logps/rejected": -158.70046997070312, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.48118335008621216, | |
| "rewards/margins": 2.4799671173095703, | |
| "rewards/rejected": -1.998783826828003, | |
| "step": 475, | |
| "u": -1.5296900272369385, | |
| "weight": 0.05185595899820328 | |
| }, | |
| { | |
| "diff_generated": -208.0040283203125, | |
| "epoch": 1.0049725202826485, | |
| "grad_norm": 1301.7034712659586, | |
| "learning_rate": 1.1640243115310217e-06, | |
| "logits/chosen": -1.5732040405273438, | |
| "logits/rejected": -1.5068919658660889, | |
| "logps/chosen": -223.4159393310547, | |
| "logps/rejected": -374.62591552734375, | |
| "loss": 226.8136, | |
| "losses_ref": -4.88800573348999, | |
| "ref_logps/chosen": -293.29400634765625, | |
| "ref_logps/rejected": -166.62188720703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.6987806558609009, | |
| "rewards/margins": 2.778820514678955, | |
| "rewards/rejected": -2.080040454864502, | |
| "step": 480, | |
| "u": -1.9416106939315796, | |
| "weight": 0.03623828664422035 | |
| }, | |
| { | |
| "diff_generated": -215.4559783935547, | |
| "epoch": 1.0154409840355927, | |
| "grad_norm": 1355.7730407413364, | |
| "learning_rate": 1.1459381369870972e-06, | |
| "logits/chosen": -1.5292342901229858, | |
| "logits/rejected": -1.4070460796356201, | |
| "logps/chosen": -192.32717895507812, | |
| "logps/rejected": -380.6830139160156, | |
| "loss": 181.3888, | |
| "losses_ref": -3.105132818222046, | |
| "ref_logps/chosen": -294.8918762207031, | |
| "ref_logps/rejected": -165.22702026367188, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0256469249725342, | |
| "rewards/margins": 3.180206775665283, | |
| "rewards/rejected": -2.15455961227417, | |
| "step": 485, | |
| "u": -2.883460521697998, | |
| "weight": 0.04116251319646835 | |
| }, | |
| { | |
| "diff_generated": -209.49990844726562, | |
| "epoch": 1.025909447788537, | |
| "grad_norm": 1391.787637976481, | |
| "learning_rate": 1.1278030497196046e-06, | |
| "logits/chosen": -1.2669024467468262, | |
| "logits/rejected": -1.2282651662826538, | |
| "logps/chosen": -166.51095581054688, | |
| "logps/rejected": -365.71807861328125, | |
| "loss": 180.3994, | |
| "losses_ref": -2.922461986541748, | |
| "ref_logps/chosen": -264.67388916015625, | |
| "ref_logps/rejected": -156.21817016601562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9816292524337769, | |
| "rewards/margins": 3.0766279697418213, | |
| "rewards/rejected": -2.094998836517334, | |
| "step": 490, | |
| "u": -3.193206310272217, | |
| "weight": 0.029411697760224342 | |
| }, | |
| { | |
| "diff_generated": -208.9027099609375, | |
| "epoch": 1.0363779115414813, | |
| "grad_norm": 1404.013865827238, | |
| "learning_rate": 1.1096251278965172e-06, | |
| "logits/chosen": -1.229707956314087, | |
| "logits/rejected": -1.2453272342681885, | |
| "logps/chosen": -167.49026489257812, | |
| "logps/rejected": -368.3177185058594, | |
| "loss": 166.3708, | |
| "losses_ref": -5.491534233093262, | |
| "ref_logps/chosen": -266.75250244140625, | |
| "ref_logps/rejected": -159.41500854492188, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9926217794418335, | |
| "rewards/margins": 3.081648588180542, | |
| "rewards/rejected": -2.089026927947998, | |
| "step": 495, | |
| "u": -1.6660839319229126, | |
| "weight": 0.06474236398935318 | |
| }, | |
| { | |
| "diff_generated": -213.4435577392578, | |
| "epoch": 1.0468463752944255, | |
| "grad_norm": 1314.8398697189618, | |
| "learning_rate": 1.0914104640422679e-06, | |
| "logits/chosen": -1.391204595565796, | |
| "logits/rejected": -1.3654673099517822, | |
| "logps/chosen": -161.88082885742188, | |
| "logps/rejected": -374.336669921875, | |
| "loss": 175.893, | |
| "losses_ref": -1.2716583013534546, | |
| "ref_logps/chosen": -257.99908447265625, | |
| "ref_logps/rejected": -160.89312744140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9611825942993164, | |
| "rewards/margins": 3.0956180095672607, | |
| "rewards/rejected": -2.1344354152679443, | |
| "step": 500, | |
| "u": -3.1927852630615234, | |
| "weight": 0.015900352969765663 | |
| }, | |
| { | |
| "diff_generated": -206.3148956298828, | |
| "epoch": 1.05731483904737, | |
| "grad_norm": 1445.075187818513, | |
| "learning_rate": 1.0731651629957721e-06, | |
| "logits/chosen": -1.3434970378875732, | |
| "logits/rejected": -1.305525541305542, | |
| "logps/chosen": -192.31558227539062, | |
| "logps/rejected": -378.03851318359375, | |
| "loss": 185.1733, | |
| "losses_ref": -2.9738333225250244, | |
| "ref_logps/chosen": -297.85302734375, | |
| "ref_logps/rejected": -171.7236328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.055374264717102, | |
| "rewards/margins": 3.118523359298706, | |
| "rewards/rejected": -2.0631489753723145, | |
| "step": 505, | |
| "u": -3.4035236835479736, | |
| "weight": 0.024688560515642166 | |
| }, | |
| { | |
| "diff_generated": -186.8843231201172, | |
| "epoch": 1.067783302800314, | |
| "grad_norm": 1181.3904875833675, | |
| "learning_rate": 1.0548953398643274e-06, | |
| "logits/chosen": -1.566375970840454, | |
| "logits/rejected": -1.4381110668182373, | |
| "logps/chosen": -193.49539184570312, | |
| "logps/rejected": -350.2602233886719, | |
| "loss": 179.7564, | |
| "losses_ref": -2.258450984954834, | |
| "ref_logps/chosen": -297.76202392578125, | |
| "ref_logps/rejected": -163.3759002685547, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0426661968231201, | |
| "rewards/margins": 2.9115095138549805, | |
| "rewards/rejected": -1.8688430786132812, | |
| "step": 510, | |
| "u": -2.4679007530212402, | |
| "weight": 0.044156283140182495 | |
| }, | |
| { | |
| "diff_generated": -200.12066650390625, | |
| "epoch": 1.0782517665532583, | |
| "grad_norm": 1297.9609792649137, | |
| "learning_rate": 1.0366071179740706e-06, | |
| "logits/chosen": -1.6367733478546143, | |
| "logits/rejected": -1.4493190050125122, | |
| "logps/chosen": -209.0851593017578, | |
| "logps/rejected": -365.74053955078125, | |
| "loss": 186.0993, | |
| "losses_ref": -3.8747305870056152, | |
| "ref_logps/chosen": -317.296630859375, | |
| "ref_logps/rejected": -165.619873046875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0821150541305542, | |
| "rewards/margins": 3.0833218097686768, | |
| "rewards/rejected": -2.001206636428833, | |
| "step": 515, | |
| "u": -2.938070297241211, | |
| "weight": 0.03331952169537544 | |
| }, | |
| { | |
| "diff_generated": -207.33700561523438, | |
| "epoch": 1.0887202303062025, | |
| "grad_norm": 1362.4544964162274, | |
| "learning_rate": 1.0183066268176775e-06, | |
| "logits/chosen": -1.541912317276001, | |
| "logits/rejected": -1.406719446182251, | |
| "logps/chosen": -204.0404052734375, | |
| "logps/rejected": -376.406494140625, | |
| "loss": 202.1916, | |
| "losses_ref": -0.5564223527908325, | |
| "ref_logps/chosen": -307.4422912597656, | |
| "ref_logps/rejected": -169.06948852539062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.034018874168396, | |
| "rewards/margins": 3.107388973236084, | |
| "rewards/rejected": -2.0733699798583984, | |
| "step": 520, | |
| "u": -3.34126353263855, | |
| "weight": 0.007363998796790838 | |
| }, | |
| { | |
| "diff_generated": -209.34707641601562, | |
| "epoch": 1.0991886940591469, | |
| "grad_norm": 1329.928673259645, | |
| "learning_rate": 1e-06, | |
| "logits/chosen": -1.4774454832077026, | |
| "logits/rejected": -1.3976843357086182, | |
| "logps/chosen": -190.63027954101562, | |
| "logps/rejected": -365.6118469238281, | |
| "loss": 191.7027, | |
| "losses_ref": -4.4078168869018555, | |
| "ref_logps/chosen": -289.65625, | |
| "ref_logps/rejected": -156.2647705078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.990260124206543, | |
| "rewards/margins": 3.083730936050415, | |
| "rewards/rejected": -2.093470811843872, | |
| "step": 525, | |
| "u": -2.4628920555114746, | |
| "weight": 0.034967873245477676 | |
| }, | |
| { | |
| "diff_generated": -219.25039672851562, | |
| "epoch": 1.109657157812091, | |
| "grad_norm": 1263.6571441007575, | |
| "learning_rate": 9.816933731823228e-07, | |
| "logits/chosen": -1.48972749710083, | |
| "logits/rejected": -1.3531391620635986, | |
| "logps/chosen": -184.37472534179688, | |
| "logps/rejected": -382.6318359375, | |
| "loss": 179.9115, | |
| "losses_ref": -4.217190742492676, | |
| "ref_logps/chosen": -283.9466857910156, | |
| "ref_logps/rejected": -163.38145446777344, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.995719313621521, | |
| "rewards/margins": 3.188223361968994, | |
| "rewards/rejected": -2.1925039291381836, | |
| "step": 530, | |
| "u": -2.7124040126800537, | |
| "weight": 0.03560812398791313 | |
| }, | |
| { | |
| "diff_generated": -222.4695587158203, | |
| "epoch": 1.1201256215650353, | |
| "grad_norm": 1387.0782441687347, | |
| "learning_rate": 9.633928820259293e-07, | |
| "logits/chosen": -1.2347859144210815, | |
| "logits/rejected": -1.2332684993743896, | |
| "logps/chosen": -162.6536102294922, | |
| "logps/rejected": -388.9007263183594, | |
| "loss": 162.1828, | |
| "losses_ref": -2.344147205352783, | |
| "ref_logps/chosen": -256.69085693359375, | |
| "ref_logps/rejected": -166.43115234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9403725862503052, | |
| "rewards/margins": 3.1650681495666504, | |
| "rewards/rejected": -2.2246956825256348, | |
| "step": 535, | |
| "u": -2.9149539470672607, | |
| "weight": 0.036711305379867554 | |
| }, | |
| { | |
| "diff_generated": -220.98583984375, | |
| "epoch": 1.1305940853179797, | |
| "grad_norm": 1297.6784365239848, | |
| "learning_rate": 9.451046601356725e-07, | |
| "logits/chosen": -1.3270328044891357, | |
| "logits/rejected": -1.2543261051177979, | |
| "logps/chosen": -174.17941284179688, | |
| "logps/rejected": -378.3968200683594, | |
| "loss": 168.7943, | |
| "losses_ref": -5.623769760131836, | |
| "ref_logps/chosen": -267.5427551269531, | |
| "ref_logps/rejected": -157.4110107421875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9336336255073547, | |
| "rewards/margins": 3.1434922218322754, | |
| "rewards/rejected": -2.2098584175109863, | |
| "step": 540, | |
| "u": -2.444173574447632, | |
| "weight": 0.07986775040626526 | |
| }, | |
| { | |
| "diff_generated": -227.32980346679688, | |
| "epoch": 1.1410625490709239, | |
| "grad_norm": 1205.4234546771595, | |
| "learning_rate": 9.268348370042281e-07, | |
| "logits/chosen": -1.3813427686691284, | |
| "logits/rejected": -1.318725347518921, | |
| "logps/chosen": -174.5741424560547, | |
| "logps/rejected": -399.95172119140625, | |
| "loss": 168.9783, | |
| "losses_ref": -3.7193565368652344, | |
| "ref_logps/chosen": -273.3332824707031, | |
| "ref_logps/rejected": -172.62191772460938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9875916242599487, | |
| "rewards/margins": 3.260889768600464, | |
| "rewards/rejected": -2.2732982635498047, | |
| "step": 545, | |
| "u": -3.2109789848327637, | |
| "weight": 0.042879991233348846 | |
| }, | |
| { | |
| "diff_generated": -249.11709594726562, | |
| "epoch": 1.151531012823868, | |
| "grad_norm": 1266.8954047572045, | |
| "learning_rate": 9.085895359577323e-07, | |
| "logits/chosen": -1.33551824092865, | |
| "logits/rejected": -1.3183876276016235, | |
| "logps/chosen": -167.4661865234375, | |
| "logps/rejected": -403.6305236816406, | |
| "loss": 174.6021, | |
| "losses_ref": -1.4713778495788574, | |
| "ref_logps/chosen": -267.08013916015625, | |
| "ref_logps/rejected": -154.513427734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9961398243904114, | |
| "rewards/margins": 3.4873108863830566, | |
| "rewards/rejected": -2.491170883178711, | |
| "step": 550, | |
| "u": -2.983215093612671, | |
| "weight": 0.02549784444272518 | |
| }, | |
| { | |
| "diff_generated": -220.6905059814453, | |
| "epoch": 1.1619994765768125, | |
| "grad_norm": 1242.623006879505, | |
| "learning_rate": 8.903748721034826e-07, | |
| "logits/chosen": -1.410308599472046, | |
| "logits/rejected": -1.3436871767044067, | |
| "logps/chosen": -175.43826293945312, | |
| "logps/rejected": -392.7843322753906, | |
| "loss": 178.3087, | |
| "losses_ref": -2.583522081375122, | |
| "ref_logps/chosen": -277.257080078125, | |
| "ref_logps/rejected": -172.09388732910156, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.018188238143921, | |
| "rewards/margins": 3.225093126296997, | |
| "rewards/rejected": -2.206904649734497, | |
| "step": 555, | |
| "u": -2.3211851119995117, | |
| "weight": 0.039024386554956436 | |
| }, | |
| { | |
| "diff_generated": -246.51620483398438, | |
| "epoch": 1.1724679403297567, | |
| "grad_norm": 1315.7734920897904, | |
| "learning_rate": 8.721969502803953e-07, | |
| "logits/chosen": -1.4283636808395386, | |
| "logits/rejected": -1.4595166444778442, | |
| "logps/chosen": -190.37667846679688, | |
| "logps/rejected": -401.78594970703125, | |
| "loss": 169.0395, | |
| "losses_ref": -0.9799969792366028, | |
| "ref_logps/chosen": -288.49481201171875, | |
| "ref_logps/rejected": -155.26974487304688, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9811817407608032, | |
| "rewards/margins": 3.4463438987731934, | |
| "rewards/rejected": -2.4651618003845215, | |
| "step": 560, | |
| "u": -3.4851043224334717, | |
| "weight": 0.011031994596123695 | |
| }, | |
| { | |
| "diff_generated": -237.05813598632812, | |
| "epoch": 1.1829364040827008, | |
| "grad_norm": 1324.6420978322203, | |
| "learning_rate": 8.540618630129027e-07, | |
| "logits/chosen": -1.5112595558166504, | |
| "logits/rejected": -1.4447729587554932, | |
| "logps/chosen": -197.54592895507812, | |
| "logps/rejected": -408.50360107421875, | |
| "loss": 180.7105, | |
| "losses_ref": -8.419300079345703, | |
| "ref_logps/chosen": -298.6998596191406, | |
| "ref_logps/rejected": -171.4455108642578, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.011539340019226, | |
| "rewards/margins": 3.3821206092834473, | |
| "rewards/rejected": -2.3705811500549316, | |
| "step": 565, | |
| "u": -3.0135345458984375, | |
| "weight": 0.03675166517496109 | |
| }, | |
| { | |
| "diff_generated": -226.0189666748047, | |
| "epoch": 1.193404867835645, | |
| "grad_norm": 1290.2940777725041, | |
| "learning_rate": 8.359756884689783e-07, | |
| "logits/chosen": -1.5810168981552124, | |
| "logits/rejected": -1.4695533514022827, | |
| "logps/chosen": -179.12496948242188, | |
| "logps/rejected": -392.3472595214844, | |
| "loss": 183.5485, | |
| "losses_ref": -1.6247104406356812, | |
| "ref_logps/chosen": -278.8708801269531, | |
| "ref_logps/rejected": -166.32827758789062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9974590539932251, | |
| "rewards/margins": 3.257648468017578, | |
| "rewards/rejected": -2.2601895332336426, | |
| "step": 570, | |
| "u": -3.080786943435669, | |
| "weight": 0.019241400063037872 | |
| }, | |
| { | |
| "diff_generated": -213.7429962158203, | |
| "epoch": 1.2038733315885894, | |
| "grad_norm": 1344.148507837478, | |
| "learning_rate": 8.179444884229744e-07, | |
| "logits/chosen": -1.4880825281143188, | |
| "logits/rejected": -1.502333641052246, | |
| "logps/chosen": -189.47103881835938, | |
| "logps/rejected": -378.13275146484375, | |
| "loss": 171.5777, | |
| "losses_ref": -0.9622389674186707, | |
| "ref_logps/chosen": -284.98492431640625, | |
| "ref_logps/rejected": -164.38975524902344, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9551390409469604, | |
| "rewards/margins": 3.092568874359131, | |
| "rewards/rejected": -2.137429714202881, | |
| "step": 575, | |
| "u": -3.1298766136169434, | |
| "weight": 0.013516530394554138 | |
| }, | |
| { | |
| "diff_generated": -231.77328491210938, | |
| "epoch": 1.2143417953415336, | |
| "grad_norm": 1304.8866597655287, | |
| "learning_rate": 7.999743062239557e-07, | |
| "logits/chosen": -1.4784562587738037, | |
| "logits/rejected": -1.5664056539535522, | |
| "logps/chosen": -176.44296264648438, | |
| "logps/rejected": -421.82135009765625, | |
| "loss": 181.4369, | |
| "losses_ref": -1.1828618049621582, | |
| "ref_logps/chosen": -274.30767822265625, | |
| "ref_logps/rejected": -190.04803466796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9786470532417297, | |
| "rewards/margins": 3.296379804611206, | |
| "rewards/rejected": -2.3177332878112793, | |
| "step": 580, | |
| "u": -3.0596017837524414, | |
| "weight": 0.012155565433204174 | |
| }, | |
| { | |
| "diff_generated": -220.5200653076172, | |
| "epoch": 1.2248102590944778, | |
| "grad_norm": 1320.435055959004, | |
| "learning_rate": 7.820711647702017e-07, | |
| "logits/chosen": -1.4778623580932617, | |
| "logits/rejected": -1.5001682043075562, | |
| "logps/chosen": -168.55393981933594, | |
| "logps/rejected": -381.0849304199219, | |
| "loss": 177.0697, | |
| "losses_ref": -2.307084560394287, | |
| "ref_logps/chosen": -260.8992004394531, | |
| "ref_logps/rejected": -160.5648651123047, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9234523773193359, | |
| "rewards/margins": 3.128653049468994, | |
| "rewards/rejected": -2.205200672149658, | |
| "step": 585, | |
| "u": -2.9381721019744873, | |
| "weight": 0.03427546098828316 | |
| }, | |
| { | |
| "diff_generated": -207.7950439453125, | |
| "epoch": 1.235278722847422, | |
| "grad_norm": 1261.3527727961057, | |
| "learning_rate": 7.642410644905726e-07, | |
| "logits/chosen": -1.4036446809768677, | |
| "logits/rejected": -1.4330257177352905, | |
| "logps/chosen": -171.85134887695312, | |
| "logps/rejected": -370.4696960449219, | |
| "loss": 176.7884, | |
| "losses_ref": -2.213914394378662, | |
| "ref_logps/chosen": -269.0211486816406, | |
| "ref_logps/rejected": -162.6746826171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9716979265213013, | |
| "rewards/margins": 3.0496482849121094, | |
| "rewards/rejected": -2.0779504776000977, | |
| "step": 590, | |
| "u": -2.40622878074646, | |
| "weight": 0.04459633305668831 | |
| }, | |
| { | |
| "diff_generated": -230.44302368164062, | |
| "epoch": 1.2457471866003664, | |
| "grad_norm": 1305.239745538134, | |
| "learning_rate": 7.464899813334e-07, | |
| "logits/chosen": -1.261853575706482, | |
| "logits/rejected": -1.2570579051971436, | |
| "logps/chosen": -181.5194091796875, | |
| "logps/rejected": -393.291259765625, | |
| "loss": 177.5223, | |
| "losses_ref": -4.516595840454102, | |
| "ref_logps/chosen": -278.3271789550781, | |
| "ref_logps/rejected": -162.84823608398438, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9680774807929993, | |
| "rewards/margins": 3.272507905960083, | |
| "rewards/rejected": -2.3044302463531494, | |
| "step": 595, | |
| "u": -2.7008533477783203, | |
| "weight": 0.05883873626589775 | |
| }, | |
| { | |
| "diff_generated": -232.184326171875, | |
| "epoch": 1.2562156503533106, | |
| "grad_norm": 1268.765281131021, | |
| "learning_rate": 7.288238647635829e-07, | |
| "logits/chosen": -1.4351574182510376, | |
| "logits/rejected": -1.2977135181427002, | |
| "logps/chosen": -184.0857696533203, | |
| "logps/rejected": -400.95361328125, | |
| "loss": 177.9198, | |
| "losses_ref": -3.803828001022339, | |
| "ref_logps/chosen": -284.0093078613281, | |
| "ref_logps/rejected": -168.76925659179688, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9992351531982422, | |
| "rewards/margins": 3.3210787773132324, | |
| "rewards/rejected": -2.3218436241149902, | |
| "step": 600, | |
| "u": -2.89520263671875, | |
| "weight": 0.026231110095977783 | |
| }, | |
| { | |
| "diff_generated": -198.3398895263672, | |
| "epoch": 1.2666841141062548, | |
| "grad_norm": 1202.5442238394803, | |
| "learning_rate": 7.112486357685631e-07, | |
| "logits/chosen": -1.499137043952942, | |
| "logits/rejected": -1.4640613794326782, | |
| "logps/chosen": -186.61227416992188, | |
| "logps/rejected": -356.29132080078125, | |
| "loss": 187.6658, | |
| "losses_ref": -4.061453819274902, | |
| "ref_logps/chosen": -287.03717041015625, | |
| "ref_logps/rejected": -157.95144653320312, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.004248857498169, | |
| "rewards/margins": 2.987647771835327, | |
| "rewards/rejected": -1.9833987951278687, | |
| "step": 605, | |
| "u": -3.231105089187622, | |
| "weight": 0.03989076986908913 | |
| }, | |
| { | |
| "diff_generated": -219.8319091796875, | |
| "epoch": 1.2771525778591992, | |
| "grad_norm": 1294.217440674336, | |
| "learning_rate": 6.937701848738407e-07, | |
| "logits/chosen": -1.41506028175354, | |
| "logits/rejected": -1.4094430208206177, | |
| "logps/chosen": -169.46595764160156, | |
| "logps/rejected": -384.82025146484375, | |
| "loss": 167.9688, | |
| "losses_ref": -1.3657054901123047, | |
| "ref_logps/chosen": -266.1152648925781, | |
| "ref_logps/rejected": -164.98837280273438, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9664928317070007, | |
| "rewards/margins": 3.1648120880126953, | |
| "rewards/rejected": -2.19831919670105, | |
| "step": 610, | |
| "u": -3.3554062843322754, | |
| "weight": 0.01739688031375408 | |
| }, | |
| { | |
| "diff_generated": -217.53970336914062, | |
| "epoch": 1.2876210416121434, | |
| "grad_norm": 1337.3093609895052, | |
| "learning_rate": 6.763943701687045e-07, | |
| "logits/chosen": -1.633599877357483, | |
| "logits/rejected": -1.5192573070526123, | |
| "logps/chosen": -191.32760620117188, | |
| "logps/rejected": -387.1475524902344, | |
| "loss": 183.0882, | |
| "losses_ref": -0.359982430934906, | |
| "ref_logps/chosen": -299.5060119628906, | |
| "ref_logps/rejected": -169.60787963867188, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0817840099334717, | |
| "rewards/margins": 3.257181167602539, | |
| "rewards/rejected": -2.1753971576690674, | |
| "step": 615, | |
| "u": -3.340681552886963, | |
| "weight": 0.008451832458376884 | |
| }, | |
| { | |
| "diff_generated": -208.6597137451172, | |
| "epoch": 1.2980895053650876, | |
| "grad_norm": 1324.2598041902163, | |
| "learning_rate": 6.591270153428288e-07, | |
| "logits/chosen": -1.6454055309295654, | |
| "logits/rejected": -1.489946961402893, | |
| "logps/chosen": -191.6290283203125, | |
| "logps/rejected": -364.0921325683594, | |
| "loss": 178.0635, | |
| "losses_ref": -2.520381450653076, | |
| "ref_logps/chosen": -295.8542785644531, | |
| "ref_logps/rejected": -155.4324188232422, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.042252540588379, | |
| "rewards/margins": 3.128849506378174, | |
| "rewards/rejected": -2.086596965789795, | |
| "step": 620, | |
| "u": -2.7204320430755615, | |
| "weight": 0.02861974760890007 | |
| }, | |
| { | |
| "diff_generated": -196.55752563476562, | |
| "epoch": 1.308557969118032, | |
| "grad_norm": 1344.8788218911382, | |
| "learning_rate": 6.419739077344016e-07, | |
| "logits/chosen": -1.5530303716659546, | |
| "logits/rejected": -1.423179030418396, | |
| "logps/chosen": -200.18063354492188, | |
| "logps/rejected": -360.1055603027344, | |
| "loss": 179.8101, | |
| "losses_ref": -3.9870200157165527, | |
| "ref_logps/chosen": -300.4015197753906, | |
| "ref_logps/rejected": -163.5480194091797, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0022084712982178, | |
| "rewards/margins": 2.9677836894989014, | |
| "rewards/rejected": -1.9655752182006836, | |
| "step": 625, | |
| "u": -2.844027042388916, | |
| "weight": 0.038288719952106476 | |
| }, | |
| { | |
| "diff_generated": -198.09622192382812, | |
| "epoch": 1.3190264328709762, | |
| "grad_norm": 1205.0358284390313, | |
| "learning_rate": 6.24940796390438e-07, | |
| "logits/chosen": -1.5373382568359375, | |
| "logits/rejected": -1.444549322128296, | |
| "logps/chosen": -174.25350952148438, | |
| "logps/rejected": -362.83953857421875, | |
| "loss": 166.5968, | |
| "losses_ref": -2.2841248512268066, | |
| "ref_logps/chosen": -274.06365966796875, | |
| "ref_logps/rejected": -164.74331665039062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9981018900871277, | |
| "rewards/margins": 2.9790642261505127, | |
| "rewards/rejected": -1.9809621572494507, | |
| "step": 630, | |
| "u": -2.492745876312256, | |
| "weight": 0.01923806592822075 | |
| }, | |
| { | |
| "diff_generated": -214.6193389892578, | |
| "epoch": 1.3294948966239204, | |
| "grad_norm": 1314.9134741026285, | |
| "learning_rate": 6.08033390139925e-07, | |
| "logits/chosen": -1.4583691358566284, | |
| "logits/rejected": -1.290028691291809, | |
| "logps/chosen": -190.0717315673828, | |
| "logps/rejected": -369.1700439453125, | |
| "loss": 192.5529, | |
| "losses_ref": -0.966667652130127, | |
| "ref_logps/chosen": -293.4891662597656, | |
| "ref_logps/rejected": -154.55068969726562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0341745615005493, | |
| "rewards/margins": 3.180367946624756, | |
| "rewards/rejected": -2.146193265914917, | |
| "step": 635, | |
| "u": -2.8957810401916504, | |
| "weight": 0.020022699609398842 | |
| }, | |
| { | |
| "diff_generated": -209.5125732421875, | |
| "epoch": 1.3399633603768648, | |
| "grad_norm": 1311.833771803947, | |
| "learning_rate": 5.912573556804452e-07, | |
| "logits/chosen": -1.4464821815490723, | |
| "logits/rejected": -1.3825037479400635, | |
| "logps/chosen": -181.79258728027344, | |
| "logps/rejected": -380.0604553222656, | |
| "loss": 186.6832, | |
| "losses_ref": -2.0217666625976562, | |
| "ref_logps/chosen": -283.5255126953125, | |
| "ref_logps/rejected": -170.54788208007812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.017329216003418, | |
| "rewards/margins": 3.112454891204834, | |
| "rewards/rejected": -2.095125675201416, | |
| "step": 640, | |
| "u": -2.146329164505005, | |
| "weight": 0.051374662667512894 | |
| }, | |
| { | |
| "diff_generated": -233.44900512695312, | |
| "epoch": 1.350431824129809, | |
| "grad_norm": 1320.9978857185588, | |
| "learning_rate": 5.746183156789252e-07, | |
| "logits/chosen": -1.4467910528182983, | |
| "logits/rejected": -1.2174046039581299, | |
| "logps/chosen": -190.71127319335938, | |
| "logps/rejected": -401.9010314941406, | |
| "loss": 181.6372, | |
| "losses_ref": -1.3231620788574219, | |
| "ref_logps/chosen": -301.30584716796875, | |
| "ref_logps/rejected": -168.45204162597656, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.1059458255767822, | |
| "rewards/margins": 3.4404358863830566, | |
| "rewards/rejected": -2.3344900608062744, | |
| "step": 645, | |
| "u": -2.5961060523986816, | |
| "weight": 0.031209224835038185 | |
| }, | |
| { | |
| "diff_generated": -218.05880737304688, | |
| "epoch": 1.3609002878827532, | |
| "grad_norm": 1268.0769992434364, | |
| "learning_rate": 5.581218468871365e-07, | |
| "logits/chosen": -1.2198398113250732, | |
| "logits/rejected": -1.3189094066619873, | |
| "logps/chosen": -157.86666870117188, | |
| "logps/rejected": -376.75433349609375, | |
| "loss": 168.9012, | |
| "losses_ref": -2.4989333152770996, | |
| "ref_logps/chosen": -252.76400756835938, | |
| "ref_logps/rejected": -158.69552612304688, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9489734768867493, | |
| "rewards/margins": 3.129561424255371, | |
| "rewards/rejected": -2.1805882453918457, | |
| "step": 650, | |
| "u": -2.92409086227417, | |
| "weight": 0.0428018681704998 | |
| }, | |
| { | |
| "diff_generated": -235.935546875, | |
| "epoch": 1.3713687516356974, | |
| "grad_norm": 1347.742524924812, | |
| "learning_rate": 5.417734782725896e-07, | |
| "logits/chosen": -1.2961053848266602, | |
| "logits/rejected": -1.261878252029419, | |
| "logps/chosen": -177.77523803710938, | |
| "logps/rejected": -389.1588134765625, | |
| "loss": 179.405, | |
| "losses_ref": -1.0838311910629272, | |
| "ref_logps/chosen": -277.2697448730469, | |
| "ref_logps/rejected": -153.2233123779297, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9949450492858887, | |
| "rewards/margins": 3.3543007373809814, | |
| "rewards/rejected": -2.3593554496765137, | |
| "step": 655, | |
| "u": -3.1044487953186035, | |
| "weight": 0.017367416992783546 | |
| }, | |
| { | |
| "diff_generated": -211.7913055419922, | |
| "epoch": 1.3818372153886418, | |
| "grad_norm": 1311.034191538654, | |
| "learning_rate": 5.255786891654399e-07, | |
| "logits/chosen": -1.2746165990829468, | |
| "logits/rejected": -1.2540855407714844, | |
| "logps/chosen": -170.9514923095703, | |
| "logps/rejected": -376.79229736328125, | |
| "loss": 174.0495, | |
| "losses_ref": -2.949699878692627, | |
| "ref_logps/chosen": -268.7665100097656, | |
| "ref_logps/rejected": -165.0010223388672, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9781501889228821, | |
| "rewards/margins": 3.0960631370544434, | |
| "rewards/rejected": -2.117912769317627, | |
| "step": 660, | |
| "u": -1.9965251684188843, | |
| "weight": 0.04027215391397476 | |
| }, | |
| { | |
| "diff_generated": -224.0879364013672, | |
| "epoch": 1.392305679141586, | |
| "grad_norm": 1328.7070235599076, | |
| "learning_rate": 5.095429074220319e-07, | |
| "logits/chosen": -1.2053465843200684, | |
| "logits/rejected": -1.1557897329330444, | |
| "logps/chosen": -175.30589294433594, | |
| "logps/rejected": -393.43218994140625, | |
| "loss": 184.6881, | |
| "losses_ref": -3.8794121742248535, | |
| "ref_logps/chosen": -274.28826904296875, | |
| "ref_logps/rejected": -169.34422302246094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9898236989974976, | |
| "rewards/margins": 3.230703353881836, | |
| "rewards/rejected": -2.240879535675049, | |
| "step": 665, | |
| "u": -3.0530405044555664, | |
| "weight": 0.03465485945343971 | |
| }, | |
| { | |
| "diff_generated": -240.96484375, | |
| "epoch": 1.4027741428945302, | |
| "grad_norm": 1353.971116750616, | |
| "learning_rate": 4.936715076056974e-07, | |
| "logits/chosen": -1.242436408996582, | |
| "logits/rejected": -1.24913489818573, | |
| "logps/chosen": -183.4954833984375, | |
| "logps/rejected": -405.70050048828125, | |
| "loss": 171.9422, | |
| "losses_ref": -0.8431612253189087, | |
| "ref_logps/chosen": -284.3236999511719, | |
| "ref_logps/rejected": -164.7356414794922, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0082820653915405, | |
| "rewards/margins": 3.4179306030273438, | |
| "rewards/rejected": -2.4096481800079346, | |
| "step": 670, | |
| "u": -3.2680907249450684, | |
| "weight": 0.007226690649986267 | |
| }, | |
| { | |
| "diff_generated": -230.71115112304688, | |
| "epoch": 1.4132426066474744, | |
| "grad_norm": 1301.2684317901687, | |
| "learning_rate": 4.779698091854098e-07, | |
| "logits/chosen": -1.4362276792526245, | |
| "logits/rejected": -1.2898997068405151, | |
| "logps/chosen": -196.05447387695312, | |
| "logps/rejected": -400.25994873046875, | |
| "loss": 193.0132, | |
| "losses_ref": -0.4112131595611572, | |
| "ref_logps/chosen": -306.9317321777344, | |
| "ref_logps/rejected": -169.54879760742188, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.1087725162506104, | |
| "rewards/margins": 3.415884494781494, | |
| "rewards/rejected": -2.307111978530884, | |
| "step": 675, | |
| "u": -3.2834739685058594, | |
| "weight": 0.006595195736736059 | |
| }, | |
| { | |
| "diff_generated": -205.662841796875, | |
| "epoch": 1.4237110704004188, | |
| "grad_norm": 1344.3331479958706, | |
| "learning_rate": 4.624430747529102e-07, | |
| "logits/chosen": -1.3598095178604126, | |
| "logits/rejected": -1.158661961555481, | |
| "logps/chosen": -205.39236450195312, | |
| "logps/rejected": -369.7188720703125, | |
| "loss": 181.4401, | |
| "losses_ref": -1.5265072584152222, | |
| "ref_logps/chosen": -313.2685546875, | |
| "ref_logps/rejected": -164.05599975585938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0787618160247803, | |
| "rewards/margins": 3.135390520095825, | |
| "rewards/rejected": -2.056628465652466, | |
| "step": 680, | |
| "u": -3.1280694007873535, | |
| "weight": 0.024629075080156326 | |
| }, | |
| { | |
| "diff_generated": -223.96005249023438, | |
| "epoch": 1.434179534153363, | |
| "grad_norm": 1420.0899808408303, | |
| "learning_rate": 4.4709650825889277e-07, | |
| "logits/chosen": -1.202007532119751, | |
| "logits/rejected": -1.1467583179473877, | |
| "logps/chosen": -161.4755859375, | |
| "logps/rejected": -394.6024475097656, | |
| "loss": 181.6898, | |
| "losses_ref": -0.6423639059066772, | |
| "ref_logps/chosen": -258.74224853515625, | |
| "ref_logps/rejected": -170.64236450195312, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9726665616035461, | |
| "rewards/margins": 3.2122673988342285, | |
| "rewards/rejected": -2.239600658416748, | |
| "step": 685, | |
| "u": -2.6377460956573486, | |
| "weight": 0.008045530878007412 | |
| }, | |
| { | |
| "diff_generated": -199.37355041503906, | |
| "epoch": 1.4446479979063072, | |
| "grad_norm": 1308.6573761420323, | |
| "learning_rate": 4.3193525326884426e-07, | |
| "logits/chosen": -1.3359885215759277, | |
| "logits/rejected": -1.2320592403411865, | |
| "logps/chosen": -199.9832000732422, | |
| "logps/rejected": -364.55865478515625, | |
| "loss": 197.232, | |
| "losses_ref": -2.2240054607391357, | |
| "ref_logps/chosen": -303.2825927734375, | |
| "ref_logps/rejected": -165.18508911132812, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 1.0329937934875488, | |
| "rewards/margins": 3.026729106903076, | |
| "rewards/rejected": -1.9937355518341064, | |
| "step": 690, | |
| "u": -3.028186559677124, | |
| "weight": 0.02633347176015377 | |
| }, | |
| { | |
| "diff_generated": -224.0160369873047, | |
| "epoch": 1.4551164616592516, | |
| "grad_norm": 1299.079432778448, | |
| "learning_rate": 4.1696439123912406e-07, | |
| "logits/chosen": -1.2223880290985107, | |
| "logits/rejected": -1.209564447402954, | |
| "logps/chosen": -174.464111328125, | |
| "logps/rejected": -393.27691650390625, | |
| "loss": 178.2965, | |
| "losses_ref": -4.651436805725098, | |
| "ref_logps/chosen": -266.0323486328125, | |
| "ref_logps/rejected": -169.26083374023438, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.915682315826416, | |
| "rewards/margins": 3.1558427810668945, | |
| "rewards/rejected": -2.2401604652404785, | |
| "step": 695, | |
| "u": -2.1582460403442383, | |
| "weight": 0.050126731395721436 | |
| }, | |
| { | |
| "diff_generated": -229.42153930664062, | |
| "epoch": 1.4655849254121958, | |
| "grad_norm": 1181.3986183050397, | |
| "learning_rate": 4.0218893981385927e-07, | |
| "logits/chosen": -1.2920024394989014, | |
| "logits/rejected": -1.2460237741470337, | |
| "logps/chosen": -169.0710906982422, | |
| "logps/rejected": -389.62451171875, | |
| "loss": 185.8502, | |
| "losses_ref": -2.0431206226348877, | |
| "ref_logps/chosen": -263.98992919921875, | |
| "ref_logps/rejected": -160.20298767089844, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9491885900497437, | |
| "rewards/margins": 3.243403196334839, | |
| "rewards/rejected": -2.294214963912964, | |
| "step": 700, | |
| "u": -2.900634765625, | |
| "weight": 0.029426846653223038 | |
| }, | |
| { | |
| "diff_generated": -238.50405883789062, | |
| "epoch": 1.47605338916514, | |
| "grad_norm": 1395.6758572737517, | |
| "learning_rate": 3.87613851143229e-07, | |
| "logits/chosen": -1.321358323097229, | |
| "logits/rejected": -1.2150487899780273, | |
| "logps/chosen": -193.1901397705078, | |
| "logps/rejected": -408.7565002441406, | |
| "loss": 180.6914, | |
| "losses_ref": -7.425305366516113, | |
| "ref_logps/chosen": -295.8336486816406, | |
| "ref_logps/rejected": -170.25244140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0264348983764648, | |
| "rewards/margins": 3.411475419998169, | |
| "rewards/rejected": -2.385040760040283, | |
| "step": 705, | |
| "u": -2.5025954246520996, | |
| "weight": 0.05010579898953438 | |
| }, | |
| { | |
| "diff_generated": -232.7953643798828, | |
| "epoch": 1.4865218529180844, | |
| "grad_norm": 1298.8055689658759, | |
| "learning_rate": 3.7324401022369744e-07, | |
| "logits/chosen": -1.322563886642456, | |
| "logits/rejected": -1.1327731609344482, | |
| "logps/chosen": -194.57736206054688, | |
| "logps/rejected": -386.1799011230469, | |
| "loss": 178.1232, | |
| "losses_ref": -1.3739917278289795, | |
| "ref_logps/chosen": -296.6303405761719, | |
| "ref_logps/rejected": -153.38453674316406, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0205297470092773, | |
| "rewards/margins": 3.3484835624694824, | |
| "rewards/rejected": -2.327953815460205, | |
| "step": 710, | |
| "u": -3.248492479324341, | |
| "weight": 0.022353414446115494 | |
| }, | |
| { | |
| "diff_generated": -204.0673370361328, | |
| "epoch": 1.4969903166710286, | |
| "grad_norm": 1434.009703095031, | |
| "learning_rate": 3.5908423326075455e-07, | |
| "logits/chosen": -1.2674996852874756, | |
| "logits/rejected": -1.242331862449646, | |
| "logps/chosen": -167.33718872070312, | |
| "logps/rejected": -369.3961486816406, | |
| "loss": 183.2372, | |
| "losses_ref": -1.1576902866363525, | |
| "ref_logps/chosen": -261.9808044433594, | |
| "ref_logps/rejected": -165.32882690429688, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9464362263679504, | |
| "rewards/margins": 2.987109661102295, | |
| "rewards/rejected": -2.0406734943389893, | |
| "step": 715, | |
| "u": -2.994257688522339, | |
| "weight": 0.022263679653406143 | |
| }, | |
| { | |
| "diff_generated": -233.936767578125, | |
| "epoch": 1.5074587804239727, | |
| "grad_norm": 1304.2767992641454, | |
| "learning_rate": 3.45139266054715e-07, | |
| "logits/chosen": -1.318178415298462, | |
| "logits/rejected": -1.1334383487701416, | |
| "logps/chosen": -197.61227416992188, | |
| "logps/rejected": -397.12127685546875, | |
| "loss": 183.3899, | |
| "losses_ref": -1.6034200191497803, | |
| "ref_logps/chosen": -309.3571472167969, | |
| "ref_logps/rejected": -163.1844940185547, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.1174486875534058, | |
| "rewards/margins": 3.4568161964416504, | |
| "rewards/rejected": -2.3393678665161133, | |
| "step": 720, | |
| "u": -3.3942806720733643, | |
| "weight": 0.019716758280992508 | |
| }, | |
| { | |
| "diff_generated": -244.8833770751953, | |
| "epoch": 1.5179272441769172, | |
| "grad_norm": 1236.5966034907726, | |
| "learning_rate": 3.314137824101111e-07, | |
| "logits/chosen": -1.306779384613037, | |
| "logits/rejected": -1.1290355920791626, | |
| "logps/chosen": -218.06015014648438, | |
| "logps/rejected": -403.7084045410156, | |
| "loss": 191.625, | |
| "losses_ref": -2.257856845855713, | |
| "ref_logps/chosen": -318.39056396484375, | |
| "ref_logps/rejected": -158.82498168945312, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0033042430877686, | |
| "rewards/margins": 3.4521377086639404, | |
| "rewards/rejected": -2.448833703994751, | |
| "step": 725, | |
| "u": -3.3267006874084473, | |
| "weight": 0.032559871673583984 | |
| }, | |
| { | |
| "diff_generated": -222.01101684570312, | |
| "epoch": 1.5283957079298613, | |
| "grad_norm": 1211.6699328046157, | |
| "learning_rate": 3.179123825692178e-07, | |
| "logits/chosen": -1.248240351676941, | |
| "logits/rejected": -1.091903805732727, | |
| "logps/chosen": -175.27281188964844, | |
| "logps/rejected": -383.36309814453125, | |
| "loss": 173.3922, | |
| "losses_ref": -5.464686393737793, | |
| "ref_logps/chosen": -273.9178771972656, | |
| "ref_logps/rejected": -161.35206604003906, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9864505529403687, | |
| "rewards/margins": 3.2065606117248535, | |
| "rewards/rejected": -2.2201101779937744, | |
| "step": 730, | |
| "u": -2.946007013320923, | |
| "weight": 0.04170671105384827 | |
| }, | |
| { | |
| "diff_generated": -220.7677764892578, | |
| "epoch": 1.5388641716828055, | |
| "grad_norm": 1300.7738080642184, | |
| "learning_rate": 3.0463959167023335e-07, | |
| "logits/chosen": -1.2869834899902344, | |
| "logits/rejected": -1.1894266605377197, | |
| "logps/chosen": -182.8350372314453, | |
| "logps/rejected": -379.5199890136719, | |
| "loss": 171.4061, | |
| "losses_ref": -4.5947465896606445, | |
| "ref_logps/chosen": -284.21063232421875, | |
| "ref_logps/rejected": -158.75221252441406, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0137560367584229, | |
| "rewards/margins": 3.2214341163635254, | |
| "rewards/rejected": -2.2076778411865234, | |
| "step": 735, | |
| "u": -2.485495090484619, | |
| "weight": 0.053016532212495804 | |
| }, | |
| { | |
| "diff_generated": -238.62289428710938, | |
| "epoch": 1.54933263543575, | |
| "grad_norm": 1330.9443663432232, | |
| "learning_rate": 2.915998582306299e-07, | |
| "logits/chosen": -1.3296325206756592, | |
| "logits/rejected": -1.1434093713760376, | |
| "logps/chosen": -192.86752319335938, | |
| "logps/rejected": -412.7796325683594, | |
| "loss": 171.9964, | |
| "losses_ref": -0.9953049421310425, | |
| "ref_logps/chosen": -298.61065673828125, | |
| "ref_logps/rejected": -174.15672302246094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0574313402175903, | |
| "rewards/margins": 3.443660259246826, | |
| "rewards/rejected": -2.3862290382385254, | |
| "step": 740, | |
| "u": -3.0645031929016113, | |
| "weight": 0.014706036075949669 | |
| }, | |
| { | |
| "diff_generated": -232.26016235351562, | |
| "epoch": 1.559801099188694, | |
| "grad_norm": 1284.1954470666844, | |
| "learning_rate": 2.7879755265618557e-07, | |
| "logits/chosen": -1.1518179178237915, | |
| "logits/rejected": -1.1568098068237305, | |
| "logps/chosen": -160.57080078125, | |
| "logps/rejected": -390.94854736328125, | |
| "loss": 177.5848, | |
| "losses_ref": -0.8798303604125977, | |
| "ref_logps/chosen": -254.80648803710938, | |
| "ref_logps/rejected": -158.68838500976562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9423569440841675, | |
| "rewards/margins": 3.264958620071411, | |
| "rewards/rejected": -2.322601556777954, | |
| "step": 745, | |
| "u": -2.921161413192749, | |
| "weight": 0.0170670785009861 | |
| }, | |
| { | |
| "diff_generated": -233.20187377929688, | |
| "epoch": 1.5702695629416383, | |
| "grad_norm": 1289.4633991370238, | |
| "learning_rate": 2.6623696577619625e-07, | |
| "logits/chosen": -1.2346287965774536, | |
| "logits/rejected": -1.2745471000671387, | |
| "logps/chosen": -192.0581512451172, | |
| "logps/rejected": -391.81146240234375, | |
| "loss": 182.0347, | |
| "losses_ref": -1.6879841089248657, | |
| "ref_logps/chosen": -290.81500244140625, | |
| "ref_logps/rejected": -158.6095428466797, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9875686764717102, | |
| "rewards/margins": 3.3195877075195312, | |
| "rewards/rejected": -2.3320186138153076, | |
| "step": 750, | |
| "u": -3.0655760765075684, | |
| "weight": 0.02230766788125038 | |
| }, | |
| { | |
| "diff_generated": -217.38137817382812, | |
| "epoch": 1.5807380266945825, | |
| "grad_norm": 1443.1241349727247, | |
| "learning_rate": 2.5392230740535846e-07, | |
| "logits/chosen": -1.4136921167373657, | |
| "logits/rejected": -1.14936363697052, | |
| "logps/chosen": -205.6985321044922, | |
| "logps/rejected": -384.5464782714844, | |
| "loss": 193.6084, | |
| "losses_ref": -2.2517495155334473, | |
| "ref_logps/chosen": -317.1262512207031, | |
| "ref_logps/rejected": -167.1651153564453, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.1142771244049072, | |
| "rewards/margins": 3.288090467453003, | |
| "rewards/rejected": -2.173813581466675, | |
| "step": 755, | |
| "u": -2.5095248222351074, | |
| "weight": 0.04148329049348831 | |
| }, | |
| { | |
| "diff_generated": -223.0001220703125, | |
| "epoch": 1.5912064904475267, | |
| "grad_norm": 1315.9291386894508, | |
| "learning_rate": 2.418577049328058e-07, | |
| "logits/chosen": -1.6086959838867188, | |
| "logits/rejected": -1.2083603143692017, | |
| "logps/chosen": -214.2064666748047, | |
| "logps/rejected": -383.09918212890625, | |
| "loss": 193.4667, | |
| "losses_ref": -0.697050929069519, | |
| "ref_logps/chosen": -327.52081298828125, | |
| "ref_logps/rejected": -160.09909057617188, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.1331430673599243, | |
| "rewards/margins": 3.3631443977355957, | |
| "rewards/rejected": -2.2300009727478027, | |
| "step": 760, | |
| "u": -3.4212822914123535, | |
| "weight": 0.02071799524128437 | |
| }, | |
| { | |
| "diff_generated": -240.10708618164062, | |
| "epoch": 1.6016749542004711, | |
| "grad_norm": 1354.8278973512276, | |
| "learning_rate": 2.300472019387697e-07, | |
| "logits/chosen": -1.3740001916885376, | |
| "logits/rejected": -1.2972242832183838, | |
| "logps/chosen": -184.8181915283203, | |
| "logps/rejected": -400.69439697265625, | |
| "loss": 183.1763, | |
| "losses_ref": -5.5122270584106445, | |
| "ref_logps/chosen": -284.5431213378906, | |
| "ref_logps/rejected": -160.58731079101562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9972493052482605, | |
| "rewards/margins": 3.398320436477661, | |
| "rewards/rejected": -2.4010708332061768, | |
| "step": 765, | |
| "u": -2.9249844551086426, | |
| "weight": 0.04190880060195923 | |
| }, | |
| { | |
| "diff_generated": -224.24789428710938, | |
| "epoch": 1.6121434179534153, | |
| "grad_norm": 1294.896383811541, | |
| "learning_rate": 2.1849475683932994e-07, | |
| "logits/chosen": -1.3714028596878052, | |
| "logits/rejected": -1.3127011060714722, | |
| "logps/chosen": -184.06544494628906, | |
| "logps/rejected": -384.2123107910156, | |
| "loss": 179.5198, | |
| "losses_ref": -3.6349315643310547, | |
| "ref_logps/chosen": -284.44268798828125, | |
| "ref_logps/rejected": -159.9644012451172, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.003772497177124, | |
| "rewards/margins": 3.246250867843628, | |
| "rewards/rejected": -2.242478847503662, | |
| "step": 770, | |
| "u": -2.7088732719421387, | |
| "weight": 0.04078099876642227 | |
| }, | |
| { | |
| "diff_generated": -228.37911987304688, | |
| "epoch": 1.6226118817063595, | |
| "grad_norm": 1315.1478573927377, | |
| "learning_rate": 2.0720424155971038e-07, | |
| "logits/chosen": -1.4367603063583374, | |
| "logits/rejected": -1.2870023250579834, | |
| "logps/chosen": -201.5555877685547, | |
| "logps/rejected": -386.1324157714844, | |
| "loss": 176.5013, | |
| "losses_ref": -2.8903164863586426, | |
| "ref_logps/chosen": -306.54461669921875, | |
| "ref_logps/rejected": -157.75328063964844, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0498902797698975, | |
| "rewards/margins": 3.333681583404541, | |
| "rewards/rejected": -2.2837913036346436, | |
| "step": 775, | |
| "u": -2.703965902328491, | |
| "weight": 0.04053039103746414 | |
| }, | |
| { | |
| "diff_generated": -220.4508819580078, | |
| "epoch": 1.633080345459304, | |
| "grad_norm": 1400.50539955428, | |
| "learning_rate": 1.961794402365611e-07, | |
| "logits/chosen": -1.4036462306976318, | |
| "logits/rejected": -1.2919548749923706, | |
| "logps/chosen": -200.26541137695312, | |
| "logps/rejected": -386.81597900390625, | |
| "loss": 183.6931, | |
| "losses_ref": -1.8775193691253662, | |
| "ref_logps/chosen": -310.53729248046875, | |
| "ref_logps/rejected": -166.36508178710938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.1027185916900635, | |
| "rewards/margins": 3.307227373123169, | |
| "rewards/rejected": -2.2045087814331055, | |
| "step": 780, | |
| "u": -2.835704803466797, | |
| "weight": 0.031143631786108017 | |
| }, | |
| { | |
| "diff_generated": -217.3331298828125, | |
| "epoch": 1.643548809212248, | |
| "grad_norm": 1301.0844819616188, | |
| "learning_rate": 1.8542404794966427e-07, | |
| "logits/chosen": -1.4641870260238647, | |
| "logits/rejected": -1.3147245645523071, | |
| "logps/chosen": -196.31103515625, | |
| "logps/rejected": -391.39166259765625, | |
| "loss": 178.2437, | |
| "losses_ref": -1.2605804204940796, | |
| "ref_logps/chosen": -303.4082336425781, | |
| "ref_logps/rejected": -174.0585479736328, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.070972204208374, | |
| "rewards/margins": 3.2443034648895264, | |
| "rewards/rejected": -2.1733312606811523, | |
| "step": 785, | |
| "u": -2.590919017791748, | |
| "weight": 0.01949651725590229 | |
| }, | |
| { | |
| "diff_generated": -220.4461669921875, | |
| "epoch": 1.6540172729651923, | |
| "grad_norm": 1297.8083100097251, | |
| "learning_rate": 1.7494166948349053e-07, | |
| "logits/chosen": -1.3500601053237915, | |
| "logits/rejected": -1.411941409111023, | |
| "logps/chosen": -159.91616821289062, | |
| "logps/rejected": -383.6579284667969, | |
| "loss": 166.2805, | |
| "losses_ref": -1.1791235208511353, | |
| "ref_logps/chosen": -257.8923034667969, | |
| "ref_logps/rejected": -163.21176147460938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9797613024711609, | |
| "rewards/margins": 3.184222936630249, | |
| "rewards/rejected": -2.2044615745544434, | |
| "step": 790, | |
| "u": -3.492673873901367, | |
| "weight": 0.020172851160168648 | |
| }, | |
| { | |
| "diff_generated": -218.501708984375, | |
| "epoch": 1.6644857367181367, | |
| "grad_norm": 1305.6902286203212, | |
| "learning_rate": 1.6473581811901528e-07, | |
| "logits/chosen": -1.3759443759918213, | |
| "logits/rejected": -1.3116881847381592, | |
| "logps/chosen": -175.59524536132812, | |
| "logps/rejected": -386.4131774902344, | |
| "loss": 166.0248, | |
| "losses_ref": -0.9349870681762695, | |
| "ref_logps/chosen": -275.24603271484375, | |
| "ref_logps/rejected": -167.91146850585938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9965084791183472, | |
| "rewards/margins": 3.181525468826294, | |
| "rewards/rejected": -2.1850171089172363, | |
| "step": 795, | |
| "u": -3.1951217651367188, | |
| "weight": 0.007377298083156347 | |
| }, | |
| { | |
| "diff_generated": -227.8848114013672, | |
| "epoch": 1.674954200471081, | |
| "grad_norm": 1377.3447203192195, | |
| "learning_rate": 1.5480991445620538e-07, | |
| "logits/chosen": -1.3294823169708252, | |
| "logits/rejected": -1.3292287588119507, | |
| "logps/chosen": -171.1267852783203, | |
| "logps/rejected": -383.339111328125, | |
| "loss": 179.9502, | |
| "losses_ref": -1.4068111181259155, | |
| "ref_logps/chosen": -269.3274841308594, | |
| "ref_logps/rejected": -155.45433044433594, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9820070266723633, | |
| "rewards/margins": 3.260855197906494, | |
| "rewards/rejected": -2.278848171234131, | |
| "step": 800, | |
| "u": -3.075801134109497, | |
| "weight": 0.02200758084654808 | |
| }, | |
| { | |
| "diff_generated": -223.35498046875, | |
| "epoch": 1.685422664224025, | |
| "grad_norm": 1269.6278028028526, | |
| "learning_rate": 1.4516728526756873e-07, | |
| "logits/chosen": -1.4065078496932983, | |
| "logits/rejected": -1.2835044860839844, | |
| "logps/chosen": -182.1883544921875, | |
| "logps/rejected": -374.7066650390625, | |
| "loss": 186.9203, | |
| "losses_ref": -2.037257671356201, | |
| "ref_logps/chosen": -276.4019470214844, | |
| "ref_logps/rejected": -151.35165405273438, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9421361684799194, | |
| "rewards/margins": 3.1756858825683594, | |
| "rewards/rejected": -2.2335495948791504, | |
| "step": 805, | |
| "u": -2.378087282180786, | |
| "weight": 0.04411940649151802 | |
| }, | |
| { | |
| "diff_generated": -216.16616821289062, | |
| "epoch": 1.6958911279769695, | |
| "grad_norm": 1463.8714206417467, | |
| "learning_rate": 1.3581116238315194e-07, | |
| "logits/chosen": -1.4423078298568726, | |
| "logits/rejected": -1.3139569759368896, | |
| "logps/chosen": -205.9932098388672, | |
| "logps/rejected": -375.70849609375, | |
| "loss": 190.2176, | |
| "losses_ref": -1.2827723026275635, | |
| "ref_logps/chosen": -311.7004699707031, | |
| "ref_logps/rejected": -159.54234313964844, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": 1.057072639465332, | |
| "rewards/margins": 3.2187340259552, | |
| "rewards/rejected": -2.161661386489868, | |
| "step": 810, | |
| "u": -2.773268938064575, | |
| "weight": 0.022295668721199036 | |
| }, | |
| { | |
| "diff_generated": -213.4055633544922, | |
| "epoch": 1.7063595917299135, | |
| "grad_norm": 1352.148808645479, | |
| "learning_rate": 1.2674468160735586e-07, | |
| "logits/chosen": -1.4077790975570679, | |
| "logits/rejected": -1.3166415691375732, | |
| "logps/chosen": -177.383544921875, | |
| "logps/rejected": -373.1116027832031, | |
| "loss": 179.0974, | |
| "losses_ref": -3.5087268352508545, | |
| "ref_logps/chosen": -279.3453674316406, | |
| "ref_logps/rejected": -159.70603942871094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.019618034362793, | |
| "rewards/margins": 3.1536736488342285, | |
| "rewards/rejected": -2.1340556144714355, | |
| "step": 815, | |
| "u": -2.615370512008667, | |
| "weight": 0.0508296899497509 | |
| }, | |
| { | |
| "diff_generated": -237.0354766845703, | |
| "epoch": 1.7168280554828579, | |
| "grad_norm": 1326.9582054025304, | |
| "learning_rate": 1.1797088166794e-07, | |
| "logits/chosen": -1.328039288520813, | |
| "logits/rejected": -1.2903969287872314, | |
| "logps/chosen": -176.13819885253906, | |
| "logps/rejected": -401.98748779296875, | |
| "loss": 179.7547, | |
| "losses_ref": -0.005425100214779377, | |
| "ref_logps/chosen": -275.8017883300781, | |
| "ref_logps/rejected": -164.95204162597656, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9966354370117188, | |
| "rewards/margins": 3.366990327835083, | |
| "rewards/rejected": -2.370354652404785, | |
| "step": 820, | |
| "u": -2.696533679962158, | |
| "weight": 3.794050280703232e-05 | |
| }, | |
| { | |
| "diff_generated": -219.973876953125, | |
| "epoch": 1.7272965192358023, | |
| "grad_norm": 1183.8442331623387, | |
| "learning_rate": 1.0949270319755766e-07, | |
| "logits/chosen": -1.3806655406951904, | |
| "logits/rejected": -1.337877631187439, | |
| "logps/chosen": -167.13290405273438, | |
| "logps/rejected": -381.6925048828125, | |
| "loss": 173.9734, | |
| "losses_ref": -2.8696396350860596, | |
| "ref_logps/chosen": -262.41363525390625, | |
| "ref_logps/rejected": -161.7186279296875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9528074264526367, | |
| "rewards/margins": 3.1525461673736572, | |
| "rewards/rejected": -2.1997389793395996, | |
| "step": 825, | |
| "u": -2.7405786514282227, | |
| "weight": 0.02941594459116459 | |
| }, | |
| { | |
| "diff_generated": -211.63803100585938, | |
| "epoch": 1.7377649829887463, | |
| "grad_norm": 1227.5586469104078, | |
| "learning_rate": 1.013129877481741e-07, | |
| "logits/chosen": -1.3626017570495605, | |
| "logits/rejected": -1.199372410774231, | |
| "logps/chosen": -211.2673797607422, | |
| "logps/rejected": -382.85003662109375, | |
| "loss": 185.7144, | |
| "losses_ref": -5.601190090179443, | |
| "ref_logps/chosen": -318.6112060546875, | |
| "ref_logps/rejected": -171.2120361328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0734381675720215, | |
| "rewards/margins": 3.1898186206817627, | |
| "rewards/rejected": -2.116380214691162, | |
| "step": 830, | |
| "u": -2.8598952293395996, | |
| "weight": 0.029378216713666916 | |
| }, | |
| { | |
| "diff_generated": -230.986328125, | |
| "epoch": 1.7482334467416907, | |
| "grad_norm": 1290.8717428712873, | |
| "learning_rate": 9.343447683868799e-08, | |
| "logits/chosen": -1.2116000652313232, | |
| "logits/rejected": -1.2751588821411133, | |
| "logps/chosen": -169.79380798339844, | |
| "logps/rejected": -394.716064453125, | |
| "loss": 178.4699, | |
| "losses_ref": -0.9493634104728699, | |
| "ref_logps/chosen": -262.22894287109375, | |
| "ref_logps/rejected": -163.72976684570312, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9243512153625488, | |
| "rewards/margins": 3.2342143058776855, | |
| "rewards/rejected": -2.309863328933716, | |
| "step": 835, | |
| "u": -2.9001994132995605, | |
| "weight": 0.008882230147719383 | |
| }, | |
| { | |
| "diff_generated": -216.50314331054688, | |
| "epoch": 1.7587019104946349, | |
| "grad_norm": 1335.6008361033998, | |
| "learning_rate": 8.585981103608342e-08, | |
| "logits/chosen": -1.3362239599227905, | |
| "logits/rejected": -1.1397970914840698, | |
| "logps/chosen": -206.77511596679688, | |
| "logps/rejected": -389.9546203613281, | |
| "loss": 191.1818, | |
| "losses_ref": -0.28884872794151306, | |
| "ref_logps/chosen": -316.14837646484375, | |
| "ref_logps/rejected": -173.45150756835938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0937325954437256, | |
| "rewards/margins": 3.2587637901306152, | |
| "rewards/rejected": -2.1650314331054688, | |
| "step": 840, | |
| "u": -3.1953749656677246, | |
| "weight": 0.004329306539148092 | |
| }, | |
| { | |
| "diff_generated": -202.93106079101562, | |
| "epoch": 1.769170374247579, | |
| "grad_norm": 1266.3673749218208, | |
| "learning_rate": 7.859152907041544e-08, | |
| "logits/chosen": -1.354994773864746, | |
| "logits/rejected": -1.1393146514892578, | |
| "logps/chosen": -199.24710083007812, | |
| "logps/rejected": -360.2781677246094, | |
| "loss": 176.0576, | |
| "losses_ref": -1.7230793237686157, | |
| "ref_logps/chosen": -305.8094177246094, | |
| "ref_logps/rejected": -157.3471221923828, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.06562340259552, | |
| "rewards/margins": 3.0949339866638184, | |
| "rewards/rejected": -2.029310464859009, | |
| "step": 845, | |
| "u": -2.745694160461426, | |
| "weight": 0.0367230661213398 | |
| }, | |
| { | |
| "diff_generated": -211.84765625, | |
| "epoch": 1.7796388380005235, | |
| "grad_norm": 1302.5644299154628, | |
| "learning_rate": 7.163206698392742e-08, | |
| "logits/chosen": -1.2949212789535522, | |
| "logits/rejected": -1.1885995864868164, | |
| "logps/chosen": -185.09088134765625, | |
| "logps/rejected": -367.0600891113281, | |
| "loss": 184.3042, | |
| "losses_ref": -3.0929312705993652, | |
| "ref_logps/chosen": -285.8619384765625, | |
| "ref_logps/rejected": -155.21240234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0077106952667236, | |
| "rewards/margins": 3.1261868476867676, | |
| "rewards/rejected": -2.118476629257202, | |
| "step": 850, | |
| "u": -3.2007651329040527, | |
| "weight": 0.03339768201112747 | |
| }, | |
| { | |
| "diff_generated": -209.816650390625, | |
| "epoch": 1.7901073017534677, | |
| "grad_norm": 1345.4005628593675, | |
| "learning_rate": 6.498375731458527e-08, | |
| "logits/chosen": -1.4427772760391235, | |
| "logits/rejected": -1.2521936893463135, | |
| "logps/chosen": -190.9636688232422, | |
| "logps/rejected": -376.20391845703125, | |
| "loss": 177.6342, | |
| "losses_ref": -2.1869561672210693, | |
| "ref_logps/chosen": -298.1745300292969, | |
| "ref_logps/rejected": -166.38723754882812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.072108507156372, | |
| "rewards/margins": 3.1702747344970703, | |
| "rewards/rejected": -2.098165988922119, | |
| "step": 855, | |
| "u": -3.108565330505371, | |
| "weight": 0.026614084839820862 | |
| }, | |
| { | |
| "diff_generated": -229.3601837158203, | |
| "epoch": 1.8005757655064119, | |
| "grad_norm": 1261.2192787306672, | |
| "learning_rate": 5.8648828314302735e-08, | |
| "logits/chosen": -1.3119590282440186, | |
| "logits/rejected": -1.1316639184951782, | |
| "logps/chosen": -186.41650390625, | |
| "logps/rejected": -386.34906005859375, | |
| "loss": 176.3818, | |
| "losses_ref": -2.3852286338806152, | |
| "ref_logps/chosen": -289.1662292480469, | |
| "ref_logps/rejected": -156.98886108398438, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0274972915649414, | |
| "rewards/margins": 3.3210995197296143, | |
| "rewards/rejected": -2.2936015129089355, | |
| "step": 860, | |
| "u": -2.8182337284088135, | |
| "weight": 0.03502316027879715 | |
| }, | |
| { | |
| "diff_generated": -210.816650390625, | |
| "epoch": 1.8110442292593563, | |
| "grad_norm": 1269.4818113722586, | |
| "learning_rate": 5.2629403202119505e-08, | |
| "logits/chosen": -1.2412734031677246, | |
| "logits/rejected": -1.227634072303772, | |
| "logps/chosen": -173.3083953857422, | |
| "logps/rejected": -375.7776184082031, | |
| "loss": 171.0543, | |
| "losses_ref": -0.6853199005126953, | |
| "ref_logps/chosen": -271.1694030761719, | |
| "ref_logps/rejected": -164.96096801757812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9786099195480347, | |
| "rewards/margins": 3.0867760181427, | |
| "rewards/rejected": -2.108166217803955, | |
| "step": 865, | |
| "u": -3.394763231277466, | |
| "weight": 0.010494846850633621 | |
| }, | |
| { | |
| "diff_generated": -226.4854736328125, | |
| "epoch": 1.8215126930123005, | |
| "grad_norm": 1268.655880675009, | |
| "learning_rate": 4.692749945258057e-08, | |
| "logits/chosen": -1.3430616855621338, | |
| "logits/rejected": -1.1744420528411865, | |
| "logps/chosen": -195.01589965820312, | |
| "logps/rejected": -389.9505920410156, | |
| "loss": 186.5683, | |
| "losses_ref": -3.434800624847412, | |
| "ref_logps/chosen": -299.091796875, | |
| "ref_logps/rejected": -163.46514892578125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0407590866088867, | |
| "rewards/margins": 3.3056137561798096, | |
| "rewards/rejected": -2.2648544311523438, | |
| "step": 870, | |
| "u": -2.5695509910583496, | |
| "weight": 0.046660859137773514 | |
| }, | |
| { | |
| "diff_generated": -236.5417022705078, | |
| "epoch": 1.8319811567652446, | |
| "grad_norm": 1204.436962297953, | |
| "learning_rate": 4.1545028119559066e-08, | |
| "logits/chosen": -1.3133630752563477, | |
| "logits/rejected": -1.3207045793533325, | |
| "logps/chosen": -190.3129425048828, | |
| "logps/rejected": -398.3179931640625, | |
| "loss": 171.6172, | |
| "losses_ref": -1.358794927597046, | |
| "ref_logps/chosen": -287.73504638671875, | |
| "ref_logps/rejected": -161.77627563476562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.974220871925354, | |
| "rewards/margins": 3.339637279510498, | |
| "rewards/rejected": -2.365417003631592, | |
| "step": 875, | |
| "u": -2.7414660453796387, | |
| "weight": 0.022162286564707756 | |
| }, | |
| { | |
| "diff_generated": -210.89306640625, | |
| "epoch": 1.842449620518189, | |
| "grad_norm": 1230.1188284044147, | |
| "learning_rate": 3.648379319574568e-08, | |
| "logits/chosen": -1.383299708366394, | |
| "logits/rejected": -1.3287036418914795, | |
| "logps/chosen": -190.19691467285156, | |
| "logps/rejected": -363.8573913574219, | |
| "loss": 168.7976, | |
| "losses_ref": -4.205277442932129, | |
| "ref_logps/chosen": -291.4452819824219, | |
| "ref_logps/rejected": -152.96432495117188, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0124839544296265, | |
| "rewards/margins": 3.1214146614074707, | |
| "rewards/rejected": -2.1089303493499756, | |
| "step": 880, | |
| "u": -2.69191312789917, | |
| "weight": 0.03942141681909561 | |
| }, | |
| { | |
| "diff_generated": -224.0536346435547, | |
| "epoch": 1.8529180842711332, | |
| "grad_norm": 1317.8866979194424, | |
| "learning_rate": 3.17454910080216e-08, | |
| "logits/chosen": -1.387369155883789, | |
| "logits/rejected": -1.256730318069458, | |
| "logps/chosen": -213.5888671875, | |
| "logps/rejected": -388.00115966796875, | |
| "loss": 200.2688, | |
| "losses_ref": -0.6602109670639038, | |
| "ref_logps/chosen": -319.39569091796875, | |
| "ref_logps/rejected": -163.94747924804688, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.058068037033081, | |
| "rewards/margins": 3.2986044883728027, | |
| "rewards/rejected": -2.2405362129211426, | |
| "step": 885, | |
| "u": -2.7355685234069824, | |
| "weight": 0.029411468654870987 | |
| }, | |
| { | |
| "diff_generated": -221.8759765625, | |
| "epoch": 1.8633865480240774, | |
| "grad_norm": 1306.8555947562052, | |
| "learning_rate": 2.733170964891607e-08, | |
| "logits/chosen": -1.3195066452026367, | |
| "logits/rejected": -1.2867323160171509, | |
| "logps/chosen": -170.53369140625, | |
| "logps/rejected": -378.52935791015625, | |
| "loss": 174.36, | |
| "losses_ref": -0.899361252784729, | |
| "ref_logps/chosen": -274.72943115234375, | |
| "ref_logps/rejected": -156.65335083007812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0419572591781616, | |
| "rewards/margins": 3.2607169151306152, | |
| "rewards/rejected": -2.218759775161743, | |
| "step": 890, | |
| "u": -3.3832144737243652, | |
| "weight": 0.008660494349896908 | |
| }, | |
| { | |
| "diff_generated": -214.10165405273438, | |
| "epoch": 1.8738550117770219, | |
| "grad_norm": 1275.7234308585855, | |
| "learning_rate": 2.324392844434042e-08, | |
| "logits/chosen": -1.3565282821655273, | |
| "logits/rejected": -1.344678282737732, | |
| "logps/chosen": -192.53738403320312, | |
| "logps/rejected": -390.2491149902344, | |
| "loss": 191.1614, | |
| "losses_ref": -2.9138152599334717, | |
| "ref_logps/chosen": -295.70458984375, | |
| "ref_logps/rejected": -176.14747619628906, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0316721200942993, | |
| "rewards/margins": 3.1726887226104736, | |
| "rewards/rejected": -2.141016721725464, | |
| "step": 895, | |
| "u": -3.1400444507598877, | |
| "weight": 0.02205641008913517 | |
| }, | |
| { | |
| "diff_generated": -221.6460418701172, | |
| "epoch": 1.8843234755299658, | |
| "grad_norm": 1242.4363921596732, | |
| "learning_rate": 1.9483517457776434e-08, | |
| "logits/chosen": -1.1762725114822388, | |
| "logits/rejected": -1.3724615573883057, | |
| "logps/chosen": -159.86691284179688, | |
| "logps/rejected": -381.15081787109375, | |
| "loss": 172.6295, | |
| "losses_ref": -4.887435436248779, | |
| "ref_logps/chosen": -252.33743286132812, | |
| "ref_logps/rejected": -159.50479125976562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9247050285339355, | |
| "rewards/margins": 3.1411654949188232, | |
| "rewards/rejected": -2.216460704803467, | |
| "step": 900, | |
| "u": -2.2665815353393555, | |
| "weight": 0.07192285358905792 | |
| }, | |
| { | |
| "diff_generated": -227.669189453125, | |
| "epoch": 1.8947919392829102, | |
| "grad_norm": 1323.6799372011517, | |
| "learning_rate": 1.6051737031084533e-08, | |
| "logits/chosen": -1.2494432926177979, | |
| "logits/rejected": -1.1595919132232666, | |
| "logps/chosen": -175.1837921142578, | |
| "logps/rejected": -384.48175048828125, | |
| "loss": 174.3896, | |
| "losses_ref": -1.007882833480835, | |
| "ref_logps/chosen": -276.83319091796875, | |
| "ref_logps/rejected": -156.81253051757812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0164941549301147, | |
| "rewards/margins": 3.2931861877441406, | |
| "rewards/rejected": -2.2766921520233154, | |
| "step": 905, | |
| "u": -3.0982091426849365, | |
| "weight": 0.01854753866791725 | |
| }, | |
| { | |
| "diff_generated": -222.00784301757812, | |
| "epoch": 1.9052604030358546, | |
| "grad_norm": 1353.0501425434295, | |
| "learning_rate": 1.2949737362087154e-08, | |
| "logits/chosen": -1.222752332687378, | |
| "logits/rejected": -1.265421986579895, | |
| "logps/chosen": -173.27577209472656, | |
| "logps/rejected": -388.85797119140625, | |
| "loss": 174.8498, | |
| "losses_ref": -6.1348981857299805, | |
| "ref_logps/chosen": -269.9849853515625, | |
| "ref_logps/rejected": -166.85018920898438, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9670922160148621, | |
| "rewards/margins": 3.1871705055236816, | |
| "rewards/rejected": -2.220078229904175, | |
| "step": 910, | |
| "u": -2.5107998847961426, | |
| "weight": 0.06687295436859131 | |
| }, | |
| { | |
| "diff_generated": -211.4918975830078, | |
| "epoch": 1.9157288667887986, | |
| "grad_norm": 1286.3307044665144, | |
| "learning_rate": 1.0178558119067315e-08, | |
| "logits/chosen": -1.2266263961791992, | |
| "logits/rejected": -1.0511000156402588, | |
| "logps/chosen": -177.09149169921875, | |
| "logps/rejected": -372.6114807128906, | |
| "loss": 175.9135, | |
| "losses_ref": -0.7255733609199524, | |
| "ref_logps/chosen": -277.30194091796875, | |
| "ref_logps/rejected": -161.1195831298828, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0021045207977295, | |
| "rewards/margins": 3.11702299118042, | |
| "rewards/rejected": -2.1149187088012695, | |
| "step": 915, | |
| "u": -3.0817387104034424, | |
| "weight": 0.014788592234253883 | |
| }, | |
| { | |
| "diff_generated": -220.1043701171875, | |
| "epoch": 1.926197330541743, | |
| "grad_norm": 1287.429219240266, | |
| "learning_rate": 7.739128092312918e-09, | |
| "logits/chosen": -1.3375459909439087, | |
| "logits/rejected": -1.274279236793518, | |
| "logps/chosen": -181.00665283203125, | |
| "logps/rejected": -377.59088134765625, | |
| "loss": 171.8915, | |
| "losses_ref": -1.6772384643554688, | |
| "ref_logps/chosen": -280.682861328125, | |
| "ref_logps/rejected": -157.4865264892578, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9967617988586426, | |
| "rewards/margins": 3.197805881500244, | |
| "rewards/rejected": -2.2010436058044434, | |
| "step": 920, | |
| "u": -2.880985736846924, | |
| "weight": 0.036544255912303925 | |
| }, | |
| { | |
| "diff_generated": -222.374755859375, | |
| "epoch": 1.9366657942946872, | |
| "grad_norm": 1348.176434591513, | |
| "learning_rate": 5.632264882822757e-09, | |
| "logits/chosen": -1.3248652219772339, | |
| "logits/rejected": -1.2289717197418213, | |
| "logps/chosen": -187.19947814941406, | |
| "logps/rejected": -382.12860107421875, | |
| "loss": 186.23, | |
| "losses_ref": -2.8856143951416016, | |
| "ref_logps/chosen": -288.6744079589844, | |
| "ref_logps/rejected": -159.7538299560547, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0147496461868286, | |
| "rewards/margins": 3.238497257232666, | |
| "rewards/rejected": -2.223747730255127, | |
| "step": 925, | |
| "u": -2.8917412757873535, | |
| "weight": 0.024965789169073105 | |
| }, | |
| { | |
| "diff_generated": -213.1198272705078, | |
| "epoch": 1.9471342580476314, | |
| "grad_norm": 1395.889446013467, | |
| "learning_rate": 3.858674628278824e-09, | |
| "logits/chosen": -1.366350531578064, | |
| "logits/rejected": -1.119940996170044, | |
| "logps/chosen": -188.4399871826172, | |
| "logps/rejected": -371.8886413574219, | |
| "loss": 182.8113, | |
| "losses_ref": -5.109557151794434, | |
| "ref_logps/chosen": -294.67010498046875, | |
| "ref_logps/rejected": -158.7688446044922, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0623013973236084, | |
| "rewards/margins": 3.1934995651245117, | |
| "rewards/rejected": -2.1311981678009033, | |
| "step": 930, | |
| "u": -2.426971435546875, | |
| "weight": 0.053149282932281494 | |
| }, | |
| { | |
| "diff_generated": -237.7774200439453, | |
| "epoch": 1.9576027218005758, | |
| "grad_norm": 1267.4031070589224, | |
| "learning_rate": 2.418951766376742e-09, | |
| "logits/chosen": -1.2219622135162354, | |
| "logits/rejected": -1.2400046586990356, | |
| "logps/chosen": -167.6567840576172, | |
| "logps/rejected": -398.30865478515625, | |
| "loss": 180.7217, | |
| "losses_ref": -5.82874059677124, | |
| "ref_logps/chosen": -267.5108947753906, | |
| "ref_logps/rejected": -160.53125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9985405802726746, | |
| "rewards/margins": 3.376314878463745, | |
| "rewards/rejected": -2.377774238586426, | |
| "step": 935, | |
| "u": -2.8524553775787354, | |
| "weight": 0.05332515761256218 | |
| }, | |
| { | |
| "diff_generated": -221.6811981201172, | |
| "epoch": 1.96807118555352, | |
| "grad_norm": 1234.1312151479083, | |
| "learning_rate": 1.313578835593465e-09, | |
| "logits/chosen": -1.3167364597320557, | |
| "logits/rejected": -1.0956764221191406, | |
| "logps/chosen": -202.79949951171875, | |
| "logps/rejected": -389.28814697265625, | |
| "loss": 183.0365, | |
| "losses_ref": -1.295898199081421, | |
| "ref_logps/chosen": -312.72149658203125, | |
| "ref_logps/rejected": -167.60696411132812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0992200374603271, | |
| "rewards/margins": 3.3160319328308105, | |
| "rewards/rejected": -2.2168118953704834, | |
| "step": 940, | |
| "u": -2.668116569519043, | |
| "weight": 0.01472543366253376 | |
| }, | |
| { | |
| "diff_generated": -209.20425415039062, | |
| "epoch": 1.9785396493064642, | |
| "grad_norm": 1327.2259702611773, | |
| "learning_rate": 5.429263134594242e-10, | |
| "logits/chosen": -1.298588514328003, | |
| "logits/rejected": -1.3200442790985107, | |
| "logps/chosen": -177.170654296875, | |
| "logps/rejected": -369.2535705566406, | |
| "loss": 179.5952, | |
| "losses_ref": -4.355043888092041, | |
| "ref_logps/chosen": -273.0224304199219, | |
| "ref_logps/rejected": -160.04933166503906, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9585177302360535, | |
| "rewards/margins": 3.050560474395752, | |
| "rewards/rejected": -2.0920424461364746, | |
| "step": 945, | |
| "u": -2.591240406036377, | |
| "weight": 0.0508696511387825 | |
| }, | |
| { | |
| "diff_generated": -218.85842895507812, | |
| "epoch": 1.9890081130594086, | |
| "grad_norm": 1215.4951947566592, | |
| "learning_rate": 1.0725249238940915e-10, | |
| "logits/chosen": -1.3104689121246338, | |
| "logits/rejected": -1.166074514389038, | |
| "logps/chosen": -190.97283935546875, | |
| "logps/rejected": -377.3951110839844, | |
| "loss": 185.4394, | |
| "losses_ref": -1.4506399631500244, | |
| "ref_logps/chosen": -288.34576416015625, | |
| "ref_logps/rejected": -158.53671264648438, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9737294316291809, | |
| "rewards/margins": 3.162313938140869, | |
| "rewards/rejected": -2.188584089279175, | |
| "step": 950, | |
| "u": -3.3431270122528076, | |
| "weight": 0.014722567982971668 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 954, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |