| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998441639395356, | |
| "eval_steps": 500, | |
| "global_step": 401, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0024933769674302633, | |
| "grad_norm": 133.0, | |
| "learning_rate": 4.375e-08, | |
| "logits/chosen": 0.9333375096321106, | |
| "logits/rejected": 0.8665135502815247, | |
| "logps/chosen": -1.574099063873291, | |
| "logps/rejected": -1.2997534275054932, | |
| "loss": 6.6643, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -15.74099063873291, | |
| "rewards/margins": -2.7434558868408203, | |
| "rewards/rejected": -12.997533798217773, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004986753934860527, | |
| "grad_norm": 78.5, | |
| "learning_rate": 8.75e-08, | |
| "logits/chosen": 1.007162094116211, | |
| "logits/rejected": 0.9319976568222046, | |
| "logps/chosen": -1.5873029232025146, | |
| "logps/rejected": -1.1813093423843384, | |
| "loss": 6.8875, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -15.873027801513672, | |
| "rewards/margins": -4.059934616088867, | |
| "rewards/rejected": -11.813094139099121, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0074801309022907905, | |
| "grad_norm": 145.0, | |
| "learning_rate": 1.3125e-07, | |
| "logits/chosen": 1.015642523765564, | |
| "logits/rejected": 0.8658874034881592, | |
| "logps/chosen": -2.187445640563965, | |
| "logps/rejected": -1.3217400312423706, | |
| "loss": 10.8975, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -21.87445831298828, | |
| "rewards/margins": -8.657057762145996, | |
| "rewards/rejected": -13.217399597167969, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.009973507869721053, | |
| "grad_norm": 80.5, | |
| "learning_rate": 1.75e-07, | |
| "logits/chosen": 1.0409551858901978, | |
| "logits/rejected": 0.9476256966590881, | |
| "logps/chosen": -1.4537204504013062, | |
| "logps/rejected": -1.1356033086776733, | |
| "loss": 5.5006, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -14.537205696105957, | |
| "rewards/margins": -3.181171417236328, | |
| "rewards/rejected": -11.356032371520996, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.012466884837151316, | |
| "grad_norm": 58.75, | |
| "learning_rate": 2.1875e-07, | |
| "logits/chosen": 0.9139229655265808, | |
| "logits/rejected": 1.0454109907150269, | |
| "logps/chosen": -1.550325632095337, | |
| "logps/rejected": -1.160091757774353, | |
| "loss": 6.5305, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -15.503257751464844, | |
| "rewards/margins": -3.902338981628418, | |
| "rewards/rejected": -11.60091781616211, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.014960261804581581, | |
| "grad_norm": 123.5, | |
| "learning_rate": 2.625e-07, | |
| "logits/chosen": 0.9995524883270264, | |
| "logits/rejected": 0.9891590476036072, | |
| "logps/chosen": -2.1153974533081055, | |
| "logps/rejected": -1.3793702125549316, | |
| "loss": 10.0926, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -21.153976440429688, | |
| "rewards/margins": -7.360274314880371, | |
| "rewards/rejected": -13.793702125549316, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.017453638772011844, | |
| "grad_norm": 122.5, | |
| "learning_rate": 3.0625e-07, | |
| "logits/chosen": 0.9477364420890808, | |
| "logits/rejected": 0.8998125791549683, | |
| "logps/chosen": -2.1086533069610596, | |
| "logps/rejected": -1.1814693212509155, | |
| "loss": 10.9058, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -21.086532592773438, | |
| "rewards/margins": -9.271841049194336, | |
| "rewards/rejected": -11.814691543579102, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.019947015739442107, | |
| "grad_norm": 117.5, | |
| "learning_rate": 3.5e-07, | |
| "logits/chosen": 0.9074363112449646, | |
| "logits/rejected": 1.0108835697174072, | |
| "logps/chosen": -2.171971082687378, | |
| "logps/rejected": -1.2922351360321045, | |
| "loss": 11.804, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -21.719709396362305, | |
| "rewards/margins": -8.797359466552734, | |
| "rewards/rejected": -12.922350883483887, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02244039270687237, | |
| "grad_norm": 92.0, | |
| "learning_rate": 3.9375e-07, | |
| "logits/chosen": 0.9447617530822754, | |
| "logits/rejected": 0.8549212217330933, | |
| "logps/chosen": -2.003368616104126, | |
| "logps/rejected": -1.2754697799682617, | |
| "loss": 9.4959, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -20.033687591552734, | |
| "rewards/margins": -7.278989791870117, | |
| "rewards/rejected": -12.754697799682617, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.024933769674302633, | |
| "grad_norm": 98.5, | |
| "learning_rate": 4.375e-07, | |
| "logits/chosen": 1.009035587310791, | |
| "logits/rejected": 0.895173192024231, | |
| "logps/chosen": -1.9958442449569702, | |
| "logps/rejected": -1.3750892877578735, | |
| "loss": 8.6131, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -19.95844078063965, | |
| "rewards/margins": -6.207549095153809, | |
| "rewards/rejected": -13.750892639160156, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.027427146641732895, | |
| "grad_norm": 110.0, | |
| "learning_rate": 4.812499999999999e-07, | |
| "logits/chosen": 0.9430880546569824, | |
| "logits/rejected": 0.9480469226837158, | |
| "logps/chosen": -2.0413639545440674, | |
| "logps/rejected": -1.3464946746826172, | |
| "loss": 9.6928, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -20.413639068603516, | |
| "rewards/margins": -6.948694229125977, | |
| "rewards/rejected": -13.464945793151855, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.029920523609163162, | |
| "grad_norm": 152.0, | |
| "learning_rate": 5.25e-07, | |
| "logits/chosen": 0.9941633343696594, | |
| "logits/rejected": 0.7915381193161011, | |
| "logps/chosen": -2.5496878623962402, | |
| "logps/rejected": -1.5264402627944946, | |
| "loss": 12.4115, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -25.496877670288086, | |
| "rewards/margins": -10.232475280761719, | |
| "rewards/rejected": -15.26440143585205, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03241390057659342, | |
| "grad_norm": 78.5, | |
| "learning_rate": 5.6875e-07, | |
| "logits/chosen": 0.8952471017837524, | |
| "logits/rejected": 0.8926589488983154, | |
| "logps/chosen": -1.597143530845642, | |
| "logps/rejected": -1.355407476425171, | |
| "loss": 6.8413, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -15.971436500549316, | |
| "rewards/margins": -2.41736102104187, | |
| "rewards/rejected": -13.554075241088867, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.03490727754402369, | |
| "grad_norm": 128.0, | |
| "learning_rate": 6.125e-07, | |
| "logits/chosen": 1.050255537033081, | |
| "logits/rejected": 0.8761364221572876, | |
| "logps/chosen": -1.834416151046753, | |
| "logps/rejected": -1.298659324645996, | |
| "loss": 7.9545, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -18.344160079956055, | |
| "rewards/margins": -5.357568740844727, | |
| "rewards/rejected": -12.986591339111328, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03740065451145395, | |
| "grad_norm": 133.0, | |
| "learning_rate": 6.5625e-07, | |
| "logits/chosen": 1.0313760042190552, | |
| "logits/rejected": 0.914068341255188, | |
| "logps/chosen": -2.0879173278808594, | |
| "logps/rejected": -1.2106117010116577, | |
| "loss": 10.301, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -20.879173278808594, | |
| "rewards/margins": -8.773056030273438, | |
| "rewards/rejected": -12.10611629486084, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.039894031478884213, | |
| "grad_norm": 56.5, | |
| "learning_rate": 7e-07, | |
| "logits/chosen": 0.9663585424423218, | |
| "logits/rejected": 0.9516808986663818, | |
| "logps/chosen": -1.7078903913497925, | |
| "logps/rejected": -1.2222994565963745, | |
| "loss": 7.0132, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -17.078907012939453, | |
| "rewards/margins": -4.8559112548828125, | |
| "rewards/rejected": -12.222993850708008, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04238740844631448, | |
| "grad_norm": 96.5, | |
| "learning_rate": 6.999883476391534e-07, | |
| "logits/chosen": 1.0192354917526245, | |
| "logits/rejected": 0.9732477068901062, | |
| "logps/chosen": -1.774751901626587, | |
| "logps/rejected": -1.0946956872940063, | |
| "loss": 8.648, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -17.747520446777344, | |
| "rewards/margins": -6.800562858581543, | |
| "rewards/rejected": -10.946956634521484, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.04488078541374474, | |
| "grad_norm": 121.0, | |
| "learning_rate": 6.999533913324853e-07, | |
| "logits/chosen": 0.981746256351471, | |
| "logits/rejected": 0.9062566757202148, | |
| "logps/chosen": -2.0760321617126465, | |
| "logps/rejected": -2.2810633182525635, | |
| "loss": 10.1792, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -20.76032066345215, | |
| "rewards/margins": 2.050312042236328, | |
| "rewards/rejected": -22.810632705688477, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.047374162381175006, | |
| "grad_norm": 67.0, | |
| "learning_rate": 6.998951334075586e-07, | |
| "logits/chosen": 1.0017695426940918, | |
| "logits/rejected": 0.9386453032493591, | |
| "logps/chosen": -1.5593485832214355, | |
| "logps/rejected": -1.3584306240081787, | |
| "loss": 5.5006, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -15.593484878540039, | |
| "rewards/margins": -2.0091779232025146, | |
| "rewards/rejected": -13.584305763244629, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.049867539348605265, | |
| "grad_norm": 83.5, | |
| "learning_rate": 6.998135777434723e-07, | |
| "logits/chosen": 0.9819589853286743, | |
| "logits/rejected": 0.9480808973312378, | |
| "logps/chosen": -1.7974135875701904, | |
| "logps/rejected": -1.2320420742034912, | |
| "loss": 8.011, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -17.974136352539062, | |
| "rewards/margins": -5.653716564178467, | |
| "rewards/rejected": -12.32042121887207, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05236091631603553, | |
| "grad_norm": 123.0, | |
| "learning_rate": 6.99708729770604e-07, | |
| "logits/chosen": 0.9151750802993774, | |
| "logits/rejected": 0.9027111530303955, | |
| "logps/chosen": -1.9205522537231445, | |
| "logps/rejected": -1.6000399589538574, | |
| "loss": 8.9355, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -19.205522537231445, | |
| "rewards/margins": -3.205122232437134, | |
| "rewards/rejected": -16.00040054321289, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.05485429328346579, | |
| "grad_norm": 53.25, | |
| "learning_rate": 6.995805964702472e-07, | |
| "logits/chosen": 0.9063746333122253, | |
| "logits/rejected": 0.9599690437316895, | |
| "logps/chosen": -1.5286774635314941, | |
| "logps/rejected": -1.182100772857666, | |
| "loss": 6.2756, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -15.286775588989258, | |
| "rewards/margins": -3.4657678604125977, | |
| "rewards/rejected": -11.821005821228027, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05734767025089606, | |
| "grad_norm": 115.5, | |
| "learning_rate": 6.994291863741474e-07, | |
| "logits/chosen": 0.9865818619728088, | |
| "logits/rejected": 0.8803253173828125, | |
| "logps/chosen": -1.8937522172927856, | |
| "logps/rejected": -1.1672096252441406, | |
| "loss": 9.051, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -18.937522888183594, | |
| "rewards/margins": -7.265425205230713, | |
| "rewards/rejected": -11.672097206115723, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.059841047218326324, | |
| "grad_norm": 120.0, | |
| "learning_rate": 6.992545095639337e-07, | |
| "logits/chosen": 0.8972434997558594, | |
| "logits/rejected": 0.8747442960739136, | |
| "logps/chosen": -2.372899055480957, | |
| "logps/rejected": -1.4153152704238892, | |
| "loss": 11.7809, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -23.728988647460938, | |
| "rewards/margins": -9.575835227966309, | |
| "rewards/rejected": -14.153154373168945, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06233442418575658, | |
| "grad_norm": 58.5, | |
| "learning_rate": 6.990565776704475e-07, | |
| "logits/chosen": 0.9191975593566895, | |
| "logits/rejected": 0.908176839351654, | |
| "logps/chosen": -1.6683969497680664, | |
| "logps/rejected": -1.231262445449829, | |
| "loss": 7.8375, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -16.683971405029297, | |
| "rewards/margins": -4.371344566345215, | |
| "rewards/rejected": -12.31262493133545, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06482780115318684, | |
| "grad_norm": 120.5, | |
| "learning_rate": 6.988354038729676e-07, | |
| "logits/chosen": 0.9013136625289917, | |
| "logits/rejected": 0.7893968820571899, | |
| "logps/chosen": -2.127075433731079, | |
| "logps/rejected": -1.3035414218902588, | |
| "loss": 10.6297, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -21.270755767822266, | |
| "rewards/margins": -8.23534107208252, | |
| "rewards/rejected": -13.03541374206543, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06732117812061711, | |
| "grad_norm": 82.0, | |
| "learning_rate": 6.985910028983336e-07, | |
| "logits/chosen": 0.9725473523139954, | |
| "logits/rejected": 0.9624121189117432, | |
| "logps/chosen": -2.005342483520508, | |
| "logps/rejected": -1.2962756156921387, | |
| "loss": 8.3075, | |
| "rewards/accuracies": 0.1875, | |
| "rewards/chosen": -20.053424835205078, | |
| "rewards/margins": -7.09066915512085, | |
| "rewards/rejected": -12.962756156921387, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.06981455508804738, | |
| "grad_norm": 52.25, | |
| "learning_rate": 6.983233910199648e-07, | |
| "logits/chosen": 0.8846550583839417, | |
| "logits/rejected": 0.9423845410346985, | |
| "logps/chosen": -1.6742780208587646, | |
| "logps/rejected": -1.188732385635376, | |
| "loss": 7.3535, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -16.742778778076172, | |
| "rewards/margins": -4.8554558753967285, | |
| "rewards/rejected": -11.887323379516602, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07230793205547764, | |
| "grad_norm": 74.0, | |
| "learning_rate": 6.98032586056776e-07, | |
| "logits/chosen": 0.9702792167663574, | |
| "logits/rejected": 0.8728958368301392, | |
| "logps/chosen": -1.8141976594924927, | |
| "logps/rejected": -1.3005653619766235, | |
| "loss": 7.5819, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -18.14197540283203, | |
| "rewards/margins": -5.136322975158691, | |
| "rewards/rejected": -13.005653381347656, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0748013090229079, | |
| "grad_norm": 115.5, | |
| "learning_rate": 6.977186073719925e-07, | |
| "logits/chosen": 0.855915904045105, | |
| "logits/rejected": 0.7963756918907166, | |
| "logps/chosen": -1.9207674264907837, | |
| "logps/rejected": -1.16620934009552, | |
| "loss": 9.6435, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -19.20767593383789, | |
| "rewards/margins": -7.545581340789795, | |
| "rewards/rejected": -11.662094116210938, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07729468599033816, | |
| "grad_norm": 32.5, | |
| "learning_rate": 6.973814758718596e-07, | |
| "logits/chosen": 0.9370359182357788, | |
| "logits/rejected": 0.896599531173706, | |
| "logps/chosen": -1.3457211256027222, | |
| "logps/rejected": -1.0361064672470093, | |
| "loss": 4.8073, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -13.457212448120117, | |
| "rewards/margins": -3.0961475372314453, | |
| "rewards/rejected": -10.361063957214355, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.07978806295776843, | |
| "grad_norm": 68.0, | |
| "learning_rate": 6.97021214004251e-07, | |
| "logits/chosen": 0.8998004198074341, | |
| "logits/rejected": 0.9092382192611694, | |
| "logps/chosen": -1.5766998529434204, | |
| "logps/rejected": -1.1409169435501099, | |
| "loss": 6.4345, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -15.766998291015625, | |
| "rewards/margins": -4.357827663421631, | |
| "rewards/rejected": -11.409171104431152, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0822814399251987, | |
| "grad_norm": 76.0, | |
| "learning_rate": 6.96637845757174e-07, | |
| "logits/chosen": 0.8456138372421265, | |
| "logits/rejected": 0.9118346571922302, | |
| "logps/chosen": -2.059769868850708, | |
| "logps/rejected": -1.3298571109771729, | |
| "loss": 9.2994, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -20.597698211669922, | |
| "rewards/margins": -7.299127101898193, | |
| "rewards/rejected": -13.29857063293457, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.08477481689262896, | |
| "grad_norm": 50.0, | |
| "learning_rate": 6.962313966571722e-07, | |
| "logits/chosen": 0.8960351347923279, | |
| "logits/rejected": 0.8999559879302979, | |
| "logps/chosen": -1.4601362943649292, | |
| "logps/rejected": -1.4213840961456299, | |
| "loss": 4.6079, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -14.601361274719238, | |
| "rewards/margins": -0.3875225782394409, | |
| "rewards/rejected": -14.21384048461914, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.08726819386005921, | |
| "grad_norm": 47.0, | |
| "learning_rate": 6.958018937676262e-07, | |
| "logits/chosen": 0.9134461879730225, | |
| "logits/rejected": 0.8920255899429321, | |
| "logps/chosen": -1.46458101272583, | |
| "logps/rejected": -1.2648781538009644, | |
| "loss": 5.2894, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -14.6458101272583, | |
| "rewards/margins": -1.99702787399292, | |
| "rewards/rejected": -12.648781776428223, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.08976157082748948, | |
| "grad_norm": 86.5, | |
| "learning_rate": 6.953493656869511e-07, | |
| "logits/chosen": 0.9218010902404785, | |
| "logits/rejected": 0.7793766260147095, | |
| "logps/chosen": -1.7202703952789307, | |
| "logps/rejected": -1.3021219968795776, | |
| "loss": 6.3929, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -17.20270538330078, | |
| "rewards/margins": -4.181485176086426, | |
| "rewards/rejected": -13.021220207214355, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.09225494779491974, | |
| "grad_norm": 55.0, | |
| "learning_rate": 6.948738425466925e-07, | |
| "logits/chosen": 0.9479645490646362, | |
| "logits/rejected": 0.8090993762016296, | |
| "logps/chosen": -1.6109609603881836, | |
| "logps/rejected": -1.395875334739685, | |
| "loss": 5.8692, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -16.10961151123047, | |
| "rewards/margins": -2.1508564949035645, | |
| "rewards/rejected": -13.95875358581543, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.09474832476235001, | |
| "grad_norm": 32.75, | |
| "learning_rate": 6.943753560095204e-07, | |
| "logits/chosen": 1.020020604133606, | |
| "logits/rejected": 0.9307425618171692, | |
| "logps/chosen": -1.447858452796936, | |
| "logps/rejected": -1.0777596235275269, | |
| "loss": 5.3116, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -14.478584289550781, | |
| "rewards/margins": -3.700988292694092, | |
| "rewards/rejected": -10.777596473693848, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.09724170172978028, | |
| "grad_norm": 45.0, | |
| "learning_rate": 6.938539392671203e-07, | |
| "logits/chosen": 0.939849317073822, | |
| "logits/rejected": 0.9025396108627319, | |
| "logps/chosen": -1.6659669876098633, | |
| "logps/rejected": -1.1725908517837524, | |
| "loss": 7.0117, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -16.659669876098633, | |
| "rewards/margins": -4.933763027191162, | |
| "rewards/rejected": -11.725908279418945, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.09973507869721053, | |
| "grad_norm": 74.5, | |
| "learning_rate": 6.933096270379841e-07, | |
| "logits/chosen": 0.996893584728241, | |
| "logits/rejected": 0.912053108215332, | |
| "logps/chosen": -1.2696326971054077, | |
| "logps/rejected": -1.1286218166351318, | |
| "loss": 4.2095, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -12.69632625579834, | |
| "rewards/margins": -1.410109281539917, | |
| "rewards/rejected": -11.286218643188477, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1022284556646408, | |
| "grad_norm": 47.0, | |
| "learning_rate": 6.927424555650974e-07, | |
| "logits/chosen": 0.9594122171401978, | |
| "logits/rejected": 0.8550945520401001, | |
| "logps/chosen": -1.5375633239746094, | |
| "logps/rejected": -1.2417051792144775, | |
| "loss": 5.0733, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -15.375633239746094, | |
| "rewards/margins": -2.9585819244384766, | |
| "rewards/rejected": -12.4170503616333, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.10472183263207106, | |
| "grad_norm": 44.0, | |
| "learning_rate": 6.921524626135268e-07, | |
| "logits/chosen": 0.8996063470840454, | |
| "logits/rejected": 0.9653378129005432, | |
| "logps/chosen": -1.763725996017456, | |
| "logps/rejected": -1.0993306636810303, | |
| "loss": 8.0476, | |
| "rewards/accuracies": 0.21875, | |
| "rewards/chosen": -17.637258529663086, | |
| "rewards/margins": -6.643953323364258, | |
| "rewards/rejected": -10.993307113647461, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.10721520959950133, | |
| "grad_norm": 42.25, | |
| "learning_rate": 6.915396874679055e-07, | |
| "logits/chosen": 1.0091477632522583, | |
| "logits/rejected": 0.9392642974853516, | |
| "logps/chosen": -1.2002838850021362, | |
| "logps/rejected": -1.0848746299743652, | |
| "loss": 3.0284, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -12.002839088439941, | |
| "rewards/margins": -1.1540918350219727, | |
| "rewards/rejected": -10.848746299743652, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.10970858656693158, | |
| "grad_norm": 40.25, | |
| "learning_rate": 6.909041709298168e-07, | |
| "logits/chosen": 0.8822853565216064, | |
| "logits/rejected": 0.8290736079216003, | |
| "logps/chosen": -1.4588274955749512, | |
| "logps/rejected": -1.2779145240783691, | |
| "loss": 4.9219, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -14.588274955749512, | |
| "rewards/margins": -1.8091294765472412, | |
| "rewards/rejected": -12.779145240783691, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.11220196353436185, | |
| "grad_norm": 56.75, | |
| "learning_rate": 6.902459553150779e-07, | |
| "logits/chosen": 0.9077208638191223, | |
| "logits/rejected": 0.7896067500114441, | |
| "logps/chosen": -1.4615594148635864, | |
| "logps/rejected": -1.2456190586090088, | |
| "loss": 5.1754, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -14.615594863891602, | |
| "rewards/margins": -2.1594033241271973, | |
| "rewards/rejected": -12.456191062927246, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.11469534050179211, | |
| "grad_norm": 67.0, | |
| "learning_rate": 6.895650844509226e-07, | |
| "logits/chosen": 0.9100595116615295, | |
| "logits/rejected": 0.7619892358779907, | |
| "logps/chosen": -1.6750259399414062, | |
| "logps/rejected": -1.2229750156402588, | |
| "loss": 6.2716, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -16.75025749206543, | |
| "rewards/margins": -4.5205078125, | |
| "rewards/rejected": -12.229750633239746, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.11718871746922238, | |
| "grad_norm": 70.0, | |
| "learning_rate": 6.88861603673082e-07, | |
| "logits/chosen": 0.8918619751930237, | |
| "logits/rejected": 0.9012125134468079, | |
| "logps/chosen": -1.64901602268219, | |
| "logps/rejected": -1.265884518623352, | |
| "loss": 6.6894, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -16.490161895751953, | |
| "rewards/margins": -3.831315279006958, | |
| "rewards/rejected": -12.658845901489258, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.11968209443665265, | |
| "grad_norm": 51.0, | |
| "learning_rate": 6.88135559822767e-07, | |
| "logits/chosen": 0.8720345497131348, | |
| "logits/rejected": 0.7581244707107544, | |
| "logps/chosen": -1.872530221939087, | |
| "logps/rejected": -1.4055814743041992, | |
| "loss": 6.6813, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -18.72530174255371, | |
| "rewards/margins": -4.669487476348877, | |
| "rewards/rejected": -14.055814743041992, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1221754714040829, | |
| "grad_norm": 52.75, | |
| "learning_rate": 6.873870012435486e-07, | |
| "logits/chosen": 0.8616499900817871, | |
| "logits/rejected": 0.7673721313476562, | |
| "logps/chosen": -1.3419944047927856, | |
| "logps/rejected": -1.2395105361938477, | |
| "loss": 3.3706, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -13.419943809509277, | |
| "rewards/margins": -1.0248385667800903, | |
| "rewards/rejected": -12.395105361938477, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.12466884837151317, | |
| "grad_norm": 56.25, | |
| "learning_rate": 6.866159777781393e-07, | |
| "logits/chosen": 0.8702710866928101, | |
| "logits/rejected": 0.7436060309410095, | |
| "logps/chosen": -1.6595778465270996, | |
| "logps/rejected": -1.1382110118865967, | |
| "loss": 6.7412, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -16.595779418945312, | |
| "rewards/margins": -5.213669300079346, | |
| "rewards/rejected": -11.382110595703125, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12716222533894342, | |
| "grad_norm": 50.75, | |
| "learning_rate": 6.858225407650741e-07, | |
| "logits/chosen": 0.7868949174880981, | |
| "logits/rejected": 0.8334120512008667, | |
| "logps/chosen": -1.7013866901397705, | |
| "logps/rejected": -1.3084328174591064, | |
| "loss": 6.2144, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -17.013864517211914, | |
| "rewards/margins": -3.929537296295166, | |
| "rewards/rejected": -13.084327697753906, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.12965560230637369, | |
| "grad_norm": 65.0, | |
| "learning_rate": 6.850067430352923e-07, | |
| "logits/chosen": 0.8779257535934448, | |
| "logits/rejected": 0.7302612066268921, | |
| "logps/chosen": -1.9540197849273682, | |
| "logps/rejected": -1.4614614248275757, | |
| "loss": 6.5912, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -19.540199279785156, | |
| "rewards/margins": -4.925583362579346, | |
| "rewards/rejected": -14.61461353302002, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.13214897927380395, | |
| "grad_norm": 79.0, | |
| "learning_rate": 6.8416863890862e-07, | |
| "logits/chosen": 0.91861492395401, | |
| "logits/rejected": 0.8185287714004517, | |
| "logps/chosen": -1.457578182220459, | |
| "logps/rejected": -1.275127649307251, | |
| "loss": 5.29, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -14.575782775878906, | |
| "rewards/margins": -1.8245068788528442, | |
| "rewards/rejected": -12.751276016235352, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.13464235624123422, | |
| "grad_norm": 25.625, | |
| "learning_rate": 6.833082841901524e-07, | |
| "logits/chosen": 0.8008706569671631, | |
| "logits/rejected": 0.7791386246681213, | |
| "logps/chosen": -1.2828067541122437, | |
| "logps/rejected": -1.14899480342865, | |
| "loss": 3.6665, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -12.828067779541016, | |
| "rewards/margins": -1.3381190299987793, | |
| "rewards/rejected": -11.489947319030762, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.13713573320866448, | |
| "grad_norm": 37.5, | |
| "learning_rate": 6.82425736166539e-07, | |
| "logits/chosen": 0.8428397178649902, | |
| "logits/rejected": 0.819983720779419, | |
| "logps/chosen": -1.5656299591064453, | |
| "logps/rejected": -1.6019946336746216, | |
| "loss": 5.8169, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -15.656298637390137, | |
| "rewards/margins": 0.3636472821235657, | |
| "rewards/rejected": -16.019947052001953, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.13962911017609475, | |
| "grad_norm": 43.75, | |
| "learning_rate": 6.815210536021685e-07, | |
| "logits/chosen": 0.7473218441009521, | |
| "logits/rejected": 0.7424555420875549, | |
| "logps/chosen": -1.4687354564666748, | |
| "logps/rejected": -1.2596654891967773, | |
| "loss": 5.3807, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -14.687355041503906, | |
| "rewards/margins": -2.090701103210449, | |
| "rewards/rejected": -12.596653938293457, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.14212248714352502, | |
| "grad_norm": 33.5, | |
| "learning_rate": 6.805942967352563e-07, | |
| "logits/chosen": 0.8693878650665283, | |
| "logits/rejected": 0.8091084361076355, | |
| "logps/chosen": -1.4544310569763184, | |
| "logps/rejected": -1.1222821474075317, | |
| "loss": 5.2342, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -14.544310569763184, | |
| "rewards/margins": -3.3214893341064453, | |
| "rewards/rejected": -11.222820281982422, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.14461586411095528, | |
| "grad_norm": 60.75, | |
| "learning_rate": 6.796455272738337e-07, | |
| "logits/chosen": 0.8443146347999573, | |
| "logits/rejected": 0.7834912538528442, | |
| "logps/chosen": -1.630685806274414, | |
| "logps/rejected": -2.097377061843872, | |
| "loss": 5.0217, | |
| "rewards/accuracies": 0.28125, | |
| "rewards/chosen": -16.306856155395508, | |
| "rewards/margins": 4.6669135093688965, | |
| "rewards/rejected": -20.973772048950195, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.14710924107838555, | |
| "grad_norm": 34.0, | |
| "learning_rate": 6.78674808391638e-07, | |
| "logits/chosen": 0.7124283313751221, | |
| "logits/rejected": 0.7266104221343994, | |
| "logps/chosen": -1.5309463739395142, | |
| "logps/rejected": -1.204602599143982, | |
| "loss": 4.9925, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -15.309463500976562, | |
| "rewards/margins": -3.263436794281006, | |
| "rewards/rejected": -12.046026229858398, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.1496026180458158, | |
| "grad_norm": 31.25, | |
| "learning_rate": 6.776822047239079e-07, | |
| "logits/chosen": 0.810710608959198, | |
| "logits/rejected": 0.7433085441589355, | |
| "logps/chosen": -1.3407872915267944, | |
| "logps/rejected": -1.1019080877304077, | |
| "loss": 4.0638, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -13.407873153686523, | |
| "rewards/margins": -2.3887932300567627, | |
| "rewards/rejected": -11.019081115722656, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.15209599501324605, | |
| "grad_norm": 35.75, | |
| "learning_rate": 6.766677823630784e-07, | |
| "logits/chosen": 0.9204759001731873, | |
| "logits/rejected": 0.8126802444458008, | |
| "logps/chosen": -1.3521380424499512, | |
| "logps/rejected": -1.230940341949463, | |
| "loss": 3.1759, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -13.521379470825195, | |
| "rewards/margins": -1.2119766473770142, | |
| "rewards/rejected": -12.309402465820312, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.15458937198067632, | |
| "grad_norm": 74.5, | |
| "learning_rate": 6.756316088543799e-07, | |
| "logits/chosen": 0.8732976317405701, | |
| "logits/rejected": 0.7553092837333679, | |
| "logps/chosen": -1.6522544622421265, | |
| "logps/rejected": -1.304805874824524, | |
| "loss": 5.2966, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -16.522544860839844, | |
| "rewards/margins": -3.474484920501709, | |
| "rewards/rejected": -13.048059463500977, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1570827489481066, | |
| "grad_norm": 32.25, | |
| "learning_rate": 6.74573753191342e-07, | |
| "logits/chosen": 0.8279662728309631, | |
| "logits/rejected": 0.7906845808029175, | |
| "logps/chosen": -1.3082658052444458, | |
| "logps/rejected": -1.215187907218933, | |
| "loss": 3.2516, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -13.082658767700195, | |
| "rewards/margins": -0.9307788610458374, | |
| "rewards/rejected": -12.15187931060791, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.15957612591553685, | |
| "grad_norm": 30.0, | |
| "learning_rate": 6.734942858111986e-07, | |
| "logits/chosen": 0.8267450332641602, | |
| "logits/rejected": 0.7294779419898987, | |
| "logps/chosen": -1.272381067276001, | |
| "logps/rejected": -1.2460516691207886, | |
| "loss": 3.548, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -12.723810195922852, | |
| "rewards/margins": -0.26329320669174194, | |
| "rewards/rejected": -12.460516929626465, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.16206950288296712, | |
| "grad_norm": 50.75, | |
| "learning_rate": 6.723932785901975e-07, | |
| "logits/chosen": 0.9013331532478333, | |
| "logits/rejected": 0.8166715502738953, | |
| "logps/chosen": -1.563563346862793, | |
| "logps/rejected": -1.2308857440948486, | |
| "loss": 4.8669, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -15.63563346862793, | |
| "rewards/margins": -3.3267745971679688, | |
| "rewards/rejected": -12.308857917785645, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.1645628798503974, | |
| "grad_norm": 18.75, | |
| "learning_rate": 6.712708048388158e-07, | |
| "logits/chosen": 0.833111047744751, | |
| "logits/rejected": 0.7176869511604309, | |
| "logps/chosen": -1.2247819900512695, | |
| "logps/rejected": -1.460402488708496, | |
| "loss": 2.2855, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -12.247820854187012, | |
| "rewards/margins": 2.3562047481536865, | |
| "rewards/rejected": -14.604024887084961, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.16705625681782765, | |
| "grad_norm": 41.75, | |
| "learning_rate": 6.701269392968773e-07, | |
| "logits/chosen": 0.8795142769813538, | |
| "logits/rejected": 0.7385881543159485, | |
| "logps/chosen": -1.5149438381195068, | |
| "logps/rejected": -1.4080404043197632, | |
| "loss": 3.8984, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -15.14943790435791, | |
| "rewards/margins": -1.0690345764160156, | |
| "rewards/rejected": -14.080402374267578, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.16954963378525792, | |
| "grad_norm": 37.0, | |
| "learning_rate": 6.689617581285765e-07, | |
| "logits/chosen": 0.8711040616035461, | |
| "logits/rejected": 0.6986596584320068, | |
| "logps/chosen": -1.6020938158035278, | |
| "logps/rejected": -1.350814938545227, | |
| "loss": 4.9801, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -16.02094078063965, | |
| "rewards/margins": -2.5127904415130615, | |
| "rewards/rejected": -13.508148193359375, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.17204301075268819, | |
| "grad_norm": 45.25, | |
| "learning_rate": 6.677753389174075e-07, | |
| "logits/chosen": 0.9395517706871033, | |
| "logits/rejected": 0.7759240865707397, | |
| "logps/chosen": -1.5319843292236328, | |
| "logps/rejected": -1.3424811363220215, | |
| "loss": 5.1017, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -15.319843292236328, | |
| "rewards/margins": -1.8950309753417969, | |
| "rewards/rejected": -13.424812316894531, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.17453638772011842, | |
| "grad_norm": 35.0, | |
| "learning_rate": 6.665677606609973e-07, | |
| "logits/chosen": 0.8715901374816895, | |
| "logits/rejected": 0.8017496466636658, | |
| "logps/chosen": -1.5180387496948242, | |
| "logps/rejected": -1.3085150718688965, | |
| "loss": 4.5983, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -15.180386543273926, | |
| "rewards/margins": -2.095235824584961, | |
| "rewards/rejected": -13.085149765014648, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1770297646875487, | |
| "grad_norm": 42.75, | |
| "learning_rate": 6.653391037658466e-07, | |
| "logits/chosen": 0.8521101474761963, | |
| "logits/rejected": 0.7697039246559143, | |
| "logps/chosen": -1.5679848194122314, | |
| "logps/rejected": -1.3208036422729492, | |
| "loss": 4.539, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -15.679848670959473, | |
| "rewards/margins": -2.471813201904297, | |
| "rewards/rejected": -13.208035469055176, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.17952314165497896, | |
| "grad_norm": 46.5, | |
| "learning_rate": 6.640894500419754e-07, | |
| "logits/chosen": 0.9186801314353943, | |
| "logits/rejected": 0.7545082569122314, | |
| "logps/chosen": -1.5185034275054932, | |
| "logps/rejected": -1.2024480104446411, | |
| "loss": 5.072, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -15.185033798217773, | |
| "rewards/margins": -3.160555601119995, | |
| "rewards/rejected": -12.024478912353516, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.18201651862240922, | |
| "grad_norm": 23.375, | |
| "learning_rate": 6.628188826974758e-07, | |
| "logits/chosen": 0.8491867780685425, | |
| "logits/rejected": 0.7822642922401428, | |
| "logps/chosen": -1.1384882926940918, | |
| "logps/rejected": -1.1687763929367065, | |
| "loss": 2.5731, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -11.384883880615234, | |
| "rewards/margins": 0.3028792440891266, | |
| "rewards/rejected": -11.687764167785645, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.1845098955898395, | |
| "grad_norm": 33.0, | |
| "learning_rate": 6.615274863329715e-07, | |
| "logits/chosen": 0.9214451909065247, | |
| "logits/rejected": 0.75420743227005, | |
| "logps/chosen": -1.5405924320220947, | |
| "logps/rejected": -1.6105780601501465, | |
| "loss": 2.1388, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -15.405925750732422, | |
| "rewards/margins": 0.6998560428619385, | |
| "rewards/rejected": -16.10578155517578, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.18700327255726976, | |
| "grad_norm": 39.5, | |
| "learning_rate": 6.602153469359852e-07, | |
| "logits/chosen": 0.905957043170929, | |
| "logits/rejected": 0.7297846078872681, | |
| "logps/chosen": -1.4268043041229248, | |
| "logps/rejected": -1.3724501132965088, | |
| "loss": 2.9419, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -14.268043518066406, | |
| "rewards/margins": -0.5435430407524109, | |
| "rewards/rejected": -13.72450065612793, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.18949664952470002, | |
| "grad_norm": 34.5, | |
| "learning_rate": 6.588825518752124e-07, | |
| "logits/chosen": 0.9336991310119629, | |
| "logits/rejected": 0.7428035736083984, | |
| "logps/chosen": -1.4046045541763306, | |
| "logps/rejected": -1.1234869956970215, | |
| "loss": 4.3453, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -14.046045303344727, | |
| "rewards/margins": -2.8111753463745117, | |
| "rewards/rejected": -11.234869003295898, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1919900264921303, | |
| "grad_norm": 40.75, | |
| "learning_rate": 6.575291898947046e-07, | |
| "logits/chosen": 0.8886721134185791, | |
| "logits/rejected": 0.6734512448310852, | |
| "logps/chosen": -1.4164619445800781, | |
| "logps/rejected": -1.3936028480529785, | |
| "loss": 3.5175, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -14.164620399475098, | |
| "rewards/margins": -0.22859010100364685, | |
| "rewards/rejected": -13.936028480529785, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.19448340345956056, | |
| "grad_norm": 41.75, | |
| "learning_rate": 6.561553511079596e-07, | |
| "logits/chosen": 0.829595148563385, | |
| "logits/rejected": 0.6838914155960083, | |
| "logps/chosen": -1.5907689332962036, | |
| "logps/rejected": -1.4232121706008911, | |
| "loss": 4.2889, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -15.907690048217773, | |
| "rewards/margins": -1.675569772720337, | |
| "rewards/rejected": -14.232120513916016, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1969767804269908, | |
| "grad_norm": 34.5, | |
| "learning_rate": 6.54761126991922e-07, | |
| "logits/chosen": 0.9110915660858154, | |
| "logits/rejected": 0.6820324063301086, | |
| "logps/chosen": -1.547090768814087, | |
| "logps/rejected": -1.3237884044647217, | |
| "loss": 4.8103, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -15.470909118652344, | |
| "rewards/margins": -2.2330236434936523, | |
| "rewards/rejected": -13.237884521484375, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.19947015739442106, | |
| "grad_norm": 43.75, | |
| "learning_rate": 6.533466103808918e-07, | |
| "logits/chosen": 0.8135228157043457, | |
| "logits/rejected": 0.7062645554542542, | |
| "logps/chosen": -1.5417137145996094, | |
| "logps/rejected": -1.3554742336273193, | |
| "loss": 5.119, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -15.417137145996094, | |
| "rewards/margins": -1.8623945713043213, | |
| "rewards/rejected": -13.554742813110352, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.20196353436185133, | |
| "grad_norm": 64.5, | |
| "learning_rate": 6.519118954603431e-07, | |
| "logits/chosen": 0.818507194519043, | |
| "logits/rejected": 0.7929250001907349, | |
| "logps/chosen": -1.6561720371246338, | |
| "logps/rejected": -1.3400187492370605, | |
| "loss": 5.316, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -16.56171989440918, | |
| "rewards/margins": -3.161534309387207, | |
| "rewards/rejected": -13.400186538696289, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.2044569113292816, | |
| "grad_norm": 22.75, | |
| "learning_rate": 6.504570777606531e-07, | |
| "logits/chosen": 0.8459409475326538, | |
| "logits/rejected": 0.7011772990226746, | |
| "logps/chosen": -1.3367918729782104, | |
| "logps/rejected": -1.2118648290634155, | |
| "loss": 3.5423, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -13.367918014526367, | |
| "rewards/margins": -1.249271273612976, | |
| "rewards/rejected": -12.118647575378418, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.20695028829671186, | |
| "grad_norm": 25.875, | |
| "learning_rate": 6.489822541507404e-07, | |
| "logits/chosen": 0.8798666596412659, | |
| "logits/rejected": 0.7069228887557983, | |
| "logps/chosen": -1.1269609928131104, | |
| "logps/rejected": -1.1012687683105469, | |
| "loss": 2.5165, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -11.269609451293945, | |
| "rewards/margins": -0.25692227482795715, | |
| "rewards/rejected": -11.012688636779785, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.20944366526414213, | |
| "grad_norm": 30.875, | |
| "learning_rate": 6.474875228316158e-07, | |
| "logits/chosen": 0.9361159801483154, | |
| "logits/rejected": 0.8077545762062073, | |
| "logps/chosen": -1.4038376808166504, | |
| "logps/rejected": -1.357134222984314, | |
| "loss": 3.4071, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -14.038376808166504, | |
| "rewards/margins": -0.4670344293117523, | |
| "rewards/rejected": -13.571342468261719, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.2119370422315724, | |
| "grad_norm": 20.625, | |
| "learning_rate": 6.459729833298434e-07, | |
| "logits/chosen": 0.7581954002380371, | |
| "logits/rejected": 0.7710189819335938, | |
| "logps/chosen": -1.2664942741394043, | |
| "logps/rejected": -1.2973535060882568, | |
| "loss": 3.0325, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -12.66494369506836, | |
| "rewards/margins": 0.3085915148258209, | |
| "rewards/rejected": -12.973533630371094, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.21443041919900266, | |
| "grad_norm": 38.25, | |
| "learning_rate": 6.444387364909134e-07, | |
| "logits/chosen": 0.8360967636108398, | |
| "logits/rejected": 0.7465887069702148, | |
| "logps/chosen": -1.4347429275512695, | |
| "logps/rejected": -1.4330288171768188, | |
| "loss": 3.0653, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -14.347427368164062, | |
| "rewards/margins": -0.01714131236076355, | |
| "rewards/rejected": -14.33028793334961, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.21692379616643293, | |
| "grad_norm": 25.375, | |
| "learning_rate": 6.428848844725274e-07, | |
| "logits/chosen": 0.7691155672073364, | |
| "logits/rejected": 0.6017144322395325, | |
| "logps/chosen": -1.2951093912124634, | |
| "logps/rejected": -1.3574622869491577, | |
| "loss": 2.8385, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -12.951093673706055, | |
| "rewards/margins": 0.623528778553009, | |
| "rewards/rejected": -13.574623107910156, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.21941717313386316, | |
| "grad_norm": 48.0, | |
| "learning_rate": 6.413115307377965e-07, | |
| "logits/chosen": 0.8395971059799194, | |
| "logits/rejected": 0.6882689595222473, | |
| "logps/chosen": -1.4701257944107056, | |
| "logps/rejected": -1.4139220714569092, | |
| "loss": 3.306, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -14.701258659362793, | |
| "rewards/margins": -0.5620384216308594, | |
| "rewards/rejected": -14.1392183303833, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.22191055010129343, | |
| "grad_norm": 31.625, | |
| "learning_rate": 6.397187800483519e-07, | |
| "logits/chosen": 0.8466267585754395, | |
| "logits/rejected": 0.6940711140632629, | |
| "logps/chosen": -1.4214099645614624, | |
| "logps/rejected": -1.3584471940994263, | |
| "loss": 2.831, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -14.214098930358887, | |
| "rewards/margins": -0.6296274662017822, | |
| "rewards/rejected": -13.58447265625, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.2244039270687237, | |
| "grad_norm": 33.5, | |
| "learning_rate": 6.381067384573693e-07, | |
| "logits/chosen": 0.8270119428634644, | |
| "logits/rejected": 0.65580815076828, | |
| "logps/chosen": -1.4739896059036255, | |
| "logps/rejected": -1.2760796546936035, | |
| "loss": 3.8132, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -14.739895820617676, | |
| "rewards/margins": -1.9790987968444824, | |
| "rewards/rejected": -12.760797500610352, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.22689730403615396, | |
| "grad_norm": 27.375, | |
| "learning_rate": 6.364755133025077e-07, | |
| "logits/chosen": 0.8560658693313599, | |
| "logits/rejected": 0.6389474868774414, | |
| "logps/chosen": -1.2929621934890747, | |
| "logps/rejected": -1.887449860572815, | |
| "loss": 2.5581, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -12.929622650146484, | |
| "rewards/margins": 5.944877624511719, | |
| "rewards/rejected": -18.87449836730957, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.22939068100358423, | |
| "grad_norm": 51.25, | |
| "learning_rate": 6.348252131987621e-07, | |
| "logits/chosen": 0.9491753578186035, | |
| "logits/rejected": 0.5920038819313049, | |
| "logps/chosen": -1.7384018898010254, | |
| "logps/rejected": -1.4728763103485107, | |
| "loss": 4.2467, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -17.38401985168457, | |
| "rewards/margins": -2.6552560329437256, | |
| "rewards/rejected": -14.728763580322266, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2318840579710145, | |
| "grad_norm": 41.75, | |
| "learning_rate": 6.331559480312316e-07, | |
| "logits/chosen": 0.8945069313049316, | |
| "logits/rejected": 0.6501726508140564, | |
| "logps/chosen": -1.6423015594482422, | |
| "logps/rejected": -1.5272799730300903, | |
| "loss": 3.7742, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -16.423015594482422, | |
| "rewards/margins": -1.150214433670044, | |
| "rewards/rejected": -15.272799491882324, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.23437743493844476, | |
| "grad_norm": 27.875, | |
| "learning_rate": 6.314678289478021e-07, | |
| "logits/chosen": 0.868090033531189, | |
| "logits/rejected": 0.7094947695732117, | |
| "logps/chosen": -1.3805748224258423, | |
| "logps/rejected": -1.3794375658035278, | |
| "loss": 2.4021, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -13.805749893188477, | |
| "rewards/margins": -0.011373043060302734, | |
| "rewards/rejected": -13.794376373291016, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.23687081190587503, | |
| "grad_norm": 30.625, | |
| "learning_rate": 6.297609683517465e-07, | |
| "logits/chosen": 0.9310474395751953, | |
| "logits/rejected": 0.7228609323501587, | |
| "logps/chosen": -1.339646339416504, | |
| "logps/rejected": -1.4403069019317627, | |
| "loss": 2.1866, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -13.396462440490723, | |
| "rewards/margins": 1.0066064596176147, | |
| "rewards/rejected": -14.403070449829102, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.2393641888733053, | |
| "grad_norm": 27.875, | |
| "learning_rate": 6.280354798942394e-07, | |
| "logits/chosen": 0.8475272059440613, | |
| "logits/rejected": 0.7736526727676392, | |
| "logps/chosen": -1.3078885078430176, | |
| "logps/rejected": -1.3328254222869873, | |
| "loss": 2.2729, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -13.078886985778809, | |
| "rewards/margins": 0.24936795234680176, | |
| "rewards/rejected": -13.328254699707031, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.24185756584073553, | |
| "grad_norm": 23.125, | |
| "learning_rate": 6.262914784667902e-07, | |
| "logits/chosen": 0.8516014814376831, | |
| "logits/rejected": 0.6699912548065186, | |
| "logps/chosen": -1.2598787546157837, | |
| "logps/rejected": -1.2950444221496582, | |
| "loss": 3.0957, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -12.598786354064941, | |
| "rewards/margins": 0.35165655612945557, | |
| "rewards/rejected": -12.950444221496582, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2443509428081658, | |
| "grad_norm": 37.75, | |
| "learning_rate": 6.245290801935929e-07, | |
| "logits/chosen": 0.8076661229133606, | |
| "logits/rejected": 0.6437760591506958, | |
| "logps/chosen": -1.4585728645324707, | |
| "logps/rejected": -1.3383572101593018, | |
| "loss": 4.0758, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -14.58572769165039, | |
| "rewards/margins": -1.2021559476852417, | |
| "rewards/rejected": -13.38357162475586, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.24684431977559607, | |
| "grad_norm": 22.875, | |
| "learning_rate": 6.227484024237941e-07, | |
| "logits/chosen": 0.8829818367958069, | |
| "logits/rejected": 0.6302488446235657, | |
| "logps/chosen": -1.3252794742584229, | |
| "logps/rejected": -1.350675344467163, | |
| "loss": 2.156, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -13.252795219421387, | |
| "rewards/margins": 0.2539580166339874, | |
| "rewards/rejected": -13.506753921508789, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.24933769674302633, | |
| "grad_norm": 42.25, | |
| "learning_rate": 6.209495637236789e-07, | |
| "logits/chosen": 0.7620182037353516, | |
| "logits/rejected": 0.7404342889785767, | |
| "logps/chosen": -1.7423349618911743, | |
| "logps/rejected": -1.6396631002426147, | |
| "loss": 4.6426, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -17.423349380493164, | |
| "rewards/margins": -1.026718258857727, | |
| "rewards/rejected": -16.396631240844727, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2518310737104566, | |
| "grad_norm": 44.25, | |
| "learning_rate": 6.191326838687767e-07, | |
| "logits/chosen": 0.8130788803100586, | |
| "logits/rejected": 0.6447663307189941, | |
| "logps/chosen": -1.6222593784332275, | |
| "logps/rejected": -1.539342999458313, | |
| "loss": 3.8946, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -16.222591400146484, | |
| "rewards/margins": -0.8291639089584351, | |
| "rewards/rejected": -15.393428802490234, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.25432445067788684, | |
| "grad_norm": 23.25, | |
| "learning_rate": 6.172978838358858e-07, | |
| "logits/chosen": 0.8688798546791077, | |
| "logits/rejected": 0.7208373546600342, | |
| "logps/chosen": -1.2495828866958618, | |
| "logps/rejected": -1.16335129737854, | |
| "loss": 3.3786, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -12.495828628540039, | |
| "rewards/margins": -0.8623146414756775, | |
| "rewards/rejected": -11.633513450622559, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.25681782764531713, | |
| "grad_norm": 28.625, | |
| "learning_rate": 6.154452857950179e-07, | |
| "logits/chosen": 0.867901086807251, | |
| "logits/rejected": 0.6571163535118103, | |
| "logps/chosen": -1.4274240732192993, | |
| "logps/rejected": -1.2099568843841553, | |
| "loss": 4.0273, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -14.274239540100098, | |
| "rewards/margins": -2.174670934677124, | |
| "rewards/rejected": -12.099568367004395, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.25931120461274737, | |
| "grad_norm": 17.5, | |
| "learning_rate": 6.135750131012639e-07, | |
| "logits/chosen": 0.8423357009887695, | |
| "logits/rejected": 0.7953418493270874, | |
| "logps/chosen": -1.1816288232803345, | |
| "logps/rejected": -1.4284311532974243, | |
| "loss": 1.5765, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -11.816287994384766, | |
| "rewards/margins": 2.468022346496582, | |
| "rewards/rejected": -14.284311294555664, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.26180458158017766, | |
| "grad_norm": 48.0, | |
| "learning_rate": 6.116871902865795e-07, | |
| "logits/chosen": 0.7953894138336182, | |
| "logits/rejected": 0.6910791993141174, | |
| "logps/chosen": -1.4984780550003052, | |
| "logps/rejected": -1.359675645828247, | |
| "loss": 4.2421, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -14.984780311584473, | |
| "rewards/margins": -1.3880234956741333, | |
| "rewards/rejected": -13.596756935119629, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2642979585476079, | |
| "grad_norm": 14.5, | |
| "learning_rate": 6.097819430514944e-07, | |
| "logits/chosen": 0.8314008712768555, | |
| "logits/rejected": 0.6421066522598267, | |
| "logps/chosen": -1.1923877000808716, | |
| "logps/rejected": -1.403716802597046, | |
| "loss": 1.3615, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -11.92387580871582, | |
| "rewards/margins": 2.1132919788360596, | |
| "rewards/rejected": -14.037168502807617, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2667913355150382, | |
| "grad_norm": 41.25, | |
| "learning_rate": 6.078593982567416e-07, | |
| "logits/chosen": 0.9006607532501221, | |
| "logits/rejected": 0.7951247096061707, | |
| "logps/chosen": -1.5271791219711304, | |
| "logps/rejected": -1.3939223289489746, | |
| "loss": 3.7453, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -15.271790504455566, | |
| "rewards/margins": -1.332566738128662, | |
| "rewards/rejected": -13.939225196838379, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.26928471248246844, | |
| "grad_norm": 47.25, | |
| "learning_rate": 6.059196839148109e-07, | |
| "logits/chosen": 0.7548659443855286, | |
| "logits/rejected": 0.6844202280044556, | |
| "logps/chosen": -1.4953826665878296, | |
| "logps/rejected": -1.211591362953186, | |
| "loss": 5.099, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -14.953826904296875, | |
| "rewards/margins": -2.8379130363464355, | |
| "rewards/rejected": -12.115914344787598, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.27177808944989873, | |
| "grad_norm": 26.75, | |
| "learning_rate": 6.039629291814247e-07, | |
| "logits/chosen": 0.7883430123329163, | |
| "logits/rejected": 0.6593764424324036, | |
| "logps/chosen": -1.4129087924957275, | |
| "logps/rejected": -1.6393111944198608, | |
| "loss": 2.0234, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -14.12908935546875, | |
| "rewards/margins": 2.2640252113342285, | |
| "rewards/rejected": -16.39311408996582, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.27427146641732897, | |
| "grad_norm": 47.25, | |
| "learning_rate": 6.019892643469387e-07, | |
| "logits/chosen": 0.8495079874992371, | |
| "logits/rejected": 0.7186658978462219, | |
| "logps/chosen": -1.4737249612808228, | |
| "logps/rejected": -1.3164616823196411, | |
| "loss": 3.8864, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -14.737249374389648, | |
| "rewards/margins": -1.5726318359375, | |
| "rewards/rejected": -13.164617538452148, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2767648433847592, | |
| "grad_norm": 60.5, | |
| "learning_rate": 5.999988208276662e-07, | |
| "logits/chosen": 0.8825462460517883, | |
| "logits/rejected": 0.6535596251487732, | |
| "logps/chosen": -1.5816519260406494, | |
| "logps/rejected": -1.498726725578308, | |
| "loss": 3.1086, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -15.816520690917969, | |
| "rewards/margins": -0.8292534351348877, | |
| "rewards/rejected": -14.987266540527344, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2792582203521895, | |
| "grad_norm": 54.0, | |
| "learning_rate": 5.979917311571282e-07, | |
| "logits/chosen": 0.8688668012619019, | |
| "logits/rejected": 0.5492098927497864, | |
| "logps/chosen": -1.4838958978652954, | |
| "logps/rejected": -1.6213023662567139, | |
| "loss": 2.3478, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -14.838959693908691, | |
| "rewards/margins": 1.3740637302398682, | |
| "rewards/rejected": -16.213022232055664, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.28175159731961974, | |
| "grad_norm": 46.5, | |
| "learning_rate": 5.959681289772278e-07, | |
| "logits/chosen": 0.842609703540802, | |
| "logits/rejected": 0.6387814283370972, | |
| "logps/chosen": -1.5294029712677002, | |
| "logps/rejected": -1.7203947305679321, | |
| "loss": 2.5737, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -15.294027328491211, | |
| "rewards/margins": 1.9099199771881104, | |
| "rewards/rejected": -17.203948974609375, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.28424497428705003, | |
| "grad_norm": 22.875, | |
| "learning_rate": 5.939281490293527e-07, | |
| "logits/chosen": 0.7885753512382507, | |
| "logits/rejected": 0.7003703713417053, | |
| "logps/chosen": -1.6169934272766113, | |
| "logps/rejected": -1.616774082183838, | |
| "loss": 3.128, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -16.169931411743164, | |
| "rewards/margins": -0.002192378044128418, | |
| "rewards/rejected": -16.167739868164062, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2867383512544803, | |
| "grad_norm": 125.5, | |
| "learning_rate": 5.918719271454026e-07, | |
| "logits/chosen": 0.8902820944786072, | |
| "logits/rejected": 0.6495590806007385, | |
| "logps/chosen": -1.7944972515106201, | |
| "logps/rejected": -1.6713979244232178, | |
| "loss": 3.4653, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -17.94497299194336, | |
| "rewards/margins": -1.2309918403625488, | |
| "rewards/rejected": -16.71398162841797, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.28923172822191057, | |
| "grad_norm": 12.5, | |
| "learning_rate": 5.897996002387454e-07, | |
| "logits/chosen": 0.9350267648696899, | |
| "logits/rejected": 0.7698911428451538, | |
| "logps/chosen": -1.3168952465057373, | |
| "logps/rejected": -1.5409971475601196, | |
| "loss": 2.018, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -13.168952941894531, | |
| "rewards/margins": 2.241018533706665, | |
| "rewards/rejected": -15.409971237182617, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2917251051893408, | |
| "grad_norm": 35.25, | |
| "learning_rate": 5.877113062951007e-07, | |
| "logits/chosen": 0.9151044487953186, | |
| "logits/rejected": 0.7181938886642456, | |
| "logps/chosen": -1.3629308938980103, | |
| "logps/rejected": -2.3142240047454834, | |
| "loss": 2.7597, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -13.62930965423584, | |
| "rewards/margins": 9.512930870056152, | |
| "rewards/rejected": -23.142240524291992, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.2942184821567711, | |
| "grad_norm": 19.75, | |
| "learning_rate": 5.856071843633516e-07, | |
| "logits/chosen": 0.8448264598846436, | |
| "logits/rejected": 0.6548407077789307, | |
| "logps/chosen": -1.355668544769287, | |
| "logps/rejected": -1.4345015287399292, | |
| "loss": 2.6585, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -13.556684494018555, | |
| "rewards/margins": 0.7883304953575134, | |
| "rewards/rejected": -14.345015525817871, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.29671185912420134, | |
| "grad_norm": 52.0, | |
| "learning_rate": 5.834873745462869e-07, | |
| "logits/chosen": 0.9469012022018433, | |
| "logits/rejected": 0.6909551620483398, | |
| "logps/chosen": -1.5371216535568237, | |
| "logps/rejected": -1.9698981046676636, | |
| "loss": 1.7712, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -15.3712158203125, | |
| "rewards/margins": 4.327763557434082, | |
| "rewards/rejected": -19.698978424072266, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.2992052360916316, | |
| "grad_norm": 38.75, | |
| "learning_rate": 5.813520179912718e-07, | |
| "logits/chosen": 0.8846210241317749, | |
| "logits/rejected": 0.6549557447433472, | |
| "logps/chosen": -1.5691332817077637, | |
| "logps/rejected": -1.859965443611145, | |
| "loss": 1.9083, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -15.691333770751953, | |
| "rewards/margins": 2.9083199501037598, | |
| "rewards/rejected": -18.599653244018555, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.30169861305906187, | |
| "grad_norm": 52.75, | |
| "learning_rate": 5.792012568808498e-07, | |
| "logits/chosen": 0.9424107074737549, | |
| "logits/rejected": 0.638304591178894, | |
| "logps/chosen": -1.7227492332458496, | |
| "logps/rejected": -1.9438109397888184, | |
| "loss": 2.7587, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -17.227493286132812, | |
| "rewards/margins": 2.2106146812438965, | |
| "rewards/rejected": -19.438106536865234, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3041919900264921, | |
| "grad_norm": 30.125, | |
| "learning_rate": 5.770352344232754e-07, | |
| "logits/chosen": 0.9350774884223938, | |
| "logits/rejected": 0.7812179327011108, | |
| "logps/chosen": -1.4625705480575562, | |
| "logps/rejected": -1.6399712562561035, | |
| "loss": 2.1925, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -14.62570571899414, | |
| "rewards/margins": 1.7740064859390259, | |
| "rewards/rejected": -16.39971351623535, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.3066853669939224, | |
| "grad_norm": 38.75, | |
| "learning_rate": 5.748540948429791e-07, | |
| "logits/chosen": 0.8861021995544434, | |
| "logits/rejected": 0.5621581077575684, | |
| "logps/chosen": -1.7297865152359009, | |
| "logps/rejected": -2.025303840637207, | |
| "loss": 2.116, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -17.297866821289062, | |
| "rewards/margins": 2.955172538757324, | |
| "rewards/rejected": -20.25303840637207, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.30917874396135264, | |
| "grad_norm": 40.0, | |
| "learning_rate": 5.726579833709629e-07, | |
| "logits/chosen": 0.8791552782058716, | |
| "logits/rejected": 0.7237104773521423, | |
| "logps/chosen": -1.5754930973052979, | |
| "logps/rejected": -1.760854959487915, | |
| "loss": 1.9028, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -15.75493049621582, | |
| "rewards/margins": 1.853618860244751, | |
| "rewards/rejected": -17.608549118041992, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.31167212092878294, | |
| "grad_norm": 52.25, | |
| "learning_rate": 5.704470462351321e-07, | |
| "logits/chosen": 0.8605432510375977, | |
| "logits/rejected": 0.6145266890525818, | |
| "logps/chosen": -1.4967951774597168, | |
| "logps/rejected": -1.6985702514648438, | |
| "loss": 2.7418, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -14.9679536819458, | |
| "rewards/margins": 2.0177483558654785, | |
| "rewards/rejected": -16.985700607299805, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.3141654978962132, | |
| "grad_norm": 9.25, | |
| "learning_rate": 5.682214306505567e-07, | |
| "logits/chosen": 0.89193195104599, | |
| "logits/rejected": 0.7236483097076416, | |
| "logps/chosen": -1.4118638038635254, | |
| "logps/rejected": -1.9812034368515015, | |
| "loss": 1.6725, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -14.118638038635254, | |
| "rewards/margins": 5.693397521972656, | |
| "rewards/rejected": -19.812034606933594, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.31665887486364347, | |
| "grad_norm": 19.0, | |
| "learning_rate": 5.659812848096706e-07, | |
| "logits/chosen": 0.7631481289863586, | |
| "logits/rejected": 0.6791519522666931, | |
| "logps/chosen": -1.5167012214660645, | |
| "logps/rejected": -1.6185718774795532, | |
| "loss": 3.444, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -15.167011260986328, | |
| "rewards/margins": 1.0187066793441772, | |
| "rewards/rejected": -16.185718536376953, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.3191522518310737, | |
| "grad_norm": 55.75, | |
| "learning_rate": 5.637267578724034e-07, | |
| "logits/chosen": 0.847726047039032, | |
| "logits/rejected": 0.693824291229248, | |
| "logps/chosen": -1.5810160636901855, | |
| "logps/rejected": -1.9167366027832031, | |
| "loss": 2.9597, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -15.810161590576172, | |
| "rewards/margins": 3.357205867767334, | |
| "rewards/rejected": -19.16736602783203, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.32164562879850395, | |
| "grad_norm": 72.5, | |
| "learning_rate": 5.614579999562487e-07, | |
| "logits/chosen": 0.878848135471344, | |
| "logits/rejected": 0.7662035822868347, | |
| "logps/chosen": -1.6665140390396118, | |
| "logps/rejected": -1.7739882469177246, | |
| "loss": 3.1744, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -16.66514015197754, | |
| "rewards/margins": 1.0747425556182861, | |
| "rewards/rejected": -17.73988151550293, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.32413900576593424, | |
| "grad_norm": 61.5, | |
| "learning_rate": 5.591751621262691e-07, | |
| "logits/chosen": 0.8593266010284424, | |
| "logits/rejected": 0.7886440753936768, | |
| "logps/chosen": -1.1743977069854736, | |
| "logps/rejected": -1.3935869932174683, | |
| "loss": 1.9932, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -11.743976593017578, | |
| "rewards/margins": 2.1918928623199463, | |
| "rewards/rejected": -13.935870170593262, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3266323827333645, | |
| "grad_norm": 23.75, | |
| "learning_rate": 5.568783963850368e-07, | |
| "logits/chosen": 0.9685453176498413, | |
| "logits/rejected": 0.7054411768913269, | |
| "logps/chosen": -1.598836898803711, | |
| "logps/rejected": -1.8996286392211914, | |
| "loss": 2.1934, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -15.988369941711426, | |
| "rewards/margins": 3.0079164505004883, | |
| "rewards/rejected": -18.996286392211914, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.3291257597007948, | |
| "grad_norm": 22.0, | |
| "learning_rate": 5.545678556625129e-07, | |
| "logits/chosen": 0.8639561533927917, | |
| "logits/rejected": 0.6618623733520508, | |
| "logps/chosen": -1.7690205574035645, | |
| "logps/rejected": -2.254978895187378, | |
| "loss": 1.9177, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -17.69020652770996, | |
| "rewards/margins": 4.859582901000977, | |
| "rewards/rejected": -22.549787521362305, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.331619136668225, | |
| "grad_norm": 27.5, | |
| "learning_rate": 5.522436938058645e-07, | |
| "logits/chosen": 0.8631035089492798, | |
| "logits/rejected": 0.7001104950904846, | |
| "logps/chosen": -1.5964336395263672, | |
| "logps/rejected": -2.130333185195923, | |
| "loss": 1.625, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -15.964335441589355, | |
| "rewards/margins": 5.338994979858398, | |
| "rewards/rejected": -21.303333282470703, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3341125136356553, | |
| "grad_norm": 59.0, | |
| "learning_rate": 5.49906065569221e-07, | |
| "logits/chosen": 0.733770489692688, | |
| "logits/rejected": 0.5061658620834351, | |
| "logps/chosen": -1.5350837707519531, | |
| "logps/rejected": -1.8332159519195557, | |
| "loss": 2.7709, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -15.350838661193848, | |
| "rewards/margins": 2.981321334838867, | |
| "rewards/rejected": -18.3321590423584, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.33660589060308554, | |
| "grad_norm": 13.1875, | |
| "learning_rate": 5.475551266033692e-07, | |
| "logits/chosen": 0.9098625183105469, | |
| "logits/rejected": 0.7151045203208923, | |
| "logps/chosen": -1.388254165649414, | |
| "logps/rejected": -1.944246530532837, | |
| "loss": 1.4884, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -13.88254165649414, | |
| "rewards/margins": 5.559926509857178, | |
| "rewards/rejected": -19.442468643188477, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.33909926757051584, | |
| "grad_norm": 36.75, | |
| "learning_rate": 5.451910334453903e-07, | |
| "logits/chosen": 0.9809038639068604, | |
| "logits/rejected": 0.6819513440132141, | |
| "logps/chosen": -1.6769332885742188, | |
| "logps/rejected": -2.2928450107574463, | |
| "loss": 1.2734, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -16.769332885742188, | |
| "rewards/margins": 6.159116268157959, | |
| "rewards/rejected": -22.928447723388672, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3415926445379461, | |
| "grad_norm": 111.5, | |
| "learning_rate": 5.428139435082358e-07, | |
| "logits/chosen": 0.9270225763320923, | |
| "logits/rejected": 0.6331555843353271, | |
| "logps/chosen": -1.6441551446914673, | |
| "logps/rejected": -1.7793883085250854, | |
| "loss": 2.9125, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -16.441551208496094, | |
| "rewards/margins": 1.3523308038711548, | |
| "rewards/rejected": -17.793882369995117, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.34408602150537637, | |
| "grad_norm": 19.25, | |
| "learning_rate": 5.404240150702472e-07, | |
| "logits/chosen": 0.9672467708587646, | |
| "logits/rejected": 0.8573353886604309, | |
| "logps/chosen": -1.3790785074234009, | |
| "logps/rejected": -1.8634607791900635, | |
| "loss": 1.7018, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -13.79078483581543, | |
| "rewards/margins": 4.8438215255737305, | |
| "rewards/rejected": -18.634607315063477, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3465793984728066, | |
| "grad_norm": 83.0, | |
| "learning_rate": 5.38021407264616e-07, | |
| "logits/chosen": 0.8024469614028931, | |
| "logits/rejected": 0.5433262586593628, | |
| "logps/chosen": -1.3546580076217651, | |
| "logps/rejected": -1.5655174255371094, | |
| "loss": 2.6886, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -13.546581268310547, | |
| "rewards/margins": 2.108593702316284, | |
| "rewards/rejected": -15.655172348022461, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.34907277544023685, | |
| "grad_norm": 56.5, | |
| "learning_rate": 5.356062800687886e-07, | |
| "logits/chosen": 0.7994624972343445, | |
| "logits/rejected": 0.6035336256027222, | |
| "logps/chosen": -1.2650129795074463, | |
| "logps/rejected": -1.3674687147140503, | |
| "loss": 2.4405, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -12.650128364562988, | |
| "rewards/margins": 1.0245567560195923, | |
| "rewards/rejected": -13.67468547821045, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.35156615240766714, | |
| "grad_norm": 60.75, | |
| "learning_rate": 5.331787942938142e-07, | |
| "logits/chosen": 1.0324114561080933, | |
| "logits/rejected": 0.7126603126525879, | |
| "logps/chosen": -1.5447206497192383, | |
| "logps/rejected": -1.9410955905914307, | |
| "loss": 1.5742, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -15.447206497192383, | |
| "rewards/margins": 3.963749885559082, | |
| "rewards/rejected": -19.41095733642578, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3540595293750974, | |
| "grad_norm": 14.4375, | |
| "learning_rate": 5.307391115736366e-07, | |
| "logits/chosen": 0.7712888717651367, | |
| "logits/rejected": 0.5555048584938049, | |
| "logps/chosen": -1.2323440313339233, | |
| "logps/rejected": -1.6426218748092651, | |
| "loss": 1.5398, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -12.32343864440918, | |
| "rewards/margins": 4.102778434753418, | |
| "rewards/rejected": -16.42621612548828, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3565529063425277, | |
| "grad_norm": 42.25, | |
| "learning_rate": 5.282873943543326e-07, | |
| "logits/chosen": 0.8940728306770325, | |
| "logits/rejected": 0.7413418292999268, | |
| "logps/chosen": -1.296794056892395, | |
| "logps/rejected": -1.8393501043319702, | |
| "loss": 1.7974, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -12.967940330505371, | |
| "rewards/margins": 5.425559997558594, | |
| "rewards/rejected": -18.39349937438965, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.3590462833099579, | |
| "grad_norm": 31.75, | |
| "learning_rate": 5.258238058832948e-07, | |
| "logits/chosen": 0.9329725503921509, | |
| "logits/rejected": 0.5702534914016724, | |
| "logps/chosen": -1.3792263269424438, | |
| "logps/rejected": -1.757681965827942, | |
| "loss": 2.1616, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -13.792261123657227, | |
| "rewards/margins": 3.784557342529297, | |
| "rewards/rejected": -17.576818466186523, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3615396602773882, | |
| "grad_norm": 78.5, | |
| "learning_rate": 5.233485101983624e-07, | |
| "logits/chosen": 0.9451256990432739, | |
| "logits/rejected": 0.8186403512954712, | |
| "logps/chosen": -1.5383343696594238, | |
| "logps/rejected": -2.494551181793213, | |
| "loss": 1.4328, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -15.383341789245605, | |
| "rewards/margins": 9.562170028686523, | |
| "rewards/rejected": -24.945512771606445, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.36403303724481845, | |
| "grad_norm": 58.5, | |
| "learning_rate": 5.208616721168984e-07, | |
| "logits/chosen": 0.9742121696472168, | |
| "logits/rejected": 0.7483265995979309, | |
| "logps/chosen": -1.6329911947250366, | |
| "logps/rejected": -2.0833840370178223, | |
| "loss": 1.8646, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -16.329910278320312, | |
| "rewards/margins": 4.503929138183594, | |
| "rewards/rejected": -20.833839416503906, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.36652641421224874, | |
| "grad_norm": 29.5, | |
| "learning_rate": 5.183634572248153e-07, | |
| "logits/chosen": 0.8174174427986145, | |
| "logits/rejected": 0.7698001265525818, | |
| "logps/chosen": -1.255910038948059, | |
| "logps/rejected": -1.4298808574676514, | |
| "loss": 2.2763, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -12.559102058410645, | |
| "rewards/margins": 1.739708423614502, | |
| "rewards/rejected": -14.298810005187988, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.369019791179679, | |
| "grad_norm": 161.0, | |
| "learning_rate": 5.158540318655495e-07, | |
| "logits/chosen": 1.1192365884780884, | |
| "logits/rejected": 0.7937313914299011, | |
| "logps/chosen": -1.7974631786346436, | |
| "logps/rejected": -2.402998924255371, | |
| "loss": 2.2646, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -17.974632263183594, | |
| "rewards/margins": 6.055357933044434, | |
| "rewards/rejected": -24.02998924255371, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3715131681471092, | |
| "grad_norm": 13.4375, | |
| "learning_rate": 5.133335631289858e-07, | |
| "logits/chosen": 1.004485011100769, | |
| "logits/rejected": 0.6550527215003967, | |
| "logps/chosen": -1.4417423009872437, | |
| "logps/rejected": -2.1560442447662354, | |
| "loss": 1.3901, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -14.4174222946167, | |
| "rewards/margins": 7.1430182456970215, | |
| "rewards/rejected": -21.560441970825195, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.3740065451145395, | |
| "grad_norm": 29.75, | |
| "learning_rate": 5.10802218840331e-07, | |
| "logits/chosen": 0.8932673335075378, | |
| "logits/rejected": 0.695792019367218, | |
| "logps/chosen": -1.3724555969238281, | |
| "logps/rejected": -1.7769482135772705, | |
| "loss": 1.7406, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -13.724554061889648, | |
| "rewards/margins": 4.044928073883057, | |
| "rewards/rejected": -17.76948356628418, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.37649992208196975, | |
| "grad_norm": 38.25, | |
| "learning_rate": 5.0826016754894e-07, | |
| "logits/chosen": 0.9987000823020935, | |
| "logits/rejected": 0.6120975017547607, | |
| "logps/chosen": -1.7447395324707031, | |
| "logps/rejected": -2.424745559692383, | |
| "loss": 2.0385, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -17.4473934173584, | |
| "rewards/margins": 6.800059795379639, | |
| "rewards/rejected": -24.247455596923828, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.37899329904940005, | |
| "grad_norm": 43.5, | |
| "learning_rate": 5.057075785170923e-07, | |
| "logits/chosen": 0.7949992418289185, | |
| "logits/rejected": 0.735917866230011, | |
| "logps/chosen": -1.4737513065338135, | |
| "logps/rejected": -1.7997541427612305, | |
| "loss": 2.4462, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -14.737512588500977, | |
| "rewards/margins": 3.2600276470184326, | |
| "rewards/rejected": -17.997541427612305, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3814866760168303, | |
| "grad_norm": 34.75, | |
| "learning_rate": 5.031446217087223e-07, | |
| "logits/chosen": 0.7635215520858765, | |
| "logits/rejected": 0.6593471765518188, | |
| "logps/chosen": -1.4680148363113403, | |
| "logps/rejected": -1.8192330598831177, | |
| "loss": 2.3192, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -14.680147171020508, | |
| "rewards/margins": 3.5121822357177734, | |
| "rewards/rejected": -18.19232940673828, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3839800529842606, | |
| "grad_norm": 18.625, | |
| "learning_rate": 5.005714677781016e-07, | |
| "logits/chosen": 0.8512160778045654, | |
| "logits/rejected": 0.638878583908081, | |
| "logps/chosen": -1.239166259765625, | |
| "logps/rejected": -1.7152905464172363, | |
| "loss": 1.1124, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -12.39166259765625, | |
| "rewards/margins": 4.761242866516113, | |
| "rewards/rejected": -17.15290641784668, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3864734299516908, | |
| "grad_norm": 16.375, | |
| "learning_rate": 4.979882880584766e-07, | |
| "logits/chosen": 0.9124481678009033, | |
| "logits/rejected": 0.7296810150146484, | |
| "logps/chosen": -1.7560640573501587, | |
| "logps/rejected": -2.781906843185425, | |
| "loss": 1.6899, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -17.56064224243164, | |
| "rewards/margins": 10.258424758911133, | |
| "rewards/rejected": -27.81906509399414, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3889668069191211, | |
| "grad_norm": 30.125, | |
| "learning_rate": 4.953952545506602e-07, | |
| "logits/chosen": 0.8763688802719116, | |
| "logits/rejected": 0.7317189574241638, | |
| "logps/chosen": -1.6232566833496094, | |
| "logps/rejected": -2.2681305408477783, | |
| "loss": 1.9121, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -16.232566833496094, | |
| "rewards/margins": 6.448739051818848, | |
| "rewards/rejected": -22.681304931640625, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.39146018388655135, | |
| "grad_norm": 23.125, | |
| "learning_rate": 4.927925399115788e-07, | |
| "logits/chosen": 0.8235619068145752, | |
| "logits/rejected": 0.7919750213623047, | |
| "logps/chosen": -1.391683578491211, | |
| "logps/rejected": -1.6939644813537598, | |
| "loss": 2.2898, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -13.916834831237793, | |
| "rewards/margins": 3.0228097438812256, | |
| "rewards/rejected": -16.939645767211914, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.3939535608539816, | |
| "grad_norm": 58.0, | |
| "learning_rate": 4.901803174427757e-07, | |
| "logits/chosen": 0.890289306640625, | |
| "logits/rejected": 0.6626406311988831, | |
| "logps/chosen": -1.6668946743011475, | |
| "logps/rejected": -2.7818055152893066, | |
| "loss": 1.1016, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -16.668947219848633, | |
| "rewards/margins": 11.14910888671875, | |
| "rewards/rejected": -27.81805419921875, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3964469378214119, | |
| "grad_norm": 50.25, | |
| "learning_rate": 4.875587610788733e-07, | |
| "logits/chosen": 0.7171937227249146, | |
| "logits/rejected": 0.6810190677642822, | |
| "logps/chosen": -1.645186424255371, | |
| "logps/rejected": -2.06756854057312, | |
| "loss": 2.5663, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -16.45186424255371, | |
| "rewards/margins": 4.22382116317749, | |
| "rewards/rejected": -20.67568588256836, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3989403147888421, | |
| "grad_norm": 19.375, | |
| "learning_rate": 4.849280453759897e-07, | |
| "logits/chosen": 0.9262104630470276, | |
| "logits/rejected": 0.7050573229789734, | |
| "logps/chosen": -1.6274131536483765, | |
| "logps/rejected": -2.1605324745178223, | |
| "loss": 1.2244, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -16.274131774902344, | |
| "rewards/margins": 5.331192970275879, | |
| "rewards/rejected": -21.60532569885254, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4014336917562724, | |
| "grad_norm": 83.0, | |
| "learning_rate": 4.822883455001173e-07, | |
| "logits/chosen": 0.9184644818305969, | |
| "logits/rejected": 0.8644086122512817, | |
| "logps/chosen": -1.5301023721694946, | |
| "logps/rejected": -1.876584768295288, | |
| "loss": 2.0259, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -15.30102252960205, | |
| "rewards/margins": 3.464823007583618, | |
| "rewards/rejected": -18.76584815979004, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.40392706872370265, | |
| "grad_norm": 24.5, | |
| "learning_rate": 4.796398372154588e-07, | |
| "logits/chosen": 1.0671634674072266, | |
| "logits/rejected": 0.8774153590202332, | |
| "logps/chosen": -1.6217372417449951, | |
| "logps/rejected": -2.3855130672454834, | |
| "loss": 1.4698, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -16.21737289428711, | |
| "rewards/margins": 7.637757301330566, | |
| "rewards/rejected": -23.85512924194336, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.40642044569113295, | |
| "grad_norm": 44.0, | |
| "learning_rate": 4.769826968727243e-07, | |
| "logits/chosen": 0.80574631690979, | |
| "logits/rejected": 0.6158944964408875, | |
| "logps/chosen": -1.5703632831573486, | |
| "logps/rejected": -2.269869327545166, | |
| "loss": 1.3586, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -15.703633308410645, | |
| "rewards/margins": 6.995059967041016, | |
| "rewards/rejected": -22.698694229125977, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.4089138226585632, | |
| "grad_norm": 27.5, | |
| "learning_rate": 4.743171013973885e-07, | |
| "logits/chosen": 0.935499370098114, | |
| "logits/rejected": 0.7237244844436646, | |
| "logps/chosen": -1.7726106643676758, | |
| "logps/rejected": -2.6084468364715576, | |
| "loss": 1.447, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -17.72610855102539, | |
| "rewards/margins": 8.358359336853027, | |
| "rewards/rejected": -26.08446502685547, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4114071996259935, | |
| "grad_norm": 30.0, | |
| "learning_rate": 4.716432282779106e-07, | |
| "logits/chosen": 0.9203133583068848, | |
| "logits/rejected": 0.7862353920936584, | |
| "logps/chosen": -1.4431755542755127, | |
| "logps/rejected": -2.1590194702148438, | |
| "loss": 1.4126, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -14.431756019592285, | |
| "rewards/margins": 7.158439636230469, | |
| "rewards/rejected": -21.590194702148438, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.4139005765934237, | |
| "grad_norm": 100.5, | |
| "learning_rate": 4.6896125555391575e-07, | |
| "logits/chosen": 0.9510793685913086, | |
| "logits/rejected": 0.7097218036651611, | |
| "logps/chosen": -1.377150535583496, | |
| "logps/rejected": -1.8154629468917847, | |
| "loss": 1.436, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -13.771505355834961, | |
| "rewards/margins": 4.383124351501465, | |
| "rewards/rejected": -18.15462875366211, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.41639395356085396, | |
| "grad_norm": 40.5, | |
| "learning_rate": 4.662713618043413e-07, | |
| "logits/chosen": 0.9421004056930542, | |
| "logits/rejected": 0.6513608694076538, | |
| "logps/chosen": -1.4433151483535767, | |
| "logps/rejected": -1.7160542011260986, | |
| "loss": 1.3431, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -14.433152198791504, | |
| "rewards/margins": 2.7273917198181152, | |
| "rewards/rejected": -17.16054344177246, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.41888733052828425, | |
| "grad_norm": 78.5, | |
| "learning_rate": 4.635737261355447e-07, | |
| "logits/chosen": 0.8841539621353149, | |
| "logits/rejected": 0.7275552153587341, | |
| "logps/chosen": -1.617548942565918, | |
| "logps/rejected": -2.5178287029266357, | |
| "loss": 1.7514, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -16.17548942565918, | |
| "rewards/margins": 9.002798080444336, | |
| "rewards/rejected": -25.178287506103516, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4213807074957145, | |
| "grad_norm": 61.5, | |
| "learning_rate": 4.608685281693789e-07, | |
| "logits/chosen": 0.795113205909729, | |
| "logits/rejected": 0.7205825448036194, | |
| "logps/chosen": -1.5723981857299805, | |
| "logps/rejected": -1.8851563930511475, | |
| "loss": 2.6762, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -15.723981857299805, | |
| "rewards/margins": 3.1275830268859863, | |
| "rewards/rejected": -18.851564407348633, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.4238740844631448, | |
| "grad_norm": 40.5, | |
| "learning_rate": 4.581559480312316e-07, | |
| "logits/chosen": 0.9474557042121887, | |
| "logits/rejected": 0.7945749759674072, | |
| "logps/chosen": -1.8188387155532837, | |
| "logps/rejected": -2.6367805004119873, | |
| "loss": 1.3681, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -18.18838882446289, | |
| "rewards/margins": 8.179415702819824, | |
| "rewards/rejected": -26.36780548095703, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.426367461430575, | |
| "grad_norm": 33.25, | |
| "learning_rate": 4.5543616633803197e-07, | |
| "logits/chosen": 0.7378120422363281, | |
| "logits/rejected": 0.7000318169593811, | |
| "logps/chosen": -1.4147385358810425, | |
| "logps/rejected": -1.8963797092437744, | |
| "loss": 1.899, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -14.147384643554688, | |
| "rewards/margins": 4.816410541534424, | |
| "rewards/rejected": -18.963794708251953, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.4288608383980053, | |
| "grad_norm": 35.25, | |
| "learning_rate": 4.527093641862241e-07, | |
| "logits/chosen": 0.9072024822235107, | |
| "logits/rejected": 0.7587930560112, | |
| "logps/chosen": -1.2699742317199707, | |
| "logps/rejected": -1.702739953994751, | |
| "loss": 1.4364, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -12.699743270874023, | |
| "rewards/margins": 4.327658176422119, | |
| "rewards/rejected": -17.027400970458984, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.43135421536543556, | |
| "grad_norm": 25.75, | |
| "learning_rate": 4.499757231397087e-07, | |
| "logits/chosen": 0.8443821668624878, | |
| "logits/rejected": 0.6597446203231812, | |
| "logps/chosen": -1.509061336517334, | |
| "logps/rejected": -2.0712409019470215, | |
| "loss": 1.2708, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -15.09061336517334, | |
| "rewards/margins": 5.6217942237854, | |
| "rewards/rejected": -20.7124080657959, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.43384759233286585, | |
| "grad_norm": 23.375, | |
| "learning_rate": 4.4723542521775385e-07, | |
| "logits/chosen": 1.0543487071990967, | |
| "logits/rejected": 0.5649646520614624, | |
| "logps/chosen": -1.4722059965133667, | |
| "logps/rejected": -2.189124584197998, | |
| "loss": 0.8446, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -14.72205924987793, | |
| "rewards/margins": 7.169185638427734, | |
| "rewards/rejected": -21.891244888305664, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.4363409693002961, | |
| "grad_norm": 54.75, | |
| "learning_rate": 4.444886528828749e-07, | |
| "logits/chosen": 0.9907981157302856, | |
| "logits/rejected": 0.7723469138145447, | |
| "logps/chosen": -1.8176202774047852, | |
| "logps/rejected": -2.3857648372650146, | |
| "loss": 1.7344, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -18.17620277404785, | |
| "rewards/margins": 5.6814446449279785, | |
| "rewards/rejected": -23.857648849487305, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.4388343462677263, | |
| "grad_norm": 31.5, | |
| "learning_rate": 4.417355890286857e-07, | |
| "logits/chosen": 0.9411242008209229, | |
| "logits/rejected": 0.7533101439476013, | |
| "logps/chosen": -1.6791445016860962, | |
| "logps/rejected": -2.381438732147217, | |
| "loss": 1.8322, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -16.791446685791016, | |
| "rewards/margins": 7.022940635681152, | |
| "rewards/rejected": -23.81438446044922, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.4413277232351566, | |
| "grad_norm": 51.25, | |
| "learning_rate": 4.389764169677205e-07, | |
| "logits/chosen": 0.862296462059021, | |
| "logits/rejected": 0.7431577444076538, | |
| "logps/chosen": -1.3871877193450928, | |
| "logps/rejected": -1.9420627355575562, | |
| "loss": 1.2998, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -13.87187671661377, | |
| "rewards/margins": 5.548751354217529, | |
| "rewards/rejected": -19.420629501342773, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.44382110020258686, | |
| "grad_norm": 41.0, | |
| "learning_rate": 4.3621132041922745e-07, | |
| "logits/chosen": 0.8196381330490112, | |
| "logits/rejected": 0.735532820224762, | |
| "logps/chosen": -1.3557261228561401, | |
| "logps/rejected": -2.2253174781799316, | |
| "loss": 1.2671, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -13.557262420654297, | |
| "rewards/margins": 8.695913314819336, | |
| "rewards/rejected": -22.253175735473633, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.44631447717001715, | |
| "grad_norm": 28.75, | |
| "learning_rate": 4.334404834969368e-07, | |
| "logits/chosen": 1.0182719230651855, | |
| "logits/rejected": 0.8464354872703552, | |
| "logps/chosen": -1.3779345750808716, | |
| "logps/rejected": -1.8232632875442505, | |
| "loss": 1.3614, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -13.779345512390137, | |
| "rewards/margins": 4.4532856941223145, | |
| "rewards/rejected": -18.23263168334961, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.4488078541374474, | |
| "grad_norm": 18.375, | |
| "learning_rate": 4.306640906968011e-07, | |
| "logits/chosen": 0.927130401134491, | |
| "logits/rejected": 0.7001396417617798, | |
| "logps/chosen": -1.3739848136901855, | |
| "logps/rejected": -2.3601279258728027, | |
| "loss": 0.5738, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -13.739850044250488, | |
| "rewards/margins": 9.861430168151855, | |
| "rewards/rejected": -23.601280212402344, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4513012311048777, | |
| "grad_norm": 46.25, | |
| "learning_rate": 4.2788232688471e-07, | |
| "logits/chosen": 0.858923077583313, | |
| "logits/rejected": 0.7578305006027222, | |
| "logps/chosen": -1.2482776641845703, | |
| "logps/rejected": -1.7487417459487915, | |
| "loss": 1.0749, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -12.482775688171387, | |
| "rewards/margins": 5.004642486572266, | |
| "rewards/rejected": -17.487417221069336, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.4537946080723079, | |
| "grad_norm": 83.0, | |
| "learning_rate": 4.2509537728418233e-07, | |
| "logits/chosen": 0.8518757224082947, | |
| "logits/rejected": 0.7721596360206604, | |
| "logps/chosen": -1.3393375873565674, | |
| "logps/rejected": -1.7837915420532227, | |
| "loss": 1.2853, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -13.393375396728516, | |
| "rewards/margins": 4.444540977478027, | |
| "rewards/rejected": -17.83791732788086, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4562879850397382, | |
| "grad_norm": 56.0, | |
| "learning_rate": 4.223034274640317e-07, | |
| "logits/chosen": 0.9242639541625977, | |
| "logits/rejected": 0.7321256995201111, | |
| "logps/chosen": -1.6946762800216675, | |
| "logps/rejected": -2.9650654792785645, | |
| "loss": 1.0034, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -16.94676399230957, | |
| "rewards/margins": 12.703892707824707, | |
| "rewards/rejected": -29.65065574645996, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.45878136200716846, | |
| "grad_norm": 38.75, | |
| "learning_rate": 4.195066633260109e-07, | |
| "logits/chosen": 0.8796188831329346, | |
| "logits/rejected": 0.6841633319854736, | |
| "logps/chosen": -1.3098094463348389, | |
| "logps/rejected": -1.709314227104187, | |
| "loss": 1.0082, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -13.09809398651123, | |
| "rewards/margins": 3.995047092437744, | |
| "rewards/rejected": -17.0931396484375, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4612747389745987, | |
| "grad_norm": 49.75, | |
| "learning_rate": 4.1670527109243414e-07, | |
| "logits/chosen": 0.8437327146530151, | |
| "logits/rejected": 0.7233911156654358, | |
| "logps/chosen": -1.552445888519287, | |
| "logps/rejected": -2.1319706439971924, | |
| "loss": 1.2603, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -15.524458885192871, | |
| "rewards/margins": 5.795248031616211, | |
| "rewards/rejected": -21.3197078704834, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.463768115942029, | |
| "grad_norm": 45.5, | |
| "learning_rate": 4.138994372937766e-07, | |
| "logits/chosen": 0.9246405363082886, | |
| "logits/rejected": 0.7257117629051208, | |
| "logps/chosen": -1.5023407936096191, | |
| "logps/rejected": -2.219346046447754, | |
| "loss": 1.2028, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -15.023408889770508, | |
| "rewards/margins": 7.170053958892822, | |
| "rewards/rejected": -22.193462371826172, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.46626149290945923, | |
| "grad_norm": 74.0, | |
| "learning_rate": 4.110893487562548e-07, | |
| "logits/chosen": 0.7957507371902466, | |
| "logits/rejected": 0.7296849489212036, | |
| "logps/chosen": -1.3323700428009033, | |
| "logps/rejected": -2.052271842956543, | |
| "loss": 0.6391, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -13.323701858520508, | |
| "rewards/margins": 7.199017524719238, | |
| "rewards/rejected": -20.52271842956543, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.4687548698768895, | |
| "grad_norm": 56.25, | |
| "learning_rate": 4.082751925893869e-07, | |
| "logits/chosen": 0.8817852735519409, | |
| "logits/rejected": 0.7720733880996704, | |
| "logps/chosen": -1.196410059928894, | |
| "logps/rejected": -1.500748634338379, | |
| "loss": 0.9032, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -11.96410083770752, | |
| "rewards/margins": 3.0433857440948486, | |
| "rewards/rejected": -15.007488250732422, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.47124824684431976, | |
| "grad_norm": 41.0, | |
| "learning_rate": 4.054571561735334e-07, | |
| "logits/chosen": 0.9272749423980713, | |
| "logits/rejected": 0.6019188761711121, | |
| "logps/chosen": -1.804772138595581, | |
| "logps/rejected": -2.7550244331359863, | |
| "loss": 0.5996, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -18.04772186279297, | |
| "rewards/margins": 9.502524375915527, | |
| "rewards/rejected": -27.550243377685547, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.47374162381175006, | |
| "grad_norm": 15.6875, | |
| "learning_rate": 4.026354271474214e-07, | |
| "logits/chosen": 0.9149619340896606, | |
| "logits/rejected": 0.6641325950622559, | |
| "logps/chosen": -1.705862283706665, | |
| "logps/rejected": -3.0835325717926025, | |
| "loss": 1.0064, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -17.058624267578125, | |
| "rewards/margins": 13.776700019836426, | |
| "rewards/rejected": -30.835325241088867, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4762350007791803, | |
| "grad_norm": 23.0, | |
| "learning_rate": 3.998101933956498e-07, | |
| "logits/chosen": 0.8473320007324219, | |
| "logits/rejected": 0.7866963744163513, | |
| "logps/chosen": -1.4231493473052979, | |
| "logps/rejected": -2.1398186683654785, | |
| "loss": 0.7511, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -14.23149299621582, | |
| "rewards/margins": 7.1666951179504395, | |
| "rewards/rejected": -21.39818572998047, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.4787283777466106, | |
| "grad_norm": 39.5, | |
| "learning_rate": 3.969816430361794e-07, | |
| "logits/chosen": 0.8237781524658203, | |
| "logits/rejected": 0.7161869406700134, | |
| "logps/chosen": -1.8508167266845703, | |
| "logps/rejected": -2.9980063438415527, | |
| "loss": 0.7146, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -18.508167266845703, | |
| "rewards/margins": 11.471895217895508, | |
| "rewards/rejected": -29.980064392089844, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.48122175471404083, | |
| "grad_norm": 75.0, | |
| "learning_rate": 3.9414996440780724e-07, | |
| "logits/chosen": 0.9529024958610535, | |
| "logits/rejected": 0.8278242349624634, | |
| "logps/chosen": -1.8834271430969238, | |
| "logps/rejected": -2.4769580364227295, | |
| "loss": 1.046, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -18.834270477294922, | |
| "rewards/margins": 5.935309410095215, | |
| "rewards/rejected": -24.769580841064453, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.48371513168147107, | |
| "grad_norm": 62.0, | |
| "learning_rate": 3.913153460576256e-07, | |
| "logits/chosen": 0.916070818901062, | |
| "logits/rejected": 0.6884597539901733, | |
| "logps/chosen": -1.893513560295105, | |
| "logps/rejected": -2.9602129459381104, | |
| "loss": 1.0144, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -18.935134887695312, | |
| "rewards/margins": 10.66699504852295, | |
| "rewards/rejected": -29.602130889892578, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.48620850864890136, | |
| "grad_norm": 18.125, | |
| "learning_rate": 3.8847797672846825e-07, | |
| "logits/chosen": 0.9603822231292725, | |
| "logits/rejected": 0.6512764692306519, | |
| "logps/chosen": -1.7449413537979126, | |
| "logps/rejected": -2.7406604290008545, | |
| "loss": 0.3528, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -17.44941520690918, | |
| "rewards/margins": 9.957185745239258, | |
| "rewards/rejected": -27.40660285949707, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4887018856163316, | |
| "grad_norm": 10.75, | |
| "learning_rate": 3.8563804534634246e-07, | |
| "logits/chosen": 0.9687063694000244, | |
| "logits/rejected": 0.8893125057220459, | |
| "logps/chosen": -1.372796654701233, | |
| "logps/rejected": -2.2664504051208496, | |
| "loss": 0.533, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -13.727968215942383, | |
| "rewards/margins": 8.93653678894043, | |
| "rewards/rejected": -22.664501190185547, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4911952625837619, | |
| "grad_norm": 48.75, | |
| "learning_rate": 3.827957410078494e-07, | |
| "logits/chosen": 0.8412132859230042, | |
| "logits/rejected": 0.7297399044036865, | |
| "logps/chosen": -2.030590772628784, | |
| "logps/rejected": -3.408313035964966, | |
| "loss": 0.7652, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -20.305906295776367, | |
| "rewards/margins": 13.777222633361816, | |
| "rewards/rejected": -34.0831298828125, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.49368863955119213, | |
| "grad_norm": 25.0, | |
| "learning_rate": 3.799512529675939e-07, | |
| "logits/chosen": 0.8365733623504639, | |
| "logits/rejected": 0.7946135997772217, | |
| "logps/chosen": -1.8182940483093262, | |
| "logps/rejected": -2.8680472373962402, | |
| "loss": 0.6624, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -18.182941436767578, | |
| "rewards/margins": 10.497532844543457, | |
| "rewards/rejected": -28.680471420288086, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4961820165186224, | |
| "grad_norm": 49.25, | |
| "learning_rate": 3.7710477062558195e-07, | |
| "logits/chosen": 0.8030841946601868, | |
| "logits/rejected": 0.6840673685073853, | |
| "logps/chosen": -1.7462419271469116, | |
| "logps/rejected": -2.6651408672332764, | |
| "loss": 0.9539, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -17.462419509887695, | |
| "rewards/margins": 9.188987731933594, | |
| "rewards/rejected": -26.651405334472656, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.49867539348605266, | |
| "grad_norm": 37.25, | |
| "learning_rate": 3.742564835146099e-07, | |
| "logits/chosen": 0.940216064453125, | |
| "logits/rejected": 0.7382882833480835, | |
| "logps/chosen": -1.5715973377227783, | |
| "logps/rejected": -2.2499136924743652, | |
| "loss": 0.5224, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -15.715973854064941, | |
| "rewards/margins": 6.7831621170043945, | |
| "rewards/rejected": -22.499134063720703, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.501168770453483, | |
| "grad_norm": 53.5, | |
| "learning_rate": 3.71406581287645e-07, | |
| "logits/chosen": 0.8017429113388062, | |
| "logits/rejected": 0.7019472122192383, | |
| "logps/chosen": -1.5708627700805664, | |
| "logps/rejected": -2.446748733520508, | |
| "loss": 0.6145, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -15.70862865447998, | |
| "rewards/margins": 8.758858680725098, | |
| "rewards/rejected": -24.467487335205078, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5036621474209132, | |
| "grad_norm": 24.5, | |
| "learning_rate": 3.6855525370519617e-07, | |
| "logits/chosen": 0.9191329479217529, | |
| "logits/rejected": 0.7709681987762451, | |
| "logps/chosen": -1.2503210306167603, | |
| "logps/rejected": -1.8521945476531982, | |
| "loss": 0.5184, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -12.50321102142334, | |
| "rewards/margins": 6.018735408782959, | |
| "rewards/rejected": -18.52194595336914, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5061555243883434, | |
| "grad_norm": 17.375, | |
| "learning_rate": 3.6570269062268025e-07, | |
| "logits/chosen": 0.7203347682952881, | |
| "logits/rejected": 0.7312765717506409, | |
| "logps/chosen": -1.9442358016967773, | |
| "logps/rejected": -3.1624157428741455, | |
| "loss": 0.6896, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -19.44235610961914, | |
| "rewards/margins": 12.18179988861084, | |
| "rewards/rejected": -31.624156951904297, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5086489013557737, | |
| "grad_norm": 14.125, | |
| "learning_rate": 3.6284908197777915e-07, | |
| "logits/chosen": 0.7811324596405029, | |
| "logits/rejected": 0.7788522839546204, | |
| "logps/chosen": -1.5343513488769531, | |
| "logps/rejected": -2.655756711959839, | |
| "loss": 0.4037, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -15.343514442443848, | |
| "rewards/margins": 11.214055061340332, | |
| "rewards/rejected": -26.557571411132812, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.511142278323204, | |
| "grad_norm": 9.75, | |
| "learning_rate": 3.599946177777936e-07, | |
| "logits/chosen": 0.9504005908966064, | |
| "logits/rejected": 0.8734852075576782, | |
| "logps/chosen": -1.6099568605422974, | |
| "logps/rejected": -2.527747869491577, | |
| "loss": 0.4034, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -16.099567413330078, | |
| "rewards/margins": 9.177909851074219, | |
| "rewards/rejected": -25.277477264404297, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5136356552906343, | |
| "grad_norm": 9.0, | |
| "learning_rate": 3.571394880869919e-07, | |
| "logits/chosen": 1.0245471000671387, | |
| "logits/rejected": 0.8590348958969116, | |
| "logps/chosen": -1.5646387338638306, | |
| "logps/rejected": -2.8300962448120117, | |
| "loss": 0.633, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -15.64638614654541, | |
| "rewards/margins": 12.65457534790039, | |
| "rewards/rejected": -28.300960540771484, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5161290322580645, | |
| "grad_norm": 11.9375, | |
| "learning_rate": 3.5428388301395325e-07, | |
| "logits/chosen": 0.9345250129699707, | |
| "logits/rejected": 0.8547608852386475, | |
| "logps/chosen": -1.4048100709915161, | |
| "logps/rejected": -2.2128334045410156, | |
| "loss": 0.5043, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -14.048102378845215, | |
| "rewards/margins": 8.080232620239258, | |
| "rewards/rejected": -22.128334045410156, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5186224092254947, | |
| "grad_norm": 12.375, | |
| "learning_rate": 3.514279926989105e-07, | |
| "logits/chosen": 0.9688948392868042, | |
| "logits/rejected": 0.7790014743804932, | |
| "logps/chosen": -2.1355183124542236, | |
| "logps/rejected": -3.5980281829833984, | |
| "loss": 0.5437, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -21.35518455505371, | |
| "rewards/margins": 14.625100135803223, | |
| "rewards/rejected": -35.98028564453125, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5211157861929251, | |
| "grad_norm": 8.5, | |
| "learning_rate": 3.485720073010896e-07, | |
| "logits/chosen": 0.9008550643920898, | |
| "logits/rejected": 0.8881810307502747, | |
| "logps/chosen": -1.938570261001587, | |
| "logps/rejected": -3.0095808506011963, | |
| "loss": 0.513, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -19.38570213317871, | |
| "rewards/margins": 10.710105895996094, | |
| "rewards/rejected": -30.095808029174805, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5236091631603553, | |
| "grad_norm": 24.5, | |
| "learning_rate": 3.457161169860469e-07, | |
| "logits/chosen": 0.9138238430023193, | |
| "logits/rejected": 0.6945370435714722, | |
| "logps/chosen": -1.7868579626083374, | |
| "logps/rejected": -3.283820390701294, | |
| "loss": 0.5155, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.868579864501953, | |
| "rewards/margins": 14.969620704650879, | |
| "rewards/rejected": -32.83820343017578, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5261025401277856, | |
| "grad_norm": 5.09375, | |
| "learning_rate": 3.428605119130082e-07, | |
| "logits/chosen": 0.8236789703369141, | |
| "logits/rejected": 0.8235811591148376, | |
| "logps/chosen": -1.940079689025879, | |
| "logps/rejected": -3.2608189582824707, | |
| "loss": 0.2783, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -19.400798797607422, | |
| "rewards/margins": 13.207389831542969, | |
| "rewards/rejected": -32.60818862915039, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5285959170952158, | |
| "grad_norm": 45.75, | |
| "learning_rate": 3.4000538222220635e-07, | |
| "logits/chosen": 0.9403684139251709, | |
| "logits/rejected": 0.8005753755569458, | |
| "logps/chosen": -1.6408517360687256, | |
| "logps/rejected": -2.543820858001709, | |
| "loss": 0.4837, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -16.408517837524414, | |
| "rewards/margins": 9.029691696166992, | |
| "rewards/rejected": -25.438209533691406, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.531089294062646, | |
| "grad_norm": 8.625, | |
| "learning_rate": 3.37150918022221e-07, | |
| "logits/chosen": 0.8799944519996643, | |
| "logits/rejected": 0.7955228090286255, | |
| "logps/chosen": -1.9793920516967773, | |
| "logps/rejected": -3.432222843170166, | |
| "loss": 0.216, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -19.79391860961914, | |
| "rewards/margins": 14.528306007385254, | |
| "rewards/rejected": -34.32222366333008, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5335826710300764, | |
| "grad_norm": 7.96875, | |
| "learning_rate": 3.342973093773199e-07, | |
| "logits/chosen": 0.9032948017120361, | |
| "logits/rejected": 0.8324267268180847, | |
| "logps/chosen": -1.3809956312179565, | |
| "logps/rejected": -2.4675354957580566, | |
| "loss": 0.3747, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -13.809956550598145, | |
| "rewards/margins": 10.865400314331055, | |
| "rewards/rejected": -24.675355911254883, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5360760479975066, | |
| "grad_norm": 5.875, | |
| "learning_rate": 3.314447462948038e-07, | |
| "logits/chosen": 0.8150188326835632, | |
| "logits/rejected": 0.7412484884262085, | |
| "logps/chosen": -1.7949788570404053, | |
| "logps/rejected": -3.1454625129699707, | |
| "loss": 0.5245, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -17.94978904724121, | |
| "rewards/margins": 13.504838943481445, | |
| "rewards/rejected": -31.454627990722656, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5385694249649369, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 3.285934187123551e-07, | |
| "logits/chosen": 0.9771428108215332, | |
| "logits/rejected": 0.7578872442245483, | |
| "logps/chosen": -1.5606952905654907, | |
| "logps/rejected": -2.352637529373169, | |
| "loss": 0.9085, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -15.606952667236328, | |
| "rewards/margins": 7.9194231033325195, | |
| "rewards/rejected": -23.526376724243164, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5410628019323671, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 3.2574351648539017e-07, | |
| "logits/chosen": 0.8879974484443665, | |
| "logits/rejected": 0.7966049909591675, | |
| "logps/chosen": -1.748363733291626, | |
| "logps/rejected": -2.8166420459747314, | |
| "loss": 0.7279, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -17.4836368560791, | |
| "rewards/margins": 10.682784080505371, | |
| "rewards/rejected": -28.16642189025879, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5435561788997975, | |
| "grad_norm": 12.875, | |
| "learning_rate": 3.228952293744181e-07, | |
| "logits/chosen": 0.9608930349349976, | |
| "logits/rejected": 0.7884482741355896, | |
| "logps/chosen": -1.9304460287094116, | |
| "logps/rejected": -3.123302459716797, | |
| "loss": 0.6031, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -19.304460525512695, | |
| "rewards/margins": 11.928570747375488, | |
| "rewards/rejected": -31.233030319213867, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5460495558672277, | |
| "grad_norm": 13.6875, | |
| "learning_rate": 3.200487470324062e-07, | |
| "logits/chosen": 0.9692325592041016, | |
| "logits/rejected": 0.8839060068130493, | |
| "logps/chosen": -1.7750276327133179, | |
| "logps/rejected": -3.3542592525482178, | |
| "loss": 0.4743, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -17.750276565551758, | |
| "rewards/margins": 15.792311668395996, | |
| "rewards/rejected": -33.54258728027344, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.5485429328346579, | |
| "grad_norm": 15.6875, | |
| "learning_rate": 3.172042589921506e-07, | |
| "logits/chosen": 0.9265443086624146, | |
| "logits/rejected": 0.8019118309020996, | |
| "logps/chosen": -1.751523494720459, | |
| "logps/rejected": -2.899667501449585, | |
| "loss": 0.6666, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -17.515233993530273, | |
| "rewards/margins": 11.481440544128418, | |
| "rewards/rejected": -28.996675491333008, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5510363098020882, | |
| "grad_norm": 27.375, | |
| "learning_rate": 3.1436195465365767e-07, | |
| "logits/chosen": 0.8846260905265808, | |
| "logits/rejected": 0.8169682621955872, | |
| "logps/chosen": -1.4631338119506836, | |
| "logps/rejected": -2.289689064025879, | |
| "loss": 0.5698, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -14.631338119506836, | |
| "rewards/margins": 8.265554428100586, | |
| "rewards/rejected": -22.896892547607422, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.5535296867695184, | |
| "grad_norm": 13.875, | |
| "learning_rate": 3.115220232715318e-07, | |
| "logits/chosen": 0.8759758472442627, | |
| "logits/rejected": 0.8523691296577454, | |
| "logps/chosen": -1.8312069177627563, | |
| "logps/rejected": -3.232063055038452, | |
| "loss": 0.645, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -18.312068939208984, | |
| "rewards/margins": 14.008562088012695, | |
| "rewards/rejected": -32.32063293457031, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5560230637369488, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 3.086846539423744e-07, | |
| "logits/chosen": 0.8589959740638733, | |
| "logits/rejected": 0.7877765893936157, | |
| "logps/chosen": -1.3714457750320435, | |
| "logps/rejected": -2.5221285820007324, | |
| "loss": 0.584, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -13.714457511901855, | |
| "rewards/margins": 11.506828308105469, | |
| "rewards/rejected": -25.221284866333008, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.558516440704379, | |
| "grad_norm": 12.75, | |
| "learning_rate": 3.0585003559219284e-07, | |
| "logits/chosen": 0.7336137294769287, | |
| "logits/rejected": 0.8082336187362671, | |
| "logps/chosen": -2.2451255321502686, | |
| "logps/rejected": -4.2201972007751465, | |
| "loss": 0.6203, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -22.451255798339844, | |
| "rewards/margins": 19.750713348388672, | |
| "rewards/rejected": -42.20196533203125, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5610098176718092, | |
| "grad_norm": 50.0, | |
| "learning_rate": 3.030183569638207e-07, | |
| "logits/chosen": 0.7706287503242493, | |
| "logits/rejected": 0.7501264810562134, | |
| "logps/chosen": -1.5991909503936768, | |
| "logps/rejected": -2.917886972427368, | |
| "loss": 0.2763, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -15.99190902709961, | |
| "rewards/margins": 13.186960220336914, | |
| "rewards/rejected": -29.178869247436523, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5635031946392395, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 3.001898066043502e-07, | |
| "logits/chosen": 0.9699455499649048, | |
| "logits/rejected": 0.8485396504402161, | |
| "logps/chosen": -2.2904715538024902, | |
| "logps/rejected": -4.465200424194336, | |
| "loss": 0.0169, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -22.90471649169922, | |
| "rewards/margins": 21.74728775024414, | |
| "rewards/rejected": -44.652008056640625, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5659965716066698, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 2.973645728525786e-07, | |
| "logits/chosen": 0.8099995851516724, | |
| "logits/rejected": 0.6409150958061218, | |
| "logps/chosen": -1.6182489395141602, | |
| "logps/rejected": -2.964346408843994, | |
| "loss": 0.3709, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -16.1824893951416, | |
| "rewards/margins": 13.460972785949707, | |
| "rewards/rejected": -29.643461227416992, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.5684899485741001, | |
| "grad_norm": 33.5, | |
| "learning_rate": 2.9454284382646654e-07, | |
| "logits/chosen": 0.8826979398727417, | |
| "logits/rejected": 0.7478980422019958, | |
| "logps/chosen": -1.6775342226028442, | |
| "logps/rejected": -3.144256830215454, | |
| "loss": 0.7344, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -16.77534294128418, | |
| "rewards/margins": 14.667223930358887, | |
| "rewards/rejected": -31.44256591796875, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5709833255415303, | |
| "grad_norm": 8.0625, | |
| "learning_rate": 2.917248074106132e-07, | |
| "logits/chosen": 0.7391412854194641, | |
| "logits/rejected": 0.7339631915092468, | |
| "logps/chosen": -1.612046241760254, | |
| "logps/rejected": -2.4834823608398438, | |
| "loss": 0.46, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -16.120464324951172, | |
| "rewards/margins": 8.714361190795898, | |
| "rewards/rejected": -24.834821701049805, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5734767025089605, | |
| "grad_norm": 10.5625, | |
| "learning_rate": 2.889106512437452e-07, | |
| "logits/chosen": 0.7340772151947021, | |
| "logits/rejected": 0.8868482708930969, | |
| "logps/chosen": -1.7300639152526855, | |
| "logps/rejected": -2.9352312088012695, | |
| "loss": 0.454, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -17.30063819885254, | |
| "rewards/margins": 12.051673889160156, | |
| "rewards/rejected": -29.352313995361328, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5759700794763908, | |
| "grad_norm": 9.8125, | |
| "learning_rate": 2.8610056270622344e-07, | |
| "logits/chosen": 0.9421735405921936, | |
| "logits/rejected": 0.7073564529418945, | |
| "logps/chosen": -1.7943646907806396, | |
| "logps/rejected": -3.0728399753570557, | |
| "loss": 0.4413, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -17.943645477294922, | |
| "rewards/margins": 12.784753799438477, | |
| "rewards/rejected": -30.7283992767334, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5784634564438211, | |
| "grad_norm": 14.4375, | |
| "learning_rate": 2.8329472890756593e-07, | |
| "logits/chosen": 0.8662580251693726, | |
| "logits/rejected": 0.844997227191925, | |
| "logps/chosen": -1.5802185535430908, | |
| "logps/rejected": -2.640042304992676, | |
| "loss": 0.8093, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -15.802186965942383, | |
| "rewards/margins": 10.598236083984375, | |
| "rewards/rejected": -26.40042495727539, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5809568334112514, | |
| "grad_norm": 37.25, | |
| "learning_rate": 2.8049333667398917e-07, | |
| "logits/chosen": 0.9215195775032043, | |
| "logits/rejected": 0.8155514597892761, | |
| "logps/chosen": -2.146245241165161, | |
| "logps/rejected": -3.7037928104400635, | |
| "loss": 0.6597, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -21.462451934814453, | |
| "rewards/margins": 15.575474739074707, | |
| "rewards/rejected": -37.037925720214844, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5834502103786816, | |
| "grad_norm": 10.375, | |
| "learning_rate": 2.776965725359684e-07, | |
| "logits/chosen": 0.8086358308792114, | |
| "logits/rejected": 0.7704899907112122, | |
| "logps/chosen": -1.5882647037506104, | |
| "logps/rejected": -2.976109743118286, | |
| "loss": 0.6318, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -15.882646560668945, | |
| "rewards/margins": 13.878451347351074, | |
| "rewards/rejected": -29.761098861694336, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5859435873461118, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 2.7490462271581774e-07, | |
| "logits/chosen": 0.9086362719535828, | |
| "logits/rejected": 0.8243853449821472, | |
| "logps/chosen": -1.8874664306640625, | |
| "logps/rejected": -3.1539669036865234, | |
| "loss": 0.7813, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -18.874664306640625, | |
| "rewards/margins": 12.665003776550293, | |
| "rewards/rejected": -31.539669036865234, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5884369643135422, | |
| "grad_norm": 9.375, | |
| "learning_rate": 2.7211767311529e-07, | |
| "logits/chosen": 0.8527828454971313, | |
| "logits/rejected": 0.8761582374572754, | |
| "logps/chosen": -1.5832545757293701, | |
| "logps/rejected": -2.610931396484375, | |
| "loss": 0.7349, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -15.832547187805176, | |
| "rewards/margins": 10.276766777038574, | |
| "rewards/rejected": -26.10931396484375, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5909303412809724, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 2.6933590930319903e-07, | |
| "logits/chosen": 0.7043416500091553, | |
| "logits/rejected": 0.7324919104576111, | |
| "logps/chosen": -1.7684717178344727, | |
| "logps/rejected": -3.180450916290283, | |
| "loss": 0.4904, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -17.68471908569336, | |
| "rewards/margins": 14.11978816986084, | |
| "rewards/rejected": -31.804506301879883, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5934237182484027, | |
| "grad_norm": 7.78125, | |
| "learning_rate": 2.665595165030632e-07, | |
| "logits/chosen": 0.7452791929244995, | |
| "logits/rejected": 0.7571016550064087, | |
| "logps/chosen": -1.6962933540344238, | |
| "logps/rejected": -4.225780963897705, | |
| "loss": 0.0719, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -16.962934494018555, | |
| "rewards/margins": 25.29487419128418, | |
| "rewards/rejected": -42.257808685302734, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5959170952158329, | |
| "grad_norm": 11.375, | |
| "learning_rate": 2.637886795807726e-07, | |
| "logits/chosen": 0.81926429271698, | |
| "logits/rejected": 0.8182339072227478, | |
| "logps/chosen": -1.742372751235962, | |
| "logps/rejected": -3.2170917987823486, | |
| "loss": 0.3052, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -17.42372703552246, | |
| "rewards/margins": 14.747193336486816, | |
| "rewards/rejected": -32.17091751098633, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.5984104721832632, | |
| "grad_norm": 7.34375, | |
| "learning_rate": 2.6102358303227965e-07, | |
| "logits/chosen": 0.8492619395256042, | |
| "logits/rejected": 0.8256470561027527, | |
| "logps/chosen": -1.6656391620635986, | |
| "logps/rejected": -3.1693525314331055, | |
| "loss": 0.713, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -16.656391143798828, | |
| "rewards/margins": 15.03713607788086, | |
| "rewards/rejected": -31.693523406982422, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6009038491506935, | |
| "grad_norm": 12.375, | |
| "learning_rate": 2.5826441097131433e-07, | |
| "logits/chosen": 0.7694429755210876, | |
| "logits/rejected": 0.7062366008758545, | |
| "logps/chosen": -1.8840895891189575, | |
| "logps/rejected": -3.3917837142944336, | |
| "loss": 0.4917, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -18.84089469909668, | |
| "rewards/margins": 15.07693862915039, | |
| "rewards/rejected": -33.9178352355957, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6033972261181237, | |
| "grad_norm": 3.140625, | |
| "learning_rate": 2.555113471171251e-07, | |
| "logits/chosen": 0.7511383295059204, | |
| "logits/rejected": 0.8419240713119507, | |
| "logps/chosen": -1.99148428440094, | |
| "logps/rejected": -3.644866943359375, | |
| "loss": 0.1984, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -19.914844512939453, | |
| "rewards/margins": 16.533824920654297, | |
| "rewards/rejected": -36.44866943359375, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.605890603085554, | |
| "grad_norm": 34.0, | |
| "learning_rate": 2.527645747822462e-07, | |
| "logits/chosen": 0.7965211272239685, | |
| "logits/rejected": 0.7004488706588745, | |
| "logps/chosen": -1.90436851978302, | |
| "logps/rejected": -3.2234106063842773, | |
| "loss": 0.3643, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -19.043685913085938, | |
| "rewards/margins": 13.19041919708252, | |
| "rewards/rejected": -32.23410415649414, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6083839800529842, | |
| "grad_norm": 9.125, | |
| "learning_rate": 2.5002427686029125e-07, | |
| "logits/chosen": 0.9241939783096313, | |
| "logits/rejected": 0.8476071953773499, | |
| "logps/chosen": -1.663865566253662, | |
| "logps/rejected": -2.624908685684204, | |
| "loss": 0.5361, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -16.638656616210938, | |
| "rewards/margins": 9.610431671142578, | |
| "rewards/rejected": -26.249088287353516, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6108773570204146, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 2.472906358137759e-07, | |
| "logits/chosen": 0.7417331337928772, | |
| "logits/rejected": 0.67648845911026, | |
| "logps/chosen": -1.45391845703125, | |
| "logps/rejected": -2.7644288539886475, | |
| "loss": 0.451, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -14.539186477661133, | |
| "rewards/margins": 13.105106353759766, | |
| "rewards/rejected": -27.644290924072266, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6133707339878448, | |
| "grad_norm": 13.9375, | |
| "learning_rate": 2.445638336619681e-07, | |
| "logits/chosen": 0.8194867968559265, | |
| "logits/rejected": 0.7898424863815308, | |
| "logps/chosen": -1.7679089307785034, | |
| "logps/rejected": -3.1125354766845703, | |
| "loss": 0.5163, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -17.679088592529297, | |
| "rewards/margins": 13.446268081665039, | |
| "rewards/rejected": -31.12535858154297, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.615864110955275, | |
| "grad_norm": 9.8125, | |
| "learning_rate": 2.418440519687684e-07, | |
| "logits/chosen": 0.9577868580818176, | |
| "logits/rejected": 0.7647145986557007, | |
| "logps/chosen": -1.611385703086853, | |
| "logps/rejected": -2.760087728500366, | |
| "loss": 0.6587, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -16.11385726928711, | |
| "rewards/margins": 11.487018585205078, | |
| "rewards/rejected": -27.600875854492188, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6183574879227053, | |
| "grad_norm": 60.75, | |
| "learning_rate": 2.391314718306212e-07, | |
| "logits/chosen": 0.8142352104187012, | |
| "logits/rejected": 0.7828744053840637, | |
| "logps/chosen": -1.1173535585403442, | |
| "logps/rejected": -1.752410650253296, | |
| "loss": 0.6811, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -11.173534393310547, | |
| "rewards/margins": 6.350571632385254, | |
| "rewards/rejected": -17.52410888671875, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6208508648901355, | |
| "grad_norm": 12.375, | |
| "learning_rate": 2.3642627386445537e-07, | |
| "logits/chosen": 0.8487910628318787, | |
| "logits/rejected": 0.8330541849136353, | |
| "logps/chosen": -1.5378450155258179, | |
| "logps/rejected": -2.359829902648926, | |
| "loss": 0.8157, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -15.378450393676758, | |
| "rewards/margins": 8.219846725463867, | |
| "rewards/rejected": -23.598297119140625, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6233442418575659, | |
| "grad_norm": 6.25, | |
| "learning_rate": 2.3372863819565868e-07, | |
| "logits/chosen": 0.8798298239707947, | |
| "logits/rejected": 0.7591216564178467, | |
| "logps/chosen": -1.6283077001571655, | |
| "logps/rejected": -3.044259548187256, | |
| "loss": 0.4055, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -16.2830753326416, | |
| "rewards/margins": 14.159520149230957, | |
| "rewards/rejected": -30.442594528198242, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6258376188249961, | |
| "grad_norm": 6.90625, | |
| "learning_rate": 2.310387444460842e-07, | |
| "logits/chosen": 0.8435265421867371, | |
| "logits/rejected": 0.6582808494567871, | |
| "logps/chosen": -1.9542193412780762, | |
| "logps/rejected": -3.527535915374756, | |
| "loss": 0.2511, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -19.542194366455078, | |
| "rewards/margins": 15.733165740966797, | |
| "rewards/rejected": -35.275360107421875, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6283309957924264, | |
| "grad_norm": 12.5625, | |
| "learning_rate": 2.2835677172208942e-07, | |
| "logits/chosen": 0.9236465692520142, | |
| "logits/rejected": 0.7837573885917664, | |
| "logps/chosen": -1.5361783504486084, | |
| "logps/rejected": -2.8093583583831787, | |
| "loss": 0.4637, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -15.361783981323242, | |
| "rewards/margins": 12.731797218322754, | |
| "rewards/rejected": -28.093584060668945, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6308243727598566, | |
| "grad_norm": 7.25, | |
| "learning_rate": 2.2568289860261148e-07, | |
| "logits/chosen": 0.8141547441482544, | |
| "logits/rejected": 0.734942615032196, | |
| "logps/chosen": -1.593569040298462, | |
| "logps/rejected": -3.0460009574890137, | |
| "loss": 0.4794, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -15.935691833496094, | |
| "rewards/margins": 14.52431869506836, | |
| "rewards/rejected": -30.46000862121582, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6333177497272869, | |
| "grad_norm": 19.875, | |
| "learning_rate": 2.2301730312727568e-07, | |
| "logits/chosen": 0.8214707374572754, | |
| "logits/rejected": 0.7384806871414185, | |
| "logps/chosen": -1.9334094524383545, | |
| "logps/rejected": -3.117767095565796, | |
| "loss": 0.5317, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -19.334095001220703, | |
| "rewards/margins": 11.843574523925781, | |
| "rewards/rejected": -31.177671432495117, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6358111266947172, | |
| "grad_norm": 4.375, | |
| "learning_rate": 2.203601627845411e-07, | |
| "logits/chosen": 0.9514889717102051, | |
| "logits/rejected": 0.8385657072067261, | |
| "logps/chosen": -2.093611001968384, | |
| "logps/rejected": -4.076337814331055, | |
| "loss": 0.1654, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -20.936111450195312, | |
| "rewards/margins": 19.8272705078125, | |
| "rewards/rejected": -40.76338195800781, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.6383045036621474, | |
| "grad_norm": 32.5, | |
| "learning_rate": 2.1771165449988274e-07, | |
| "logits/chosen": 1.076192855834961, | |
| "logits/rejected": 0.8149666786193848, | |
| "logps/chosen": -1.584862232208252, | |
| "logps/rejected": -2.5359246730804443, | |
| "loss": 0.4198, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -15.84862232208252, | |
| "rewards/margins": 9.510624885559082, | |
| "rewards/rejected": -25.3592472076416, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6407978806295777, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 2.1507195462401042e-07, | |
| "logits/chosen": 0.8264479041099548, | |
| "logits/rejected": 0.8565191626548767, | |
| "logps/chosen": -1.590366244316101, | |
| "logps/rejected": -2.96756911277771, | |
| "loss": 0.7281, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -15.903663635253906, | |
| "rewards/margins": 13.772027969360352, | |
| "rewards/rejected": -29.675691604614258, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.6432912575970079, | |
| "grad_norm": 18.75, | |
| "learning_rate": 2.1244123892112674e-07, | |
| "logits/chosen": 0.8875083923339844, | |
| "logits/rejected": 0.8365639448165894, | |
| "logps/chosen": -1.9993164539337158, | |
| "logps/rejected": -4.3548431396484375, | |
| "loss": 0.4588, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -19.993162155151367, | |
| "rewards/margins": 23.555269241333008, | |
| "rewards/rejected": -43.548431396484375, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6457846345644382, | |
| "grad_norm": 5.09375, | |
| "learning_rate": 2.0981968255722427e-07, | |
| "logits/chosen": 0.9401863217353821, | |
| "logits/rejected": 0.8267409801483154, | |
| "logps/chosen": -1.508123755455017, | |
| "logps/rejected": -2.8934311866760254, | |
| "loss": 0.2964, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -15.08123779296875, | |
| "rewards/margins": 13.853076934814453, | |
| "rewards/rejected": -28.93431282043457, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6482780115318685, | |
| "grad_norm": 10.125, | |
| "learning_rate": 2.072074600884213e-07, | |
| "logits/chosen": 0.7929245233535767, | |
| "logits/rejected": 0.7758727669715881, | |
| "logps/chosen": -1.806505560874939, | |
| "logps/rejected": -3.316180944442749, | |
| "loss": 0.6586, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -18.0650577545166, | |
| "rewards/margins": 15.09675407409668, | |
| "rewards/rejected": -33.16181182861328, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6507713884992987, | |
| "grad_norm": 6.96875, | |
| "learning_rate": 2.0460474544933978e-07, | |
| "logits/chosen": 0.7232526540756226, | |
| "logits/rejected": 0.7585304975509644, | |
| "logps/chosen": -1.4770225286483765, | |
| "logps/rejected": -2.5309412479400635, | |
| "loss": 0.423, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -14.770224571228027, | |
| "rewards/margins": 10.539185523986816, | |
| "rewards/rejected": -25.309412002563477, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.653264765466729, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 2.020117119415233e-07, | |
| "logits/chosen": 0.7610968351364136, | |
| "logits/rejected": 0.6675768494606018, | |
| "logps/chosen": -1.518571376800537, | |
| "logps/rejected": -2.640080690383911, | |
| "loss": 0.3495, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -15.185713768005371, | |
| "rewards/margins": 11.215094566345215, | |
| "rewards/rejected": -26.400808334350586, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6557581424341593, | |
| "grad_norm": 20.125, | |
| "learning_rate": 1.9942853222189841e-07, | |
| "logits/chosen": 0.8614793419837952, | |
| "logits/rejected": 0.7701671719551086, | |
| "logps/chosen": -1.5696934461593628, | |
| "logps/rejected": -2.8778579235076904, | |
| "loss": 0.6096, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -15.69693374633789, | |
| "rewards/margins": 13.081643104553223, | |
| "rewards/rejected": -28.778575897216797, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.6582515194015895, | |
| "grad_norm": 12.375, | |
| "learning_rate": 1.968553782912778e-07, | |
| "logits/chosen": 0.8768056631088257, | |
| "logits/rejected": 0.8102119565010071, | |
| "logps/chosen": -1.6998172998428345, | |
| "logps/rejected": -2.9253602027893066, | |
| "loss": 0.625, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -16.998172760009766, | |
| "rewards/margins": 12.255431175231934, | |
| "rewards/rejected": -29.253602981567383, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6607448963690198, | |
| "grad_norm": 29.0, | |
| "learning_rate": 1.942924214829077e-07, | |
| "logits/chosen": 0.9345517158508301, | |
| "logits/rejected": 0.7886137962341309, | |
| "logps/chosen": -1.9977731704711914, | |
| "logps/rejected": -3.9683516025543213, | |
| "loss": 0.5431, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -19.97772979736328, | |
| "rewards/margins": 19.705781936645508, | |
| "rewards/rejected": -39.68351364135742, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.66323827333645, | |
| "grad_norm": 4.125, | |
| "learning_rate": 1.9173983245106005e-07, | |
| "logits/chosen": 0.9463739395141602, | |
| "logits/rejected": 0.8353683948516846, | |
| "logps/chosen": -1.8554211854934692, | |
| "logps/rejected": -3.5142271518707275, | |
| "loss": 0.2197, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -18.554210662841797, | |
| "rewards/margins": 16.58806037902832, | |
| "rewards/rejected": -35.14227294921875, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6657316503038803, | |
| "grad_norm": 31.25, | |
| "learning_rate": 1.891977811596689e-07, | |
| "logits/chosen": 1.0108263492584229, | |
| "logits/rejected": 0.723067581653595, | |
| "logps/chosen": -1.615850567817688, | |
| "logps/rejected": -2.9190895557403564, | |
| "loss": 0.7786, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -16.158506393432617, | |
| "rewards/margins": 13.032387733459473, | |
| "rewards/rejected": -29.19089698791504, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.6682250272713106, | |
| "grad_norm": 3.640625, | |
| "learning_rate": 1.8666643687101418e-07, | |
| "logits/chosen": 0.922001302242279, | |
| "logits/rejected": 0.8183608651161194, | |
| "logps/chosen": -1.845801830291748, | |
| "logps/rejected": -3.9838411808013916, | |
| "loss": 0.2435, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -18.458017349243164, | |
| "rewards/margins": 21.380395889282227, | |
| "rewards/rejected": -39.83841323852539, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6707184042387409, | |
| "grad_norm": 9.625, | |
| "learning_rate": 1.8414596813445047e-07, | |
| "logits/chosen": 0.9229664206504822, | |
| "logits/rejected": 0.8024593591690063, | |
| "logps/chosen": -1.5461751222610474, | |
| "logps/rejected": -2.713073968887329, | |
| "loss": 0.4835, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -15.461751937866211, | |
| "rewards/margins": 11.668989181518555, | |
| "rewards/rejected": -27.130741119384766, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.6732117812061711, | |
| "grad_norm": 4.78125, | |
| "learning_rate": 1.8163654277518476e-07, | |
| "logits/chosen": 0.8847929835319519, | |
| "logits/rejected": 0.7221932411193848, | |
| "logps/chosen": -1.56783127784729, | |
| "logps/rejected": -2.7949106693267822, | |
| "loss": 0.357, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -15.678312301635742, | |
| "rewards/margins": 12.270795822143555, | |
| "rewards/rejected": -27.949108123779297, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6757051581736013, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 1.7913832788310162e-07, | |
| "logits/chosen": 0.9237401485443115, | |
| "logits/rejected": 0.8515968322753906, | |
| "logps/chosen": -1.6207122802734375, | |
| "logps/rejected": -2.975242853164673, | |
| "loss": 0.3603, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -16.207122802734375, | |
| "rewards/margins": 13.54530143737793, | |
| "rewards/rejected": -29.752422332763672, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.6781985351410317, | |
| "grad_norm": 24.375, | |
| "learning_rate": 1.7665148980163747e-07, | |
| "logits/chosen": 0.9174185991287231, | |
| "logits/rejected": 0.8517237901687622, | |
| "logps/chosen": -1.9268598556518555, | |
| "logps/rejected": -3.653189182281494, | |
| "loss": 0.5412, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -19.268598556518555, | |
| "rewards/margins": 17.263296127319336, | |
| "rewards/rejected": -36.531890869140625, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6806919121084619, | |
| "grad_norm": 20.125, | |
| "learning_rate": 1.741761941167051e-07, | |
| "logits/chosen": 0.8469513654708862, | |
| "logits/rejected": 0.7570927739143372, | |
| "logps/chosen": -1.7269822359085083, | |
| "logps/rejected": -3.0970540046691895, | |
| "loss": 0.4379, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.26982307434082, | |
| "rewards/margins": 13.700716018676758, | |
| "rewards/rejected": -30.970539093017578, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.6831852890758922, | |
| "grad_norm": 7.65625, | |
| "learning_rate": 1.7171260564566735e-07, | |
| "logits/chosen": 0.853800892829895, | |
| "logits/rejected": 0.6823726892471313, | |
| "logps/chosen": -1.6823272705078125, | |
| "logps/rejected": -3.1744813919067383, | |
| "loss": 0.4328, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -16.823274612426758, | |
| "rewards/margins": 14.921540260314941, | |
| "rewards/rejected": -31.744813919067383, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6856786660433224, | |
| "grad_norm": 7.5625, | |
| "learning_rate": 1.6926088842636336e-07, | |
| "logits/chosen": 0.8564770817756653, | |
| "logits/rejected": 0.7224562168121338, | |
| "logps/chosen": -1.7104108333587646, | |
| "logps/rejected": -3.030578851699829, | |
| "loss": 0.34, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -17.104108810424805, | |
| "rewards/margins": 13.201680183410645, | |
| "rewards/rejected": -30.3057918548584, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.6881720430107527, | |
| "grad_norm": 5.375, | |
| "learning_rate": 1.6682120570618583e-07, | |
| "logits/chosen": 0.9256489276885986, | |
| "logits/rejected": 0.8403459787368774, | |
| "logps/chosen": -1.7236558198928833, | |
| "logps/rejected": -3.5449249744415283, | |
| "loss": 0.2784, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -17.23655891418457, | |
| "rewards/margins": 18.212690353393555, | |
| "rewards/rejected": -35.449249267578125, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.690665419978183, | |
| "grad_norm": 6.75, | |
| "learning_rate": 1.6439371993121142e-07, | |
| "logits/chosen": 1.0069345235824585, | |
| "logits/rejected": 0.8647799491882324, | |
| "logps/chosen": -1.7778537273406982, | |
| "logps/rejected": -3.3906145095825195, | |
| "loss": 0.4693, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.77853775024414, | |
| "rewards/margins": 16.127605438232422, | |
| "rewards/rejected": -33.90614318847656, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.6931587969456132, | |
| "grad_norm": 7.15625, | |
| "learning_rate": 1.61978592735384e-07, | |
| "logits/chosen": 0.7570043802261353, | |
| "logits/rejected": 0.7392297387123108, | |
| "logps/chosen": -1.772491455078125, | |
| "logps/rejected": -3.0083491802215576, | |
| "loss": 0.3305, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -17.72491455078125, | |
| "rewards/margins": 12.358576774597168, | |
| "rewards/rejected": -30.0834903717041, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6956521739130435, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 1.595759849297528e-07, | |
| "logits/chosen": 0.9452332258224487, | |
| "logits/rejected": 0.8405147790908813, | |
| "logps/chosen": -1.5206505060195923, | |
| "logps/rejected": -2.9708354473114014, | |
| "loss": 0.7058, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -15.206504821777344, | |
| "rewards/margins": 14.501848220825195, | |
| "rewards/rejected": -29.708354949951172, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.6981455508804737, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 1.5718605649176415e-07, | |
| "logits/chosen": 0.9056351780891418, | |
| "logits/rejected": 0.759840190410614, | |
| "logps/chosen": -1.3903148174285889, | |
| "logps/rejected": -2.4290876388549805, | |
| "loss": 0.4354, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -13.90314769744873, | |
| "rewards/margins": 10.387725830078125, | |
| "rewards/rejected": -24.290874481201172, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.700638927847904, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 1.5480896655460975e-07, | |
| "logits/chosen": 0.8469112515449524, | |
| "logits/rejected": 0.7188205718994141, | |
| "logps/chosen": -1.4428998231887817, | |
| "logps/rejected": -3.476562261581421, | |
| "loss": 0.4048, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -14.428997993469238, | |
| "rewards/margins": 20.336626052856445, | |
| "rewards/rejected": -34.765625, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.7031323048153343, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 1.5244487339663086e-07, | |
| "logits/chosen": 0.9786227941513062, | |
| "logits/rejected": 0.9008299112319946, | |
| "logps/chosen": -2.115980386734009, | |
| "logps/rejected": -3.8103702068328857, | |
| "loss": 0.3597, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -21.159805297851562, | |
| "rewards/margins": 16.943897247314453, | |
| "rewards/rejected": -38.103702545166016, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7056256817827645, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 1.5009393443077906e-07, | |
| "logits/chosen": 0.9762454032897949, | |
| "logits/rejected": 0.8306148648262024, | |
| "logps/chosen": -1.981116771697998, | |
| "logps/rejected": -3.2973973751068115, | |
| "loss": 0.4867, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -19.811168670654297, | |
| "rewards/margins": 13.16280460357666, | |
| "rewards/rejected": -32.973976135253906, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7081190587501948, | |
| "grad_norm": 22.625, | |
| "learning_rate": 1.477563061941355e-07, | |
| "logits/chosen": 1.017063856124878, | |
| "logits/rejected": 0.7016565799713135, | |
| "logps/chosen": -1.3156154155731201, | |
| "logps/rejected": -2.303849458694458, | |
| "loss": 0.6619, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -13.15615463256836, | |
| "rewards/margins": 9.882339477539062, | |
| "rewards/rejected": -23.038494110107422, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7106124357176251, | |
| "grad_norm": 12.9375, | |
| "learning_rate": 1.4543214433748714e-07, | |
| "logits/chosen": 1.039493203163147, | |
| "logits/rejected": 0.8438839912414551, | |
| "logps/chosen": -1.7385656833648682, | |
| "logps/rejected": -3.1600584983825684, | |
| "loss": 0.4472, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.385656356811523, | |
| "rewards/margins": 14.21492862701416, | |
| "rewards/rejected": -31.6005859375, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7131058126850554, | |
| "grad_norm": 6.125, | |
| "learning_rate": 1.4312160361496325e-07, | |
| "logits/chosen": 0.880534291267395, | |
| "logits/rejected": 0.8419840335845947, | |
| "logps/chosen": -1.7119375467300415, | |
| "logps/rejected": -3.064938545227051, | |
| "loss": 0.5029, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.11937713623047, | |
| "rewards/margins": 13.530012130737305, | |
| "rewards/rejected": -30.64938735961914, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7155991896524856, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 1.4082483787373093e-07, | |
| "logits/chosen": 0.8826863765716553, | |
| "logits/rejected": 0.8228853940963745, | |
| "logps/chosen": -1.5570282936096191, | |
| "logps/rejected": -2.6813974380493164, | |
| "loss": 0.8264, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -15.570282936096191, | |
| "rewards/margins": 11.243692398071289, | |
| "rewards/rejected": -26.813976287841797, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7180925666199158, | |
| "grad_norm": 7.6875, | |
| "learning_rate": 1.3854200004375123e-07, | |
| "logits/chosen": 0.752357542514801, | |
| "logits/rejected": 0.7416955828666687, | |
| "logps/chosen": -1.8245292901992798, | |
| "logps/rejected": -3.410393714904785, | |
| "loss": 0.2918, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -18.24529266357422, | |
| "rewards/margins": 15.858641624450684, | |
| "rewards/rejected": -34.10393142700195, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7205859435873461, | |
| "grad_norm": 6.375, | |
| "learning_rate": 1.3627324212759662e-07, | |
| "logits/chosen": 0.9414355754852295, | |
| "logits/rejected": 0.7949234843254089, | |
| "logps/chosen": -1.5395060777664185, | |
| "logps/rejected": -2.7976861000061035, | |
| "loss": 0.5103, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -15.395059585571289, | |
| "rewards/margins": 12.581799507141113, | |
| "rewards/rejected": -27.97686195373535, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7230793205547764, | |
| "grad_norm": 13.375, | |
| "learning_rate": 1.3401871519032942e-07, | |
| "logits/chosen": 0.7719554305076599, | |
| "logits/rejected": 0.8289276957511902, | |
| "logps/chosen": -1.5537012815475464, | |
| "logps/rejected": -2.931002140045166, | |
| "loss": 0.4564, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -15.537013053894043, | |
| "rewards/margins": 13.7730073928833, | |
| "rewards/rejected": -29.310020446777344, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7255726975222067, | |
| "grad_norm": 12.0, | |
| "learning_rate": 1.317785693494433e-07, | |
| "logits/chosen": 0.906543493270874, | |
| "logits/rejected": 0.8372653126716614, | |
| "logps/chosen": -1.877508282661438, | |
| "logps/rejected": -3.658639669418335, | |
| "loss": 0.5423, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -18.775081634521484, | |
| "rewards/margins": 17.811315536499023, | |
| "rewards/rejected": -36.58639907836914, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.7280660744896369, | |
| "grad_norm": 20.25, | |
| "learning_rate": 1.2955295376486793e-07, | |
| "logits/chosen": 0.9387526512145996, | |
| "logits/rejected": 0.8902648687362671, | |
| "logps/chosen": -1.6775869131088257, | |
| "logps/rejected": -3.076120138168335, | |
| "loss": 0.8689, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -16.775869369506836, | |
| "rewards/margins": 13.985333442687988, | |
| "rewards/rejected": -30.76120376586914, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7305594514570671, | |
| "grad_norm": 14.0625, | |
| "learning_rate": 1.273420166290371e-07, | |
| "logits/chosen": 0.771159827709198, | |
| "logits/rejected": 0.7604851126670837, | |
| "logps/chosen": -1.4995477199554443, | |
| "logps/rejected": -2.7952613830566406, | |
| "loss": 0.5616, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -14.995477676391602, | |
| "rewards/margins": 12.957136154174805, | |
| "rewards/rejected": -27.952613830566406, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.7330528284244975, | |
| "grad_norm": 5.625, | |
| "learning_rate": 1.2514590515702093e-07, | |
| "logits/chosen": 0.9259358048439026, | |
| "logits/rejected": 0.8557572364807129, | |
| "logps/chosen": -1.718395471572876, | |
| "logps/rejected": -3.260573387145996, | |
| "loss": 0.4787, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.183956146240234, | |
| "rewards/margins": 15.421775817871094, | |
| "rewards/rejected": -32.60573196411133, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.7355462053919277, | |
| "grad_norm": 3.984375, | |
| "learning_rate": 1.2296476557672452e-07, | |
| "logits/chosen": 0.9226200580596924, | |
| "logits/rejected": 0.7464591264724731, | |
| "logps/chosen": -1.7733253240585327, | |
| "logps/rejected": -3.0839881896972656, | |
| "loss": 0.4696, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -17.733253479003906, | |
| "rewards/margins": 13.10662841796875, | |
| "rewards/rejected": -30.839881896972656, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.738039582359358, | |
| "grad_norm": 12.8125, | |
| "learning_rate": 1.2079874311915026e-07, | |
| "logits/chosen": 0.9862551689147949, | |
| "logits/rejected": 0.8426701426506042, | |
| "logps/chosen": -1.5399045944213867, | |
| "logps/rejected": -2.9282631874084473, | |
| "loss": 0.5123, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -15.399044036865234, | |
| "rewards/margins": 13.883587837219238, | |
| "rewards/rejected": -29.282634735107422, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7405329593267882, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 1.1864798200872824e-07, | |
| "logits/chosen": 0.9428563714027405, | |
| "logits/rejected": 0.7972367405891418, | |
| "logps/chosen": -1.6001325845718384, | |
| "logps/rejected": -3.5637686252593994, | |
| "loss": 0.2544, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -16.001325607299805, | |
| "rewards/margins": 19.63636016845703, | |
| "rewards/rejected": -35.6376838684082, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.7430263362942184, | |
| "grad_norm": 27.75, | |
| "learning_rate": 1.1651262545371318e-07, | |
| "logits/chosen": 0.8185573816299438, | |
| "logits/rejected": 0.8264700174331665, | |
| "logps/chosen": -1.9273895025253296, | |
| "logps/rejected": -3.5442724227905273, | |
| "loss": 0.3429, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -19.273895263671875, | |
| "rewards/margins": 16.16883087158203, | |
| "rewards/rejected": -35.442726135253906, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7455197132616488, | |
| "grad_norm": 10.25, | |
| "learning_rate": 1.1439281563664836e-07, | |
| "logits/chosen": 0.8733742833137512, | |
| "logits/rejected": 0.8228683471679688, | |
| "logps/chosen": -2.0226247310638428, | |
| "logps/rejected": -3.6978607177734375, | |
| "loss": 0.2416, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -20.226245880126953, | |
| "rewards/margins": 16.75235939025879, | |
| "rewards/rejected": -36.978607177734375, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.748013090229079, | |
| "grad_norm": 23.75, | |
| "learning_rate": 1.1228869370489933e-07, | |
| "logits/chosen": 0.8455230593681335, | |
| "logits/rejected": 0.726607620716095, | |
| "logps/chosen": -1.7042028903961182, | |
| "logps/rejected": -2.9396235942840576, | |
| "loss": 0.6624, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -17.042028427124023, | |
| "rewards/margins": 12.354209899902344, | |
| "rewards/rejected": -29.396238327026367, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7505064671965093, | |
| "grad_norm": 16.625, | |
| "learning_rate": 1.1020039976125454e-07, | |
| "logits/chosen": 0.862872838973999, | |
| "logits/rejected": 0.7240791320800781, | |
| "logps/chosen": -1.6873464584350586, | |
| "logps/rejected": -3.173642635345459, | |
| "loss": 0.4094, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -16.873464584350586, | |
| "rewards/margins": 14.862963676452637, | |
| "rewards/rejected": -31.73642921447754, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.7529998441639395, | |
| "grad_norm": 20.625, | |
| "learning_rate": 1.0812807285459737e-07, | |
| "logits/chosen": 0.8827072978019714, | |
| "logits/rejected": 0.7801661491394043, | |
| "logps/chosen": -1.760999321937561, | |
| "logps/rejected": -3.0102696418762207, | |
| "loss": 0.1915, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -17.60999298095703, | |
| "rewards/margins": 12.492703437805176, | |
| "rewards/rejected": -30.10269546508789, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.7554932211313699, | |
| "grad_norm": 9.875, | |
| "learning_rate": 1.0607185097064733e-07, | |
| "logits/chosen": 0.9539688229560852, | |
| "logits/rejected": 0.8203067183494568, | |
| "logps/chosen": -1.5383775234222412, | |
| "logps/rejected": -2.5994794368743896, | |
| "loss": 0.6321, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -15.38377571105957, | |
| "rewards/margins": 10.61102294921875, | |
| "rewards/rejected": -25.994796752929688, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.7579865980988001, | |
| "grad_norm": 10.375, | |
| "learning_rate": 1.0403187102277212e-07, | |
| "logits/chosen": 0.9740419387817383, | |
| "logits/rejected": 0.7236615419387817, | |
| "logps/chosen": -1.680724024772644, | |
| "logps/rejected": -3.213937759399414, | |
| "loss": 0.5017, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -16.807239532470703, | |
| "rewards/margins": 15.332136154174805, | |
| "rewards/rejected": -32.13937759399414, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.7604799750662303, | |
| "grad_norm": 8.5, | |
| "learning_rate": 1.020082688428718e-07, | |
| "logits/chosen": 0.7849897146224976, | |
| "logits/rejected": 0.7147915959358215, | |
| "logps/chosen": -1.7177461385726929, | |
| "logps/rejected": -3.196275234222412, | |
| "loss": 0.4481, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.17746353149414, | |
| "rewards/margins": 14.785287857055664, | |
| "rewards/rejected": -31.962751388549805, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.7629733520336606, | |
| "grad_norm": 6.84375, | |
| "learning_rate": 1.0000117917233373e-07, | |
| "logits/chosen": 0.7844271659851074, | |
| "logits/rejected": 0.795640230178833, | |
| "logps/chosen": -1.8986274003982544, | |
| "logps/rejected": -3.815453052520752, | |
| "loss": 0.2918, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -18.98627471923828, | |
| "rewards/margins": 19.168254852294922, | |
| "rewards/rejected": -38.1545295715332, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.7654667290010908, | |
| "grad_norm": 22.375, | |
| "learning_rate": 9.801073565306134e-08, | |
| "logits/chosen": 0.915310800075531, | |
| "logits/rejected": 0.8614601492881775, | |
| "logps/chosen": -1.577059030532837, | |
| "logps/rejected": -2.661583185195923, | |
| "loss": 0.7258, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -15.770591735839844, | |
| "rewards/margins": 10.84524154663086, | |
| "rewards/rejected": -26.61583137512207, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.7679601059685212, | |
| "grad_norm": 11.5, | |
| "learning_rate": 9.603707081857533e-08, | |
| "logits/chosen": 0.8341223001480103, | |
| "logits/rejected": 0.7446467876434326, | |
| "logps/chosen": -2.0905356407165527, | |
| "logps/rejected": -3.864170551300049, | |
| "loss": 0.2911, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -20.90535545349121, | |
| "rewards/margins": 17.73634910583496, | |
| "rewards/rejected": -38.64170837402344, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.7704534829359514, | |
| "grad_norm": 9.4375, | |
| "learning_rate": 9.40803160851891e-08, | |
| "logits/chosen": 0.9718061685562134, | |
| "logits/rejected": 0.9494335651397705, | |
| "logps/chosen": -1.6537656784057617, | |
| "logps/rejected": -3.119168758392334, | |
| "loss": 0.9953, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -16.537656784057617, | |
| "rewards/margins": 14.654030799865723, | |
| "rewards/rejected": -31.191692352294922, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.7729468599033816, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 9.214060174325823e-08, | |
| "logits/chosen": 0.7993795871734619, | |
| "logits/rejected": 0.7918787002563477, | |
| "logps/chosen": -1.9169942140579224, | |
| "logps/rejected": -3.608771800994873, | |
| "loss": 0.4286, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -19.16994285583496, | |
| "rewards/margins": 16.91777229309082, | |
| "rewards/rejected": -36.08771514892578, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7754402368708119, | |
| "grad_norm": 76.0, | |
| "learning_rate": 9.021805694850552e-08, | |
| "logits/chosen": 0.7791964411735535, | |
| "logits/rejected": 0.6525046229362488, | |
| "logps/chosen": -1.878448724746704, | |
| "logps/rejected": -3.2219111919403076, | |
| "loss": 0.3889, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -18.784488677978516, | |
| "rewards/margins": 13.434623718261719, | |
| "rewards/rejected": -32.21910858154297, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.7779336138382422, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 8.831280971342049e-08, | |
| "logits/chosen": 0.8384397625923157, | |
| "logits/rejected": 0.8411078453063965, | |
| "logps/chosen": -1.9580962657928467, | |
| "logps/rejected": -3.684387445449829, | |
| "loss": 0.4734, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -19.580963134765625, | |
| "rewards/margins": 17.26291275024414, | |
| "rewards/rejected": -36.8438720703125, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.7804269908056725, | |
| "grad_norm": 7.46875, | |
| "learning_rate": 8.642498689873619e-08, | |
| "logits/chosen": 0.9194357395172119, | |
| "logits/rejected": 0.7971946597099304, | |
| "logps/chosen": -1.6777794361114502, | |
| "logps/rejected": -2.920085906982422, | |
| "loss": 0.6, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -16.777795791625977, | |
| "rewards/margins": 12.423064231872559, | |
| "rewards/rejected": -29.20086097717285, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.7829203677731027, | |
| "grad_norm": 25.0, | |
| "learning_rate": 8.45547142049821e-08, | |
| "logits/chosen": 0.8890621066093445, | |
| "logits/rejected": 0.6691703796386719, | |
| "logps/chosen": -1.6438565254211426, | |
| "logps/rejected": -3.2583494186401367, | |
| "loss": 0.2676, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -16.43856430053711, | |
| "rewards/margins": 16.14493179321289, | |
| "rewards/rejected": -32.58349609375, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7854137447405329, | |
| "grad_norm": 36.75, | |
| "learning_rate": 8.270211616411413e-08, | |
| "logits/chosen": 0.8961160182952881, | |
| "logits/rejected": 0.7380497455596924, | |
| "logps/chosen": -1.8019180297851562, | |
| "logps/rejected": -3.853311061859131, | |
| "loss": 0.4376, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -18.019180297851562, | |
| "rewards/margins": 20.513931274414062, | |
| "rewards/rejected": -38.533111572265625, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.7879071217079632, | |
| "grad_norm": 4.9375, | |
| "learning_rate": 8.086731613122324e-08, | |
| "logits/chosen": 0.8375217914581299, | |
| "logits/rejected": 0.706248939037323, | |
| "logps/chosen": -1.8641583919525146, | |
| "logps/rejected": -3.3603389263153076, | |
| "loss": 0.203, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -18.641584396362305, | |
| "rewards/margins": 14.961803436279297, | |
| "rewards/rejected": -33.60338592529297, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.7904004986753935, | |
| "grad_norm": 3.71875, | |
| "learning_rate": 7.905043627632113e-08, | |
| "logits/chosen": 0.7290425300598145, | |
| "logits/rejected": 0.7092160582542419, | |
| "logps/chosen": -1.6382381916046143, | |
| "logps/rejected": -3.2959959506988525, | |
| "loss": 0.2101, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -16.382381439208984, | |
| "rewards/margins": 16.577579498291016, | |
| "rewards/rejected": -32.9599609375, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.7928938756428238, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 7.725159757620596e-08, | |
| "logits/chosen": 0.9056103825569153, | |
| "logits/rejected": 0.8917890787124634, | |
| "logps/chosen": -1.4776190519332886, | |
| "logps/rejected": -2.642958641052246, | |
| "loss": 0.46, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -14.776190757751465, | |
| "rewards/margins": 11.653392791748047, | |
| "rewards/rejected": -26.429582595825195, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.795387252610254, | |
| "grad_norm": 7.28125, | |
| "learning_rate": 7.547091980640708e-08, | |
| "logits/chosen": 0.7614390850067139, | |
| "logits/rejected": 0.7574427127838135, | |
| "logps/chosen": -1.3012231588363647, | |
| "logps/rejected": -2.685375928878784, | |
| "loss": 0.4226, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -13.012231826782227, | |
| "rewards/margins": 13.841525077819824, | |
| "rewards/rejected": -26.853755950927734, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.7978806295776842, | |
| "grad_norm": 18.625, | |
| "learning_rate": 7.370852153320973e-08, | |
| "logits/chosen": 0.9617218971252441, | |
| "logits/rejected": 0.7541022896766663, | |
| "logps/chosen": -1.5465266704559326, | |
| "logps/rejected": -2.6077213287353516, | |
| "loss": 0.6237, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -15.465266227722168, | |
| "rewards/margins": 10.611949920654297, | |
| "rewards/rejected": -26.07721710205078, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8003740065451146, | |
| "grad_norm": 4.96875, | |
| "learning_rate": 7.196452010576056e-08, | |
| "logits/chosen": 0.8094066381454468, | |
| "logits/rejected": 0.7924161553382874, | |
| "logps/chosen": -2.0370168685913086, | |
| "logps/rejected": -3.8806991577148438, | |
| "loss": 0.2498, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -20.370168685913086, | |
| "rewards/margins": 18.436824798583984, | |
| "rewards/rejected": -38.80699157714844, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.8028673835125448, | |
| "grad_norm": 11.625, | |
| "learning_rate": 7.023903164825346e-08, | |
| "logits/chosen": 0.9718628525733948, | |
| "logits/rejected": 0.8176442384719849, | |
| "logps/chosen": -2.1258928775787354, | |
| "logps/rejected": -4.15927267074585, | |
| "loss": 0.6349, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -21.258926391601562, | |
| "rewards/margins": 20.333797454833984, | |
| "rewards/rejected": -41.59272384643555, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8053607604799751, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 6.853217105219782e-08, | |
| "logits/chosen": 0.7881964445114136, | |
| "logits/rejected": 0.6961764693260193, | |
| "logps/chosen": -1.541295051574707, | |
| "logps/rejected": -2.8516957759857178, | |
| "loss": 0.2766, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -15.412951469421387, | |
| "rewards/margins": 13.104007720947266, | |
| "rewards/rejected": -28.516956329345703, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.8078541374474053, | |
| "grad_norm": 6.96875, | |
| "learning_rate": 6.684405196876843e-08, | |
| "logits/chosen": 0.9054229259490967, | |
| "logits/rejected": 0.799680233001709, | |
| "logps/chosen": -1.280112624168396, | |
| "logps/rejected": -2.1591079235076904, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -12.801126480102539, | |
| "rewards/margins": 8.789952278137207, | |
| "rewards/rejected": -21.591079711914062, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8103475144148355, | |
| "grad_norm": 11.875, | |
| "learning_rate": 6.517478680123776e-08, | |
| "logits/chosen": 0.8642288446426392, | |
| "logits/rejected": 0.825298547744751, | |
| "logps/chosen": -1.4944114685058594, | |
| "logps/rejected": -2.5201451778411865, | |
| "loss": 0.7636, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -14.944114685058594, | |
| "rewards/margins": 10.257339477539062, | |
| "rewards/rejected": -25.201452255249023, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8128408913822659, | |
| "grad_norm": 7.59375, | |
| "learning_rate": 6.352448669749224e-08, | |
| "logits/chosen": 0.9343512654304504, | |
| "logits/rejected": 0.8261175155639648, | |
| "logps/chosen": -2.1113977432250977, | |
| "logps/rejected": -4.173766136169434, | |
| "loss": 0.3753, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -21.11397933959961, | |
| "rewards/margins": 20.623685836791992, | |
| "rewards/rejected": -41.73766326904297, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8153342683496961, | |
| "grad_norm": 5.25, | |
| "learning_rate": 6.189326154263068e-08, | |
| "logits/chosen": 0.7759539484977722, | |
| "logits/rejected": 0.7987840175628662, | |
| "logps/chosen": -1.8643192052841187, | |
| "logps/rejected": -3.6068685054779053, | |
| "loss": 0.4069, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -18.643192291259766, | |
| "rewards/margins": 17.425495147705078, | |
| "rewards/rejected": -36.068687438964844, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8178276453171264, | |
| "grad_norm": 22.875, | |
| "learning_rate": 6.028121995164812e-08, | |
| "logits/chosen": 0.8969675302505493, | |
| "logits/rejected": 0.7524930238723755, | |
| "logps/chosen": -1.4999438524246216, | |
| "logps/rejected": -2.7047371864318848, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -14.999438285827637, | |
| "rewards/margins": 12.047935485839844, | |
| "rewards/rejected": -27.04737091064453, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.8203210222845566, | |
| "grad_norm": 5.0, | |
| "learning_rate": 5.868846926220346e-08, | |
| "logits/chosen": 0.9210751056671143, | |
| "logits/rejected": 0.8755130767822266, | |
| "logps/chosen": -2.071080446243286, | |
| "logps/rejected": -4.128433704376221, | |
| "loss": 0.3026, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -20.710805892944336, | |
| "rewards/margins": 20.573535919189453, | |
| "rewards/rejected": -41.284339904785156, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.822814399251987, | |
| "grad_norm": 6.375, | |
| "learning_rate": 5.7115115527472575e-08, | |
| "logits/chosen": 0.746177077293396, | |
| "logits/rejected": 0.7545452117919922, | |
| "logps/chosen": -1.6824297904968262, | |
| "logps/rejected": -2.987593650817871, | |
| "loss": 0.3245, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -16.824296951293945, | |
| "rewards/margins": 13.051637649536133, | |
| "rewards/rejected": -29.875934600830078, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8253077762194172, | |
| "grad_norm": 9.1875, | |
| "learning_rate": 5.556126350908654e-08, | |
| "logits/chosen": 0.8064150810241699, | |
| "logits/rejected": 0.7365544438362122, | |
| "logps/chosen": -1.7546963691711426, | |
| "logps/rejected": -3.0438730716705322, | |
| "loss": 0.4564, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.54696273803711, | |
| "rewards/margins": 12.891766548156738, | |
| "rewards/rejected": -30.438732147216797, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.8278011531868474, | |
| "grad_norm": 40.75, | |
| "learning_rate": 5.402701667015655e-08, | |
| "logits/chosen": 0.8081064820289612, | |
| "logits/rejected": 0.8690564632415771, | |
| "logps/chosen": -2.0253567695617676, | |
| "logps/rejected": -3.4268417358398438, | |
| "loss": 0.591, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -20.253568649291992, | |
| "rewards/margins": 14.014848709106445, | |
| "rewards/rejected": -34.26841735839844, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8302945301542777, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 5.2512477168384125e-08, | |
| "logits/chosen": 0.7250826954841614, | |
| "logits/rejected": 0.6569004654884338, | |
| "logps/chosen": -1.7179774045944214, | |
| "logps/rejected": -3.111191987991333, | |
| "loss": 0.4474, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -17.179773330688477, | |
| "rewards/margins": 13.932147979736328, | |
| "rewards/rejected": -31.111919403076172, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.8327879071217079, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 5.101774584925959e-08, | |
| "logits/chosen": 0.7951087355613708, | |
| "logits/rejected": 0.7745989561080933, | |
| "logps/chosen": -1.7096567153930664, | |
| "logps/rejected": -3.168893814086914, | |
| "loss": 0.3182, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.096567153930664, | |
| "rewards/margins": 14.592374801635742, | |
| "rewards/rejected": -31.688940048217773, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.8352812840891383, | |
| "grad_norm": 11.8125, | |
| "learning_rate": 4.9542922239346865e-08, | |
| "logits/chosen": 0.9192527532577515, | |
| "logits/rejected": 0.8001135587692261, | |
| "logps/chosen": -1.9547454118728638, | |
| "logps/rejected": -3.6320178508758545, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -19.547454833984375, | |
| "rewards/margins": 16.772724151611328, | |
| "rewards/rejected": -36.3201789855957, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.8377746610565685, | |
| "grad_norm": 9.1875, | |
| "learning_rate": 4.8088104539656715e-08, | |
| "logits/chosen": 0.7919576168060303, | |
| "logits/rejected": 0.8408181667327881, | |
| "logps/chosen": -1.7132326364517212, | |
| "logps/rejected": -3.2617900371551514, | |
| "loss": 0.6176, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -17.132326126098633, | |
| "rewards/margins": 15.485575675964355, | |
| "rewards/rejected": -32.61790084838867, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.8402680380239987, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 4.665338961910819e-08, | |
| "logits/chosen": 0.9263704419136047, | |
| "logits/rejected": 0.9631155729293823, | |
| "logps/chosen": -1.8150866031646729, | |
| "logps/rejected": -3.4056591987609863, | |
| "loss": 0.3432, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -18.15086555480957, | |
| "rewards/margins": 15.905729293823242, | |
| "rewards/rejected": -34.05659103393555, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.842761414991429, | |
| "grad_norm": 14.8125, | |
| "learning_rate": 4.5238873008078036e-08, | |
| "logits/chosen": 0.92448890209198, | |
| "logits/rejected": 0.9032832980155945, | |
| "logps/chosen": -1.8629943132400513, | |
| "logps/rejected": -3.826144218444824, | |
| "loss": 0.4574, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -18.62994384765625, | |
| "rewards/margins": 19.631500244140625, | |
| "rewards/rejected": -38.261444091796875, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.8452547919588593, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 4.38446488920405e-08, | |
| "logits/chosen": 0.7789740562438965, | |
| "logits/rejected": 0.7178948521614075, | |
| "logps/chosen": -1.713463544845581, | |
| "logps/rejected": -3.125110149383545, | |
| "loss": 0.1509, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -17.13463592529297, | |
| "rewards/margins": 14.11646556854248, | |
| "rewards/rejected": -31.251100540161133, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.8477481689262896, | |
| "grad_norm": 11.5, | |
| "learning_rate": 4.247081010529546e-08, | |
| "logits/chosen": 0.7394505739212036, | |
| "logits/rejected": 0.7483058571815491, | |
| "logps/chosen": -1.7241424322128296, | |
| "logps/rejected": -2.999985456466675, | |
| "loss": 0.9044, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -17.241424560546875, | |
| "rewards/margins": 12.758427619934082, | |
| "rewards/rejected": -29.999855041503906, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8502415458937198, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 4.1117448124787594e-08, | |
| "logits/chosen": 0.8453940153121948, | |
| "logits/rejected": 0.7934137582778931, | |
| "logps/chosen": -1.7638615369796753, | |
| "logps/rejected": -3.4477648735046387, | |
| "loss": 0.3345, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -17.63861656188965, | |
| "rewards/margins": 16.839033126831055, | |
| "rewards/rejected": -34.4776496887207, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.85273492286115, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 3.9784653064014826e-08, | |
| "logits/chosen": 0.9908114671707153, | |
| "logits/rejected": 0.7525830268859863, | |
| "logps/chosen": -1.7432222366333008, | |
| "logps/rejected": -3.2565882205963135, | |
| "loss": 0.7245, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -17.432220458984375, | |
| "rewards/margins": 15.133658409118652, | |
| "rewards/rejected": -32.565879821777344, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.8552282998285803, | |
| "grad_norm": 6.96875, | |
| "learning_rate": 3.8472513667028556e-08, | |
| "logits/chosen": 0.9397574067115784, | |
| "logits/rejected": 0.7737162709236145, | |
| "logps/chosen": -1.6045491695404053, | |
| "logps/rejected": -2.63539981842041, | |
| "loss": 0.4466, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -16.04549217224121, | |
| "rewards/margins": 10.30850601196289, | |
| "rewards/rejected": -26.35399627685547, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.8577216767960106, | |
| "grad_norm": 4.8125, | |
| "learning_rate": 3.7181117302524304e-08, | |
| "logits/chosen": 1.0774602890014648, | |
| "logits/rejected": 0.7839712500572205, | |
| "logps/chosen": -1.9381461143493652, | |
| "logps/rejected": -3.418266773223877, | |
| "loss": 0.4184, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -19.381460189819336, | |
| "rewards/margins": 14.801210403442383, | |
| "rewards/rejected": -34.18267059326172, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.8602150537634409, | |
| "grad_norm": 11.5, | |
| "learning_rate": 3.591054995802462e-08, | |
| "logits/chosen": 0.8521052598953247, | |
| "logits/rejected": 0.8215041756629944, | |
| "logps/chosen": -1.5074630975723267, | |
| "logps/rejected": -2.6447994709014893, | |
| "loss": 0.7877, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -15.074629783630371, | |
| "rewards/margins": 11.373364448547363, | |
| "rewards/rejected": -26.447994232177734, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.8627084307308711, | |
| "grad_norm": 10.3125, | |
| "learning_rate": 3.466089623415333e-08, | |
| "logits/chosen": 0.8286025524139404, | |
| "logits/rejected": 0.7513220310211182, | |
| "logps/chosen": -2.0260732173919678, | |
| "logps/rejected": -3.53950572013855, | |
| "loss": 0.5153, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -20.260732650756836, | |
| "rewards/margins": 15.134326934814453, | |
| "rewards/rejected": -35.39506149291992, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.8652018076983014, | |
| "grad_norm": 4.40625, | |
| "learning_rate": 3.3432239339002654e-08, | |
| "logits/chosen": 0.6205800175666809, | |
| "logits/rejected": 0.8249342441558838, | |
| "logps/chosen": -1.9891235828399658, | |
| "logps/rejected": -3.901944398880005, | |
| "loss": 0.3429, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -19.891237258911133, | |
| "rewards/margins": 19.12820816040039, | |
| "rewards/rejected": -39.01944351196289, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.8676951846657317, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 3.222466108259252e-08, | |
| "logits/chosen": 0.9737166166305542, | |
| "logits/rejected": 0.8952223658561707, | |
| "logps/chosen": -1.9070756435394287, | |
| "logps/rejected": -3.703439712524414, | |
| "loss": 0.3237, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -19.07075309753418, | |
| "rewards/margins": 17.96364402770996, | |
| "rewards/rejected": -37.03439712524414, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.8701885616331619, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 3.10382418714235e-08, | |
| "logits/chosen": 0.9149331450462341, | |
| "logits/rejected": 0.8159484267234802, | |
| "logps/chosen": -1.6051839590072632, | |
| "logps/rejected": -2.925374984741211, | |
| "loss": 0.5309, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -16.051841735839844, | |
| "rewards/margins": 13.201909065246582, | |
| "rewards/rejected": -29.25374984741211, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.8726819386005922, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 2.9873060703122815e-08, | |
| "logits/chosen": 0.9303115606307983, | |
| "logits/rejected": 0.8033692836761475, | |
| "logps/chosen": -2.0454282760620117, | |
| "logps/rejected": -3.6858325004577637, | |
| "loss": 0.3837, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -20.454280853271484, | |
| "rewards/margins": 16.40404510498047, | |
| "rewards/rejected": -36.85832595825195, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8751753155680224, | |
| "grad_norm": 8.0, | |
| "learning_rate": 2.8729195161184243e-08, | |
| "logits/chosen": 0.7548041939735413, | |
| "logits/rejected": 0.8524357080459595, | |
| "logps/chosen": -1.8255373239517212, | |
| "logps/rejected": -3.674532890319824, | |
| "loss": 0.5723, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -18.255373001098633, | |
| "rewards/margins": 18.489957809448242, | |
| "rewards/rejected": -36.745330810546875, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.8776686925354527, | |
| "grad_norm": 15.8125, | |
| "learning_rate": 2.7606721409802498e-08, | |
| "logits/chosen": 0.9838480353355408, | |
| "logits/rejected": 0.8637805581092834, | |
| "logps/chosen": -1.717268705368042, | |
| "logps/rejected": -2.822934627532959, | |
| "loss": 0.7475, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -17.17268943786621, | |
| "rewards/margins": 11.056660652160645, | |
| "rewards/rejected": -28.229345321655273, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.880162069502883, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 2.650571418880144e-08, | |
| "logits/chosen": 0.8108838796615601, | |
| "logits/rejected": 0.793830394744873, | |
| "logps/chosen": -1.8453660011291504, | |
| "logps/rejected": -3.455012083053589, | |
| "loss": 0.3673, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -18.453659057617188, | |
| "rewards/margins": 16.096466064453125, | |
| "rewards/rejected": -34.55012512207031, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.8826554464703132, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 2.5426246808657902e-08, | |
| "logits/chosen": 0.7718413472175598, | |
| "logits/rejected": 0.7736707925796509, | |
| "logps/chosen": -1.9948774576187134, | |
| "logps/rejected": -3.8196072578430176, | |
| "loss": 0.2475, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -19.948774337768555, | |
| "rewards/margins": 18.247299194335938, | |
| "rewards/rejected": -38.19607162475586, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.8851488234377435, | |
| "grad_norm": 8.25, | |
| "learning_rate": 2.4368391145620064e-08, | |
| "logits/chosen": 0.8589321374893188, | |
| "logits/rejected": 0.7836854457855225, | |
| "logps/chosen": -1.6310110092163086, | |
| "logps/rejected": -2.939948320388794, | |
| "loss": 0.2476, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -16.31011199951172, | |
| "rewards/margins": 13.089373588562012, | |
| "rewards/rejected": -29.39948272705078, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.8876422004051737, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 2.3332217636921637e-08, | |
| "logits/chosen": 0.9285929203033447, | |
| "logits/rejected": 0.8569374084472656, | |
| "logps/chosen": -1.9147915840148926, | |
| "logps/rejected": -3.808230400085449, | |
| "loss": 0.3004, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -19.147912979125977, | |
| "rewards/margins": 18.934389114379883, | |
| "rewards/rejected": -38.08230209350586, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.8901355773726041, | |
| "grad_norm": 10.6875, | |
| "learning_rate": 2.2317795276091977e-08, | |
| "logits/chosen": 0.8501561880111694, | |
| "logits/rejected": 0.8791577219963074, | |
| "logps/chosen": -1.746106505393982, | |
| "logps/rejected": -3.179072856903076, | |
| "loss": 0.8857, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -17.4610652923584, | |
| "rewards/margins": 14.329660415649414, | |
| "rewards/rejected": -31.79072380065918, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.8926289543400343, | |
| "grad_norm": 9.1875, | |
| "learning_rate": 2.1325191608361908e-08, | |
| "logits/chosen": 0.8351438641548157, | |
| "logits/rejected": 0.8163132667541504, | |
| "logps/chosen": -1.5546523332595825, | |
| "logps/rejected": -2.714953660964966, | |
| "loss": 0.4302, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -15.546524047851562, | |
| "rewards/margins": 11.603012084960938, | |
| "rewards/rejected": -27.1495361328125, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.8951223313074645, | |
| "grad_norm": 12.625, | |
| "learning_rate": 2.035447272616638e-08, | |
| "logits/chosen": 0.8828765153884888, | |
| "logits/rejected": 0.7569836378097534, | |
| "logps/chosen": -1.804396629333496, | |
| "logps/rejected": -3.248098134994507, | |
| "loss": 0.4005, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -18.043964385986328, | |
| "rewards/margins": 14.437012672424316, | |
| "rewards/rejected": -32.480979919433594, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.8976157082748948, | |
| "grad_norm": 4.96875, | |
| "learning_rate": 1.9405703264743645e-08, | |
| "logits/chosen": 0.8172731995582581, | |
| "logits/rejected": 0.7823519706726074, | |
| "logps/chosen": -1.4623509645462036, | |
| "logps/rejected": -2.556082010269165, | |
| "loss": 0.3153, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -14.62350845336914, | |
| "rewards/margins": 10.937310218811035, | |
| "rewards/rejected": -25.56081771850586, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.900109085242325, | |
| "grad_norm": 10.4375, | |
| "learning_rate": 1.8478946397831535e-08, | |
| "logits/chosen": 0.8463267683982849, | |
| "logits/rejected": 0.8544177412986755, | |
| "logps/chosen": -1.8382923603057861, | |
| "logps/rejected": -3.775862693786621, | |
| "loss": 0.405, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -18.382923126220703, | |
| "rewards/margins": 19.375703811645508, | |
| "rewards/rejected": -37.75862503051758, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9026024622097554, | |
| "grad_norm": 8.5, | |
| "learning_rate": 1.7574263833461018e-08, | |
| "logits/chosen": 0.85582435131073, | |
| "logits/rejected": 0.750614583492279, | |
| "logps/chosen": -1.5502458810806274, | |
| "logps/rejected": -2.6339385509490967, | |
| "loss": 0.4212, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -15.502457618713379, | |
| "rewards/margins": 10.836931228637695, | |
| "rewards/rejected": -26.33938980102539, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9050958391771856, | |
| "grad_norm": 8.25, | |
| "learning_rate": 1.6691715809847622e-08, | |
| "logits/chosen": 1.001466989517212, | |
| "logits/rejected": 0.8945422768592834, | |
| "logps/chosen": -1.4286935329437256, | |
| "logps/rejected": -2.590465545654297, | |
| "loss": 0.6361, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -14.28693675994873, | |
| "rewards/margins": 11.617722511291504, | |
| "rewards/rejected": -25.90465545654297, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.9075892161446159, | |
| "grad_norm": 27.25, | |
| "learning_rate": 1.5831361091380085e-08, | |
| "logits/chosen": 1.0156899690628052, | |
| "logits/rejected": 0.9483416080474854, | |
| "logps/chosen": -2.2000174522399902, | |
| "logps/rejected": -3.7587168216705322, | |
| "loss": 0.7706, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -22.000173568725586, | |
| "rewards/margins": 15.586994171142578, | |
| "rewards/rejected": -37.5871696472168, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9100825931120461, | |
| "grad_norm": 11.9375, | |
| "learning_rate": 1.4993256964707667e-08, | |
| "logits/chosen": 0.9330320358276367, | |
| "logits/rejected": 0.7909821271896362, | |
| "logps/chosen": -1.7297008037567139, | |
| "logps/rejected": -3.1835391521453857, | |
| "loss": 0.6938, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -17.297008514404297, | |
| "rewards/margins": 14.538382530212402, | |
| "rewards/rejected": -31.835391998291016, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.9125759700794764, | |
| "grad_norm": 150.0, | |
| "learning_rate": 1.4177459234925959e-08, | |
| "logits/chosen": 1.0243542194366455, | |
| "logits/rejected": 0.8424570560455322, | |
| "logps/chosen": -1.55518639087677, | |
| "logps/rejected": -2.423957109451294, | |
| "loss": 0.74, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -15.551864624023438, | |
| "rewards/margins": 8.687705993652344, | |
| "rewards/rejected": -24.23957061767578, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.9150693470469067, | |
| "grad_norm": 3.8125, | |
| "learning_rate": 1.3384022221860707e-08, | |
| "logits/chosen": 0.7477589845657349, | |
| "logits/rejected": 0.7240265011787415, | |
| "logps/chosen": -1.9058078527450562, | |
| "logps/rejected": -4.386825084686279, | |
| "loss": 0.1929, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -19.05807876586914, | |
| "rewards/margins": 24.810171127319336, | |
| "rewards/rejected": -43.868247985839844, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.9175627240143369, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 1.2612998756451366e-08, | |
| "logits/chosen": 0.8763638734817505, | |
| "logits/rejected": 0.8146540522575378, | |
| "logps/chosen": -1.8290818929672241, | |
| "logps/rejected": -3.116455554962158, | |
| "loss": 0.6755, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -18.290821075439453, | |
| "rewards/margins": 12.873735427856445, | |
| "rewards/rejected": -31.164554595947266, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9200561009817672, | |
| "grad_norm": 45.0, | |
| "learning_rate": 1.1864440177232976e-08, | |
| "logits/chosen": 0.8767358660697937, | |
| "logits/rejected": 0.7798057794570923, | |
| "logps/chosen": -1.9157465696334839, | |
| "logps/rejected": -4.174086570739746, | |
| "loss": 0.3354, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -19.1574649810791, | |
| "rewards/margins": 22.58340072631836, | |
| "rewards/rejected": -41.74085998535156, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9225494779491974, | |
| "grad_norm": 7.6875, | |
| "learning_rate": 1.1138396326917977e-08, | |
| "logits/chosen": 0.9398146867752075, | |
| "logits/rejected": 0.9706467390060425, | |
| "logps/chosen": -2.1085903644561768, | |
| "logps/rejected": -3.980117082595825, | |
| "loss": 0.438, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -21.085905075073242, | |
| "rewards/margins": 18.71526336669922, | |
| "rewards/rejected": -39.80117416381836, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9250428549166277, | |
| "grad_norm": 6.59375, | |
| "learning_rate": 1.0434915549077461e-08, | |
| "logits/chosen": 0.9370230436325073, | |
| "logits/rejected": 0.6799491047859192, | |
| "logps/chosen": -2.0628821849823, | |
| "logps/rejected": -4.021778106689453, | |
| "loss": 0.2515, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -20.628820419311523, | |
| "rewards/margins": 19.588960647583008, | |
| "rewards/rejected": -40.21778106689453, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.927536231884058, | |
| "grad_norm": 4.65625, | |
| "learning_rate": 9.754044684922053e-09, | |
| "logits/chosen": 0.9780002236366272, | |
| "logits/rejected": 0.8844839334487915, | |
| "logps/chosen": -2.1081106662750244, | |
| "logps/rejected": -3.9642934799194336, | |
| "loss": 0.2881, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -21.08110809326172, | |
| "rewards/margins": 18.56182861328125, | |
| "rewards/rejected": -39.64293670654297, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.9300296088514882, | |
| "grad_norm": 38.0, | |
| "learning_rate": 9.095829070183286e-09, | |
| "logits/chosen": 0.8360333442687988, | |
| "logits/rejected": 0.7579271793365479, | |
| "logps/chosen": -1.7824090719223022, | |
| "logps/rejected": -3.0244534015655518, | |
| "loss": 0.8543, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -17.82408905029297, | |
| "rewards/margins": 12.42044448852539, | |
| "rewards/rejected": -30.24453353881836, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.9325229858189185, | |
| "grad_norm": 9.625, | |
| "learning_rate": 8.460312532094555e-09, | |
| "logits/chosen": 0.85768723487854, | |
| "logits/rejected": 0.8098315000534058, | |
| "logps/chosen": -1.6384191513061523, | |
| "logps/rejected": -2.991260051727295, | |
| "loss": 0.3512, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -16.38418960571289, | |
| "rewards/margins": 13.528410911560059, | |
| "rewards/rejected": -29.9126033782959, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.9350163627863488, | |
| "grad_norm": 164.0, | |
| "learning_rate": 7.847537386473157e-09, | |
| "logits/chosen": 0.7866430878639221, | |
| "logits/rejected": 0.8129922747612, | |
| "logps/chosen": -1.9942247867584229, | |
| "logps/rejected": -3.5987548828125, | |
| "loss": 0.4176, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -19.942249298095703, | |
| "rewards/margins": 16.04530143737793, | |
| "rewards/rejected": -35.987548828125, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.937509739753779, | |
| "grad_norm": 7.46875, | |
| "learning_rate": 7.257544434902646e-09, | |
| "logits/chosen": 0.7069447040557861, | |
| "logits/rejected": 0.7614144086837769, | |
| "logps/chosen": -1.3751400709152222, | |
| "logps/rejected": -2.30409574508667, | |
| "loss": 0.4299, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -13.751401901245117, | |
| "rewards/margins": 9.289555549621582, | |
| "rewards/rejected": -23.040958404541016, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.9400031167212093, | |
| "grad_norm": 11.75, | |
| "learning_rate": 6.690372962015922e-09, | |
| "logits/chosen": 0.7851680517196655, | |
| "logits/rejected": 0.7357572913169861, | |
| "logps/chosen": -1.5484297275543213, | |
| "logps/rejected": -2.7063143253326416, | |
| "loss": 0.835, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -15.484295845031738, | |
| "rewards/margins": 11.578847885131836, | |
| "rewards/rejected": -27.063142776489258, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.9424964936886395, | |
| "grad_norm": 17.625, | |
| "learning_rate": 6.146060732879643e-09, | |
| "logits/chosen": 0.9812300801277161, | |
| "logits/rejected": 0.9034566879272461, | |
| "logps/chosen": -1.6774706840515137, | |
| "logps/rejected": -2.880139112472534, | |
| "loss": 0.711, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -16.774707794189453, | |
| "rewards/margins": 12.026679992675781, | |
| "rewards/rejected": -28.801387786865234, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.9449898706560698, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 5.624643990479616e-09, | |
| "logits/chosen": 0.8070354461669922, | |
| "logits/rejected": 0.8890936374664307, | |
| "logps/chosen": -1.5846179723739624, | |
| "logps/rejected": -2.867779016494751, | |
| "loss": 0.9569, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -15.846179962158203, | |
| "rewards/margins": 12.831609725952148, | |
| "rewards/rejected": -28.67778778076172, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.9474832476235001, | |
| "grad_norm": 12.0, | |
| "learning_rate": 5.126157453307456e-09, | |
| "logits/chosen": 0.9138520359992981, | |
| "logits/rejected": 0.8435475826263428, | |
| "logps/chosen": -1.5853928327560425, | |
| "logps/rejected": -3.102871894836426, | |
| "loss": 0.3206, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -15.853928565979004, | |
| "rewards/margins": 15.174790382385254, | |
| "rewards/rejected": -31.028718948364258, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9499766245909304, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 4.6506343130488956e-09, | |
| "logits/chosen": 0.7391858100891113, | |
| "logits/rejected": 0.7543048858642578, | |
| "logps/chosen": -2.2143211364746094, | |
| "logps/rejected": -4.464791774749756, | |
| "loss": 0.2126, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -22.14321517944336, | |
| "rewards/margins": 22.50470542907715, | |
| "rewards/rejected": -44.647911071777344, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.9524700015583606, | |
| "grad_norm": 28.375, | |
| "learning_rate": 4.198106232373788e-09, | |
| "logits/chosen": 0.8407728672027588, | |
| "logits/rejected": 0.7748773694038391, | |
| "logps/chosen": -1.5643658638000488, | |
| "logps/rejected": -2.904953956604004, | |
| "loss": 0.6156, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -15.643659591674805, | |
| "rewards/margins": 13.405879020690918, | |
| "rewards/rejected": -29.049535751342773, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.9549633785257908, | |
| "grad_norm": 5.5, | |
| "learning_rate": 3.768603342827719e-09, | |
| "logits/chosen": 0.7649537324905396, | |
| "logits/rejected": 0.9055894613265991, | |
| "logps/chosen": -2.0387561321258545, | |
| "logps/rejected": -3.687187671661377, | |
| "loss": 0.2725, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -20.387561798095703, | |
| "rewards/margins": 16.484315872192383, | |
| "rewards/rejected": -36.87187957763672, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.9574567554932212, | |
| "grad_norm": 9.625, | |
| "learning_rate": 3.3621542428259764e-09, | |
| "logits/chosen": 0.7531914710998535, | |
| "logits/rejected": 0.7303828001022339, | |
| "logps/chosen": -1.985339641571045, | |
| "logps/rejected": -3.589801549911499, | |
| "loss": 0.2374, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -19.853397369384766, | |
| "rewards/margins": 16.044618606567383, | |
| "rewards/rejected": -35.898014068603516, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.9599501324606514, | |
| "grad_norm": 13.1875, | |
| "learning_rate": 2.978785995748928e-09, | |
| "logits/chosen": 0.8882652521133423, | |
| "logits/rejected": 0.7716068029403687, | |
| "logps/chosen": -1.404790997505188, | |
| "logps/rejected": -2.1909685134887695, | |
| "loss": 1.0033, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -14.047908782958984, | |
| "rewards/margins": 7.8617753982543945, | |
| "rewards/rejected": -21.909685134887695, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.9624435094280817, | |
| "grad_norm": 13.8125, | |
| "learning_rate": 2.618524128140309e-09, | |
| "logits/chosen": 0.8234500885009766, | |
| "logits/rejected": 0.826442301273346, | |
| "logps/chosen": -1.7514184713363647, | |
| "logps/rejected": -3.4685299396514893, | |
| "loss": 0.4523, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -17.51418685913086, | |
| "rewards/margins": 17.171112060546875, | |
| "rewards/rejected": -34.685298919677734, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.9649368863955119, | |
| "grad_norm": 19.0, | |
| "learning_rate": 2.2813926280074225e-09, | |
| "logits/chosen": 0.9154322147369385, | |
| "logits/rejected": 0.7984371781349182, | |
| "logps/chosen": -1.6743313074111938, | |
| "logps/rejected": -2.603154182434082, | |
| "loss": 0.6813, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -16.74331283569336, | |
| "rewards/margins": 9.288228988647461, | |
| "rewards/rejected": -26.03154182434082, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.9674302633629421, | |
| "grad_norm": 9.6875, | |
| "learning_rate": 1.9674139432240056e-09, | |
| "logits/chosen": 0.7447303533554077, | |
| "logits/rejected": 0.6416030526161194, | |
| "logps/chosen": -1.896831750869751, | |
| "logps/rejected": -3.2503926753997803, | |
| "loss": 0.2141, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -18.96831703186035, | |
| "rewards/margins": 13.535609245300293, | |
| "rewards/rejected": -32.50392532348633, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.9699236403303725, | |
| "grad_norm": 3.5, | |
| "learning_rate": 1.6766089800352934e-09, | |
| "logits/chosen": 0.8328643441200256, | |
| "logits/rejected": 0.8856253623962402, | |
| "logps/chosen": -2.1792397499084473, | |
| "logps/rejected": -4.523174285888672, | |
| "loss": 0.2019, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -21.792396545410156, | |
| "rewards/margins": 23.43934440612793, | |
| "rewards/rejected": -45.23174285888672, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.9724170172978027, | |
| "grad_norm": 8.25, | |
| "learning_rate": 1.408997101666326e-09, | |
| "logits/chosen": 0.8109475374221802, | |
| "logits/rejected": 0.9047868251800537, | |
| "logps/chosen": -2.0790956020355225, | |
| "logps/rejected": -4.099780559539795, | |
| "loss": 0.1869, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -20.790956497192383, | |
| "rewards/margins": 20.206846237182617, | |
| "rewards/rejected": -40.997806549072266, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.974910394265233, | |
| "grad_norm": 5.375, | |
| "learning_rate": 1.1645961270323746e-09, | |
| "logits/chosen": 0.8082598447799683, | |
| "logits/rejected": 0.7497880458831787, | |
| "logps/chosen": -1.4980417490005493, | |
| "logps/rejected": -3.6578102111816406, | |
| "loss": 0.3026, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -14.98041820526123, | |
| "rewards/margins": 21.59768295288086, | |
| "rewards/rejected": -36.578102111816406, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.9774037712326632, | |
| "grad_norm": 14.0625, | |
| "learning_rate": 9.434223295524958e-10, | |
| "logits/chosen": 0.822067141532898, | |
| "logits/rejected": 0.8411962985992432, | |
| "logps/chosen": -1.5677428245544434, | |
| "logps/rejected": -2.5952415466308594, | |
| "loss": 0.421, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -15.677427291870117, | |
| "rewards/margins": 10.274986267089844, | |
| "rewards/rejected": -25.95241355895996, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.9798971482000935, | |
| "grad_norm": 41.0, | |
| "learning_rate": 7.454904360661762e-10, | |
| "logits/chosen": 0.7836760878562927, | |
| "logits/rejected": 0.7335962653160095, | |
| "logps/chosen": -1.7851057052612305, | |
| "logps/rejected": -3.307037830352783, | |
| "loss": 0.6381, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -17.851055145263672, | |
| "rewards/margins": 15.219318389892578, | |
| "rewards/rejected": -33.07037353515625, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.9823905251675238, | |
| "grad_norm": 7.6875, | |
| "learning_rate": 5.708136258525231e-10, | |
| "logits/chosen": 1.0210167169570923, | |
| "logits/rejected": 0.8218429088592529, | |
| "logps/chosen": -1.745602011680603, | |
| "logps/rejected": -3.2325327396392822, | |
| "loss": 0.4579, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.45602035522461, | |
| "rewards/margins": 14.869308471679688, | |
| "rewards/rejected": -32.3253288269043, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.984883902134954, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 4.194035297527765e-10, | |
| "logits/chosen": 0.9661321640014648, | |
| "logits/rejected": 0.8232787847518921, | |
| "logps/chosen": -1.6603198051452637, | |
| "logps/rejected": -3.042840003967285, | |
| "loss": 0.5841, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -16.603199005126953, | |
| "rewards/margins": 13.825201034545898, | |
| "rewards/rejected": -30.42839813232422, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.9873772791023843, | |
| "grad_norm": 9.875, | |
| "learning_rate": 2.912702293959901e-10, | |
| "logits/chosen": 0.9294121861457825, | |
| "logits/rejected": 0.7912936210632324, | |
| "logps/chosen": -1.8600192070007324, | |
| "logps/rejected": -3.488579750061035, | |
| "loss": 0.3076, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -18.600191116333008, | |
| "rewards/margins": 16.28560447692871, | |
| "rewards/rejected": -34.885799407958984, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.9898706560698145, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 1.8642225652760746e-10, | |
| "logits/chosen": 1.0502732992172241, | |
| "logits/rejected": 0.7951204180717468, | |
| "logps/chosen": -1.755967617034912, | |
| "logps/rejected": -3.1554126739501953, | |
| "loss": 0.4734, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -17.559675216674805, | |
| "rewards/margins": 13.994451522827148, | |
| "rewards/rejected": -31.554126739501953, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.9923640330372449, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 1.0486659244136054e-10, | |
| "logits/chosen": 0.8521815538406372, | |
| "logits/rejected": 0.8380050659179688, | |
| "logps/chosen": -1.743971586227417, | |
| "logps/rejected": -3.0718655586242676, | |
| "loss": 0.2066, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -17.439714431762695, | |
| "rewards/margins": 13.27894115447998, | |
| "rewards/rejected": -30.71865463256836, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.9948574100046751, | |
| "grad_norm": 8.0625, | |
| "learning_rate": 4.6608667514608234e-11, | |
| "logits/chosen": 0.8208640217781067, | |
| "logits/rejected": 0.7938324213027954, | |
| "logps/chosen": -1.5473930835723877, | |
| "logps/rejected": -2.6816587448120117, | |
| "loss": 0.3252, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -15.473930358886719, | |
| "rewards/margins": 11.342655181884766, | |
| "rewards/rejected": -26.816585540771484, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.9973507869721053, | |
| "grad_norm": 4.71875, | |
| "learning_rate": 1.1652360846531317e-11, | |
| "logits/chosen": 1.0007308721542358, | |
| "logits/rejected": 0.8393873572349548, | |
| "logps/chosen": -2.072484016418457, | |
| "logps/rejected": -4.150708198547363, | |
| "loss": 0.3247, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -20.724838256835938, | |
| "rewards/margins": 20.782241821289062, | |
| "rewards/rejected": -41.507083892822266, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9998441639395356, | |
| "grad_norm": 9.0, | |
| "learning_rate": 0.0, | |
| "logits/chosen": 0.8794471025466919, | |
| "logits/rejected": 0.8441964983940125, | |
| "logps/chosen": -1.8147742748260498, | |
| "logps/rejected": -3.1960084438323975, | |
| "loss": 0.3852, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -18.147741317749023, | |
| "rewards/margins": 13.812341690063477, | |
| "rewards/rejected": -31.9600830078125, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.9998441639395356, | |
| "step": 401, | |
| "total_flos": 5.68672318443248e+18, | |
| "train_loss": 2.14272621887599, | |
| "train_runtime": 89392.3847, | |
| "train_samples_per_second": 0.144, | |
| "train_steps_per_second": 0.004 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 401, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 110, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.68672318443248e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |