{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 50,
  "global_step": 352,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.028409090909090908,
      "grad_norm": 58.560175797662396,
      "learning_rate": 1.3888888888888888e-07,
      "logits/chosen": -2.854257822036743,
      "logits/rejected": -2.6711959838867188,
      "logps/chosen": -372.8061828613281,
      "logps/rejected": -748.3225708007812,
      "loss": 0.6864,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.0024342918768525124,
      "rewards/margins": 0.015255051665008068,
      "rewards/rejected": -0.01282076071947813,
      "step": 10
    },
    {
      "epoch": 0.056818181818181816,
      "grad_norm": 19.273001268117515,
      "learning_rate": 2.7777777777777776e-07,
      "logits/chosen": -2.8417019844055176,
      "logits/rejected": -2.6597249507904053,
      "logps/chosen": -380.89154052734375,
      "logps/rejected": -695.0466918945312,
      "loss": 0.4818,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.08547976613044739,
      "rewards/margins": 0.59254390001297,
      "rewards/rejected": -0.5070642232894897,
      "step": 20
    },
    {
      "epoch": 0.08522727272727272,
      "grad_norm": 4.004572162611932,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -2.8608241081237793,
      "logits/rejected": -2.65569806098938,
      "logps/chosen": -324.8751220703125,
      "logps/rejected": -1104.629150390625,
      "loss": 0.1406,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.39872533082962036,
      "rewards/margins": 4.522292137145996,
      "rewards/rejected": -4.123566627502441,
      "step": 30
    },
    {
      "epoch": 0.11363636363636363,
      "grad_norm": 0.38876724989910894,
      "learning_rate": 4.998023493068254e-07,
      "logits/chosen": -2.869859218597412,
      "logits/rejected": -2.628769874572754,
      "logps/chosen": -311.97674560546875,
      "logps/rejected": -2135.577392578125,
      "loss": 0.0209,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.43964704871177673,
      "rewards/margins": 15.060437202453613,
      "rewards/rejected": -14.620790481567383,
      "step": 40
    },
    {
      "epoch": 0.14204545454545456,
      "grad_norm": 0.782568924522184,
      "learning_rate": 4.975823666181255e-07,
      "logits/chosen": -2.880276918411255,
      "logits/rejected": -2.6541552543640137,
      "logps/chosen": -422.45806884765625,
      "logps/rejected": -3464.00537109375,
      "loss": 0.0053,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.41893959045410156,
      "rewards/margins": 27.855571746826172,
      "rewards/rejected": -28.274511337280273,
      "step": 50
    },
    {
      "epoch": 0.14204545454545456,
      "eval_logits/chosen": -2.8265042304992676,
      "eval_logits/rejected": -2.55804181098938,
      "eval_logps/chosen": -454.01495361328125,
      "eval_logps/rejected": -4311.6689453125,
      "eval_loss": 0.0024393389467149973,
      "eval_rewards/accuracies": 0.9979838728904724,
      "eval_rewards/chosen": -0.8703324198722839,
      "eval_rewards/margins": 35.83847427368164,
      "eval_rewards/rejected": -36.70880889892578,
      "eval_runtime": 193.6452,
      "eval_samples_per_second": 20.166,
      "eval_steps_per_second": 0.32,
      "step": 50
    },
    {
      "epoch": 0.17045454545454544,
      "grad_norm": 3.6303694847529218,
      "learning_rate": 4.929173350101024e-07,
      "logits/chosen": -2.867490291595459,
      "logits/rejected": -2.5985803604125977,
      "logps/chosen": -477.73272705078125,
      "logps/rejected": -5529.82861328125,
      "loss": 0.0218,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.165496826171875,
      "rewards/margins": 46.3895378112793,
      "rewards/rejected": -47.55502700805664,
      "step": 60
    },
    {
      "epoch": 0.19886363636363635,
      "grad_norm": 0.02570393857059677,
      "learning_rate": 4.858533249305336e-07,
      "logits/chosen": -2.8956971168518066,
      "logits/rejected": -2.5464015007019043,
      "logps/chosen": -492.2872619628906,
      "logps/rejected": -5038.3505859375,
      "loss": 0.0042,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -1.363173007965088,
      "rewards/margins": 41.90122604370117,
      "rewards/rejected": -43.26439666748047,
      "step": 70
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 0.019530473634071552,
      "learning_rate": 4.764600984163808e-07,
      "logits/chosen": -2.9800267219543457,
      "logits/rejected": -2.6762020587921143,
      "logps/chosen": -501.85772705078125,
      "logps/rejected": -5076.8662109375,
      "loss": 0.0028,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.2898057699203491,
      "rewards/margins": 42.464393615722656,
      "rewards/rejected": -43.75419235229492,
      "step": 80
    },
    {
      "epoch": 0.2556818181818182,
      "grad_norm": 0.021637173397401434,
      "learning_rate": 4.6483042014491527e-07,
      "logits/chosen": -3.0119636058807373,
      "logits/rejected": -2.6861469745635986,
      "logps/chosen": -490.847900390625,
      "logps/rejected": -5491.890625,
      "loss": 0.0015,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.2475790977478027,
      "rewards/margins": 47.27143478393555,
      "rewards/rejected": -48.519012451171875,
      "step": 90
    },
    {
      "epoch": 0.2840909090909091,
      "grad_norm": 0.24085676005599221,
      "learning_rate": 4.510791413176912e-07,
      "logits/chosen": -2.996633529663086,
      "logits/rejected": -2.7358081340789795,
      "logps/chosen": -525.2289428710938,
      "logps/rejected": -5033.7158203125,
      "loss": 0.0061,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.5802116394042969,
      "rewards/margins": 41.77141571044922,
      "rewards/rejected": -43.35162353515625,
      "step": 100
    },
    {
      "epoch": 0.2840909090909091,
      "eval_logits/chosen": -2.9615049362182617,
      "eval_logits/rejected": -2.6516737937927246,
      "eval_logps/chosen": -529.8892822265625,
      "eval_logps/rejected": -4822.52294921875,
      "eval_loss": 0.0012528152437880635,
      "eval_rewards/accuracies": 0.9979838728904724,
      "eval_rewards/chosen": -1.629075527191162,
      "eval_rewards/margins": 40.188270568847656,
      "eval_rewards/rejected": -41.81734848022461,
      "eval_runtime": 192.9327,
      "eval_samples_per_second": 20.24,
      "eval_steps_per_second": 0.321,
      "step": 100
    },
    {
      "epoch": 0.3125,
      "grad_norm": 0.5179684876316148,
      "learning_rate": 4.353420654246546e-07,
      "logits/chosen": -2.9635558128356934,
      "logits/rejected": -2.640312433242798,
      "logps/chosen": -524.4633178710938,
      "logps/rejected": -5045.9384765625,
      "loss": 0.0018,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.5064172744750977,
      "rewards/margins": 42.47917175292969,
      "rewards/rejected": -43.98558807373047,
      "step": 110
    },
    {
      "epoch": 0.3409090909090909,
      "grad_norm": 1.1132308145442282,
      "learning_rate": 4.177746070897592e-07,
      "logits/chosen": -2.987412214279175,
      "logits/rejected": -2.5702311992645264,
      "logps/chosen": -529.0863647460938,
      "logps/rejected": -4814.87939453125,
      "loss": 0.001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.6565755605697632,
      "rewards/margins": 40.433387756347656,
      "rewards/rejected": -42.089962005615234,
      "step": 120
    },
    {
      "epoch": 0.3693181818181818,
      "grad_norm": 0.1524427654589431,
      "learning_rate": 3.9855025724292763e-07,
      "logits/chosen": -2.988529682159424,
      "logits/rejected": -2.547990083694458,
      "logps/chosen": -530.0753784179688,
      "logps/rejected": -4828.005859375,
      "loss": 0.0006,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.5551540851593018,
      "rewards/margins": 40.872337341308594,
      "rewards/rejected": -42.427490234375,
      "step": 130
    },
    {
      "epoch": 0.3977272727272727,
      "grad_norm": 0.058047718713258305,
      "learning_rate": 3.7785886977585555e-07,
      "logits/chosen": -2.9764513969421387,
      "logits/rejected": -2.531805992126465,
      "logps/chosen": -517.7172241210938,
      "logps/rejected": -5057.0849609375,
      "loss": 0.001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.4110362529754639,
      "rewards/margins": 42.79331588745117,
      "rewards/rejected": -44.2043571472168,
      "step": 140
    },
    {
      "epoch": 0.42613636363636365,
      "grad_norm": 0.03612899599023959,
      "learning_rate": 3.5590478660213206e-07,
      "logits/chosen": -3.0368993282318115,
      "logits/rejected": -2.6156864166259766,
      "logps/chosen": -539.3234252929688,
      "logps/rejected": -5024.9345703125,
      "loss": 0.0019,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.587400197982788,
      "rewards/margins": 41.970149993896484,
      "rewards/rejected": -43.55754852294922,
      "step": 150
    },
    {
      "epoch": 0.42613636363636365,
      "eval_logits/chosen": -2.9830355644226074,
      "eval_logits/rejected": -2.518221378326416,
      "eval_logps/chosen": -536.5906372070312,
      "eval_logps/rejected": -5789.0673828125,
      "eval_loss": 0.00048427816363982856,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -1.696089267730713,
      "eval_rewards/margins": 49.78670120239258,
      "eval_rewards/rejected": -51.4827880859375,
      "eval_runtime": 191.9777,
      "eval_samples_per_second": 20.341,
      "eval_steps_per_second": 0.323,
      "step": 150
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.017819958857612898,
      "learning_rate": 3.3290481963801696e-07,
      "logits/chosen": -2.969292402267456,
      "logits/rejected": -2.488386631011963,
      "logps/chosen": -585.0005493164062,
      "logps/rejected": -6239.01025390625,
      "loss": 0.0014,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.9822533130645752,
      "rewards/margins": 53.68230438232422,
      "rewards/rejected": -55.6645622253418,
      "step": 160
    },
    {
      "epoch": 0.48295454545454547,
      "grad_norm": 0.04768437011269856,
      "learning_rate": 3.0908610963322626e-07,
      "logits/chosen": -2.916259765625,
      "logits/rejected": -2.3954265117645264,
      "logps/chosen": -582.5315551757812,
      "logps/rejected": -6029.98486328125,
      "loss": 0.0008,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.1212856769561768,
      "rewards/margins": 51.80366897583008,
      "rewards/rejected": -53.924957275390625,
      "step": 170
    },
    {
      "epoch": 0.5113636363636364,
      "grad_norm": 0.00786856607244259,
      "learning_rate": 2.846838829972671e-07,
      "logits/chosen": -2.9504363536834717,
      "logits/rejected": -2.4123263359069824,
      "logps/chosen": -604.1171875,
      "logps/rejected": -6656.84521484375,
      "loss": 0.0012,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.2390635013580322,
      "rewards/margins": 57.837486267089844,
      "rewards/rejected": -60.07655715942383,
      "step": 180
    },
    {
      "epoch": 0.5397727272727273,
      "grad_norm": 0.02736966148588083,
      "learning_rate": 2.5993912877423147e-07,
      "logits/chosen": -2.9569239616394043,
      "logits/rejected": -2.3740897178649902,
      "logps/chosen": -581.5017700195312,
      "logps/rejected": -6748.75927734375,
      "loss": 0.003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.1127352714538574,
      "rewards/margins": 58.927764892578125,
      "rewards/rejected": -61.040489196777344,
      "step": 190
    },
    {
      "epoch": 0.5681818181818182,
      "grad_norm": 0.06339173150454364,
      "learning_rate": 2.3509621870754504e-07,
      "logits/chosen": -3.002689838409424,
      "logits/rejected": -2.4718198776245117,
      "logps/chosen": -596.9158935546875,
      "logps/rejected": -6154.20263671875,
      "loss": 0.0022,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.0328099727630615,
      "rewards/margins": 52.925392150878906,
      "rewards/rejected": -54.95819854736328,
      "step": 200
    },
    {
      "epoch": 0.5681818181818182,
      "eval_logits/chosen": -2.9937610626220703,
      "eval_logits/rejected": -2.5169804096221924,
      "eval_logps/chosen": -570.3004150390625,
      "eval_logps/rejected": -5663.16015625,
      "eval_loss": 0.00048508381587453187,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -2.033186435699463,
      "eval_rewards/margins": 48.190528869628906,
      "eval_rewards/rejected": -50.223716735839844,
      "eval_runtime": 193.4558,
      "eval_samples_per_second": 20.185,
      "eval_steps_per_second": 0.32,
      "step": 200
    },
    {
      "epoch": 0.5965909090909091,
      "grad_norm": 0.016437717754495415,
      "learning_rate": 2.1040049389819624e-07,
      "logits/chosen": -3.0172648429870605,
      "logits/rejected": -2.565308094024658,
      "logps/chosen": -547.1297607421875,
      "logps/rejected": -5656.95166015625,
      "loss": 0.0007,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.0488336086273193,
      "rewards/margins": 47.970909118652344,
      "rewards/rejected": -50.019744873046875,
      "step": 210
    },
    {
      "epoch": 0.625,
      "grad_norm": 1.693727446683944,
      "learning_rate": 1.8609584188988133e-07,
      "logits/chosen": -3.0191075801849365,
      "logits/rejected": -2.507822036743164,
      "logps/chosen": -582.245849609375,
      "logps/rejected": -5883.4912109375,
      "loss": 0.0014,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.9353878498077393,
      "rewards/margins": 50.56637191772461,
      "rewards/rejected": -52.50175857543945,
      "step": 220
    },
    {
      "epoch": 0.6534090909090909,
      "grad_norm": 0.0008297976344692307,
      "learning_rate": 1.624222881090439e-07,
      "logits/chosen": -3.0655417442321777,
      "logits/rejected": -2.5936379432678223,
      "logps/chosen": -571.0089111328125,
      "logps/rejected": -5979.38037109375,
      "loss": 0.0006,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.8964475393295288,
      "rewards/margins": 51.060691833496094,
      "rewards/rejected": -52.9571418762207,
      "step": 230
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 12.048855827746156,
      "learning_rate": 1.3961362544602212e-07,
      "logits/chosen": -3.0751774311065674,
      "logits/rejected": -2.6132192611694336,
      "logps/chosen": -555.5328979492188,
      "logps/rejected": -5732.3876953125,
      "loss": 0.0044,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.028006076812744,
      "rewards/margins": 48.74276351928711,
      "rewards/rejected": -50.770774841308594,
      "step": 240
    },
    {
      "epoch": 0.7102272727272727,
      "grad_norm": 0.030090428752627654,
      "learning_rate": 1.1789510538684522e-07,
      "logits/chosen": -3.1127967834472656,
      "logits/rejected": -2.603487730026245,
      "logps/chosen": -577.8153076171875,
      "logps/rejected": -5749.29541015625,
      "loss": 0.0066,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.0291168689727783,
      "rewards/margins": 49.05834197998047,
      "rewards/rejected": -51.087459564208984,
      "step": 250
    },
    {
      "epoch": 0.7102272727272727,
      "eval_logits/chosen": -3.0874154567718506,
      "eval_logits/rejected": -2.5713446140289307,
      "eval_logps/chosen": -571.5287475585938,
      "eval_logps/rejected": -5825.4208984375,
      "eval_loss": 0.0006546394433826208,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -2.0454704761505127,
      "eval_rewards/margins": 49.80085754394531,
      "eval_rewards/rejected": -51.8463249206543,
      "eval_runtime": 191.814,
      "eval_samples_per_second": 20.358,
      "eval_steps_per_second": 0.323,
      "step": 250
    },
    {
      "epoch": 0.7386363636363636,
      "grad_norm": 0.008133900264785228,
      "learning_rate": 9.748121349736891e-08,
      "logits/chosen": -3.1245901584625244,
      "logits/rejected": -2.5510289669036865,
      "logps/chosen": -562.6072387695312,
      "logps/rejected": -6041.6435546875,
      "loss": 0.001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.9334728717803955,
      "rewards/margins": 51.701171875,
      "rewards/rejected": -53.6346435546875,
      "step": 260
    },
    {
      "epoch": 0.7670454545454546,
      "grad_norm": 0.03464071283654092,
      "learning_rate": 7.857355122839673e-08,
      "logits/chosen": -3.0760347843170166,
      "logits/rejected": -2.473140239715576,
      "logps/chosen": -577.8392333984375,
      "logps/rejected": -6429.56005859375,
      "loss": 0.0013,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.981665015220642,
      "rewards/margins": 55.79956817626953,
      "rewards/rejected": -57.78123092651367,
      "step": 270
    },
    {
      "epoch": 0.7954545454545454,
      "grad_norm": 0.09200228880640939,
      "learning_rate": 6.135884496044244e-08,
      "logits/chosen": -3.094788074493408,
      "logits/rejected": -2.5839569568634033,
      "logps/chosen": -593.9009399414062,
      "logps/rejected": -6378.4140625,
      "loss": 0.0005,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.036696672439575,
      "rewards/margins": 55.2489128112793,
      "rewards/rejected": -57.285614013671875,
      "step": 280
    },
    {
      "epoch": 0.8238636363636364,
      "grad_norm": 0.04828363097903115,
      "learning_rate": 4.600710195020982e-08,
      "logits/chosen": -3.084293842315674,
      "logits/rejected": -2.5237388610839844,
      "logps/chosen": -586.5794067382812,
      "logps/rejected": -5471.75146484375,
      "loss": 0.0027,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.0138309001922607,
      "rewards/margins": 46.394798278808594,
      "rewards/rejected": -48.408634185791016,
      "step": 290
    },
    {
      "epoch": 0.8522727272727273,
      "grad_norm": 1.5124351519089532,
      "learning_rate": 3.2669931390104374e-08,
      "logits/chosen": -3.095336437225342,
      "logits/rejected": -2.56819748878479,
      "logps/chosen": -608.9658813476562,
      "logps/rejected": -6138.87451171875,
      "loss": 0.0055,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -2.1408629417419434,
      "rewards/margins": 52.5675163269043,
      "rewards/rejected": -54.70838165283203,
      "step": 300
    },
    {
      "epoch": 0.8522727272727273,
      "eval_logits/chosen": -3.0607314109802246,
      "eval_logits/rejected": -2.524188756942749,
      "eval_logps/chosen": -572.0144653320312,
      "eval_logps/rejected": -5950.38330078125,
      "eval_loss": 0.0002967408508993685,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -2.0503275394439697,
      "eval_rewards/margins": 51.04560852050781,
      "eval_rewards/rejected": -53.095943450927734,
      "eval_runtime": 191.8524,
      "eval_samples_per_second": 20.354,
      "eval_steps_per_second": 0.323,
      "step": 300
    },
    {
      "epoch": 0.8806818181818182,
      "grad_norm": 0.08621703547632295,
      "learning_rate": 2.147904716149135e-08,
      "logits/chosen": -3.0894367694854736,
      "logits/rejected": -2.5899417400360107,
      "logps/chosen": -578.2093505859375,
      "logps/rejected": -5487.2734375,
      "loss": 0.0003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.0271658897399902,
      "rewards/margins": 46.07707214355469,
      "rewards/rejected": -48.10424041748047,
      "step": 310
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.04429289003054511,
      "learning_rate": 1.254496706805433e-08,
      "logits/chosen": -3.071737051010132,
      "logits/rejected": -2.5150017738342285,
      "logps/chosen": -575.0449829101562,
      "logps/rejected": -5943.66552734375,
      "loss": 0.0011,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.9631164073944092,
      "rewards/margins": 51.358001708984375,
      "rewards/rejected": -53.32111358642578,
      "step": 320
    },
    {
      "epoch": 0.9375,
      "grad_norm": 0.05330126976035339,
      "learning_rate": 5.955921395237318e-09,
      "logits/chosen": -3.074503183364868,
      "logits/rejected": -2.5923588275909424,
      "logps/chosen": -587.8565673828125,
      "logps/rejected": -5959.51318359375,
      "loss": 0.0012,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.0806593894958496,
      "rewards/margins": 50.763099670410156,
      "rewards/rejected": -52.84375762939453,
      "step": 330
    },
    {
      "epoch": 0.9659090909090909,
      "grad_norm": 0.0015915884352458761,
      "learning_rate": 1.7769815745066474e-09,
      "logits/chosen": -3.0953116416931152,
      "logits/rejected": -2.6012253761291504,
      "logps/chosen": -584.4715576171875,
      "logps/rejected": -5886.49951171875,
      "loss": 0.0006,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.0364444255828857,
      "rewards/margins": 50.340614318847656,
      "rewards/rejected": -52.37705612182617,
      "step": 340
    },
    {
      "epoch": 0.9943181818181818,
      "grad_norm": 0.36200936509368786,
      "learning_rate": 4.9417557483610875e-11,
      "logits/chosen": -3.0942559242248535,
      "logits/rejected": -2.5950565338134766,
      "logps/chosen": -553.3095703125,
      "logps/rejected": -5615.38134765625,
      "loss": 0.0011,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.989404320716858,
      "rewards/margins": 47.7803955078125,
      "rewards/rejected": -49.769798278808594,
      "step": 350
    },
    {
      "epoch": 0.9943181818181818,
      "eval_logits/chosen": -3.071096897125244,
      "eval_logits/rejected": -2.540203809738159,
      "eval_logps/chosen": -571.614990234375,
      "eval_logps/rejected": -5922.82763671875,
      "eval_loss": 0.00029848425765521824,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -2.046332597732544,
      "eval_rewards/margins": 50.7740592956543,
      "eval_rewards/rejected": -52.82038879394531,
      "eval_runtime": 192.8009,
      "eval_samples_per_second": 20.254,
      "eval_steps_per_second": 0.322,
      "step": 350
    },
    {
      "epoch": 1.0,
      "step": 352,
      "total_flos": 0.0,
      "train_loss": 0.04021276280278614,
      "train_runtime": 10097.1377,
      "train_samples_per_second": 4.457,
      "train_steps_per_second": 0.035
    }
  ],
  "logging_steps": 10,
  "max_steps": 352,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}