{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 100,
  "global_step": 4164,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007204610951008645,
      "grad_norm": 2.336021900177002,
      "learning_rate": 1.199040767386091e-10,
      "logits/chosen": -1.3860063552856445,
      "logits/rejected": -1.3949532508850098,
      "logps/chosen": -34.621925354003906,
      "logps/rejected": -37.30891418457031,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.007204610951008645,
      "grad_norm": 2.7931599617004395,
      "learning_rate": 1.199040767386091e-09,
      "logits/chosen": -1.546767234802246,
      "logits/rejected": -1.5282517671585083,
      "logps/chosen": -42.52494812011719,
      "logps/rejected": -44.546756744384766,
      "loss": 0.6932,
      "rewards/accuracies": 0.3680555522441864,
      "rewards/chosen": -0.00010908626427408308,
      "rewards/margins": -0.00013866486551705748,
      "rewards/rejected": 2.95786012429744e-05,
      "step": 10
    },
    {
      "epoch": 0.01440922190201729,
      "grad_norm": 2.9333579540252686,
      "learning_rate": 2.398081534772182e-09,
      "logits/chosen": -1.5552335977554321,
      "logits/rejected": -1.5412750244140625,
      "logps/chosen": -44.075599670410156,
      "logps/rejected": -46.59809112548828,
      "loss": 0.6931,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 9.86563172773458e-05,
      "rewards/margins": 0.00012048264034092426,
      "rewards/rejected": -2.182633033953607e-05,
      "step": 20
    },
    {
      "epoch": 0.021613832853025938,
      "grad_norm": 3.4939088821411133,
      "learning_rate": 3.597122302158273e-09,
      "logits/chosen": -1.5116419792175293,
      "logits/rejected": -1.5045888423919678,
      "logps/chosen": -47.83784866333008,
      "logps/rejected": -50.80131149291992,
      "loss": 0.693,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.000109563407022506,
      "rewards/margins": 0.00020976344239898026,
      "rewards/rejected": -0.00010020002082455903,
      "step": 30
    },
    {
      "epoch": 0.02881844380403458,
      "grad_norm": 2.5776360034942627,
      "learning_rate": 4.796163069544364e-09,
      "logits/chosen": -1.558410406112671,
      "logits/rejected": -1.5545051097869873,
      "logps/chosen": -43.07380294799805,
      "logps/rejected": -45.55127716064453,
      "loss": 0.6932,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": -0.00022044967045076191,
      "rewards/margins": -0.00018095636914949864,
      "rewards/rejected": -3.949330493924208e-05,
      "step": 40
    },
    {
      "epoch": 0.03602305475504323,
      "grad_norm": 2.6562020778656006,
      "learning_rate": 5.995203836930456e-09,
      "logits/chosen": -1.4691417217254639,
      "logits/rejected": -1.46826171875,
      "logps/chosen": -43.00727081298828,
      "logps/rejected": -44.83103942871094,
      "loss": 0.6931,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -2.1715678144573758e-07,
      "rewards/margins": 4.3453786929603666e-05,
      "rewards/rejected": -4.367092333268374e-05,
      "step": 50
    },
    {
      "epoch": 0.043227665706051875,
      "grad_norm": 3.931988477706909,
      "learning_rate": 7.194244604316546e-09,
      "logits/chosen": -1.567317247390747,
      "logits/rejected": -1.5605990886688232,
      "logps/chosen": -50.6888542175293,
      "logps/rejected": -52.02692794799805,
      "loss": 0.6931,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": -2.2426731447922066e-05,
      "rewards/margins": 3.057774301851168e-05,
      "rewards/rejected": -5.3004478104412556e-05,
      "step": 60
    },
    {
      "epoch": 0.05043227665706052,
      "grad_norm": 2.3015387058258057,
      "learning_rate": 8.393285371702639e-09,
      "logits/chosen": -1.5361818075180054,
      "logits/rejected": -1.528193473815918,
      "logps/chosen": -50.07262420654297,
      "logps/rejected": -52.7786750793457,
      "loss": 0.6931,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -4.649541006074287e-05,
      "rewards/margins": -4.221068138576811e-06,
      "rewards/rejected": -4.2274350562365726e-05,
      "step": 70
    },
    {
      "epoch": 0.05763688760806916,
      "grad_norm": 3.439854145050049,
      "learning_rate": 9.592326139088728e-09,
      "logits/chosen": -1.5699741840362549,
      "logits/rejected": -1.562044382095337,
      "logps/chosen": -51.11206817626953,
      "logps/rejected": -52.69794464111328,
      "loss": 0.6931,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -4.887674094788963e-06,
      "rewards/margins": 0.0001481349318055436,
      "rewards/rejected": -0.00015302257088478655,
      "step": 80
    },
    {
      "epoch": 0.06484149855907781,
      "grad_norm": 2.781512975692749,
      "learning_rate": 1.0791366906474819e-08,
      "logits/chosen": -1.5034945011138916,
      "logits/rejected": -1.500799536705017,
      "logps/chosen": -49.011016845703125,
      "logps/rejected": -51.1546516418457,
      "loss": 0.6932,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 5.785684334114194e-05,
      "rewards/margins": -0.00010168743028771132,
      "rewards/rejected": 0.0001595442445250228,
      "step": 90
    },
    {
      "epoch": 0.07204610951008646,
      "grad_norm": 3.0006866455078125,
      "learning_rate": 1.1990407673860912e-08,
      "logits/chosen": -1.584449052810669,
      "logits/rejected": -1.5737826824188232,
      "logps/chosen": -45.691627502441406,
      "logps/rejected": -48.739620208740234,
      "loss": 0.6932,
      "rewards/accuracies": 0.48124998807907104,
      "rewards/chosen": -1.0443580322316848e-05,
      "rewards/margins": -0.00019995025650132447,
      "rewards/rejected": 0.00018950665253214538,
      "step": 100
    },
    {
      "epoch": 0.0792507204610951,
      "grad_norm": 2.3066394329071045,
      "learning_rate": 1.3189448441247003e-08,
      "logits/chosen": -1.4547593593597412,
      "logits/rejected": -1.4310551881790161,
      "logps/chosen": -48.961212158203125,
      "logps/rejected": -51.16516876220703,
      "loss": 0.6933,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": -0.0002893832861445844,
      "rewards/margins": -0.00031176736229099333,
      "rewards/rejected": 2.2384085241355933e-05,
      "step": 110
    },
    {
      "epoch": 0.08645533141210375,
      "grad_norm": 2.296614408493042,
      "learning_rate": 1.4388489208633092e-08,
      "logits/chosen": -1.4833275079727173,
      "logits/rejected": -1.480101227760315,
      "logps/chosen": -44.263572692871094,
      "logps/rejected": -46.608760833740234,
      "loss": 0.6932,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.0001023497898131609,
      "rewards/margins": -1.7168078557006083e-05,
      "rewards/rejected": -8.518168760929257e-05,
      "step": 120
    },
    {
      "epoch": 0.0936599423631124,
      "grad_norm": 3.2075729370117188,
      "learning_rate": 1.5587529976019183e-08,
      "logits/chosen": -1.5763187408447266,
      "logits/rejected": -1.5709102153778076,
      "logps/chosen": -49.41350555419922,
      "logps/rejected": -51.297760009765625,
      "loss": 0.6932,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": -7.49648897908628e-05,
      "rewards/margins": -0.00010434426076244563,
      "rewards/rejected": 2.9379374609561637e-05,
      "step": 130
    },
    {
      "epoch": 0.10086455331412104,
      "grad_norm": 2.821803092956543,
      "learning_rate": 1.6786570743405277e-08,
      "logits/chosen": -1.4521141052246094,
      "logits/rejected": -1.442638874053955,
      "logps/chosen": -45.84361267089844,
      "logps/rejected": -50.25464630126953,
      "loss": 0.6931,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": 3.1858147849561647e-05,
      "rewards/margins": 0.00010751285299193114,
      "rewards/rejected": -7.56547087803483e-05,
      "step": 140
    },
    {
      "epoch": 0.10806916426512968,
      "grad_norm": 3.5821127891540527,
      "learning_rate": 1.7985611510791365e-08,
      "logits/chosen": -1.471673607826233,
      "logits/rejected": -1.464890956878662,
      "logps/chosen": -48.29644775390625,
      "logps/rejected": -51.45283889770508,
      "loss": 0.693,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 3.749009920284152e-05,
      "rewards/margins": 0.00023978715762495995,
      "rewards/rejected": -0.0002022970438702032,
      "step": 150
    },
    {
      "epoch": 0.11527377521613832,
      "grad_norm": 2.457958459854126,
      "learning_rate": 1.9184652278177456e-08,
      "logits/chosen": -1.5046113729476929,
      "logits/rejected": -1.4862873554229736,
      "logps/chosen": -41.24737548828125,
      "logps/rejected": -44.58059310913086,
      "loss": 0.6932,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": 7.707150507485494e-05,
      "rewards/margins": -9.014904935611412e-05,
      "rewards/rejected": 0.00016722058353479952,
      "step": 160
    },
    {
      "epoch": 0.12247838616714697,
      "grad_norm": 3.1706480979919434,
      "learning_rate": 2.038369304556355e-08,
      "logits/chosen": -1.5169397592544556,
      "logits/rejected": -1.4981354475021362,
      "logps/chosen": -44.85956954956055,
      "logps/rejected": -46.871952056884766,
      "loss": 0.6932,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -6.305011629592627e-05,
      "rewards/margins": -7.952237501740456e-05,
      "rewards/rejected": 1.6472253264510073e-05,
      "step": 170
    },
    {
      "epoch": 0.12968299711815562,
      "grad_norm": 2.6257331371307373,
      "learning_rate": 2.1582733812949638e-08,
      "logits/chosen": -1.5798842906951904,
      "logits/rejected": -1.5681228637695312,
      "logps/chosen": -45.10202407836914,
      "logps/rejected": -46.846622467041016,
      "loss": 0.6932,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -9.954520646715537e-05,
      "rewards/margins": -8.836419874569401e-05,
      "rewards/rejected": -1.1181010449945461e-05,
      "step": 180
    },
    {
      "epoch": 0.13688760806916425,
      "grad_norm": 2.9169774055480957,
      "learning_rate": 2.278177458033573e-08,
      "logits/chosen": -1.5893226861953735,
      "logits/rejected": -1.5855244398117065,
      "logps/chosen": -42.26114273071289,
      "logps/rejected": -45.406829833984375,
      "loss": 0.6931,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 6.678106728941202e-06,
      "rewards/margins": 2.9820144845871255e-05,
      "rewards/rejected": -2.3142014470067807e-05,
      "step": 190
    },
    {
      "epoch": 0.1440922190201729,
      "grad_norm": 3.5420353412628174,
      "learning_rate": 2.3980815347721823e-08,
      "logits/chosen": -1.5361340045928955,
      "logits/rejected": -1.5295995473861694,
      "logps/chosen": -43.47291946411133,
      "logps/rejected": -47.111000061035156,
      "loss": 0.6931,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": 0.0001089770594262518,
      "rewards/margins": 1.1494899808894843e-05,
      "rewards/rejected": 9.748217416927218e-05,
      "step": 200
    },
    {
      "epoch": 0.15129682997118155,
      "grad_norm": 3.056239128112793,
      "learning_rate": 2.517985611510791e-08,
      "logits/chosen": -1.5651589632034302,
      "logits/rejected": -1.5538842678070068,
      "logps/chosen": -43.047855377197266,
      "logps/rejected": -43.400611877441406,
      "loss": 0.6932,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": -0.00010786174971144646,
      "rewards/margins": -0.00011335410817991942,
      "rewards/rejected": 5.4923671086726245e-06,
      "step": 210
    },
    {
      "epoch": 0.1585014409221902,
      "grad_norm": 2.8404786586761475,
      "learning_rate": 2.6378896882494006e-08,
      "logits/chosen": -1.4806840419769287,
      "logits/rejected": -1.4740344285964966,
      "logps/chosen": -47.371559143066406,
      "logps/rejected": -52.52393341064453,
      "loss": 0.6933,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.00014377260231412947,
      "rewards/margins": -0.00026818824699148536,
      "rewards/rejected": 0.00012441558646969497,
      "step": 220
    },
    {
      "epoch": 0.16570605187319884,
      "grad_norm": 2.566518545150757,
      "learning_rate": 2.7577937649880097e-08,
      "logits/chosen": -1.5321465730667114,
      "logits/rejected": -1.5307929515838623,
      "logps/chosen": -44.468902587890625,
      "logps/rejected": -48.26350021362305,
      "loss": 0.6931,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 9.540587598166894e-06,
      "rewards/margins": 0.00015555776190012693,
      "rewards/rejected": -0.00014601717703044415,
      "step": 230
    },
    {
      "epoch": 0.1729106628242075,
      "grad_norm": 2.9977493286132812,
      "learning_rate": 2.8776978417266184e-08,
      "logits/chosen": -1.579411268234253,
      "logits/rejected": -1.5686959028244019,
      "logps/chosen": -49.147926330566406,
      "logps/rejected": -51.22269821166992,
      "loss": 0.6931,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -6.021944136591628e-05,
      "rewards/margins": 0.00012635164603125304,
      "rewards/rejected": -0.00018657106556929648,
      "step": 240
    },
    {
      "epoch": 0.18011527377521613,
      "grad_norm": 4.014063835144043,
      "learning_rate": 2.997601918465228e-08,
      "logits/chosen": -1.4550889730453491,
      "logits/rejected": -1.4445072412490845,
      "logps/chosen": -49.6921501159668,
      "logps/rejected": -50.77552795410156,
      "loss": 0.693,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": 0.0001581499818712473,
      "rewards/margins": 0.0003015203692484647,
      "rewards/rejected": -0.0001433703873772174,
      "step": 250
    },
    {
      "epoch": 0.1873198847262248,
      "grad_norm": 3.186095952987671,
      "learning_rate": 3.1175059952038366e-08,
      "logits/chosen": -1.495941162109375,
      "logits/rejected": -1.4901201725006104,
      "logps/chosen": -49.999717712402344,
      "logps/rejected": -51.457984924316406,
      "loss": 0.6931,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": 9.421829599887133e-05,
      "rewards/margins": 0.0001734937832225114,
      "rewards/rejected": -7.927548722364008e-05,
      "step": 260
    },
    {
      "epoch": 0.19452449567723343,
      "grad_norm": 3.3234660625457764,
      "learning_rate": 3.237410071942446e-08,
      "logits/chosen": -1.5844049453735352,
      "logits/rejected": -1.5691239833831787,
      "logps/chosen": -46.7442512512207,
      "logps/rejected": -49.30167007446289,
      "loss": 0.6931,
      "rewards/accuracies": 0.48124998807907104,
      "rewards/chosen": 0.0002405096747679636,
      "rewards/margins": 1.5671601431677118e-05,
      "rewards/rejected": 0.00022483807697426528,
      "step": 270
    },
    {
      "epoch": 0.2017291066282421,
      "grad_norm": 2.917389154434204,
      "learning_rate": 3.3573141486810555e-08,
      "logits/chosen": -1.5494643449783325,
      "logits/rejected": -1.533740758895874,
      "logps/chosen": -42.46552276611328,
      "logps/rejected": -43.715232849121094,
      "loss": 0.6931,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": 5.060394687461667e-05,
      "rewards/margins": 1.1760695088014472e-05,
      "rewards/rejected": 3.884324905811809e-05,
      "step": 280
    },
    {
      "epoch": 0.20893371757925072,
      "grad_norm": 2.7840898036956787,
      "learning_rate": 3.477218225419664e-08,
      "logits/chosen": -1.579872727394104,
      "logits/rejected": -1.5693047046661377,
      "logps/chosen": -44.499839782714844,
      "logps/rejected": -45.791568756103516,
      "loss": 0.6931,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": 0.00012782707926817238,
      "rewards/margins": 0.00019142820383422077,
      "rewards/rejected": -6.360108818626031e-05,
      "step": 290
    },
    {
      "epoch": 0.21613832853025935,
      "grad_norm": 3.146667003631592,
      "learning_rate": 3.597122302158273e-08,
      "logits/chosen": -1.5557529926300049,
      "logits/rejected": -1.5390459299087524,
      "logps/chosen": -48.216209411621094,
      "logps/rejected": -50.678340911865234,
      "loss": 0.6933,
      "rewards/accuracies": 0.4437499940395355,
      "rewards/chosen": -0.00012882667942903936,
      "rewards/margins": -0.00033005099976435304,
      "rewards/rejected": 0.00020122430578339845,
      "step": 300
    },
    {
      "epoch": 0.22334293948126802,
      "grad_norm": 3.016376495361328,
      "learning_rate": 3.717026378896883e-08,
      "logits/chosen": -1.4412884712219238,
      "logits/rejected": -1.431753158569336,
      "logps/chosen": -48.03168487548828,
      "logps/rejected": -50.060646057128906,
      "loss": 0.6931,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 9.238120401278138e-05,
      "rewards/margins": 7.04801277606748e-05,
      "rewards/rejected": 2.1901068976148963e-05,
      "step": 310
    },
    {
      "epoch": 0.23054755043227665,
      "grad_norm": 2.544971466064453,
      "learning_rate": 3.836930455635491e-08,
      "logits/chosen": -1.5533429384231567,
      "logits/rejected": -1.53842031955719,
      "logps/chosen": -47.08930206298828,
      "logps/rejected": -51.531005859375,
      "loss": 0.6932,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.00025622855173423886,
      "rewards/margins": -9.28807639866136e-05,
      "rewards/rejected": 0.0003491092938929796,
      "step": 320
    },
    {
      "epoch": 0.2377521613832853,
      "grad_norm": 2.311227798461914,
      "learning_rate": 3.9568345323741003e-08,
      "logits/chosen": -1.5248558521270752,
      "logits/rejected": -1.5185081958770752,
      "logps/chosen": -50.7647819519043,
      "logps/rejected": -49.56135177612305,
      "loss": 0.6932,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -8.110304770525545e-05,
      "rewards/margins": -0.00011024585546692833,
      "rewards/rejected": 2.914278593380004e-05,
      "step": 330
    },
    {
      "epoch": 0.24495677233429394,
      "grad_norm": 2.6722328662872314,
      "learning_rate": 4.07673860911271e-08,
      "logits/chosen": -1.5785841941833496,
      "logits/rejected": -1.5681426525115967,
      "logps/chosen": -51.070430755615234,
      "logps/rejected": -52.11237335205078,
      "loss": 0.693,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.0002661117759998888,
      "rewards/margins": 0.0003887194616254419,
      "rewards/rejected": -0.00012260770017746836,
      "step": 340
    },
    {
      "epoch": 0.2521613832853026,
      "grad_norm": 3.674198865890503,
      "learning_rate": 4.1966426858513185e-08,
      "logits/chosen": -1.5093214511871338,
      "logits/rejected": -1.5045934915542603,
      "logps/chosen": -45.63981628417969,
      "logps/rejected": -48.58732986450195,
      "loss": 0.6931,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": 0.0002857402723748237,
      "rewards/margins": 0.00017633094103075564,
      "rewards/rejected": 0.00010940933134406805,
      "step": 350
    },
    {
      "epoch": 0.25936599423631124,
      "grad_norm": 3.499232053756714,
      "learning_rate": 4.3165467625899276e-08,
      "logits/chosen": -1.5016661882400513,
      "logits/rejected": -1.4912707805633545,
      "logps/chosen": -53.919525146484375,
      "logps/rejected": -56.54018020629883,
      "loss": 0.6932,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.00013425681390799582,
      "rewards/margins": -2.2593365429202095e-05,
      "rewards/rejected": 0.0001568501756992191,
      "step": 360
    },
    {
      "epoch": 0.2665706051873199,
      "grad_norm": 3.743687152862549,
      "learning_rate": 4.4364508393285374e-08,
      "logits/chosen": -1.4821672439575195,
      "logits/rejected": -1.4804311990737915,
      "logps/chosen": -48.30991744995117,
      "logps/rejected": -53.03217697143555,
      "loss": 0.6929,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.00033552644890733063,
      "rewards/margins": 0.0004464868106879294,
      "rewards/rejected": -0.00011096037633251399,
      "step": 370
    },
    {
      "epoch": 0.2737752161383285,
      "grad_norm": 2.4759514331817627,
      "learning_rate": 4.556354916067146e-08,
      "logits/chosen": -1.5723472833633423,
      "logits/rejected": -1.5651805400848389,
      "logps/chosen": -46.992698669433594,
      "logps/rejected": -48.03093719482422,
      "loss": 0.6932,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": 7.932665903354064e-05,
      "rewards/margins": -7.613336492795497e-05,
      "rewards/rejected": 0.00015546004578936845,
      "step": 380
    },
    {
      "epoch": 0.28097982708933716,
      "grad_norm": 2.923274040222168,
      "learning_rate": 4.676258992805755e-08,
      "logits/chosen": -1.544915795326233,
      "logits/rejected": -1.535043716430664,
      "logps/chosen": -48.17082595825195,
      "logps/rejected": -51.49504470825195,
      "loss": 0.6931,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.000253693520789966,
      "rewards/margins": 0.0001750610099406913,
      "rewards/rejected": 7.86325108492747e-05,
      "step": 390
    },
    {
      "epoch": 0.2881844380403458,
      "grad_norm": 3.412151575088501,
      "learning_rate": 4.796163069544365e-08,
      "logits/chosen": -1.5585193634033203,
      "logits/rejected": -1.553504467010498,
      "logps/chosen": -44.887977600097656,
      "logps/rejected": -46.11194610595703,
      "loss": 0.6932,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 0.00015390969929285347,
      "rewards/margins": -2.5745690436451696e-05,
      "rewards/rejected": 0.00017965536972042173,
      "step": 400
    },
    {
      "epoch": 0.2953890489913545,
      "grad_norm": 3.6315953731536865,
      "learning_rate": 4.916067146282973e-08,
      "logits/chosen": -1.509155511856079,
      "logits/rejected": -1.5060787200927734,
      "logps/chosen": -47.50922393798828,
      "logps/rejected": -49.60902786254883,
      "loss": 0.6932,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 9.884338214760646e-05,
      "rewards/margins": -4.629031172953546e-05,
      "rewards/rejected": 0.00014513370115309954,
      "step": 410
    },
    {
      "epoch": 0.3025936599423631,
      "grad_norm": 2.4514944553375244,
      "learning_rate": 4.999992091672379e-08,
      "logits/chosen": -1.4691174030303955,
      "logits/rejected": -1.4793603420257568,
      "logps/chosen": -45.651798248291016,
      "logps/rejected": -48.96102523803711,
      "loss": 0.6931,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": 0.00014277843001764268,
      "rewards/margins": 2.968159424199257e-05,
      "rewards/rejected": 0.00011309684487059712,
      "step": 420
    },
    {
      "epoch": 0.30979827089337175,
      "grad_norm": 2.288839817047119,
      "learning_rate": 4.999851500573209e-08,
      "logits/chosen": -1.497201681137085,
      "logits/rejected": -1.4975249767303467,
      "logps/chosen": -46.066871643066406,
      "logps/rejected": -46.192413330078125,
      "loss": 0.6931,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.00031205304549075663,
      "rewards/margins": 4.402028935146518e-05,
      "rewards/rejected": 0.00026803271612152457,
      "step": 430
    },
    {
      "epoch": 0.3170028818443804,
      "grad_norm": 2.4444422721862793,
      "learning_rate": 4.999535180235972e-08,
      "logits/chosen": -1.4983166456222534,
      "logits/rejected": -1.4901825189590454,
      "logps/chosen": -46.0124397277832,
      "logps/rejected": -49.43706512451172,
      "loss": 0.6931,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.000347124005202204,
      "rewards/margins": 0.00018896095571108162,
      "rewards/rejected": 0.0001581630203872919,
      "step": 440
    },
    {
      "epoch": 0.3242074927953891,
      "grad_norm": 3.173124313354492,
      "learning_rate": 4.9990431528966836e-08,
      "logits/chosen": -1.5115959644317627,
      "logits/rejected": -1.4905065298080444,
      "logps/chosen": -53.1868896484375,
      "logps/rejected": -51.287513732910156,
      "loss": 0.693,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.00036136750713922083,
      "rewards/margins": 0.00031076278537511826,
      "rewards/rejected": 5.0604761781869456e-05,
      "step": 450
    },
    {
      "epoch": 0.3314121037463977,
      "grad_norm": 3.7471864223480225,
      "learning_rate": 4.9983754531428326e-08,
      "logits/chosen": -1.5160473585128784,
      "logits/rejected": -1.4985129833221436,
      "logps/chosen": -53.772499084472656,
      "logps/rejected": -55.6755256652832,
      "loss": 0.6928,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.0006765121361240745,
      "rewards/margins": 0.0007202303386293352,
      "rewards/rejected": -4.371829709270969e-05,
      "step": 460
    },
    {
      "epoch": 0.33861671469740634,
      "grad_norm": 3.7323288917541504,
      "learning_rate": 4.997532127910954e-08,
      "logits/chosen": -1.5780543088912964,
      "logits/rejected": -1.5486719608306885,
      "logps/chosen": -52.630889892578125,
      "logps/rejected": -53.20234298706055,
      "loss": 0.6929,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.0005277583841234446,
      "rewards/margins": 0.00045975102693773806,
      "rewards/rejected": 6.800738628953695e-05,
      "step": 470
    },
    {
      "epoch": 0.345821325648415,
      "grad_norm": 3.6508657932281494,
      "learning_rate": 4.996513236483331e-08,
      "logits/chosen": -1.6470165252685547,
      "logits/rejected": -1.6330372095108032,
      "logps/chosen": -42.519134521484375,
      "logps/rejected": -45.442176818847656,
      "loss": 0.6929,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.0003384821757208556,
      "rewards/margins": 0.00045337239862419665,
      "rewards/rejected": -0.00011489020835142583,
      "step": 480
    },
    {
      "epoch": 0.3530259365994236,
      "grad_norm": 4.240896701812744,
      "learning_rate": 4.9953188504838225e-08,
      "logits/chosen": -1.5246374607086182,
      "logits/rejected": -1.5129501819610596,
      "logps/chosen": -46.44121170043945,
      "logps/rejected": -49.50823974609375,
      "loss": 0.6931,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.00048602544120512903,
      "rewards/margins": 0.00015522187459282577,
      "rewards/rejected": 0.0003308035375084728,
      "step": 490
    },
    {
      "epoch": 0.36023054755043227,
      "grad_norm": 2.8267297744750977,
      "learning_rate": 4.993949053872834e-08,
      "logits/chosen": -1.5284509658813477,
      "logits/rejected": -1.5051023960113525,
      "logps/chosen": -42.638057708740234,
      "logps/rejected": -45.888179779052734,
      "loss": 0.6929,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": 0.0007548181456513703,
      "rewards/margins": 0.0005205943016335368,
      "rewards/rejected": 0.0002342238585697487,
      "step": 500
    },
    {
      "epoch": 0.36743515850144093,
      "grad_norm": 2.894747257232666,
      "learning_rate": 4.9924039429414086e-08,
      "logits/chosen": -1.6394548416137695,
      "logits/rejected": -1.6217561960220337,
      "logps/chosen": -45.99773025512695,
      "logps/rejected": -47.97636795043945,
      "loss": 0.6929,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.000765104778110981,
      "rewards/margins": 0.0005781695945188403,
      "rewards/rejected": 0.000186935139936395,
      "step": 510
    },
    {
      "epoch": 0.3746397694524496,
      "grad_norm": 3.5549099445343018,
      "learning_rate": 4.990683626304467e-08,
      "logits/chosen": -1.5345748662948608,
      "logits/rejected": -1.5293605327606201,
      "logps/chosen": -53.918365478515625,
      "logps/rejected": -56.037811279296875,
      "loss": 0.693,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 0.00044068036368116736,
      "rewards/margins": 0.00020219176076352596,
      "rewards/rejected": 0.00023848857381381094,
      "step": 520
    },
    {
      "epoch": 0.3818443804034582,
      "grad_norm": 3.221543788909912,
      "learning_rate": 4.9887882248931646e-08,
      "logits/chosen": -1.4587208032608032,
      "logits/rejected": -1.4379479885101318,
      "logps/chosen": -46.45323181152344,
      "logps/rejected": -47.54587173461914,
      "loss": 0.6931,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 0.0005941767594777048,
      "rewards/margins": 6.761190161341801e-05,
      "rewards/rejected": 0.0005265648360364139,
      "step": 530
    },
    {
      "epoch": 0.38904899135446686,
      "grad_norm": 3.1585187911987305,
      "learning_rate": 4.986717871946393e-08,
      "logits/chosen": -1.4854298830032349,
      "logits/rejected": -1.4644863605499268,
      "logps/chosen": -45.88875198364258,
      "logps/rejected": -47.8057975769043,
      "loss": 0.6929,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.0008031625184230506,
      "rewards/margins": 0.0005653280531987548,
      "rewards/rejected": 0.00023783447977621108,
      "step": 540
    },
    {
      "epoch": 0.3962536023054755,
      "grad_norm": 3.0785837173461914,
      "learning_rate": 4.984472713001416e-08,
      "logits/chosen": -1.4299240112304688,
      "logits/rejected": -1.4215822219848633,
      "logps/chosen": -48.3651008605957,
      "logps/rejected": -48.36215591430664,
      "loss": 0.6929,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.0007953125168569386,
      "rewards/margins": 0.000552699959371239,
      "rewards/rejected": 0.0002426125865895301,
      "step": 550
    },
    {
      "epoch": 0.4034582132564842,
      "grad_norm": 3.2375829219818115,
      "learning_rate": 4.982052905883637e-08,
      "logits/chosen": -1.5734624862670898,
      "logits/rejected": -1.5630805492401123,
      "logps/chosen": -48.487159729003906,
      "logps/rejected": -49.93968200683594,
      "loss": 0.6929,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.0006789817707613111,
      "rewards/margins": 0.0005560799618251622,
      "rewards/rejected": 0.00012290175072848797,
      "step": 560
    },
    {
      "epoch": 0.4106628242074928,
      "grad_norm": 2.9218199253082275,
      "learning_rate": 4.979458620695505e-08,
      "logits/chosen": -1.552829384803772,
      "logits/rejected": -1.523561716079712,
      "logps/chosen": -52.486000061035156,
      "logps/rejected": -54.49695587158203,
      "loss": 0.6928,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.0009576571173965931,
      "rewards/margins": 0.0007622435805387795,
      "rewards/rejected": 0.00019541350775398314,
      "step": 570
    },
    {
      "epoch": 0.41786743515850144,
      "grad_norm": 3.183899164199829,
      "learning_rate": 4.976690039804555e-08,
      "logits/chosen": -1.5768444538116455,
      "logits/rejected": -1.563186764717102,
      "logps/chosen": -42.62350845336914,
      "logps/rejected": -44.09015655517578,
      "loss": 0.6928,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.000733331311494112,
      "rewards/margins": 0.0006372180068865418,
      "rewards/rejected": 9.611332643544301e-05,
      "step": 580
    },
    {
      "epoch": 0.4250720461095101,
      "grad_norm": 2.7339372634887695,
      "learning_rate": 4.973747357830592e-08,
      "logits/chosen": -1.527267336845398,
      "logits/rejected": -1.525882601737976,
      "logps/chosen": -47.51892852783203,
      "logps/rejected": -53.13426971435547,
      "loss": 0.6928,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.0010354900732636452,
      "rewards/margins": 0.0007501026266254485,
      "rewards/rejected": 0.0002853873302228749,
      "step": 590
    },
    {
      "epoch": 0.4322766570605187,
      "grad_norm": 2.791902780532837,
      "learning_rate": 4.970630781632009e-08,
      "logits/chosen": -1.6299854516983032,
      "logits/rejected": -1.619668960571289,
      "logps/chosen": -45.41339874267578,
      "logps/rejected": -49.07583236694336,
      "loss": 0.6928,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.0008391918381676078,
      "rewards/margins": 0.0007512712036259472,
      "rewards/rejected": 8.792075095698237e-05,
      "step": 600
    },
    {
      "epoch": 0.43948126801152737,
      "grad_norm": 3.9196627140045166,
      "learning_rate": 4.967340530291242e-08,
      "logits/chosen": -1.534325361251831,
      "logits/rejected": -1.5173766613006592,
      "logps/chosen": -50.456092834472656,
      "logps/rejected": -51.076194763183594,
      "loss": 0.693,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.0006875869585201144,
      "rewards/margins": 0.00029638074920512736,
      "rewards/rejected": 0.0003912062384188175,
      "step": 610
    },
    {
      "epoch": 0.44668587896253603,
      "grad_norm": 2.6910152435302734,
      "learning_rate": 4.9638768350993755e-08,
      "logits/chosen": -1.5682487487792969,
      "logits/rejected": -1.5539703369140625,
      "logps/chosen": -42.393123626708984,
      "logps/rejected": -44.458831787109375,
      "loss": 0.6929,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.0009301385725848377,
      "rewards/margins": 0.0005425423150882125,
      "rewards/rejected": 0.00038759634480811656,
      "step": 620
    },
    {
      "epoch": 0.4538904899135447,
      "grad_norm": 2.3632237911224365,
      "learning_rate": 4.9602399395398786e-08,
      "logits/chosen": -1.5710407495498657,
      "logits/rejected": -1.5638505220413208,
      "logps/chosen": -43.065277099609375,
      "logps/rejected": -46.55408477783203,
      "loss": 0.6928,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.0009118100861087441,
      "rewards/margins": 0.0007094824686646461,
      "rewards/rejected": 0.00020232764654792845,
      "step": 630
    },
    {
      "epoch": 0.4610951008645533,
      "grad_norm": 2.9160289764404297,
      "learning_rate": 4.9564300992714914e-08,
      "logits/chosen": -1.42928946018219,
      "logits/rejected": -1.4244521856307983,
      "logps/chosen": -45.373146057128906,
      "logps/rejected": -48.03164291381836,
      "loss": 0.6926,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.001190593116916716,
      "rewards/margins": 0.001122686779126525,
      "rewards/rejected": 6.790638872189447e-05,
      "step": 640
    },
    {
      "epoch": 0.46829971181556196,
      "grad_norm": 3.4644691944122314,
      "learning_rate": 4.952447582110253e-08,
      "logits/chosen": -1.6130802631378174,
      "logits/rejected": -1.5843290090560913,
      "logps/chosen": -45.46699905395508,
      "logps/rejected": -45.392459869384766,
      "loss": 0.6929,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.0012219400377944112,
      "rewards/margins": 0.0004954574396833777,
      "rewards/rejected": 0.0007264827145263553,
      "step": 650
    },
    {
      "epoch": 0.4755043227665706,
      "grad_norm": 3.4215919971466064,
      "learning_rate": 4.948292668010676e-08,
      "logits/chosen": -1.5425606966018677,
      "logits/rejected": -1.5405076742172241,
      "logps/chosen": -47.15483474731445,
      "logps/rejected": -50.021751403808594,
      "loss": 0.6927,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.0011111641069874167,
      "rewards/margins": 0.000992415938526392,
      "rewards/rejected": 0.00011874813208123669,
      "step": 660
    },
    {
      "epoch": 0.4827089337175792,
      "grad_norm": 3.4226503372192383,
      "learning_rate": 4.943965649046064e-08,
      "logits/chosen": -1.501985788345337,
      "logits/rejected": -1.4745370149612427,
      "logps/chosen": -49.8314323425293,
      "logps/rejected": -51.143699645996094,
      "loss": 0.6929,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 0.0009843518491834402,
      "rewards/margins": 0.00042856420623138547,
      "rewards/rejected": 0.0005557876429520547,
      "step": 670
    },
    {
      "epoch": 0.4899135446685879,
      "grad_norm": 4.60683536529541,
      "learning_rate": 4.9394668293879835e-08,
      "logits/chosen": -1.4445635080337524,
      "logits/rejected": -1.4311821460723877,
      "logps/chosen": -49.72310256958008,
      "logps/rejected": -49.60923767089844,
      "loss": 0.6925,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.0016157480422407389,
      "rewards/margins": 0.0012294099433347583,
      "rewards/rejected": 0.00038633812800981104,
      "step": 680
    },
    {
      "epoch": 0.49711815561959655,
      "grad_norm": 3.328009605407715,
      "learning_rate": 4.93479652528488e-08,
      "logits/chosen": -1.5311024188995361,
      "logits/rejected": -1.5204837322235107,
      "logps/chosen": -47.83661651611328,
      "logps/rejected": -50.61973190307617,
      "loss": 0.6928,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.0011463447008281946,
      "rewards/margins": 0.0007241896819323301,
      "rewards/rejected": 0.0004221551935188472,
      "step": 690
    },
    {
      "epoch": 0.5043227665706052,
      "grad_norm": 2.7923624515533447,
      "learning_rate": 4.929955065039848e-08,
      "logits/chosen": -1.544571042060852,
      "logits/rejected": -1.5311849117279053,
      "logps/chosen": -46.463233947753906,
      "logps/rejected": -49.25968933105469,
      "loss": 0.6927,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.0013559869257733226,
      "rewards/margins": 0.00096817099256441,
      "rewards/rejected": 0.00038781590410508215,
      "step": 700
    },
    {
      "epoch": 0.5115273775216138,
      "grad_norm": 2.8159823417663574,
      "learning_rate": 4.92494278898755e-08,
      "logits/chosen": -1.5252270698547363,
      "logits/rejected": -1.509064793586731,
      "logps/chosen": -41.342586517333984,
      "logps/rejected": -43.392704010009766,
      "loss": 0.6927,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.0013612203765660524,
      "rewards/margins": 0.000969057553447783,
      "rewards/rejected": 0.0003921627067029476,
      "step": 710
    },
    {
      "epoch": 0.5187319884726225,
      "grad_norm": 3.322333574295044,
      "learning_rate": 4.9197600494702955e-08,
      "logits/chosen": -1.4962725639343262,
      "logits/rejected": -1.4811670780181885,
      "logps/chosen": -49.289588928222656,
      "logps/rejected": -52.451210021972656,
      "loss": 0.6926,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": 0.0014330081176012754,
      "rewards/margins": 0.0010211131302639842,
      "rewards/rejected": 0.00041189510375261307,
      "step": 720
    },
    {
      "epoch": 0.5259365994236311,
      "grad_norm": 2.875361442565918,
      "learning_rate": 4.9144072108132725e-08,
      "logits/chosen": -1.5103423595428467,
      "logits/rejected": -1.490912675857544,
      "logps/chosen": -48.928245544433594,
      "logps/rejected": -51.06798553466797,
      "loss": 0.6927,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.0015080540906637907,
      "rewards/margins": 0.000958929886110127,
      "rewards/rejected": 0.0005491242045536637,
      "step": 730
    },
    {
      "epoch": 0.5331412103746398,
      "grad_norm": 2.9019076824188232,
      "learning_rate": 4.908884649298937e-08,
      "logits/chosen": -1.5038750171661377,
      "logits/rejected": -1.5000956058502197,
      "logps/chosen": -46.73221206665039,
      "logps/rejected": -46.28728485107422,
      "loss": 0.6929,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.0010901311179623008,
      "rewards/margins": 0.0005718341562896967,
      "rewards/rejected": 0.0005182969616726041,
      "step": 740
    },
    {
      "epoch": 0.5403458213256485,
      "grad_norm": 2.882866859436035,
      "learning_rate": 4.903192753140557e-08,
      "logits/chosen": -1.5271108150482178,
      "logits/rejected": -1.510434865951538,
      "logps/chosen": -48.88886260986328,
      "logps/rejected": -50.089317321777344,
      "loss": 0.6924,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.0017092444468289614,
      "rewards/margins": 0.001493553863838315,
      "rewards/rejected": 0.00021569046657532454,
      "step": 750
    },
    {
      "epoch": 0.547550432276657,
      "grad_norm": 3.3220198154449463,
      "learning_rate": 4.897331922454931e-08,
      "logits/chosen": -1.4528629779815674,
      "logits/rejected": -1.4513108730316162,
      "logps/chosen": -45.530757904052734,
      "logps/rejected": -48.61653518676758,
      "loss": 0.6927,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.0014288409147411585,
      "rewards/margins": 0.0009447381016798317,
      "rewards/rejected": 0.00048410287126898766,
      "step": 760
    },
    {
      "epoch": 0.5547550432276657,
      "grad_norm": 3.242431640625,
      "learning_rate": 4.891302569234256e-08,
      "logits/chosen": -1.4737539291381836,
      "logits/rejected": -1.4677354097366333,
      "logps/chosen": -43.22222137451172,
      "logps/rejected": -45.910953521728516,
      "loss": 0.6923,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.0018933138344436884,
      "rewards/margins": 0.0016789849614724517,
      "rewards/rejected": 0.0002143288729712367,
      "step": 770
    },
    {
      "epoch": 0.5619596541786743,
      "grad_norm": 2.821153163909912,
      "learning_rate": 4.8851051173171656e-08,
      "logits/chosen": -1.4989246129989624,
      "logits/rejected": -1.4895321130752563,
      "logps/chosen": -48.4155387878418,
      "logps/rejected": -50.1673583984375,
      "loss": 0.6927,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.0015832766657695174,
      "rewards/margins": 0.0008711686241440475,
      "rewards/rejected": 0.0007121083326637745,
      "step": 780
    },
    {
      "epoch": 0.569164265129683,
      "grad_norm": 2.891462802886963,
      "learning_rate": 4.87874000235894e-08,
      "logits/chosen": -1.5470997095108032,
      "logits/rejected": -1.5371620655059814,
      "logps/chosen": -49.890480041503906,
      "logps/rejected": -53.466880798339844,
      "loss": 0.6923,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.0018944181501865387,
      "rewards/margins": 0.001705197966657579,
      "rewards/rejected": 0.00018922006711363792,
      "step": 790
    },
    {
      "epoch": 0.5763688760806917,
      "grad_norm": 3.3358006477355957,
      "learning_rate": 4.872207671800876e-08,
      "logits/chosen": -1.5252196788787842,
      "logits/rejected": -1.5138275623321533,
      "logps/chosen": -46.89189910888672,
      "logps/rejected": -47.887367248535156,
      "loss": 0.6926,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": 0.0016973629826679826,
      "rewards/margins": 0.0011269793612882495,
      "rewards/rejected": 0.0005703835631720722,
      "step": 800
    },
    {
      "epoch": 0.5835734870317003,
      "grad_norm": 2.7009220123291016,
      "learning_rate": 4.865508584838841e-08,
      "logits/chosen": -1.5175960063934326,
      "logits/rejected": -1.5209373235702515,
      "logps/chosen": -44.73839569091797,
      "logps/rejected": -47.89008712768555,
      "loss": 0.6925,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.0014606156619265676,
      "rewards/margins": 0.0012506326893344522,
      "rewards/rejected": 0.00020998305990360677,
      "step": 810
    },
    {
      "epoch": 0.590778097982709,
      "grad_norm": 2.719752788543701,
      "learning_rate": 4.858643212390985e-08,
      "logits/chosen": -1.552513837814331,
      "logits/rejected": -1.5306655168533325,
      "logps/chosen": -46.937049865722656,
      "logps/rejected": -47.570526123046875,
      "loss": 0.6924,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.0019067225512117147,
      "rewards/margins": 0.0016014000866562128,
      "rewards/rejected": 0.0003053225518669933,
      "step": 820
    },
    {
      "epoch": 0.5979827089337176,
      "grad_norm": 2.6290316581726074,
      "learning_rate": 4.851612037064643e-08,
      "logits/chosen": -1.5101115703582764,
      "logits/rejected": -1.5031507015228271,
      "logps/chosen": -41.800697326660156,
      "logps/rejected": -44.64197540283203,
      "loss": 0.6924,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.002227164339274168,
      "rewards/margins": 0.0015375663060694933,
      "rewards/rejected": 0.0006895981496199965,
      "step": 830
    },
    {
      "epoch": 0.6051873198847262,
      "grad_norm": 2.2590839862823486,
      "learning_rate": 4.8444155531224065e-08,
      "logits/chosen": -1.5194236040115356,
      "logits/rejected": -1.5119448900222778,
      "logps/chosen": -47.19294357299805,
      "logps/rejected": -47.494895935058594,
      "loss": 0.6922,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.0023757501039654016,
      "rewards/margins": 0.0018679177155718207,
      "rewards/rejected": 0.0005078322719782591,
      "step": 840
    },
    {
      "epoch": 0.6123919308357348,
      "grad_norm": 3.8300580978393555,
      "learning_rate": 4.8370542664473805e-08,
      "logits/chosen": -1.5282552242279053,
      "logits/rejected": -1.5171794891357422,
      "logps/chosen": -47.18886947631836,
      "logps/rejected": -50.46569061279297,
      "loss": 0.6922,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.0021307552233338356,
      "rewards/margins": 0.001966592390090227,
      "rewards/rejected": 0.00016416283324360847,
      "step": 850
    },
    {
      "epoch": 0.6195965417867435,
      "grad_norm": 2.7995476722717285,
      "learning_rate": 4.829528694507624e-08,
      "logits/chosen": -1.5349972248077393,
      "logits/rejected": -1.5197311639785767,
      "logps/chosen": -56.855506896972656,
      "logps/rejected": -56.77360153198242,
      "loss": 0.6923,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.002222384326159954,
      "rewards/margins": 0.0018006416503340006,
      "rewards/rejected": 0.00042174261761829257,
      "step": 860
    },
    {
      "epoch": 0.6268011527377522,
      "grad_norm": 3.1079301834106445,
      "learning_rate": 4.821839366319768e-08,
      "logits/chosen": -1.5740025043487549,
      "logits/rejected": -1.563123345375061,
      "logps/chosen": -47.606285095214844,
      "logps/rejected": -50.62641906738281,
      "loss": 0.6922,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.0018488764762878418,
      "rewards/margins": 0.001883768243715167,
      "rewards/rejected": -3.4891847462859005e-05,
      "step": 870
    },
    {
      "epoch": 0.6340057636887608,
      "grad_norm": 3.0539097785949707,
      "learning_rate": 4.813986822411833e-08,
      "logits/chosen": -1.5950686931610107,
      "logits/rejected": -1.5871632099151611,
      "logps/chosen": -46.467430114746094,
      "logps/rejected": -47.59926223754883,
      "loss": 0.6924,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.0018272616434842348,
      "rewards/margins": 0.0014566404279321432,
      "rewards/rejected": 0.0003706212155520916,
      "step": 880
    },
    {
      "epoch": 0.6412103746397695,
      "grad_norm": 2.957125663757324,
      "learning_rate": 4.805971614785231e-08,
      "logits/chosen": -1.5934646129608154,
      "logits/rejected": -1.5831199884414673,
      "logps/chosen": -44.18696975708008,
      "logps/rejected": -45.84946823120117,
      "loss": 0.6922,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.0023696511052548885,
      "rewards/margins": 0.001978666288778186,
      "rewards/rejected": 0.00039098464185371995,
      "step": 890
    },
    {
      "epoch": 0.6484149855907781,
      "grad_norm": 3.1889781951904297,
      "learning_rate": 4.797794306875963e-08,
      "logits/chosen": -1.4427862167358398,
      "logits/rejected": -1.4459459781646729,
      "logps/chosen": -52.905487060546875,
      "logps/rejected": -56.0252685546875,
      "loss": 0.6925,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.0017143869772553444,
      "rewards/margins": 0.0013808589428663254,
      "rewards/rejected": 0.000333528034389019,
      "step": 900
    },
    {
      "epoch": 0.6556195965417867,
      "grad_norm": 3.1252031326293945,
      "learning_rate": 4.7894554735150076e-08,
      "logits/chosen": -1.4938929080963135,
      "logits/rejected": -1.486290693283081,
      "logps/chosen": -50.43352508544922,
      "logps/rejected": -51.920677185058594,
      "loss": 0.6926,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.0016551424050703645,
      "rewards/margins": 0.0010273735970258713,
      "rewards/rejected": 0.0006277688080444932,
      "step": 910
    },
    {
      "epoch": 0.6628242074927954,
      "grad_norm": 2.4982540607452393,
      "learning_rate": 4.7809557008879185e-08,
      "logits/chosen": -1.5264360904693604,
      "logits/rejected": -1.514701247215271,
      "logps/chosen": -42.05514907836914,
      "logps/rejected": -43.98528289794922,
      "loss": 0.692,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.002506498945876956,
      "rewards/margins": 0.0023819494526833296,
      "rewards/rejected": 0.00012454968236852437,
      "step": 920
    },
    {
      "epoch": 0.670028818443804,
      "grad_norm": 3.3386528491973877,
      "learning_rate": 4.772295586493613e-08,
      "logits/chosen": -1.5923378467559814,
      "logits/rejected": -1.5789897441864014,
      "logps/chosen": -46.37192916870117,
      "logps/rejected": -48.806602478027344,
      "loss": 0.6922,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.002459385199472308,
      "rewards/margins": 0.001840058364905417,
      "rewards/rejected": 0.0006193270673975348,
      "step": 930
    },
    {
      "epoch": 0.6772334293948127,
      "grad_norm": 2.304319143295288,
      "learning_rate": 4.763475739102374e-08,
      "logits/chosen": -1.4732818603515625,
      "logits/rejected": -1.468638300895691,
      "logps/chosen": -54.89690017700195,
      "logps/rejected": -55.61511993408203,
      "loss": 0.6923,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.002316520782187581,
      "rewards/margins": 0.00171504239551723,
      "rewards/rejected": 0.0006014782702550292,
      "step": 940
    },
    {
      "epoch": 0.6844380403458213,
      "grad_norm": 2.9147212505340576,
      "learning_rate": 4.754496778713054e-08,
      "logits/chosen": -1.4295583963394165,
      "logits/rejected": -1.443414330482483,
      "logps/chosen": -46.24966049194336,
      "logps/rejected": -50.85737228393555,
      "loss": 0.6925,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.00283303065225482,
      "rewards/margins": 0.001242567435838282,
      "rewards/rejected": 0.0015904635656625032,
      "step": 950
    },
    {
      "epoch": 0.69164265129683,
      "grad_norm": 2.9757370948791504,
      "learning_rate": 4.7453593365094926e-08,
      "logits/chosen": -1.5650156736373901,
      "logits/rejected": -1.5567947626113892,
      "logps/chosen": -48.92793655395508,
      "logps/rejected": -51.33110809326172,
      "loss": 0.6923,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.00195170973893255,
      "rewards/margins": 0.0016631295438855886,
      "rewards/rejected": 0.00028858016594313085,
      "step": 960
    },
    {
      "epoch": 0.6988472622478387,
      "grad_norm": 3.6602158546447754,
      "learning_rate": 4.736064054816145e-08,
      "logits/chosen": -1.5796701908111572,
      "logits/rejected": -1.571428656578064,
      "logps/chosen": -44.42335510253906,
      "logps/rejected": -47.71165084838867,
      "loss": 0.6918,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.003234829055145383,
      "rewards/margins": 0.0026938277296721935,
      "rewards/rejected": 0.0005410015000961721,
      "step": 970
    },
    {
      "epoch": 0.7060518731988472,
      "grad_norm": 2.7763288021087646,
      "learning_rate": 4.726611587052933e-08,
      "logits/chosen": -1.4306137561798096,
      "logits/rejected": -1.4332586526870728,
      "logps/chosen": -50.7025260925293,
      "logps/rejected": -55.78404998779297,
      "loss": 0.6925,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.0023340326733887196,
      "rewards/margins": 0.0012934005353599787,
      "rewards/rejected": 0.0010406316723674536,
      "step": 980
    },
    {
      "epoch": 0.7132564841498559,
      "grad_norm": 3.8869450092315674,
      "learning_rate": 4.71700259768931e-08,
      "logits/chosen": -1.5388015508651733,
      "logits/rejected": -1.53265380859375,
      "logps/chosen": -50.47968673706055,
      "logps/rejected": -51.989112854003906,
      "loss": 0.6922,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.002496583852916956,
      "rewards/margins": 0.0018227867549285293,
      "rewards/rejected": 0.0006737969815731049,
      "step": 990
    },
    {
      "epoch": 0.7204610951008645,
      "grad_norm": 2.7834904193878174,
      "learning_rate": 4.707237762197549e-08,
      "logits/chosen": -1.5226459503173828,
      "logits/rejected": -1.5110045671463013,
      "logps/chosen": -47.024845123291016,
      "logps/rejected": -49.01807403564453,
      "loss": 0.6922,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": 0.0033415439538657665,
      "rewards/margins": 0.0018541000317782164,
      "rewards/rejected": 0.0014874439220875502,
      "step": 1000
    },
    {
      "epoch": 0.7276657060518732,
      "grad_norm": 3.953099250793457,
      "learning_rate": 4.697317767005265e-08,
      "logits/chosen": -1.5304441452026367,
      "logits/rejected": -1.518947958946228,
      "logps/chosen": -43.01828384399414,
      "logps/rejected": -44.82352066040039,
      "loss": 0.692,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.0028657042421400547,
      "rewards/margins": 0.0023243515752255917,
      "rewards/rejected": 0.0005413526087068021,
      "step": 1010
    },
    {
      "epoch": 0.7348703170028819,
      "grad_norm": 2.848383665084839,
      "learning_rate": 4.6872433094471577e-08,
      "logits/chosen": -1.5489187240600586,
      "logits/rejected": -1.5362749099731445,
      "logps/chosen": -46.616783142089844,
      "logps/rejected": -48.41503143310547,
      "loss": 0.6927,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.002097527962177992,
      "rewards/margins": 0.0009780559921637177,
      "rewards/rejected": 0.0011194719700142741,
      "step": 1020
    },
    {
      "epoch": 0.7420749279538905,
      "grad_norm": 2.6786868572235107,
      "learning_rate": 4.677015097715994e-08,
      "logits/chosen": -1.4802360534667969,
      "logits/rejected": -1.472486972808838,
      "logps/chosen": -43.47395706176758,
      "logps/rejected": -46.79792785644531,
      "loss": 0.6916,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.0036659184843301773,
      "rewards/margins": 0.003058892907574773,
      "rewards/rejected": 0.0006070258095860481,
      "step": 1030
    },
    {
      "epoch": 0.7492795389048992,
      "grad_norm": 2.420900583267212,
      "learning_rate": 4.666633850812825e-08,
      "logits/chosen": -1.523895502090454,
      "logits/rejected": -1.5075430870056152,
      "logps/chosen": -46.26005554199219,
      "logps/rejected": -48.20013427734375,
      "loss": 0.6921,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.002487817080691457,
      "rewards/margins": 0.002060279715806246,
      "rewards/rejected": 0.0004275373066775501,
      "step": 1040
    },
    {
      "epoch": 0.7564841498559077,
      "grad_norm": 2.367501974105835,
      "learning_rate": 4.656100298496439e-08,
      "logits/chosen": -1.43381929397583,
      "logits/rejected": -1.4203495979309082,
      "logps/chosen": -41.25200271606445,
      "logps/rejected": -44.32691955566406,
      "loss": 0.6917,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.0034980247728526592,
      "rewards/margins": 0.002962891710922122,
      "rewards/rejected": 0.0005351334111765027,
      "step": 1050
    },
    {
      "epoch": 0.7636887608069164,
      "grad_norm": 3.0824179649353027,
      "learning_rate": 4.6454151812320715e-08,
      "logits/chosen": -1.5102834701538086,
      "logits/rejected": -1.4844160079956055,
      "logps/chosen": -47.216556549072266,
      "logps/rejected": -48.71002960205078,
      "loss": 0.6916,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.0031948499381542206,
      "rewards/margins": 0.0031615805346518755,
      "rewards/rejected": 3.326934529468417e-05,
      "step": 1060
    },
    {
      "epoch": 0.770893371757925,
      "grad_norm": 3.8348448276519775,
      "learning_rate": 4.6345792501393434e-08,
      "logits/chosen": -1.4996126890182495,
      "logits/rejected": -1.4940050840377808,
      "logps/chosen": -53.705413818359375,
      "logps/rejected": -57.729454040527344,
      "loss": 0.6917,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.0037941508926451206,
      "rewards/margins": 0.002940600039437413,
      "rewards/rejected": 0.0008535509696230292,
      "step": 1070
    },
    {
      "epoch": 0.7780979827089337,
      "grad_norm": 3.0553150177001953,
      "learning_rate": 4.6235932669394676e-08,
      "logits/chosen": -1.5072972774505615,
      "logits/rejected": -1.499051809310913,
      "logps/chosen": -48.09918975830078,
      "logps/rejected": -51.08711624145508,
      "loss": 0.6916,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.004021850414574146,
      "rewards/margins": 0.0031438730657100677,
      "rewards/rejected": 0.0008779771742410958,
      "step": 1080
    },
    {
      "epoch": 0.7853025936599424,
      "grad_norm": 3.5560803413391113,
      "learning_rate": 4.612458003901698e-08,
      "logits/chosen": -1.5295929908752441,
      "logits/rejected": -1.5242204666137695,
      "logps/chosen": -52.4840202331543,
      "logps/rejected": -56.06623458862305,
      "loss": 0.6914,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.002851675031706691,
      "rewards/margins": 0.0034307793248444796,
      "rewards/rejected": -0.0005791039438918233,
      "step": 1090
    },
    {
      "epoch": 0.792507204610951,
      "grad_norm": 3.1871278285980225,
      "learning_rate": 4.6011742437890476e-08,
      "logits/chosen": -1.5383789539337158,
      "logits/rejected": -1.515812873840332,
      "logps/chosen": -47.310516357421875,
      "logps/rejected": -48.80550765991211,
      "loss": 0.6918,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.0031328003387898207,
| "rewards/margins": 0.002742994111031294, | |
| "rewards/rejected": 0.0003898058203049004, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7997118155619597, | |
| "grad_norm": 2.296072483062744, | |
| "learning_rate": 4.589742779803259e-08, | |
| "logits/chosen": -1.5470924377441406, | |
| "logits/rejected": -1.5346300601959229, | |
| "logps/chosen": -46.48540115356445, | |
| "logps/rejected": -48.65108108520508, | |
| "loss": 0.6919, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.0027128090150654316, | |
| "rewards/margins": 0.002509825862944126, | |
| "rewards/rejected": 0.00020298334129620343, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8069164265129684, | |
| "grad_norm": 2.6368446350097656, | |
| "learning_rate": 4.5781644155290486e-08, | |
| "logits/chosen": -1.4871833324432373, | |
| "logits/rejected": -1.4772343635559082, | |
| "logps/chosen": -45.52531433105469, | |
| "logps/rejected": -46.510433197021484, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.004077502526342869, | |
| "rewards/margins": 0.00330347940325737, | |
| "rewards/rejected": 0.0007740228320471942, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.8141210374639769, | |
| "grad_norm": 2.8489768505096436, | |
| "learning_rate": 4.566439964877613e-08, | |
| "logits/chosen": -1.5221761465072632, | |
| "logits/rejected": -1.5160510540008545, | |
| "logps/chosen": -43.423152923583984, | |
| "logps/rejected": -45.22438049316406, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 0.002408969681710005, | |
| "rewards/margins": 0.002129070693627, | |
| "rewards/rejected": 0.0002798990753944963, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.8213256484149856, | |
| "grad_norm": 2.916175127029419, | |
| "learning_rate": 4.554570252029421e-08, | |
| "logits/chosen": -1.5706638097763062, | |
| "logits/rejected": -1.561694622039795, | |
| "logps/chosen": -46.79415512084961, | |
| "logps/rejected": -49.107398986816406, | |
| "loss": 0.6911, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.004531461279839277, | |
| "rewards/margins": 0.004187966696918011, | |
| "rewards/rejected": 0.0003434947575442493, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.8285302593659942, | |
| "grad_norm": 2.6654767990112305, | |
| "learning_rate": 4.542556111376274e-08, | |
| "logits/chosen": -1.5651055574417114, | |
| "logits/rejected": -1.55381178855896, | |
| "logps/chosen": -48.72823715209961, | |
| "logps/rejected": -50.8982048034668, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 0.003072004299610853, | |
| "rewards/margins": 0.0029076444916427135, | |
| "rewards/rejected": 0.00016435966244898736, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.8357348703170029, | |
| "grad_norm": 3.0894460678100586, | |
| "learning_rate": 4.5303983874626506e-08, | |
| "logits/chosen": -1.541265606880188, | |
| "logits/rejected": -1.5295203924179077, | |
| "logps/chosen": -50.63302993774414, | |
| "logps/rejected": -51.34119415283203, | |
| "loss": 0.6919, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.0029607131145894527, | |
| "rewards/margins": 0.0025523048825562, | |
| "rewards/rejected": 0.00040840805741027, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.8429394812680115, | |
| "grad_norm": 3.537862539291382, | |
| "learning_rate": 4.518097934926339e-08, | |
| "logits/chosen": -1.460115909576416, | |
| "logits/rejected": -1.4346593618392944, | |
| "logps/chosen": -46.76906967163086, | |
| "logps/rejected": -46.864540100097656, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.003340616822242737, | |
| "rewards/margins": 0.0031544927041977644, | |
| "rewards/rejected": 0.00018612373969517648, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.8501440922190202, | |
| "grad_norm": 3.8841352462768555, | |
| "learning_rate": 4.505655618438363e-08, | |
| "logits/chosen": -1.4248067140579224, | |
| "logits/rejected": -1.4110429286956787, | |
| "logps/chosen": -48.690521240234375, | |
| "logps/rejected": -49.68914794921875, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.002877553692087531, | |
| "rewards/margins": 0.0027670259587466717, | |
| "rewards/rejected": 0.00011052779154852033, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.8573487031700289, | |
| "grad_norm": 2.915313482284546, | |
| "learning_rate": 4.4930723126421945e-08, | |
| "logits/chosen": -1.5898973941802979, | |
| "logits/rejected": -1.5667184591293335, | |
| "logps/chosen": -49.06376647949219, | |
| "logps/rejected": -50.36428451538086, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.0030966580379754305, | |
| "rewards/margins": 0.0030564782209694386, | |
| "rewards/rejected": 4.0179769712267444e-05, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.8645533141210374, | |
| "grad_norm": 3.2781777381896973, | |
| "learning_rate": 4.48034890209227e-08, | |
| "logits/chosen": -1.465415120124817, | |
| "logits/rejected": -1.4456936120986938, | |
| "logps/chosen": -51.64118576049805, | |
| "logps/rejected": -53.60643768310547, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.00304031022824347, | |
| "rewards/margins": 0.003465033369138837, | |
| "rewards/rejected": -0.000424722908064723, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8717579250720461, | |
| "grad_norm": 2.642411470413208, | |
| "learning_rate": 4.4674862811918155e-08, | |
| "logits/chosen": -1.4467787742614746, | |
| "logits/rejected": -1.4440176486968994, | |
| "logps/chosen": -43.359764099121094, | |
| "logps/rejected": -46.50818634033203, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 0.003694910556077957, | |
| "rewards/margins": 0.003382860217243433, | |
| "rewards/rejected": 0.00031205095001496375, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.8789625360230547, | |
| "grad_norm": 3.445678234100342, | |
| "learning_rate": 4.454485354129966e-08, | |
| "logits/chosen": -1.4948651790618896, | |
| "logits/rejected": -1.4892648458480835, | |
| "logps/chosen": -46.580467224121094, | |
| "logps/rejected": -50.10879135131836, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.004008608870208263, | |
| "rewards/margins": 0.003896749345585704, | |
| "rewards/rejected": 0.0001118591899285093, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.8861671469740634, | |
| "grad_norm": 2.9829981327056885, | |
| "learning_rate": 4.4413470348182124e-08, | |
| "logits/chosen": -1.4465805292129517, | |
| "logits/rejected": -1.4232655763626099, | |
| "logps/chosen": -48.75006866455078, | |
| "logps/rejected": -50.792945861816406, | |
| "loss": 0.6913, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.003733579069375992, | |
| "rewards/margins": 0.0036661014892160892, | |
| "rewards/rejected": 6.747785664629191e-05, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.8933717579250721, | |
| "grad_norm": 3.606907367706299, | |
| "learning_rate": 4.42807224682615e-08, | |
| "logits/chosen": -1.502768874168396, | |
| "logits/rejected": -1.490201473236084, | |
| "logps/chosen": -42.87495040893555, | |
| "logps/rejected": -45.96978759765625, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.0038322817999869585, | |
| "rewards/margins": 0.004567673895508051, | |
| "rewards/rejected": -0.0007353918626904488, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.9005763688760807, | |
| "grad_norm": 2.5278074741363525, | |
| "learning_rate": 4.4146619233165604e-08, | |
| "logits/chosen": -1.5510034561157227, | |
| "logits/rejected": -1.546442985534668, | |
| "logps/chosen": -50.644317626953125, | |
| "logps/rejected": -54.01775360107422, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.0030824330169707537, | |
| "rewards/margins": 0.0030220781918615103, | |
| "rewards/rejected": 6.0354708693921566e-05, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.9077809798270894, | |
| "grad_norm": 3.128469944000244, | |
| "learning_rate": 4.4011170069798126e-08, | |
| "logits/chosen": -1.5054194927215576, | |
| "logits/rejected": -1.521511435508728, | |
| "logps/chosen": -46.49076461791992, | |
| "logps/rejected": -53.44841766357422, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.00285265501588583, | |
| "rewards/margins": 0.003102459479123354, | |
| "rewards/rejected": -0.00024980431771837175, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.9149855907780979, | |
| "grad_norm": 3.1893060207366943, | |
| "learning_rate": 4.387438449967594e-08, | |
| "logits/chosen": -1.4547739028930664, | |
| "logits/rejected": -1.4414390325546265, | |
| "logps/chosen": -45.3841438293457, | |
| "logps/rejected": -47.97360610961914, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.00473793875426054, | |
| "rewards/margins": 0.005001851357519627, | |
| "rewards/rejected": -0.0002639126614667475, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.9221902017291066, | |
| "grad_norm": 3.4499995708465576, | |
| "learning_rate": 4.373627213825983e-08, | |
| "logits/chosen": -1.609289526939392, | |
| "logits/rejected": -1.599395751953125, | |
| "logps/chosen": -46.193485260009766, | |
| "logps/rejected": -49.694297790527344, | |
| "loss": 0.6905, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.004828107543289661, | |
| "rewards/margins": 0.005287128034979105, | |
| "rewards/rejected": -0.00045902031706646085, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.9293948126801153, | |
| "grad_norm": 2.4953017234802246, | |
| "learning_rate": 4.359684269427848e-08, | |
| "logits/chosen": -1.5663963556289673, | |
| "logits/rejected": -1.563528299331665, | |
| "logps/chosen": -45.60202407836914, | |
| "logps/rejected": -49.13114929199219, | |
| "loss": 0.6911, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.004381075501441956, | |
| "rewards/margins": 0.0041623106226325035, | |
| "rewards/rejected": 0.00021876460232306272, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.9365994236311239, | |
| "grad_norm": 3.026409149169922, | |
| "learning_rate": 4.34561059690461e-08, | |
| "logits/chosen": -1.6096134185791016, | |
| "logits/rejected": -1.608337163925171, | |
| "logps/chosen": -47.30002975463867, | |
| "logps/rejected": -48.903472900390625, | |
| "loss": 0.6922, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": 0.0020707384683191776, | |
| "rewards/margins": 0.0018296821508556604, | |
| "rewards/rejected": 0.00024105608463287354, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9438040345821326, | |
| "grad_norm": 2.6735422611236572, | |
| "learning_rate": 4.3314071855773314e-08, | |
| "logits/chosen": -1.5702834129333496, | |
| "logits/rejected": -1.5721920728683472, | |
| "logps/chosen": -41.86522674560547, | |
| "logps/rejected": -45.095703125, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.0037654649931937456, | |
| "rewards/margins": 0.004000894725322723, | |
| "rewards/rejected": -0.0002354306634515524, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.9510086455331412, | |
| "grad_norm": 3.103926420211792, | |
| "learning_rate": 4.3170750338871806e-08, | |
| "logits/chosen": -1.5061908960342407, | |
| "logits/rejected": -1.4911664724349976, | |
| "logps/chosen": -46.52951431274414, | |
| "logps/rejected": -49.689754486083984, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.004671442788094282, | |
| "rewards/margins": 0.004490147810429335, | |
| "rewards/rejected": 0.00018129443924408406, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.9582132564841499, | |
| "grad_norm": 3.006434917449951, | |
| "learning_rate": 4.3026151493252414e-08, | |
| "logits/chosen": -1.549617886543274, | |
| "logits/rejected": -1.5283677577972412, | |
| "logps/chosen": -51.5010871887207, | |
| "logps/rejected": -52.90639114379883, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 0.003953360952436924, | |
| "rewards/margins": 0.0045930324122309685, | |
| "rewards/rejected": -0.0006396712851710618, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.9654178674351584, | |
| "grad_norm": 3.5157463550567627, | |
| "learning_rate": 4.2880285483616895e-08, | |
| "logits/chosen": -1.5331405401229858, | |
| "logits/rejected": -1.5302000045776367, | |
| "logps/chosen": -45.698429107666016, | |
| "logps/rejected": -48.99650955200195, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.004010093864053488, | |
| "rewards/margins": 0.003920042887330055, | |
| "rewards/rejected": 9.005082392832264e-05, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.9726224783861671, | |
| "grad_norm": 2.748664617538452, | |
| "learning_rate": 4.273316256374342e-08, | |
| "logits/chosen": -1.4038106203079224, | |
| "logits/rejected": -1.39895498752594, | |
| "logps/chosen": -52.240440368652344, | |
| "logps/rejected": -53.147804260253906, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.004330686293542385, | |
| "rewards/margins": 0.004021846689283848, | |
| "rewards/rejected": 0.00030883977888152003, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.9798270893371758, | |
| "grad_norm": 3.36702299118042, | |
| "learning_rate": 4.258479307576576e-08, | |
| "logits/chosen": -1.5006659030914307, | |
| "logits/rejected": -1.4956198930740356, | |
| "logps/chosen": -43.774024963378906, | |
| "logps/rejected": -45.699832916259766, | |
| "loss": 0.6903, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0054828086867928505, | |
| "rewards/margins": 0.005769900046288967, | |
| "rewards/rejected": -0.0002870913012884557, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.9870317002881844, | |
| "grad_norm": 2.7340095043182373, | |
| "learning_rate": 4.243518744944626e-08, | |
| "logits/chosen": -1.504451870918274, | |
| "logits/rejected": -1.5006357431411743, | |
| "logps/chosen": -43.27858352661133, | |
| "logps/rejected": -47.12054443359375, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.003977509681135416, | |
| "rewards/margins": 0.004465220961719751, | |
| "rewards/rejected": -0.0004877113678958267, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.9942363112391931, | |
| "grad_norm": 3.410355567932129, | |
| "learning_rate": 4.22843562014427e-08, | |
| "logits/chosen": -1.4497559070587158, | |
| "logits/rejected": -1.439967155456543, | |
| "logps/chosen": -46.891605377197266, | |
| "logps/rejected": -49.04447937011719, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.003908143378794193, | |
| "rewards/margins": 0.002761934418231249, | |
| "rewards/rejected": 0.0011462090769782662, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.0014409221902016, | |
| "grad_norm": 3.0687458515167236, | |
| "learning_rate": 4.2132309934569e-08, | |
| "logits/chosen": -1.5673506259918213, | |
| "logits/rejected": -1.5622578859329224, | |
| "logps/chosen": -43.75770950317383, | |
| "logps/rejected": -46.118858337402344, | |
| "loss": 0.6913, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 0.005200340412557125, | |
| "rewards/margins": 0.003705868497490883, | |
| "rewards/rejected": 0.0014944719150662422, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.0086455331412103, | |
| "grad_norm": 2.4820141792297363, | |
| "learning_rate": 4.197905933704989e-08, | |
| "logits/chosen": -1.4312834739685059, | |
| "logits/rejected": -1.4214531183242798, | |
| "logps/chosen": -47.255332946777344, | |
| "logps/rejected": -49.971588134765625, | |
| "loss": 0.6895, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.005209643859416246, | |
| "rewards/margins": 0.007327331695705652, | |
| "rewards/rejected": -0.0021176892332732677, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.015850144092219, | |
| "grad_norm": 2.7282636165618896, | |
| "learning_rate": 4.1824615181769577e-08, | |
| "logits/chosen": -1.4861654043197632, | |
| "logits/rejected": -1.4929125308990479, | |
| "logps/chosen": -43.8185920715332, | |
| "logps/rejected": -47.74663162231445, | |
| "loss": 0.6902, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.004913496784865856, | |
| "rewards/margins": 0.005964468698948622, | |
| "rewards/rejected": -0.0010509720304980874, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.0230547550432276, | |
| "grad_norm": 3.1228253841400146, | |
| "learning_rate": 4.1668988325514434e-08, | |
| "logits/chosen": -1.5240795612335205, | |
| "logits/rejected": -1.5136892795562744, | |
| "logps/chosen": -49.41614532470703, | |
| "logps/rejected": -52.12762451171875, | |
| "loss": 0.6899, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.004956229589879513, | |
| "rewards/margins": 0.006649328861385584, | |
| "rewards/rejected": -0.0016930990386754274, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.0302593659942363, | |
| "grad_norm": 3.0137906074523926, | |
| "learning_rate": 4.1512189708209844e-08, | |
| "logits/chosen": -1.5741335153579712, | |
| "logits/rejected": -1.5646674633026123, | |
| "logps/chosen": -38.225059509277344, | |
| "logps/rejected": -39.43558883666992, | |
| "loss": 0.6906, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.005329563282430172, | |
| "rewards/margins": 0.005151194520294666, | |
| "rewards/rejected": 0.00017836911138147116, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.037463976945245, | |
| "grad_norm": 3.6123361587524414, | |
| "learning_rate": 4.1354230352151143e-08, | |
| "logits/chosen": -1.5040456056594849, | |
| "logits/rejected": -1.4911547899246216, | |
| "logps/chosen": -56.4525260925293, | |
| "logps/rejected": -56.65864944458008, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.0040727341547608376, | |
| "rewards/margins": 0.004969144240021706, | |
| "rewards/rejected": -0.0008964102598838508, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.0446685878962536, | |
| "grad_norm": 2.5805535316467285, | |
| "learning_rate": 4.119512136122882e-08, | |
| "logits/chosen": -1.608700156211853, | |
| "logits/rejected": -1.621206283569336, | |
| "logps/chosen": -42.28131866455078, | |
| "logps/rejected": -48.45494842529297, | |
| "loss": 0.6897, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.004296852741390467, | |
| "rewards/margins": 0.0070196837186813354, | |
| "rewards/rejected": -0.002722830278798938, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.0518731988472623, | |
| "grad_norm": 3.4811322689056396, | |
| "learning_rate": 4.103487392014795e-08, | |
| "logits/chosen": -1.4754607677459717, | |
| "logits/rejected": -1.4568121433258057, | |
| "logps/chosen": -46.388267517089844, | |
| "logps/rejected": -51.01404571533203, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.005554754287004471, | |
| "rewards/margins": 0.008482200093567371, | |
| "rewards/rejected": -0.002927445573732257, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.059077809798271, | |
| "grad_norm": 2.9166300296783447, | |
| "learning_rate": 4.087349929364192e-08, | |
| "logits/chosen": -1.5654090642929077, | |
| "logits/rejected": -1.544526219367981, | |
| "logps/chosen": -42.55342483520508, | |
| "logps/rejected": -45.895294189453125, | |
| "loss": 0.6896, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.004680173471570015, | |
| "rewards/margins": 0.007236185017973185, | |
| "rewards/rejected": -0.0025560115464031696, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.0662824207492796, | |
| "grad_norm": 2.4293131828308105, | |
| "learning_rate": 4.0711008825680645e-08, | |
| "logits/chosen": -1.5042253732681274, | |
| "logits/rejected": -1.4851328134536743, | |
| "logps/chosen": -47.321651458740234, | |
| "logps/rejected": -50.18535614013672, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.004479935858398676, | |
| "rewards/margins": 0.005603041499853134, | |
| "rewards/rejected": -0.0011231055250391364, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.0734870317002883, | |
| "grad_norm": 3.547532796859741, | |
| "learning_rate": 4.054741393867306e-08, | |
| "logits/chosen": -1.4755966663360596, | |
| "logits/rejected": -1.4644145965576172, | |
| "logps/chosen": -54.06425857543945, | |
| "logps/rejected": -55.69597244262695, | |
| "loss": 0.6902, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.004462078679352999, | |
| "rewards/margins": 0.005896457936614752, | |
| "rewards/rejected": -0.0014343796065077186, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.080691642651297, | |
| "grad_norm": 2.9653351306915283, | |
| "learning_rate": 4.038272613266419e-08, | |
| "logits/chosen": -1.5455403327941895, | |
| "logits/rejected": -1.5216772556304932, | |
| "logps/chosen": -44.87678909301758, | |
| "logps/rejected": -47.38240432739258, | |
| "loss": 0.6902, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.005075398366898298, | |
| "rewards/margins": 0.006029829382896423, | |
| "rewards/rejected": -0.0009544305503368378, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0878962536023056, | |
| "grad_norm": 3.3833298683166504, | |
| "learning_rate": 4.0216956984526784e-08, | |
| "logits/chosen": -1.5539488792419434, | |
| "logits/rejected": -1.5496807098388672, | |
| "logps/chosen": -42.88352966308594, | |
| "logps/rejected": -45.572662353515625, | |
| "loss": 0.6896, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.005250538233667612, | |
| "rewards/margins": 0.007218127138912678, | |
| "rewards/rejected": -0.001967588672414422, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.0951008645533142, | |
| "grad_norm": 3.101933002471924, | |
| "learning_rate": 4.0050118147147446e-08, | |
| "logits/chosen": -1.5179487466812134, | |
| "logits/rejected": -1.5099337100982666, | |
| "logps/chosen": -53.398643493652344, | |
| "logps/rejected": -52.119972229003906, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.0032478254288434982, | |
| "rewards/margins": 0.0021998791489750147, | |
| "rewards/rejected": 0.0010479463962838054, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.1023054755043227, | |
| "grad_norm": 3.1006407737731934, | |
| "learning_rate": 3.988222134860755e-08, | |
| "logits/chosen": -1.5638402700424194, | |
| "logits/rejected": -1.550837516784668, | |
| "logps/chosen": -47.294315338134766, | |
| "logps/rejected": -51.660377502441406, | |
| "loss": 0.69, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.0049809180200099945, | |
| "rewards/margins": 0.006470340304076672, | |
| "rewards/rejected": -0.0014894230989739299, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.1095100864553313, | |
| "grad_norm": 3.0943164825439453, | |
| "learning_rate": 3.9713278391358724e-08, | |
| "logits/chosen": -1.5746233463287354, | |
| "logits/rejected": -1.5629007816314697, | |
| "logps/chosen": -45.94882583618164, | |
| "logps/rejected": -49.18882369995117, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.005064360331743956, | |
| "rewards/margins": 0.005639818962663412, | |
| "rewards/rejected": -0.0005754587473347783, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.11671469740634, | |
| "grad_norm": 2.466256618499756, | |
| "learning_rate": 3.954330115139328e-08, | |
| "logits/chosen": -1.5431041717529297, | |
| "logits/rejected": -1.532755732536316, | |
| "logps/chosen": -46.7624397277832, | |
| "logps/rejected": -48.83042526245117, | |
| "loss": 0.6899, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.004654773510992527, | |
| "rewards/margins": 0.006525079254060984, | |
| "rewards/rejected": -0.0018703056266531348, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.1239193083573487, | |
| "grad_norm": 4.066368103027344, | |
| "learning_rate": 3.937230157740931e-08, | |
| "logits/chosen": -1.591922402381897, | |
| "logits/rejected": -1.573209285736084, | |
| "logps/chosen": -47.90293884277344, | |
| "logps/rejected": -51.45854949951172, | |
| "loss": 0.6893, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.006319095846265554, | |
| "rewards/margins": 0.007731067482382059, | |
| "rewards/rejected": -0.001411972800269723, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.1311239193083573, | |
| "grad_norm": 2.3516957759857178, | |
| "learning_rate": 3.920029168997077e-08, | |
| "logits/chosen": -1.5559136867523193, | |
| "logits/rejected": -1.5427807569503784, | |
| "logps/chosen": -48.74748992919922, | |
| "logps/rejected": -51.54194259643555, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": 0.004629113245755434, | |
| "rewards/margins": 0.005624239332973957, | |
| "rewards/rejected": -0.0009951259708032012, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.138328530259366, | |
| "grad_norm": 3.9600844383239746, | |
| "learning_rate": 3.9027283580662476e-08, | |
| "logits/chosen": -1.519817590713501, | |
| "logits/rejected": -1.5075610876083374, | |
| "logps/chosen": -49.621826171875, | |
| "logps/rejected": -52.77042770385742, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.005040620919317007, | |
| "rewards/margins": 0.008789965882897377, | |
| "rewards/rejected": -0.0037493451964110136, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.1455331412103746, | |
| "grad_norm": 3.975217819213867, | |
| "learning_rate": 3.885328941124014e-08, | |
| "logits/chosen": -1.5018306970596313, | |
| "logits/rejected": -1.4887568950653076, | |
| "logps/chosen": -45.943092346191406, | |
| "logps/rejected": -50.64377975463867, | |
| "loss": 0.6896, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.005147297866642475, | |
| "rewards/margins": 0.007178432308137417, | |
| "rewards/rejected": -0.0020311346743255854, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.1527377521613833, | |
| "grad_norm": 3.0368916988372803, | |
| "learning_rate": 3.867832141277539e-08, | |
| "logits/chosen": -1.5485479831695557, | |
| "logits/rejected": -1.5292450189590454, | |
| "logps/chosen": -49.10287857055664, | |
| "logps/rejected": -51.29143142700195, | |
| "loss": 0.6899, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": 0.004347370471805334, | |
| "rewards/margins": 0.00650381064042449, | |
| "rewards/rejected": -0.002156440168619156, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.159942363112392, | |
| "grad_norm": 3.4196557998657227, | |
| "learning_rate": 3.850239188479606e-08, | |
| "logits/chosen": -1.4620041847229004, | |
| "logits/rejected": -1.4584031105041504, | |
| "logps/chosen": -46.734825134277344, | |
| "logps/rejected": -49.128395080566406, | |
| "loss": 0.6898, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 0.0048998757265508175, | |
| "rewards/margins": 0.0066976905800402164, | |
| "rewards/rejected": -0.0017978150863200426, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.1671469740634006, | |
| "grad_norm": 3.5657782554626465, | |
| "learning_rate": 3.832551319442151e-08, | |
| "logits/chosen": -1.5856021642684937, | |
| "logits/rejected": -1.5821958780288696, | |
| "logps/chosen": -49.59635543823242, | |
| "logps/rejected": -53.7983283996582, | |
| "loss": 0.6899, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.0052263312973082066, | |
| "rewards/margins": 0.006498755421489477, | |
| "rewards/rejected": -0.0012724247062578797, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.1743515850144093, | |
| "grad_norm": 4.230812072753906, | |
| "learning_rate": 3.81476977754933e-08, | |
| "logits/chosen": -1.3997455835342407, | |
| "logits/rejected": -1.3851207494735718, | |
| "logps/chosen": -51.304656982421875, | |
| "logps/rejected": -50.67051315307617, | |
| "loss": 0.6901, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.004272806458175182, | |
| "rewards/margins": 0.006219337694346905, | |
| "rewards/rejected": -0.0019465312361717224, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.181556195965418, | |
| "grad_norm": 2.398345470428467, | |
| "learning_rate": 3.796895812770114e-08, | |
| "logits/chosen": -1.5023247003555298, | |
| "logits/rejected": -1.4925954341888428, | |
| "logps/chosen": -45.7723503112793, | |
| "logps/rejected": -47.24066925048828, | |
| "loss": 0.6894, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0059529999271035194, | |
| "rewards/margins": 0.0075266240164637566, | |
| "rewards/rejected": -0.0015736244386062026, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.1887608069164266, | |
| "grad_norm": 3.0664162635803223, | |
| "learning_rate": 3.7789306815704216e-08, | |
| "logits/chosen": -1.5271222591400146, | |
| "logits/rejected": -1.5166699886322021, | |
| "logps/chosen": -40.933349609375, | |
| "logps/rejected": -42.03236770629883, | |
| "loss": 0.6908, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.003515125485137105, | |
| "rewards/margins": 0.00480083329603076, | |
| "rewards/rejected": -0.001285707694478333, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.195965417867435, | |
| "grad_norm": 2.647942543029785, | |
| "learning_rate": 3.760875646824795e-08, | |
| "logits/chosen": -1.3960988521575928, | |
| "logits/rejected": -1.3973405361175537, | |
| "logps/chosen": -46.11708068847656, | |
| "logps/rejected": -48.433963775634766, | |
| "loss": 0.6896, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0037715521175414324, | |
| "rewards/margins": 0.007300334516912699, | |
| "rewards/rejected": -0.003528781933709979, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.2031700288184437, | |
| "grad_norm": 3.641119956970215, | |
| "learning_rate": 3.742731977727623e-08, | |
| "logits/chosen": -1.5391809940338135, | |
| "logits/rejected": -1.5319098234176636, | |
| "logps/chosen": -45.204803466796875, | |
| "logps/rejected": -49.10814666748047, | |
| "loss": 0.6896, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.006137064192444086, | |
| "rewards/margins": 0.007269109599292278, | |
| "rewards/rejected": -0.0011320464545860887, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.2103746397694524, | |
| "grad_norm": 3.703112840652466, | |
| "learning_rate": 3.7245009497039244e-08, | |
| "logits/chosen": -1.4356962442398071, | |
| "logits/rejected": -1.4204628467559814, | |
| "logps/chosen": -45.42398452758789, | |
| "logps/rejected": -49.50120162963867, | |
| "loss": 0.6892, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.003938040696084499, | |
| "rewards/margins": 0.008085368201136589, | |
| "rewards/rejected": -0.004147327970713377, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.217579250720461, | |
| "grad_norm": 2.649240732192993, | |
| "learning_rate": 3.7061838443196886e-08, | |
| "logits/chosen": -1.511671543121338, | |
| "logits/rejected": -1.5024120807647705, | |
| "logps/chosen": -50.048927307128906, | |
| "logps/rejected": -52.17205810546875, | |
| "loss": 0.6885, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.005982678383588791, | |
| "rewards/margins": 0.009426699951291084, | |
| "rewards/rejected": -0.003444021102041006, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.2247838616714697, | |
| "grad_norm": 2.996338129043579, | |
| "learning_rate": 3.68778194919179e-08, | |
| "logits/chosen": -1.4723883867263794, | |
| "logits/rejected": -1.4665647745132446, | |
| "logps/chosen": -50.0838737487793, | |
| "logps/rejected": -53.330780029296875, | |
| "loss": 0.6878, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 0.008046514354646206, | |
| "rewards/margins": 0.010942642576992512, | |
| "rewards/rejected": -0.0028961277566850185, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.2319884726224783, | |
| "grad_norm": 3.614102840423584, | |
| "learning_rate": 3.66929655789747e-08, | |
| "logits/chosen": -1.570845365524292, | |
| "logits/rejected": -1.5528652667999268, | |
| "logps/chosen": -41.95561599731445, | |
| "logps/rejected": -46.511497497558594, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.005627007223665714, | |
| "rewards/margins": 0.008476624265313148, | |
| "rewards/rejected": -0.0028496168088167906, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.239193083573487, | |
| "grad_norm": 2.334775447845459, | |
| "learning_rate": 3.6507289698834064e-08, | |
| "logits/chosen": -1.4717720746994019, | |
| "logits/rejected": -1.455172061920166, | |
| "logps/chosen": -43.546348571777344, | |
| "logps/rejected": -46.07920455932617, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.005223124288022518, | |
| "rewards/margins": 0.008515788242220879, | |
| "rewards/rejected": -0.003292663721367717, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.2463976945244957, | |
| "grad_norm": 4.027002334594727, | |
| "learning_rate": 3.6320804903743684e-08, | |
| "logits/chosen": -1.5162203311920166, | |
| "logits/rejected": -1.5112934112548828, | |
| "logps/chosen": -45.405494689941406, | |
| "logps/rejected": -49.15242385864258, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.0035115727223455906, | |
| "rewards/margins": 0.008787490427494049, | |
| "rewards/rejected": -0.005275918636471033, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.2536023054755043, | |
| "grad_norm": 2.6576426029205322, | |
| "learning_rate": 3.61335243028146e-08, | |
| "logits/chosen": -1.495697021484375, | |
| "logits/rejected": -1.489630937576294, | |
| "logps/chosen": -48.88677978515625, | |
| "logps/rejected": -51.62388229370117, | |
| "loss": 0.6891, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": 0.004441169556230307, | |
| "rewards/margins": 0.008156510069966316, | |
| "rewards/rejected": -0.0037153400480747223, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.260806916426513, | |
| "grad_norm": 3.2648770809173584, | |
| "learning_rate": 3.5945461061099736e-08, | |
| "logits/chosen": -1.4391025304794312, | |
| "logits/rejected": -1.4078892469406128, | |
| "logps/chosen": -50.779335021972656, | |
| "logps/rejected": -49.67884063720703, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": 0.006459861062467098, | |
| "rewards/margins": 0.012035077437758446, | |
| "rewards/rejected": -0.005575217306613922, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.2680115273775217, | |
| "grad_norm": 2.9683494567871094, | |
| "learning_rate": 3.5756628398668446e-08, | |
| "logits/chosen": -1.5588018894195557, | |
| "logits/rejected": -1.557953953742981, | |
| "logps/chosen": -51.247032165527344, | |
| "logps/rejected": -53.68225860595703, | |
| "loss": 0.6886, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0037513356655836105, | |
| "rewards/margins": 0.009223448112607002, | |
| "rewards/rejected": -0.005472113378345966, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.2752161383285303, | |
| "grad_norm": 2.657179832458496, | |
| "learning_rate": 3.556703958967716e-08, | |
| "logits/chosen": -1.557582974433899, | |
| "logits/rejected": -1.5438480377197266, | |
| "logps/chosen": -44.341522216796875, | |
| "logps/rejected": -47.97272491455078, | |
| "loss": 0.6896, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.0037257769145071507, | |
| "rewards/margins": 0.007122798822820187, | |
| "rewards/rejected": -0.003397023305296898, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.282420749279539, | |
| "grad_norm": 4.005496978759766, | |
| "learning_rate": 3.5376707961436297e-08, | |
| "logits/chosen": -1.5314931869506836, | |
| "logits/rejected": -1.5151678323745728, | |
| "logps/chosen": -53.3531494140625, | |
| "logps/rejected": -53.47139358520508, | |
| "loss": 0.6906, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.004263926763087511, | |
| "rewards/margins": 0.005168497562408447, | |
| "rewards/rejected": -0.000904570915736258, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.2896253602305476, | |
| "grad_norm": 2.4250881671905518, | |
| "learning_rate": 3.51856468934734e-08, | |
| "logits/chosen": -1.4921042919158936, | |
| "logits/rejected": -1.4953409433364868, | |
| "logps/chosen": -46.357215881347656, | |
| "logps/rejected": -48.65216064453125, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.004532798193395138, | |
| "rewards/margins": 0.003982014954090118, | |
| "rewards/rejected": 0.0005507826572284102, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.2968299711815563, | |
| "grad_norm": 3.3605597019195557, | |
| "learning_rate": 3.499386981659262e-08, | |
| "logits/chosen": -1.5788064002990723, | |
| "logits/rejected": -1.570356011390686, | |
| "logps/chosen": -45.479488372802734, | |
| "logps/rejected": -51.7054328918457, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 0.006253694649785757, | |
| "rewards/margins": 0.008554017171263695, | |
| "rewards/rejected": -0.0023003218229860067, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.304034582132565, | |
| "grad_norm": 2.548804759979248, | |
| "learning_rate": 3.480139021193057e-08, | |
| "logits/chosen": -1.462729811668396, | |
| "logits/rejected": -1.4625592231750488, | |
| "logps/chosen": -46.50673294067383, | |
| "logps/rejected": -49.94841384887695, | |
| "loss": 0.6896, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.003766898764297366, | |
| "rewards/margins": 0.007147823926061392, | |
| "rewards/rejected": -0.0033809244632720947, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.3112391930835736, | |
| "grad_norm": 4.094874382019043, | |
| "learning_rate": 3.4608221610008666e-08, | |
| "logits/chosen": -1.5544965267181396, | |
| "logits/rejected": -1.5445128679275513, | |
| "logps/chosen": -40.694557189941406, | |
| "logps/rejected": -45.3368034362793, | |
| "loss": 0.6878, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.00552348280325532, | |
| "rewards/margins": 0.010925527662038803, | |
| "rewards/rejected": -0.005402045324444771, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.318443804034582, | |
| "grad_norm": 2.3106565475463867, | |
| "learning_rate": 3.4414377589782e-08, | |
| "logits/chosen": -1.4896498918533325, | |
| "logits/rejected": -1.4890059232711792, | |
| "logps/chosen": -44.28178787231445, | |
| "logps/rejected": -46.68126678466797, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.003096462693065405, | |
| "rewards/margins": 0.00837036594748497, | |
| "rewards/rejected": -0.0052739037200808525, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.3256484149855907, | |
| "grad_norm": 2.239630937576294, | |
| "learning_rate": 3.4219871777684745e-08, | |
| "logits/chosen": -1.5045303106307983, | |
| "logits/rejected": -1.48006272315979, | |
| "logps/chosen": -48.24463653564453, | |
| "logps/rejected": -49.66504669189453, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.004429197870194912, | |
| "rewards/margins": 0.0088044423609972, | |
| "rewards/rejected": -0.004375244490802288, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.3328530259365994, | |
| "grad_norm": 3.131451368331909, | |
| "learning_rate": 3.4024717846672364e-08, | |
| "logits/chosen": -1.5544954538345337, | |
| "logits/rejected": -1.5410051345825195, | |
| "logps/chosen": -43.851314544677734, | |
| "logps/rejected": -47.097129821777344, | |
| "loss": 0.6885, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0032080274540930986, | |
| "rewards/margins": 0.009510315954685211, | |
| "rewards/rejected": -0.006302288733422756, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.340057636887608, | |
| "grad_norm": 3.247105121612549, | |
| "learning_rate": 3.382892951526036e-08, | |
| "logits/chosen": -1.5086390972137451, | |
| "logits/rejected": -1.4984405040740967, | |
| "logps/chosen": -48.57241439819336, | |
| "logps/rejected": -53.5483283996582, | |
| "loss": 0.688, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.004579062573611736, | |
| "rewards/margins": 0.010537253692746162, | |
| "rewards/rejected": -0.005958191119134426, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.3472622478386167, | |
| "grad_norm": 3.0748696327209473, | |
| "learning_rate": 3.3632520546559974e-08, | |
| "logits/chosen": -1.4774866104125977, | |
| "logits/rejected": -1.450300693511963, | |
| "logps/chosen": -42.174537658691406, | |
| "logps/rejected": -46.27776336669922, | |
| "loss": 0.6883, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.005281184334307909, | |
| "rewards/margins": 0.009902546182274818, | |
| "rewards/rejected": -0.004621362779289484, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.3544668587896254, | |
| "grad_norm": 3.4227564334869385, | |
| "learning_rate": 3.34355047473107e-08, | |
| "logits/chosen": -1.509340763092041, | |
| "logits/rejected": -1.4928722381591797, | |
| "logps/chosen": -49.13660430908203, | |
| "logps/rejected": -50.32002639770508, | |
| "loss": 0.6892, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.003565728198736906, | |
| "rewards/margins": 0.008065813221037388, | |
| "rewards/rejected": -0.004500086419284344, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.361671469740634, | |
| "grad_norm": 3.2549450397491455, | |
| "learning_rate": 3.323789596690971e-08, | |
| "logits/chosen": -1.4439764022827148, | |
| "logits/rejected": -1.440734624862671, | |
| "logps/chosen": -46.024192810058594, | |
| "logps/rejected": -50.3931999206543, | |
| "loss": 0.6884, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.004367457702755928, | |
| "rewards/margins": 0.009686267003417015, | |
| "rewards/rejected": -0.005318809300661087, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.3688760806916427, | |
| "grad_norm": 2.0359508991241455, | |
| "learning_rate": 3.303970809643828e-08, | |
| "logits/chosen": -1.5255951881408691, | |
| "logits/rejected": -1.5279831886291504, | |
| "logps/chosen": -45.330116271972656, | |
| "logps/rejected": -49.04480743408203, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.005456699524074793, | |
| "rewards/margins": 0.008413759991526604, | |
| "rewards/rejected": -0.002957060467451811, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.3760806916426513, | |
| "grad_norm": 3.008654832839966, | |
| "learning_rate": 3.2840955067685356e-08, | |
| "logits/chosen": -1.5634180307388306, | |
| "logits/rejected": -1.5633050203323364, | |
| "logps/chosen": -45.995826721191406, | |
| "logps/rejected": -50.449180603027344, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.005350454244762659, | |
| "rewards/margins": 0.011478688567876816, | |
| "rewards/rejected": -0.006128234788775444, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.38328530259366, | |
| "grad_norm": 2.7495784759521484, | |
| "learning_rate": 3.264165085216817e-08, | |
| "logits/chosen": -1.5800001621246338, | |
| "logits/rejected": -1.572776436805725, | |
| "logps/chosen": -38.539920806884766, | |
| "logps/rejected": -43.823753356933594, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": 0.004405132494866848, | |
| "rewards/margins": 0.008914101868867874, | |
| "rewards/rejected": -0.004508969374001026, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.3904899135446687, | |
| "grad_norm": 4.20028018951416, | |
| "learning_rate": 3.244180946015008e-08, | |
| "logits/chosen": -1.4439995288848877, | |
| "logits/rejected": -1.435498833656311, | |
| "logps/chosen": -52.166969299316406, | |
| "logps/rejected": -53.882225036621094, | |
| "loss": 0.6898, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.004471774213016033, | |
| "rewards/margins": 0.0068550496362149715, | |
| "rewards/rejected": -0.0023832768201828003, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.397694524495677, | |
| "grad_norm": 2.5121569633483887, | |
| "learning_rate": 3.224144493965578e-08, | |
| "logits/chosen": -1.5799609422683716, | |
| "logits/rejected": -1.5780445337295532, | |
| "logps/chosen": -43.653690338134766, | |
| "logps/rejected": -45.741546630859375, | |
| "loss": 0.6892, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 0.0035719252191483974, | |
| "rewards/margins": 0.00797609519213438, | |
| "rewards/rejected": -0.0044041709043085575, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.4048991354466858, | |
| "grad_norm": 2.816251516342163, | |
| "learning_rate": 3.204057137548371e-08, | |
| "logits/chosen": -1.5314784049987793, | |
| "logits/rejected": -1.5235313177108765, | |
| "logps/chosen": -43.7066650390625, | |
| "logps/rejected": -47.314064025878906, | |
| "loss": 0.6877, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.0040044053457677364, | |
| "rewards/margins": 0.011081613600254059, | |
| "rewards/rejected": -0.007077208254486322, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.4121037463976944, | |
| "grad_norm": 3.713770866394043, | |
| "learning_rate": 3.183920288821597e-08, | |
| "logits/chosen": -1.4900578260421753, | |
| "logits/rejected": -1.4816348552703857, | |
| "logps/chosen": -45.30292510986328, | |
| "logps/rejected": -49.97011184692383, | |
| "loss": 0.6873, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.004732588771730661, | |
| "rewards/margins": 0.011991357430815697, | |
| "rewards/rejected": -0.007258768193423748, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.419308357348703, | |
| "grad_norm": 3.808242082595825, | |
| "learning_rate": 3.1637353633225735e-08, | |
| "logits/chosen": -1.539838433265686, | |
| "logits/rejected": -1.5290555953979492, | |
| "logps/chosen": -41.22531509399414, | |
| "logps/rejected": -45.71930694580078, | |
| "loss": 0.6868, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.003598598064854741, | |
| "rewards/margins": 0.012951062992215157, | |
| "rewards/rejected": -0.00935246329754591, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.4265129682997117, | |
| "grad_norm": 3.283512592315674, | |
| "learning_rate": 3.143503779968213e-08, | |
| "logits/chosen": -1.5066019296646118, | |
| "logits/rejected": -1.5066450834274292, | |
| "logps/chosen": -45.44121551513672, | |
| "logps/rejected": -49.75178909301758, | |
| "loss": 0.6892, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 0.0011785195674747229, | |
| "rewards/margins": 0.008287757635116577, | |
| "rewards/rejected": -0.007109238300472498, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.4337175792507204, | |
| "grad_norm": 3.336965322494507, | |
| "learning_rate": 3.1232269609552875e-08, | |
| "logits/chosen": -1.518235445022583, | |
| "logits/rejected": -1.507875680923462, | |
| "logps/chosen": -43.67535400390625, | |
| "logps/rejected": -46.17144012451172, | |
| "loss": 0.6887, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.004166613798588514, | |
| "rewards/margins": 0.00908889900892973, | |
| "rewards/rejected": -0.0049222856760025024, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.440922190201729, | |
| "grad_norm": 2.1761865615844727, | |
| "learning_rate": 3.102906331660444e-08, | |
| "logits/chosen": -1.5566504001617432, | |
| "logits/rejected": -1.543027639389038, | |
| "logps/chosen": -41.95298767089844, | |
| "logps/rejected": -48.24913024902344, | |
| "loss": 0.6868, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 0.0051358104683458805, | |
| "rewards/margins": 0.013014426454901695, | |
| "rewards/rejected": -0.007878614589571953, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.4481268011527377, | |
| "grad_norm": 3.1758456230163574, | |
| "learning_rate": 3.082543320540015e-08, | |
| "logits/chosen": -1.46958327293396, | |
| "logits/rejected": -1.454538106918335, | |
| "logps/chosen": -43.856781005859375, | |
| "logps/rejected": -47.52972412109375, | |
| "loss": 0.6882, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.0025951929856091738, | |
| "rewards/margins": 0.010161316022276878, | |
| "rewards/rejected": -0.007566122803837061, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.4553314121037464, | |
| "grad_norm": 4.123020648956299, | |
| "learning_rate": 3.062139359029599e-08, | |
| "logits/chosen": -1.5575106143951416, | |
| "logits/rejected": -1.5533676147460938, | |
| "logps/chosen": -46.472938537597656, | |
| "logps/rejected": -48.92716598510742, | |
| "loss": 0.6881, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0033696771133691072, | |
| "rewards/margins": 0.010232589207589626, | |
| "rewards/rejected": -0.006862912327051163, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.462536023054755, | |
| "grad_norm": 3.4289743900299072, | |
| "learning_rate": 3.041695881443437e-08, | |
| "logits/chosen": -1.5759422779083252, | |
| "logits/rejected": -1.5675259828567505, | |
| "logps/chosen": -46.361610412597656, | |
| "logps/rejected": -50.346927642822266, | |
| "loss": 0.69, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0033702836371958256, | |
| "rewards/margins": 0.006468233652412891, | |
| "rewards/rejected": -0.003097949782386422, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.4697406340057637, | |
| "grad_norm": 4.054693698883057, | |
| "learning_rate": 3.0212143248735886e-08, | |
| "logits/chosen": -1.5312144756317139, | |
| "logits/rejected": -1.5288541316986084, | |
| "logps/chosen": -49.77804946899414, | |
| "logps/rejected": -54.46125411987305, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.004172780551016331, | |
| "rewards/margins": 0.011559790931642056, | |
| "rewards/rejected": -0.007387010846287012, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.4769452449567724, | |
| "grad_norm": 3.1554675102233887, | |
| "learning_rate": 3.0006961290889077e-08, | |
| "logits/chosen": -1.5213099718093872, | |
| "logits/rejected": -1.4963198900222778, | |
| "logps/chosen": -50.681434631347656, | |
| "logps/rejected": -53.1854362487793, | |
| "loss": 0.6876, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.004848620388656855, | |
| "rewards/margins": 0.011397367343306541, | |
| "rewards/rejected": -0.006548746023327112, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.484149855907781, | |
| "grad_norm": 2.6636829376220703, | |
| "learning_rate": 2.980142736433833e-08, | |
| "logits/chosen": -1.5465320348739624, | |
| "logits/rejected": -1.5224246978759766, | |
| "logps/chosen": -44.290504455566406, | |
| "logps/rejected": -44.52730941772461, | |
| "loss": 0.6885, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.0015873590018600225, | |
| "rewards/margins": 0.009515630081295967, | |
| "rewards/rejected": -0.007928271777927876, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.4913544668587897, | |
| "grad_norm": 4.1119771003723145, | |
| "learning_rate": 2.9595555917269997e-08, | |
| "logits/chosen": -1.5561182498931885, | |
| "logits/rejected": -1.5290348529815674, | |
| "logps/chosen": -51.56610107421875, | |
| "logps/rejected": -53.313232421875, | |
| "loss": 0.6883, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0019942389335483313, | |
| "rewards/margins": 0.009912279434502125, | |
| "rewards/rejected": -0.00791804026812315, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.4985590778097984, | |
| "grad_norm": 3.134394407272339, | |
| "learning_rate": 2.9389361421596725e-08, | |
| "logits/chosen": -1.4347946643829346, | |
| "logits/rejected": -1.4317066669464111, | |
| "logps/chosen": -49.19475555419922, | |
| "logps/rejected": -53.55085372924805, | |
| "loss": 0.6871, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.0042834230698645115, | |
| "rewards/margins": 0.012365362606942654, | |
| "rewards/rejected": -0.00808194000273943, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.505763688760807, | |
| "grad_norm": 2.575726270675659, | |
| "learning_rate": 2.9182858371940126e-08, | |
| "logits/chosen": -1.5316616296768188, | |
| "logits/rejected": -1.5172832012176514, | |
| "logps/chosen": -42.60432052612305, | |
| "logps/rejected": -46.17161560058594, | |
| "loss": 0.6866, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.0029449663124978542, | |
| "rewards/margins": 0.013502433896064758, | |
| "rewards/rejected": -0.010557468049228191, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.5129682997118157, | |
| "grad_norm": 3.6238691806793213, | |
| "learning_rate": 2.8976061284611908e-08, | |
| "logits/chosen": -1.4699041843414307, | |
| "logits/rejected": -1.4809154272079468, | |
| "logps/chosen": -41.60236358642578, | |
| "logps/rejected": -45.32775115966797, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 0.004988711792975664, | |
| "rewards/margins": 0.011551633477210999, | |
| "rewards/rejected": -0.006562922149896622, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.5201729106628243, | |
| "grad_norm": 3.270362377166748, | |
| "learning_rate": 2.8768984696593384e-08, | |
| "logits/chosen": -1.4800060987472534, | |
| "logits/rejected": -1.4634923934936523, | |
| "logps/chosen": -44.76579666137695, | |
| "logps/rejected": -47.92957305908203, | |
| "loss": 0.6868, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.005053057335317135, | |
| "rewards/margins": 0.013063912279903889, | |
| "rewards/rejected": -0.008010854944586754, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.527377521613833, | |
| "grad_norm": 3.2324070930480957, | |
| "learning_rate": 2.8561643164513637e-08, | |
| "logits/chosen": -1.3341636657714844, | |
| "logits/rejected": -1.3184261322021484, | |
| "logps/chosen": -51.973052978515625, | |
| "logps/rejected": -54.29719924926758, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 0.0038118623197078705, | |
| "rewards/margins": 0.008959764614701271, | |
| "rewards/rejected": -0.005147902760654688, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.5345821325648417, | |
| "grad_norm": 3.309232711791992, | |
| "learning_rate": 2.8354051263626227e-08, | |
| "logits/chosen": -1.4665729999542236, | |
| "logits/rejected": -1.4669950008392334, | |
| "logps/chosen": -50.305789947509766, | |
| "logps/rejected": -52.8392219543457, | |
| "loss": 0.6886, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.002960771322250366, | |
| "rewards/margins": 0.009312191978096962, | |
| "rewards/rejected": -0.006351419724524021, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.54178674351585, | |
| "grad_norm": 4.678018569946289, | |
| "learning_rate": 2.8146223586784573e-08, | |
| "logits/chosen": -1.4573040008544922, | |
| "logits/rejected": -1.4442112445831299, | |
| "logps/chosen": -52.07715606689453, | |
| "logps/rejected": -54.99871063232422, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.0037856134586036205, | |
| "rewards/margins": 0.012529050931334496, | |
| "rewards/rejected": -0.008743437938392162, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.5489913544668588, | |
| "grad_norm": 3.4094982147216797, | |
| "learning_rate": 2.7938174743416205e-08, | |
| "logits/chosen": -1.3623288869857788, | |
| "logits/rejected": -1.3554866313934326, | |
| "logps/chosen": -51.5221061706543, | |
| "logps/rejected": -55.462249755859375, | |
| "loss": 0.6877, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.0025853284168988466, | |
| "rewards/margins": 0.011182976886630058, | |
| "rewards/rejected": -0.00859764777123928, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.5561959654178674, | |
| "grad_norm": 3.0521068572998047, | |
| "learning_rate": 2.7729919358495728e-08, | |
| "logits/chosen": -1.5039377212524414, | |
| "logits/rejected": -1.494292974472046, | |
| "logps/chosen": -52.330955505371094, | |
| "logps/rejected": -53.54069900512695, | |
| "loss": 0.6869, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.0036649019457399845, | |
| "rewards/margins": 0.012714678421616554, | |
| "rewards/rejected": -0.009049774147570133, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.563400576368876, | |
| "grad_norm": 3.8235228061676025, | |
| "learning_rate": 2.7521472071516772e-08, | |
| "logits/chosen": -1.4729235172271729, | |
| "logits/rejected": -1.4665067195892334, | |
| "logps/chosen": -43.68346405029297, | |
| "logps/rejected": -47.45942306518555, | |
| "loss": 0.6883, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.005660043563693762, | |
| "rewards/margins": 0.01002978254109621, | |
| "rewards/rejected": -0.004369738511741161, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.5706051873198847, | |
| "grad_norm": 3.896113395690918, | |
| "learning_rate": 2.731284753546289e-08, | |
| "logits/chosen": -1.4809584617614746, | |
| "logits/rejected": -1.474462866783142, | |
| "logps/chosen": -53.04487228393555, | |
| "logps/rejected": -56.80231857299805, | |
| "loss": 0.6894, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.00045274553121998906, | |
| "rewards/margins": 0.007639092858880758, | |
| "rewards/rejected": -0.008091839030385017, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.5778097982708934, | |
| "grad_norm": 4.060333728790283, | |
| "learning_rate": 2.710406041577751e-08, | |
| "logits/chosen": -1.551286220550537, | |
| "logits/rejected": -1.5481075048446655, | |
| "logps/chosen": -48.00239181518555, | |
| "logps/rejected": -53.792564392089844, | |
| "loss": 0.6882, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.004222895950078964, | |
| "rewards/margins": 0.010167112573981285, | |
| "rewards/rejected": -0.005944215692579746, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.585014409221902, | |
| "grad_norm": 3.3298470973968506, | |
| "learning_rate": 2.6895125389333017e-08, | |
| "logits/chosen": -1.5374701023101807, | |
| "logits/rejected": -1.5225059986114502, | |
| "logps/chosen": -48.45729446411133, | |
| "logps/rejected": -52.615623474121094, | |
| "loss": 0.6852, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.007050990127027035, | |
| "rewards/margins": 0.01621903106570244, | |
| "rewards/rejected": -0.009168041869997978, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.5922190201729105, | |
| "grad_norm": 3.1585147380828857, | |
| "learning_rate": 2.6686057143399028e-08, | |
| "logits/chosen": -1.50589919090271, | |
| "logits/rejected": -1.4978208541870117, | |
| "logps/chosen": -48.517723083496094, | |
| "logps/rejected": -50.0194091796875, | |
| "loss": 0.6882, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.004859555047005415, | |
| "rewards/margins": 0.01032261736690998, | |
| "rewards/rejected": -0.005463062319904566, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.5994236311239192, | |
| "grad_norm": 3.698626756668091, | |
| "learning_rate": 2.647687037460996e-08, | |
| "logits/chosen": -1.4842262268066406, | |
| "logits/rejected": -1.4768081903457642, | |
| "logps/chosen": -52.859100341796875, | |
| "logps/rejected": -58.45001220703125, | |
| "loss": 0.6862, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.006431101355701685, | |
| "rewards/margins": 0.014263585209846497, | |
| "rewards/rejected": -0.007832483388483524, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.6066282420749278, | |
| "grad_norm": 3.2058353424072266, | |
| "learning_rate": 2.626757978793187e-08, | |
| "logits/chosen": -1.506158471107483, | |
| "logits/rejected": -1.4989348649978638, | |
| "logps/chosen": -48.894813537597656, | |
| "logps/rejected": -52.491554260253906, | |
| "loss": 0.6889, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.0004457598552107811, | |
| "rewards/margins": 0.008815920911729336, | |
| "rewards/rejected": -0.009261680766940117, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.6138328530259365, | |
| "grad_norm": 2.961463212966919, | |
| "learning_rate": 2.6058200095628797e-08, | |
| "logits/chosen": -1.5059995651245117, | |
| "logits/rejected": -1.5062806606292725, | |
| "logps/chosen": -40.8658447265625, | |
| "logps/rejected": -46.769744873046875, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.0045011648908257484, | |
| "rewards/margins": 0.01694817841053009, | |
| "rewards/rejected": -0.012447012588381767, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.6210374639769451, | |
| "grad_norm": 3.2749526500701904, | |
| "learning_rate": 2.584874601622854e-08, | |
| "logits/chosen": -1.566329836845398, | |
| "logits/rejected": -1.5490646362304688, | |
| "logps/chosen": -49.39610290527344, | |
| "logps/rejected": -53.27736282348633, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.002292500576004386, | |
| "rewards/margins": 0.009129652753472328, | |
| "rewards/rejected": -0.006837151013314724, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.6282420749279538, | |
| "grad_norm": 3.0580456256866455, | |
| "learning_rate": 2.5639232273487993e-08, | |
| "logits/chosen": -1.4602752923965454, | |
| "logits/rejected": -1.440234899520874, | |
| "logps/chosen": -44.33481216430664, | |
| "logps/rejected": -47.66165542602539, | |
| "loss": 0.6877, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.004179822281002998, | |
| "rewards/margins": 0.011104853823781013, | |
| "rewards/rejected": -0.00692503247410059, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.6354466858789625, | |
| "grad_norm": 3.618812322616577, | |
| "learning_rate": 2.5429673595358142e-08, | |
| "logits/chosen": -1.5238935947418213, | |
| "logits/rejected": -1.509108304977417, | |
| "logps/chosen": -45.8094596862793, | |
| "logps/rejected": -48.57301330566406, | |
| "loss": 0.6878, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.0015709620201960206, | |
| "rewards/margins": 0.010965234600007534, | |
| "rewards/rejected": -0.009394274093210697, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.6426512968299711, | |
| "grad_norm": 3.3065967559814453, | |
| "learning_rate": 2.5220084712948764e-08, | |
| "logits/chosen": -1.4578711986541748, | |
| "logits/rejected": -1.4458692073822021, | |
| "logps/chosen": -52.10942459106445, | |
| "logps/rejected": -55.22698211669922, | |
| "loss": 0.6898, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.002750293118879199, | |
| "rewards/margins": 0.006976880133152008, | |
| "rewards/rejected": -0.004226587247103453, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.6498559077809798, | |
| "grad_norm": 3.7602572441101074, | |
| "learning_rate": 2.5010480359492838e-08, | |
| "logits/chosen": -1.4637352228164673, | |
| "logits/rejected": -1.4515202045440674, | |
| "logps/chosen": -49.456268310546875, | |
| "logps/rejected": -49.46593475341797, | |
| "loss": 0.6859, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.0037472727708518505, | |
| "rewards/margins": 0.014802709221839905, | |
| "rewards/rejected": -0.011055436916649342, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.6570605187319885, | |
| "grad_norm": 2.980210542678833, | |
| "learning_rate": 2.480087526931091e-08, | |
| "logits/chosen": -1.5034412145614624, | |
| "logits/rejected": -1.4840527772903442, | |
| "logps/chosen": -43.39094161987305, | |
| "logps/rejected": -45.42670822143555, | |
| "loss": 0.6862, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.00358552485704422, | |
| "rewards/margins": 0.014299413189291954, | |
| "rewards/rejected": -0.010713890194892883, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.6642651296829971, | |
| "grad_norm": 3.3928635120391846, | |
| "learning_rate": 2.4591284176775326e-08, | |
| "logits/chosen": -1.4473450183868408, | |
| "logits/rejected": -1.4353264570236206, | |
| "logps/chosen": -55.103553771972656, | |
| "logps/rejected": -56.5111198425293, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.003477326361462474, | |
| "rewards/margins": 0.008474309928715229, | |
| "rewards/rejected": -0.0049969833344221115, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.6714697406340058, | |
| "grad_norm": 2.8873450756073, | |
| "learning_rate": 2.4381721815274443e-08, | |
| "logits/chosen": -1.520268201828003, | |
| "logits/rejected": -1.5136438608169556, | |
| "logps/chosen": -43.23371887207031, | |
| "logps/rejected": -46.416725158691406, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.0015050893416628242, | |
| "rewards/margins": 0.01274427305907011, | |
| "rewards/rejected": -0.011239184066653252, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.6786743515850144, | |
| "grad_norm": 3.149796724319458, | |
| "learning_rate": 2.4172202916176936e-08, | |
| "logits/chosen": -1.5638529062271118, | |
| "logits/rejected": -1.555055856704712, | |
| "logps/chosen": -43.024620056152344, | |
| "logps/rejected": -47.86211013793945, | |
| "loss": 0.686, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.0007431974518112838, | |
| "rewards/margins": 0.014958178624510765, | |
| "rewards/rejected": -0.014214983209967613, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.685878962536023, | |
| "grad_norm": 3.653278112411499, | |
| "learning_rate": 2.3962742207796268e-08, | |
| "logits/chosen": -1.4480760097503662, | |
| "logits/rejected": -1.4380666017532349, | |
| "logps/chosen": -41.63762283325195, | |
| "logps/rejected": -45.63017654418945, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 0.00497156148776412, | |
| "rewards/margins": 0.016974590718746185, | |
| "rewards/rejected": -0.012003025971353054, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.6930835734870318, | |
| "grad_norm": 3.627781391143799, | |
| "learning_rate": 2.3753354414355334e-08, | |
| "logits/chosen": -1.4231648445129395, | |
| "logits/rejected": -1.4004006385803223, | |
| "logps/chosen": -53.55913543701172, | |
| "logps/rejected": -55.218833923339844, | |
| "loss": 0.6873, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.00025733548682183027, | |
| "rewards/margins": 0.012184584513306618, | |
| "rewards/rejected": -0.011927250772714615, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.7002881844380404, | |
| "grad_norm": 3.3585216999053955, | |
| "learning_rate": 2.3544054254951408e-08, | |
| "logits/chosen": -1.464521050453186, | |
| "logits/rejected": -1.4445630311965942, | |
| "logps/chosen": -42.864479064941406, | |
| "logps/rejected": -48.42354202270508, | |
| "loss": 0.6842, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 0.004192016087472439, | |
| "rewards/margins": 0.018358776345849037, | |
| "rewards/rejected": -0.014166759327054024, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.707492795389049, | |
| "grad_norm": 3.362706184387207, | |
| "learning_rate": 2.3334856442521435e-08, | |
| "logits/chosen": -1.5583127737045288, | |
| "logits/rejected": -1.539954662322998, | |
| "logps/chosen": -51.2715950012207, | |
| "logps/rejected": -51.411582946777344, | |
| "loss": 0.688, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.0023382273502647877, | |
| "rewards/margins": 0.010567070916295052, | |
| "rewards/rejected": -0.008228843100368977, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.7146974063400577, | |
| "grad_norm": 3.5896084308624268, | |
| "learning_rate": 2.3125775682807826e-08, | |
| "logits/chosen": -1.5538270473480225, | |
| "logits/rejected": -1.5523487329483032, | |
| "logps/chosen": -49.91779708862305, | |
| "logps/rejected": -53.7571907043457, | |
| "loss": 0.686, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.002975709503516555, | |
| "rewards/margins": 0.014789762906730175, | |
| "rewards/rejected": -0.011814054101705551, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.7219020172910664, | |
| "grad_norm": 2.732419013977051, | |
| "learning_rate": 2.291682667332464e-08, | |
| "logits/chosen": -1.6124897003173828, | |
| "logits/rejected": -1.5989201068878174, | |
| "logps/chosen": -46.4678840637207, | |
| "logps/rejected": -49.60542678833008, | |
| "loss": 0.6887, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.0006780127296224236, | |
| "rewards/margins": 0.009133217856287956, | |
| "rewards/rejected": -0.008455204777419567, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.729106628242075, | |
| "grad_norm": 2.9192984104156494, | |
| "learning_rate": 2.2708024102324454e-08, | |
| "logits/chosen": -1.5306718349456787, | |
| "logits/rejected": -1.5253630876541138, | |
| "logps/chosen": -46.716922760009766, | |
| "logps/rejected": -51.75889205932617, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0032539735548198223, | |
| "rewards/margins": 0.016655398532748222, | |
| "rewards/rejected": -0.013401424512267113, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.7363112391930837, | |
| "grad_norm": 3.7035152912139893, | |
| "learning_rate": 2.2499382647765797e-08, | |
| "logits/chosen": -1.4964570999145508, | |
| "logits/rejected": -1.4977965354919434, | |
| "logps/chosen": -48.43117141723633, | |
| "logps/rejected": -52.01741409301758, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.00011159153655171394, | |
| "rewards/margins": 0.011605454608798027, | |
| "rewards/rejected": -0.011717047542333603, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.7435158501440924, | |
| "grad_norm": 2.8924202919006348, | |
| "learning_rate": 2.2290916976281427e-08, | |
| "logits/chosen": -1.4779541492462158, | |
| "logits/rejected": -1.4635918140411377, | |
| "logps/chosen": -43.7186279296875, | |
| "logps/rejected": -46.1024055480957, | |
| "loss": 0.6862, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.00032794266007840633, | |
| "rewards/margins": 0.014410694129765034, | |
| "rewards/rejected": -0.014738637022674084, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.7507204610951008, | |
| "grad_norm": 3.557410478591919, | |
| "learning_rate": 2.2082641742147238e-08, | |
| "logits/chosen": -1.471459150314331, | |
| "logits/rejected": -1.46261727809906, | |
| "logps/chosen": -45.6954231262207, | |
| "logps/rejected": -51.618690490722656, | |
| "loss": 0.6865, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.0008009333396330476, | |
| "rewards/margins": 0.013774615712463856, | |
| "rewards/rejected": -0.012973681092262268, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.7579250720461095, | |
| "grad_norm": 3.1066579818725586, | |
| "learning_rate": 2.1874571586252177e-08, | |
| "logits/chosen": -1.5453145503997803, | |
| "logits/rejected": -1.5335826873779297, | |
| "logps/chosen": -45.59928512573242, | |
| "logps/rejected": -48.342613220214844, | |
| "loss": 0.6878, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.0005938579561188817, | |
| "rewards/margins": 0.010966666042804718, | |
| "rewards/rejected": -0.010372808203101158, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.7651296829971181, | |
| "grad_norm": 2.4217562675476074, | |
| "learning_rate": 2.1666721135069037e-08, | |
| "logits/chosen": -1.5157561302185059, | |
| "logits/rejected": -1.5022157430648804, | |
| "logps/chosen": -49.85866165161133, | |
| "logps/rejected": -51.332054138183594, | |
| "loss": 0.6874, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.0015822596615180373, | |
| "rewards/margins": 0.011859697289764881, | |
| "rewards/rejected": -0.010277437046170235, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.7723342939481268, | |
| "grad_norm": 2.6538779735565186, | |
| "learning_rate": 2.145910499962628e-08, | |
| "logits/chosen": -1.57613205909729, | |
| "logits/rejected": -1.5550073385238647, | |
| "logps/chosen": -44.032875061035156, | |
| "logps/rejected": -46.20510482788086, | |
| "loss": 0.6846, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.003437323495745659, | |
| "rewards/margins": 0.017632577568292618, | |
| "rewards/rejected": -0.014195254072546959, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.7795389048991355, | |
| "grad_norm": 3.9013473987579346, | |
| "learning_rate": 2.1251737774480915e-08, | |
| "logits/chosen": -1.548778772354126, | |
| "logits/rejected": -1.5395104885101318, | |
| "logps/chosen": -53.24138259887695, | |
| "logps/rejected": -55.38903045654297, | |
| "loss": 0.6871, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.0015886023174971342, | |
| "rewards/margins": 0.01247491780668497, | |
| "rewards/rejected": -0.010886315256357193, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.7867435158501441, | |
| "grad_norm": 2.5475399494171143, | |
| "learning_rate": 2.104463403669264e-08, | |
| "logits/chosen": -1.4768997430801392, | |
| "logits/rejected": -1.456456184387207, | |
| "logps/chosen": -49.00716018676758, | |
| "logps/rejected": -51.28715133666992, | |
| "loss": 0.6861, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.0008974798256531358, | |
| "rewards/margins": 0.014535580761730671, | |
| "rewards/rejected": -0.013638099655508995, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.7939481268011528, | |
| "grad_norm": 2.6631736755371094, | |
| "learning_rate": 2.0837808344799028e-08, | |
| "logits/chosen": -1.4518641233444214, | |
| "logits/rejected": -1.4358189105987549, | |
| "logps/chosen": -43.846717834472656, | |
| "logps/rejected": -47.554298400878906, | |
| "loss": 0.6837, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.006967812776565552, | |
| "rewards/margins": 0.019296620041131973, | |
| "rewards/rejected": -0.012328808195888996, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.8011527377521612, | |
| "grad_norm": 3.2146389484405518, | |
| "learning_rate": 2.063127523779219e-08, | |
| "logits/chosen": -1.4298336505889893, | |
| "logits/rejected": -1.4289504289627075, | |
| "logps/chosen": -44.85996627807617, | |
| "logps/rejected": -51.302490234375, | |
| "loss": 0.6836, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": 0.0029459139332175255, | |
| "rewards/margins": 0.019514020532369614, | |
| "rewards/rejected": -0.016568105667829514, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.8083573487031699, | |
| "grad_norm": 3.835034132003784, | |
| "learning_rate": 2.0425049234096737e-08, | |
| "logits/chosen": -1.4856529235839844, | |
| "logits/rejected": -1.4708282947540283, | |
| "logps/chosen": -49.11738967895508, | |
| "logps/rejected": -51.82581329345703, | |
| "loss": 0.686, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.00014816033944953233, | |
| "rewards/margins": 0.014810358174145222, | |
| "rewards/rejected": -0.014662196859717369, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.8155619596541785, | |
| "grad_norm": 2.643429756164551, | |
| "learning_rate": 2.0219144830549163e-08, | |
| "logits/chosen": -1.4603726863861084, | |
| "logits/rejected": -1.4508287906646729, | |
| "logps/chosen": -49.00832748413086, | |
| "logps/rejected": -52.694000244140625, | |
| "loss": 0.6847, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.0007474374724552035, | |
| "rewards/margins": 0.017317909747362137, | |
| "rewards/rejected": -0.016570471227169037, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.8227665706051872, | |
| "grad_norm": 2.901050329208374, | |
| "learning_rate": 2.0013576501378823e-08, | |
| "logits/chosen": -1.4358056783676147, | |
| "logits/rejected": -1.4261062145233154, | |
| "logps/chosen": -44.63908767700195, | |
| "logps/rejected": -48.853946685791016, | |
| "loss": 0.6816, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.007906489074230194, | |
| "rewards/margins": 0.0237591415643692, | |
| "rewards/rejected": -0.015852652490139008, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.8299711815561959, | |
| "grad_norm": 3.5685510635375977, | |
| "learning_rate": 1.9808358697190426e-08, | |
| "logits/chosen": -1.4616773128509521, | |
| "logits/rejected": -1.4614180326461792, | |
| "logps/chosen": -40.03239059448242, | |
| "logps/rejected": -45.37065124511719, | |
| "loss": 0.6845, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.000598257698584348, | |
| "rewards/margins": 0.017768951132893562, | |
| "rewards/rejected": -0.018367204815149307, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.8371757925072045, | |
| "grad_norm": 3.0533125400543213, | |
| "learning_rate": 1.9603505843948214e-08, | |
| "logits/chosen": -1.4894813299179077, | |
| "logits/rejected": -1.4689750671386719, | |
| "logps/chosen": -41.06487274169922, | |
| "logps/rejected": -46.349754333496094, | |
| "loss": 0.6864, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.0008318226900883019, | |
| "rewards/margins": 0.013914955779910088, | |
| "rewards/rejected": -0.013083134777843952, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.8443804034582132, | |
| "grad_norm": 3.040861129760742, | |
| "learning_rate": 1.9399032341961886e-08, | |
| "logits/chosen": -1.4607442617416382, | |
| "logits/rejected": -1.440852165222168, | |
| "logps/chosen": -44.09001922607422, | |
| "logps/rejected": -45.968475341796875, | |
| "loss": 0.6868, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.003010817337781191, | |
| "rewards/margins": 0.013135477900505066, | |
| "rewards/rejected": -0.010124661028385162, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.8515850144092219, | |
| "grad_norm": 3.694607734680176, | |
| "learning_rate": 1.9194952564874323e-08, | |
| "logits/chosen": -1.4902753829956055, | |
| "logits/rejected": -1.4777010679244995, | |
| "logps/chosen": -49.39779281616211, | |
| "logps/rejected": -52.769874572753906, | |
| "loss": 0.6858, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.0012734916526824236, | |
| "rewards/margins": 0.015179460868239403, | |
| "rewards/rejected": -0.013905969448387623, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.8587896253602305, | |
| "grad_norm": 3.0213842391967773, | |
| "learning_rate": 1.8991280858651157e-08, | |
| "logits/chosen": -1.4653866291046143, | |
| "logits/rejected": -1.444471836090088, | |
| "logps/chosen": -48.05756378173828, | |
| "logps/rejected": -49.7245979309082, | |
| "loss": 0.6862, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.0008609207579866052, | |
| "rewards/margins": 0.014405569061636925, | |
| "rewards/rejected": -0.013544648885726929, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.8659942363112392, | |
| "grad_norm": 3.783416986465454, | |
| "learning_rate": 1.8788031540572327e-08, | |
| "logits/chosen": -1.432162880897522, | |
| "logits/rejected": -1.4183642864227295, | |
| "logps/chosen": -43.341087341308594, | |
| "logps/rejected": -47.24171447753906, | |
| "loss": 0.6846, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 0.0013560210354626179, | |
| "rewards/margins": 0.01773322932422161, | |
| "rewards/rejected": -0.01637720875442028, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.8731988472622478, | |
| "grad_norm": 3.501404285430908, | |
| "learning_rate": 1.858521889822565e-08, | |
| "logits/chosen": -1.4806432723999023, | |
| "logits/rejected": -1.47100031375885, | |
| "logps/chosen": -44.776634216308594, | |
| "logps/rejected": -47.38679504394531, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.00211677816696465, | |
| "rewards/margins": 0.012272249907255173, | |
| "rewards/rejected": -0.010155471973121166, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.8804034582132565, | |
| "grad_norm": 3.0854015350341797, | |
| "learning_rate": 1.8382857188502422e-08, | |
| "logits/chosen": -1.4793776273727417, | |
| "logits/rejected": -1.4643954038619995, | |
| "logps/chosen": -43.39844512939453, | |
| "logps/rejected": -46.25333023071289, | |
| "loss": 0.6856, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": 0.0010049303527921438, | |
| "rewards/margins": 0.015818919986486435, | |
| "rewards/rejected": -0.014813992194831371, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.8876080691642652, | |
| "grad_norm": 3.0658137798309326, | |
| "learning_rate": 1.8180960636595234e-08, | |
| "logits/chosen": -1.4343183040618896, | |
| "logits/rejected": -1.4235293865203857, | |
| "logps/chosen": -45.46432876586914, | |
| "logps/rejected": -48.85834503173828, | |
| "loss": 0.6841, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.0007860889891162515, | |
| "rewards/margins": 0.01868055760860443, | |
| "rewards/rejected": -0.01789446920156479, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.8948126801152738, | |
| "grad_norm": 2.6446595191955566, | |
| "learning_rate": 1.7979543434998015e-08, | |
| "logits/chosen": -1.51717209815979, | |
| "logits/rejected": -1.5126534700393677, | |
| "logps/chosen": -54.11791229248047, | |
| "logps/rejected": -55.764686584472656, | |
| "loss": 0.6887, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.002511689905077219, | |
| "rewards/margins": 0.009304236620664597, | |
| "rewards/rejected": -0.011815925128757954, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.9020172910662825, | |
| "grad_norm": 3.2104849815368652, | |
| "learning_rate": 1.7778619742508345e-08, | |
| "logits/chosen": -1.4984880685806274, | |
| "logits/rejected": -1.4783477783203125, | |
| "logps/chosen": -48.82425308227539, | |
| "logps/rejected": -50.621795654296875, | |
| "loss": 0.6861, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.0017830973956733942, | |
| "rewards/margins": 0.014747503213584423, | |
| "rewards/rejected": -0.0165305994451046, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.9092219020172911, | |
| "grad_norm": 5.295479774475098, | |
| "learning_rate": 1.757820368323213e-08, | |
| "logits/chosen": -1.4480249881744385, | |
| "logits/rejected": -1.4318348169326782, | |
| "logps/chosen": -55.6407585144043, | |
| "logps/rejected": -60.88798141479492, | |
| "loss": 0.6852, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.0003665873664431274, | |
| "rewards/margins": 0.01638123393058777, | |
| "rewards/rejected": -0.016747821122407913, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.9164265129682998, | |
| "grad_norm": 2.7129533290863037, | |
| "learning_rate": 1.7378309345590803e-08, | |
| "logits/chosen": -1.5183932781219482, | |
| "logits/rejected": -1.5215718746185303, | |
| "logps/chosen": -48.15589141845703, | |
| "logps/rejected": -51.79344940185547, | |
| "loss": 0.6858, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.00031967152608558536, | |
| "rewards/margins": 0.015088710002601147, | |
| "rewards/rejected": -0.014769040048122406, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.9236311239193085, | |
| "grad_norm": 3.052567481994629, | |
| "learning_rate": 1.717895078133088e-08, | |
| "logits/chosen": -1.5369809865951538, | |
| "logits/rejected": -1.5272055864334106, | |
| "logps/chosen": -45.72820281982422, | |
| "logps/rejected": -50.92518615722656, | |
| "loss": 0.6844, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 0.001268348889425397, | |
| "rewards/margins": 0.018100250512361526, | |
| "rewards/rejected": -0.01683189906179905, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.9308357348703171, | |
| "grad_norm": 2.9650213718414307, | |
| "learning_rate": 1.698014200453624e-08, | |
| "logits/chosen": -1.512068510055542, | |
| "logits/rejected": -1.5153911113739014, | |
| "logps/chosen": -48.55554962158203, | |
| "logps/rejected": -53.2547492980957, | |
| "loss": 0.6892, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 4.29423516834504e-06, | |
| "rewards/margins": 0.008227944374084473, | |
| "rewards/rejected": -0.00822365004569292, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.9380403458213258, | |
| "grad_norm": 3.127119541168213, | |
| "learning_rate": 1.6781896990642964e-08, | |
| "logits/chosen": -1.416456937789917, | |
| "logits/rejected": -1.406944751739502, | |
| "logps/chosen": -53.6823844909668, | |
| "logps/rejected": -55.647216796875, | |
| "loss": 0.6873, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": 0.0006507608923129737, | |
| "rewards/margins": 0.01228669099509716, | |
| "rewards/rejected": -0.011635931208729744, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.9452449567723344, | |
| "grad_norm": 3.6985666751861572, | |
| "learning_rate": 1.658422967545693e-08, | |
| "logits/chosen": -1.544832706451416, | |
| "logits/rejected": -1.523085117340088, | |
| "logps/chosen": -46.62820816040039, | |
| "logps/rejected": -48.89812469482422, | |
| "loss": 0.6864, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.002641532802954316, | |
| "rewards/margins": 0.01398603618144989, | |
| "rewards/rejected": -0.016627568751573563, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.952449567723343, | |
| "grad_norm": 3.3566489219665527, | |
| "learning_rate": 1.638715395417418e-08, | |
| "logits/chosen": -1.5159873962402344, | |
| "logits/rejected": -1.4998763799667358, | |
| "logps/chosen": -47.75004959106445, | |
| "logps/rejected": -50.23518753051758, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.0016118374187499285, | |
| "rewards/margins": 0.01232110895216465, | |
| "rewards/rejected": -0.013932946138083935, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.9596541786743515, | |
| "grad_norm": 3.4124369621276855, | |
| "learning_rate": 1.619068368040416e-08, | |
| "logits/chosen": -1.5037791728973389, | |
| "logits/rejected": -1.4937578439712524, | |
| "logps/chosen": -42.351898193359375, | |
| "logps/rejected": -47.97382354736328, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0007811610703356564, | |
| "rewards/margins": 0.017164845019578934, | |
| "rewards/rejected": -0.016383685171604156, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.9668587896253602, | |
| "grad_norm": 3.3016412258148193, | |
| "learning_rate": 1.5994832665195853e-08, | |
| "logits/chosen": -1.4340431690216064, | |
| "logits/rejected": -1.4277336597442627, | |
| "logps/chosen": -46.43476486206055, | |
| "logps/rejected": -48.81419372558594, | |
| "loss": 0.6874, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.00010869167454075068, | |
| "rewards/margins": 0.011941631324589252, | |
| "rewards/rejected": -0.011832939460873604, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.9740634005763689, | |
| "grad_norm": 3.3040690422058105, | |
| "learning_rate": 1.5799614676066906e-08, | |
| "logits/chosen": -1.561361312866211, | |
| "logits/rejected": -1.5565513372421265, | |
| "logps/chosen": -42.6262092590332, | |
| "logps/rejected": -47.142024993896484, | |
| "loss": 0.6851, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.0014737072633579373, | |
| "rewards/margins": 0.01672922447323799, | |
| "rewards/rejected": -0.018202928826212883, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.9812680115273775, | |
| "grad_norm": 2.7992374897003174, | |
| "learning_rate": 1.560504343603587e-08, | |
| "logits/chosen": -1.4569079875946045, | |
| "logits/rejected": -1.4597570896148682, | |
| "logps/chosen": -47.62577438354492, | |
| "logps/rejected": -53.16133499145508, | |
| "loss": 0.6863, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.0012593867722898722, | |
| "rewards/margins": 0.014034323394298553, | |
| "rewards/rejected": -0.012774934992194176, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.9884726224783862, | |
| "grad_norm": 2.700779438018799, | |
| "learning_rate": 1.541113262265748e-08, | |
| "logits/chosen": -1.558211088180542, | |
| "logits/rejected": -1.553525686264038, | |
| "logps/chosen": -47.86023712158203, | |
| "logps/rejected": -52.087554931640625, | |
| "loss": 0.6854, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.00030795374186709523, | |
| "rewards/margins": 0.01597772166132927, | |
| "rewards/rejected": -0.015669768676161766, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.9956772334293948, | |
| "grad_norm": 2.8260514736175537, | |
| "learning_rate": 1.5217895867061227e-08, | |
| "logits/chosen": -1.4797998666763306, | |
| "logits/rejected": -1.4682440757751465, | |
| "logps/chosen": -49.11394119262695, | |
| "logps/rejected": -51.766929626464844, | |
| "loss": 0.6856, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0009157865424640477, | |
| "rewards/margins": 0.015710871666669846, | |
| "rewards/rejected": -0.016626659780740738, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.0028818443804033, | |
| "grad_norm": 3.2303998470306396, | |
| "learning_rate": 1.5025346752993098e-08, | |
| "logits/chosen": -1.4731628894805908, | |
| "logits/rejected": -1.4776121377944946, | |
| "logps/chosen": -47.23828887939453, | |
| "logps/rejected": -51.46478271484375, | |
| "loss": 0.6885, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.003107684198766947, | |
| "rewards/margins": 0.009776955470442772, | |
| "rewards/rejected": -0.01288464106619358, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.010086455331412, | |
| "grad_norm": 3.200279951095581, | |
| "learning_rate": 1.4833498815860756e-08, | |
| "logits/chosen": -1.603215217590332, | |
| "logits/rejected": -1.5945051908493042, | |
| "logps/chosen": -44.726016998291016, | |
| "logps/rejected": -49.45168685913086, | |
| "loss": 0.6833, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0007763226167298853, | |
| "rewards/margins": 0.020336374640464783, | |
| "rewards/rejected": -0.01956005021929741, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.0172910662824206, | |
| "grad_norm": 3.470811128616333, | |
| "learning_rate": 1.4642365541781993e-08, | |
| "logits/chosen": -1.4187241792678833, | |
| "logits/rejected": -1.402005910873413, | |
| "logps/chosen": -46.40681838989258, | |
| "logps/rejected": -51.32469940185547, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.0019146741833537817, | |
| "rewards/margins": 0.017073018476366997, | |
| "rewards/rejected": -0.018987691029906273, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.0244956772334293, | |
| "grad_norm": 3.550839424133301, | |
| "learning_rate": 1.4451960366636745e-08, | |
| "logits/chosen": -1.5050057172775269, | |
| "logits/rejected": -1.5091335773468018, | |
| "logps/chosen": -50.25291061401367, | |
| "logps/rejected": -54.865325927734375, | |
| "loss": 0.6861, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -1.4477363038167823e-05, | |
| "rewards/margins": 0.014642780646681786, | |
| "rewards/rejected": -0.014657258987426758, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.031700288184438, | |
| "grad_norm": 2.99293851852417, | |
| "learning_rate": 1.4262296675122592e-08, | |
| "logits/chosen": -1.505652666091919, | |
| "logits/rejected": -1.490431547164917, | |
| "logps/chosen": -43.927162170410156, | |
| "logps/rejected": -48.62024688720703, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0005578006966970861, | |
| "rewards/margins": 0.016974329948425293, | |
| "rewards/rejected": -0.01753213070333004, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.0389048991354466, | |
| "grad_norm": 3.470144748687744, | |
| "learning_rate": 1.407338779981389e-08, | |
| "logits/chosen": -1.473926305770874, | |
| "logits/rejected": -1.4623931646347046, | |
| "logps/chosen": -41.433082580566406, | |
| "logps/rejected": -46.476688385009766, | |
| "loss": 0.6829, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.0010272187646478415, | |
| "rewards/margins": 0.021077865734696388, | |
| "rewards/rejected": -0.022105086594820023, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.0461095100864553, | |
| "grad_norm": 3.15632963180542, | |
| "learning_rate": 1.3885247020224534e-08, | |
| "logits/chosen": -1.4739320278167725, | |
| "logits/rejected": -1.4632718563079834, | |
| "logps/chosen": -40.990055084228516, | |
| "logps/rejected": -44.34033203125, | |
| "loss": 0.6834, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 0.000578380364459008, | |
| "rewards/margins": 0.020333198830485344, | |
| "rewards/rejected": -0.019754819571971893, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.053314121037464, | |
| "grad_norm": 2.7791974544525146, | |
| "learning_rate": 1.369788756187445e-08, | |
| "logits/chosen": -1.5228140354156494, | |
| "logits/rejected": -1.511235237121582, | |
| "logps/chosen": -46.87430953979492, | |
| "logps/rejected": -48.13962936401367, | |
| "loss": 0.6878, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.0030249585397541523, | |
| "rewards/margins": 0.011208303272724152, | |
| "rewards/rejected": -0.014233263209462166, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.0605187319884726, | |
| "grad_norm": 3.1895058155059814, | |
| "learning_rate": 1.3511322595359925e-08, | |
| "logits/chosen": -1.5314232110977173, | |
| "logits/rejected": -1.5200746059417725, | |
| "logps/chosen": -43.31315994262695, | |
| "logps/rejected": -49.05580139160156, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.0011509342584758997, | |
| "rewards/margins": 0.019369563087821007, | |
| "rewards/rejected": -0.020520497113466263, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.0677233429394812, | |
| "grad_norm": 3.250976324081421, | |
| "learning_rate": 1.3325565235427716e-08, | |
| "logits/chosen": -1.5527724027633667, | |
| "logits/rejected": -1.544170618057251, | |
| "logps/chosen": -45.31568145751953, | |
| "logps/rejected": -49.30888366699219, | |
| "loss": 0.6846, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.001304257777519524, | |
| "rewards/margins": 0.017727408558130264, | |
| "rewards/rejected": -0.019031664356589317, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.07492795389049, | |
| "grad_norm": 3.654803991317749, | |
| "learning_rate": 1.3140628540053218e-08, | |
| "logits/chosen": -1.4575417041778564, | |
| "logits/rejected": -1.4557491540908813, | |
| "logps/chosen": -45.90990447998047, | |
| "logps/rejected": -49.446495056152344, | |
| "loss": 0.6854, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.002215947024524212, | |
| "rewards/margins": 0.015816405415534973, | |
| "rewards/rejected": -0.013600456528365612, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.0821325648414986, | |
| "grad_norm": 3.999239921569824, | |
| "learning_rate": 1.2956525509522451e-08, | |
| "logits/chosen": -1.4346693754196167, | |
| "logits/rejected": -1.4396179914474487, | |
| "logps/chosen": -47.850128173828125, | |
| "logps/rejected": -51.41837692260742, | |
| "loss": 0.6873, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.0015433436492457986, | |
| "rewards/margins": 0.012201479636132717, | |
| "rewards/rejected": -0.010658138431608677, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.089337175792507, | |
| "grad_norm": 3.8457813262939453, | |
| "learning_rate": 1.2773269085518267e-08, | |
| "logits/chosen": -1.5163942575454712, | |
| "logits/rejected": -1.5108063220977783, | |
| "logps/chosen": -52.5305061340332, | |
| "logps/rejected": -56.18115234375, | |
| "loss": 0.6864, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.00015631233691237867, | |
| "rewards/margins": 0.01401402335613966, | |
| "rewards/rejected": -0.013857712037861347, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.096541786743516, | |
| "grad_norm": 2.6759727001190186, | |
| "learning_rate": 1.2590872150210574e-08, | |
| "logits/chosen": -1.5920841693878174, | |
| "logits/rejected": -1.5757710933685303, | |
| "logps/chosen": -45.695072174072266, | |
| "logps/rejected": -47.82882308959961, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.004866642877459526, | |
| "rewards/margins": 0.019523626193404198, | |
| "rewards/rejected": -0.024390270933508873, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.1037463976945245, | |
| "grad_norm": 2.84696888923645, | |
| "learning_rate": 1.2409347525350775e-08, | |
| "logits/chosen": -1.4993377923965454, | |
| "logits/rejected": -1.480965256690979, | |
| "logps/chosen": -47.44709396362305, | |
| "logps/rejected": -51.3841667175293, | |
| "loss": 0.6831, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.0008200405864045024, | |
| "rewards/margins": 0.02079201303422451, | |
| "rewards/rejected": -0.019971970468759537, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.110951008645533, | |
| "grad_norm": 3.4382824897766113, | |
| "learning_rate": 1.2228707971370421e-08, | |
| "logits/chosen": -1.4966394901275635, | |
| "logits/rejected": -1.4779356718063354, | |
| "logps/chosen": -42.06577682495117, | |
| "logps/rejected": -44.59346008300781, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.0021643335931003094, | |
| "rewards/margins": 0.01934720203280449, | |
| "rewards/rejected": -0.017182866111397743, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.118155619596542, | |
| "grad_norm": 4.404411792755127, | |
| "learning_rate": 1.2048966186484282e-08, | |
| "logits/chosen": -1.5253870487213135, | |
| "logits/rejected": -1.495600700378418, | |
| "logps/chosen": -52.13329315185547, | |
| "logps/rejected": -55.06341552734375, | |
| "loss": 0.686, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.001562885008752346, | |
| "rewards/margins": 0.014812910929322243, | |
| "rewards/rejected": -0.016375798732042313, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.1253602305475505, | |
| "grad_norm": 3.313408374786377, | |
| "learning_rate": 1.187013480579762e-08, | |
| "logits/chosen": -1.4894028902053833, | |
| "logits/rejected": -1.4830772876739502, | |
| "logps/chosen": -45.41261291503906, | |
| "logps/rejected": -49.40631103515625, | |
| "loss": 0.684, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.004708284046500921, | |
| "rewards/margins": 0.019039448350667953, | |
| "rewards/rejected": -0.023747732862830162, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.132564841498559, | |
| "grad_norm": 4.3451385498046875, | |
| "learning_rate": 1.1692226400418073e-08, | |
| "logits/chosen": -1.4124035835266113, | |
| "logits/rejected": -1.4043338298797607, | |
| "logps/chosen": -49.05751419067383, | |
| "logps/rejected": -52.05934524536133, | |
| "loss": 0.6855, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.004499994218349457, | |
| "rewards/margins": 0.01585240475833416, | |
| "rewards/rejected": -0.020352398976683617, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.139769452449568, | |
| "grad_norm": 2.498106002807617, | |
| "learning_rate": 1.1515253476571923e-08, | |
| "logits/chosen": -1.4480842351913452, | |
| "logits/rejected": -1.4422247409820557, | |
| "logps/chosen": -44.44374084472656, | |
| "logps/rejected": -51.034915924072266, | |
| "loss": 0.6841, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.003585060592740774, | |
| "rewards/margins": 0.018647151067852974, | |
| "rewards/rejected": -0.022232210263609886, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.1469740634005765, | |
| "grad_norm": 3.3477675914764404, | |
| "learning_rate": 1.133922847472496e-08, | |
| "logits/chosen": -1.4905388355255127, | |
| "logits/rejected": -1.4867520332336426, | |
| "logps/chosen": -52.541542053222656, | |
| "logps/rejected": -55.05914306640625, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.0006752757471986115, | |
| "rewards/margins": 0.017081119120121002, | |
| "rewards/rejected": -0.017756396904587746, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.154178674351585, | |
| "grad_norm": 3.3309946060180664, | |
| "learning_rate": 1.1164163768707952e-08, | |
| "logits/chosen": -1.4653676748275757, | |
| "logits/rejected": -1.454390525817871, | |
| "logps/chosen": -47.43062973022461, | |
| "logps/rejected": -51.644432067871094, | |
| "loss": 0.6824, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.0007173820049501956, | |
| "rewards/margins": 0.0224156454205513, | |
| "rewards/rejected": -0.02313302643597126, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.161383285302594, | |
| "grad_norm": 3.354658365249634, | |
| "learning_rate": 1.0990071664846861e-08, | |
| "logits/chosen": -1.4403693675994873, | |
| "logits/rejected": -1.4301955699920654, | |
| "logps/chosen": -48.77064895629883, | |
| "logps/rejected": -53.99092483520508, | |
| "loss": 0.6818, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 9.120155300479382e-05, | |
| "rewards/margins": 0.023369425907731056, | |
| "rewards/rejected": -0.02327822335064411, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.1685878962536025, | |
| "grad_norm": 3.0528011322021484, | |
| "learning_rate": 1.0816964401097739e-08, | |
| "logits/chosen": -1.482627272605896, | |
| "logits/rejected": -1.4725825786590576, | |
| "logps/chosen": -43.032142639160156, | |
| "logps/rejected": -45.872344970703125, | |
| "loss": 0.6854, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.0006441779551096261, | |
| "rewards/margins": 0.016231542453169823, | |
| "rewards/rejected": -0.016875719651579857, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.175792507204611, | |
| "grad_norm": 3.9394450187683105, | |
| "learning_rate": 1.0644854146186406e-08, | |
| "logits/chosen": -1.5147243738174438, | |
| "logits/rejected": -1.4970117807388306, | |
| "logps/chosen": -48.232513427734375, | |
| "logps/rejected": -52.97943878173828, | |
| "loss": 0.6827, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0024471329525113106, | |
| "rewards/margins": 0.021746691316366196, | |
| "rewards/rejected": -0.02419382520020008, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.18299711815562, | |
| "grad_norm": 3.223052978515625, | |
| "learning_rate": 1.0473752998753114e-08, | |
| "logits/chosen": -1.4945493936538696, | |
| "logits/rejected": -1.473004698753357, | |
| "logps/chosen": -48.55394744873047, | |
| "logps/rejected": -51.71314239501953, | |
| "loss": 0.6823, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.0011378397466614842, | |
| "rewards/margins": 0.022417975589632988, | |
| "rewards/rejected": -0.021280135959386826, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.1902017291066285, | |
| "grad_norm": 2.9647247791290283, | |
| "learning_rate": 1.030367298650201e-08, | |
| "logits/chosen": -1.4931919574737549, | |
| "logits/rejected": -1.492661476135254, | |
| "logps/chosen": -48.77735900878906, | |
| "logps/rejected": -53.65592575073242, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.004096081480383873, | |
| "rewards/margins": 0.012563072144985199, | |
| "rewards/rejected": -0.016659153625369072, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.1974063400576367, | |
| "grad_norm": 3.8749027252197266, | |
| "learning_rate": 1.0134626065355675e-08, | |
| "logits/chosen": -1.5941665172576904, | |
| "logits/rejected": -1.5830225944519043, | |
| "logps/chosen": -49.36051559448242, | |
| "logps/rejected": -52.95866012573242, | |
| "loss": 0.6825, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.0015227645635604858, | |
| "rewards/margins": 0.022038374096155167, | |
| "rewards/rejected": -0.02051560953259468, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.2046109510086453, | |
| "grad_norm": 3.4919955730438232, | |
| "learning_rate": 9.966624118614611e-09, | |
| "logits/chosen": -1.4911123514175415, | |
| "logits/rejected": -1.4718658924102783, | |
| "logps/chosen": -52.330223083496094, | |
| "logps/rejected": -55.529273986816406, | |
| "loss": 0.6837, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 0.0019868058152496815, | |
| "rewards/margins": 0.01953335851430893, | |
| "rewards/rejected": -0.017546551302075386, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.211815561959654, | |
| "grad_norm": 2.4199657440185547, | |
| "learning_rate": 9.799678956121976e-09, | |
| "logits/chosen": -1.4366905689239502, | |
| "logits/rejected": -1.420212984085083, | |
| "logps/chosen": -45.93531036376953, | |
| "logps/rejected": -48.54907989501953, | |
| "loss": 0.6871, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.0024884543381631374, | |
| "rewards/margins": 0.012519553303718567, | |
| "rewards/rejected": -0.015008007176220417, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.2190201729106627, | |
| "grad_norm": 3.557464361190796, | |
| "learning_rate": 9.633802313433314e-09, | |
| "logits/chosen": -1.4143495559692383, | |
| "logits/rejected": -1.410310983657837, | |
| "logps/chosen": -48.37750244140625, | |
| "logps/rejected": -50.99727249145508, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.0013545064721256495, | |
| "rewards/margins": 0.017067620530724525, | |
| "rewards/rejected": -0.01842212677001953, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.2262247838616713, | |
| "grad_norm": 2.7765674591064453, | |
| "learning_rate": 9.469005850991705e-09, | |
| "logits/chosen": -1.4845517873764038, | |
| "logits/rejected": -1.4717377424240112, | |
| "logps/chosen": -47.20240020751953, | |
| "logps/rejected": -48.706504821777344, | |
| "loss": 0.6842, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0018641784554347396, | |
| "rewards/margins": 0.018557867035269737, | |
| "rewards/rejected": -0.02042204514145851, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.23342939481268, | |
| "grad_norm": 3.18376088142395, | |
| "learning_rate": 9.305301153307949e-09, | |
| "logits/chosen": -1.496174693107605, | |
| "logits/rejected": -1.5003782510757446, | |
| "logps/chosen": -40.0098762512207, | |
| "logps/rejected": -44.06249237060547, | |
| "loss": 0.6833, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.005063945893198252, | |
| "rewards/margins": 0.020303303375840187, | |
| "rewards/rejected": -0.02536724880337715, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.2406340057636887, | |
| "grad_norm": 2.7682111263275146, | |
| "learning_rate": 9.142699728146336e-09, | |
| "logits/chosen": -1.43401300907135, | |
| "logits/rejected": -1.4253044128417969, | |
| "logps/chosen": -46.127769470214844, | |
| "logps/rejected": -51.10237503051758, | |
| "loss": 0.6844, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.002828083001077175, | |
| "rewards/margins": 0.018272753804922104, | |
| "rewards/rejected": -0.021100837737321854, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.2478386167146973, | |
| "grad_norm": 2.9937551021575928, | |
| "learning_rate": 8.981213005715627e-09, | |
| "logits/chosen": -1.501936435699463, | |
| "logits/rejected": -1.501483678817749, | |
| "logps/chosen": -44.192527770996094, | |
| "logps/rejected": -49.055450439453125, | |
| "loss": 0.6841, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.0020972955971956253, | |
| "rewards/margins": 0.018690943717956543, | |
| "rewards/rejected": -0.020788241177797318, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.255043227665706, | |
| "grad_norm": 3.6883223056793213, | |
| "learning_rate": 8.820852337865611e-09, | |
| "logits/chosen": -1.5518832206726074, | |
| "logits/rejected": -1.5363496541976929, | |
| "logps/chosen": -45.0767822265625, | |
| "logps/rejected": -48.65938186645508, | |
| "loss": 0.6844, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.0013773315586149693, | |
| "rewards/margins": 0.018128078430891037, | |
| "rewards/rejected": -0.01950540952384472, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.2622478386167146, | |
| "grad_norm": 2.8200368881225586, | |
| "learning_rate": 8.661628997289044e-09, | |
| "logits/chosen": -1.4339885711669922, | |
| "logits/rejected": -1.4207003116607666, | |
| "logps/chosen": -45.365943908691406, | |
| "logps/rejected": -49.89191818237305, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.000960633740760386, | |
| "rewards/margins": 0.019434962421655655, | |
| "rewards/rejected": -0.020395595580339432, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.2694524495677233, | |
| "grad_norm": 2.8255815505981445, | |
| "learning_rate": 8.503554176729341e-09, | |
| "logits/chosen": -1.4203985929489136, | |
| "logits/rejected": -1.4152114391326904, | |
| "logps/chosen": -45.45329666137695, | |
| "logps/rejected": -49.060142517089844, | |
| "loss": 0.6833, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.000864202156662941, | |
| "rewards/margins": 0.020447982475161552, | |
| "rewards/rejected": -0.01958378031849861, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.276657060518732, | |
| "grad_norm": 3.818735122680664, | |
| "learning_rate": 8.346638988193636e-09, | |
| "logits/chosen": -1.4726885557174683, | |
| "logits/rejected": -1.4705435037612915, | |
| "logps/chosen": -40.58414840698242, | |
| "logps/rejected": -46.35978317260742, | |
| "loss": 0.6836, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.0010551244486123323, | |
| "rewards/margins": 0.019719591364264488, | |
| "rewards/rejected": -0.020774714648723602, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.2838616714697406, | |
| "grad_norm": 4.216102123260498, | |
| "learning_rate": 8.19089446217176e-09, | |
| "logits/chosen": -1.4285030364990234, | |
| "logits/rejected": -1.4068001508712769, | |
| "logps/chosen": -45.76138687133789, | |
| "logps/rejected": -51.217926025390625, | |
| "loss": 0.6797, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.0022968396078795195, | |
| "rewards/margins": 0.027888495475053787, | |
| "rewards/rejected": -0.025591660290956497, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.2910662824207493, | |
| "grad_norm": 3.064260959625244, | |
| "learning_rate": 8.036331546860777e-09, | |
| "logits/chosen": -1.4557617902755737, | |
| "logits/rejected": -1.4532365798950195, | |
| "logps/chosen": -45.43151092529297, | |
| "logps/rejected": -48.28192901611328, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.0039183879271149635, | |
| "rewards/margins": 0.011963925324380398, | |
| "rewards/rejected": -0.01588231325149536, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.298270893371758, | |
| "grad_norm": 3.668010950088501, | |
| "learning_rate": 7.882961107395416e-09, | |
| "logits/chosen": -1.4972290992736816, | |
| "logits/rejected": -1.4874447584152222, | |
| "logps/chosen": -52.34519577026367, | |
| "logps/rejected": -52.7003173828125, | |
| "loss": 0.6871, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.00619167648255825, | |
| "rewards/margins": 0.01287173479795456, | |
| "rewards/rejected": -0.01906341128051281, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.3054755043227666, | |
| "grad_norm": 4.626786231994629, | |
| "learning_rate": 7.73079392508428e-09, | |
| "logits/chosen": -1.4211690425872803, | |
| "logits/rejected": -1.4251266717910767, | |
| "logps/chosen": -49.76266098022461, | |
| "logps/rejected": -56.70771026611328, | |
| "loss": 0.6824, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.00390449957922101, | |
| "rewards/margins": 0.022429395467042923, | |
| "rewards/rejected": -0.026333892717957497, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.3126801152737753, | |
| "grad_norm": 3.64296555519104, | |
| "learning_rate": 7.579840696651938e-09, | |
| "logits/chosen": -1.5132083892822266, | |
| "logits/rejected": -1.5068855285644531, | |
| "logps/chosen": -42.3305778503418, | |
| "logps/rejected": -45.745948791503906, | |
| "loss": 0.6841, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.004392626229673624, | |
| "rewards/margins": 0.01889212615787983, | |
| "rewards/rejected": -0.023284751921892166, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.319884726224784, | |
| "grad_norm": 4.373074054718018, | |
| "learning_rate": 7.43011203348704e-09, | |
| "logits/chosen": -1.3568141460418701, | |
| "logits/rejected": -1.351928472518921, | |
| "logps/chosen": -53.11281204223633, | |
| "logps/rejected": -53.88922882080078, | |
| "loss": 0.6857, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.005558110773563385, | |
| "rewards/margins": 0.015617373399436474, | |
| "rewards/rejected": -0.021175485104322433, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.3270893371757926, | |
| "grad_norm": 3.2564187049865723, | |
| "learning_rate": 7.281618460896344e-09, | |
| "logits/chosen": -1.4833844900131226, | |
| "logits/rejected": -1.4731206893920898, | |
| "logps/chosen": -46.27775192260742, | |
| "logps/rejected": -50.7646598815918, | |
| "loss": 0.6844, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.0017924957210198045, | |
| "rewards/margins": 0.018207941204309464, | |
| "rewards/rejected": -0.020000439137220383, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.3342939481268012, | |
| "grad_norm": 2.9897639751434326, | |
| "learning_rate": 7.134370417364849e-09, | |
| "logits/chosen": -1.4312418699264526, | |
| "logits/rejected": -1.4230421781539917, | |
| "logps/chosen": -45.265655517578125, | |
| "logps/rejected": -48.087547302246094, | |
| "loss": 0.6868, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.007034836802631617, | |
| "rewards/margins": 0.01339829433709383, | |
| "rewards/rejected": -0.020433131605386734, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.34149855907781, | |
| "grad_norm": 4.031427383422852, | |
| "learning_rate": 6.988378253821981e-09, | |
| "logits/chosen": -1.4584436416625977, | |
| "logits/rejected": -1.4505494832992554, | |
| "logps/chosen": -51.43975067138672, | |
| "logps/rejected": -54.94978713989258, | |
| "loss": 0.6869, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.00025085260858759284, | |
| "rewards/margins": 0.013079972937703133, | |
| "rewards/rejected": -0.013330824673175812, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.3487031700288186, | |
| "grad_norm": 3.0655064582824707, | |
| "learning_rate": 6.8436522329140186e-09, | |
| "logits/chosen": -1.4397895336151123, | |
| "logits/rejected": -1.4459335803985596, | |
| "logps/chosen": -46.97881317138672, | |
| "logps/rejected": -50.81465530395508, | |
| "loss": 0.6854, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.0028320541605353355, | |
| "rewards/margins": 0.01652853563427925, | |
| "rewards/rejected": -0.01936059445142746, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.3559077809798272, | |
| "grad_norm": 3.536172389984131, | |
| "learning_rate": 6.700202528282603e-09, | |
| "logits/chosen": -1.41671621799469, | |
| "logits/rejected": -1.3971173763275146, | |
| "logps/chosen": -48.60463333129883, | |
| "logps/rejected": -51.607643127441406, | |
| "loss": 0.6834, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.006176457740366459, | |
| "rewards/margins": 0.020537305623292923, | |
| "rewards/rejected": -0.026713764294981956, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.363112391930836, | |
| "grad_norm": 3.716606378555298, | |
| "learning_rate": 6.558039223849668e-09, | |
| "logits/chosen": -1.5101362466812134, | |
| "logits/rejected": -1.4909931421279907, | |
| "logps/chosen": -46.293373107910156, | |
| "logps/rejected": -52.6843147277832, | |
| "loss": 0.6816, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.0020285609643906355, | |
| "rewards/margins": 0.02413899265229702, | |
| "rewards/rejected": -0.026167552918195724, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.3703170028818445, | |
| "grad_norm": 2.897578001022339, | |
| "learning_rate": 6.417172313108471e-09, | |
| "logits/chosen": -1.4238040447235107, | |
| "logits/rejected": -1.4123533964157104, | |
| "logps/chosen": -44.11698913574219, | |
| "logps/rejected": -47.348690032958984, | |
| "loss": 0.6852, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.007857661694288254, | |
| "rewards/margins": 0.01665417291224003, | |
| "rewards/rejected": -0.024511834606528282, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.377521613832853, | |
| "grad_norm": 2.985238552093506, | |
| "learning_rate": 6.277611698421179e-09, | |
| "logits/chosen": -1.5537796020507812, | |
| "logits/rejected": -1.5336295366287231, | |
| "logps/chosen": -39.11783218383789, | |
| "logps/rejected": -45.03361892700195, | |
| "loss": 0.6814, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.004091170616447926, | |
| "rewards/margins": 0.02452995628118515, | |
| "rewards/rejected": -0.0286211259663105, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.3847262247838614, | |
| "grad_norm": 4.756281852722168, | |
| "learning_rate": 6.139367190322714e-09, | |
| "logits/chosen": -1.4921059608459473, | |
| "logits/rejected": -1.4921514987945557, | |
| "logps/chosen": -52.750709533691406, | |
| "logps/rejected": -58.163116455078125, | |
| "loss": 0.6855, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.0038170956540852785, | |
| "rewards/margins": 0.01597406342625618, | |
| "rewards/rejected": -0.01979115977883339, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.39193083573487, | |
| "grad_norm": 2.5016090869903564, | |
| "learning_rate": 6.002448506831171e-09, | |
| "logits/chosen": -1.4790103435516357, | |
| "logits/rejected": -1.4744632244110107, | |
| "logps/chosen": -44.081382751464844, | |
| "logps/rejected": -49.28645706176758, | |
| "loss": 0.6842, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.003526459215208888, | |
| "rewards/margins": 0.01861676014959812, | |
| "rewards/rejected": -0.02214321866631508, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.3991354466858787, | |
| "grad_norm": 3.0572214126586914, | |
| "learning_rate": 5.866865272764607e-09, | |
| "logits/chosen": -1.4946694374084473, | |
| "logits/rejected": -1.4880764484405518, | |
| "logps/chosen": -46.4217643737793, | |
| "logps/rejected": -50.51008605957031, | |
| "loss": 0.6854, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0063838111236691475, | |
| "rewards/margins": 0.016102833673357964, | |
| "rewards/rejected": -0.022486645728349686, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.4063400576368874, | |
| "grad_norm": 4.673269271850586, | |
| "learning_rate": 5.7326270190645595e-09, | |
| "logits/chosen": -1.3282158374786377, | |
| "logits/rejected": -1.3230302333831787, | |
| "logps/chosen": -49.934425354003906, | |
| "logps/rejected": -52.02433395385742, | |
| "loss": 0.6845, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.005276781972497702, | |
| "rewards/margins": 0.018048197031021118, | |
| "rewards/rejected": -0.023324977606534958, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.413544668587896, | |
| "grad_norm": 3.670146942138672, | |
| "learning_rate": 5.599743182125938e-09, | |
| "logits/chosen": -1.5373293161392212, | |
| "logits/rejected": -1.5367281436920166, | |
| "logps/chosen": -48.803321838378906, | |
| "logps/rejected": -54.12480545043945, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.0014929801691323519, | |
| "rewards/margins": 0.01699111983180046, | |
| "rewards/rejected": -0.018484100699424744, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.4207492795389047, | |
| "grad_norm": 3.4955081939697266, | |
| "learning_rate": 5.46822310313379e-09, | |
| "logits/chosen": -1.560293436050415, | |
| "logits/rejected": -1.5650306940078735, | |
| "logps/chosen": -49.443687438964844, | |
| "logps/rejected": -52.810874938964844, | |
| "loss": 0.6873, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.0048328666016459465, | |
| "rewards/margins": 0.012173362076282501, | |
| "rewards/rejected": -0.017006227746605873, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.4279538904899134, | |
| "grad_norm": 3.653897523880005, | |
| "learning_rate": 5.33807602740658e-09, | |
| "logits/chosen": -1.556183934211731, | |
| "logits/rejected": -1.5406051874160767, | |
| "logps/chosen": -41.885528564453125, | |
| "logps/rejected": -47.48136901855469, | |
| "loss": 0.6799, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.0013223844580352306, | |
| "rewards/margins": 0.027406567707657814, | |
| "rewards/rejected": -0.028728952631354332, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.435158501440922, | |
| "grad_norm": 3.8069379329681396, | |
| "learning_rate": 5.209311103746334e-09, | |
| "logits/chosen": -1.4751628637313843, | |
| "logits/rejected": -1.4709327220916748, | |
| "logps/chosen": -47.141212463378906, | |
| "logps/rejected": -52.47745895385742, | |
| "loss": 0.6836, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.003443703055381775, | |
| "rewards/margins": 0.019782431423664093, | |
| "rewards/rejected": -0.02322613261640072, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.4423631123919307, | |
| "grad_norm": 4.198550701141357, | |
| "learning_rate": 5.081937383795484e-09, | |
| "logits/chosen": -1.4638655185699463, | |
| "logits/rejected": -1.4535114765167236, | |
| "logps/chosen": -44.21641540527344, | |
| "logps/rejected": -48.895484924316406, | |
| "loss": 0.6822, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.001047824858687818, | |
| "rewards/margins": 0.022810544818639755, | |
| "rewards/rejected": -0.02385837212204933, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.4495677233429394, | |
| "grad_norm": 3.6953177452087402, | |
| "learning_rate": 4.955963821400599e-09, | |
| "logits/chosen": -1.5245097875595093, | |
| "logits/rejected": -1.5063731670379639, | |
| "logps/chosen": -46.904541015625, | |
| "logps/rejected": -49.754425048828125, | |
| "loss": 0.6827, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.00250697392039001, | |
| "rewards/margins": 0.021855643019080162, | |
| "rewards/rejected": -0.02436261810362339, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.456772334293948, | |
| "grad_norm": 2.7883760929107666, | |
| "learning_rate": 4.831399271982928e-09, | |
| "logits/chosen": -1.3962290287017822, | |
| "logits/rejected": -1.379988431930542, | |
| "logps/chosen": -49.720054626464844, | |
| "logps/rejected": -52.83693313598633, | |
| "loss": 0.683, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.0017061985563486814, | |
| "rewards/margins": 0.021188754588365555, | |
| "rewards/rejected": -0.022894952446222305, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.4639769452449567, | |
| "grad_norm": 3.981572389602661, | |
| "learning_rate": 4.708252491915951e-09, | |
| "logits/chosen": -1.4993345737457275, | |
| "logits/rejected": -1.4892202615737915, | |
| "logps/chosen": -47.0760383605957, | |
| "logps/rejected": -51.671592712402344, | |
| "loss": 0.6829, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.003932795021682978, | |
| "rewards/margins": 0.021549254655838013, | |
| "rewards/rejected": -0.02548205293715, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.4711815561959654, | |
| "grad_norm": 2.9065399169921875, | |
| "learning_rate": 4.58653213790981e-09, | |
| "logits/chosen": -1.4973710775375366, | |
| "logits/rejected": -1.4790763854980469, | |
| "logps/chosen": -47.4251823425293, | |
| "logps/rejected": -52.07381057739258, | |
| "loss": 0.6836, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0022236169315874577, | |
| "rewards/margins": 0.019855182617902756, | |
| "rewards/rejected": -0.022078800946474075, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.478386167146974, | |
| "grad_norm": 3.3968310356140137, | |
| "learning_rate": 4.466246766402773e-09, | |
| "logits/chosen": -1.4705581665039062, | |
| "logits/rejected": -1.4513893127441406, | |
| "logps/chosen": -48.730186462402344, | |
| "logps/rejected": -52.47394943237305, | |
| "loss": 0.6818, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.0018464624881744385, | |
| "rewards/margins": 0.023552386090159416, | |
| "rewards/rejected": -0.025398846715688705, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.4855907780979827, | |
| "grad_norm": 3.7176759243011475, | |
| "learning_rate": 4.347404832959775e-09, | |
| "logits/chosen": -1.5254770517349243, | |
| "logits/rejected": -1.5139881372451782, | |
| "logps/chosen": -44.64695739746094, | |
| "logps/rejected": -48.886756896972656, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.004621665924787521, | |
| "rewards/margins": 0.019287504255771637, | |
| "rewards/rejected": -0.02390917018055916, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.4927953890489913, | |
| "grad_norm": 3.4857494831085205, | |
| "learning_rate": 4.230014691678016e-09, | |
| "logits/chosen": -1.477423071861267, | |
| "logits/rejected": -1.478615403175354, | |
| "logps/chosen": -49.44475173950195, | |
| "logps/rejected": -51.15663528442383, | |
| "loss": 0.6865, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.006155318580567837, | |
| "rewards/margins": 0.013897466473281384, | |
| "rewards/rejected": -0.02005278691649437, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 3.1407816410064697, | |
| "learning_rate": 4.114084594599707e-09, | |
| "logits/chosen": -1.4632585048675537, | |
| "logits/rejected": -1.4401248693466187, | |
| "logps/chosen": -45.59687042236328, | |
| "logps/rejected": -51.5286979675293, | |
| "loss": 0.6822, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.00289691099897027, | |
| "rewards/margins": 0.022817853838205338, | |
| "rewards/rejected": -0.02571476623415947, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.5072046109510087, | |
| "grad_norm": 3.1534249782562256, | |
| "learning_rate": 3.9996226911319546e-09, | |
| "logits/chosen": -1.4795855283737183, | |
| "logits/rejected": -1.4571055173873901, | |
| "logps/chosen": -45.63375473022461, | |
| "logps/rejected": -48.75299072265625, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.0032276164274662733, | |
| "rewards/margins": 0.019503096118569374, | |
| "rewards/rejected": -0.022730711847543716, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.5144092219020173, | |
| "grad_norm": 3.362689971923828, | |
| "learning_rate": 3.886637027473949e-09, | |
| "logits/chosen": -1.5117019414901733, | |
| "logits/rejected": -1.507697343826294, | |
| "logps/chosen": -47.55299758911133, | |
| "logps/rejected": -51.62897491455078, | |
| "loss": 0.6837, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.005091325379908085, | |
| "rewards/margins": 0.01942974142730236, | |
| "rewards/rejected": -0.02452106960117817, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.521613832853026, | |
| "grad_norm": 3.200064182281494, | |
| "learning_rate": 3.775135546051295e-09, | |
| "logits/chosen": -1.4044318199157715, | |
| "logits/rejected": -1.4048256874084473, | |
| "logps/chosen": -46.03704833984375, | |
| "logps/rejected": -50.541046142578125, | |
| "loss": 0.6818, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.004429998807609081, | |
| "rewards/margins": 0.02344963699579239, | |
| "rewards/rejected": -0.027879636734724045, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.5288184438040346, | |
| "grad_norm": 3.3993964195251465, | |
| "learning_rate": 3.665126084957723e-09, | |
| "logits/chosen": -1.4691989421844482, | |
| "logits/rejected": -1.4607309103012085, | |
| "logps/chosen": -50.858978271484375, | |
| "logps/rejected": -51.20890426635742, | |
| "loss": 0.6844, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.005501296371221542, | |
| "rewards/margins": 0.018329834565520287, | |
| "rewards/rejected": -0.023831134662032127, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.5360230547550433, | |
| "grad_norm": 3.145418167114258, | |
| "learning_rate": 3.556616377404101e-09, | |
| "logits/chosen": -1.5022590160369873, | |
| "logits/rejected": -1.4903608560562134, | |
| "logps/chosen": -51.8497428894043, | |
| "logps/rejected": -55.86328887939453, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.006599706597626209, | |
| "rewards/margins": 0.023647639900445938, | |
| "rewards/rejected": -0.030247345566749573, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.543227665706052, | |
| "grad_norm": 3.399369478225708, | |
| "learning_rate": 3.4496140511748125e-09, | |
| "logits/chosen": -1.4858022928237915, | |
| "logits/rejected": -1.4672108888626099, | |
| "logps/chosen": -48.12791061401367, | |
| "logps/rejected": -51.02872085571289, | |
| "loss": 0.6833, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.00712383771315217, | |
| "rewards/margins": 0.02042998932301998, | |
| "rewards/rejected": -0.027553830295801163, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.5504322766570606, | |
| "grad_norm": 3.965590000152588, | |
| "learning_rate": 3.3441266280915427e-09, | |
| "logits/chosen": -1.4491469860076904, | |
| "logits/rejected": -1.4466187953948975, | |
| "logps/chosen": -53.73418045043945, | |
| "logps/rejected": -57.25054931640625, | |
| "loss": 0.6853, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.002359113423153758, | |
| "rewards/margins": 0.01653447560966015, | |
| "rewards/rejected": -0.018893588334321976, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.5576368876080693, | |
| "grad_norm": 3.5014843940734863, | |
| "learning_rate": 3.2401615234845693e-09, | |
| "logits/chosen": -1.492725133895874, | |
| "logits/rejected": -1.4749128818511963, | |
| "logps/chosen": -54.025245666503906, | |
| "logps/rejected": -57.38044357299805, | |
| "loss": 0.6818, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.007148324511945248, | |
| "rewards/margins": 0.02353733219206333, | |
| "rewards/rejected": -0.030685653910040855, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.564841498559078, | |
| "grad_norm": 3.1182708740234375, | |
| "learning_rate": 3.1377260456714375e-09, | |
| "logits/chosen": -1.3231797218322754, | |
| "logits/rejected": -1.3109872341156006, | |
| "logps/chosen": -49.063629150390625, | |
| "logps/rejected": -54.12776565551758, | |
| "loss": 0.6836, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.007035274989902973, | |
| "rewards/margins": 0.020289259031414986, | |
| "rewards/rejected": -0.027324533089995384, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.5720461095100866, | |
| "grad_norm": 3.6655466556549072, | |
| "learning_rate": 3.0368273954432698e-09, | |
| "logits/chosen": -1.5293312072753906, | |
| "logits/rejected": -1.5017873048782349, | |
| "logps/chosen": -51.08592987060547, | |
| "logps/rejected": -53.295440673828125, | |
| "loss": 0.6846, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.006225470919162035, | |
| "rewards/margins": 0.017943011596798897, | |
| "rewards/rejected": -0.024168482050299644, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.5792507204610953, | |
| "grad_norm": 3.021207094192505, | |
| "learning_rate": 2.937472665558541e-09, | |
| "logits/chosen": -1.5536901950836182, | |
| "logits/rejected": -1.5462580919265747, | |
| "logps/chosen": -45.45623016357422, | |
| "logps/rejected": -47.74097442626953, | |
| "loss": 0.6818, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.008379434235394001, | |
| "rewards/margins": 0.0236833319067955, | |
| "rewards/rejected": -0.03206276521086693, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.586455331412104, | |
| "grad_norm": 4.031035900115967, | |
| "learning_rate": 2.8396688402445053e-09, | |
| "logits/chosen": -1.574249505996704, | |
| "logits/rejected": -1.5572589635849, | |
| "logps/chosen": -45.45792007446289, | |
| "logps/rejected": -51.68840789794922, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.01051649171859026, | |
| "rewards/margins": 0.024006729945540428, | |
| "rewards/rejected": -0.03452322259545326, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.5936599423631126, | |
| "grad_norm": 4.006160259246826, | |
| "learning_rate": 2.7434227947062324e-09, | |
| "logits/chosen": -1.5262694358825684, | |
| "logits/rejected": -1.5151522159576416, | |
| "logps/chosen": -53.84954071044922, | |
| "logps/rejected": -57.39601516723633, | |
| "loss": 0.6856, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.005320216063410044, | |
| "rewards/margins": 0.015715904533863068, | |
| "rewards/rejected": -0.021036118268966675, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.6008645533141213, | |
| "grad_norm": 3.0315651893615723, | |
| "learning_rate": 2.6487412946432976e-09, | |
| "logits/chosen": -1.4456422328948975, | |
| "logits/rejected": -1.4327547550201416, | |
| "logps/chosen": -49.539451599121094, | |
| "logps/rejected": -52.36029052734375, | |
| "loss": 0.6821, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.012892505154013634, | |
| "rewards/margins": 0.023083876818418503, | |
| "rewards/rejected": -0.03597638010978699, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.60806916426513, | |
| "grad_norm": 3.428950786590576, | |
| "learning_rate": 2.5556309957742024e-09, | |
| "logits/chosen": -1.4444409608840942, | |
| "logits/rejected": -1.4349945783615112, | |
| "logps/chosen": -44.949119567871094, | |
| "logps/rejected": -52.14201736450195, | |
| "loss": 0.6804, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.0015101671451702714, | |
| "rewards/margins": 0.026495974510908127, | |
| "rewards/rejected": -0.02498580887913704, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.6152737752161386, | |
| "grad_norm": 3.3707656860351562, | |
| "learning_rate": 2.4640984433684758e-09, | |
| "logits/chosen": -1.5578646659851074, | |
| "logits/rejected": -1.5437214374542236, | |
| "logps/chosen": -50.96113204956055, | |
| "logps/rejected": -53.15272903442383, | |
| "loss": 0.6837, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.004595599137246609, | |
| "rewards/margins": 0.019676122814416885, | |
| "rewards/rejected": -0.02427172288298607, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.6224783861671472, | |
| "grad_norm": 3.6540372371673584, | |
| "learning_rate": 2.3741500717865987e-09, | |
| "logits/chosen": -1.4447523355484009, | |
| "logits/rejected": -1.4563525915145874, | |
| "logps/chosen": -47.37974166870117, | |
| "logps/rejected": -52.231971740722656, | |
| "loss": 0.6841, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.0018523692851886153, | |
| "rewards/margins": 0.019028810784220695, | |
| "rewards/rejected": -0.020881177857518196, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.629682997118156, | |
| "grad_norm": 3.127500295639038, | |
| "learning_rate": 2.285792204027678e-09, | |
| "logits/chosen": -1.4207046031951904, | |
| "logits/rejected": -1.4100974798202515, | |
| "logps/chosen": -47.52744674682617, | |
| "logps/rejected": -54.74420166015625, | |
| "loss": 0.6823, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.005003909580409527, | |
| "rewards/margins": 0.022536050528287888, | |
| "rewards/rejected": -0.027539962902665138, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.636887608069164, | |
| "grad_norm": 3.715735912322998, | |
| "learning_rate": 2.199031051284972e-09, | |
| "logits/chosen": -1.4993317127227783, | |
| "logits/rejected": -1.4986127614974976, | |
| "logps/chosen": -48.38993453979492, | |
| "logps/rejected": -52.40184783935547, | |
| "loss": 0.6839, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.005102119408547878, | |
| "rewards/margins": 0.019588427618145943, | |
| "rewards/rejected": -0.024690547958016396, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.6440922190201728, | |
| "grad_norm": 3.8177621364593506, | |
| "learning_rate": 2.113872712509254e-09, | |
| "logits/chosen": -1.4067778587341309, | |
| "logits/rejected": -1.397430658340454, | |
| "logps/chosen": -56.226844787597656, | |
| "logps/rejected": -59.394134521484375, | |
| "loss": 0.6831, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.009724282659590244, | |
| "rewards/margins": 0.02074650302529335, | |
| "rewards/rejected": -0.03047078475356102, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.6512968299711814, | |
| "grad_norm": 3.4949936866760254, | |
| "learning_rate": 2.0303231739801143e-09, | |
| "logits/chosen": -1.4101097583770752, | |
| "logits/rejected": -1.3969051837921143, | |
| "logps/chosen": -50.74885177612305, | |
| "logps/rejected": -55.03565216064453, | |
| "loss": 0.6843, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.009084323421120644, | |
| "rewards/margins": 0.018547767773270607, | |
| "rewards/rejected": -0.027632087469100952, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.65850144092219, | |
| "grad_norm": 3.9415016174316406, | |
| "learning_rate": 1.948388308885102e-09, | |
| "logits/chosen": -1.5741355419158936, | |
| "logits/rejected": -1.5591968297958374, | |
| "logps/chosen": -50.14144515991211, | |
| "logps/rejected": -53.07600784301758, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.003897708607837558, | |
| "rewards/margins": 0.017061758786439896, | |
| "rewards/rejected": -0.02095946855843067, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.6657060518731988, | |
| "grad_norm": 3.1455366611480713, | |
| "learning_rate": 1.86807387690692e-09, | |
| "logits/chosen": -1.5526137351989746, | |
| "logits/rejected": -1.5452834367752075, | |
| "logps/chosen": -50.222904205322266, | |
| "logps/rejected": -57.684120178222656, | |
| "loss": 0.6788, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.0016280229901894927, | |
| "rewards/margins": 0.029992084950208664, | |
| "rewards/rejected": -0.03162010759115219, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.6729106628242074, | |
| "grad_norm": 3.540015697479248, | |
| "learning_rate": 1.789385523818493e-09, | |
| "logits/chosen": -1.4762442111968994, | |
| "logits/rejected": -1.4785929918289185, | |
| "logps/chosen": -45.24761199951172, | |
| "logps/rejected": -51.211875915527344, | |
| "loss": 0.6822, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.0035579826217144728, | |
| "rewards/margins": 0.02260783314704895, | |
| "rewards/rejected": -0.02616581693291664, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.680115273775216, | |
| "grad_norm": 3.6030099391937256, | |
| "learning_rate": 1.712328781086131e-09, | |
| "logits/chosen": -1.5480079650878906, | |
| "logits/rejected": -1.5321252346038818, | |
| "logps/chosen": -51.021087646484375, | |
| "logps/rejected": -53.23911666870117, | |
| "loss": 0.6864, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.007240858860313892, | |
| "rewards/margins": 0.014243543148040771, | |
| "rewards/rejected": -0.02148440107703209, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.6873198847262247, | |
| "grad_norm": 3.420142650604248, | |
| "learning_rate": 1.6369090654806543e-09, | |
| "logits/chosen": -1.5726039409637451, | |
| "logits/rejected": -1.5602816343307495, | |
| "logps/chosen": -46.87481689453125, | |
| "logps/rejected": -51.70646286010742, | |
| "loss": 0.6845, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.008368275128304958, | |
| "rewards/margins": 0.017895232886075974, | |
| "rewards/rejected": -0.026263505220413208, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.6945244956772334, | |
| "grad_norm": 3.2335476875305176, | |
| "learning_rate": 1.5631316786966498e-09, | |
| "logits/chosen": -1.4826228618621826, | |
| "logits/rejected": -1.4666416645050049, | |
| "logps/chosen": -45.12044143676758, | |
| "logps/rejected": -48.53943634033203, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.0067514353431761265, | |
| "rewards/margins": 0.017413515597581863, | |
| "rewards/rejected": -0.024164952337741852, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.701729106628242, | |
| "grad_norm": 4.138692378997803, | |
| "learning_rate": 1.491001806979772e-09, | |
| "logits/chosen": -1.512900710105896, | |
| "logits/rejected": -1.498417615890503, | |
| "logps/chosen": -50.178470611572266, | |
| "logps/rejected": -54.36761474609375, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.0021749637089669704, | |
| "rewards/margins": 0.01952570676803589, | |
| "rewards/rejected": -0.021700672805309296, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.7089337175792507, | |
| "grad_norm": 3.769171953201294, | |
| "learning_rate": 1.4205245207621508e-09, | |
| "logits/chosen": -1.4354712963104248, | |
| "logits/rejected": -1.419633388519287, | |
| "logps/chosen": -52.882362365722656, | |
| "logps/rejected": -55.71977996826172, | |
| "loss": 0.682, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.0019592377357184887, | |
| "rewards/margins": 0.023377398028969765, | |
| "rewards/rejected": -0.02533663436770439, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.7161383285302594, | |
| "grad_norm": 3.8841655254364014, | |
| "learning_rate": 1.3517047743059978e-09, | |
| "logits/chosen": -1.5189292430877686, | |
| "logits/rejected": -1.5208401679992676, | |
| "logps/chosen": -49.525108337402344, | |
| "logps/rejected": -55.4781494140625, | |
| "loss": 0.6837, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.0053468383848667145, | |
| "rewards/margins": 0.019498584792017937, | |
| "rewards/rejected": -0.0248454250395298, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.723342939481268, | |
| "grad_norm": 3.297200918197632, | |
| "learning_rate": 1.2845474053553156e-09, | |
| "logits/chosen": -1.517322063446045, | |
| "logits/rejected": -1.5089843273162842, | |
| "logps/chosen": -43.52055740356445, | |
| "logps/rejected": -47.09668731689453, | |
| "loss": 0.6851, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.007344129495322704, | |
| "rewards/margins": 0.016880614683032036, | |
| "rewards/rejected": -0.024224746972322464, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.7305475504322767, | |
| "grad_norm": 2.828813314437866, | |
| "learning_rate": 1.2190571347958422e-09, | |
| "logits/chosen": -1.5427597761154175, | |
| "logits/rejected": -1.5456900596618652, | |
| "logps/chosen": -43.29767990112305, | |
| "logps/rejected": -50.125144958496094, | |
| "loss": 0.6844, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.0003991241683252156, | |
| "rewards/margins": 0.018153730779886246, | |
| "rewards/rejected": -0.018552854657173157, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.7377521613832854, | |
| "grad_norm": 2.9458444118499756, | |
| "learning_rate": 1.1552385663231634e-09, | |
| "logits/chosen": -1.479465365409851, | |
| "logits/rejected": -1.4579660892486572, | |
| "logps/chosen": -48.168670654296875, | |
| "logps/rejected": -50.11082077026367, | |
| "loss": 0.6851, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.0051146079786121845, | |
| "rewards/margins": 0.016856908798217773, | |
| "rewards/rejected": -0.02197151444852352, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.744956772334294, | |
| "grad_norm": 3.110060930252075, | |
| "learning_rate": 1.0930961861191302e-09, | |
| "logits/chosen": -1.4410741329193115, | |
| "logits/rejected": -1.4403388500213623, | |
| "logps/chosen": -46.421630859375, | |
| "logps/rejected": -49.944129943847656, | |
| "loss": 0.6864, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.007156413048505783, | |
| "rewards/margins": 0.014478680677711964, | |
| "rewards/rejected": -0.021635092794895172, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.7521613832853027, | |
| "grad_norm": 3.0305254459381104, | |
| "learning_rate": 1.0326343625364608e-09, | |
| "logits/chosen": -1.435240387916565, | |
| "logits/rejected": -1.419524908065796, | |
| "logps/chosen": -47.08954620361328, | |
| "logps/rejected": -52.5599250793457, | |
| "loss": 0.6807, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.005212406627833843, | |
| "rewards/margins": 0.026095682755112648, | |
| "rewards/rejected": -0.03130808845162392, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.7593659942363113, | |
| "grad_norm": 2.6526858806610107, | |
| "learning_rate": 9.738573457917066e-10, | |
| "logits/chosen": -1.5486071109771729, | |
| "logits/rejected": -1.542614459991455, | |
| "logps/chosen": -41.173927307128906, | |
| "logps/rejected": -47.29227828979492, | |
| "loss": 0.6824, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.005259085912257433, | |
| "rewards/margins": 0.022309530526399612, | |
| "rewards/rejected": -0.027568617835640907, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.76657060518732, | |
| "grad_norm": 3.002786874771118, | |
| "learning_rate": 9.16769267666434e-10, | |
| "logits/chosen": -1.4672725200653076, | |
| "logits/rejected": -1.4611655473709106, | |
| "logps/chosen": -46.251319885253906, | |
| "logps/rejected": -48.279781341552734, | |
| "loss": 0.6887, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.00623342115432024, | |
| "rewards/margins": 0.009535645134747028, | |
| "rewards/rejected": -0.01576906442642212, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.7737752161383287, | |
| "grad_norm": 3.2858715057373047, | |
| "learning_rate": 8.613741412168113e-10, | |
| "logits/chosen": -1.4862616062164307, | |
| "logits/rejected": -1.4810454845428467, | |
| "logps/chosen": -54.2935676574707, | |
| "logps/rejected": -58.491119384765625, | |
| "loss": 0.6831, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.003660841379314661, | |
| "rewards/margins": 0.020793884992599487, | |
| "rewards/rejected": -0.024454724043607712, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.7809798270893373, | |
| "grad_norm": 3.317065954208374, | |
| "learning_rate": 8.076758604914802e-10, | |
| "logits/chosen": -1.4456998109817505, | |
| "logits/rejected": -1.4331220388412476, | |
| "logps/chosen": -43.14230728149414, | |
| "logps/rejected": -46.72562026977539, | |
| "loss": 0.6842, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0021024621091783047, | |
| "rewards/margins": 0.01865329220890999, | |
| "rewards/rejected": -0.02075575292110443, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.7881844380403455, | |
| "grad_norm": 4.6626362800598145, | |
| "learning_rate": 7.55678200257856e-10, | |
| "logits/chosen": -1.442833662033081, | |
| "logits/rejected": -1.4301643371582031, | |
| "logps/chosen": -50.06626510620117, | |
| "logps/rejected": -55.51959991455078, | |
| "loss": 0.6827, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.006806174758821726, | |
| "rewards/margins": 0.021879781037569046, | |
| "rewards/rejected": -0.028685953468084335, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.795389048991354, | |
| "grad_norm": 3.242816925048828, | |
| "learning_rate": 7.053848157367315e-10, | |
| "logits/chosen": -1.4660804271697998, | |
| "logits/rejected": -1.4522478580474854, | |
| "logps/chosen": -48.17863082885742, | |
| "logps/rejected": -53.19854736328125, | |
| "loss": 0.6828, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0016250055050477386, | |
| "rewards/margins": 0.021487154066562653, | |
| "rewards/rejected": -0.023112159222364426, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.802593659942363, | |
| "grad_norm": 2.588425397872925, | |
| "learning_rate": 6.567992423453794e-10, | |
| "logits/chosen": -1.4936778545379639, | |
| "logits/rejected": -1.4873104095458984, | |
| "logps/chosen": -43.38020706176758, | |
| "logps/rejected": -46.673580169677734, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.0043821679428219795, | |
| "rewards/margins": 0.019423723220825195, | |
| "rewards/rejected": -0.0238058902323246, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.8097982708933715, | |
| "grad_norm": 3.189708948135376, | |
| "learning_rate": 6.099248954489794e-10, | |
| "logits/chosen": -1.4088729619979858, | |
| "logits/rejected": -1.4072296619415283, | |
| "logps/chosen": -47.93668746948242, | |
| "logps/rejected": -53.1524543762207, | |
| "loss": 0.6834, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.0071435668505728245, | |
| "rewards/margins": 0.020221812650561333, | |
| "rewards/rejected": -0.02736538089811802, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.81700288184438, | |
| "grad_norm": 3.6743202209472656, | |
| "learning_rate": 5.647650701205653e-10, | |
| "logits/chosen": -1.5016138553619385, | |
| "logits/rejected": -1.481757402420044, | |
| "logps/chosen": -54.419166564941406, | |
| "logps/rejected": -58.34161376953125, | |
| "loss": 0.6805, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.0001567740982864052, | |
| "rewards/margins": 0.02638132870197296, | |
| "rewards/rejected": -0.026538103818893433, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.824207492795389, | |
| "grad_norm": 3.1479997634887695, | |
| "learning_rate": 5.213229409093856e-10, | |
| "logits/chosen": -1.5346415042877197, | |
| "logits/rejected": -1.5240830183029175, | |
| "logps/chosen": -52.7498664855957, | |
| "logps/rejected": -57.8280143737793, | |
| "loss": 0.6816, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.004968610592186451, | |
| "rewards/margins": 0.024416498839855194, | |
| "rewards/rejected": -0.02938510850071907, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.8314121037463975, | |
| "grad_norm": 4.354657173156738, | |
| "learning_rate": 4.796015616177401e-10, | |
| "logits/chosen": -1.4575870037078857, | |
| "logits/rejected": -1.4455732107162476, | |
| "logps/chosen": -51.82859420776367, | |
| "logps/rejected": -55.66108322143555, | |
| "loss": 0.6854, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.007370557636022568, | |
| "rewards/margins": 0.016358794644474983, | |
| "rewards/rejected": -0.0237293504178524, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.838616714697406, | |
| "grad_norm": 3.3422064781188965, | |
| "learning_rate": 4.3960386508631595e-10, | |
| "logits/chosen": -1.3845546245574951, | |
| "logits/rejected": -1.38401198387146, | |
| "logps/chosen": -42.65739059448242, | |
| "logps/rejected": -46.67231369018555, | |
| "loss": 0.6851, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.006754173897206783, | |
| "rewards/margins": 0.017140675336122513, | |
| "rewards/rejected": -0.02389485016465187, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.845821325648415, | |
| "grad_norm": 4.824497699737549, | |
| "learning_rate": 4.013326629880243e-10, | |
| "logits/chosen": -1.42984139919281, | |
| "logits/rejected": -1.4125710725784302, | |
| "logps/chosen": -50.18545150756836, | |
| "logps/rejected": -54.050750732421875, | |
| "loss": 0.6825, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.007838496938347816, | |
| "rewards/margins": 0.022195886820554733, | |
| "rewards/rejected": -0.0300343818962574, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.8530259365994235, | |
| "grad_norm": 3.4627509117126465, | |
| "learning_rate": 3.64790645630339e-10, | |
| "logits/chosen": -1.3913991451263428, | |
| "logits/rejected": -1.3863855600357056, | |
| "logps/chosen": -53.33915328979492, | |
| "logps/rejected": -55.66510009765625, | |
| "loss": 0.6873, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.0018841477576643229, | |
| "rewards/margins": 0.012181239202618599, | |
| "rewards/rejected": -0.014065387658774853, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.860230547550432, | |
| "grad_norm": 4.963607311248779, | |
| "learning_rate": 3.2998038176619e-10, | |
| "logits/chosen": -1.4526419639587402, | |
| "logits/rejected": -1.4365007877349854, | |
| "logps/chosen": -51.497032165527344, | |
| "logps/rejected": -54.97554397583008, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0071846963837742805, | |
| "rewards/margins": 0.01706068590283394, | |
| "rewards/rejected": -0.024245383217930794, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.867435158501441, | |
| "grad_norm": 3.4952621459960938, | |
| "learning_rate": 2.969043184133907e-10, | |
| "logits/chosen": -1.5576436519622803, | |
| "logits/rejected": -1.5564569234848022, | |
| "logps/chosen": -45.00513458251953, | |
| "logps/rejected": -53.378684997558594, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.00028269606991671026, | |
| "rewards/margins": 0.023711269721388817, | |
| "rewards/rejected": -0.02399396523833275, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.8746397694524495, | |
| "grad_norm": 3.8880701065063477, | |
| "learning_rate": 2.6556478068261447e-10, | |
| "logits/chosen": -1.4495378732681274, | |
| "logits/rejected": -1.4356410503387451, | |
| "logps/chosen": -44.4900016784668, | |
| "logps/rejected": -48.011497497558594, | |
| "loss": 0.6796, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.0006750145694240928, | |
| "rewards/margins": 0.028233755379915237, | |
| "rewards/rejected": -0.027558740228414536, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.881844380403458, | |
| "grad_norm": 3.490116596221924, | |
| "learning_rate": 2.3596397161395607e-10, | |
| "logits/chosen": -1.558830976486206, | |
| "logits/rejected": -1.5369431972503662, | |
| "logps/chosen": -49.584800720214844, | |
| "logps/rejected": -54.7055549621582, | |
| "loss": 0.6809, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 0.00045850887545384467, | |
| "rewards/margins": 0.025326168164610863, | |
| "rewards/rejected": -0.02486766129732132, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.889048991354467, | |
| "grad_norm": 4.86458683013916, | |
| "learning_rate": 2.0810397202206399e-10, | |
| "logits/chosen": -1.4145355224609375, | |
| "logits/rejected": -1.4098033905029297, | |
| "logps/chosen": -49.917091369628906, | |
| "logps/rejected": -53.33013916015625, | |
| "loss": 0.6847, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.0004277043044567108, | |
| "rewards/margins": 0.017619196325540543, | |
| "rewards/rejected": -0.017191490158438683, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.8962536023054755, | |
| "grad_norm": 3.27022647857666, | |
| "learning_rate": 1.819867403498737e-10, | |
| "logits/chosen": -1.5645722150802612, | |
| "logits/rejected": -1.5553191900253296, | |
| "logps/chosen": -47.87715530395508, | |
| "logps/rejected": -51.797218322753906, | |
| "loss": 0.6836, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.0083371726796031, | |
| "rewards/margins": 0.020166922360658646, | |
| "rewards/rejected": -0.02850409410893917, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.903458213256484, | |
| "grad_norm": 3.4694247245788574, | |
| "learning_rate": 1.5761411253092382e-10, | |
| "logits/chosen": -1.4308217763900757, | |
| "logits/rejected": -1.4093315601348877, | |
| "logps/chosen": -46.04566192626953, | |
| "logps/rejected": -48.20244598388672, | |
| "loss": 0.6843, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.0066087073646485806, | |
| "rewards/margins": 0.018396150320768356, | |
| "rewards/rejected": -0.025004858151078224, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.910662824207493, | |
| "grad_norm": 3.5978357791900635, | |
| "learning_rate": 1.3498780186031455e-10, | |
| "logits/chosen": -1.4942580461502075, | |
| "logits/rejected": -1.4852937459945679, | |
| "logps/chosen": -53.630462646484375, | |
| "logps/rejected": -57.28614044189453, | |
| "loss": 0.6847, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.006508751306682825, | |
| "rewards/margins": 0.017568571493029594, | |
| "rewards/rejected": -0.024077320471405983, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.9178674351585014, | |
| "grad_norm": 3.3043346405029297, | |
| "learning_rate": 1.1410939887425141e-10, | |
| "logits/chosen": -1.498331904411316, | |
| "logits/rejected": -1.4901821613311768, | |
| "logps/chosen": -47.133060455322266, | |
| "logps/rejected": -49.65581130981445, | |
| "loss": 0.6861, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.010484300553798676, | |
| "rewards/margins": 0.014650911092758179, | |
| "rewards/rejected": -0.025135213509202003, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.92507204610951, | |
| "grad_norm": 2.9464681148529053, | |
| "learning_rate": 9.498037123825686e-11, | |
| "logits/chosen": -1.5104553699493408, | |
| "logits/rejected": -1.499537706375122, | |
| "logps/chosen": -45.17995834350586, | |
| "logps/rejected": -49.46979522705078, | |
| "loss": 0.6832, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.004455415066331625, | |
| "rewards/margins": 0.020533457398414612, | |
| "rewards/rejected": -0.024988874793052673, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.9322766570605188, | |
| "grad_norm": 3.2877230644226074, | |
| "learning_rate": 7.760206364398614e-11, | |
| "logits/chosen": -1.5863580703735352, | |
| "logits/rejected": -1.565198540687561, | |
| "logps/chosen": -49.96894073486328, | |
| "logps/rejected": -53.23193359375, | |
| "loss": 0.6837, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.009586494415998459, | |
| "rewards/margins": 0.020023521035909653, | |
| "rewards/rejected": -0.029610013589262962, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.9394812680115274, | |
| "grad_norm": 3.83817720413208, | |
| "learning_rate": 6.19756977147029e-11, | |
| "logits/chosen": -1.4424731731414795, | |
| "logits/rejected": -1.4336028099060059, | |
| "logps/chosen": -47.239967346191406, | |
| "logps/rejected": -54.48072052001953, | |
| "loss": 0.6829, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.009559125639498234, | |
| "rewards/margins": 0.021477770060300827, | |
| "rewards/rejected": -0.031036894768476486, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.946685878962536, | |
| "grad_norm": 2.8576111793518066, | |
| "learning_rate": 4.810237191940625e-11, | |
| "logits/chosen": -1.4422481060028076, | |
| "logits/rejected": -1.4329513311386108, | |
| "logps/chosen": -46.956687927246094, | |
| "logps/rejected": -49.96436309814453, | |
| "loss": 0.6855, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.008776359260082245, | |
| "rewards/margins": 0.016020456328988075, | |
| "rewards/rejected": -0.02479681745171547, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.9538904899135447, | |
| "grad_norm": 3.3893821239471436, | |
| "learning_rate": 3.5983061495617476e-11, | |
| "logits/chosen": -1.5260568857192993, | |
| "logits/rejected": -1.5251171588897705, | |
| "logps/chosen": -51.82386016845703, | |
| "logps/rejected": -57.438377380371094, | |
| "loss": 0.6843, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.005095969419926405, | |
| "rewards/margins": 0.018611816689372063, | |
| "rewards/rejected": -0.023707788437604904, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.9610951008645534, | |
| "grad_norm": 3.1232125759124756, | |
| "learning_rate": 2.5618618380812694e-11, | |
| "logits/chosen": -1.5207054615020752, | |
| "logits/rejected": -1.5060088634490967, | |
| "logps/chosen": -42.08940124511719, | |
| "logps/rejected": -47.49885559082031, | |
| "loss": 0.6811, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.00411585858091712, | |
| "rewards/margins": 0.024918580427765846, | |
| "rewards/rejected": -0.029034441336989403, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.968299711815562, | |
| "grad_norm": 3.4026784896850586, | |
| "learning_rate": 1.700977115254576e-11, | |
| "logits/chosen": -1.463889479637146, | |
| "logits/rejected": -1.454345464706421, | |
| "logps/chosen": -46.250335693359375, | |
| "logps/rejected": -51.5218505859375, | |
| "loss": 0.6831, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.00747289415448904, | |
| "rewards/margins": 0.02096741273999214, | |
| "rewards/rejected": -0.028440307825803757, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.9755043227665707, | |
| "grad_norm": 2.975994110107422, | |
| "learning_rate": 1.0157124977230868e-11, | |
| "logits/chosen": -1.4347963333129883, | |
| "logits/rejected": -1.4254258871078491, | |
| "logps/chosen": -43.63376998901367, | |
| "logps/rejected": -47.81342315673828, | |
| "loss": 0.6845, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.0023829059209674597, | |
| "rewards/margins": 0.018163811415433884, | |
| "rewards/rejected": -0.020546717569231987, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.9827089337175794, | |
| "grad_norm": 3.555697441101074, | |
| "learning_rate": 5.061161567596061e-12, | |
| "logits/chosen": -1.4684605598449707, | |
| "logits/rejected": -1.4558757543563843, | |
| "logps/chosen": -47.77406692504883, | |
| "logps/rejected": -50.39754867553711, | |
| "loss": 0.6844, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.001015470246784389, | |
| "rewards/margins": 0.018158772960305214, | |
| "rewards/rejected": -0.0191742442548275, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.989913544668588, | |
| "grad_norm": 3.382728099822998, | |
| "learning_rate": 1.7222391488297406e-12, | |
| "logits/chosen": -1.51640784740448, | |
| "logits/rejected": -1.5047814846038818, | |
| "logps/chosen": -53.642372131347656, | |
| "logps/rejected": -58.678138732910156, | |
| "loss": 0.6794, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.0038291483651846647, | |
| "rewards/margins": 0.02880244515836239, | |
| "rewards/rejected": -0.03263159841299057, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.9971181556195967, | |
| "grad_norm": 3.958996534347534, | |
| "learning_rate": 1.4059243338693238e-13, | |
| "logits/chosen": -1.4424545764923096, | |
| "logits/rejected": -1.431705355644226, | |
| "logps/chosen": -48.65971374511719, | |
| "logps/rejected": -53.465545654296875, | |
| "loss": 0.6825, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.0016985107213258743, | |
| "rewards/margins": 0.02206951007246971, | |
| "rewards/rejected": -0.023768020793795586, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 4164, | |
| "total_flos": 0.0, | |
| "train_loss": 0.6880548467553658, | |
| "train_runtime": 7425.9787, | |
| "train_samples_per_second": 8.969, | |
| "train_steps_per_second": 0.561 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4164, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |