| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 400, | |
| "global_step": 938, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010660980810234541, | |
| "grad_norm": 5.5463102558146335, | |
| "learning_rate": 5.3191489361702125e-09, | |
| "logits/chosen": -0.48140522837638855, | |
| "logits/rejected": -0.7986129522323608, | |
| "logps/chosen": -160.70640563964844, | |
| "logps/rejected": -136.7216033935547, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005330490405117271, | |
| "grad_norm": 5.944179098618821, | |
| "learning_rate": 2.6595744680851062e-08, | |
| "logits/chosen": -0.5258230566978455, | |
| "logits/rejected": -0.640978991985321, | |
| "logps/chosen": -143.9716033935547, | |
| "logps/rejected": -130.26953125, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.28125, | |
| "rewards/chosen": -0.0004923552623949945, | |
| "rewards/margins": -0.0009530532988719642, | |
| "rewards/rejected": 0.00046069800737313926, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.010660980810234541, | |
| "grad_norm": 4.877499599443773, | |
| "learning_rate": 5.3191489361702123e-08, | |
| "logits/chosen": -0.45687875151634216, | |
| "logits/rejected": -0.633367121219635, | |
| "logps/chosen": -171.5751190185547, | |
| "logps/rejected": -156.70230102539062, | |
| "loss": 0.6934, | |
| "rewards/accuracies": 0.38749998807907104, | |
| "rewards/chosen": -0.0014754905132576823, | |
| "rewards/margins": -0.0020646383054554462, | |
| "rewards/rejected": 0.0005891475593671203, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.015991471215351813, | |
| "grad_norm": 4.928018921113954, | |
| "learning_rate": 7.978723404255319e-08, | |
| "logits/chosen": -0.5080267190933228, | |
| "logits/rejected": -0.6690904498100281, | |
| "logps/chosen": -168.29055786132812, | |
| "logps/rejected": -155.68568420410156, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.00017321776249445975, | |
| "rewards/margins": 0.0004894703743048012, | |
| "rewards/rejected": -0.00031625264091417193, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.021321961620469083, | |
| "grad_norm": 5.502864121859809, | |
| "learning_rate": 1.0638297872340425e-07, | |
| "logits/chosen": -0.49741801619529724, | |
| "logits/rejected": -0.6529160141944885, | |
| "logps/chosen": -158.0856475830078, | |
| "logps/rejected": -141.2250518798828, | |
| "loss": 0.6934, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.0007460988126695156, | |
| "rewards/margins": 0.0009390910854563117, | |
| "rewards/rejected": -0.00019299241830594838, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.026652452025586353, | |
| "grad_norm": 5.3161185644529905, | |
| "learning_rate": 1.329787234042553e-07, | |
| "logits/chosen": -0.46866098046302795, | |
| "logits/rejected": -0.5745824575424194, | |
| "logps/chosen": -156.7174835205078, | |
| "logps/rejected": -144.2376251220703, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.0008935723453760147, | |
| "rewards/margins": -0.0009486509370617568, | |
| "rewards/rejected": 5.507881360244937e-05, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.031982942430703626, | |
| "grad_norm": 5.272912501828491, | |
| "learning_rate": 1.5957446808510638e-07, | |
| "logits/chosen": -0.49024850130081177, | |
| "logits/rejected": -0.6033456921577454, | |
| "logps/chosen": -157.64395141601562, | |
| "logps/rejected": -150.4394073486328, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0018176069715991616, | |
| "rewards/margins": 0.0021634683944284916, | |
| "rewards/rejected": -0.0003458613937254995, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03731343283582089, | |
| "grad_norm": 5.55755087436118, | |
| "learning_rate": 1.8617021276595742e-07, | |
| "logits/chosen": -0.5024099349975586, | |
| "logits/rejected": -0.5742695927619934, | |
| "logps/chosen": -162.9497528076172, | |
| "logps/rejected": -156.5416259765625, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.0004840154724661261, | |
| "rewards/margins": 4.346743298810907e-05, | |
| "rewards/rejected": 0.00044054799946025014, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.042643923240938165, | |
| "grad_norm": 5.050573104575282, | |
| "learning_rate": 2.127659574468085e-07, | |
| "logits/chosen": -0.4654630124568939, | |
| "logits/rejected": -0.5773854851722717, | |
| "logps/chosen": -153.1385955810547, | |
| "logps/rejected": -147.40850830078125, | |
| "loss": 0.6927, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.00016260957636404783, | |
| "rewards/margins": 0.0010524257086217403, | |
| "rewards/rejected": -0.0008898162050172687, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04797441364605544, | |
| "grad_norm": 5.699569850833138, | |
| "learning_rate": 2.393617021276596e-07, | |
| "logits/chosen": -0.4928715229034424, | |
| "logits/rejected": -0.6359135508537292, | |
| "logps/chosen": -155.63232421875, | |
| "logps/rejected": -143.79296875, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0014595793327316642, | |
| "rewards/margins": 0.001696806401014328, | |
| "rewards/rejected": -0.00023722714104223996, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.053304904051172705, | |
| "grad_norm": 5.124728866824331, | |
| "learning_rate": 2.659574468085106e-07, | |
| "logits/chosen": -0.43387550115585327, | |
| "logits/rejected": -0.5658468008041382, | |
| "logps/chosen": -175.50062561035156, | |
| "logps/rejected": -154.56787109375, | |
| "loss": 0.6922, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0016532255103811622, | |
| "rewards/margins": 0.0013198386877775192, | |
| "rewards/rejected": 0.0003333869099151343, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05863539445628998, | |
| "grad_norm": 5.56223084484684, | |
| "learning_rate": 2.925531914893617e-07, | |
| "logits/chosen": -0.5003554224967957, | |
| "logits/rejected": -0.6052166223526001, | |
| "logps/chosen": -151.86026000976562, | |
| "logps/rejected": -144.47586059570312, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.0034173422027379274, | |
| "rewards/margins": 0.0018555650021880865, | |
| "rewards/rejected": 0.0015617769677191973, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06396588486140725, | |
| "grad_norm": 5.412628405701925, | |
| "learning_rate": 3.1914893617021275e-07, | |
| "logits/chosen": -0.42743635177612305, | |
| "logits/rejected": -0.5739923715591431, | |
| "logps/chosen": -147.76356506347656, | |
| "logps/rejected": -135.29502868652344, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.004091166891157627, | |
| "rewards/margins": 0.0018434191588312387, | |
| "rewards/rejected": 0.0022477474994957447, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06929637526652452, | |
| "grad_norm": 5.442775471516293, | |
| "learning_rate": 3.457446808510638e-07, | |
| "logits/chosen": -0.522619366645813, | |
| "logits/rejected": -0.6582551002502441, | |
| "logps/chosen": -162.0552520751953, | |
| "logps/rejected": -147.86856079101562, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.005479807965457439, | |
| "rewards/margins": 0.004093030467629433, | |
| "rewards/rejected": 0.001386777381412685, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.07462686567164178, | |
| "grad_norm": 5.634942637913951, | |
| "learning_rate": 3.7234042553191484e-07, | |
| "logits/chosen": -0.5439807772636414, | |
| "logits/rejected": -0.6844218969345093, | |
| "logps/chosen": -156.98483276367188, | |
| "logps/rejected": -140.76651000976562, | |
| "loss": 0.6911, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.0070900036953389645, | |
| "rewards/margins": 0.00586737459525466, | |
| "rewards/rejected": 0.0012226292164996266, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07995735607675906, | |
| "grad_norm": 4.9084292553173166, | |
| "learning_rate": 3.989361702127659e-07, | |
| "logits/chosen": -0.4480295181274414, | |
| "logits/rejected": -0.5396173596382141, | |
| "logps/chosen": -149.27737426757812, | |
| "logps/rejected": -138.79080200195312, | |
| "loss": 0.6901, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0054114703088998795, | |
| "rewards/margins": 0.004020148888230324, | |
| "rewards/rejected": 0.0013913216535001993, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08528784648187633, | |
| "grad_norm": 5.243856701834545, | |
| "learning_rate": 4.25531914893617e-07, | |
| "logits/chosen": -0.5261912941932678, | |
| "logits/rejected": -0.6559049487113953, | |
| "logps/chosen": -155.7753143310547, | |
| "logps/rejected": -146.03860473632812, | |
| "loss": 0.6908, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.007854573428630829, | |
| "rewards/margins": 0.004827320575714111, | |
| "rewards/rejected": 0.0030272528529167175, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0906183368869936, | |
| "grad_norm": 5.506781468585061, | |
| "learning_rate": 4.5212765957446806e-07, | |
| "logits/chosen": -0.5435600876808167, | |
| "logits/rejected": -0.6427361369132996, | |
| "logps/chosen": -136.4017791748047, | |
| "logps/rejected": -130.49105834960938, | |
| "loss": 0.6897, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.00461820513010025, | |
| "rewards/margins": 0.004016582854092121, | |
| "rewards/rejected": 0.0006016212282702327, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09594882729211088, | |
| "grad_norm": 5.811357371598748, | |
| "learning_rate": 4.787234042553192e-07, | |
| "logits/chosen": -0.5588937401771545, | |
| "logits/rejected": -0.6763302087783813, | |
| "logps/chosen": -157.134521484375, | |
| "logps/rejected": -146.5879364013672, | |
| "loss": 0.6895, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.008327952586114407, | |
| "rewards/margins": 0.008614275604486465, | |
| "rewards/rejected": -0.0002863233967218548, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.10127931769722814, | |
| "grad_norm": 5.698422927582689, | |
| "learning_rate": 4.999982680938129e-07, | |
| "logits/chosen": -0.5830127596855164, | |
| "logits/rejected": -0.7297841310501099, | |
| "logps/chosen": -165.22900390625, | |
| "logps/rejected": -152.13014221191406, | |
| "loss": 0.6876, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.01156298816204071, | |
| "rewards/margins": 0.011255884543061256, | |
| "rewards/rejected": 0.00030710286227986217, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.10660980810234541, | |
| "grad_norm": 5.540011917380718, | |
| "learning_rate": 4.999376538968061e-07, | |
| "logits/chosen": -0.5830188393592834, | |
| "logits/rejected": -0.6362646222114563, | |
| "logps/chosen": -160.86549377441406, | |
| "logps/rejected": -151.5850372314453, | |
| "loss": 0.6883, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.007453514728695154, | |
| "rewards/margins": 0.0087841572239995, | |
| "rewards/rejected": -0.0013306414475664496, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11194029850746269, | |
| "grad_norm": 5.554459962070174, | |
| "learning_rate": 4.997904683849418e-07, | |
| "logits/chosen": -0.6047431826591492, | |
| "logits/rejected": -0.7156568765640259, | |
| "logps/chosen": -145.95703125, | |
| "logps/rejected": -137.16812133789062, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.003936653956770897, | |
| "rewards/margins": 0.009741699323058128, | |
| "rewards/rejected": -0.013678351417183876, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.11727078891257996, | |
| "grad_norm": 6.6635480922191395, | |
| "learning_rate": 4.99556762539107e-07, | |
| "logits/chosen": -0.5515817403793335, | |
| "logits/rejected": -0.7226412296295166, | |
| "logps/chosen": -163.9252166748047, | |
| "logps/rejected": -151.83767700195312, | |
| "loss": 0.6857, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.0011928931344300508, | |
| "rewards/margins": 0.012570838443934917, | |
| "rewards/rejected": -0.013763731345534325, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12260127931769722, | |
| "grad_norm": 5.957095370833089, | |
| "learning_rate": 4.992366173083787e-07, | |
| "logits/chosen": -0.586641788482666, | |
| "logits/rejected": -0.7417147159576416, | |
| "logps/chosen": -161.9275360107422, | |
| "logps/rejected": -145.07772827148438, | |
| "loss": 0.6843, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.003873241599649191, | |
| "rewards/margins": 0.01244218461215496, | |
| "rewards/rejected": -0.01631542667746544, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1279317697228145, | |
| "grad_norm": 5.703633835475685, | |
| "learning_rate": 4.988301435819852e-07, | |
| "logits/chosen": -0.5778621435165405, | |
| "logits/rejected": -0.6562256217002869, | |
| "logps/chosen": -164.537353515625, | |
| "logps/rejected": -152.1725616455078, | |
| "loss": 0.6845, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.007811696734279394, | |
| "rewards/margins": 0.015073996968567371, | |
| "rewards/rejected": -0.022885693237185478, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13326226012793177, | |
| "grad_norm": 7.3435341527621585, | |
| "learning_rate": 4.983374821508973e-07, | |
| "logits/chosen": -0.6186214685440063, | |
| "logits/rejected": -0.7367585301399231, | |
| "logps/chosen": -190.20452880859375, | |
| "logps/rejected": -183.6043243408203, | |
| "loss": 0.6813, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.008534837514162064, | |
| "rewards/margins": 0.027956834062933922, | |
| "rewards/rejected": -0.019421998411417007, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.13859275053304904, | |
| "grad_norm": 6.188992862485417, | |
| "learning_rate": 4.977588036590624e-07, | |
| "logits/chosen": -0.6698447465896606, | |
| "logits/rejected": -0.7765822410583496, | |
| "logps/chosen": -157.9294891357422, | |
| "logps/rejected": -146.48617553710938, | |
| "loss": 0.6833, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.013769884593784809, | |
| "rewards/margins": 0.021815448999404907, | |
| "rewards/rejected": -0.03558532893657684, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1439232409381663, | |
| "grad_norm": 5.78355447268637, | |
| "learning_rate": 4.970943085442984e-07, | |
| "logits/chosen": -0.6052809953689575, | |
| "logits/rejected": -0.768462061882019, | |
| "logps/chosen": -156.30868530273438, | |
| "logps/rejected": -149.22007751464844, | |
| "loss": 0.6805, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02559695765376091, | |
| "rewards/margins": 0.03413590043783188, | |
| "rewards/rejected": -0.05973286181688309, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.14925373134328357, | |
| "grad_norm": 5.813443617152644, | |
| "learning_rate": 4.96344226968867e-07, | |
| "logits/chosen": -0.6367892026901245, | |
| "logits/rejected": -0.7320101857185364, | |
| "logps/chosen": -162.81149291992188, | |
| "logps/rejected": -153.95095825195312, | |
| "loss": 0.6829, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.03205486014485359, | |
| "rewards/margins": 0.017323989421129227, | |
| "rewards/rejected": -0.04937884956598282, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15458422174840086, | |
| "grad_norm": 6.2002717305065, | |
| "learning_rate": 4.955088187397534e-07, | |
| "logits/chosen": -0.7039578557014465, | |
| "logits/rejected": -0.8707769513130188, | |
| "logps/chosen": -176.5757598876953, | |
| "logps/rejected": -168.57083129882812, | |
| "loss": 0.6787, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.04023490846157074, | |
| "rewards/margins": 0.030298087745904922, | |
| "rewards/rejected": -0.07053300738334656, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.15991471215351813, | |
| "grad_norm": 5.905902869206233, | |
| "learning_rate": 4.945883732186751e-07, | |
| "logits/chosen": -0.6456910371780396, | |
| "logits/rejected": -0.8251630067825317, | |
| "logps/chosen": -141.7700653076172, | |
| "logps/rejected": -129.72817993164062, | |
| "loss": 0.6746, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0674080178141594, | |
| "rewards/margins": 0.03635396808385849, | |
| "rewards/rejected": -0.10376199334859848, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1652452025586354, | |
| "grad_norm": 6.258969756632891, | |
| "learning_rate": 4.935832092218558e-07, | |
| "logits/chosen": -0.724746823310852, | |
| "logits/rejected": -0.842291533946991, | |
| "logps/chosen": -159.5133514404297, | |
| "logps/rejected": -152.9370574951172, | |
| "loss": 0.6748, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.04968777671456337, | |
| "rewards/margins": 0.04536419361829758, | |
| "rewards/rejected": -0.09505197405815125, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.17057569296375266, | |
| "grad_norm": 6.094426950794661, | |
| "learning_rate": 4.924936749095969e-07, | |
| "logits/chosen": -0.6918126344680786, | |
| "logits/rejected": -0.7708092331886292, | |
| "logps/chosen": -167.59994506835938, | |
| "logps/rejected": -160.00057983398438, | |
| "loss": 0.6703, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.04972660169005394, | |
| "rewards/margins": 0.05021023750305176, | |
| "rewards/rejected": -0.0999368354678154, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17590618336886993, | |
| "grad_norm": 6.3293548148521905, | |
| "learning_rate": 4.913201476656838e-07, | |
| "logits/chosen": -0.7461433410644531, | |
| "logits/rejected": -0.8420252799987793, | |
| "logps/chosen": -161.7245330810547, | |
| "logps/rejected": -157.357177734375, | |
| "loss": 0.6684, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.05842015892267227, | |
| "rewards/margins": 0.06748794764280319, | |
| "rewards/rejected": -0.12590810656547546, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.1812366737739872, | |
| "grad_norm": 6.5840976852855, | |
| "learning_rate": 4.900630339666717e-07, | |
| "logits/chosen": -0.7366148829460144, | |
| "logits/rejected": -0.8815475702285767, | |
| "logps/chosen": -183.88925170898438, | |
| "logps/rejected": -176.22451782226562, | |
| "loss": 0.6703, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.08390282094478607, | |
| "rewards/margins": 0.05011892318725586, | |
| "rewards/rejected": -0.13402177393436432, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1865671641791045, | |
| "grad_norm": 6.234539356652731, | |
| "learning_rate": 4.88722769241093e-07, | |
| "logits/chosen": -0.6534587144851685, | |
| "logits/rejected": -0.7359489798545837, | |
| "logps/chosen": -156.6703338623047, | |
| "logps/rejected": -150.93853759765625, | |
| "loss": 0.6747, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.07284261286258698, | |
| "rewards/margins": 0.05436049774289131, | |
| "rewards/rejected": -0.127203106880188, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.19189765458422176, | |
| "grad_norm": 6.610956936672055, | |
| "learning_rate": 4.872998177186375e-07, | |
| "logits/chosen": -0.666496992111206, | |
| "logits/rejected": -0.7403326034545898, | |
| "logps/chosen": -160.12982177734375, | |
| "logps/rejected": -157.0307159423828, | |
| "loss": 0.6654, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.11026358604431152, | |
| "rewards/margins": 0.055197346955537796, | |
| "rewards/rejected": -0.16546092927455902, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19722814498933902, | |
| "grad_norm": 7.023716688535026, | |
| "learning_rate": 4.857946722693566e-07, | |
| "logits/chosen": -0.7221956849098206, | |
| "logits/rejected": -0.9377690553665161, | |
| "logps/chosen": -169.1468048095703, | |
| "logps/rejected": -154.34695434570312, | |
| "loss": 0.6676, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.09703753888607025, | |
| "rewards/margins": 0.06947065889835358, | |
| "rewards/rejected": -0.16650819778442383, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.2025586353944563, | |
| "grad_norm": 7.14685316792679, | |
| "learning_rate": 4.842078542329463e-07, | |
| "logits/chosen": -0.8102830648422241, | |
| "logits/rejected": -0.892846941947937, | |
| "logps/chosen": -161.26129150390625, | |
| "logps/rejected": -156.14747619628906, | |
| "loss": 0.6671, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.08616851270198822, | |
| "rewards/margins": 0.06732877343893051, | |
| "rewards/rejected": -0.15349729359149933, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.20788912579957355, | |
| "grad_norm": 6.752366375502548, | |
| "learning_rate": 4.825399132381714e-07, | |
| "logits/chosen": -0.6987568736076355, | |
| "logits/rejected": -0.8175935745239258, | |
| "logps/chosen": -171.1442108154297, | |
| "logps/rejected": -166.8987579345703, | |
| "loss": 0.6635, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.10287340730428696, | |
| "rewards/margins": 0.05335085466504097, | |
| "rewards/rejected": -0.15622428059577942, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.21321961620469082, | |
| "grad_norm": 7.347783152614852, | |
| "learning_rate": 4.807914270124876e-07, | |
| "logits/chosen": -0.7212746143341064, | |
| "logits/rejected": -0.8661853671073914, | |
| "logps/chosen": -154.50978088378906, | |
| "logps/rejected": -151.14486694335938, | |
| "loss": 0.6607, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.13088169693946838, | |
| "rewards/margins": 0.07638157904148102, | |
| "rewards/rejected": -0.2072632759809494, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21855010660980811, | |
| "grad_norm": 7.628582115766163, | |
| "learning_rate": 4.789630011819354e-07, | |
| "logits/chosen": -0.8047178387641907, | |
| "logits/rejected": -0.9235810041427612, | |
| "logps/chosen": -172.83328247070312, | |
| "logps/rejected": -167.29969787597656, | |
| "loss": 0.6594, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.13691547513008118, | |
| "rewards/margins": 0.08130868524312973, | |
| "rewards/rejected": -0.21822413802146912, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.22388059701492538, | |
| "grad_norm": 8.158484143715567, | |
| "learning_rate": 4.770552690613665e-07, | |
| "logits/chosen": -0.6994116902351379, | |
| "logits/rejected": -0.8224090337753296, | |
| "logps/chosen": -165.53271484375, | |
| "logps/rejected": -160.39566040039062, | |
| "loss": 0.6632, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.15383288264274597, | |
| "rewards/margins": 0.08487533777952194, | |
| "rewards/rejected": -0.23870821297168732, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.22921108742004265, | |
| "grad_norm": 6.818497799203832, | |
| "learning_rate": 4.750688914350824e-07, | |
| "logits/chosen": -0.7993873357772827, | |
| "logits/rejected": -0.8913162350654602, | |
| "logps/chosen": -168.80471801757812, | |
| "logps/rejected": -161.65472412109375, | |
| "loss": 0.6635, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.175734743475914, | |
| "rewards/margins": 0.0465971902012825, | |
| "rewards/rejected": -0.2223319262266159, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.2345415778251599, | |
| "grad_norm": 6.951350944403186, | |
| "learning_rate": 4.730045563279577e-07, | |
| "logits/chosen": -0.8045557141304016, | |
| "logits/rejected": -0.9918710589408875, | |
| "logps/chosen": -166.38870239257812, | |
| "logps/rejected": -160.87380981445312, | |
| "loss": 0.6548, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.1676856279373169, | |
| "rewards/margins": 0.08295993506908417, | |
| "rewards/rejected": -0.25064557790756226, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23987206823027718, | |
| "grad_norm": 8.045186231414235, | |
| "learning_rate": 4.708629787671268e-07, | |
| "logits/chosen": -0.7760337591171265, | |
| "logits/rejected": -0.9154524803161621, | |
| "logps/chosen": -176.33999633789062, | |
| "logps/rejected": -174.62783813476562, | |
| "loss": 0.6562, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.17208227515220642, | |
| "rewards/margins": 0.07876059412956238, | |
| "rewards/rejected": -0.2508428692817688, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.24520255863539445, | |
| "grad_norm": 8.063146353723733, | |
| "learning_rate": 4.6864490053432e-07, | |
| "logits/chosen": -0.8260966539382935, | |
| "logits/rejected": -0.9618522524833679, | |
| "logps/chosen": -181.78347778320312, | |
| "logps/rejected": -169.95745849609375, | |
| "loss": 0.6563, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.19365951418876648, | |
| "rewards/margins": 0.0855235755443573, | |
| "rewards/rejected": -0.2791830897331238, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.2505330490405117, | |
| "grad_norm": 8.049754139869536, | |
| "learning_rate": 4.6635108990893033e-07, | |
| "logits/chosen": -0.7784782648086548, | |
| "logits/rejected": -0.9096555709838867, | |
| "logps/chosen": -183.8284454345703, | |
| "logps/rejected": -179.8243408203125, | |
| "loss": 0.6471, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.22320905327796936, | |
| "rewards/margins": 0.07317076623439789, | |
| "rewards/rejected": -0.29637983441352844, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.255863539445629, | |
| "grad_norm": 7.983403287573849, | |
| "learning_rate": 4.6398234140190413e-07, | |
| "logits/chosen": -0.7042727470397949, | |
| "logits/rejected": -0.8362523317337036, | |
| "logps/chosen": -177.31149291992188, | |
| "logps/rejected": -171.16531372070312, | |
| "loss": 0.6495, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.2963384687900543, | |
| "rewards/margins": 0.054160721600055695, | |
| "rewards/rejected": -0.3504992127418518, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.26119402985074625, | |
| "grad_norm": 8.675362509945584, | |
| "learning_rate": 4.615394754805443e-07, | |
| "logits/chosen": -0.802803635597229, | |
| "logits/rejected": -0.8539141416549683, | |
| "logps/chosen": -185.90289306640625, | |
| "logps/rejected": -192.5127716064453, | |
| "loss": 0.641, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.28103405237197876, | |
| "rewards/margins": 0.13215723633766174, | |
| "rewards/rejected": -0.4131912589073181, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.26652452025586354, | |
| "grad_norm": 8.803685601945949, | |
| "learning_rate": 4.5902333828432416e-07, | |
| "logits/chosen": -0.7753912210464478, | |
| "logits/rejected": -0.9024080038070679, | |
| "logps/chosen": -181.60507202148438, | |
| "logps/rejected": -184.08474731445312, | |
| "loss": 0.651, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.34952813386917114, | |
| "rewards/margins": 0.10412784665822983, | |
| "rewards/rejected": -0.4536559581756592, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.27185501066098083, | |
| "grad_norm": 8.599862072625585, | |
| "learning_rate": 4.5643480133180855e-07, | |
| "logits/chosen": -0.7215537428855896, | |
| "logits/rejected": -0.8429878354072571, | |
| "logps/chosen": -193.21871948242188, | |
| "logps/rejected": -192.76535034179688, | |
| "loss": 0.652, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.43663716316223145, | |
| "rewards/margins": 0.09049404412508011, | |
| "rewards/rejected": -0.527131199836731, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2771855010660981, | |
| "grad_norm": 10.219200493862845, | |
| "learning_rate": 4.537747612187848e-07, | |
| "logits/chosen": -0.83184415102005, | |
| "logits/rejected": -1.0026618242263794, | |
| "logps/chosen": -216.73391723632812, | |
| "logps/rejected": -207.87527465820312, | |
| "loss": 0.6408, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.4637986123561859, | |
| "rewards/margins": 0.050548046827316284, | |
| "rewards/rejected": -0.5143465995788574, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.28251599147121537, | |
| "grad_norm": 9.931121908912841, | |
| "learning_rate": 4.510441393077069e-07, | |
| "logits/chosen": -0.8512382507324219, | |
| "logits/rejected": -1.0560386180877686, | |
| "logps/chosen": -200.3084259033203, | |
| "logps/rejected": -195.31561279296875, | |
| "loss": 0.6472, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.4075559675693512, | |
| "rewards/margins": 0.09888825565576553, | |
| "rewards/rejected": -0.5064442157745361, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2878464818763326, | |
| "grad_norm": 8.859652819595963, | |
| "learning_rate": 4.4824388140856194e-07, | |
| "logits/chosen": -0.8754502534866333, | |
| "logits/rejected": -1.0318089723587036, | |
| "logps/chosen": -198.41824340820312, | |
| "logps/rejected": -189.89077758789062, | |
| "loss": 0.6502, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3337915539741516, | |
| "rewards/margins": 0.13341596722602844, | |
| "rewards/rejected": -0.46720752120018005, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2931769722814499, | |
| "grad_norm": 9.06466635441445, | |
| "learning_rate": 4.453749574512685e-07, | |
| "logits/chosen": -0.9197045564651489, | |
| "logits/rejected": -1.0155677795410156, | |
| "logps/chosen": -185.05616760253906, | |
| "logps/rejected": -186.17381286621094, | |
| "loss": 0.6518, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.36515626311302185, | |
| "rewards/margins": 0.09796006977558136, | |
| "rewards/rejected": -0.4631163477897644, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.29850746268656714, | |
| "grad_norm": 8.989140378395783, | |
| "learning_rate": 4.4243836114972003e-07, | |
| "logits/chosen": -0.8735504150390625, | |
| "logits/rejected": -1.004237413406372, | |
| "logps/chosen": -186.00570678710938, | |
| "logps/rejected": -192.78012084960938, | |
| "loss": 0.6409, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3774046003818512, | |
| "rewards/margins": 0.15985320508480072, | |
| "rewards/rejected": -0.5372577905654907, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.30383795309168443, | |
| "grad_norm": 8.593968658418248, | |
| "learning_rate": 4.3943510965759113e-07, | |
| "logits/chosen": -0.9259954690933228, | |
| "logits/rejected": -1.008984088897705, | |
| "logps/chosen": -196.23764038085938, | |
| "logps/rejected": -200.01434326171875, | |
| "loss": 0.6385, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3551446795463562, | |
| "rewards/margins": 0.15665681660175323, | |
| "rewards/rejected": -0.5118014812469482, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.3091684434968017, | |
| "grad_norm": 8.920829639132457, | |
| "learning_rate": 4.3636624321602354e-07, | |
| "logits/chosen": -0.9114233255386353, | |
| "logits/rejected": -1.0022578239440918, | |
| "logps/chosen": -199.855712890625, | |
| "logps/rejected": -202.50045776367188, | |
| "loss": 0.6372, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3673010468482971, | |
| "rewards/margins": 0.11292729526758194, | |
| "rewards/rejected": -0.48022833466529846, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.31449893390191896, | |
| "grad_norm": 9.061291327996578, | |
| "learning_rate": 4.3323282479331713e-07, | |
| "logits/chosen": -0.8595677614212036, | |
| "logits/rejected": -0.9653046727180481, | |
| "logps/chosen": -215.55239868164062, | |
| "logps/rejected": -208.42080688476562, | |
| "loss": 0.6506, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.4611906409263611, | |
| "rewards/margins": 0.046931833028793335, | |
| "rewards/rejected": -0.508122444152832, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.31982942430703626, | |
| "grad_norm": 8.932259281056975, | |
| "learning_rate": 4.300359397167469e-07, | |
| "logits/chosen": -0.899543285369873, | |
| "logits/rejected": -1.058935523033142, | |
| "logps/chosen": -221.4969024658203, | |
| "logps/rejected": -217.5653076171875, | |
| "loss": 0.6415, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3796769082546234, | |
| "rewards/margins": 0.15340924263000488, | |
| "rewards/rejected": -0.5330861806869507, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3251599147121535, | |
| "grad_norm": 10.69745472768232, | |
| "learning_rate": 4.2677669529663686e-07, | |
| "logits/chosen": -0.7874996066093445, | |
| "logits/rejected": -0.9376864433288574, | |
| "logps/chosen": -175.2811279296875, | |
| "logps/rejected": -174.26589965820312, | |
| "loss": 0.6345, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.3251371383666992, | |
| "rewards/margins": 0.14439386129379272, | |
| "rewards/rejected": -0.46953099966049194, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3304904051172708, | |
| "grad_norm": 9.502534309876182, | |
| "learning_rate": 4.2345622044281914e-07, | |
| "logits/chosen": -0.8365820646286011, | |
| "logits/rejected": -0.9602219462394714, | |
| "logps/chosen": -198.43325805664062, | |
| "logps/rejected": -202.5500946044922, | |
| "loss": 0.6393, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.44264811277389526, | |
| "rewards/margins": 0.15877890586853027, | |
| "rewards/rejected": -0.6014270186424255, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3358208955223881, | |
| "grad_norm": 10.39334291807327, | |
| "learning_rate": 4.200756652736115e-07, | |
| "logits/chosen": -0.8717101812362671, | |
| "logits/rejected": -0.9584082365036011, | |
| "logps/chosen": -212.91275024414062, | |
| "logps/rejected": -228.49215698242188, | |
| "loss": 0.6423, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.48386502265930176, | |
| "rewards/margins": 0.22382013499736786, | |
| "rewards/rejected": -0.7076851725578308, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3411513859275053, | |
| "grad_norm": 9.34889686698433, | |
| "learning_rate": 4.1663620071744896e-07, | |
| "logits/chosen": -0.8714283108711243, | |
| "logits/rejected": -0.9042676091194153, | |
| "logps/chosen": -184.66299438476562, | |
| "logps/rejected": -192.072021484375, | |
| "loss": 0.6433, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.47013959288597107, | |
| "rewards/margins": 0.11798025667667389, | |
| "rewards/rejected": -0.5881198644638062, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3464818763326226, | |
| "grad_norm": 8.959638666781437, | |
| "learning_rate": 4.131390181073076e-07, | |
| "logits/chosen": -0.8877362012863159, | |
| "logits/rejected": -1.0048226118087769, | |
| "logps/chosen": -208.8312530517578, | |
| "logps/rejected": -213.86831665039062, | |
| "loss": 0.6312, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.397031307220459, | |
| "rewards/margins": 0.15148170292377472, | |
| "rewards/rejected": -0.5485130548477173, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.35181236673773986, | |
| "grad_norm": 9.592550539528885, | |
| "learning_rate": 4.0958532876806036e-07, | |
| "logits/chosen": -0.8785327076911926, | |
| "logits/rejected": -0.9449760317802429, | |
| "logps/chosen": -222.5094757080078, | |
| "logps/rejected": -226.78274536132812, | |
| "loss": 0.6337, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.5226814150810242, | |
| "rewards/margins": 0.08113773167133331, | |
| "rewards/rejected": -0.6038191914558411, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.35714285714285715, | |
| "grad_norm": 10.432646240734098, | |
| "learning_rate": 4.0597636359690854e-07, | |
| "logits/chosen": -0.927719235420227, | |
| "logits/rejected": -1.0275365114212036, | |
| "logps/chosen": -223.2044219970703, | |
| "logps/rejected": -225.4947967529297, | |
| "loss": 0.6146, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.5208097696304321, | |
| "rewards/margins": 0.20672473311424255, | |
| "rewards/rejected": -0.7275345325469971, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3624733475479744, | |
| "grad_norm": 11.479351717352401, | |
| "learning_rate": 4.023133726370341e-07, | |
| "logits/chosen": -0.9192001223564148, | |
| "logits/rejected": -1.064570426940918, | |
| "logps/chosen": -218.15878295898438, | |
| "logps/rejected": -227.5740966796875, | |
| "loss": 0.6338, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.5721922516822815, | |
| "rewards/margins": 0.18816125392913818, | |
| "rewards/rejected": -0.7603535056114197, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3678038379530917, | |
| "grad_norm": 9.752374588486973, | |
| "learning_rate": 3.9859762464461986e-07, | |
| "logits/chosen": -0.9149691462516785, | |
| "logits/rejected": -0.9972041845321655, | |
| "logps/chosen": -226.1667938232422, | |
| "logps/rejected": -231.6415557861328, | |
| "loss": 0.6171, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.5891625285148621, | |
| "rewards/margins": 0.15944533050060272, | |
| "rewards/rejected": -0.7486079931259155, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.373134328358209, | |
| "grad_norm": 10.686589782660716, | |
| "learning_rate": 3.9483040664938844e-07, | |
| "logits/chosen": -0.9657170176506042, | |
| "logits/rejected": -1.0521764755249023, | |
| "logps/chosen": -222.5596160888672, | |
| "logps/rejected": -228.71206665039062, | |
| "loss": 0.6368, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.5828268527984619, | |
| "rewards/margins": 0.16033609211444855, | |
| "rewards/rejected": -0.7431629300117493, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3784648187633262, | |
| "grad_norm": 11.539842635377966, | |
| "learning_rate": 3.910130235088118e-07, | |
| "logits/chosen": -0.9302359819412231, | |
| "logits/rejected": -0.9725440740585327, | |
| "logps/chosen": -218.52835083007812, | |
| "logps/rejected": -236.67294311523438, | |
| "loss": 0.6251, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.6089349389076233, | |
| "rewards/margins": 0.1914242058992386, | |
| "rewards/rejected": -0.8003591299057007, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3837953091684435, | |
| "grad_norm": 11.169667795718752, | |
| "learning_rate": 3.8714679745614556e-07, | |
| "logits/chosen": -0.9223200082778931, | |
| "logits/rejected": -1.0504696369171143, | |
| "logps/chosen": -218.5863037109375, | |
| "logps/rejected": -221.20297241210938, | |
| "loss": 0.6276, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6130382418632507, | |
| "rewards/margins": 0.17048679292201996, | |
| "rewards/rejected": -0.7835251092910767, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.38912579957356075, | |
| "grad_norm": 11.400952450487356, | |
| "learning_rate": 3.8323306764244445e-07, | |
| "logits/chosen": -0.8188157081604004, | |
| "logits/rejected": -0.9803248643875122, | |
| "logps/chosen": -235.3826904296875, | |
| "logps/rejected": -230.9711456298828, | |
| "loss": 0.6338, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.6885900497436523, | |
| "rewards/margins": 0.15824738144874573, | |
| "rewards/rejected": -0.8468375205993652, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.39445628997867804, | |
| "grad_norm": 12.170280653967604, | |
| "learning_rate": 3.792731896727196e-07, | |
| "logits/chosen": -0.9494584798812866, | |
| "logits/rejected": -0.9925413131713867, | |
| "logps/chosen": -215.47842407226562, | |
| "logps/rejected": -234.8311767578125, | |
| "loss": 0.6269, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.6851028203964233, | |
| "rewards/margins": 0.2173648625612259, | |
| "rewards/rejected": -0.9024677276611328, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3997867803837953, | |
| "grad_norm": 10.692294507184064, | |
| "learning_rate": 3.752685351363937e-07, | |
| "logits/chosen": -0.9608640670776367, | |
| "logits/rejected": -1.0993045568466187, | |
| "logps/chosen": -240.19479370117188, | |
| "logps/rejected": -244.61849975585938, | |
| "loss": 0.61, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.772241473197937, | |
| "rewards/margins": 0.1992679387331009, | |
| "rewards/rejected": -0.9715094566345215, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.4051172707889126, | |
| "grad_norm": 10.604201029450365, | |
| "learning_rate": 3.712204911322228e-07, | |
| "logits/chosen": -0.8940795660018921, | |
| "logits/rejected": -1.0112766027450562, | |
| "logps/chosen": -226.2286376953125, | |
| "logps/rejected": -236.16824340820312, | |
| "loss": 0.6243, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.7305911779403687, | |
| "rewards/margins": 0.17177362740039825, | |
| "rewards/rejected": -0.9023649096488953, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.41044776119402987, | |
| "grad_norm": 11.725383803893557, | |
| "learning_rate": 3.671304597878437e-07, | |
| "logits/chosen": -0.8430676460266113, | |
| "logits/rejected": -0.9990310668945312, | |
| "logps/chosen": -226.9669189453125, | |
| "logps/rejected": -233.7083740234375, | |
| "loss": 0.6201, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.7214430570602417, | |
| "rewards/margins": 0.18212191760540009, | |
| "rewards/rejected": -0.9035650491714478, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.4157782515991471, | |
| "grad_norm": 11.91435588205294, | |
| "learning_rate": 3.629998577741174e-07, | |
| "logits/chosen": -0.9615923762321472, | |
| "logits/rejected": -1.166372537612915, | |
| "logps/chosen": -227.1701202392578, | |
| "logps/rejected": -231.6777801513672, | |
| "loss": 0.6179, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6583755016326904, | |
| "rewards/margins": 0.21044659614562988, | |
| "rewards/rejected": -0.8688220977783203, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4211087420042644, | |
| "grad_norm": 11.584436951407874, | |
| "learning_rate": 3.588301158144338e-07, | |
| "logits/chosen": -0.9084697961807251, | |
| "logits/rejected": -0.9693692922592163, | |
| "logps/chosen": -245.2818145751953, | |
| "logps/rejected": -247.6303253173828, | |
| "loss": 0.6356, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.822106659412384, | |
| "rewards/margins": 0.17483489215373993, | |
| "rewards/rejected": -0.9969415664672852, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.42643923240938164, | |
| "grad_norm": 12.023549452448254, | |
| "learning_rate": 3.546226781891501e-07, | |
| "logits/chosen": -0.8738770484924316, | |
| "logits/rejected": -1.0339401960372925, | |
| "logps/chosen": -238.5256805419922, | |
| "logps/rejected": -247.3977813720703, | |
| "loss": 0.6265, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.713554322719574, | |
| "rewards/margins": 0.22547940909862518, | |
| "rewards/rejected": -0.9390336871147156, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.42643923240938164, | |
| "eval_logits/chosen": -1.4091081619262695, | |
| "eval_logits/rejected": -1.3678923845291138, | |
| "eval_logps/chosen": -229.8961181640625, | |
| "eval_logps/rejected": -245.27667236328125, | |
| "eval_loss": 0.6455010771751404, | |
| "eval_rewards/accuracies": 0.6504064798355103, | |
| "eval_rewards/chosen": -0.7831487059593201, | |
| "eval_rewards/margins": 0.165547713637352, | |
| "eval_rewards/rejected": -0.9486963748931885, | |
| "eval_runtime": 167.4485, | |
| "eval_samples_per_second": 11.717, | |
| "eval_steps_per_second": 1.469, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.43176972281449894, | |
| "grad_norm": 11.60850533610325, | |
| "learning_rate": 3.5037900223533325e-07, | |
| "logits/chosen": -0.9261396527290344, | |
| "logits/rejected": -1.0748217105865479, | |
| "logps/chosen": -221.2123565673828, | |
| "logps/rejected": -228.6689910888672, | |
| "loss": 0.5935, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6648604273796082, | |
| "rewards/margins": 0.26066452264785767, | |
| "rewards/rejected": -0.925524890422821, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.43710021321961623, | |
| "grad_norm": 11.18634804714827, | |
| "learning_rate": 3.461005578419791e-07, | |
| "logits/chosen": -0.8335205316543579, | |
| "logits/rejected": -0.9212998151779175, | |
| "logps/chosen": -244.4559783935547, | |
| "logps/rejected": -249.6932830810547, | |
| "loss": 0.6405, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.8230158090591431, | |
| "rewards/margins": 0.19074369966983795, | |
| "rewards/rejected": -1.0137594938278198, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.44243070362473347, | |
| "grad_norm": 12.840173089928633, | |
| "learning_rate": 3.4178882694088507e-07, | |
| "logits/chosen": -0.9584044218063354, | |
| "logits/rejected": -1.1188139915466309, | |
| "logps/chosen": -230.2344970703125, | |
| "logps/rejected": -227.662841796875, | |
| "loss": 0.6385, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.7208009958267212, | |
| "rewards/margins": 0.14610765874385834, | |
| "rewards/rejected": -0.8669085502624512, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.44776119402985076, | |
| "grad_norm": 12.162616519674994, | |
| "learning_rate": 3.374453029933509e-07, | |
| "logits/chosen": -0.972398579120636, | |
| "logits/rejected": -1.1250841617584229, | |
| "logps/chosen": -232.1986541748047, | |
| "logps/rejected": -250.6172332763672, | |
| "loss": 0.6009, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.5975956320762634, | |
| "rewards/margins": 0.4015510678291321, | |
| "rewards/rejected": -0.9991466403007507, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.453091684434968, | |
| "grad_norm": 11.14473654749687, | |
| "learning_rate": 3.3307149047288575e-07, | |
| "logits/chosen": -0.9900253415107727, | |
| "logits/rejected": -1.084149718284607, | |
| "logps/chosen": -239.5063018798828, | |
| "logps/rejected": -251.7197723388672, | |
| "loss": 0.6014, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.7624539732933044, | |
| "rewards/margins": 0.22666020691394806, | |
| "rewards/rejected": -0.9891141653060913, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.4584221748400853, | |
| "grad_norm": 12.058706301647955, | |
| "learning_rate": 3.286689043441015e-07, | |
| "logits/chosen": -0.9329894185066223, | |
| "logits/rejected": -1.0619364976882935, | |
| "logps/chosen": -246.6885223388672, | |
| "logps/rejected": -256.63360595703125, | |
| "loss": 0.6082, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.8027140498161316, | |
| "rewards/margins": 0.26076698303222656, | |
| "rewards/rejected": -1.063481092453003, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.46375266524520253, | |
| "grad_norm": 13.080197907672602, | |
| "learning_rate": 3.2423906953797207e-07, | |
| "logits/chosen": -0.8946924209594727, | |
| "logits/rejected": -0.9567023515701294, | |
| "logps/chosen": -211.9040069580078, | |
| "logps/rejected": -230.66299438476562, | |
| "loss": 0.6036, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.698184609413147, | |
| "rewards/margins": 0.28591758012771606, | |
| "rewards/rejected": -0.9841020703315735, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4690831556503198, | |
| "grad_norm": 10.950896035507196, | |
| "learning_rate": 3.197835204236402e-07, | |
| "logits/chosen": -1.0192838907241821, | |
| "logits/rejected": -1.1130427122116089, | |
| "logps/chosen": -241.8474578857422, | |
| "logps/rejected": -262.25335693359375, | |
| "loss": 0.6106, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.860467791557312, | |
| "rewards/margins": 0.19941401481628418, | |
| "rewards/rejected": -1.0598818063735962, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4744136460554371, | |
| "grad_norm": 12.737513715694588, | |
| "learning_rate": 3.153038002769558e-07, | |
| "logits/chosen": -0.9327136874198914, | |
| "logits/rejected": -1.030767560005188, | |
| "logps/chosen": -248.5272979736328, | |
| "logps/rejected": -258.75872802734375, | |
| "loss": 0.6136, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.8692724108695984, | |
| "rewards/margins": 0.2143835574388504, | |
| "rewards/rejected": -1.0836559534072876, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.47974413646055436, | |
| "grad_norm": 15.296734055580355, | |
| "learning_rate": 3.1080146074592877e-07, | |
| "logits/chosen": -0.9727839231491089, | |
| "logits/rejected": -1.0751718282699585, | |
| "logps/chosen": -241.76846313476562, | |
| "logps/rejected": -251.7017364501953, | |
| "loss": 0.6371, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.8884710073471069, | |
| "rewards/margins": 0.22023312747478485, | |
| "rewards/rejected": -1.1087043285369873, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.48507462686567165, | |
| "grad_norm": 11.780615719126844, | |
| "learning_rate": 3.0627806131328246e-07, | |
| "logits/chosen": -0.9416291117668152, | |
| "logits/rejected": -1.0612024068832397, | |
| "logps/chosen": -235.45767211914062, | |
| "logps/rejected": -247.3424530029297, | |
| "loss": 0.6173, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.8884525299072266, | |
| "rewards/margins": 0.24100270867347717, | |
| "rewards/rejected": -1.1294552087783813, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.4904051172707889, | |
| "grad_norm": 13.447809886194765, | |
| "learning_rate": 3.017351687562928e-07, | |
| "logits/chosen": -1.0132644176483154, | |
| "logits/rejected": -1.1040401458740234, | |
| "logps/chosen": -247.7209014892578, | |
| "logps/rejected": -248.7446746826172, | |
| "loss": 0.6152, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.9005931615829468, | |
| "rewards/margins": 0.11596985161304474, | |
| "rewards/rejected": -1.016563057899475, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4957356076759062, | |
| "grad_norm": 13.620020353000207, | |
| "learning_rate": 2.971743566041009e-07, | |
| "logits/chosen": -1.0589954853057861, | |
| "logits/rejected": -1.066146731376648, | |
| "logps/chosen": -247.865478515625, | |
| "logps/rejected": -260.4234313964844, | |
| "loss": 0.607, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.8577459454536438, | |
| "rewards/margins": 0.17027950286865234, | |
| "rewards/rejected": -1.0280256271362305, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5010660980810234, | |
| "grad_norm": 12.958251619951241, | |
| "learning_rate": 2.925972045926878e-07, | |
| "logits/chosen": -0.9736588597297668, | |
| "logits/rejected": -1.0960971117019653, | |
| "logps/chosen": -220.01126098632812, | |
| "logps/rejected": -241.4611358642578, | |
| "loss": 0.6215, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.7264107465744019, | |
| "rewards/margins": 0.30126145482063293, | |
| "rewards/rejected": -1.0276721715927124, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5063965884861408, | |
| "grad_norm": 13.3140639310756, | |
| "learning_rate": 2.880052981176979e-07, | |
| "logits/chosen": -0.9312192797660828, | |
| "logits/rejected": -1.0160053968429565, | |
| "logps/chosen": -222.43057250976562, | |
| "logps/rejected": -225.4202423095703, | |
| "loss": 0.63, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.8433539271354675, | |
| "rewards/margins": 0.1893020123243332, | |
| "rewards/rejected": -1.032655954360962, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.511727078891258, | |
| "grad_norm": 10.478988610995284, | |
| "learning_rate": 2.83400227685304e-07, | |
| "logits/chosen": -0.9926323890686035, | |
| "logits/rejected": -1.1106306314468384, | |
| "logps/chosen": -249.5770263671875, | |
| "logps/rejected": -254.35794067382812, | |
| "loss": 0.612, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.795128345489502, | |
| "rewards/margins": 0.20585620403289795, | |
| "rewards/rejected": -1.0009845495224, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5170575692963753, | |
| "grad_norm": 13.398290532585486, | |
| "learning_rate": 2.7878358836129984e-07, | |
| "logits/chosen": -1.035072922706604, | |
| "logits/rejected": -1.1353219747543335, | |
| "logps/chosen": -231.4602508544922, | |
| "logps/rejected": -246.7826690673828, | |
| "loss": 0.6198, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.7928118705749512, | |
| "rewards/margins": 0.24832260608673096, | |
| "rewards/rejected": -1.0411344766616821, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5223880597014925, | |
| "grad_norm": 17.87280922700601, | |
| "learning_rate": 2.7415697921861525e-07, | |
| "logits/chosen": -0.9991563558578491, | |
| "logits/rejected": -1.1898800134658813, | |
| "logps/chosen": -280.6573181152344, | |
| "logps/rejected": -277.5446472167969, | |
| "loss": 0.6233, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -1.0797468423843384, | |
| "rewards/margins": 0.14736375212669373, | |
| "rewards/rejected": -1.2271106243133545, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5277185501066098, | |
| "grad_norm": 14.03763690100759, | |
| "learning_rate": 2.6952200278344253e-07, | |
| "logits/chosen": -0.8905277252197266, | |
| "logits/rejected": -1.083092451095581, | |
| "logps/chosen": -234.2960662841797, | |
| "logps/rejected": -247.935791015625, | |
| "loss": 0.5984, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.8255411982536316, | |
| "rewards/margins": 0.22554175555706024, | |
| "rewards/rejected": -1.051082968711853, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5330490405117271, | |
| "grad_norm": 13.488186802243888, | |
| "learning_rate": 2.6488026448016686e-07, | |
| "logits/chosen": -1.000211238861084, | |
| "logits/rejected": -1.137927770614624, | |
| "logps/chosen": -260.8866882324219, | |
| "logps/rejected": -279.22772216796875, | |
| "loss": 0.6074, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.9299663305282593, | |
| "rewards/margins": 0.32078418135643005, | |
| "rewards/rejected": -1.2507504224777222, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5383795309168443, | |
| "grad_norm": 13.891894437147359, | |
| "learning_rate": 2.602333720752927e-07, | |
| "logits/chosen": -1.0730583667755127, | |
| "logits/rejected": -1.1127971410751343, | |
| "logps/chosen": -249.81387329101562, | |
| "logps/rejected": -281.8894958496094, | |
| "loss": 0.6043, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.9583339691162109, | |
| "rewards/margins": 0.47916507720947266, | |
| "rewards/rejected": -1.4374990463256836, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5437100213219617, | |
| "grad_norm": 13.78432594287455, | |
| "learning_rate": 2.5558293512055923e-07, | |
| "logits/chosen": -0.9855419397354126, | |
| "logits/rejected": -1.0839966535568237, | |
| "logps/chosen": -259.2463684082031, | |
| "logps/rejected": -281.6179504394531, | |
| "loss": 0.575, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.9760101437568665, | |
| "rewards/margins": 0.3508725166320801, | |
| "rewards/rejected": -1.3268824815750122, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5490405117270789, | |
| "grad_norm": 13.647127313830234, | |
| "learning_rate": 2.509305643954369e-07, | |
| "logits/chosen": -1.0535143613815308, | |
| "logits/rejected": -1.2141129970550537, | |
| "logps/chosen": -235.3438720703125, | |
| "logps/rejected": -240.82583618164062, | |
| "loss": 0.6014, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.8737846612930298, | |
| "rewards/margins": 0.20960083603858948, | |
| "rewards/rejected": -1.0833853483200073, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5543710021321961, | |
| "grad_norm": 12.039038135090694, | |
| "learning_rate": 2.4627787134919946e-07, | |
| "logits/chosen": -0.9818887710571289, | |
| "logits/rejected": -1.1572598218917847, | |
| "logps/chosen": -270.9326171875, | |
| "logps/rejected": -290.0334167480469, | |
| "loss": 0.5699, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.9136640429496765, | |
| "rewards/margins": 0.3413304388523102, | |
| "rewards/rejected": -1.254994511604309, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5597014925373134, | |
| "grad_norm": 12.903483427378369, | |
| "learning_rate": 2.41626467542764e-07, | |
| "logits/chosen": -0.9903634190559387, | |
| "logits/rejected": -1.0747687816619873, | |
| "logps/chosen": -246.91549682617188, | |
| "logps/rejected": -270.34954833984375, | |
| "loss": 0.5932, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.9555915594100952, | |
| "rewards/margins": 0.39527803659439087, | |
| "rewards/rejected": -1.3508695363998413, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5650319829424307, | |
| "grad_norm": 13.018513420276996, | |
| "learning_rate": 2.369779640904909e-07, | |
| "logits/chosen": -1.0167109966278076, | |
| "logits/rejected": -1.1171668767929077, | |
| "logps/chosen": -260.72833251953125, | |
| "logps/rejected": -275.2940673828125, | |
| "loss": 0.5986, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.9983251690864563, | |
| "rewards/margins": 0.2603410482406616, | |
| "rewards/rejected": -1.2586661577224731, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.570362473347548, | |
| "grad_norm": 14.17080589691093, | |
| "learning_rate": 2.3233397110214044e-07, | |
| "logits/chosen": -1.114485740661621, | |
| "logits/rejected": -1.2273226976394653, | |
| "logps/chosen": -267.9627380371094, | |
| "logps/rejected": -286.55926513671875, | |
| "loss": 0.6196, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.9473945498466492, | |
| "rewards/margins": 0.29965347051620483, | |
| "rewards/rejected": -1.247048020362854, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5756929637526652, | |
| "grad_norm": 14.907759901462226, | |
| "learning_rate": 2.2769609712517602e-07, | |
| "logits/chosen": -1.051343560218811, | |
| "logits/rejected": -1.112343668937683, | |
| "logps/chosen": -282.5835266113281, | |
| "logps/rejected": -286.35833740234375, | |
| "loss": 0.6281, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -1.1061525344848633, | |
| "rewards/margins": 0.1019170731306076, | |
| "rewards/rejected": -1.2080695629119873, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5810234541577826, | |
| "grad_norm": 14.332154011748669, | |
| "learning_rate": 2.2306594858760898e-07, | |
| "logits/chosen": -0.9886674880981445, | |
| "logits/rejected": -1.1096317768096924, | |
| "logps/chosen": -264.4873046875, | |
| "logps/rejected": -288.09381103515625, | |
| "loss": 0.6197, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.9765904545783997, | |
| "rewards/margins": 0.43369174003601074, | |
| "rewards/rejected": -1.4102822542190552, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5863539445628998, | |
| "grad_norm": 14.364439575082633, | |
| "learning_rate": 2.184451292415778e-07, | |
| "logits/chosen": -1.0294235944747925, | |
| "logits/rejected": -1.0585139989852905, | |
| "logps/chosen": -229.28250122070312, | |
| "logps/rejected": -251.99560546875, | |
| "loss": 0.6132, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.8699715733528137, | |
| "rewards/margins": 0.3365539610385895, | |
| "rewards/rejected": -1.2065255641937256, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.591684434968017, | |
| "grad_norm": 14.37697025208913, | |
| "learning_rate": 2.1383523960785342e-07, | |
| "logits/chosen": -1.0905894041061401, | |
| "logits/rejected": -1.2189487218856812, | |
| "logps/chosen": -245.4988250732422, | |
| "logps/rejected": -253.37771606445312, | |
| "loss": 0.611, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.8295791745185852, | |
| "rewards/margins": 0.25238728523254395, | |
| "rewards/rejected": -1.0819664001464844, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 13.273086730559621, | |
| "learning_rate": 2.0923787642146434e-07, | |
| "logits/chosen": -0.9458072781562805, | |
| "logits/rejected": -1.0978261232376099, | |
| "logps/chosen": -216.93496704101562, | |
| "logps/rejected": -238.4340362548828, | |
| "loss": 0.5878, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.8222309947013855, | |
| "rewards/margins": 0.3146277964115143, | |
| "rewards/rejected": -1.1368588209152222, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6023454157782516, | |
| "grad_norm": 12.275413565116182, | |
| "learning_rate": 2.046546320786331e-07, | |
| "logits/chosen": -1.0852059125900269, | |
| "logits/rejected": -1.217184066772461, | |
| "logps/chosen": -243.6894989013672, | |
| "logps/rejected": -254.8353271484375, | |
| "loss": 0.6099, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.8728886842727661, | |
| "rewards/margins": 0.2236359417438507, | |
| "rewards/rejected": -1.0965244770050049, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.6076759061833689, | |
| "grad_norm": 13.385563460718537, | |
| "learning_rate": 2.0008709408521507e-07, | |
| "logits/chosen": -1.075157880783081, | |
| "logits/rejected": -1.1313838958740234, | |
| "logps/chosen": -230.10983276367188, | |
| "logps/rejected": -256.946044921875, | |
| "loss": 0.5973, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.7323789000511169, | |
| "rewards/margins": 0.291358083486557, | |
| "rewards/rejected": -1.0237371921539307, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6130063965884861, | |
| "grad_norm": 12.904686522939175, | |
| "learning_rate": 1.9553684450683193e-07, | |
| "logits/chosen": -1.092653512954712, | |
| "logits/rejected": -1.1976040601730347, | |
| "logps/chosen": -237.1493682861328, | |
| "logps/rejected": -256.56890869140625, | |
| "loss": 0.6163, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.8681387901306152, | |
| "rewards/margins": 0.29039478302001953, | |
| "rewards/rejected": -1.1585334539413452, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.6183368869936035, | |
| "grad_norm": 18.21477968462917, | |
| "learning_rate": 1.9100545942088848e-07, | |
| "logits/chosen": -1.0292062759399414, | |
| "logits/rejected": -1.1282401084899902, | |
| "logps/chosen": -222.6328582763672, | |
| "logps/rejected": -251.3138885498047, | |
| "loss": 0.6092, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.8912476301193237, | |
| "rewards/margins": 0.3392987847328186, | |
| "rewards/rejected": -1.2305463552474976, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6236673773987207, | |
| "grad_norm": 13.078178032971321, | |
| "learning_rate": 1.8649450837066444e-07, | |
| "logits/chosen": -1.1086572408676147, | |
| "logits/rejected": -1.2702162265777588, | |
| "logps/chosen": -237.74673461914062, | |
| "logps/rejected": -256.3179626464844, | |
| "loss": 0.5969, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.7724729776382446, | |
| "rewards/margins": 0.34164559841156006, | |
| "rewards/rejected": -1.1141188144683838, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6289978678038379, | |
| "grad_norm": 14.54157536622139, | |
| "learning_rate": 1.8200555382166898e-07, | |
| "logits/chosen": -1.036029577255249, | |
| "logits/rejected": -1.1076552867889404, | |
| "logps/chosen": -257.90423583984375, | |
| "logps/rejected": -276.80859375, | |
| "loss": 0.5922, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.9389132261276245, | |
| "rewards/margins": 0.3674803078174591, | |
| "rewards/rejected": -1.3063933849334717, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6343283582089553, | |
| "grad_norm": 13.963862396350292, | |
| "learning_rate": 1.775401506204472e-07, | |
| "logits/chosen": -1.0365560054779053, | |
| "logits/rejected": -1.111011266708374, | |
| "logps/chosen": -249.289794921875, | |
| "logps/rejected": -261.4861755371094, | |
| "loss": 0.5923, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.9952207803726196, | |
| "rewards/margins": 0.24101737141609192, | |
| "rewards/rejected": -1.2362381219863892, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6396588486140725, | |
| "grad_norm": 15.013492823138591, | |
| "learning_rate": 1.7309984545602528e-07, | |
| "logits/chosen": -1.160706877708435, | |
| "logits/rejected": -1.2153781652450562, | |
| "logps/chosen": -283.8118896484375, | |
| "logps/rejected": -309.1605529785156, | |
| "loss": 0.6042, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.2339740991592407, | |
| "rewards/margins": 0.33963102102279663, | |
| "rewards/rejected": -1.5736052989959717, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6449893390191898, | |
| "grad_norm": 12.416383191561394, | |
| "learning_rate": 1.6868617632418114e-07, | |
| "logits/chosen": -1.1419028043746948, | |
| "logits/rejected": -1.2748745679855347, | |
| "logps/chosen": -278.46051025390625, | |
| "logps/rejected": -305.10687255859375, | |
| "loss": 0.5995, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.106767177581787, | |
| "rewards/margins": 0.3402232229709625, | |
| "rewards/rejected": -1.4469903707504272, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.650319829424307, | |
| "grad_norm": 13.316722698785181, | |
| "learning_rate": 1.6430067199472657e-07, | |
| "logits/chosen": -1.0973302125930786, | |
| "logits/rejected": -1.1705577373504639, | |
| "logps/chosen": -238.37857055664062, | |
| "logps/rejected": -260.87811279296875, | |
| "loss": 0.593, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.9219633936882019, | |
| "rewards/margins": 0.3023082911968231, | |
| "rewards/rejected": -1.224271535873413, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6556503198294243, | |
| "grad_norm": 14.96464401167362, | |
| "learning_rate": 1.599448514819844e-07, | |
| "logits/chosen": -1.1169979572296143, | |
| "logits/rejected": -1.2400496006011963, | |
| "logps/chosen": -251.90786743164062, | |
| "logps/rejected": -275.16632080078125, | |
| "loss": 0.6088, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.9491860270500183, | |
| "rewards/margins": 0.33866086602211, | |
| "rewards/rejected": -1.2878468036651611, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6609808102345416, | |
| "grad_norm": 14.725406377953089, | |
| "learning_rate": 1.5562022351864534e-07, | |
| "logits/chosen": -1.0977303981781006, | |
| "logits/rejected": -1.129206657409668, | |
| "logps/chosen": -240.2178955078125, | |
| "logps/rejected": -280.0063171386719, | |
| "loss": 0.5894, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.890995979309082, | |
| "rewards/margins": 0.4513840079307556, | |
| "rewards/rejected": -1.3423799276351929, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6663113006396588, | |
| "grad_norm": 12.844371576008434, | |
| "learning_rate": 1.5132828603318577e-07, | |
| "logits/chosen": -1.0323293209075928, | |
| "logits/rejected": -1.139953374862671, | |
| "logps/chosen": -255.4688720703125, | |
| "logps/rejected": -270.874755859375, | |
| "loss": 0.5895, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -1.0522164106369019, | |
| "rewards/margins": 0.21118326485157013, | |
| "rewards/rejected": -1.2633997201919556, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6716417910447762, | |
| "grad_norm": 17.421764111517, | |
| "learning_rate": 1.4707052563102748e-07, | |
| "logits/chosen": -1.0614488124847412, | |
| "logits/rejected": -1.1523784399032593, | |
| "logps/chosen": -246.06497192382812, | |
| "logps/rejected": -260.67041015625, | |
| "loss": 0.5892, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.0199403762817383, | |
| "rewards/margins": 0.25652509927749634, | |
| "rewards/rejected": -1.2764654159545898, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6769722814498934, | |
| "grad_norm": 11.820948981122326, | |
| "learning_rate": 1.4284841707961987e-07, | |
| "logits/chosen": -1.1177728176116943, | |
| "logits/rejected": -1.2426093816757202, | |
| "logps/chosen": -238.572509765625, | |
| "logps/rejected": -274.74896240234375, | |
| "loss": 0.5731, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.9242167472839355, | |
| "rewards/margins": 0.4471352994441986, | |
| "rewards/rejected": -1.371351957321167, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6823027718550106, | |
| "grad_norm": 16.810425880201876, | |
| "learning_rate": 1.386634227976224e-07, | |
| "logits/chosen": -1.0978498458862305, | |
| "logits/rejected": -1.1455624103546143, | |
| "logps/chosen": -252.0463409423828, | |
| "logps/rejected": -267.16900634765625, | |
| "loss": 0.6045, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -1.01809561252594, | |
| "rewards/margins": 0.2102915495634079, | |
| "rewards/rejected": -1.2283871173858643, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6876332622601279, | |
| "grad_norm": 16.16875613964214, | |
| "learning_rate": 1.345169923483642e-07, | |
| "logits/chosen": -1.074209451675415, | |
| "logits/rejected": -1.0901873111724854, | |
| "logps/chosen": -242.7150421142578, | |
| "logps/rejected": -255.5235595703125, | |
| "loss": 0.6142, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.9898387789726257, | |
| "rewards/margins": 0.1817895770072937, | |
| "rewards/rejected": -1.1716282367706299, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6929637526652452, | |
| "grad_norm": 14.889847230913357, | |
| "learning_rate": 1.3041056193775665e-07, | |
| "logits/chosen": -1.1271008253097534, | |
| "logits/rejected": -1.2666515111923218, | |
| "logps/chosen": -270.30523681640625, | |
| "logps/rejected": -305.54791259765625, | |
| "loss": 0.5835, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.0262703895568848, | |
| "rewards/margins": 0.49466007947921753, | |
| "rewards/rejected": -1.520930528640747, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6982942430703625, | |
| "grad_norm": 17.172517125459027, | |
| "learning_rate": 1.2634555391683188e-07, | |
| "logits/chosen": -1.1146763563156128, | |
| "logits/rejected": -1.1514074802398682, | |
| "logps/chosen": -282.617431640625, | |
| "logps/rejected": -305.576416015625, | |
| "loss": 0.6102, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.0780295133590698, | |
| "rewards/margins": 0.341577410697937, | |
| "rewards/rejected": -1.4196069240570068, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.7036247334754797, | |
| "grad_norm": 12.769747207532605, | |
| "learning_rate": 1.2232337628908103e-07, | |
| "logits/chosen": -1.005274772644043, | |
| "logits/rejected": -1.120625376701355, | |
| "logps/chosen": -270.090087890625, | |
| "logps/rejected": -300.65325927734375, | |
| "loss": 0.5804, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.084705114364624, | |
| "rewards/margins": 0.36065369844436646, | |
| "rewards/rejected": -1.4453589916229248, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7089552238805971, | |
| "grad_norm": 13.848401050251594, | |
| "learning_rate": 1.1834542222276206e-07, | |
| "logits/chosen": -1.1191794872283936, | |
| "logits/rejected": -1.2426977157592773, | |
| "logps/chosen": -271.416015625, | |
| "logps/rejected": -293.0040588378906, | |
| "loss": 0.6129, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.0729753971099854, | |
| "rewards/margins": 0.35853153467178345, | |
| "rewards/rejected": -1.4315071105957031, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 15.508341962305703, | |
| "learning_rate": 1.1441306956834504e-07, | |
| "logits/chosen": -1.1978566646575928, | |
| "logits/rejected": -1.2869031429290771, | |
| "logps/chosen": -252.2648162841797, | |
| "logps/rejected": -265.5303649902344, | |
| "loss": 0.6299, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.888351559638977, | |
| "rewards/margins": 0.23599569499492645, | |
| "rewards/rejected": -1.1243473291397095, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7196162046908315, | |
| "grad_norm": 12.326647139033, | |
| "learning_rate": 1.1052768038126464e-07, | |
| "logits/chosen": -1.0239012241363525, | |
| "logits/rejected": -1.1444613933563232, | |
| "logps/chosen": -273.5757751464844, | |
| "logps/rejected": -296.2203369140625, | |
| "loss": 0.594, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.0173006057739258, | |
| "rewards/margins": 0.30182453989982605, | |
| "rewards/rejected": -1.3191251754760742, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.7249466950959488, | |
| "grad_norm": 18.71316397695756, | |
| "learning_rate": 1.0669060045014214e-07, | |
| "logits/chosen": -1.1475574970245361, | |
| "logits/rejected": -1.2453533411026, | |
| "logps/chosen": -267.3650207519531, | |
| "logps/rejected": -284.7320861816406, | |
| "loss": 0.6173, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.9687989354133606, | |
| "rewards/margins": 0.274304062128067, | |
| "rewards/rejected": -1.2431029081344604, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7302771855010661, | |
| "grad_norm": 20.228539694090426, | |
| "learning_rate": 1.0290315883064258e-07, | |
| "logits/chosen": -1.0727207660675049, | |
| "logits/rejected": -1.169166088104248, | |
| "logps/chosen": -234.42538452148438, | |
| "logps/rejected": -259.74664306640625, | |
| "loss": 0.5982, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.879969596862793, | |
| "rewards/margins": 0.3725913166999817, | |
| "rewards/rejected": -1.2525609731674194, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.7356076759061834, | |
| "grad_norm": 13.518054635862288, | |
| "learning_rate": 9.9166667385128e-08, | |
| "logits/chosen": -1.0578858852386475, | |
| "logits/rejected": -1.159432053565979, | |
| "logps/chosen": -266.2313232421875, | |
| "logps/rejected": -277.4366149902344, | |
| "loss": 0.5997, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.0338330268859863, | |
| "rewards/margins": 0.32066407799720764, | |
| "rewards/rejected": -1.3544971942901611, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7409381663113006, | |
| "grad_norm": 13.588646234249182, | |
| "learning_rate": 9.54824203282647e-08, | |
| "logits/chosen": -1.1122428178787231, | |
| "logits/rejected": -1.1691913604736328, | |
| "logps/chosen": -292.1220703125, | |
| "logps/rejected": -314.33013916015625, | |
| "loss": 0.6016, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -1.232649564743042, | |
| "rewards/margins": 0.3411175310611725, | |
| "rewards/rejected": -1.573767066001892, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 13.39599747528393, | |
| "learning_rate": 9.185169377874488e-08, | |
| "logits/chosen": -1.0565317869186401, | |
| "logits/rejected": -1.104970932006836, | |
| "logps/chosen": -232.4084014892578, | |
| "logps/rejected": -271.18511962890625, | |
| "loss": 0.6054, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.9469151496887207, | |
| "rewards/margins": 0.4982013702392578, | |
| "rewards/rejected": -1.445116639137268, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7515991471215352, | |
| "grad_norm": 12.55265938358736, | |
| "learning_rate": 8.827574531727452e-08, | |
| "logits/chosen": -1.085356593132019, | |
| "logits/rejected": -1.254529595375061, | |
| "logps/chosen": -234.07498168945312, | |
| "logps/rejected": -255.6736297607422, | |
| "loss": 0.5812, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.9192997217178345, | |
| "rewards/margins": 0.39327552914619446, | |
| "rewards/rejected": -1.3125752210617065, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7569296375266524, | |
| "grad_norm": 16.169497185007014, | |
| "learning_rate": 8.475581355098379e-08, | |
| "logits/chosen": -1.1205322742462158, | |
| "logits/rejected": -1.227104902267456, | |
| "logps/chosen": -254.82998657226562, | |
| "logps/rejected": -265.16961669921875, | |
| "loss": 0.5946, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.9393894076347351, | |
| "rewards/margins": 0.3107382357120514, | |
| "rewards/rejected": -1.2501277923583984, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7622601279317697, | |
| "grad_norm": 12.716385801386352, | |
| "learning_rate": 8.129311768440807e-08, | |
| "logits/chosen": -1.0053701400756836, | |
| "logits/rejected": -1.096592903137207, | |
| "logps/chosen": -277.47540283203125, | |
| "logps/rejected": -304.0431823730469, | |
| "loss": 0.5821, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1990084648132324, | |
| "rewards/margins": 0.3271089792251587, | |
| "rewards/rejected": -1.5261173248291016, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.767590618336887, | |
| "grad_norm": 18.552637237088017, | |
| "learning_rate": 7.788885709719033e-08, | |
| "logits/chosen": -1.0947494506835938, | |
| "logits/rejected": -1.1496310234069824, | |
| "logps/chosen": -254.96630859375, | |
| "logps/rejected": -285.97308349609375, | |
| "loss": 0.6177, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.0156748294830322, | |
| "rewards/margins": 0.3178596496582031, | |
| "rewards/rejected": -1.333534598350525, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7729211087420043, | |
| "grad_norm": 12.746321182622895, | |
| "learning_rate": 7.454421092865037e-08, | |
| "logits/chosen": -1.0280872583389282, | |
| "logits/rejected": -1.1245920658111572, | |
| "logps/chosen": -273.39501953125, | |
| "logps/rejected": -283.32110595703125, | |
| "loss": 0.5903, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.220465898513794, | |
| "rewards/margins": 0.208203986287117, | |
| "rewards/rejected": -1.4286696910858154, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7782515991471215, | |
| "grad_norm": 11.876891342705504, | |
| "learning_rate": 7.126033766936365e-08, | |
| "logits/chosen": -1.1646153926849365, | |
| "logits/rejected": -1.2614471912384033, | |
| "logps/chosen": -267.27734375, | |
| "logps/rejected": -288.18731689453125, | |
| "loss": 0.6068, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.021458387374878, | |
| "rewards/margins": 0.3050800859928131, | |
| "rewards/rejected": -1.3265384435653687, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7835820895522388, | |
| "grad_norm": 13.39270512390593, | |
| "learning_rate": 6.80383747598938e-08, | |
| "logits/chosen": -1.1257355213165283, | |
| "logits/rejected": -1.1825156211853027, | |
| "logps/chosen": -269.59747314453125, | |
| "logps/rejected": -300.54266357421875, | |
| "loss": 0.5952, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.0236284732818604, | |
| "rewards/margins": 0.42881250381469727, | |
| "rewards/rejected": -1.4524409770965576, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.7889125799573561, | |
| "grad_norm": 14.236670336488492, | |
| "learning_rate": 6.487943819681488e-08, | |
| "logits/chosen": -1.0966401100158691, | |
| "logits/rejected": -1.1666018962860107, | |
| "logps/chosen": -249.0513153076172, | |
| "logps/rejected": -277.46051025390625, | |
| "loss": 0.5756, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.015439748764038, | |
| "rewards/margins": 0.33569568395614624, | |
| "rewards/rejected": -1.3511353731155396, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7942430703624733, | |
| "grad_norm": 13.439712780756642, | |
| "learning_rate": 6.178462214616203e-08, | |
| "logits/chosen": -1.0600165128707886, | |
| "logits/rejected": -1.1609599590301514, | |
| "logps/chosen": -253.85018920898438, | |
| "logps/rejected": -286.5955810546875, | |
| "loss": 0.5808, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.0086755752563477, | |
| "rewards/margins": 0.4383172392845154, | |
| "rewards/rejected": -1.4469928741455078, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7995735607675906, | |
| "grad_norm": 13.727935713255976, | |
| "learning_rate": 5.875499856444358e-08, | |
| "logits/chosen": -1.052286148071289, | |
| "logits/rejected": -1.177433967590332, | |
| "logps/chosen": -263.67333984375, | |
| "logps/rejected": -291.416015625, | |
| "loss": 0.5914, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.058933138847351, | |
| "rewards/margins": 0.31677955389022827, | |
| "rewards/rejected": -1.3757128715515137, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8049040511727079, | |
| "grad_norm": 13.5774716097425, | |
| "learning_rate": 5.5791616827345484e-08, | |
| "logits/chosen": -1.1035162210464478, | |
| "logits/rejected": -1.2241528034210205, | |
| "logps/chosen": -258.1141357421875, | |
| "logps/rejected": -289.6146545410156, | |
| "loss": 0.5851, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.0831810235977173, | |
| "rewards/margins": 0.37278053164482117, | |
| "rewards/rejected": -1.4559617042541504, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.8102345415778252, | |
| "grad_norm": 13.872602372334944, | |
| "learning_rate": 5.289550336625731e-08, | |
| "logits/chosen": -0.967927098274231, | |
| "logits/rejected": -1.1444356441497803, | |
| "logps/chosen": -245.47329711914062, | |
| "logps/rejected": -270.84033203125, | |
| "loss": 0.5823, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.0041565895080566, | |
| "rewards/margins": 0.34393635392189026, | |
| "rewards/rejected": -1.348093032836914, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8155650319829424, | |
| "grad_norm": 14.466168736500185, | |
| "learning_rate": 5.006766131274559e-08, | |
| "logits/chosen": -1.1071698665618896, | |
| "logits/rejected": -1.1825703382492065, | |
| "logps/chosen": -275.54388427734375, | |
| "logps/rejected": -296.87689208984375, | |
| "loss": 0.604, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1156272888183594, | |
| "rewards/margins": 0.3059665858745575, | |
| "rewards/rejected": -1.4215937852859497, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8208955223880597, | |
| "grad_norm": 15.26501051880337, | |
| "learning_rate": 4.730907015109759e-08, | |
| "logits/chosen": -1.037107229232788, | |
| "logits/rejected": -1.1411950588226318, | |
| "logps/chosen": -266.26123046875, | |
| "logps/rejected": -296.9294128417969, | |
| "loss": 0.5551, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -1.0510127544403076, | |
| "rewards/margins": 0.46340426802635193, | |
| "rewards/rejected": -1.5144169330596924, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.826226012793177, | |
| "grad_norm": 10.911420515343652, | |
| "learning_rate": 4.4620685379055584e-08, | |
| "logits/chosen": -1.0774571895599365, | |
| "logits/rejected": -1.2212311029434204, | |
| "logps/chosen": -263.77813720703125, | |
| "logps/rejected": -281.2510070800781, | |
| "loss": 0.6032, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.093552589416504, | |
| "rewards/margins": 0.22536174952983856, | |
| "rewards/rejected": -1.318914532661438, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8315565031982942, | |
| "grad_norm": 13.252940181725066, | |
| "learning_rate": 4.200343817685981e-08, | |
| "logits/chosen": -1.134172797203064, | |
| "logits/rejected": -1.157869577407837, | |
| "logps/chosen": -237.58920288085938, | |
| "logps/rejected": -262.5208435058594, | |
| "loss": 0.59, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.9115175008773804, | |
| "rewards/margins": 0.29485780000686646, | |
| "rewards/rejected": -1.2063753604888916, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8368869936034116, | |
| "grad_norm": 18.75722287778124, | |
| "learning_rate": 3.945823508471352e-08, | |
| "logits/chosen": -1.1293060779571533, | |
| "logits/rejected": -1.2253621816635132, | |
| "logps/chosen": -274.54595947265625, | |
| "logps/rejected": -298.9117126464844, | |
| "loss": 0.6294, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.0663083791732788, | |
| "rewards/margins": 0.33463555574417114, | |
| "rewards/rejected": -1.4009437561035156, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.8422174840085288, | |
| "grad_norm": 12.55898038094129, | |
| "learning_rate": 3.698595768878363e-08, | |
| "logits/chosen": -1.0901148319244385, | |
| "logits/rejected": -1.2076427936553955, | |
| "logps/chosen": -242.2190704345703, | |
| "logps/rejected": -263.9744567871094, | |
| "loss": 0.5918, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.9093745946884155, | |
| "rewards/margins": 0.4039868712425232, | |
| "rewards/rejected": -1.313361406326294, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.847547974413646, | |
| "grad_norm": 12.79776093588139, | |
| "learning_rate": 3.458746231584414e-08, | |
| "logits/chosen": -1.1291230916976929, | |
| "logits/rejected": -1.2340444326400757, | |
| "logps/chosen": -260.56195068359375, | |
| "logps/rejected": -304.3238525390625, | |
| "loss": 0.5823, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.992597222328186, | |
| "rewards/margins": 0.5544020533561707, | |
| "rewards/rejected": -1.5469990968704224, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8528784648187633, | |
| "grad_norm": 13.33658395118552, | |
| "learning_rate": 3.226357973666888e-08, | |
| "logits/chosen": -1.10861074924469, | |
| "logits/rejected": -1.2813326120376587, | |
| "logps/chosen": -228.31155395507812, | |
| "logps/rejected": -253.84207153320312, | |
| "loss": 0.6053, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.8997095227241516, | |
| "rewards/margins": 0.36860379576683044, | |
| "rewards/rejected": -1.2683132886886597, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8528784648187633, | |
| "eval_logits/chosen": -1.526044487953186, | |
| "eval_logits/rejected": -1.4904903173446655, | |
| "eval_logps/chosen": -256.8968811035156, | |
| "eval_logps/rejected": -280.7786560058594, | |
| "eval_loss": 0.6389869451522827, | |
| "eval_rewards/accuracies": 0.6056910753250122, | |
| "eval_rewards/chosen": -1.0531564950942993, | |
| "eval_rewards/margins": 0.25055956840515137, | |
| "eval_rewards/rejected": -1.3037161827087402, | |
| "eval_runtime": 165.6574, | |
| "eval_samples_per_second": 11.844, | |
| "eval_steps_per_second": 1.485, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8582089552238806, | |
| "grad_norm": 13.707316978636483, | |
| "learning_rate": 3.001511487827582e-08, | |
| "logits/chosen": -1.117619514465332, | |
| "logits/rejected": -1.1415525674819946, | |
| "logps/chosen": -267.25225830078125, | |
| "logps/rejected": -302.49298095703125, | |
| "loss": 0.5991, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.0340323448181152, | |
| "rewards/margins": 0.3930579125881195, | |
| "rewards/rejected": -1.4270904064178467, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.8635394456289979, | |
| "grad_norm": 16.362755169612413, | |
| "learning_rate": 2.7842846545123505e-08, | |
| "logits/chosen": -1.0309226512908936, | |
| "logits/rejected": -1.1349594593048096, | |
| "logps/chosen": -256.92706298828125, | |
| "logps/rejected": -267.79986572265625, | |
| "loss": 0.6232, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.9786995649337769, | |
| "rewards/margins": 0.1820269525051117, | |
| "rewards/rejected": -1.160726547241211, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8688699360341151, | |
| "grad_norm": 13.309838462940968, | |
| "learning_rate": 2.5747527149355018e-08, | |
| "logits/chosen": -1.1667518615722656, | |
| "logits/rejected": -1.2266581058502197, | |
| "logps/chosen": -267.74517822265625, | |
| "logps/rejected": -306.21563720703125, | |
| "loss": 0.5618, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.044034719467163, | |
| "rewards/margins": 0.49896711111068726, | |
| "rewards/rejected": -1.5430018901824951, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.8742004264392325, | |
| "grad_norm": 12.846376086365586, | |
| "learning_rate": 2.372988245018401e-08, | |
| "logits/chosen": -1.05556321144104, | |
| "logits/rejected": -1.1795189380645752, | |
| "logps/chosen": -259.63458251953125, | |
| "logps/rejected": -298.86761474609375, | |
| "loss": 0.5695, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.0314178466796875, | |
| "rewards/margins": 0.522697925567627, | |
| "rewards/rejected": -1.5541157722473145, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8795309168443497, | |
| "grad_norm": 13.824707878509736, | |
| "learning_rate": 2.1790611302512114e-08, | |
| "logits/chosen": -1.1069999933242798, | |
| "logits/rejected": -1.1457974910736084, | |
| "logps/chosen": -283.31170654296875, | |
| "logps/rejected": -302.56658935546875, | |
| "loss": 0.5923, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -1.16603684425354, | |
| "rewards/margins": 0.2864134609699249, | |
| "rewards/rejected": -1.4524505138397217, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.8848614072494669, | |
| "grad_norm": 15.28607581049521, | |
| "learning_rate": 1.9930385414865386e-08, | |
| "logits/chosen": -1.0714858770370483, | |
| "logits/rejected": -1.1165021657943726, | |
| "logps/chosen": -269.0600280761719, | |
| "logps/rejected": -298.10711669921875, | |
| "loss": 0.5989, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.1144940853118896, | |
| "rewards/margins": 0.3277047276496887, | |
| "rewards/rejected": -1.4421989917755127, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8901918976545842, | |
| "grad_norm": 16.521551762750804, | |
| "learning_rate": 1.8149849116733672e-08, | |
| "logits/chosen": -1.0863420963287354, | |
| "logits/rejected": -1.1994072198867798, | |
| "logps/chosen": -260.0115661621094, | |
| "logps/rejected": -284.62774658203125, | |
| "loss": 0.597, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.9503141641616821, | |
| "rewards/margins": 0.32754647731781006, | |
| "rewards/rejected": -1.2778605222702026, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.8955223880597015, | |
| "grad_norm": 18.673626539883045, | |
| "learning_rate": 1.6449619135393084e-08, | |
| "logits/chosen": -1.0423157215118408, | |
| "logits/rejected": -1.1916964054107666, | |
| "logps/chosen": -263.9278564453125, | |
| "logps/rejected": -286.19940185546875, | |
| "loss": 0.5925, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.0104882717132568, | |
| "rewards/margins": 0.30736953020095825, | |
| "rewards/rejected": -1.3178579807281494, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9008528784648188, | |
| "grad_norm": 11.823082792807151, | |
| "learning_rate": 1.4830284382289144e-08, | |
| "logits/chosen": -1.114751935005188, | |
| "logits/rejected": -1.1412584781646729, | |
| "logps/chosen": -269.94757080078125, | |
| "logps/rejected": -289.11083984375, | |
| "loss": 0.5786, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.1332142353057861, | |
| "rewards/margins": 0.24711325764656067, | |
| "rewards/rejected": -1.3803274631500244, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.906183368869936, | |
| "grad_norm": 14.344497159976747, | |
| "learning_rate": 1.329240574905452e-08, | |
| "logits/chosen": -1.1762893199920654, | |
| "logits/rejected": -1.265937328338623, | |
| "logps/chosen": -289.95892333984375, | |
| "logps/rejected": -309.7876281738281, | |
| "loss": 0.6024, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.0822083950042725, | |
| "rewards/margins": 0.27944907546043396, | |
| "rewards/rejected": -1.3616573810577393, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9115138592750534, | |
| "grad_norm": 18.008488344780044, | |
| "learning_rate": 1.1836515913232175e-08, | |
| "logits/chosen": -1.1288697719573975, | |
| "logits/rejected": -1.3069543838500977, | |
| "logps/chosen": -264.71087646484375, | |
| "logps/rejected": -277.7538757324219, | |
| "loss": 0.5839, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.0512837171554565, | |
| "rewards/margins": 0.2792138457298279, | |
| "rewards/rejected": -1.3304975032806396, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9168443496801706, | |
| "grad_norm": 14.381654064223152, | |
| "learning_rate": 1.0463119153770989e-08, | |
| "logits/chosen": -1.143795132637024, | |
| "logits/rejected": -1.2443573474884033, | |
| "logps/chosen": -259.2528076171875, | |
| "logps/rejected": -280.7781066894531, | |
| "loss": 0.6172, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.0562851428985596, | |
| "rewards/margins": 0.3439714312553406, | |
| "rewards/rejected": -1.4002567529678345, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9221748400852878, | |
| "grad_norm": 18.214838134208975, | |
| "learning_rate": 9.172691176357633e-09, | |
| "logits/chosen": -1.1904518604278564, | |
| "logits/rejected": -1.3172063827514648, | |
| "logps/chosen": -243.25830078125, | |
| "logps/rejected": -253.4103240966797, | |
| "loss": 0.5922, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.9157142639160156, | |
| "rewards/margins": 0.22436395287513733, | |
| "rewards/rejected": -1.1400783061981201, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.9275053304904051, | |
| "grad_norm": 13.634453891392663, | |
| "learning_rate": 7.965678948645832e-09, | |
| "logits/chosen": -1.1409590244293213, | |
| "logits/rejected": -1.193704605102539, | |
| "logps/chosen": -290.54986572265625, | |
| "logps/rejected": -311.5206604003906, | |
| "loss": 0.5871, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.1318142414093018, | |
| "rewards/margins": 0.27807727456092834, | |
| "rewards/rejected": -1.4098914861679077, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9328358208955224, | |
| "grad_norm": 16.785891539902277, | |
| "learning_rate": 6.842500545439278e-09, | |
| "logits/chosen": -1.189774751663208, | |
| "logits/rejected": -1.1961729526519775, | |
| "logps/chosen": -282.62677001953125, | |
| "logps/rejected": -313.8055419921875, | |
| "loss": 0.5976, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.145236611366272, | |
| "rewards/margins": 0.3282146751880646, | |
| "rewards/rejected": -1.4734513759613037, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.9381663113006397, | |
| "grad_norm": 12.489851575915692, | |
| "learning_rate": 5.803545003882554e-09, | |
| "logits/chosen": -1.0940172672271729, | |
| "logits/rejected": -1.2261667251586914, | |
| "logps/chosen": -262.1616516113281, | |
| "logps/rejected": -284.6984558105469, | |
| "loss": 0.5974, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.0160671472549438, | |
| "rewards/margins": 0.3600946366786957, | |
| "rewards/rejected": -1.376161813735962, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9434968017057569, | |
| "grad_norm": 13.817135678609631, | |
| "learning_rate": 4.849172188709588e-09, | |
| "logits/chosen": -1.124348521232605, | |
| "logits/rejected": -1.223716139793396, | |
| "logps/chosen": -275.583251953125, | |
| "logps/rejected": -284.2818298339844, | |
| "loss": 0.5971, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -1.0598669052124023, | |
| "rewards/margins": 0.21211442351341248, | |
| "rewards/rejected": -1.2719814777374268, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9488272921108742, | |
| "grad_norm": 17.35628545613914, | |
| "learning_rate": 3.979712667596669e-09, | |
| "logits/chosen": -1.0675632953643799, | |
| "logits/rejected": -1.1845059394836426, | |
| "logps/chosen": -253.7633819580078, | |
| "logps/rejected": -275.1048278808594, | |
| "loss": 0.5955, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.99676513671875, | |
| "rewards/margins": 0.29226452112197876, | |
| "rewards/rejected": -1.2890297174453735, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9541577825159915, | |
| "grad_norm": 16.142121068942174, | |
| "learning_rate": 3.195467596663254e-09, | |
| "logits/chosen": -1.131365180015564, | |
| "logits/rejected": -1.242356538772583, | |
| "logps/chosen": -240.4732208251953, | |
| "logps/rejected": -275.12884521484375, | |
| "loss": 0.5831, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.9128969311714172, | |
| "rewards/margins": 0.4684675335884094, | |
| "rewards/rejected": -1.3813644647598267, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.9594882729211087, | |
| "grad_norm": 15.867103975451991, | |
| "learning_rate": 2.4967086161600814e-09, | |
| "logits/chosen": -1.082676649093628, | |
| "logits/rejected": -1.2301782369613647, | |
| "logps/chosen": -251.3439483642578, | |
| "logps/rejected": -259.95037841796875, | |
| "loss": 0.6056, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.056687355041504, | |
| "rewards/margins": 0.2243305891752243, | |
| "rewards/rejected": -1.2810180187225342, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.964818763326226, | |
| "grad_norm": 14.494532565146372, | |
| "learning_rate": 1.8836777563805416e-09, | |
| "logits/chosen": -1.1500489711761475, | |
| "logits/rejected": -1.267773151397705, | |
| "logps/chosen": -262.1219177246094, | |
| "logps/rejected": -284.3946228027344, | |
| "loss": 0.584, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.9880873560905457, | |
| "rewards/margins": 0.3360677659511566, | |
| "rewards/rejected": -1.3241552114486694, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.9701492537313433, | |
| "grad_norm": 13.7532663379059, | |
| "learning_rate": 1.3565873538283757e-09, | |
| "logits/chosen": -1.0888932943344116, | |
| "logits/rejected": -1.2862221002578735, | |
| "logps/chosen": -281.6429443359375, | |
| "logps/rejected": -287.7558898925781, | |
| "loss": 0.5837, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.1525847911834717, | |
| "rewards/margins": 0.23554334044456482, | |
| "rewards/rejected": -1.3881282806396484, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9754797441364605, | |
| "grad_norm": 18.11310668134442, | |
| "learning_rate": 9.156199776702567e-10, | |
| "logits/chosen": -1.2114653587341309, | |
| "logits/rejected": -1.2886050939559937, | |
| "logps/chosen": -277.15283203125, | |
| "logps/rejected": -297.81988525390625, | |
| "loss": 0.6029, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1020017862319946, | |
| "rewards/margins": 0.3052862286567688, | |
| "rewards/rejected": -1.4072880744934082, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.9808102345415778, | |
| "grad_norm": 16.38268266725161, | |
| "learning_rate": 5.609283664990693e-10, | |
| "logits/chosen": -1.1473147869110107, | |
| "logits/rejected": -1.2061867713928223, | |
| "logps/chosen": -269.15692138671875, | |
| "logps/rejected": -297.3847351074219, | |
| "loss": 0.6185, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.0159175395965576, | |
| "rewards/margins": 0.310077965259552, | |
| "rewards/rejected": -1.3259953260421753, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9861407249466951, | |
| "grad_norm": 15.464029236617364, | |
| "learning_rate": 2.926353754295896e-10, | |
| "logits/chosen": -1.2007001638412476, | |
| "logits/rejected": -1.333519458770752, | |
| "logps/chosen": -266.4494934082031, | |
| "logps/rejected": -296.6923522949219, | |
| "loss": 0.5848, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.0838963985443115, | |
| "rewards/margins": 0.3357781171798706, | |
| "rewards/rejected": -1.4196745157241821, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.9914712153518124, | |
| "grad_norm": 11.785459063580232, | |
| "learning_rate": 1.1083393354488491e-10, | |
| "logits/chosen": -1.0910792350769043, | |
| "logits/rejected": -1.14475417137146, | |
| "logps/chosen": -275.3228454589844, | |
| "logps/rejected": -302.8422546386719, | |
| "loss": 0.5652, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1003937721252441, | |
| "rewards/margins": 0.42316898703575134, | |
| "rewards/rejected": -1.5235626697540283, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.9968017057569296, | |
| "grad_norm": 12.18026896660238, | |
| "learning_rate": 1.5587011708340092e-11, | |
| "logits/chosen": -1.0970802307128906, | |
| "logits/rejected": -1.167004108428955, | |
| "logps/chosen": -295.5314636230469, | |
| "logps/rejected": -330.5072937011719, | |
| "loss": 0.5541, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -1.1293576955795288, | |
| "rewards/margins": 0.5504059791564941, | |
| "rewards/rejected": -1.6797635555267334, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 938, | |
| "total_flos": 0.0, | |
| "train_loss": 0.6256769998495513, | |
| "train_runtime": 22377.6313, | |
| "train_samples_per_second": 2.683, | |
| "train_steps_per_second": 0.042 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 938, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |