{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9965397923875434, "eval_steps": 2000.0, "global_step": 433, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006920415224913495, "grad_norm": 4.640795155497921, "kl": 1.7562136633841874e-08, "learning_rate": 5.000000000000001e-07, "loss": 0.5541, "step": 1, "step_loss": 0.5590814352035522 }, { "epoch": 0.01384083044982699, "grad_norm": 4.305280308829804, "kl": 0.08008483052253723, "learning_rate": 2.438044511330269e-06, "loss": 0.4687, "step": 2, "step_loss": 0.4241759181022644 }, { "epoch": 0.020761245674740483, "grad_norm": 3.3335341075195397, "kl": 0.116165891289711, "learning_rate": 3.5717278751869343e-06, "loss": 0.4356, "step": 3, "step_loss": 0.31100255250930786 }, { "epoch": 0.02768166089965398, "grad_norm": 1.9310401160499777, "kl": 0.10272125899791718, "learning_rate": 4.376089022660538e-06, "loss": 0.4129, "step": 4, "step_loss": 0.25058284401893616 }, { "epoch": 0.03460207612456748, "grad_norm": 2.247300117608575, "kl": 0.2335793375968933, "learning_rate": 5e-06, "loss": 0.4091, "step": 5, "step_loss": 0.32731616497039795 }, { "epoch": 0.04152249134948097, "grad_norm": 1.9864729956926086, "kl": 0.1950061321258545, "learning_rate": 4.999978281016993e-06, "loss": 0.4146, "step": 6, "step_loss": 0.29094406962394714 }, { "epoch": 0.04844290657439446, "grad_norm": 2.274582931519451, "kl": 0.29702895879745483, "learning_rate": 4.999913124487272e-06, "loss": 0.444, "step": 7, "step_loss": 0.3739759624004364 }, { "epoch": 0.05536332179930796, "grad_norm": 1.6000864065203788, "kl": 0.18979279696941376, "learning_rate": 4.999804531668733e-06, "loss": 0.4294, "step": 8, "step_loss": 0.4522823095321655 }, { "epoch": 0.06228373702422145, "grad_norm": 1.5623150525307812, "kl": 0.15496733784675598, "learning_rate": 4.999652504657845e-06, "loss": 0.3803, "step": 9, "step_loss": 0.4396539330482483 }, { "epoch": 0.06920415224913495, 
"grad_norm": 1.6507322420571549, "kl": 0.19352534413337708, "learning_rate": 4.999457046389603e-06, "loss": 0.376, "step": 10, "step_loss": 0.3174796402454376 }, { "epoch": 0.07612456747404844, "grad_norm": 1.6435962474552708, "kl": 0.25893163681030273, "learning_rate": 4.999218160637481e-06, "loss": 0.3697, "step": 11, "step_loss": 0.3205254077911377 }, { "epoch": 0.08304498269896193, "grad_norm": 1.61077168422504, "kl": 0.27821144461631775, "learning_rate": 4.998935852013348e-06, "loss": 0.4298, "step": 12, "step_loss": 0.6721363663673401 }, { "epoch": 0.08996539792387544, "grad_norm": 1.597023313917083, "kl": 0.24240414798259735, "learning_rate": 4.998610125967389e-06, "loss": 0.3871, "step": 13, "step_loss": 0.37448108196258545 }, { "epoch": 0.09688581314878893, "grad_norm": 1.5699769985433545, "kl": 0.19961310923099518, "learning_rate": 4.998240988787992e-06, "loss": 0.3856, "step": 14, "step_loss": 0.43761497735977173 }, { "epoch": 0.10380622837370242, "grad_norm": 1.4349146274598168, "kl": 0.27401474118232727, "learning_rate": 4.997828447601633e-06, "loss": 0.3771, "step": 15, "step_loss": 0.32319125533103943 }, { "epoch": 0.11072664359861592, "grad_norm": 1.4044908752986978, "kl": 0.2062491476535797, "learning_rate": 4.997372510372735e-06, "loss": 0.3804, "step": 16, "step_loss": 0.37521931529045105 }, { "epoch": 0.11764705882352941, "grad_norm": 1.4773843889784544, "kl": 0.20949065685272217, "learning_rate": 4.996873185903513e-06, "loss": 0.3617, "step": 17, "step_loss": 0.3842497169971466 }, { "epoch": 0.1245674740484429, "grad_norm": 1.5072531181350017, "kl": 0.15509894490242004, "learning_rate": 4.996330483833805e-06, "loss": 0.4158, "step": 18, "step_loss": 0.34048014879226685 }, { "epoch": 0.1314878892733564, "grad_norm": 1.670856201066122, "kl": 0.33413779735565186, "learning_rate": 4.995744414640892e-06, "loss": 0.4292, "step": 19, "step_loss": 0.43937426805496216 }, { "epoch": 0.1384083044982699, "grad_norm": 1.6831981787229338, "kl": 
0.2407480776309967, "learning_rate": 4.995114989639284e-06, "loss": 0.3814, "step": 20, "step_loss": 0.4492904245853424 }, { "epoch": 0.1453287197231834, "grad_norm": 1.3969690722166537, "kl": 0.180492103099823, "learning_rate": 4.994442220980511e-06, "loss": 0.4037, "step": 21, "step_loss": 0.48107844591140747 }, { "epoch": 0.1522491349480969, "grad_norm": 1.4856585123393111, "kl": 0.23951730132102966, "learning_rate": 4.993726121652886e-06, "loss": 0.3937, "step": 22, "step_loss": 0.4078594446182251 }, { "epoch": 0.15916955017301038, "grad_norm": 1.5255931763726926, "kl": 0.19424757361412048, "learning_rate": 4.992966705481252e-06, "loss": 0.3993, "step": 23, "step_loss": 0.4908221960067749 }, { "epoch": 0.16608996539792387, "grad_norm": 1.4694463823991162, "kl": 0.22843009233474731, "learning_rate": 4.992163987126718e-06, "loss": 0.3745, "step": 24, "step_loss": 0.3980112075805664 }, { "epoch": 0.17301038062283736, "grad_norm": 1.4031809483441833, "kl": 0.24388673901557922, "learning_rate": 4.991317982086373e-06, "loss": 0.3859, "step": 25, "step_loss": 0.43945300579071045 }, { "epoch": 0.17993079584775087, "grad_norm": 1.783322116186688, "kl": 0.21628138422966003, "learning_rate": 4.99042870669299e-06, "loss": 0.3954, "step": 26, "step_loss": 0.415144145488739 }, { "epoch": 0.18685121107266436, "grad_norm": 1.6189159794259094, "kl": 0.2958230972290039, "learning_rate": 4.98949617811471e-06, "loss": 0.376, "step": 27, "step_loss": 0.45900237560272217 }, { "epoch": 0.19377162629757785, "grad_norm": 1.4653606672580968, "kl": 0.20048989355564117, "learning_rate": 4.988520414354706e-06, "loss": 0.4019, "step": 28, "step_loss": 0.4015248119831085 }, { "epoch": 0.20069204152249134, "grad_norm": 1.558521087405915, "kl": 0.327812135219574, "learning_rate": 4.987501434250844e-06, "loss": 0.3708, "step": 29, "step_loss": 0.35921841859817505 }, { "epoch": 0.20761245674740483, "grad_norm": 1.5543813739518615, "kl": 0.19216293096542358, "learning_rate": 4.98643925747531e-06, 
"loss": 0.3936, "step": 30, "step_loss": 0.38434386253356934 }, { "epoch": 0.21453287197231835, "grad_norm": 1.5248132997887467, "kl": 0.19090020656585693, "learning_rate": 4.985333904534238e-06, "loss": 0.385, "step": 31, "step_loss": 0.4138329029083252 }, { "epoch": 0.22145328719723184, "grad_norm": 1.5438526567311406, "kl": 0.20692601799964905, "learning_rate": 4.98418539676731e-06, "loss": 0.3819, "step": 32, "step_loss": 0.3041204512119293 }, { "epoch": 0.22837370242214533, "grad_norm": 1.540173589410089, "kl": 0.18289028108119965, "learning_rate": 4.982993756347342e-06, "loss": 0.3844, "step": 33, "step_loss": 0.3742879629135132 }, { "epoch": 0.23529411764705882, "grad_norm": 1.416665310952529, "kl": 0.17084398865699768, "learning_rate": 4.981759006279864e-06, "loss": 0.3968, "step": 34, "step_loss": 0.3381184935569763 }, { "epoch": 0.2422145328719723, "grad_norm": 1.4562152285374372, "kl": 0.21493682265281677, "learning_rate": 4.980481170402666e-06, "loss": 0.4003, "step": 35, "step_loss": 0.36377060413360596 }, { "epoch": 0.2491349480968858, "grad_norm": 1.4487988462598347, "kl": 0.25848644971847534, "learning_rate": 4.979160273385345e-06, "loss": 0.3801, "step": 36, "step_loss": 0.5595243573188782 }, { "epoch": 0.2560553633217993, "grad_norm": 1.3253225042014576, "kl": 0.2266976535320282, "learning_rate": 4.977796340728825e-06, "loss": 0.373, "step": 37, "step_loss": 0.3860607445240021 }, { "epoch": 0.2629757785467128, "grad_norm": 1.325971001087887, "kl": 0.2184259593486786, "learning_rate": 4.976389398764865e-06, "loss": 0.3706, "step": 38, "step_loss": 0.40882933139801025 }, { "epoch": 0.2698961937716263, "grad_norm": 1.3782685618113322, "kl": 0.23910871148109436, "learning_rate": 4.9749394746555555e-06, "loss": 0.3619, "step": 39, "step_loss": 0.31151267886161804 }, { "epoch": 0.2768166089965398, "grad_norm": 1.3651511585812601, "kl": 0.25367894768714905, "learning_rate": 4.973446596392784e-06, "loss": 0.37, "step": 40, "step_loss": 0.4918205142021179 
}, { "epoch": 0.2837370242214533, "grad_norm": 1.4546794165889636, "kl": 0.21674197912216187, "learning_rate": 4.971910792797707e-06, "loss": 0.3829, "step": 41, "step_loss": 0.2661609947681427 }, { "epoch": 0.2906574394463668, "grad_norm": 1.4261420642555092, "kl": 0.21861481666564941, "learning_rate": 4.9703320935201815e-06, "loss": 0.3758, "step": 42, "step_loss": 0.3392365574836731 }, { "epoch": 0.2975778546712803, "grad_norm": 1.5783420828899348, "kl": 0.22008898854255676, "learning_rate": 4.968710529038203e-06, "loss": 0.3747, "step": 43, "step_loss": 0.38504350185394287 }, { "epoch": 0.3044982698961938, "grad_norm": 1.4387125623192067, "kl": 0.1690659523010254, "learning_rate": 4.96704613065731e-06, "loss": 0.3631, "step": 44, "step_loss": 0.28725486993789673 }, { "epoch": 0.31141868512110726, "grad_norm": 1.3802666801739574, "kl": 0.15376709401607513, "learning_rate": 4.965338930509982e-06, "loss": 0.4226, "step": 45, "step_loss": 0.27869558334350586 }, { "epoch": 0.31833910034602075, "grad_norm": 1.383562951200189, "kl": 0.27807509899139404, "learning_rate": 4.963588961555021e-06, "loss": 0.3669, "step": 46, "step_loss": 0.456065833568573 }, { "epoch": 0.32525951557093424, "grad_norm": 1.4143081367388621, "kl": 0.2472427487373352, "learning_rate": 4.961796257576911e-06, "loss": 0.3747, "step": 47, "step_loss": 0.3199511766433716 }, { "epoch": 0.33217993079584773, "grad_norm": 1.601111520679947, "kl": 0.2633623480796814, "learning_rate": 4.959960853185171e-06, "loss": 0.3866, "step": 48, "step_loss": 0.31443724036216736 }, { "epoch": 0.3391003460207612, "grad_norm": 1.5055736463632103, "kl": 0.2592260539531708, "learning_rate": 4.958082783813681e-06, "loss": 0.3824, "step": 49, "step_loss": 0.4724884331226349 }, { "epoch": 0.3460207612456747, "grad_norm": 1.5554336980743424, "kl": 0.30451565980911255, "learning_rate": 4.956162085720004e-06, "loss": 0.3809, "step": 50, "step_loss": 0.4805135726928711 }, { "epoch": 0.35294117647058826, "grad_norm": 
1.23443680620582, "kl": 0.2692873179912567, "learning_rate": 4.95419879598468e-06, "loss": 0.3791, "step": 51, "step_loss": 0.2908138036727905 }, { "epoch": 0.35986159169550175, "grad_norm": 1.295199265264476, "kl": 0.24315288662910461, "learning_rate": 4.952192952510517e-06, "loss": 0.3531, "step": 52, "step_loss": 0.38140785694122314 }, { "epoch": 0.36678200692041524, "grad_norm": 1.5294095871120157, "kl": 0.21896788477897644, "learning_rate": 4.950144594021851e-06, "loss": 0.3755, "step": 53, "step_loss": 0.4136822819709778 }, { "epoch": 0.3737024221453287, "grad_norm": 1.517795542260376, "kl": 0.2169652134180069, "learning_rate": 4.948053760063806e-06, "loss": 0.3801, "step": 54, "step_loss": 0.32112282514572144 }, { "epoch": 0.3806228373702422, "grad_norm": 1.2844613466219532, "kl": 0.21869295835494995, "learning_rate": 4.945920491001525e-06, "loss": 0.3608, "step": 55, "step_loss": 0.4134087562561035 }, { "epoch": 0.3875432525951557, "grad_norm": 1.374532620150807, "kl": 0.26340222358703613, "learning_rate": 4.9437448280193955e-06, "loss": 0.3599, "step": 56, "step_loss": 0.34752991795539856 }, { "epoch": 0.3944636678200692, "grad_norm": 1.5763688088518486, "kl": 0.32769620418548584, "learning_rate": 4.941526813120251e-06, "loss": 0.3866, "step": 57, "step_loss": 0.40359005331993103 }, { "epoch": 0.4013840830449827, "grad_norm": 1.355049046336102, "kl": 0.1679612398147583, "learning_rate": 4.939266489124559e-06, "loss": 0.3419, "step": 58, "step_loss": 0.2941555976867676 }, { "epoch": 0.4083044982698962, "grad_norm": 1.5010280265250127, "kl": 0.29010993242263794, "learning_rate": 4.9369638996696e-06, "loss": 0.3493, "step": 59, "step_loss": 0.37273305654525757 }, { "epoch": 0.41522491349480967, "grad_norm": 1.4798051056253203, "kl": 0.2425830364227295, "learning_rate": 4.934619089208618e-06, "loss": 0.3819, "step": 60, "step_loss": 0.4395100474357605 }, { "epoch": 0.42214532871972316, "grad_norm": 1.6541679539263094, "kl": 0.1941874623298645, "learning_rate": 
4.932232103009967e-06, "loss": 0.4022, "step": 61, "step_loss": 0.33487069606781006 }, { "epoch": 0.4290657439446367, "grad_norm": 1.383854575749936, "kl": 0.2648255527019501, "learning_rate": 4.929802987156238e-06, "loss": 0.3748, "step": 62, "step_loss": 0.3705274760723114 }, { "epoch": 0.4359861591695502, "grad_norm": 1.2807998796974045, "kl": 0.19192646443843842, "learning_rate": 4.927331788543364e-06, "loss": 0.3655, "step": 63, "step_loss": 0.2612764537334442 }, { "epoch": 0.4429065743944637, "grad_norm": 1.4280027040835297, "kl": 0.2673697769641876, "learning_rate": 4.924818554879719e-06, "loss": 0.4216, "step": 64, "step_loss": 0.47210419178009033 }, { "epoch": 0.44982698961937717, "grad_norm": 1.4090866301351332, "kl": 0.23192653059959412, "learning_rate": 4.922263334685196e-06, "loss": 0.414, "step": 65, "step_loss": 0.33031827211380005 }, { "epoch": 0.45674740484429066, "grad_norm": 1.4334024826639338, "kl": 0.20821967720985413, "learning_rate": 4.919666177290269e-06, "loss": 0.3858, "step": 66, "step_loss": 0.44853225350379944 }, { "epoch": 0.46366782006920415, "grad_norm": 1.3335579990808561, "kl": 0.21477144956588745, "learning_rate": 4.9170271328350435e-06, "loss": 0.3722, "step": 67, "step_loss": 0.3078211545944214 }, { "epoch": 0.47058823529411764, "grad_norm": 1.474983380110906, "kl": 0.24055710434913635, "learning_rate": 4.9143462522682835e-06, "loss": 0.3932, "step": 68, "step_loss": 0.4187226891517639 }, { "epoch": 0.47750865051903113, "grad_norm": 1.8476553467310821, "kl": 0.17827081680297852, "learning_rate": 4.911623587346435e-06, "loss": 0.4042, "step": 69, "step_loss": 0.3356584906578064 }, { "epoch": 0.4844290657439446, "grad_norm": 1.5343631316617512, "kl": 0.18888017535209656, "learning_rate": 4.90885919063262e-06, "loss": 0.3694, "step": 70, "step_loss": 0.35417163372039795 }, { "epoch": 0.4913494809688581, "grad_norm": 1.4570611127395907, "kl": 0.22511088848114014, "learning_rate": 4.906053115495624e-06, "loss": 0.3888, "step": 71, 
"step_loss": 0.36926978826522827 }, { "epoch": 0.4982698961937716, "grad_norm": 1.313023780578348, "kl": 0.2798750102519989, "learning_rate": 4.90320541610887e-06, "loss": 0.369, "step": 72, "step_loss": 0.4589427411556244 }, { "epoch": 0.5051903114186851, "grad_norm": 1.3427612748028703, "kl": 0.2155342400074005, "learning_rate": 4.900316147449365e-06, "loss": 0.3592, "step": 73, "step_loss": 0.31423911452293396 }, { "epoch": 0.5121107266435986, "grad_norm": 1.394208213512396, "kl": 0.24778303503990173, "learning_rate": 4.897385365296645e-06, "loss": 0.3474, "step": 74, "step_loss": 0.2751219868659973 }, { "epoch": 0.5190311418685121, "grad_norm": 1.4742334960458765, "kl": 0.19341081380844116, "learning_rate": 4.894413126231694e-06, "loss": 0.3721, "step": 75, "step_loss": 0.41064155101776123 }, { "epoch": 0.5259515570934256, "grad_norm": 1.5637455362794443, "kl": 0.29248443245887756, "learning_rate": 4.891399487635855e-06, "loss": 0.4012, "step": 76, "step_loss": 0.38351547718048096 }, { "epoch": 0.532871972318339, "grad_norm": 1.3390081263289173, "kl": 0.1861802488565445, "learning_rate": 4.888344507689717e-06, "loss": 0.3347, "step": 77, "step_loss": 0.3552294969558716 }, { "epoch": 0.5397923875432526, "grad_norm": 1.460547002326611, "kl": 0.20726068317890167, "learning_rate": 4.885248245372001e-06, "loss": 0.3635, "step": 78, "step_loss": 0.36633867025375366 }, { "epoch": 0.5467128027681661, "grad_norm": 1.3628315006113312, "kl": 0.24591244757175446, "learning_rate": 4.88211076045841e-06, "loss": 0.3711, "step": 79, "step_loss": 0.43157243728637695 }, { "epoch": 0.5536332179930796, "grad_norm": 1.3697791685203824, "kl": 0.1478469967842102, "learning_rate": 4.878932113520485e-06, "loss": 0.3661, "step": 80, "step_loss": 0.40548354387283325 }, { "epoch": 0.5605536332179931, "grad_norm": 1.4895258627877854, "kl": 0.2414688766002655, "learning_rate": 4.875712365924428e-06, "loss": 0.3826, "step": 81, "step_loss": 0.40023863315582275 }, { "epoch": 
0.5674740484429066, "grad_norm": 1.383439563019832, "kl": 0.2975599765777588, "learning_rate": 4.872451579829922e-06, "loss": 0.371, "step": 82, "step_loss": 0.504254937171936 }, { "epoch": 0.5743944636678201, "grad_norm": 1.3730171384887822, "kl": 0.19072100520133972, "learning_rate": 4.869149818188931e-06, "loss": 0.3582, "step": 83, "step_loss": 0.2598760724067688 }, { "epoch": 0.5813148788927336, "grad_norm": 1.378337191553647, "kl": 0.19758760929107666, "learning_rate": 4.86580714474448e-06, "loss": 0.3768, "step": 84, "step_loss": 0.34732717275619507 }, { "epoch": 0.5882352941176471, "grad_norm": 1.2778648223576135, "kl": 0.21345382928848267, "learning_rate": 4.86242362402943e-06, "loss": 0.3986, "step": 85, "step_loss": 0.34689581394195557 }, { "epoch": 0.5951557093425606, "grad_norm": 1.3502354363607967, "kl": 0.34025681018829346, "learning_rate": 4.8589993213652295e-06, "loss": 0.3759, "step": 86, "step_loss": 0.3826929032802582 }, { "epoch": 0.6020761245674741, "grad_norm": 1.3572263727806926, "kl": 0.23151834309101105, "learning_rate": 4.855534302860651e-06, "loss": 0.3872, "step": 87, "step_loss": 0.304655522108078 }, { "epoch": 0.6089965397923875, "grad_norm": 1.388769733827819, "kl": 0.22393186390399933, "learning_rate": 4.852028635410521e-06, "loss": 0.3681, "step": 88, "step_loss": 0.45336365699768066 }, { "epoch": 0.615916955017301, "grad_norm": 1.3313564705695071, "kl": 0.2901771664619446, "learning_rate": 4.848482386694424e-06, "loss": 0.397, "step": 89, "step_loss": 0.46198803186416626 }, { "epoch": 0.6228373702422145, "grad_norm": 1.3494487838043665, "kl": 0.2241673767566681, "learning_rate": 4.844895625175394e-06, "loss": 0.396, "step": 90, "step_loss": 0.35387957096099854 }, { "epoch": 0.629757785467128, "grad_norm": 1.3776591770123285, "kl": 0.22018107771873474, "learning_rate": 4.8412684200986e-06, "loss": 0.3566, "step": 91, "step_loss": 0.35373783111572266 }, { "epoch": 0.6366782006920415, "grad_norm": 1.421406763132671, "kl": 
0.21698667109012604, "learning_rate": 4.83760084149e-06, "loss": 0.3787, "step": 92, "step_loss": 0.4331267476081848 }, { "epoch": 0.643598615916955, "grad_norm": 1.407370650613625, "kl": 0.22892139852046967, "learning_rate": 4.833892960154997e-06, "loss": 0.3804, "step": 93, "step_loss": 0.32545018196105957 }, { "epoch": 0.6505190311418685, "grad_norm": 1.336826274210351, "kl": 0.2827298641204834, "learning_rate": 4.830144847677069e-06, "loss": 0.3641, "step": 94, "step_loss": 0.30257558822631836 }, { "epoch": 0.657439446366782, "grad_norm": 1.433610630195877, "kl": 0.1638009250164032, "learning_rate": 4.826356576416386e-06, "loss": 0.3824, "step": 95, "step_loss": 0.32953792810440063 }, { "epoch": 0.6643598615916955, "grad_norm": 1.3745608061838699, "kl": 0.3214288353919983, "learning_rate": 4.822528219508414e-06, "loss": 0.3602, "step": 96, "step_loss": 0.3973972201347351 }, { "epoch": 0.671280276816609, "grad_norm": 1.4329055672707731, "kl": 0.1840999275445938, "learning_rate": 4.8186598508625e-06, "loss": 0.3697, "step": 97, "step_loss": 0.2722057104110718 }, { "epoch": 0.6782006920415224, "grad_norm": 1.3478203272796772, "kl": 0.21309396624565125, "learning_rate": 4.814751545160454e-06, "loss": 0.3651, "step": 98, "step_loss": 0.30481356382369995 }, { "epoch": 0.6851211072664359, "grad_norm": 1.322543184119783, "kl": 0.25588440895080566, "learning_rate": 4.810803377855098e-06, "loss": 0.3572, "step": 99, "step_loss": 0.4064578413963318 }, { "epoch": 0.6920415224913494, "grad_norm": 1.1707749353327868, "kl": 0.1975734829902649, "learning_rate": 4.8068154251688135e-06, "loss": 0.3388, "step": 100, "step_loss": 0.2925851345062256 }, { "epoch": 0.698961937716263, "grad_norm": 1.3428773042214672, "kl": 0.17947202920913696, "learning_rate": 4.802787764092066e-06, "loss": 0.389, "step": 101, "step_loss": 0.2371835857629776 }, { "epoch": 0.7058823529411765, "grad_norm": 1.3871380239292772, "kl": 0.29313144087791443, "learning_rate": 4.798720472381926e-06, "loss": 
0.3985, "step": 102, "step_loss": 0.4529065489768982 }, { "epoch": 0.71280276816609, "grad_norm": 1.5423920531501476, "kl": 0.26499220728874207, "learning_rate": 4.794613628560563e-06, "loss": 0.4161, "step": 103, "step_loss": 0.6960321664810181 }, { "epoch": 0.7197231833910035, "grad_norm": 1.3211232241292787, "kl": 0.19432584941387177, "learning_rate": 4.790467311913727e-06, "loss": 0.3422, "step": 104, "step_loss": 0.3973465859889984 }, { "epoch": 0.726643598615917, "grad_norm": 1.3062802032921341, "kl": 0.2566191852092743, "learning_rate": 4.786281602489225e-06, "loss": 0.3477, "step": 105, "step_loss": 0.23580798506736755 }, { "epoch": 0.7335640138408305, "grad_norm": 2.047715342170047, "kl": 0.2970131039619446, "learning_rate": 4.78205658109537e-06, "loss": 0.3866, "step": 106, "step_loss": 0.41464555263519287 }, { "epoch": 0.740484429065744, "grad_norm": 1.5113338798675648, "kl": 0.18342912197113037, "learning_rate": 4.777792329299421e-06, "loss": 0.363, "step": 107, "step_loss": 0.2928502559661865 }, { "epoch": 0.7474048442906575, "grad_norm": 1.505280577653933, "kl": 0.19258533418178558, "learning_rate": 4.77348892942601e-06, "loss": 0.3898, "step": 108, "step_loss": 0.31177830696105957 }, { "epoch": 0.754325259515571, "grad_norm": 1.222800834321301, "kl": 0.2556228041648865, "learning_rate": 4.769146464555557e-06, "loss": 0.3757, "step": 109, "step_loss": 0.37278926372528076 }, { "epoch": 0.7612456747404844, "grad_norm": 1.3662148639876117, "kl": 0.3114456236362457, "learning_rate": 4.764765018522655e-06, "loss": 0.3983, "step": 110, "step_loss": 0.521002471446991 }, { "epoch": 0.7681660899653979, "grad_norm": 1.3423141526316273, "kl": 0.23048266768455505, "learning_rate": 4.760344675914464e-06, "loss": 0.3682, "step": 111, "step_loss": 0.37016260623931885 }, { "epoch": 0.7750865051903114, "grad_norm": 1.3141966253497102, "kl": 0.1948433220386505, "learning_rate": 4.755885522069067e-06, "loss": 0.3475, "step": 112, "step_loss": 0.346007764339447 }, { 
"epoch": 0.7820069204152249, "grad_norm": 1.3990482296704756, "kl": 0.1630229651927948, "learning_rate": 4.751387643073832e-06, "loss": 0.3755, "step": 113, "step_loss": 0.308979332447052 }, { "epoch": 0.7889273356401384, "grad_norm": 1.2785394887215162, "kl": 0.16570787131786346, "learning_rate": 4.7468511257637415e-06, "loss": 0.3277, "step": 114, "step_loss": 0.22129052877426147 }, { "epoch": 0.7958477508650519, "grad_norm": 1.4921344527956342, "kl": 0.38078194856643677, "learning_rate": 4.742276057719722e-06, "loss": 0.3648, "step": 115, "step_loss": 0.3642709255218506 }, { "epoch": 0.8027681660899654, "grad_norm": 1.4326341624122143, "kl": 0.29677197337150574, "learning_rate": 4.737662527266954e-06, "loss": 0.3837, "step": 116, "step_loss": 0.47437724471092224 }, { "epoch": 0.8096885813148789, "grad_norm": 1.412356237674289, "kl": 0.2844409942626953, "learning_rate": 4.733010623473159e-06, "loss": 0.3519, "step": 117, "step_loss": 0.3444381356239319 }, { "epoch": 0.8166089965397924, "grad_norm": 1.2998723242492165, "kl": 0.19050753116607666, "learning_rate": 4.7283204361468875e-06, "loss": 0.3842, "step": 118, "step_loss": 0.4003710448741913 }, { "epoch": 0.8235294117647058, "grad_norm": 1.5217225423347087, "kl": 0.2770290970802307, "learning_rate": 4.723592055835785e-06, "loss": 0.3818, "step": 119, "step_loss": 0.417021781206131 }, { "epoch": 0.8304498269896193, "grad_norm": 1.5271566525179912, "kl": 0.28011956810951233, "learning_rate": 4.718825573824837e-06, "loss": 0.3914, "step": 120, "step_loss": 0.48274630308151245 }, { "epoch": 0.8373702422145328, "grad_norm": 1.471213040855903, "kl": 0.19891366362571716, "learning_rate": 4.714021082134617e-06, "loss": 0.3577, "step": 121, "step_loss": 0.37855416536331177 }, { "epoch": 0.8442906574394463, "grad_norm": 1.5083945934578107, "kl": 0.1857198178768158, "learning_rate": 4.7091786735194995e-06, "loss": 0.384, "step": 122, "step_loss": 0.26723700761795044 }, { "epoch": 0.8512110726643599, "grad_norm": 
1.4296741589199946, "kl": 0.2854173183441162, "learning_rate": 4.704298441465875e-06, "loss": 0.3846, "step": 123, "step_loss": 0.4690515100955963 }, { "epoch": 0.8581314878892734, "grad_norm": 1.4062251618628943, "kl": 0.34962254762649536, "learning_rate": 4.6993804801903476e-06, "loss": 0.392, "step": 124, "step_loss": 0.41900360584259033 }, { "epoch": 0.8650519031141869, "grad_norm": 1.3395204737950765, "kl": 0.23505759239196777, "learning_rate": 4.694424884637909e-06, "loss": 0.3513, "step": 125, "step_loss": 0.3635313808917999 }, { "epoch": 0.8719723183391004, "grad_norm": 1.2735377547228304, "kl": 0.2686963975429535, "learning_rate": 4.6894317504801115e-06, "loss": 0.39, "step": 126, "step_loss": 0.4025835394859314 }, { "epoch": 0.8788927335640139, "grad_norm": 1.3045818720106488, "kl": 0.17857038974761963, "learning_rate": 4.684401174113218e-06, "loss": 0.3943, "step": 127, "step_loss": 0.3760674297809601 }, { "epoch": 0.8858131487889274, "grad_norm": 1.2042544129464812, "kl": 0.238363578915596, "learning_rate": 4.6793332526563414e-06, "loss": 0.3645, "step": 128, "step_loss": 0.244846910238266 }, { "epoch": 0.8927335640138409, "grad_norm": 1.3888635945665935, "kl": 0.21878215670585632, "learning_rate": 4.674228083949571e-06, "loss": 0.3676, "step": 129, "step_loss": 0.2716182470321655 }, { "epoch": 0.8996539792387543, "grad_norm": 1.3887094742074968, "kl": 0.15979516506195068, "learning_rate": 4.669085766552083e-06, "loss": 0.3769, "step": 130, "step_loss": 0.290435254573822 }, { "epoch": 0.9065743944636678, "grad_norm": 1.2437583539672625, "kl": 0.16919700801372528, "learning_rate": 4.663906399740235e-06, "loss": 0.334, "step": 131, "step_loss": 0.35298505425453186 }, { "epoch": 0.9134948096885813, "grad_norm": 1.4044581123072946, "kl": 0.28825414180755615, "learning_rate": 4.658690083505655e-06, "loss": 0.3824, "step": 132, "step_loss": 0.4055424928665161 }, { "epoch": 0.9204152249134948, "grad_norm": 1.3097689689140741, "kl": 0.21404924988746643, 
"learning_rate": 4.653436918553306e-06, "loss": 0.3784, "step": 133, "step_loss": 0.31625741720199585 }, { "epoch": 0.9273356401384083, "grad_norm": 1.331091764833466, "kl": 0.21199432015419006, "learning_rate": 4.648147006299543e-06, "loss": 0.3517, "step": 134, "step_loss": 0.3075428903102875 }, { "epoch": 0.9342560553633218, "grad_norm": 1.3408510297686604, "kl": 0.22315078973770142, "learning_rate": 4.642820448870158e-06, "loss": 0.4066, "step": 135, "step_loss": 0.43117380142211914 }, { "epoch": 0.9411764705882353, "grad_norm": 1.3530846992816732, "kl": 0.20779642462730408, "learning_rate": 4.6374573490984035e-06, "loss": 0.3656, "step": 136, "step_loss": 0.41166335344314575 }, { "epoch": 0.9480968858131488, "grad_norm": 1.312786161925372, "kl": 0.22472918033599854, "learning_rate": 4.63205781052301e-06, "loss": 0.3905, "step": 137, "step_loss": 0.3693259060382843 }, { "epoch": 0.9550173010380623, "grad_norm": 1.5266957969607173, "kl": 0.3166002035140991, "learning_rate": 4.6266219373861904e-06, "loss": 0.42, "step": 138, "step_loss": 0.42580902576446533 }, { "epoch": 0.9619377162629758, "grad_norm": 1.2917022495085693, "kl": 0.23865285515785217, "learning_rate": 4.62114983463162e-06, "loss": 0.3538, "step": 139, "step_loss": 0.26485127210617065 }, { "epoch": 0.9688581314878892, "grad_norm": 1.3692152408396865, "kl": 0.25416556000709534, "learning_rate": 4.615641607902414e-06, "loss": 0.3838, "step": 140, "step_loss": 0.4169631004333496 }, { "epoch": 0.9757785467128027, "grad_norm": 1.2404770501076883, "kl": 0.23534110188484192, "learning_rate": 4.610097363539093e-06, "loss": 0.3681, "step": 141, "step_loss": 0.3129902780056 }, { "epoch": 0.9826989619377162, "grad_norm": 1.3848019959659617, "kl": 0.2245711237192154, "learning_rate": 4.604517208577523e-06, "loss": 0.3658, "step": 142, "step_loss": 0.29483580589294434 }, { "epoch": 0.9896193771626297, "grad_norm": 1.2618562133357965, "kl": 0.1994638293981552, "learning_rate": 4.598901250746849e-06, "loss": 
0.3495, "step": 143, "step_loss": 0.33490532636642456 }, { "epoch": 0.9965397923875432, "grad_norm": 1.4004416143905367, "kl": 0.24887260794639587, "learning_rate": 4.5932495984674225e-06, "loss": 0.3727, "step": 144, "step_loss": 0.4149697721004486 }, { "epoch": 0.9965397923875432, "eval_test_transformed.json_loss": null, "eval_test_transformed.json_runtime": 62.546, "eval_test_transformed.json_samples_per_second": 7.994, "eval_test_transformed.json_steps_per_second": 0.512, "step": 144 }, { "epoch": 1.0034602076124568, "grad_norm": 1.42462763689625, "kl": null, "learning_rate": 4.5875623608487e-06, "loss": 0.3335, "step": 145, "step_loss": null }, { "epoch": 1.0103806228373702, "grad_norm": 1.3192537114111225, "kl": 0.2632783055305481, "learning_rate": 4.5818396476871405e-06, "loss": 0.3013, "step": 146, "step_loss": 0.23957139253616333 }, { "epoch": 1.0173010380622838, "grad_norm": 1.2141916174453322, "kl": 0.3001609146595001, "learning_rate": 4.576081569464087e-06, "loss": 0.2829, "step": 147, "step_loss": 0.32372355461120605 }, { "epoch": 1.0242214532871972, "grad_norm": 1.2504083919961078, "kl": 0.31938454508781433, "learning_rate": 4.570288237343632e-06, "loss": 0.2866, "step": 148, "step_loss": 0.29251033067703247 }, { "epoch": 1.0311418685121108, "grad_norm": 1.354969295421274, "kl": 0.23495393991470337, "learning_rate": 4.564459763170469e-06, "loss": 0.266, "step": 149, "step_loss": 0.2911420166492462 }, { "epoch": 1.0380622837370241, "grad_norm": 1.4258460461250249, "kl": 0.2685220241546631, "learning_rate": 4.558596259467738e-06, "loss": 0.2505, "step": 150, "step_loss": 0.29984840750694275 }, { "epoch": 1.0449826989619377, "grad_norm": 1.6883091576073284, "kl": 0.37581148743629456, "learning_rate": 4.55269783943485e-06, "loss": 0.2525, "step": 151, "step_loss": 0.20608901977539062 }, { "epoch": 1.0519031141868511, "grad_norm": 1.7436495406770034, "kl": 0.3008899986743927, "learning_rate": 4.546764616945302e-06, "loss": 0.2686, "step": 152, "step_loss": 
0.2491491436958313 }, { "epoch": 1.0588235294117647, "grad_norm": 1.856245918140997, "kl": 0.29776036739349365, "learning_rate": 4.540796706544478e-06, "loss": 0.265, "step": 153, "step_loss": 0.2793770432472229 }, { "epoch": 1.065743944636678, "grad_norm": 1.6151887640457867, "kl": 0.3980569541454315, "learning_rate": 4.534794223447443e-06, "loss": 0.2521, "step": 154, "step_loss": 0.22832265496253967 }, { "epoch": 1.0726643598615917, "grad_norm": 1.3325828685364087, "kl": 0.38784676790237427, "learning_rate": 4.528757283536708e-06, "loss": 0.2749, "step": 155, "step_loss": 0.4221150875091553 }, { "epoch": 1.0795847750865053, "grad_norm": 1.4404789285810164, "kl": 0.21392418444156647, "learning_rate": 4.522686003360004e-06, "loss": 0.2737, "step": 156, "step_loss": 0.19120973348617554 }, { "epoch": 1.0865051903114187, "grad_norm": 1.357830880658549, "kl": 0.2654949426651001, "learning_rate": 4.516580500128024e-06, "loss": 0.2677, "step": 157, "step_loss": 0.21460048854351044 }, { "epoch": 1.0934256055363323, "grad_norm": 1.3221135692984116, "kl": 0.15843716263771057, "learning_rate": 4.510440891712165e-06, "loss": 0.2519, "step": 158, "step_loss": 0.18512749671936035 }, { "epoch": 1.1003460207612457, "grad_norm": 1.3475017514332446, "kl": 0.18367037177085876, "learning_rate": 4.504267296642251e-06, "loss": 0.2476, "step": 159, "step_loss": 0.15729182958602905 }, { "epoch": 1.1072664359861593, "grad_norm": 1.3791356746883165, "kl": 0.302768737077713, "learning_rate": 4.498059834104242e-06, "loss": 0.2925, "step": 160, "step_loss": 0.2958143949508667 }, { "epoch": 1.1141868512110726, "grad_norm": 1.361996195329906, "kl": 0.26315340399742126, "learning_rate": 4.491818623937936e-06, "loss": 0.2753, "step": 161, "step_loss": 0.2913956642150879 }, { "epoch": 1.1211072664359862, "grad_norm": 1.1777652464405912, "kl": 0.2690778374671936, "learning_rate": 4.485543786634656e-06, "loss": 0.2618, "step": 162, "step_loss": 0.31568747758865356 }, { "epoch": 1.1280276816608996, 
"grad_norm": 1.3909504113233366, "kl": 0.25521889328956604, "learning_rate": 4.479235443334923e-06, "loss": 0.239, "step": 163, "step_loss": 0.33352556824684143 }, { "epoch": 1.1349480968858132, "grad_norm": 1.4612174352406104, "kl": 0.33453723788261414, "learning_rate": 4.472893715826114e-06, "loss": 0.2401, "step": 164, "step_loss": 0.221056267619133 }, { "epoch": 1.1418685121107266, "grad_norm": 1.533366006351055, "kl": 0.35625484585762024, "learning_rate": 4.4665187265401155e-06, "loss": 0.2843, "step": 165, "step_loss": 0.20856904983520508 }, { "epoch": 1.1487889273356402, "grad_norm": 1.4172886249839125, "kl": 0.31244146823883057, "learning_rate": 4.460110598550959e-06, "loss": 0.2866, "step": 166, "step_loss": 0.24128484725952148 }, { "epoch": 1.1557093425605536, "grad_norm": 1.3619724757344411, "kl": 0.1838628351688385, "learning_rate": 4.45366945557244e-06, "loss": 0.2455, "step": 167, "step_loss": 0.16488447785377502 }, { "epoch": 1.1626297577854672, "grad_norm": 1.2150542637987511, "kl": 0.3553676903247833, "learning_rate": 4.447195421955738e-06, "loss": 0.261, "step": 168, "step_loss": 0.28751328587532043 }, { "epoch": 1.1695501730103806, "grad_norm": 1.4054628076263387, "kl": 0.4374169707298279, "learning_rate": 4.440688622687011e-06, "loss": 0.2684, "step": 169, "step_loss": 0.17160476744174957 }, { "epoch": 1.1764705882352942, "grad_norm": 1.2917992411021666, "kl": 0.1984192579984665, "learning_rate": 4.434149183384978e-06, "loss": 0.2734, "step": 170, "step_loss": 0.28081196546554565 }, { "epoch": 1.1833910034602075, "grad_norm": 1.4840474620642776, "kl": 0.25900912284851074, "learning_rate": 4.427577230298504e-06, "loss": 0.2844, "step": 171, "step_loss": 0.17997631430625916 }, { "epoch": 1.1903114186851211, "grad_norm": 1.3421246657135284, "kl": 0.34312140941619873, "learning_rate": 4.420972890304156e-06, "loss": 0.2517, "step": 172, "step_loss": 0.20626190304756165 }, { "epoch": 1.1972318339100345, "grad_norm": 1.3359994706621092, "kl": 
0.24445146322250366, "learning_rate": 4.414336290903755e-06, "loss": 0.2703, "step": 173, "step_loss": 0.19938965141773224 }, { "epoch": 1.2041522491349481, "grad_norm": 1.4389206044904708, "kl": 0.26172447204589844, "learning_rate": 4.407667560221911e-06, "loss": 0.2568, "step": 174, "step_loss": 0.2975597679615021 }, { "epoch": 1.2110726643598615, "grad_norm": 1.5272635124184517, "kl": 0.27965497970581055, "learning_rate": 4.400966827003559e-06, "loss": 0.2631, "step": 175, "step_loss": 0.20503826439380646 }, { "epoch": 1.217993079584775, "grad_norm": 1.3771679433356363, "kl": 0.3378610908985138, "learning_rate": 4.394234220611464e-06, "loss": 0.2656, "step": 176, "step_loss": 0.33795469999313354 }, { "epoch": 1.2249134948096887, "grad_norm": 1.4813398953614931, "kl": 0.21421468257904053, "learning_rate": 4.387469871023726e-06, "loss": 0.2708, "step": 177, "step_loss": 0.23807582259178162 }, { "epoch": 1.231833910034602, "grad_norm": 1.3367142231885196, "kl": 0.3515172004699707, "learning_rate": 4.380673908831271e-06, "loss": 0.2517, "step": 178, "step_loss": 0.27543121576309204 }, { "epoch": 1.2387543252595155, "grad_norm": 1.447369982294217, "kl": 0.23139739036560059, "learning_rate": 4.373846465235337e-06, "loss": 0.2369, "step": 179, "step_loss": 0.3218502402305603 }, { "epoch": 1.245674740484429, "grad_norm": 1.3897095999859435, "kl": 0.424311101436615, "learning_rate": 4.366987672044926e-06, "loss": 0.2722, "step": 180, "step_loss": 0.3534911870956421 }, { "epoch": 1.2525951557093427, "grad_norm": 1.4932577878428657, "kl": 0.315548300743103, "learning_rate": 4.360097661674273e-06, "loss": 0.2708, "step": 181, "step_loss": 0.21629858016967773 }, { "epoch": 1.259515570934256, "grad_norm": 1.287133548029122, "kl": 0.2639133036136627, "learning_rate": 4.353176567140282e-06, "loss": 0.2556, "step": 182, "step_loss": 0.22756239771842957 }, { "epoch": 1.2664359861591694, "grad_norm": 1.3774653354582462, "kl": 0.2900717854499817, "learning_rate": 
4.346224522059963e-06, "loss": 0.2762, "step": 183, "step_loss": 0.2907130718231201 }, { "epoch": 1.273356401384083, "grad_norm": 1.4876066666513454, "kl": 0.24678638577461243, "learning_rate": 4.339241660647847e-06, "loss": 0.2518, "step": 184, "step_loss": 0.23766303062438965 }, { "epoch": 1.2802768166089966, "grad_norm": 1.458703847774484, "kl": 0.36160391569137573, "learning_rate": 4.332228117713401e-06, "loss": 0.2883, "step": 185, "step_loss": 0.2911599278450012 }, { "epoch": 1.28719723183391, "grad_norm": 1.3614445859550144, "kl": 0.3728656768798828, "learning_rate": 4.325184028658418e-06, "loss": 0.2971, "step": 186, "step_loss": 0.3668380379676819 }, { "epoch": 1.2941176470588236, "grad_norm": 1.376543825609578, "kl": 0.28326430916786194, "learning_rate": 4.318109529474412e-06, "loss": 0.2747, "step": 187, "step_loss": 0.27671247720718384 }, { "epoch": 1.301038062283737, "grad_norm": 1.325706973493193, "kl": 0.2098829746246338, "learning_rate": 4.311004756739982e-06, "loss": 0.2636, "step": 188, "step_loss": 0.22396397590637207 }, { "epoch": 1.3079584775086506, "grad_norm": 1.4140405811276011, "kl": 0.3520324230194092, "learning_rate": 4.3038698476181875e-06, "loss": 0.2904, "step": 189, "step_loss": 0.31357622146606445 }, { "epoch": 1.314878892733564, "grad_norm": 1.416415369895531, "kl": 0.24368271231651306, "learning_rate": 4.296704939853889e-06, "loss": 0.2749, "step": 190, "step_loss": 0.2540474534034729 }, { "epoch": 1.3217993079584776, "grad_norm": 1.2430570905499538, "kl": 0.2207806259393692, "learning_rate": 4.289510171771096e-06, "loss": 0.2653, "step": 191, "step_loss": 0.22346214950084686 }, { "epoch": 1.328719723183391, "grad_norm": 1.4543085508162197, "kl": 0.3568841516971588, "learning_rate": 4.282285682270294e-06, "loss": 0.2672, "step": 192, "step_loss": 0.3136281371116638 }, { "epoch": 1.3356401384083045, "grad_norm": 1.358672607309371, "kl": 0.2674166262149811, "learning_rate": 4.275031610825762e-06, "loss": 0.2353, "step": 193, 
"step_loss": 0.15774530172348022 }, { "epoch": 1.342560553633218, "grad_norm": 1.4412614289597345, "kl": 0.4660484194755554, "learning_rate": 4.267748097482882e-06, "loss": 0.2752, "step": 194, "step_loss": 0.25726318359375 }, { "epoch": 1.3494809688581315, "grad_norm": 1.4310449711566218, "kl": 0.2126007378101349, "learning_rate": 4.260435282855434e-06, "loss": 0.2684, "step": 195, "step_loss": 0.2513048052787781 }, { "epoch": 1.356401384083045, "grad_norm": 1.4403806930226881, "kl": 0.2859874367713928, "learning_rate": 4.253093308122881e-06, "loss": 0.2733, "step": 196, "step_loss": 0.35427212715148926 }, { "epoch": 1.3633217993079585, "grad_norm": 1.465834137853272, "kl": 0.20382992923259735, "learning_rate": 4.245722315027646e-06, "loss": 0.2519, "step": 197, "step_loss": 0.22243735194206238 }, { "epoch": 1.370242214532872, "grad_norm": 1.5147380003037518, "kl": 0.3818252682685852, "learning_rate": 4.238322445872371e-06, "loss": 0.2819, "step": 198, "step_loss": 0.4146631360054016 }, { "epoch": 1.3771626297577855, "grad_norm": 1.404666093402221, "kl": 0.3119218349456787, "learning_rate": 4.230893843517176e-06, "loss": 0.2646, "step": 199, "step_loss": 0.23590320348739624 }, { "epoch": 1.3840830449826989, "grad_norm": 1.473321981745929, "kl": 0.21454200148582458, "learning_rate": 4.223436651376892e-06, "loss": 0.2853, "step": 200, "step_loss": 0.30393069982528687 }, { "epoch": 1.3910034602076125, "grad_norm": 1.4344827250001912, "kl": 0.19859302043914795, "learning_rate": 4.215951013418302e-06, "loss": 0.2813, "step": 201, "step_loss": 0.21350112557411194 }, { "epoch": 1.397923875432526, "grad_norm": 1.4967966843666693, "kl": 0.21270081400871277, "learning_rate": 4.208437074157357e-06, "loss": 0.2504, "step": 202, "step_loss": 0.22422298789024353 }, { "epoch": 1.4048442906574394, "grad_norm": 1.2835703053019552, "kl": 0.23075464367866516, "learning_rate": 4.200894978656384e-06, "loss": 0.2556, "step": 203, "step_loss": 0.26125672459602356 }, { "epoch": 
1.4117647058823528, "grad_norm": 1.337236052164568, "kl": 0.28172191977500916, "learning_rate": 4.193324872521289e-06, "loss": 0.2647, "step": 204, "step_loss": 0.3169599771499634 }, { "epoch": 1.4186851211072664, "grad_norm": 1.3675076790965381, "kl": 0.2751786708831787, "learning_rate": 4.185726901898745e-06, "loss": 0.2762, "step": 205, "step_loss": 0.29673999547958374 }, { "epoch": 1.42560553633218, "grad_norm": 1.4121561847278092, "kl": 0.48854154348373413, "learning_rate": 4.1781012134733685e-06, "loss": 0.2769, "step": 206, "step_loss": 0.23193585872650146 }, { "epoch": 1.4325259515570934, "grad_norm": 1.3796794333821554, "kl": 0.2728467583656311, "learning_rate": 4.170447954464891e-06, "loss": 0.2654, "step": 207, "step_loss": 0.29512521624565125 }, { "epoch": 1.439446366782007, "grad_norm": 1.309489931147239, "kl": 0.30353325605392456, "learning_rate": 4.1627672726253145e-06, "loss": 0.2689, "step": 208, "step_loss": 0.24627241492271423 }, { "epoch": 1.4463667820069204, "grad_norm": 1.2760063426000636, "kl": 0.36207133531570435, "learning_rate": 4.15505931623606e-06, "loss": 0.2725, "step": 209, "step_loss": 0.31113266944885254 }, { "epoch": 1.453287197231834, "grad_norm": 1.2376211916216977, "kl": 0.2999940514564514, "learning_rate": 4.147324234105105e-06, "loss": 0.2412, "step": 210, "step_loss": 0.25046539306640625 }, { "epoch": 1.4602076124567474, "grad_norm": 1.3818826943834186, "kl": 0.2346179485321045, "learning_rate": 4.139562175564108e-06, "loss": 0.2698, "step": 211, "step_loss": 0.226304292678833 }, { "epoch": 1.467128027681661, "grad_norm": 1.342163746527023, "kl": 0.23285089433193207, "learning_rate": 4.131773290465529e-06, "loss": 0.257, "step": 212, "step_loss": 0.19870123267173767 }, { "epoch": 1.4740484429065743, "grad_norm": 1.3813245278529478, "kl": 0.31553322076797485, "learning_rate": 4.123957729179735e-06, "loss": 0.2902, "step": 213, "step_loss": 0.4204574525356293 }, { "epoch": 1.480968858131488, "grad_norm": 1.4175511105050729, 
"kl": 0.34834322333335876, "learning_rate": 4.116115642592101e-06, "loss": 0.2824, "step": 214, "step_loss": 0.3582271933555603 }, { "epoch": 1.4878892733564013, "grad_norm": 1.4069114824714055, "kl": 0.3311484456062317, "learning_rate": 4.108247182100085e-06, "loss": 0.2591, "step": 215, "step_loss": 0.19999107718467712 }, { "epoch": 1.494809688581315, "grad_norm": 1.407164636502866, "kl": 0.2831442058086395, "learning_rate": 4.10035249961032e-06, "loss": 0.2922, "step": 216, "step_loss": 0.32796311378479004 }, { "epoch": 1.5017301038062283, "grad_norm": 1.330229942407144, "kl": 0.22607722878456116, "learning_rate": 4.092431747535671e-06, "loss": 0.2884, "step": 217, "step_loss": 0.313655287027359 }, { "epoch": 1.508650519031142, "grad_norm": 1.278525316368922, "kl": 0.16817724704742432, "learning_rate": 4.084485078792299e-06, "loss": 0.2998, "step": 218, "step_loss": 0.26590466499328613 }, { "epoch": 1.5155709342560555, "grad_norm": 1.4321083460156858, "kl": 0.4326663017272949, "learning_rate": 4.076512646796705e-06, "loss": 0.2689, "step": 219, "step_loss": 0.2319493293762207 }, { "epoch": 1.5224913494809689, "grad_norm": 1.4260076473721204, "kl": 0.25795766711235046, "learning_rate": 4.068514605462769e-06, "loss": 0.25, "step": 220, "step_loss": 0.2118414342403412 }, { "epoch": 1.5294117647058822, "grad_norm": 1.274973124800528, "kl": 0.3503516912460327, "learning_rate": 4.0604911091987785e-06, "loss": 0.2654, "step": 221, "step_loss": 0.2554680407047272 }, { "epoch": 1.5363321799307958, "grad_norm": 1.3759158292298865, "kl": 0.2329028844833374, "learning_rate": 4.052442312904448e-06, "loss": 0.2818, "step": 222, "step_loss": 0.28536927700042725 }, { "epoch": 1.5432525951557095, "grad_norm": 1.4160969467488984, "kl": 0.2772873640060425, "learning_rate": 4.044368371967929e-06, "loss": 0.2916, "step": 223, "step_loss": 0.21635562181472778 }, { "epoch": 1.5501730103806228, "grad_norm": 1.2813015438969948, "kl": 0.2725520133972168, "learning_rate": 
4.036269442262808e-06, "loss": 0.2593, "step": 224, "step_loss": 0.2558884918689728 }, { "epoch": 1.5570934256055362, "grad_norm": 1.3235204875557913, "kl": 0.27981454133987427, "learning_rate": 4.028145680145101e-06, "loss": 0.2766, "step": 225, "step_loss": 0.2663520574569702 }, { "epoch": 1.5640138408304498, "grad_norm": 1.4354442326187724, "kl": 0.3738369345664978, "learning_rate": 4.019997242450231e-06, "loss": 0.2694, "step": 226, "step_loss": 0.3427594304084778 }, { "epoch": 1.5709342560553634, "grad_norm": 1.3505035838673114, "kl": 0.4058971107006073, "learning_rate": 4.011824286490002e-06, "loss": 0.2571, "step": 227, "step_loss": 0.29677870869636536 }, { "epoch": 1.5778546712802768, "grad_norm": 1.4691599821536188, "kl": 0.3535602390766144, "learning_rate": 4.003626970049564e-06, "loss": 0.272, "step": 228, "step_loss": 0.2732452154159546 }, { "epoch": 1.5847750865051902, "grad_norm": 1.378960411153587, "kl": 0.38157516717910767, "learning_rate": 3.99540545138436e-06, "loss": 0.2554, "step": 229, "step_loss": 0.26657626032829285 }, { "epoch": 1.5916955017301038, "grad_norm": 1.6443342149722984, "kl": 0.39396223425865173, "learning_rate": 3.987159889217079e-06, "loss": 0.2484, "step": 230, "step_loss": 0.14677917957305908 }, { "epoch": 1.5986159169550174, "grad_norm": 1.2346423861469087, "kl": 0.2936357259750366, "learning_rate": 3.9788904427345885e-06, "loss": 0.2663, "step": 231, "step_loss": 0.2197728157043457 }, { "epoch": 1.6055363321799307, "grad_norm": 1.4042077812190041, "kl": 0.1939917653799057, "learning_rate": 3.970597271584858e-06, "loss": 0.2283, "step": 232, "step_loss": 0.19675813615322113 }, { "epoch": 1.6124567474048441, "grad_norm": 1.2594225512441792, "kl": 0.26475217938423157, "learning_rate": 3.9622805358738834e-06, "loss": 0.2593, "step": 233, "step_loss": 0.28007546067237854 }, { "epoch": 1.6193771626297577, "grad_norm": 1.447309938645554, "kl": 0.340892493724823, "learning_rate": 3.95394039616259e-06, "loss": 0.2604, "step": 234, 
"step_loss": 0.2127062827348709 }, { "epoch": 1.6262975778546713, "grad_norm": 1.2866719129106237, "kl": 0.2766299843788147, "learning_rate": 3.945577013463734e-06, "loss": 0.273, "step": 235, "step_loss": 0.202714204788208 }, { "epoch": 1.633217993079585, "grad_norm": 1.4654771579770078, "kl": 0.36406049132347107, "learning_rate": 3.9371905492387994e-06, "loss": 0.2402, "step": 236, "step_loss": 0.28725528717041016 }, { "epoch": 1.6401384083044983, "grad_norm": 1.5104440445446627, "kl": 0.29089295864105225, "learning_rate": 3.928781165394872e-06, "loss": 0.2752, "step": 237, "step_loss": 0.2665597200393677 }, { "epoch": 1.6470588235294117, "grad_norm": 1.5918555874908318, "kl": 0.4465980529785156, "learning_rate": 3.920349024281523e-06, "loss": 0.2887, "step": 238, "step_loss": 0.36306196451187134 }, { "epoch": 1.6539792387543253, "grad_norm": 1.6103366090023579, "kl": 0.3899473249912262, "learning_rate": 3.911894288687665e-06, "loss": 0.2922, "step": 239, "step_loss": 0.23922008275985718 }, { "epoch": 1.6608996539792389, "grad_norm": 1.2880325826912928, "kl": 0.2514786124229431, "learning_rate": 3.903417121838418e-06, "loss": 0.2792, "step": 240, "step_loss": 0.3492111265659332 }, { "epoch": 1.6678200692041523, "grad_norm": 1.3298516661881563, "kl": 0.27591001987457275, "learning_rate": 3.8949176873919536e-06, "loss": 0.2517, "step": 241, "step_loss": 0.17425253987312317 }, { "epoch": 1.6747404844290656, "grad_norm": 1.2965257013394311, "kl": 0.2196529656648636, "learning_rate": 3.886396149436336e-06, "loss": 0.253, "step": 242, "step_loss": 0.2626924514770508 }, { "epoch": 1.6816608996539792, "grad_norm": 1.4267960790663161, "kl": 0.37125223875045776, "learning_rate": 3.8778526724863545e-06, "loss": 0.2704, "step": 243, "step_loss": 0.2899237871170044 }, { "epoch": 1.6885813148788928, "grad_norm": 1.387232639569157, "kl": 0.3684553802013397, "learning_rate": 3.869287421480347e-06, "loss": 0.2912, "step": 244, "step_loss": 0.3436232805252075 }, { "epoch": 
1.6955017301038062, "grad_norm": 1.3614592961038203, "kl": 0.2793482542037964, "learning_rate": 3.860700561777017e-06, "loss": 0.2672, "step": 245, "step_loss": 0.2761251926422119 }, { "epoch": 1.7024221453287196, "grad_norm": 1.2695057088776902, "kl": 0.30109891295433044, "learning_rate": 3.8520922591522405e-06, "loss": 0.2613, "step": 246, "step_loss": 0.2006363719701767 }, { "epoch": 1.7093425605536332, "grad_norm": 1.3941911982680804, "kl": 0.3258303999900818, "learning_rate": 3.843462679795863e-06, "loss": 0.2792, "step": 247, "step_loss": 0.23254989087581635 }, { "epoch": 1.7162629757785468, "grad_norm": 1.2789870025177164, "kl": 0.15877297520637512, "learning_rate": 3.834811990308499e-06, "loss": 0.2685, "step": 248, "step_loss": 0.17785832285881042 }, { "epoch": 1.7231833910034602, "grad_norm": 1.5276900969496776, "kl": 0.32708844542503357, "learning_rate": 3.826140357698304e-06, "loss": 0.2874, "step": 249, "step_loss": 0.2922940254211426 }, { "epoch": 1.7301038062283736, "grad_norm": 1.5764012506628604, "kl": 0.28709709644317627, "learning_rate": 3.817447949377761e-06, "loss": 0.2853, "step": 250, "step_loss": 0.22907781600952148 }, { "epoch": 1.7370242214532872, "grad_norm": 1.308016065936332, "kl": 0.3036882281303406, "learning_rate": 3.8087349331604408e-06, "loss": 0.2616, "step": 251, "step_loss": 0.32394489645957947 }, { "epoch": 1.7439446366782008, "grad_norm": 1.410964269373556, "kl": 0.19599321484565735, "learning_rate": 3.800001477257766e-06, "loss": 0.2792, "step": 252, "step_loss": 0.3192153871059418 }, { "epoch": 1.7508650519031141, "grad_norm": 1.4328454998713103, "kl": 0.32434576749801636, "learning_rate": 3.7912477502757656e-06, "loss": 0.2639, "step": 253, "step_loss": 0.2853735685348511 }, { "epoch": 1.7577854671280275, "grad_norm": 1.5880674894774636, "kl": 0.3071787357330322, "learning_rate": 3.7824739212118132e-06, "loss": 0.2882, "step": 254, "step_loss": 0.24094262719154358 }, { "epoch": 1.7647058823529411, "grad_norm": 
1.4626044081237708, "kl": 0.2698702812194824, "learning_rate": 3.7736801594513717e-06, "loss": 0.2514, "step": 255, "step_loss": 0.27880680561065674 }, { "epoch": 1.7716262975778547, "grad_norm": 1.5429131220193861, "kl": 0.2901202440261841, "learning_rate": 3.7648666347647183e-06, "loss": 0.2854, "step": 256, "step_loss": 0.31057730317115784 }, { "epoch": 1.7785467128027683, "grad_norm": 1.3982636327653568, "kl": 0.3490754961967468, "learning_rate": 3.756033517303669e-06, "loss": 0.2529, "step": 257, "step_loss": 0.3965347409248352 }, { "epoch": 1.7854671280276817, "grad_norm": 1.4858346287751483, "kl": 0.45699405670166016, "learning_rate": 3.7471809775982935e-06, "loss": 0.2824, "step": 258, "step_loss": 0.27909526228904724 }, { "epoch": 1.792387543252595, "grad_norm": 1.3967424914429802, "kl": 0.38061121106147766, "learning_rate": 3.7383091865536215e-06, "loss": 0.2527, "step": 259, "step_loss": 0.21938323974609375 }, { "epoch": 1.7993079584775087, "grad_norm": 1.304197847686241, "kl": 0.39026179909706116, "learning_rate": 3.7294183154463464e-06, "loss": 0.2806, "step": 260, "step_loss": 0.22161948680877686 }, { "epoch": 1.8062283737024223, "grad_norm": 1.349778867200545, "kl": 0.26626449823379517, "learning_rate": 3.720508535921515e-06, "loss": 0.2932, "step": 261, "step_loss": 0.3787621259689331 }, { "epoch": 1.8131487889273357, "grad_norm": 1.4248148948507744, "kl": 0.29090481996536255, "learning_rate": 3.7115800199892165e-06, "loss": 0.2662, "step": 262, "step_loss": 0.343997061252594 }, { "epoch": 1.820069204152249, "grad_norm": 1.313633565488741, "kl": 0.26761817932128906, "learning_rate": 3.702632940021261e-06, "loss": 0.2397, "step": 263, "step_loss": 0.25791415572166443 }, { "epoch": 1.8269896193771626, "grad_norm": 1.2951106743669643, "kl": 0.2644905149936676, "learning_rate": 3.69366746874785e-06, "loss": 0.2824, "step": 264, "step_loss": 0.2744329273700714 }, { "epoch": 1.8339100346020762, "grad_norm": 1.471465421088234, "kl": 0.2580920457839966, 
"learning_rate": 3.684683779254245e-06, "loss": 0.2687, "step": 265, "step_loss": 0.22518494725227356 }, { "epoch": 1.8408304498269896, "grad_norm": 1.4986596476830574, "kl": 0.33987006545066833, "learning_rate": 3.6756820449774226e-06, "loss": 0.2827, "step": 266, "step_loss": 0.34037211537361145 }, { "epoch": 1.847750865051903, "grad_norm": 1.4042446673242588, "kl": 0.2219737470149994, "learning_rate": 3.666662439702729e-06, "loss": 0.2836, "step": 267, "step_loss": 0.29281243681907654 }, { "epoch": 1.8546712802768166, "grad_norm": 1.5296169058200673, "kl": 0.3654361963272095, "learning_rate": 3.657625137560523e-06, "loss": 0.2401, "step": 268, "step_loss": 0.1859472393989563 }, { "epoch": 1.8615916955017302, "grad_norm": 1.4675058817704976, "kl": 0.43345117568969727, "learning_rate": 3.6485703130228156e-06, "loss": 0.2579, "step": 269, "step_loss": 0.23941168189048767 }, { "epoch": 1.8685121107266436, "grad_norm": 1.5012927842217516, "kl": 0.28073427081108093, "learning_rate": 3.6394981408998985e-06, "loss": 0.2754, "step": 270, "step_loss": 0.3322993516921997 }, { "epoch": 1.875432525951557, "grad_norm": 1.316076537364694, "kl": 0.2717742919921875, "learning_rate": 3.6304087963369757e-06, "loss": 0.2548, "step": 271, "step_loss": 0.2326144129037857 }, { "epoch": 1.8823529411764706, "grad_norm": 1.4279385067230814, "kl": 0.3231481909751892, "learning_rate": 3.6213024548107738e-06, "loss": 0.254, "step": 272, "step_loss": 0.2658352255821228 }, { "epoch": 1.8892733564013842, "grad_norm": 1.3484621154023628, "kl": 0.31264328956604004, "learning_rate": 3.612179292126164e-06, "loss": 0.2431, "step": 273, "step_loss": 0.17160722613334656 }, { "epoch": 1.8961937716262975, "grad_norm": 1.3884718438364925, "kl": 0.34462088346481323, "learning_rate": 3.6030394844127576e-06, "loss": 0.2449, "step": 274, "step_loss": 0.2328706681728363 }, { "epoch": 1.903114186851211, "grad_norm": 1.320641922343597, "kl": 0.26636019349098206, "learning_rate": 3.5938832081215146e-06, "loss": 
0.2446, "step": 275, "step_loss": 0.2282252311706543 }, { "epoch": 1.9100346020761245, "grad_norm": 1.556625308699743, "kl": 0.2827800214290619, "learning_rate": 3.584710640021331e-06, "loss": 0.282, "step": 276, "step_loss": 0.21395893394947052 }, { "epoch": 1.9169550173010381, "grad_norm": 1.4854998693218964, "kl": 0.3498770594596863, "learning_rate": 3.57552195719563e-06, "loss": 0.2662, "step": 277, "step_loss": 0.2133881151676178 }, { "epoch": 1.9238754325259517, "grad_norm": 1.475079634504898, "kl": 0.35902613401412964, "learning_rate": 3.566317337038942e-06, "loss": 0.2706, "step": 278, "step_loss": 0.4103222191333771 }, { "epoch": 1.930795847750865, "grad_norm": 1.4189980714339954, "kl": 0.2766472101211548, "learning_rate": 3.5570969572534798e-06, "loss": 0.2625, "step": 279, "step_loss": 0.3432201147079468 }, { "epoch": 1.9377162629757785, "grad_norm": 1.5105212295995052, "kl": 0.35687515139579773, "learning_rate": 3.5478609958457057e-06, "loss": 0.2813, "step": 280, "step_loss": 0.24871104955673218 }, { "epoch": 1.944636678200692, "grad_norm": 1.531182112453977, "kl": 0.33728113770484924, "learning_rate": 3.5386096311228996e-06, "loss": 0.2595, "step": 281, "step_loss": 0.36530840396881104 }, { "epoch": 1.9515570934256057, "grad_norm": 1.438563243898424, "kl": 0.30970752239227295, "learning_rate": 3.5293430416897122e-06, "loss": 0.2893, "step": 282, "step_loss": 0.20446962118148804 }, { "epoch": 1.958477508650519, "grad_norm": 1.2824099802470088, "kl": 0.34302300214767456, "learning_rate": 3.520061406444722e-06, "loss": 0.2756, "step": 283, "step_loss": 0.4095374345779419 }, { "epoch": 1.9653979238754324, "grad_norm": 1.2696380570305423, "kl": 0.24217939376831055, "learning_rate": 3.5107649045769744e-06, "loss": 0.2781, "step": 284, "step_loss": 0.23996147513389587 }, { "epoch": 1.972318339100346, "grad_norm": 1.251814478964872, "kl": 0.23562920093536377, "learning_rate": 3.5014537155625287e-06, "loss": 0.281, "step": 285, "step_loss": 0.15825393795967102 
}, { "epoch": 1.9792387543252596, "grad_norm": 1.3872535211707817, "kl": 0.3330673575401306, "learning_rate": 3.49212801916099e-06, "loss": 0.2857, "step": 286, "step_loss": 0.2466980516910553 }, { "epoch": 1.986159169550173, "grad_norm": 1.3309679010075988, "kl": 0.267617791891098, "learning_rate": 3.4827879954120414e-06, "loss": 0.2839, "step": 287, "step_loss": 0.33436891436576843 }, { "epoch": 1.9930795847750864, "grad_norm": 1.271275281926674, "kl": 0.19867797195911407, "learning_rate": 3.4734338246319615e-06, "loss": 0.2585, "step": 288, "step_loss": 0.23993399739265442 }, { "epoch": 2.0, "grad_norm": 1.3166644468570459, "kl": 0.2738046944141388, "learning_rate": 3.4640656874101543e-06, "loss": 0.2553, "step": 289, "step_loss": 0.23034729063510895 }, { "epoch": 2.0, "eval_test_transformed.json_loss": NaN, "eval_test_transformed.json_runtime": 62.4834, "eval_test_transformed.json_samples_per_second": 8.002, "eval_test_transformed.json_steps_per_second": 0.512, "step": 289 }, { "epoch": 2.0069204152249136, "grad_norm": 1.5361917900312048, "kl": NaN, "learning_rate": 3.4546837646056524e-06, "loss": 0.2018, "step": 290, "step_loss": NaN }, { "epoch": 2.013840830449827, "grad_norm": 1.3710607975752145, "kl": 0.4435328245162964, "learning_rate": 3.445288237343632e-06, "loss": 0.1864, "step": 291, "step_loss": 0.23043444752693176 }, { "epoch": 2.0207612456747404, "grad_norm": 1.3017112912495619, "kl": 0.2895858883857727, "learning_rate": 3.4358792870119113e-06, "loss": 0.1841, "step": 292, "step_loss": 0.1949908435344696 }, { "epoch": 2.027681660899654, "grad_norm": 1.3671307797246555, "kl": 0.4102199673652649, "learning_rate": 3.4264570952574527e-06, "loss": 0.2096, "step": 293, "step_loss": 0.2236202359199524 }, { "epoch": 2.0346020761245676, "grad_norm": 1.6144760178208069, "kl": 0.3745993971824646, "learning_rate": 3.417021843982855e-06, "loss": 0.1856, "step": 294, "step_loss": 0.18211515247821808 }, { "epoch": 2.041522491349481, "grad_norm": 
1.8177480028372601, "kl": 0.42568713426589966, "learning_rate": 3.407573715342839e-06, "loss": 0.1766, "step": 295, "step_loss": 0.14928948879241943 }, { "epoch": 2.0484429065743943, "grad_norm": 2.3019295435380047, "kl": 0.386038213968277, "learning_rate": 3.3981128917407345e-06, "loss": 0.2012, "step": 296, "step_loss": 0.13102638721466064 }, { "epoch": 2.055363321799308, "grad_norm": 2.0346441228616756, "kl": 0.3399032950401306, "learning_rate": 3.3886395558249564e-06, "loss": 0.1741, "step": 297, "step_loss": 0.12017254531383514 }, { "epoch": 2.0622837370242215, "grad_norm": 2.0881703745146103, "kl": 0.4326462149620056, "learning_rate": 3.379153890485479e-06, "loss": 0.1759, "step": 298, "step_loss": 0.20946833491325378 }, { "epoch": 2.069204152249135, "grad_norm": 1.6739724095940993, "kl": 0.39245301485061646, "learning_rate": 3.3696560788503074e-06, "loss": 0.2006, "step": 299, "step_loss": 0.15242120623588562 }, { "epoch": 2.0761245674740483, "grad_norm": 1.5191703308920614, "kl": 0.43564096093177795, "learning_rate": 3.360146304281939e-06, "loss": 0.1926, "step": 300, "step_loss": 0.23279757797718048 }, { "epoch": 2.083044982698962, "grad_norm": 1.6193810665203963, "kl": 0.2558225393295288, "learning_rate": 3.350624750373823e-06, "loss": 0.1985, "step": 301, "step_loss": 0.10919232666492462 }, { "epoch": 2.0899653979238755, "grad_norm": 1.6046745024900377, "kl": 0.3050040602684021, "learning_rate": 3.3410916009468216e-06, "loss": 0.1851, "step": 302, "step_loss": 0.16975295543670654 }, { "epoch": 2.096885813148789, "grad_norm": 1.247457445676404, "kl": 0.3950898051261902, "learning_rate": 3.3315470400456523e-06, "loss": 0.1838, "step": 303, "step_loss": 0.1987011432647705 }, { "epoch": 2.1038062283737022, "grad_norm": 1.5456910659979042, "kl": 0.32785236835479736, "learning_rate": 3.3219912519353457e-06, "loss": 0.1927, "step": 304, "step_loss": 0.22714649140834808 }, { "epoch": 2.110726643598616, "grad_norm": 1.4279638019963992, "kl": 0.23470821976661682, 
"learning_rate": 3.312424421097678e-06, "loss": 0.1806, "step": 305, "step_loss": 0.11929035931825638 }, { "epoch": 2.1176470588235294, "grad_norm": 1.3909941622613629, "kl": 0.2978059947490692, "learning_rate": 3.3028467322276143e-06, "loss": 0.1824, "step": 306, "step_loss": 0.1708146631717682 }, { "epoch": 2.124567474048443, "grad_norm": 1.510409002175013, "kl": 0.31332799792289734, "learning_rate": 3.2932583702297444e-06, "loss": 0.1812, "step": 307, "step_loss": 0.18889173865318298 }, { "epoch": 2.131487889273356, "grad_norm": 1.3671870640345287, "kl": 0.4285860061645508, "learning_rate": 3.2836595202147077e-06, "loss": 0.1923, "step": 308, "step_loss": 0.33256828784942627 }, { "epoch": 2.13840830449827, "grad_norm": 1.5231703303889546, "kl": 0.3560940623283386, "learning_rate": 3.2740503674956254e-06, "loss": 0.1791, "step": 309, "step_loss": 0.1518571376800537 }, { "epoch": 2.1453287197231834, "grad_norm": 1.5152895841401772, "kl": 0.41723349690437317, "learning_rate": 3.2644310975845193e-06, "loss": 0.1679, "step": 310, "step_loss": 0.14912407100200653 }, { "epoch": 2.152249134948097, "grad_norm": 1.493866939037434, "kl": 0.43565088510513306, "learning_rate": 3.254801896188731e-06, "loss": 0.1818, "step": 311, "step_loss": 0.26908254623413086 }, { "epoch": 2.1591695501730106, "grad_norm": 1.6549319654927044, "kl": 0.34806838631629944, "learning_rate": 3.245162949207339e-06, "loss": 0.1739, "step": 312, "step_loss": 0.1576148122549057 }, { "epoch": 2.1660899653979238, "grad_norm": 1.5022176402976404, "kl": 0.3748667240142822, "learning_rate": 3.2355144427275643e-06, "loss": 0.1832, "step": 313, "step_loss": 0.13114894926548004 }, { "epoch": 2.1730103806228374, "grad_norm": 1.6750843937807558, "kl": 0.3623813986778259, "learning_rate": 3.2258565630211834e-06, "loss": 0.185, "step": 314, "step_loss": 0.2091883271932602 }, { "epoch": 2.179930795847751, "grad_norm": 1.6351444803498718, "kl": 0.4272215962409973, "learning_rate": 3.2161894965409307e-06, "loss": 
0.1656, "step": 315, "step_loss": 0.15491512417793274 }, { "epoch": 2.1868512110726646, "grad_norm": 1.382247033317309, "kl": 0.3109212815761566, "learning_rate": 3.206513429916897e-06, "loss": 0.1847, "step": 316, "step_loss": 0.14636880159378052 }, { "epoch": 2.1937716262975777, "grad_norm": 1.6572634637216395, "kl": 0.5496642589569092, "learning_rate": 3.196828549952927e-06, "loss": 0.1922, "step": 317, "step_loss": 0.21070533990859985 }, { "epoch": 2.2006920415224913, "grad_norm": 1.35995690179303, "kl": 0.5228780508041382, "learning_rate": 3.1871350436230174e-06, "loss": 0.1692, "step": 318, "step_loss": 0.10645107179880142 }, { "epoch": 2.207612456747405, "grad_norm": 1.429711346870365, "kl": 0.5550024509429932, "learning_rate": 3.1774330980676994e-06, "loss": 0.1776, "step": 319, "step_loss": 0.18940532207489014 }, { "epoch": 2.2145328719723185, "grad_norm": 1.4208223407726583, "kl": 0.3191438913345337, "learning_rate": 3.1677229005904296e-06, "loss": 0.1584, "step": 320, "step_loss": 0.175113707780838 }, { "epoch": 2.2214532871972317, "grad_norm": 1.4089831600495957, "kl": 0.4652637243270874, "learning_rate": 3.158004638653979e-06, "loss": 0.1839, "step": 321, "step_loss": 0.1232675239443779 }, { "epoch": 2.2283737024221453, "grad_norm": 1.716282408339543, "kl": 0.5664535760879517, "learning_rate": 3.148278499876805e-06, "loss": 0.1694, "step": 322, "step_loss": 0.1706429123878479 }, { "epoch": 2.235294117647059, "grad_norm": 1.4405140997512074, "kl": 0.28291505575180054, "learning_rate": 3.138544672029434e-06, "loss": 0.1979, "step": 323, "step_loss": 0.2175317108631134 }, { "epoch": 2.2422145328719725, "grad_norm": 1.3653831564584462, "kl": 0.3663703203201294, "learning_rate": 3.1288033430308366e-06, "loss": 0.1785, "step": 324, "step_loss": 0.15021076798439026 }, { "epoch": 2.2491349480968856, "grad_norm": 1.4212418708132628, "kl": 0.363055557012558, "learning_rate": 3.119054700944799e-06, "loss": 0.1639, "step": 325, "step_loss": 0.12306660413742065 }, 
{ "epoch": 2.2560553633217992, "grad_norm": 1.4095920691622124, "kl": 0.36084944009780884, "learning_rate": 3.1092989339762917e-06, "loss": 0.1775, "step": 326, "step_loss": 0.13729049265384674 }, { "epoch": 2.262975778546713, "grad_norm": 1.6513609792997586, "kl": 0.33422496914863586, "learning_rate": 3.0995362304678367e-06, "loss": 0.1871, "step": 327, "step_loss": 0.20222987234592438 }, { "epoch": 2.2698961937716264, "grad_norm": 1.5250961273414327, "kl": 0.4888485074043274, "learning_rate": 3.0897667788958704e-06, "loss": 0.1805, "step": 328, "step_loss": 0.17217440903186798 }, { "epoch": 2.2768166089965396, "grad_norm": 1.3429750825156792, "kl": 0.31097593903541565, "learning_rate": 3.0799907678671053e-06, "loss": 0.1804, "step": 329, "step_loss": 0.1632266640663147 }, { "epoch": 2.283737024221453, "grad_norm": 1.5041072562505853, "kl": 0.460746705532074, "learning_rate": 3.0702083861148924e-06, "loss": 0.1661, "step": 330, "step_loss": 0.18065579235553741 }, { "epoch": 2.290657439446367, "grad_norm": 1.3945757006658674, "kl": 0.46404534578323364, "learning_rate": 3.0604198224955707e-06, "loss": 0.1866, "step": 331, "step_loss": 0.1655866801738739 }, { "epoch": 2.2975778546712804, "grad_norm": 1.4937451605937886, "kl": 0.5370233058929443, "learning_rate": 3.0506252659848263e-06, "loss": 0.1758, "step": 332, "step_loss": 0.2519758939743042 }, { "epoch": 2.304498269896194, "grad_norm": 1.4564093113803518, "kl": 0.3449196517467499, "learning_rate": 3.040824905674044e-06, "loss": 0.1677, "step": 333, "step_loss": 0.1300605833530426 }, { "epoch": 2.311418685121107, "grad_norm": 1.2865073108487606, "kl": 0.35088473558425903, "learning_rate": 3.031018930766652e-06, "loss": 0.1956, "step": 334, "step_loss": 0.16601531207561493 }, { "epoch": 2.3183391003460208, "grad_norm": 1.4501400715488888, "kl": 0.2329348772764206, "learning_rate": 3.0212075305744776e-06, "loss": 0.1763, "step": 335, "step_loss": 0.1981351375579834 }, { "epoch": 2.3252595155709344, "grad_norm": 
1.5873825930936403, "kl": 0.338948130607605, "learning_rate": 3.011390894514081e-06, "loss": 0.1745, "step": 336, "step_loss": 0.18890860676765442 }, { "epoch": 2.3321799307958475, "grad_norm": 1.627889997688411, "kl": 0.46196606755256653, "learning_rate": 3.001569212103111e-06, "loss": 0.1948, "step": 337, "step_loss": 0.24778449535369873 }, { "epoch": 2.339100346020761, "grad_norm": 1.3501940287569163, "kl": 0.3639797568321228, "learning_rate": 2.9917426729566363e-06, "loss": 0.2026, "step": 338, "step_loss": 0.17508842051029205 }, { "epoch": 2.3460207612456747, "grad_norm": 1.3362879154118215, "kl": 0.6229763627052307, "learning_rate": 2.981911466783489e-06, "loss": 0.1969, "step": 339, "step_loss": 0.16801509261131287 }, { "epoch": 2.3529411764705883, "grad_norm": 1.4493328368324427, "kl": 0.38414156436920166, "learning_rate": 2.972075783382603e-06, "loss": 0.1788, "step": 340, "step_loss": 0.22671052813529968 }, { "epoch": 2.359861591695502, "grad_norm": 1.6264671158466943, "kl": 0.358165979385376, "learning_rate": 2.962235812639347e-06, "loss": 0.1895, "step": 341, "step_loss": 0.19134527444839478 }, { "epoch": 2.366782006920415, "grad_norm": 1.4953517418378952, "kl": 0.5026807188987732, "learning_rate": 2.9523917445218625e-06, "loss": 0.1795, "step": 342, "step_loss": 0.13816913962364197 }, { "epoch": 2.3737024221453287, "grad_norm": 1.4940813439271365, "kl": 0.26116862893104553, "learning_rate": 2.94254376907739e-06, "loss": 0.1939, "step": 343, "step_loss": 0.21465972065925598 }, { "epoch": 2.3806228373702423, "grad_norm": 1.5409784928986425, "kl": 0.532651960849762, "learning_rate": 2.9326920764286084e-06, "loss": 0.18, "step": 344, "step_loss": 0.2392115294933319 }, { "epoch": 2.387543252595156, "grad_norm": 1.3990517231703177, "kl": 0.5056424736976624, "learning_rate": 2.9228368567699556e-06, "loss": 0.1762, "step": 345, "step_loss": 0.2116783857345581 }, { "epoch": 2.394463667820069, "grad_norm": 1.4111350933967293, "kl": 0.36347612738609314, 
"learning_rate": 2.912978300363966e-06, "loss": 0.1737, "step": 346, "step_loss": 0.18304967880249023 }, { "epoch": 2.4013840830449826, "grad_norm": 1.507105686372472, "kl": 0.2723758816719055, "learning_rate": 2.9031165975375887e-06, "loss": 0.1749, "step": 347, "step_loss": 0.25142043828964233 }, { "epoch": 2.4083044982698962, "grad_norm": 1.55899241614317, "kl": 0.22414946556091309, "learning_rate": 2.8932519386785164e-06, "loss": 0.1859, "step": 348, "step_loss": 0.1569632589817047 }, { "epoch": 2.41522491349481, "grad_norm": 1.4658289330296979, "kl": 0.3713468909263611, "learning_rate": 2.8833845142315154e-06, "loss": 0.1956, "step": 349, "step_loss": 0.1942533254623413 }, { "epoch": 2.422145328719723, "grad_norm": 1.4664548431946025, "kl": 0.3656342327594757, "learning_rate": 2.873514514694737e-06, "loss": 0.1861, "step": 350, "step_loss": 0.23527848720550537 }, { "epoch": 2.4290657439446366, "grad_norm": 1.4720180757801802, "kl": 0.3288955092430115, "learning_rate": 2.8636421306160523e-06, "loss": 0.1957, "step": 351, "step_loss": 0.12924553453922272 }, { "epoch": 2.43598615916955, "grad_norm": 1.4880254730946278, "kl": 0.24402765929698944, "learning_rate": 2.853767552589363e-06, "loss": 0.1714, "step": 352, "step_loss": 0.1053641140460968 }, { "epoch": 2.442906574394464, "grad_norm": 1.5749608413430207, "kl": 0.37472349405288696, "learning_rate": 2.843890971250931e-06, "loss": 0.1809, "step": 353, "step_loss": 0.1838831901550293 }, { "epoch": 2.4498269896193774, "grad_norm": 2.8821468256678098, "kl": 0.44829052686691284, "learning_rate": 2.8340125772756903e-06, "loss": 0.1566, "step": 354, "step_loss": 0.24845409393310547 }, { "epoch": 2.4567474048442905, "grad_norm": 1.5072102505151073, "kl": 0.43232619762420654, "learning_rate": 2.824132561373572e-06, "loss": 0.1873, "step": 355, "step_loss": 0.1785830557346344 }, { "epoch": 2.463667820069204, "grad_norm": 1.4546405723205693, "kl": 0.356367826461792, "learning_rate": 2.8142511142858162e-06, "loss": 
0.1896, "step": 356, "step_loss": 0.17593206465244293 }, { "epoch": 2.4705882352941178, "grad_norm": 1.6786390714147215, "kl": 0.3610578775405884, "learning_rate": 2.8043684267812988e-06, "loss": 0.1758, "step": 357, "step_loss": 0.2510666251182556 }, { "epoch": 2.477508650519031, "grad_norm": 1.4046819633739693, "kl": 0.4202744960784912, "learning_rate": 2.7944846896528345e-06, "loss": 0.2062, "step": 358, "step_loss": 0.252973735332489 }, { "epoch": 2.4844290657439445, "grad_norm": 1.4816212056197557, "kl": 0.3425026535987854, "learning_rate": 2.7846000937135103e-06, "loss": 0.1744, "step": 359, "step_loss": 0.19031235575675964 }, { "epoch": 2.491349480968858, "grad_norm": 1.4678278523991035, "kl": 0.4513944983482361, "learning_rate": 2.774714829792988e-06, "loss": 0.1907, "step": 360, "step_loss": 0.14004002511501312 }, { "epoch": 2.4982698961937717, "grad_norm": 1.4515363972167796, "kl": 0.4561648368835449, "learning_rate": 2.7648290887338277e-06, "loss": 0.178, "step": 361, "step_loss": 0.2761346101760864 }, { "epoch": 2.5051903114186853, "grad_norm": 1.3935549869015753, "kl": 0.4574112892150879, "learning_rate": 2.7549430613878e-06, "loss": 0.2061, "step": 362, "step_loss": 0.16471461951732635 }, { "epoch": 2.5121107266435985, "grad_norm": 1.4311380167634122, "kl": 0.4757191240787506, "learning_rate": 2.745056938612201e-06, "loss": 0.2296, "step": 363, "step_loss": 0.23240873217582703 }, { "epoch": 2.519031141868512, "grad_norm": 1.3631764019103396, "kl": 0.49319159984588623, "learning_rate": 2.735170911266173e-06, "loss": 0.202, "step": 364, "step_loss": 0.24277761578559875 }, { "epoch": 2.5259515570934257, "grad_norm": 1.4420346593408337, "kl": 0.29453420639038086, "learning_rate": 2.7252851702070125e-06, "loss": 0.1686, "step": 365, "step_loss": 0.21713301539421082 }, { "epoch": 2.532871972318339, "grad_norm": 1.4763539484881092, "kl": 0.2807290554046631, "learning_rate": 2.7153999062864904e-06, "loss": 0.1809, "step": 366, "step_loss": 0.20903638005256653 
}, { "epoch": 2.539792387543253, "grad_norm": 1.3122357602021801, "kl": 0.2314017415046692, "learning_rate": 2.705515310347166e-06, "loss": 0.1787, "step": 367, "step_loss": 0.1592029631137848 }, { "epoch": 2.546712802768166, "grad_norm": 1.7056226701954675, "kl": 0.4193629324436188, "learning_rate": 2.695631573218703e-06, "loss": 0.189, "step": 368, "step_loss": 0.202440083026886 }, { "epoch": 2.5536332179930796, "grad_norm": 1.3377458702911926, "kl": 0.3745248317718506, "learning_rate": 2.685748885714184e-06, "loss": 0.1797, "step": 369, "step_loss": 0.16764965653419495 }, { "epoch": 2.5605536332179932, "grad_norm": 1.3953683088019673, "kl": 0.5226297378540039, "learning_rate": 2.6758674386264286e-06, "loss": 0.1817, "step": 370, "step_loss": 0.18644654750823975 }, { "epoch": 2.5674740484429064, "grad_norm": 1.3462345751460496, "kl": 0.47047901153564453, "learning_rate": 2.6659874227243105e-06, "loss": 0.1969, "step": 371, "step_loss": 0.11603625863790512 }, { "epoch": 2.57439446366782, "grad_norm": 1.6212564777371328, "kl": 0.3617554008960724, "learning_rate": 2.6561090287490698e-06, "loss": 0.1867, "step": 372, "step_loss": 0.1289045363664627 }, { "epoch": 2.5813148788927336, "grad_norm": 1.4350189691545878, "kl": 0.40848982334136963, "learning_rate": 2.6462324474106376e-06, "loss": 0.1915, "step": 373, "step_loss": 0.16277310252189636 }, { "epoch": 2.588235294117647, "grad_norm": 1.383256054447009, "kl": 0.6102584004402161, "learning_rate": 2.6363578693839493e-06, "loss": 0.1713, "step": 374, "step_loss": 0.17344701290130615 }, { "epoch": 2.595155709342561, "grad_norm": 1.417639744108685, "kl": 0.43478095531463623, "learning_rate": 2.626485485305264e-06, "loss": 0.1821, "step": 375, "step_loss": 0.16545158624649048 }, { "epoch": 2.602076124567474, "grad_norm": 1.571084662989245, "kl": 0.3073910176753998, "learning_rate": 2.616615485768486e-06, "loss": 0.166, "step": 376, "step_loss": 0.18365763127803802 }, { "epoch": 2.6089965397923875, "grad_norm": 
1.4600786754887365, "kl": 0.36251652240753174, "learning_rate": 2.6067480613214847e-06, "loss": 0.1852, "step": 377, "step_loss": 0.16077759861946106 }, { "epoch": 2.615916955017301, "grad_norm": 1.4575402237591017, "kl": 0.34005922079086304, "learning_rate": 2.596883402462413e-06, "loss": 0.1903, "step": 378, "step_loss": 0.16736389696598053 }, { "epoch": 2.6228373702422143, "grad_norm": 1.5188948762340495, "kl": 0.3583582043647766, "learning_rate": 2.587021699636035e-06, "loss": 0.188, "step": 379, "step_loss": 0.14255043864250183 }, { "epoch": 2.629757785467128, "grad_norm": 1.3063652991556196, "kl": 0.41703560948371887, "learning_rate": 2.5771631432300447e-06, "loss": 0.1924, "step": 380, "step_loss": 0.24634656310081482 }, { "epoch": 2.6366782006920415, "grad_norm": 1.4206093825133412, "kl": 0.3237816095352173, "learning_rate": 2.5673079235713928e-06, "loss": 0.1778, "step": 381, "step_loss": 0.1871148645877838 }, { "epoch": 2.643598615916955, "grad_norm": 1.4497849008905073, "kl": 0.46378111839294434, "learning_rate": 2.557456230922611e-06, "loss": 0.171, "step": 382, "step_loss": 0.16613160073757172 }, { "epoch": 2.6505190311418687, "grad_norm": 1.4841745341608454, "kl": 0.2825563848018646, "learning_rate": 2.547608255478138e-06, "loss": 0.2163, "step": 383, "step_loss": 0.22633197903633118 }, { "epoch": 2.657439446366782, "grad_norm": 1.2725209875568195, "kl": 0.4287489354610443, "learning_rate": 2.5377641873606534e-06, "loss": 0.1959, "step": 384, "step_loss": 0.2818206250667572 }, { "epoch": 2.6643598615916955, "grad_norm": 1.4719052613340626, "kl": 0.3095007836818695, "learning_rate": 2.527924216617398e-06, "loss": 0.1765, "step": 385, "step_loss": 0.20259426534175873 }, { "epoch": 2.671280276816609, "grad_norm": 1.6061627078549683, "kl": 0.32397839426994324, "learning_rate": 2.5180885332165117e-06, "loss": 0.1732, "step": 386, "step_loss": 0.23030860722064972 }, { "epoch": 2.6782006920415222, "grad_norm": 1.4636855773040067, "kl": 0.4925137162208557, 
"learning_rate": 2.5082573270433648e-06, "loss": 0.1885, "step": 387, "step_loss": 0.19955076277256012 }, { "epoch": 2.685121107266436, "grad_norm": 1.3837745035788023, "kl": 0.40367522835731506, "learning_rate": 2.4984307878968894e-06, "loss": 0.1778, "step": 388, "step_loss": 0.1275492012500763 }, { "epoch": 2.6920415224913494, "grad_norm": 1.3908014078439779, "kl": 0.28875648975372314, "learning_rate": 2.488609105485919e-06, "loss": 0.1856, "step": 389, "step_loss": 0.18337780237197876 }, { "epoch": 2.698961937716263, "grad_norm": 1.6522583274313996, "kl": 0.45856326818466187, "learning_rate": 2.4787924694255235e-06, "loss": 0.2003, "step": 390, "step_loss": 0.2134436070919037 }, { "epoch": 2.7058823529411766, "grad_norm": 1.3617316678211955, "kl": 0.3315746784210205, "learning_rate": 2.468981069233348e-06, "loss": 0.2034, "step": 391, "step_loss": 0.15377382934093475 }, { "epoch": 2.71280276816609, "grad_norm": 1.4480979677853978, "kl": 0.35114848613739014, "learning_rate": 2.4591750943259567e-06, "loss": 0.1718, "step": 392, "step_loss": 0.26950398087501526 }, { "epoch": 2.7197231833910034, "grad_norm": 1.508978516372024, "kl": 0.3491678833961487, "learning_rate": 2.449374734015175e-06, "loss": 0.1721, "step": 393, "step_loss": 0.15551432967185974 }, { "epoch": 2.726643598615917, "grad_norm": 1.3671504424386263, "kl": 0.3615281581878662, "learning_rate": 2.4395801775044305e-06, "loss": 0.1776, "step": 394, "step_loss": 0.15910974144935608 }, { "epoch": 2.7335640138408306, "grad_norm": 1.5663529765138162, "kl": 0.21693947911262512, "learning_rate": 2.429791613885109e-06, "loss": 0.186, "step": 395, "step_loss": 0.1277189552783966 }, { "epoch": 2.740484429065744, "grad_norm": 1.5636181454524656, "kl": 0.305446058511734, "learning_rate": 2.4200092321328954e-06, "loss": 0.1771, "step": 396, "step_loss": 0.3183583915233612 }, { "epoch": 2.7474048442906573, "grad_norm": 1.5269639891879354, "kl": 0.4980379343032837, "learning_rate": 2.410233221104131e-06, "loss": 
0.1977, "step": 397, "step_loss": 0.19776971638202667 }, { "epoch": 2.754325259515571, "grad_norm": 1.2206447739960027, "kl": 0.3628636598587036, "learning_rate": 2.400463769532164e-06, "loss": 0.1865, "step": 398, "step_loss": 0.21354226768016815 }, { "epoch": 2.7612456747404845, "grad_norm": 1.457531308840606, "kl": 0.25415605306625366, "learning_rate": 2.390701066023709e-06, "loss": 0.1946, "step": 399, "step_loss": 0.28868022561073303 }, { "epoch": 2.7681660899653977, "grad_norm": 1.5131601169927764, "kl": 0.3197658061981201, "learning_rate": 2.380945299055201e-06, "loss": 0.1987, "step": 400, "step_loss": 0.13124696910381317 }, { "epoch": 2.7750865051903113, "grad_norm": 1.6275739092239627, "kl": 0.37099015712738037, "learning_rate": 2.3711966569691637e-06, "loss": 0.192, "step": 401, "step_loss": 0.1143123209476471 }, { "epoch": 2.782006920415225, "grad_norm": 1.3841905785335742, "kl": 0.28422728180885315, "learning_rate": 2.3614553279705664e-06, "loss": 0.1945, "step": 402, "step_loss": 0.11863328516483307 }, { "epoch": 2.7889273356401385, "grad_norm": 1.406282000707401, "kl": 0.4618176221847534, "learning_rate": 2.3517215001231958e-06, "loss": 0.2159, "step": 403, "step_loss": 0.16177824139595032 }, { "epoch": 2.795847750865052, "grad_norm": 1.5346694446232563, "kl": 0.26279217004776, "learning_rate": 2.3419953613460216e-06, "loss": 0.1914, "step": 404, "step_loss": 0.19532884657382965 }, { "epoch": 2.8027681660899653, "grad_norm": 1.350086530808591, "kl": 0.48455411195755005, "learning_rate": 2.3322770994095706e-06, "loss": 0.197, "step": 405, "step_loss": 0.1320147067308426 }, { "epoch": 2.809688581314879, "grad_norm": 1.4017312670157231, "kl": 0.24518997967243195, "learning_rate": 2.3225669019323026e-06, "loss": 0.1902, "step": 406, "step_loss": 0.14242634177207947 }, { "epoch": 2.8166089965397925, "grad_norm": 1.4630107346490955, "kl": 0.4306749105453491, "learning_rate": 2.312864956376983e-06, "loss": 0.1832, "step": 407, "step_loss": 
0.1711290031671524 }, { "epoch": 2.8235294117647056, "grad_norm": 1.3819271205836487, "kl": 0.2796858549118042, "learning_rate": 2.3031714500470737e-06, "loss": 0.1722, "step": 408, "step_loss": 0.1999296396970749 }, { "epoch": 2.830449826989619, "grad_norm": 1.5952225408493903, "kl": 0.33123618364334106, "learning_rate": 2.293486570083104e-06, "loss": 0.2096, "step": 409, "step_loss": 0.1997143179178238 }, { "epoch": 2.837370242214533, "grad_norm": 1.5876993249426479, "kl": 0.5430201292037964, "learning_rate": 2.283810503459071e-06, "loss": 0.1876, "step": 410, "step_loss": 0.17905423045158386 }, { "epoch": 2.8442906574394464, "grad_norm": 1.446461401336389, "kl": 0.361461877822876, "learning_rate": 2.274143436978817e-06, "loss": 0.1945, "step": 411, "step_loss": 0.2270490527153015 }, { "epoch": 2.85121107266436, "grad_norm": 1.520164847971609, "kl": 0.30101829767227173, "learning_rate": 2.264485557272436e-06, "loss": 0.1777, "step": 412, "step_loss": 0.16772989928722382 }, { "epoch": 2.858131487889273, "grad_norm": 1.3328150795020557, "kl": 0.5203718543052673, "learning_rate": 2.2548370507926614e-06, "loss": 0.1823, "step": 413, "step_loss": 0.10101582854986191 }, { "epoch": 2.865051903114187, "grad_norm": 1.4505899939037972, "kl": 0.3817005753517151, "learning_rate": 2.2451981038112684e-06, "loss": 0.1748, "step": 414, "step_loss": 0.1377355456352234 }, { "epoch": 2.8719723183391004, "grad_norm": 1.603237857526954, "kl": 0.43916210532188416, "learning_rate": 2.2355689024154814e-06, "loss": 0.1865, "step": 415, "step_loss": 0.1121528148651123 }, { "epoch": 2.878892733564014, "grad_norm": 1.3332285939244766, "kl": 0.28741025924682617, "learning_rate": 2.225949632504375e-06, "loss": 0.1934, "step": 416, "step_loss": 0.1507042646408081 }, { "epoch": 2.8858131487889276, "grad_norm": 1.4257537421094282, "kl": 0.2877019941806793, "learning_rate": 2.2163404797852934e-06, "loss": 0.1873, "step": 417, "step_loss": 0.24453911185264587 }, { "epoch": 2.8927335640138407, 
"grad_norm": 1.3997810771617476, "kl": 0.4382472634315491, "learning_rate": 2.2067416297702567e-06, "loss": 0.1758, "step": 418, "step_loss": 0.23127016425132751 }, { "epoch": 2.8996539792387543, "grad_norm": 1.506065344670746, "kl": 0.3009997010231018, "learning_rate": 2.197153267772386e-06, "loss": 0.1853, "step": 419, "step_loss": 0.2110218107700348 }, { "epoch": 2.906574394463668, "grad_norm": 1.3685222976311966, "kl": 0.5872238874435425, "learning_rate": 2.1875755789023223e-06, "loss": 0.1827, "step": 420, "step_loss": 0.13580046594142914 }, { "epoch": 2.913494809688581, "grad_norm": 1.2905060978907605, "kl": 0.3573032021522522, "learning_rate": 2.1780087480646546e-06, "loss": 0.1847, "step": 421, "step_loss": 0.24796149134635925 }, { "epoch": 2.9204152249134947, "grad_norm": 1.5384745757760991, "kl": 0.3231239914894104, "learning_rate": 2.168452959954348e-06, "loss": 0.1943, "step": 422, "step_loss": 0.3082975447177887 }, { "epoch": 2.9273356401384083, "grad_norm": 1.3624290272837614, "kl": 0.31316041946411133, "learning_rate": 2.1589083990531796e-06, "loss": 0.1848, "step": 423, "step_loss": 0.1961549073457718 }, { "epoch": 2.934256055363322, "grad_norm": 1.5003774485944303, "kl": 0.2436741143465042, "learning_rate": 2.1493752496261784e-06, "loss": 0.177, "step": 424, "step_loss": 0.19419702887535095 }, { "epoch": 2.9411764705882355, "grad_norm": 1.4223229677227738, "kl": 0.38194429874420166, "learning_rate": 2.139853695718062e-06, "loss": 0.1999, "step": 425, "step_loss": 0.16269627213478088 }, { "epoch": 2.9480968858131487, "grad_norm": 1.584874966981612, "kl": 0.3749251961708069, "learning_rate": 2.1303439211496933e-06, "loss": 0.2057, "step": 426, "step_loss": 0.1851825714111328 }, { "epoch": 2.9550173010380623, "grad_norm": 1.4822907119644246, "kl": 0.2773294150829315, "learning_rate": 2.1208461095145216e-06, "loss": 0.1786, "step": 427, "step_loss": 0.17761939764022827 }, { "epoch": 2.961937716262976, "grad_norm": 1.604779097664543, "kl": 
0.43890896439552307, "learning_rate": 2.1113604441750448e-06, "loss": 0.2199, "step": 428, "step_loss": 0.23295752704143524 }, { "epoch": 2.968858131487889, "grad_norm": 1.5217289660688909, "kl": 0.44053906202316284, "learning_rate": 2.1018871082592666e-06, "loss": 0.1683, "step": 429, "step_loss": 0.18539828062057495 }, { "epoch": 2.9757785467128026, "grad_norm": 1.4289089229969616, "kl": 0.23047330975532532, "learning_rate": 2.0924262846571617e-06, "loss": 0.1717, "step": 430, "step_loss": 0.22934721410274506 }, { "epoch": 2.982698961937716, "grad_norm": 1.550878026906904, "kl": 0.4683525562286377, "learning_rate": 2.082978156017146e-06, "loss": 0.2056, "step": 431, "step_loss": 0.19960898160934448 }, { "epoch": 2.98961937716263, "grad_norm": 1.492927248072018, "kl": 0.40442436933517456, "learning_rate": 2.073542904742547e-06, "loss": 0.1715, "step": 432, "step_loss": 0.21802842617034912 }, { "epoch": 2.9965397923875434, "grad_norm": 1.3419085460319926, "kl": 0.3841860592365265, "learning_rate": 2.064120712988089e-06, "loss": 0.1938, "step": 433, "step_loss": 0.15595637261867523 }, { "epoch": 2.9965397923875434, "eval_test_transformed.json_loss": null, "eval_test_transformed.json_runtime": 62.519, "eval_test_transformed.json_samples_per_second": 7.998, "eval_test_transformed.json_steps_per_second": 0.512, "step": 433 } ], "logging_steps": 1.0, "max_steps": 720, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 33524893409280.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }