diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,65162 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2000167658646994, + "eval_steps": 500, + "global_step": 93048, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0001289681899959375, + "grad_norm": 15.329981540370758, + "learning_rate": 3.8689708537529015e-09, + "loss": 0.9675, + "step": 10 + }, + { + "epoch": 0.000257936379991875, + "grad_norm": 15.976488848619558, + "learning_rate": 8.167827357922793e-09, + "loss": 0.9291, + "step": 20 + }, + { + "epoch": 0.0003869045699878125, + "grad_norm": 15.530414773092973, + "learning_rate": 1.2466683862092685e-08, + "loss": 0.9866, + "step": 30 + }, + { + "epoch": 0.00051587275998375, + "grad_norm": 15.434437173134748, + "learning_rate": 1.6765540366262574e-08, + "loss": 0.9671, + "step": 40 + }, + { + "epoch": 0.0006448409499796875, + "grad_norm": 15.822442750080922, + "learning_rate": 2.1064396870432467e-08, + "loss": 0.9702, + "step": 50 + }, + { + "epoch": 0.000773809139975625, + "grad_norm": 15.750761442246096, + "learning_rate": 2.5363253374602357e-08, + "loss": 0.9623, + "step": 60 + }, + { + "epoch": 0.0009027773299715626, + "grad_norm": 15.84476888397974, + "learning_rate": 2.966210987877225e-08, + "loss": 0.9521, + "step": 70 + }, + { + "epoch": 0.0010317455199675, + "grad_norm": 13.58150125623762, + "learning_rate": 3.3960966382942144e-08, + "loss": 0.9678, + "step": 80 + }, + { + "epoch": 0.0011607137099634375, + "grad_norm": 15.46366993327494, + "learning_rate": 3.8259822887112034e-08, + "loss": 0.974, + "step": 90 + }, + { + "epoch": 0.001289681899959375, + "grad_norm": 14.423329300543243, + "learning_rate": 4.2558679391281924e-08, + "loss": 0.9913, + "step": 100 + }, + { + "epoch": 0.0014186500899553125, + "grad_norm": 16.92550836756153, + "learning_rate": 4.6857535895451815e-08, + "loss": 0.9518, + "step": 110 + }, + { + "epoch": 0.00154761827995125, + "grad_norm": 14.587878553905067, + "learning_rate": 5.1156392399621705e-08, + "loss": 0.9809, + "step": 120 + }, + { + "epoch": 0.0016765864699471876, + "grad_norm": 14.479163407130148, + "learning_rate": 5.5455248903791595e-08, + "loss": 0.9689, + "step": 130 + }, + { + "epoch": 0.0018055546599431251, + "grad_norm": 13.75665287453743, + "learning_rate": 5.975410540796149e-08, + "loss": 0.9623, + "step": 140 + }, + { + "epoch": 0.0019345228499390624, + "grad_norm": 14.211275175513098, + "learning_rate": 6.405296191213138e-08, + "loss": 0.935, + "step": 150 + }, + { + "epoch": 0.002063491039935, + "grad_norm": 13.242381120057718, + "learning_rate": 6.835181841630128e-08, + "loss": 0.96, + "step": 160 + }, + { + "epoch": 0.0021924592299309375, + "grad_norm": 12.946315936823607, + "learning_rate": 7.265067492047117e-08, + "loss": 0.9496, + "step": 170 + }, + { + "epoch": 0.002321427419926875, + "grad_norm": 10.879373989329663, + "learning_rate": 7.694953142464106e-08, + "loss": 0.9509, + "step": 180 + }, + { + "epoch": 0.0024503956099228125, + "grad_norm": 13.764900788190326, + "learning_rate": 8.124838792881094e-08, + "loss": 0.9422, + "step": 190 + }, + { + "epoch": 0.00257936379991875, + "grad_norm": 11.178636849414199, + "learning_rate": 8.554724443298083e-08, + "loss": 0.9034, + "step": 200 + }, + { + "epoch": 0.0027083319899146876, + "grad_norm": 10.03716899184375, + "learning_rate": 8.984610093715072e-08, + "loss": 0.8699, + "step": 210 + }, + { + "epoch": 0.002837300179910625, + "grad_norm": 8.672544601496954, + "learning_rate": 9.414495744132062e-08, + "loss": 0.8782, + "step": 220 + }, + { + "epoch": 0.0029662683699065626, + "grad_norm": 9.93675211746465, + "learning_rate": 9.844381394549051e-08, + "loss": 0.8713, + "step": 230 + }, + { + "epoch": 0.0030952365599025, + "grad_norm": 9.196462083695115, + "learning_rate": 1.027426704496604e-07, + "loss": 0.8737, + "step": 240 + }, + { + "epoch": 0.0032242047498984377, + "grad_norm": 10.078386835036474, + "learning_rate": 1.0704152695383029e-07, + "loss": 0.8721, + "step": 250 + }, + { + "epoch": 0.003353172939894375, + "grad_norm": 9.467740554744227, + "learning_rate": 1.1134038345800018e-07, + "loss": 0.8584, + "step": 260 + }, + { + "epoch": 0.0034821411298903127, + "grad_norm": 6.622546046435882, + "learning_rate": 1.1563923996217007e-07, + "loss": 0.8086, + "step": 270 + }, + { + "epoch": 0.0036111093198862502, + "grad_norm": 3.7595459845901944, + "learning_rate": 1.1993809646633997e-07, + "loss": 0.7745, + "step": 280 + }, + { + "epoch": 0.0037400775098821878, + "grad_norm": 4.291329010437157, + "learning_rate": 1.2423695297050985e-07, + "loss": 0.7741, + "step": 290 + }, + { + "epoch": 0.003869045699878125, + "grad_norm": 3.5229401864430967, + "learning_rate": 1.2853580947467975e-07, + "loss": 0.7606, + "step": 300 + }, + { + "epoch": 0.003998013889874062, + "grad_norm": 3.822942315441901, + "learning_rate": 1.3283466597884963e-07, + "loss": 0.7598, + "step": 310 + }, + { + "epoch": 0.00412698207987, + "grad_norm": 3.10917659112614, + "learning_rate": 1.3713352248301953e-07, + "loss": 0.7652, + "step": 320 + }, + { + "epoch": 0.004255950269865937, + "grad_norm": 2.8839625460796174, + "learning_rate": 1.414323789871894e-07, + "loss": 0.7444, + "step": 330 + }, + { + "epoch": 0.004384918459861875, + "grad_norm": 2.2357197368231376, + "learning_rate": 1.4573123549135931e-07, + "loss": 0.7205, + "step": 340 + }, + { + "epoch": 0.0045138866498578125, + "grad_norm": 2.167617770810658, + "learning_rate": 1.500300919955292e-07, + "loss": 0.6755, + "step": 350 + }, + { + "epoch": 0.00464285483985375, + "grad_norm": 2.0726491551295894, + "learning_rate": 1.543289484996991e-07, + "loss": 0.6975, + "step": 360 + }, + { + "epoch": 0.0047718230298496875, + "grad_norm": 2.0501091821333888, + "learning_rate": 1.58627805003869e-07, + "loss": 0.6803, + "step": 370 + }, + { + "epoch": 0.004900791219845625, + "grad_norm": 1.8757151010785067, + "learning_rate": 1.6292666150803888e-07, + "loss": 0.6732, + "step": 380 + }, + { + "epoch": 0.0050297594098415626, + "grad_norm": 1.8471922709206414, + "learning_rate": 1.6722551801220878e-07, + "loss": 0.6516, + "step": 390 + }, + { + "epoch": 0.0051587275998375, + "grad_norm": 1.6676794709766332, + "learning_rate": 1.7152437451637868e-07, + "loss": 0.663, + "step": 400 + }, + { + "epoch": 0.005287695789833438, + "grad_norm": 1.666351424964313, + "learning_rate": 1.7582323102054856e-07, + "loss": 0.6636, + "step": 410 + }, + { + "epoch": 0.005416663979829375, + "grad_norm": 1.6335839262457643, + "learning_rate": 1.8012208752471846e-07, + "loss": 0.6471, + "step": 420 + }, + { + "epoch": 0.005545632169825313, + "grad_norm": 1.4929240023352703, + "learning_rate": 1.8442094402888834e-07, + "loss": 0.6289, + "step": 430 + }, + { + "epoch": 0.00567460035982125, + "grad_norm": 1.8545286054595496, + "learning_rate": 1.8871980053305824e-07, + "loss": 0.6278, + "step": 440 + }, + { + "epoch": 0.005803568549817188, + "grad_norm": 1.5677808046588277, + "learning_rate": 1.9301865703722812e-07, + "loss": 0.6166, + "step": 450 + }, + { + "epoch": 0.005932536739813125, + "grad_norm": 1.480404797350322, + "learning_rate": 1.9731751354139802e-07, + "loss": 0.6124, + "step": 460 + }, + { + "epoch": 0.006061504929809063, + "grad_norm": 1.5510569344681353, + "learning_rate": 2.016163700455679e-07, + "loss": 0.6011, + "step": 470 + }, + { + "epoch": 0.006190473119805, + "grad_norm": 1.4915138263737235, + "learning_rate": 2.059152265497378e-07, + "loss": 0.5828, + "step": 480 + }, + { + "epoch": 0.006319441309800938, + "grad_norm": 1.5242110012538372, + "learning_rate": 2.1021408305390768e-07, + "loss": 0.6047, + "step": 490 + }, + { + "epoch": 0.006448409499796875, + "grad_norm": 1.3842129501385239, + "learning_rate": 2.1451293955807758e-07, + "loss": 0.5672, + "step": 500 + }, + { + "epoch": 0.006577377689792813, + "grad_norm": 1.4276732934350205, + "learning_rate": 2.1881179606224746e-07, + "loss": 0.5882, + "step": 510 + }, + { + "epoch": 0.00670634587978875, + "grad_norm": 1.5133374881129484, + "learning_rate": 2.2311065256641736e-07, + "loss": 0.5811, + "step": 520 + }, + { + "epoch": 0.006835314069784688, + "grad_norm": 1.4087516947381948, + "learning_rate": 2.2740950907058724e-07, + "loss": 0.5628, + "step": 530 + }, + { + "epoch": 0.006964282259780625, + "grad_norm": 1.3907842894086206, + "learning_rate": 2.3170836557475714e-07, + "loss": 0.569, + "step": 540 + }, + { + "epoch": 0.007093250449776563, + "grad_norm": 1.465001755962806, + "learning_rate": 2.3600722207892702e-07, + "loss": 0.569, + "step": 550 + }, + { + "epoch": 0.0072222186397725005, + "grad_norm": 1.337775152243252, + "learning_rate": 2.403060785830969e-07, + "loss": 0.5719, + "step": 560 + }, + { + "epoch": 0.007351186829768438, + "grad_norm": 1.3156142239994197, + "learning_rate": 2.446049350872668e-07, + "loss": 0.5655, + "step": 570 + }, + { + "epoch": 0.0074801550197643755, + "grad_norm": 1.4836696372663511, + "learning_rate": 2.489037915914367e-07, + "loss": 0.563, + "step": 580 + }, + { + "epoch": 0.007609123209760313, + "grad_norm": 1.2418830609040026, + "learning_rate": 2.532026480956066e-07, + "loss": 0.5474, + "step": 590 + }, + { + "epoch": 0.00773809139975625, + "grad_norm": 1.3361644508681787, + "learning_rate": 2.575015045997765e-07, + "loss": 0.5537, + "step": 600 + }, + { + "epoch": 0.007867059589752187, + "grad_norm": 1.2693772809166728, + "learning_rate": 2.6180036110394636e-07, + "loss": 0.5719, + "step": 610 + }, + { + "epoch": 0.007996027779748125, + "grad_norm": 1.3399870180223823, + "learning_rate": 2.6609921760811624e-07, + "loss": 0.5441, + "step": 620 + }, + { + "epoch": 0.008124995969744062, + "grad_norm": 1.3750070502696032, + "learning_rate": 2.7039807411228617e-07, + "loss": 0.5434, + "step": 630 + }, + { + "epoch": 0.00825396415974, + "grad_norm": 1.2932559903511127, + "learning_rate": 2.7469693061645605e-07, + "loss": 0.5508, + "step": 640 + }, + { + "epoch": 0.008382932349735937, + "grad_norm": 1.4396822773479367, + "learning_rate": 2.789957871206259e-07, + "loss": 0.5566, + "step": 650 + }, + { + "epoch": 0.008511900539731875, + "grad_norm": 1.257312456147403, + "learning_rate": 2.832946436247958e-07, + "loss": 0.5423, + "step": 660 + }, + { + "epoch": 0.008640868729727812, + "grad_norm": 1.287845600413214, + "learning_rate": 2.8759350012896573e-07, + "loss": 0.5614, + "step": 670 + }, + { + "epoch": 0.00876983691972375, + "grad_norm": 1.445903082254619, + "learning_rate": 2.918923566331356e-07, + "loss": 0.5354, + "step": 680 + }, + { + "epoch": 0.008898805109719687, + "grad_norm": 1.2355121614422964, + "learning_rate": 2.961912131373055e-07, + "loss": 0.5263, + "step": 690 + }, + { + "epoch": 0.009027773299715625, + "grad_norm": 1.284436860094861, + "learning_rate": 3.0049006964147536e-07, + "loss": 0.5364, + "step": 700 + }, + { + "epoch": 0.009156741489711562, + "grad_norm": 1.237071256108109, + "learning_rate": 3.047889261456453e-07, + "loss": 0.5306, + "step": 710 + }, + { + "epoch": 0.0092857096797075, + "grad_norm": 1.2590389968352735, + "learning_rate": 3.0908778264981517e-07, + "loss": 0.5353, + "step": 720 + }, + { + "epoch": 0.009414677869703438, + "grad_norm": 1.2850904744143514, + "learning_rate": 3.1338663915398504e-07, + "loss": 0.5389, + "step": 730 + }, + { + "epoch": 0.009543646059699375, + "grad_norm": 1.2089186139792836, + "learning_rate": 3.176854956581549e-07, + "loss": 0.5394, + "step": 740 + }, + { + "epoch": 0.009672614249695313, + "grad_norm": 1.1865340885773557, + "learning_rate": 3.2198435216232485e-07, + "loss": 0.5166, + "step": 750 + }, + { + "epoch": 0.00980158243969125, + "grad_norm": 1.4079000764169889, + "learning_rate": 3.2628320866649473e-07, + "loss": 0.5425, + "step": 760 + }, + { + "epoch": 0.009930550629687188, + "grad_norm": 1.284708617010966, + "learning_rate": 3.305820651706646e-07, + "loss": 0.5414, + "step": 770 + }, + { + "epoch": 0.010059518819683125, + "grad_norm": 1.2695214036903308, + "learning_rate": 3.348809216748345e-07, + "loss": 0.5215, + "step": 780 + }, + { + "epoch": 0.010188487009679063, + "grad_norm": 1.347108934834412, + "learning_rate": 3.391797781790044e-07, + "loss": 0.503, + "step": 790 + }, + { + "epoch": 0.010317455199675, + "grad_norm": 1.2950827495268418, + "learning_rate": 3.434786346831743e-07, + "loss": 0.5243, + "step": 800 + }, + { + "epoch": 0.010446423389670938, + "grad_norm": 1.3919168645399769, + "learning_rate": 3.4777749118734417e-07, + "loss": 0.5252, + "step": 810 + }, + { + "epoch": 0.010575391579666875, + "grad_norm": 1.3088767292226924, + "learning_rate": 3.5207634769151404e-07, + "loss": 0.5273, + "step": 820 + }, + { + "epoch": 0.010704359769662813, + "grad_norm": 1.1549972895865668, + "learning_rate": 3.5637520419568397e-07, + "loss": 0.5191, + "step": 830 + }, + { + "epoch": 0.01083332795965875, + "grad_norm": 1.2355924855016558, + "learning_rate": 3.6067406069985385e-07, + "loss": 0.521, + "step": 840 + }, + { + "epoch": 0.010962296149654688, + "grad_norm": 1.2593010329552297, + "learning_rate": 3.649729172040237e-07, + "loss": 0.5297, + "step": 850 + }, + { + "epoch": 0.011091264339650625, + "grad_norm": 1.251591563272772, + "learning_rate": 3.692717737081936e-07, + "loss": 0.5012, + "step": 860 + }, + { + "epoch": 0.011220232529646563, + "grad_norm": 1.2278316914611531, + "learning_rate": 3.7357063021236353e-07, + "loss": 0.5011, + "step": 870 + }, + { + "epoch": 0.0113492007196425, + "grad_norm": 1.2579428238767227, + "learning_rate": 3.778694867165334e-07, + "loss": 0.5138, + "step": 880 + }, + { + "epoch": 0.011478168909638438, + "grad_norm": 1.233295875760975, + "learning_rate": 3.821683432207033e-07, + "loss": 0.5135, + "step": 890 + }, + { + "epoch": 0.011607137099634375, + "grad_norm": 1.3909083983346797, + "learning_rate": 3.864671997248732e-07, + "loss": 0.5171, + "step": 900 + }, + { + "epoch": 0.011736105289630313, + "grad_norm": 1.212597048465837, + "learning_rate": 3.907660562290431e-07, + "loss": 0.5142, + "step": 910 + }, + { + "epoch": 0.01186507347962625, + "grad_norm": 1.23975172461884, + "learning_rate": 3.9506491273321297e-07, + "loss": 0.5178, + "step": 920 + }, + { + "epoch": 0.011994041669622188, + "grad_norm": 1.2184870695910408, + "learning_rate": 3.9936376923738285e-07, + "loss": 0.5009, + "step": 930 + }, + { + "epoch": 0.012123009859618126, + "grad_norm": 1.3249868868941357, + "learning_rate": 4.036626257415528e-07, + "loss": 0.5151, + "step": 940 + }, + { + "epoch": 0.012251978049614063, + "grad_norm": 1.3413891431163774, + "learning_rate": 4.0796148224572265e-07, + "loss": 0.5066, + "step": 950 + }, + { + "epoch": 0.01238094623961, + "grad_norm": 1.3791574509454765, + "learning_rate": 4.1226033874989253e-07, + "loss": 0.5238, + "step": 960 + }, + { + "epoch": 0.012509914429605938, + "grad_norm": 1.1773361123586814, + "learning_rate": 4.165591952540624e-07, + "loss": 0.5048, + "step": 970 + }, + { + "epoch": 0.012638882619601876, + "grad_norm": 1.447839255012678, + "learning_rate": 4.2085805175823234e-07, + "loss": 0.5138, + "step": 980 + }, + { + "epoch": 0.012767850809597813, + "grad_norm": 1.1806413611663191, + "learning_rate": 4.251569082624022e-07, + "loss": 0.5056, + "step": 990 + }, + { + "epoch": 0.01289681899959375, + "grad_norm": 1.4721865362266586, + "learning_rate": 4.294557647665721e-07, + "loss": 0.5097, + "step": 1000 + }, + { + "epoch": 0.013025787189589688, + "grad_norm": 1.258123327603579, + "learning_rate": 4.3375462127074197e-07, + "loss": 0.5089, + "step": 1010 + }, + { + "epoch": 0.013154755379585626, + "grad_norm": 1.2872970129760037, + "learning_rate": 4.380534777749119e-07, + "loss": 0.5261, + "step": 1020 + }, + { + "epoch": 0.013283723569581563, + "grad_norm": 1.4876466038183704, + "learning_rate": 4.423523342790818e-07, + "loss": 0.499, + "step": 1030 + }, + { + "epoch": 0.0134126917595775, + "grad_norm": 1.2909655839911272, + "learning_rate": 4.466511907832517e-07, + "loss": 0.5218, + "step": 1040 + }, + { + "epoch": 0.013541659949573438, + "grad_norm": 1.2737641745043389, + "learning_rate": 4.5095004728742164e-07, + "loss": 0.5191, + "step": 1050 + }, + { + "epoch": 0.013670628139569376, + "grad_norm": 1.3087211850920022, + "learning_rate": 4.552489037915915e-07, + "loss": 0.5045, + "step": 1060 + }, + { + "epoch": 0.013799596329565313, + "grad_norm": 1.2591430899750773, + "learning_rate": 4.595477602957614e-07, + "loss": 0.5005, + "step": 1070 + }, + { + "epoch": 0.01392856451956125, + "grad_norm": 1.238294439448016, + "learning_rate": 4.6384661679993127e-07, + "loss": 0.5081, + "step": 1080 + }, + { + "epoch": 0.014057532709557188, + "grad_norm": 1.303852980404123, + "learning_rate": 4.681454733041012e-07, + "loss": 0.4996, + "step": 1090 + }, + { + "epoch": 0.014186500899553126, + "grad_norm": 1.180596077014857, + "learning_rate": 4.7244432980827107e-07, + "loss": 0.5113, + "step": 1100 + }, + { + "epoch": 0.014315469089549063, + "grad_norm": 1.3958414797893095, + "learning_rate": 4.7674318631244095e-07, + "loss": 0.4937, + "step": 1110 + }, + { + "epoch": 0.014444437279545001, + "grad_norm": 1.203622303030335, + "learning_rate": 4.810420428166109e-07, + "loss": 0.4754, + "step": 1120 + }, + { + "epoch": 0.014573405469540938, + "grad_norm": 1.3016611070946704, + "learning_rate": 4.853408993207808e-07, + "loss": 0.4735, + "step": 1130 + }, + { + "epoch": 0.014702373659536876, + "grad_norm": 1.2907092286800297, + "learning_rate": 4.896397558249506e-07, + "loss": 0.492, + "step": 1140 + }, + { + "epoch": 0.014831341849532814, + "grad_norm": 1.430267301262, + "learning_rate": 4.939386123291205e-07, + "loss": 0.4817, + "step": 1150 + }, + { + "epoch": 0.014960310039528751, + "grad_norm": 1.3067360503362428, + "learning_rate": 4.982374688332904e-07, + "loss": 0.5072, + "step": 1160 + }, + { + "epoch": 0.015089278229524689, + "grad_norm": 1.283531129884972, + "learning_rate": 5.025363253374603e-07, + "loss": 0.4822, + "step": 1170 + }, + { + "epoch": 0.015218246419520626, + "grad_norm": 1.3015082685388273, + "learning_rate": 5.068351818416301e-07, + "loss": 0.4836, + "step": 1180 + }, + { + "epoch": 0.015347214609516564, + "grad_norm": 1.4405334287650375, + "learning_rate": 5.111340383458001e-07, + "loss": 0.4771, + "step": 1190 + }, + { + "epoch": 0.0154761827995125, + "grad_norm": 1.3887274173214341, + "learning_rate": 5.1543289484997e-07, + "loss": 0.5002, + "step": 1200 + }, + { + "epoch": 0.015605150989508437, + "grad_norm": 1.3232861377565945, + "learning_rate": 5.197317513541399e-07, + "loss": 0.507, + "step": 1210 + }, + { + "epoch": 0.015734119179504374, + "grad_norm": 1.2716297350901906, + "learning_rate": 5.240306078583098e-07, + "loss": 0.5025, + "step": 1220 + }, + { + "epoch": 0.015863087369500314, + "grad_norm": 1.292300636960963, + "learning_rate": 5.283294643624796e-07, + "loss": 0.4907, + "step": 1230 + }, + { + "epoch": 0.01599205555949625, + "grad_norm": 1.3831290045379332, + "learning_rate": 5.326283208666495e-07, + "loss": 0.5115, + "step": 1240 + }, + { + "epoch": 0.01612102374949219, + "grad_norm": 1.3262230901581993, + "learning_rate": 5.369271773708194e-07, + "loss": 0.4996, + "step": 1250 + }, + { + "epoch": 0.016249991939488125, + "grad_norm": 1.3170113417305596, + "learning_rate": 5.412260338749893e-07, + "loss": 0.4805, + "step": 1260 + }, + { + "epoch": 0.016378960129484064, + "grad_norm": 1.3550095207991983, + "learning_rate": 5.455248903791592e-07, + "loss": 0.4846, + "step": 1270 + }, + { + "epoch": 0.01650792831948, + "grad_norm": 1.2512754862279878, + "learning_rate": 5.498237468833291e-07, + "loss": 0.4991, + "step": 1280 + }, + { + "epoch": 0.01663689650947594, + "grad_norm": 1.344855323744519, + "learning_rate": 5.54122603387499e-07, + "loss": 0.475, + "step": 1290 + }, + { + "epoch": 0.016765864699471875, + "grad_norm": 1.3987222451594132, + "learning_rate": 5.584214598916689e-07, + "loss": 0.4853, + "step": 1300 + }, + { + "epoch": 0.016894832889467814, + "grad_norm": 1.2490413302750158, + "learning_rate": 5.627203163958388e-07, + "loss": 0.4859, + "step": 1310 + }, + { + "epoch": 0.01702380107946375, + "grad_norm": 1.3529214369028302, + "learning_rate": 5.670191729000086e-07, + "loss": 0.4704, + "step": 1320 + }, + { + "epoch": 0.01715276926945969, + "grad_norm": 1.2429569936893974, + "learning_rate": 5.713180294041785e-07, + "loss": 0.467, + "step": 1330 + }, + { + "epoch": 0.017281737459455625, + "grad_norm": 1.1952192497446046, + "learning_rate": 5.756168859083485e-07, + "loss": 0.4941, + "step": 1340 + }, + { + "epoch": 0.017410705649451564, + "grad_norm": 1.282500598675099, + "learning_rate": 5.799157424125184e-07, + "loss": 0.483, + "step": 1350 + }, + { + "epoch": 0.0175396738394475, + "grad_norm": 1.2327951135972188, + "learning_rate": 5.842145989166882e-07, + "loss": 0.4808, + "step": 1360 + }, + { + "epoch": 0.01766864202944344, + "grad_norm": 1.3424814623993437, + "learning_rate": 5.885134554208581e-07, + "loss": 0.4695, + "step": 1370 + }, + { + "epoch": 0.017797610219439375, + "grad_norm": 1.1967398772051736, + "learning_rate": 5.92812311925028e-07, + "loss": 0.463, + "step": 1380 + }, + { + "epoch": 0.017926578409435314, + "grad_norm": 1.2005474135095464, + "learning_rate": 5.971111684291979e-07, + "loss": 0.4735, + "step": 1390 + }, + { + "epoch": 0.01805554659943125, + "grad_norm": 1.278987698559484, + "learning_rate": 6.014100249333678e-07, + "loss": 0.4927, + "step": 1400 + }, + { + "epoch": 0.01818451478942719, + "grad_norm": 1.183683604925414, + "learning_rate": 6.057088814375376e-07, + "loss": 0.4587, + "step": 1410 + }, + { + "epoch": 0.018313482979423125, + "grad_norm": 1.3982936142438274, + "learning_rate": 6.100077379417076e-07, + "loss": 0.4786, + "step": 1420 + }, + { + "epoch": 0.018442451169419064, + "grad_norm": 1.3128385818153423, + "learning_rate": 6.143065944458775e-07, + "loss": 0.488, + "step": 1430 + }, + { + "epoch": 0.018571419359415, + "grad_norm": 1.3856860448009445, + "learning_rate": 6.186054509500474e-07, + "loss": 0.477, + "step": 1440 + }, + { + "epoch": 0.01870038754941094, + "grad_norm": 1.3118161483413815, + "learning_rate": 6.229043074542172e-07, + "loss": 0.4815, + "step": 1450 + }, + { + "epoch": 0.018829355739406875, + "grad_norm": 1.2383611685929683, + "learning_rate": 6.272031639583871e-07, + "loss": 0.4896, + "step": 1460 + }, + { + "epoch": 0.018958323929402814, + "grad_norm": 1.3488240920212462, + "learning_rate": 6.31502020462557e-07, + "loss": 0.4685, + "step": 1470 + }, + { + "epoch": 0.01908729211939875, + "grad_norm": 1.170187922568467, + "learning_rate": 6.358008769667269e-07, + "loss": 0.4727, + "step": 1480 + }, + { + "epoch": 0.01921626030939469, + "grad_norm": 1.2580774404703792, + "learning_rate": 6.400997334708967e-07, + "loss": 0.484, + "step": 1490 + }, + { + "epoch": 0.019345228499390625, + "grad_norm": 1.3131908449236673, + "learning_rate": 6.443985899750667e-07, + "loss": 0.4702, + "step": 1500 + }, + { + "epoch": 0.019474196689386564, + "grad_norm": 1.251651979279598, + "learning_rate": 6.486974464792366e-07, + "loss": 0.4677, + "step": 1510 + }, + { + "epoch": 0.0196031648793825, + "grad_norm": 1.34584441340836, + "learning_rate": 6.529963029834065e-07, + "loss": 0.4805, + "step": 1520 + }, + { + "epoch": 0.01973213306937844, + "grad_norm": 1.2774766161286784, + "learning_rate": 6.572951594875764e-07, + "loss": 0.4721, + "step": 1530 + }, + { + "epoch": 0.019861101259374375, + "grad_norm": 1.4159705391143618, + "learning_rate": 6.615940159917462e-07, + "loss": 0.4709, + "step": 1540 + }, + { + "epoch": 0.019990069449370314, + "grad_norm": 1.2676867338417395, + "learning_rate": 6.658928724959161e-07, + "loss": 0.4834, + "step": 1550 + }, + { + "epoch": 0.02011903763936625, + "grad_norm": 1.3338012697123478, + "learning_rate": 6.70191729000086e-07, + "loss": 0.4797, + "step": 1560 + }, + { + "epoch": 0.02024800582936219, + "grad_norm": 1.2919857720179335, + "learning_rate": 6.744905855042559e-07, + "loss": 0.4678, + "step": 1570 + }, + { + "epoch": 0.020376974019358125, + "grad_norm": 1.3833739152124798, + "learning_rate": 6.787894420084259e-07, + "loss": 0.4826, + "step": 1580 + }, + { + "epoch": 0.02050594220935406, + "grad_norm": 1.3195777371063502, + "learning_rate": 6.830882985125957e-07, + "loss": 0.4582, + "step": 1590 + }, + { + "epoch": 0.02063491039935, + "grad_norm": 1.388148911343476, + "learning_rate": 6.873871550167656e-07, + "loss": 0.4758, + "step": 1600 + }, + { + "epoch": 0.020763878589345936, + "grad_norm": 1.2991131323083154, + "learning_rate": 6.916860115209355e-07, + "loss": 0.4766, + "step": 1610 + }, + { + "epoch": 0.020892846779341875, + "grad_norm": 1.2390433476361746, + "learning_rate": 6.959848680251054e-07, + "loss": 0.4621, + "step": 1620 + }, + { + "epoch": 0.02102181496933781, + "grad_norm": 1.294683389920105, + "learning_rate": 7.002837245292752e-07, + "loss": 0.4647, + "step": 1630 + }, + { + "epoch": 0.02115078315933375, + "grad_norm": 1.3400581880948512, + "learning_rate": 7.045825810334451e-07, + "loss": 0.4693, + "step": 1640 + }, + { + "epoch": 0.021279751349329686, + "grad_norm": 1.4021034097730503, + "learning_rate": 7.08881437537615e-07, + "loss": 0.4758, + "step": 1650 + }, + { + "epoch": 0.021408719539325625, + "grad_norm": 1.2256684102417892, + "learning_rate": 7.13180294041785e-07, + "loss": 0.4835, + "step": 1660 + }, + { + "epoch": 0.02153768772932156, + "grad_norm": 1.4200837888634845, + "learning_rate": 7.174791505459549e-07, + "loss": 0.4526, + "step": 1670 + }, + { + "epoch": 0.0216666559193175, + "grad_norm": 1.2830828569052806, + "learning_rate": 7.217780070501247e-07, + "loss": 0.465, + "step": 1680 + }, + { + "epoch": 0.021795624109313436, + "grad_norm": 1.3648045546525251, + "learning_rate": 7.260768635542946e-07, + "loss": 0.4767, + "step": 1690 + }, + { + "epoch": 0.021924592299309376, + "grad_norm": 1.2781424709315203, + "learning_rate": 7.303757200584645e-07, + "loss": 0.4562, + "step": 1700 + }, + { + "epoch": 0.02205356048930531, + "grad_norm": 1.378224091299996, + "learning_rate": 7.346745765626344e-07, + "loss": 0.4714, + "step": 1710 + }, + { + "epoch": 0.02218252867930125, + "grad_norm": 1.3235025923245036, + "learning_rate": 7.389734330668042e-07, + "loss": 0.48, + "step": 1720 + }, + { + "epoch": 0.022311496869297186, + "grad_norm": 1.2148727750150836, + "learning_rate": 7.432722895709741e-07, + "loss": 0.4537, + "step": 1730 + }, + { + "epoch": 0.022440465059293126, + "grad_norm": 1.2798220912331086, + "learning_rate": 7.475711460751441e-07, + "loss": 0.4665, + "step": 1740 + }, + { + "epoch": 0.02256943324928906, + "grad_norm": 1.3501412048090875, + "learning_rate": 7.51870002579314e-07, + "loss": 0.4498, + "step": 1750 + }, + { + "epoch": 0.022698401439285, + "grad_norm": 1.2527129233238834, + "learning_rate": 7.561688590834838e-07, + "loss": 0.4619, + "step": 1760 + }, + { + "epoch": 0.022827369629280937, + "grad_norm": 1.2572733096607995, + "learning_rate": 7.604677155876537e-07, + "loss": 0.4685, + "step": 1770 + }, + { + "epoch": 0.022956337819276876, + "grad_norm": 1.2004944881209316, + "learning_rate": 7.647665720918236e-07, + "loss": 0.4492, + "step": 1780 + }, + { + "epoch": 0.02308530600927281, + "grad_norm": 1.3227777735777666, + "learning_rate": 7.690654285959935e-07, + "loss": 0.4748, + "step": 1790 + }, + { + "epoch": 0.02321427419926875, + "grad_norm": 1.4721374009488157, + "learning_rate": 7.733642851001634e-07, + "loss": 0.4598, + "step": 1800 + }, + { + "epoch": 0.023343242389264687, + "grad_norm": 1.2925997613236466, + "learning_rate": 7.776631416043333e-07, + "loss": 0.4651, + "step": 1810 + }, + { + "epoch": 0.023472210579260626, + "grad_norm": 1.327748193048444, + "learning_rate": 7.819619981085032e-07, + "loss": 0.4618, + "step": 1820 + }, + { + "epoch": 0.02360117876925656, + "grad_norm": 1.3361876189305608, + "learning_rate": 7.862608546126731e-07, + "loss": 0.4764, + "step": 1830 + }, + { + "epoch": 0.0237301469592525, + "grad_norm": 1.227884213820535, + "learning_rate": 7.90559711116843e-07, + "loss": 0.4602, + "step": 1840 + }, + { + "epoch": 0.023859115149248437, + "grad_norm": 1.3306647378045344, + "learning_rate": 7.948585676210128e-07, + "loss": 0.4685, + "step": 1850 + }, + { + "epoch": 0.023988083339244376, + "grad_norm": 1.1975189092589493, + "learning_rate": 7.991574241251827e-07, + "loss": 0.4399, + "step": 1860 + }, + { + "epoch": 0.024117051529240312, + "grad_norm": 1.2713564335245875, + "learning_rate": 8.034562806293526e-07, + "loss": 0.446, + "step": 1870 + }, + { + "epoch": 0.02424601971923625, + "grad_norm": 1.2420312338108308, + "learning_rate": 8.077551371335225e-07, + "loss": 0.4581, + "step": 1880 + }, + { + "epoch": 0.024374987909232187, + "grad_norm": 1.1777597771020856, + "learning_rate": 8.120539936376925e-07, + "loss": 0.4326, + "step": 1890 + }, + { + "epoch": 0.024503956099228126, + "grad_norm": 1.246315558014805, + "learning_rate": 8.163528501418623e-07, + "loss": 0.4718, + "step": 1900 + }, + { + "epoch": 0.024632924289224062, + "grad_norm": 1.3127639232678456, + "learning_rate": 8.206517066460322e-07, + "loss": 0.4365, + "step": 1910 + }, + { + "epoch": 0.02476189247922, + "grad_norm": 1.4027371785025662, + "learning_rate": 8.249505631502021e-07, + "loss": 0.4641, + "step": 1920 + }, + { + "epoch": 0.024890860669215937, + "grad_norm": 1.3814714652507556, + "learning_rate": 8.29249419654372e-07, + "loss": 0.4459, + "step": 1930 + }, + { + "epoch": 0.025019828859211876, + "grad_norm": 1.2554124853452866, + "learning_rate": 8.335482761585418e-07, + "loss": 0.4446, + "step": 1940 + }, + { + "epoch": 0.025148797049207812, + "grad_norm": 1.265531806375767, + "learning_rate": 8.378471326627117e-07, + "loss": 0.4602, + "step": 1950 + }, + { + "epoch": 0.02527776523920375, + "grad_norm": 1.3261041762609505, + "learning_rate": 8.421459891668816e-07, + "loss": 0.4626, + "step": 1960 + }, + { + "epoch": 0.025406733429199687, + "grad_norm": 1.342956786459469, + "learning_rate": 8.464448456710516e-07, + "loss": 0.4779, + "step": 1970 + }, + { + "epoch": 0.025535701619195626, + "grad_norm": 1.2879146674543642, + "learning_rate": 8.507437021752215e-07, + "loss": 0.4518, + "step": 1980 + }, + { + "epoch": 0.025664669809191562, + "grad_norm": 1.3629187243289758, + "learning_rate": 8.550425586793913e-07, + "loss": 0.4544, + "step": 1990 + }, + { + "epoch": 0.0257936379991875, + "grad_norm": 1.3474246631100089, + "learning_rate": 8.593414151835612e-07, + "loss": 0.4512, + "step": 2000 + }, + { + "epoch": 0.025922606189183437, + "grad_norm": 1.196994061909337, + "learning_rate": 8.636402716877311e-07, + "loss": 0.4511, + "step": 2010 + }, + { + "epoch": 0.026051574379179376, + "grad_norm": 1.3068500800858196, + "learning_rate": 8.67939128191901e-07, + "loss": 0.4546, + "step": 2020 + }, + { + "epoch": 0.026180542569175312, + "grad_norm": 1.2746018932318226, + "learning_rate": 8.722379846960708e-07, + "loss": 0.4414, + "step": 2030 + }, + { + "epoch": 0.02630951075917125, + "grad_norm": 1.2221640339169932, + "learning_rate": 8.765368412002407e-07, + "loss": 0.4565, + "step": 2040 + }, + { + "epoch": 0.026438478949167187, + "grad_norm": 1.2532043184510593, + "learning_rate": 8.808356977044107e-07, + "loss": 0.4435, + "step": 2050 + }, + { + "epoch": 0.026567447139163126, + "grad_norm": 1.1869725877345125, + "learning_rate": 8.851345542085806e-07, + "loss": 0.4322, + "step": 2060 + }, + { + "epoch": 0.026696415329159062, + "grad_norm": 1.3062902479734209, + "learning_rate": 8.894334107127505e-07, + "loss": 0.4374, + "step": 2070 + }, + { + "epoch": 0.026825383519155, + "grad_norm": 1.239056011103741, + "learning_rate": 8.937322672169203e-07, + "loss": 0.4467, + "step": 2080 + }, + { + "epoch": 0.026954351709150937, + "grad_norm": 1.1837923732338074, + "learning_rate": 8.980311237210902e-07, + "loss": 0.4379, + "step": 2090 + }, + { + "epoch": 0.027083319899146877, + "grad_norm": 1.1563448086534778, + "learning_rate": 9.023299802252601e-07, + "loss": 0.4436, + "step": 2100 + }, + { + "epoch": 0.027212288089142812, + "grad_norm": 1.3114470222584935, + "learning_rate": 9.0662883672943e-07, + "loss": 0.4608, + "step": 2110 + }, + { + "epoch": 0.02734125627913875, + "grad_norm": 1.3189387839826368, + "learning_rate": 9.109276932335998e-07, + "loss": 0.4577, + "step": 2120 + }, + { + "epoch": 0.027470224469134687, + "grad_norm": 1.2684392601863208, + "learning_rate": 9.152265497377698e-07, + "loss": 0.4386, + "step": 2130 + }, + { + "epoch": 0.027599192659130627, + "grad_norm": 1.2806178845570526, + "learning_rate": 9.195254062419397e-07, + "loss": 0.449, + "step": 2140 + }, + { + "epoch": 0.027728160849126562, + "grad_norm": 1.2063358578707373, + "learning_rate": 9.238242627461096e-07, + "loss": 0.4479, + "step": 2150 + }, + { + "epoch": 0.0278571290391225, + "grad_norm": 1.4121916759022701, + "learning_rate": 9.281231192502795e-07, + "loss": 0.4271, + "step": 2160 + }, + { + "epoch": 0.027986097229118437, + "grad_norm": 1.4632427719167822, + "learning_rate": 9.324219757544493e-07, + "loss": 0.4469, + "step": 2170 + }, + { + "epoch": 0.028115065419114377, + "grad_norm": 1.3574820963829033, + "learning_rate": 9.367208322586192e-07, + "loss": 0.4538, + "step": 2180 + }, + { + "epoch": 0.028244033609110313, + "grad_norm": 1.2708586945292812, + "learning_rate": 9.410196887627891e-07, + "loss": 0.4519, + "step": 2190 + }, + { + "epoch": 0.028373001799106252, + "grad_norm": 1.2721644373474865, + "learning_rate": 9.45318545266959e-07, + "loss": 0.4664, + "step": 2200 + }, + { + "epoch": 0.028501969989102188, + "grad_norm": 1.2173328762207825, + "learning_rate": 9.496174017711289e-07, + "loss": 0.45, + "step": 2210 + }, + { + "epoch": 0.028630938179098127, + "grad_norm": 1.307125609637025, + "learning_rate": 9.539162582752988e-07, + "loss": 0.4593, + "step": 2220 + }, + { + "epoch": 0.028759906369094063, + "grad_norm": 1.2324241783314855, + "learning_rate": 9.582151147794687e-07, + "loss": 0.4391, + "step": 2230 + }, + { + "epoch": 0.028888874559090002, + "grad_norm": 1.1897183779457963, + "learning_rate": 9.625139712836386e-07, + "loss": 0.4296, + "step": 2240 + }, + { + "epoch": 0.029017842749085938, + "grad_norm": 1.2734948810906963, + "learning_rate": 9.668128277878085e-07, + "loss": 0.4319, + "step": 2250 + }, + { + "epoch": 0.029146810939081877, + "grad_norm": 1.1773173877292087, + "learning_rate": 9.711116842919783e-07, + "loss": 0.431, + "step": 2260 + }, + { + "epoch": 0.029275779129077813, + "grad_norm": 1.3758949699246756, + "learning_rate": 9.754105407961482e-07, + "loss": 0.4502, + "step": 2270 + }, + { + "epoch": 0.029404747319073752, + "grad_norm": 1.3010704995110685, + "learning_rate": 9.79709397300318e-07, + "loss": 0.4409, + "step": 2280 + }, + { + "epoch": 0.029533715509069688, + "grad_norm": 1.3495626647833958, + "learning_rate": 9.84008253804488e-07, + "loss": 0.4364, + "step": 2290 + }, + { + "epoch": 0.029662683699065627, + "grad_norm": 1.2634261417803265, + "learning_rate": 9.883071103086578e-07, + "loss": 0.4356, + "step": 2300 + }, + { + "epoch": 0.029791651889061563, + "grad_norm": 1.4385036489834209, + "learning_rate": 9.926059668128277e-07, + "loss": 0.4337, + "step": 2310 + }, + { + "epoch": 0.029920620079057502, + "grad_norm": 1.2542405529190963, + "learning_rate": 9.969048233169978e-07, + "loss": 0.4439, + "step": 2320 + }, + { + "epoch": 0.030049588269053438, + "grad_norm": 1.4628003266002014, + "learning_rate": 1.0012036798211677e-06, + "loss": 0.4537, + "step": 2330 + }, + { + "epoch": 0.030178556459049377, + "grad_norm": 1.2615073062344822, + "learning_rate": 1.0055025363253376e-06, + "loss": 0.4505, + "step": 2340 + }, + { + "epoch": 0.030307524649045313, + "grad_norm": 1.3505231808670588, + "learning_rate": 1.0098013928295074e-06, + "loss": 0.4447, + "step": 2350 + }, + { + "epoch": 0.030436492839041252, + "grad_norm": 1.267824277902632, + "learning_rate": 1.0141002493336773e-06, + "loss": 0.46, + "step": 2360 + }, + { + "epoch": 0.030565461029037188, + "grad_norm": 1.2520128889499254, + "learning_rate": 1.0183991058378472e-06, + "loss": 0.4265, + "step": 2370 + }, + { + "epoch": 0.030694429219033127, + "grad_norm": 1.3688515942323993, + "learning_rate": 1.022697962342017e-06, + "loss": 0.4464, + "step": 2380 + }, + { + "epoch": 0.030823397409029063, + "grad_norm": 1.2727278433077762, + "learning_rate": 1.026996818846187e-06, + "loss": 0.4352, + "step": 2390 + }, + { + "epoch": 0.030952365599025, + "grad_norm": 1.397877897733501, + "learning_rate": 1.0312956753503568e-06, + "loss": 0.4496, + "step": 2400 + }, + { + "epoch": 0.031081333789020938, + "grad_norm": 1.4149326935501825, + "learning_rate": 1.0355945318545267e-06, + "loss": 0.4456, + "step": 2410 + }, + { + "epoch": 0.031210301979016874, + "grad_norm": 1.203879935114693, + "learning_rate": 1.0398933883586966e-06, + "loss": 0.4491, + "step": 2420 + }, + { + "epoch": 0.03133927016901281, + "grad_norm": 1.3767807155008227, + "learning_rate": 1.0441922448628665e-06, + "loss": 0.4297, + "step": 2430 + }, + { + "epoch": 0.03146823835900875, + "grad_norm": 1.3528397532041603, + "learning_rate": 1.0484911013670365e-06, + "loss": 0.4224, + "step": 2440 + }, + { + "epoch": 0.031597206549004685, + "grad_norm": 1.3138748726145602, + "learning_rate": 1.0527899578712064e-06, + "loss": 0.4261, + "step": 2450 + }, + { + "epoch": 0.03172617473900063, + "grad_norm": 1.2479152064971584, + "learning_rate": 1.0570888143753763e-06, + "loss": 0.4282, + "step": 2460 + }, + { + "epoch": 0.03185514292899656, + "grad_norm": 1.2243344526251352, + "learning_rate": 1.0613876708795462e-06, + "loss": 0.4415, + "step": 2470 + }, + { + "epoch": 0.0319841111189925, + "grad_norm": 1.3019344444488339, + "learning_rate": 1.065686527383716e-06, + "loss": 0.4249, + "step": 2480 + }, + { + "epoch": 0.032113079308988435, + "grad_norm": 1.2884865813315187, + "learning_rate": 1.069985383887886e-06, + "loss": 0.4318, + "step": 2490 + }, + { + "epoch": 0.03224204749898438, + "grad_norm": 1.292140181466742, + "learning_rate": 1.0742842403920558e-06, + "loss": 0.4196, + "step": 2500 + }, + { + "epoch": 0.03237101568898031, + "grad_norm": 1.198302615584322, + "learning_rate": 1.0785830968962257e-06, + "loss": 0.449, + "step": 2510 + }, + { + "epoch": 0.03249998387897625, + "grad_norm": 1.2873749642934251, + "learning_rate": 1.0828819534003956e-06, + "loss": 0.4311, + "step": 2520 + }, + { + "epoch": 0.032628952068972185, + "grad_norm": 1.3079808589891906, + "learning_rate": 1.0871808099045654e-06, + "loss": 0.4352, + "step": 2530 + }, + { + "epoch": 0.03275792025896813, + "grad_norm": 1.2620520137329123, + "learning_rate": 1.0914796664087355e-06, + "loss": 0.4371, + "step": 2540 + }, + { + "epoch": 0.03288688844896406, + "grad_norm": 1.5000081249510855, + "learning_rate": 1.0957785229129054e-06, + "loss": 0.4504, + "step": 2550 + }, + { + "epoch": 0.03301585663896, + "grad_norm": 1.2774037524569934, + "learning_rate": 1.1000773794170753e-06, + "loss": 0.4314, + "step": 2560 + }, + { + "epoch": 0.033144824828955935, + "grad_norm": 1.3497171547936961, + "learning_rate": 1.1043762359212452e-06, + "loss": 0.4425, + "step": 2570 + }, + { + "epoch": 0.03327379301895188, + "grad_norm": 1.404768386431086, + "learning_rate": 1.108675092425415e-06, + "loss": 0.439, + "step": 2580 + }, + { + "epoch": 0.033402761208947813, + "grad_norm": 1.265591691098657, + "learning_rate": 1.112973948929585e-06, + "loss": 0.4326, + "step": 2590 + }, + { + "epoch": 0.03353172939894375, + "grad_norm": 1.3036127625653942, + "learning_rate": 1.1172728054337548e-06, + "loss": 0.4309, + "step": 2600 + }, + { + "epoch": 0.033660697588939685, + "grad_norm": 1.3187018264067318, + "learning_rate": 1.1215716619379247e-06, + "loss": 0.4415, + "step": 2610 + }, + { + "epoch": 0.03378966577893563, + "grad_norm": 1.4021752724719732, + "learning_rate": 1.1258705184420945e-06, + "loss": 0.4472, + "step": 2620 + }, + { + "epoch": 0.033918633968931564, + "grad_norm": 1.1074415000330249, + "learning_rate": 1.1301693749462644e-06, + "loss": 0.431, + "step": 2630 + }, + { + "epoch": 0.0340476021589275, + "grad_norm": 1.3466425917784692, + "learning_rate": 1.1344682314504343e-06, + "loss": 0.435, + "step": 2640 + }, + { + "epoch": 0.034176570348923435, + "grad_norm": 1.3361833462419794, + "learning_rate": 1.1387670879546042e-06, + "loss": 0.4341, + "step": 2650 + }, + { + "epoch": 0.03430553853891938, + "grad_norm": 1.3170165754530057, + "learning_rate": 1.143065944458774e-06, + "loss": 0.4314, + "step": 2660 + }, + { + "epoch": 0.034434506728915314, + "grad_norm": 1.3344696149380086, + "learning_rate": 1.147364800962944e-06, + "loss": 0.4164, + "step": 2670 + }, + { + "epoch": 0.03456347491891125, + "grad_norm": 1.4270016074908678, + "learning_rate": 1.1516636574671138e-06, + "loss": 0.4342, + "step": 2680 + }, + { + "epoch": 0.034692443108907185, + "grad_norm": 1.3188257974813613, + "learning_rate": 1.1559625139712839e-06, + "loss": 0.4351, + "step": 2690 + }, + { + "epoch": 0.03482141129890313, + "grad_norm": 1.3916119196806613, + "learning_rate": 1.1602613704754538e-06, + "loss": 0.4398, + "step": 2700 + }, + { + "epoch": 0.034950379488899064, + "grad_norm": 1.340775766052423, + "learning_rate": 1.1645602269796236e-06, + "loss": 0.4266, + "step": 2710 + }, + { + "epoch": 0.035079347678895, + "grad_norm": 1.4968042654404279, + "learning_rate": 1.1688590834837935e-06, + "loss": 0.4404, + "step": 2720 + }, + { + "epoch": 0.035208315868890935, + "grad_norm": 1.40600203429942, + "learning_rate": 1.1731579399879634e-06, + "loss": 0.4363, + "step": 2730 + }, + { + "epoch": 0.03533728405888688, + "grad_norm": 1.259793878770834, + "learning_rate": 1.1774567964921333e-06, + "loss": 0.425, + "step": 2740 + }, + { + "epoch": 0.035466252248882814, + "grad_norm": 1.283338984912107, + "learning_rate": 1.1817556529963031e-06, + "loss": 0.4389, + "step": 2750 + }, + { + "epoch": 0.03559522043887875, + "grad_norm": 1.5056915930293808, + "learning_rate": 1.186054509500473e-06, + "loss": 0.4483, + "step": 2760 + }, + { + "epoch": 0.035724188628874685, + "grad_norm": 1.3668106144856433, + "learning_rate": 1.190353366004643e-06, + "loss": 0.424, + "step": 2770 + }, + { + "epoch": 0.03585315681887063, + "grad_norm": 1.3928154658877443, + "learning_rate": 1.1946522225088128e-06, + "loss": 0.4302, + "step": 2780 + }, + { + "epoch": 0.035982125008866564, + "grad_norm": 1.4169698482616084, + "learning_rate": 1.1989510790129827e-06, + "loss": 0.4199, + "step": 2790 + }, + { + "epoch": 0.0361110931988625, + "grad_norm": 1.1814935203684138, + "learning_rate": 1.2032499355171525e-06, + "loss": 0.4275, + "step": 2800 + }, + { + "epoch": 0.036240061388858436, + "grad_norm": 1.3073308319700812, + "learning_rate": 1.2075487920213224e-06, + "loss": 0.4452, + "step": 2810 + }, + { + "epoch": 0.03636902957885438, + "grad_norm": 1.3376783656641436, + "learning_rate": 1.2118476485254923e-06, + "loss": 0.4517, + "step": 2820 + }, + { + "epoch": 0.036497997768850314, + "grad_norm": 1.1828285369879679, + "learning_rate": 1.2161465050296622e-06, + "loss": 0.422, + "step": 2830 + }, + { + "epoch": 0.03662696595884625, + "grad_norm": 1.2105185052466658, + "learning_rate": 1.220445361533832e-06, + "loss": 0.421, + "step": 2840 + }, + { + "epoch": 0.036755934148842186, + "grad_norm": 1.2070283023997004, + "learning_rate": 1.2247442180380021e-06, + "loss": 0.4282, + "step": 2850 + }, + { + "epoch": 0.03688490233883813, + "grad_norm": 1.2947758688504019, + "learning_rate": 1.229043074542172e-06, + "loss": 0.4406, + "step": 2860 + }, + { + "epoch": 0.037013870528834064, + "grad_norm": 1.2845375653495057, + "learning_rate": 1.2333419310463419e-06, + "loss": 0.4238, + "step": 2870 + }, + { + "epoch": 0.03714283871883, + "grad_norm": 1.459778743241837, + "learning_rate": 1.2376407875505118e-06, + "loss": 0.4247, + "step": 2880 + }, + { + "epoch": 0.037271806908825936, + "grad_norm": 1.439639603648901, + "learning_rate": 1.2419396440546816e-06, + "loss": 0.4202, + "step": 2890 + }, + { + "epoch": 0.03740077509882188, + "grad_norm": 1.2476030775296196, + "learning_rate": 1.2462385005588515e-06, + "loss": 0.4126, + "step": 2900 + }, + { + "epoch": 0.037529743288817814, + "grad_norm": 1.3333265834774837, + "learning_rate": 1.2505373570630214e-06, + "loss": 0.4274, + "step": 2910 + }, + { + "epoch": 0.03765871147881375, + "grad_norm": 1.3580757683528306, + "learning_rate": 1.2548362135671913e-06, + "loss": 0.4192, + "step": 2920 + }, + { + "epoch": 0.037787679668809686, + "grad_norm": 1.2230813749350666, + "learning_rate": 1.2591350700713611e-06, + "loss": 0.4266, + "step": 2930 + }, + { + "epoch": 0.03791664785880563, + "grad_norm": 1.370718898190844, + "learning_rate": 1.263433926575531e-06, + "loss": 0.4376, + "step": 2940 + }, + { + "epoch": 0.038045616048801564, + "grad_norm": 1.400622710355847, + "learning_rate": 1.267732783079701e-06, + "loss": 0.4225, + "step": 2950 + }, + { + "epoch": 0.0381745842387975, + "grad_norm": 1.345640144625144, + "learning_rate": 1.2720316395838708e-06, + "loss": 0.4194, + "step": 2960 + }, + { + "epoch": 0.038303552428793436, + "grad_norm": 1.3033028864488245, + "learning_rate": 1.2763304960880407e-06, + "loss": 0.4377, + "step": 2970 + }, + { + "epoch": 0.03843252061878938, + "grad_norm": 1.5073849961201715, + "learning_rate": 1.2806293525922105e-06, + "loss": 0.4131, + "step": 2980 + }, + { + "epoch": 0.038561488808785314, + "grad_norm": 1.3265622351053146, + "learning_rate": 1.2849282090963804e-06, + "loss": 0.4183, + "step": 2990 + }, + { + "epoch": 0.03869045699878125, + "grad_norm": 1.490167908340459, + "learning_rate": 1.2892270656005503e-06, + "loss": 0.4279, + "step": 3000 + }, + { + "epoch": 0.038819425188777186, + "grad_norm": 1.293179971825631, + "learning_rate": 1.2935259221047204e-06, + "loss": 0.4355, + "step": 3010 + }, + { + "epoch": 0.03894839337877313, + "grad_norm": 1.3043245012025797, + "learning_rate": 1.2978247786088902e-06, + "loss": 0.4338, + "step": 3020 + }, + { + "epoch": 0.039077361568769065, + "grad_norm": 1.404447548080887, + "learning_rate": 1.3021236351130601e-06, + "loss": 0.4127, + "step": 3030 + }, + { + "epoch": 0.039206329758765, + "grad_norm": 1.3005018553316732, + "learning_rate": 1.30642249161723e-06, + "loss": 0.4136, + "step": 3040 + }, + { + "epoch": 0.039335297948760936, + "grad_norm": 1.1815888585293441, + "learning_rate": 1.3107213481213999e-06, + "loss": 0.4092, + "step": 3050 + }, + { + "epoch": 0.03946426613875688, + "grad_norm": 1.4120526347385818, + "learning_rate": 1.3150202046255698e-06, + "loss": 0.4278, + "step": 3060 + }, + { + "epoch": 0.039593234328752815, + "grad_norm": 1.2305070842470642, + "learning_rate": 1.3193190611297396e-06, + "loss": 0.4181, + "step": 3070 + }, + { + "epoch": 0.03972220251874875, + "grad_norm": 1.2357310766923726, + "learning_rate": 1.3236179176339095e-06, + "loss": 0.4276, + "step": 3080 + }, + { + "epoch": 0.039851170708744686, + "grad_norm": 1.3942625713927972, + "learning_rate": 1.3279167741380794e-06, + "loss": 0.4257, + "step": 3090 + }, + { + "epoch": 0.03998013889874063, + "grad_norm": 1.2285795252800311, + "learning_rate": 1.3322156306422493e-06, + "loss": 0.4039, + "step": 3100 + }, + { + "epoch": 0.040109107088736565, + "grad_norm": 1.2255831623933802, + "learning_rate": 1.3365144871464191e-06, + "loss": 0.4231, + "step": 3110 + }, + { + "epoch": 0.0402380752787325, + "grad_norm": 1.221929135308376, + "learning_rate": 1.340813343650589e-06, + "loss": 0.429, + "step": 3120 + }, + { + "epoch": 0.040367043468728436, + "grad_norm": 1.3134311560714993, + "learning_rate": 1.3451122001547589e-06, + "loss": 0.4308, + "step": 3130 + }, + { + "epoch": 0.04049601165872438, + "grad_norm": 1.2749387685522013, + "learning_rate": 1.3494110566589288e-06, + "loss": 0.4217, + "step": 3140 + }, + { + "epoch": 0.040624979848720315, + "grad_norm": 1.4145212979592694, + "learning_rate": 1.3537099131630986e-06, + "loss": 0.4383, + "step": 3150 + }, + { + "epoch": 0.04075394803871625, + "grad_norm": 1.3682057549048858, + "learning_rate": 1.3580087696672687e-06, + "loss": 0.4284, + "step": 3160 + }, + { + "epoch": 0.040882916228712186, + "grad_norm": 1.3227367449276213, + "learning_rate": 1.3623076261714386e-06, + "loss": 0.4091, + "step": 3170 + }, + { + "epoch": 0.04101188441870812, + "grad_norm": 1.3604432822234156, + "learning_rate": 1.3666064826756085e-06, + "loss": 0.4154, + "step": 3180 + }, + { + "epoch": 0.041140852608704065, + "grad_norm": 1.3358031717953363, + "learning_rate": 1.3709053391797784e-06, + "loss": 0.439, + "step": 3190 + }, + { + "epoch": 0.0412698207987, + "grad_norm": 1.2937862803036806, + "learning_rate": 1.3752041956839482e-06, + "loss": 0.4279, + "step": 3200 + }, + { + "epoch": 0.041398788988695936, + "grad_norm": 1.3476087318247674, + "learning_rate": 1.3795030521881181e-06, + "loss": 0.4323, + "step": 3210 + }, + { + "epoch": 0.04152775717869187, + "grad_norm": 1.4035391790406568, + "learning_rate": 1.383801908692288e-06, + "loss": 0.4146, + "step": 3220 + }, + { + "epoch": 0.041656725368687815, + "grad_norm": 1.3417637965202676, + "learning_rate": 1.3881007651964579e-06, + "loss": 0.4126, + "step": 3230 + }, + { + "epoch": 0.04178569355868375, + "grad_norm": 1.1897660659086977, + "learning_rate": 1.3923996217006278e-06, + "loss": 0.4287, + "step": 3240 + }, + { + "epoch": 0.04191466174867969, + "grad_norm": 1.2964911535382733, + "learning_rate": 1.3966984782047976e-06, + "loss": 0.428, + "step": 3250 + }, + { + "epoch": 0.04204362993867562, + "grad_norm": 1.2798293920065882, + "learning_rate": 1.4009973347089675e-06, + "loss": 0.4221, + "step": 3260 + }, + { + "epoch": 0.042172598128671565, + "grad_norm": 1.3082558147244205, + "learning_rate": 1.4052961912131374e-06, + "loss": 0.4206, + "step": 3270 + }, + { + "epoch": 0.0423015663186675, + "grad_norm": 1.2939291301525473, + "learning_rate": 1.4095950477173073e-06, + "loss": 0.4155, + "step": 3280 + }, + { + "epoch": 0.04243053450866344, + "grad_norm": 1.3098751868573442, + "learning_rate": 1.4138939042214771e-06, + "loss": 0.4285, + "step": 3290 + }, + { + "epoch": 0.04255950269865937, + "grad_norm": 1.3122819450715242, + "learning_rate": 1.418192760725647e-06, + "loss": 0.4233, + "step": 3300 + }, + { + "epoch": 0.042688470888655315, + "grad_norm": 1.2222388870563106, + "learning_rate": 1.4224916172298169e-06, + "loss": 0.4071, + "step": 3310 + }, + { + "epoch": 0.04281743907865125, + "grad_norm": 1.302573451022893, + "learning_rate": 1.426790473733987e-06, + "loss": 0.4363, + "step": 3320 + }, + { + "epoch": 0.04294640726864719, + "grad_norm": 1.3190603814856283, + "learning_rate": 1.4310893302381569e-06, + "loss": 0.4178, + "step": 3330 + }, + { + "epoch": 0.04307537545864312, + "grad_norm": 1.329065350642886, + "learning_rate": 1.4353881867423267e-06, + "loss": 0.4138, + "step": 3340 + }, + { + "epoch": 0.043204343648639065, + "grad_norm": 1.3008971971881216, + "learning_rate": 1.4396870432464966e-06, + "loss": 0.4076, + "step": 3350 + }, + { + "epoch": 0.043333311838635, + "grad_norm": 1.3488393737343274, + "learning_rate": 1.4439858997506665e-06, + "loss": 0.4227, + "step": 3360 + }, + { + "epoch": 0.04346228002863094, + "grad_norm": 1.3841821791564877, + "learning_rate": 1.4482847562548364e-06, + "loss": 0.427, + "step": 3370 + }, + { + "epoch": 0.04359124821862687, + "grad_norm": 1.3468896044314824, + "learning_rate": 1.4525836127590062e-06, + "loss": 0.4198, + "step": 3380 + }, + { + "epoch": 0.043720216408622815, + "grad_norm": 1.1681682482552647, + "learning_rate": 1.4568824692631761e-06, + "loss": 0.4277, + "step": 3390 + }, + { + "epoch": 0.04384918459861875, + "grad_norm": 1.454924853657252, + "learning_rate": 1.461181325767346e-06, + "loss": 0.4222, + "step": 3400 + }, + { + "epoch": 0.04397815278861469, + "grad_norm": 1.2827134255468895, + "learning_rate": 1.4654801822715159e-06, + "loss": 0.4304, + "step": 3410 + }, + { + "epoch": 0.04410712097861062, + "grad_norm": 1.3550257698961308, + "learning_rate": 1.4697790387756857e-06, + "loss": 0.4178, + "step": 3420 + }, + { + "epoch": 0.044236089168606565, + "grad_norm": 1.5238785943515734, + "learning_rate": 1.4740778952798556e-06, + "loss": 0.4342, + "step": 3430 + }, + { + "epoch": 0.0443650573586025, + "grad_norm": 1.344719834429467, + "learning_rate": 1.4783767517840255e-06, + "loss": 0.4196, + "step": 3440 + }, + { + "epoch": 0.04449402554859844, + "grad_norm": 1.235742772437303, + "learning_rate": 1.4826756082881954e-06, + "loss": 0.4224, + "step": 3450 + }, + { + "epoch": 0.04462299373859437, + "grad_norm": 1.2628698513638883, + "learning_rate": 1.4869744647923653e-06, + "loss": 0.4129, + "step": 3460 + }, + { + "epoch": 0.044751961928590316, + "grad_norm": 1.3934315366272805, + "learning_rate": 1.4912733212965351e-06, + "loss": 0.4094, + "step": 3470 + }, + { + "epoch": 0.04488093011858625, + "grad_norm": 1.4338168540855696, + "learning_rate": 1.4955721778007052e-06, + "loss": 0.4092, + "step": 3480 + }, + { + "epoch": 0.04500989830858219, + "grad_norm": 1.5593037587138896, + "learning_rate": 1.499871034304875e-06, + "loss": 0.423, + "step": 3490 + }, + { + "epoch": 0.04513886649857812, + "grad_norm": 1.234808570378308, + "learning_rate": 1.504169890809045e-06, + "loss": 0.4108, + "step": 3500 + }, + { + "epoch": 0.045267834688574066, + "grad_norm": 1.3438413009366215, + "learning_rate": 1.5084687473132149e-06, + "loss": 0.4104, + "step": 3510 + }, + { + "epoch": 0.04539680287857, + "grad_norm": 1.4670326827322269, + "learning_rate": 1.5127676038173847e-06, + "loss": 0.4036, + "step": 3520 + }, + { + "epoch": 0.04552577106856594, + "grad_norm": 1.3680940153697392, + "learning_rate": 1.5170664603215546e-06, + "loss": 0.4215, + "step": 3530 + }, + { + "epoch": 0.04565473925856187, + "grad_norm": 1.3472386466428874, + "learning_rate": 1.5213653168257245e-06, + "loss": 0.4181, + "step": 3540 + }, + { + "epoch": 0.045783707448557816, + "grad_norm": 1.3307688023003204, + "learning_rate": 1.5256641733298944e-06, + "loss": 0.4071, + "step": 3550 + }, + { + "epoch": 0.04591267563855375, + "grad_norm": 1.2731819855578521, + "learning_rate": 1.5299630298340642e-06, + "loss": 0.4153, + "step": 3560 + }, + { + "epoch": 0.04604164382854969, + "grad_norm": 1.4121010713595075, + "learning_rate": 1.5342618863382341e-06, + "loss": 0.4075, + "step": 3570 + }, + { + "epoch": 0.04617061201854562, + "grad_norm": 1.3848747452289267, + "learning_rate": 1.538560742842404e-06, + "loss": 0.4115, + "step": 3580 + }, + { + "epoch": 0.046299580208541566, + "grad_norm": 1.4555058319122316, + "learning_rate": 1.5428595993465739e-06, + "loss": 0.4173, + "step": 3590 + }, + { + "epoch": 0.0464285483985375, + "grad_norm": 1.3737559860724147, + "learning_rate": 1.5471584558507437e-06, + "loss": 0.4256, + "step": 3600 + }, + { + "epoch": 0.04655751658853344, + "grad_norm": 1.2437465260793705, + "learning_rate": 1.5514573123549136e-06, + "loss": 0.4188, + "step": 3610 + }, + { + "epoch": 0.04668648477852937, + "grad_norm": 1.175014984428312, + "learning_rate": 1.5557561688590835e-06, + "loss": 0.4255, + "step": 3620 + }, + { + "epoch": 0.046815452968525316, + "grad_norm": 1.2662693098509208, + "learning_rate": 1.5600550253632536e-06, + "loss": 0.4099, + "step": 3630 + }, + { + "epoch": 0.04694442115852125, + "grad_norm": 1.3242654571750285, + "learning_rate": 1.5643538818674235e-06, + "loss": 0.4181, + "step": 3640 + }, + { + "epoch": 0.04707338934851719, + "grad_norm": 1.2199850413215325, + "learning_rate": 1.5686527383715933e-06, + "loss": 0.4027, + "step": 3650 + }, + { + "epoch": 0.04720235753851312, + "grad_norm": 1.2635509270273413, + "learning_rate": 1.5729515948757632e-06, + "loss": 0.4063, + "step": 3660 + }, + { + "epoch": 0.047331325728509066, + "grad_norm": 1.2679680136254816, + "learning_rate": 1.577250451379933e-06, + "loss": 0.4187, + "step": 3670 + }, + { + "epoch": 0.047460293918505, + "grad_norm": 1.2829164922922818, + "learning_rate": 1.581549307884103e-06, + "loss": 0.4084, + "step": 3680 + }, + { + "epoch": 0.04758926210850094, + "grad_norm": 1.2310936329745357, + "learning_rate": 1.5858481643882728e-06, + "loss": 0.419, + "step": 3690 + }, + { + "epoch": 0.04771823029849687, + "grad_norm": 1.32344653297051, + "learning_rate": 1.5901470208924427e-06, + "loss": 0.4155, + "step": 3700 + }, + { + "epoch": 0.047847198488492816, + "grad_norm": 1.3439893022570513, + "learning_rate": 1.5944458773966126e-06, + "loss": 0.4122, + "step": 3710 + }, + { + "epoch": 0.04797616667848875, + "grad_norm": 1.3514581029034867, + "learning_rate": 1.5987447339007825e-06, + "loss": 0.399, + "step": 3720 + }, + { + "epoch": 0.04810513486848469, + "grad_norm": 1.3344307468351269, + "learning_rate": 1.6030435904049524e-06, + "loss": 0.4068, + "step": 3730 + }, + { + "epoch": 0.048234103058480624, + "grad_norm": 1.27432085135716, + "learning_rate": 1.6073424469091222e-06, + "loss": 0.4095, + "step": 3740 + }, + { + "epoch": 0.048363071248476566, + "grad_norm": 1.3097145583629715, + "learning_rate": 1.6116413034132921e-06, + "loss": 0.3982, + "step": 3750 + }, + { + "epoch": 0.0484920394384725, + "grad_norm": 1.3989594018129505, + "learning_rate": 1.615940159917462e-06, + "loss": 0.4145, + "step": 3760 + }, + { + "epoch": 0.04862100762846844, + "grad_norm": 1.3693300415282776, + "learning_rate": 1.6202390164216319e-06, + "loss": 0.4071, + "step": 3770 + }, + { + "epoch": 0.048749975818464374, + "grad_norm": 1.2950041435102508, + "learning_rate": 1.6245378729258017e-06, + "loss": 0.4181, + "step": 3780 + }, + { + "epoch": 0.048878944008460316, + "grad_norm": 1.3491036530317393, + "learning_rate": 1.6288367294299718e-06, + "loss": 0.4204, + "step": 3790 + }, + { + "epoch": 0.04900791219845625, + "grad_norm": 1.2787746216451295, + "learning_rate": 1.6331355859341417e-06, + "loss": 0.4056, + "step": 3800 + }, + { + "epoch": 0.04913688038845219, + "grad_norm": 1.3804530169571883, + "learning_rate": 1.6374344424383116e-06, + "loss": 0.4055, + "step": 3810 + }, + { + "epoch": 0.049265848578448124, + "grad_norm": 1.3427575703406094, + "learning_rate": 1.6417332989424815e-06, + "loss": 0.4027, + "step": 3820 + }, + { + "epoch": 0.049394816768444066, + "grad_norm": 1.2428289868413713, + "learning_rate": 1.6460321554466513e-06, + "loss": 0.4122, + "step": 3830 + }, + { + "epoch": 0.04952378495844, + "grad_norm": 1.3882431169242655, + "learning_rate": 1.6503310119508212e-06, + "loss": 0.4155, + "step": 3840 + }, + { + "epoch": 0.04965275314843594, + "grad_norm": 1.2676179161711656, + "learning_rate": 1.654629868454991e-06, + "loss": 0.4274, + "step": 3850 + }, + { + "epoch": 0.049781721338431874, + "grad_norm": 1.2291437658592792, + "learning_rate": 1.658928724959161e-06, + "loss": 0.416, + "step": 3860 + }, + { + "epoch": 0.049910689528427817, + "grad_norm": 1.263007309728464, + "learning_rate": 1.6632275814633308e-06, + "loss": 0.4184, + "step": 3870 + }, + { + "epoch": 0.05003965771842375, + "grad_norm": 1.2723477518889748, + "learning_rate": 1.6675264379675007e-06, + "loss": 0.3964, + "step": 3880 + }, + { + "epoch": 0.05016862590841969, + "grad_norm": 1.332723794479474, + "learning_rate": 1.6718252944716706e-06, + "loss": 0.3944, + "step": 3890 + }, + { + "epoch": 0.050297594098415624, + "grad_norm": 1.3972726118964875, + "learning_rate": 1.6761241509758405e-06, + "loss": 0.405, + "step": 3900 + }, + { + "epoch": 0.05042656228841157, + "grad_norm": 1.4345717996662035, + "learning_rate": 1.6804230074800104e-06, + "loss": 0.4033, + "step": 3910 + }, + { + "epoch": 0.0505555304784075, + "grad_norm": 1.3507448248863287, + "learning_rate": 1.6847218639841802e-06, + "loss": 0.4267, + "step": 3920 + }, + { + "epoch": 0.05068449866840344, + "grad_norm": 1.3766809220075602, + "learning_rate": 1.6890207204883501e-06, + "loss": 0.401, + "step": 3930 + }, + { + "epoch": 0.050813466858399374, + "grad_norm": 1.2246004423021313, + "learning_rate": 1.69331957699252e-06, + "loss": 0.4182, + "step": 3940 + }, + { + "epoch": 0.05094243504839532, + "grad_norm": 1.39327976667926, + "learning_rate": 1.69761843349669e-06, + "loss": 0.4073, + "step": 3950 + }, + { + "epoch": 0.05107140323839125, + "grad_norm": 1.412740251619544, + "learning_rate": 1.70191729000086e-06, + "loss": 0.4047, + "step": 3960 + }, + { + "epoch": 0.05120037142838719, + "grad_norm": 1.3832691967109936, + "learning_rate": 1.7062161465050298e-06, + "loss": 0.4266, + "step": 3970 + }, + { + "epoch": 0.051329339618383124, + "grad_norm": 1.3502771918204945, + "learning_rate": 1.7105150030091997e-06, + "loss": 0.399, + "step": 3980 + }, + { + "epoch": 0.05145830780837906, + "grad_norm": 1.2682668293506396, + "learning_rate": 1.7148138595133696e-06, + "loss": 0.402, + "step": 3990 + }, + { + "epoch": 0.051587275998375, + "grad_norm": 1.3079916147446269, + "learning_rate": 1.7191127160175395e-06, + "loss": 0.4079, + "step": 4000 + }, + { + "epoch": 0.05171624418837094, + "grad_norm": 1.2593940005421407, + "learning_rate": 1.7234115725217093e-06, + "loss": 0.4031, + "step": 4010 + }, + { + "epoch": 0.051845212378366874, + "grad_norm": 1.29735299856293, + "learning_rate": 1.7277104290258792e-06, + "loss": 0.3906, + "step": 4020 + }, + { + "epoch": 0.05197418056836281, + "grad_norm": 1.382178558170271, + "learning_rate": 1.732009285530049e-06, + "loss": 0.4044, + "step": 4030 + }, + { + "epoch": 0.05210314875835875, + "grad_norm": 1.4244487945059192, + "learning_rate": 1.736308142034219e-06, + "loss": 0.3962, + "step": 4040 + }, + { + "epoch": 0.05223211694835469, + "grad_norm": 1.3227336040393574, + "learning_rate": 1.7406069985383888e-06, + "loss": 0.4154, + "step": 4050 + }, + { + "epoch": 0.052361085138350624, + "grad_norm": 1.2565541747302253, + "learning_rate": 1.7449058550425587e-06, + "loss": 0.3882, + "step": 4060 + }, + { + "epoch": 0.05249005332834656, + "grad_norm": 1.2765692042460706, + "learning_rate": 1.7492047115467286e-06, + "loss": 0.3957, + "step": 4070 + }, + { + "epoch": 0.0526190215183425, + "grad_norm": 1.3094885238512988, + "learning_rate": 1.7535035680508985e-06, + "loss": 0.3967, + "step": 4080 + }, + { + "epoch": 0.05274798970833844, + "grad_norm": 1.4494967012665987, + "learning_rate": 1.7578024245550683e-06, + "loss": 0.3953, + "step": 4090 + }, + { + "epoch": 0.052876957898334374, + "grad_norm": 1.439987445594872, + "learning_rate": 1.7621012810592384e-06, + "loss": 0.4116, + "step": 4100 + }, + { + "epoch": 0.05300592608833031, + "grad_norm": 1.3743127411914924, + "learning_rate": 1.7664001375634083e-06, + "loss": 0.4161, + "step": 4110 + }, + { + "epoch": 0.05313489427832625, + "grad_norm": 1.3581091376154075, + "learning_rate": 1.7706989940675782e-06, + "loss": 0.4052, + "step": 4120 + }, + { + "epoch": 0.05326386246832219, + "grad_norm": 1.369862761764106, + "learning_rate": 1.774997850571748e-06, + "loss": 0.3996, + "step": 4130 + }, + { + "epoch": 0.053392830658318124, + "grad_norm": 1.3017772504218819, + "learning_rate": 1.779296707075918e-06, + "loss": 0.4035, + "step": 4140 + }, + { + "epoch": 0.05352179884831406, + "grad_norm": 1.3878660586984262, + "learning_rate": 1.7835955635800878e-06, + "loss": 0.4033, + "step": 4150 + }, + { + "epoch": 0.05365076703831, + "grad_norm": 1.342165260667782, + "learning_rate": 1.7878944200842577e-06, + "loss": 0.391, + "step": 4160 + }, + { + "epoch": 0.05377973522830594, + "grad_norm": 1.2743353900524492, + "learning_rate": 1.7921932765884276e-06, + "loss": 0.4114, + "step": 4170 + }, + { + "epoch": 0.053908703418301875, + "grad_norm": 1.2731185324392875, + "learning_rate": 1.7964921330925975e-06, + "loss": 0.4113, + "step": 4180 + }, + { + "epoch": 0.05403767160829781, + "grad_norm": 1.3159332515090119, + "learning_rate": 1.8007909895967673e-06, + "loss": 0.4085, + "step": 4190 + }, + { + "epoch": 0.05416663979829375, + "grad_norm": 1.3793751281719648, + "learning_rate": 1.8050898461009372e-06, + "loss": 0.3983, + "step": 4200 + }, + { + "epoch": 0.05429560798828969, + "grad_norm": 1.4451733861522824, + "learning_rate": 1.809388702605107e-06, + "loss": 0.3944, + "step": 4210 + }, + { + "epoch": 0.054424576178285625, + "grad_norm": 1.3507110003889642, + "learning_rate": 1.813687559109277e-06, + "loss": 0.391, + "step": 4220 + }, + { + "epoch": 0.05455354436828156, + "grad_norm": 1.3101192203622696, + "learning_rate": 1.8179864156134468e-06, + "loss": 0.3943, + "step": 4230 + }, + { + "epoch": 0.0546825125582775, + "grad_norm": 1.3094272164916976, + "learning_rate": 1.8222852721176167e-06, + "loss": 0.4091, + "step": 4240 + }, + { + "epoch": 0.05481148074827344, + "grad_norm": 1.3592825291001578, + "learning_rate": 1.8265841286217866e-06, + "loss": 0.3825, + "step": 4250 + }, + { + "epoch": 0.054940448938269375, + "grad_norm": 1.3372183875963768, + "learning_rate": 1.8308829851259567e-06, + "loss": 0.393, + "step": 4260 + }, + { + "epoch": 0.05506941712826531, + "grad_norm": 1.3275839000149912, + "learning_rate": 1.8351818416301266e-06, + "loss": 0.3978, + "step": 4270 + }, + { + "epoch": 0.05519838531826125, + "grad_norm": 1.3568920058152751, + "learning_rate": 1.8394806981342964e-06, + "loss": 0.395, + "step": 4280 + }, + { + "epoch": 0.05532735350825719, + "grad_norm": 1.3201366816241502, + "learning_rate": 1.8437795546384663e-06, + "loss": 0.3865, + "step": 4290 + }, + { + "epoch": 0.055456321698253125, + "grad_norm": 1.2959873845695935, + "learning_rate": 1.8480784111426362e-06, + "loss": 0.4062, + "step": 4300 + }, + { + "epoch": 0.05558528988824906, + "grad_norm": 1.368814095306724, + "learning_rate": 1.852377267646806e-06, + "loss": 0.4009, + "step": 4310 + }, + { + "epoch": 0.055714258078245, + "grad_norm": 1.3136762724306492, + "learning_rate": 1.856676124150976e-06, + "loss": 0.4187, + "step": 4320 + }, + { + "epoch": 0.05584322626824094, + "grad_norm": 1.3725572308649456, + "learning_rate": 1.8609749806551458e-06, + "loss": 0.4027, + "step": 4330 + }, + { + "epoch": 0.055972194458236875, + "grad_norm": 1.4291335806815055, + "learning_rate": 1.8652738371593157e-06, + "loss": 0.3984, + "step": 4340 + }, + { + "epoch": 0.05610116264823281, + "grad_norm": 1.4552785032546154, + "learning_rate": 1.8695726936634856e-06, + "loss": 0.4083, + "step": 4350 + }, + { + "epoch": 0.05623013083822875, + "grad_norm": 1.2984770642764332, + "learning_rate": 1.8738715501676554e-06, + "loss": 0.3961, + "step": 4360 + }, + { + "epoch": 0.05635909902822469, + "grad_norm": 1.3566103673241046, + "learning_rate": 1.8781704066718253e-06, + "loss": 0.3983, + "step": 4370 + }, + { + "epoch": 0.056488067218220625, + "grad_norm": 1.324298563614767, + "learning_rate": 1.8824692631759952e-06, + "loss": 0.3761, + "step": 4380 + }, + { + "epoch": 0.05661703540821656, + "grad_norm": 1.293369658167834, + "learning_rate": 1.886768119680165e-06, + "loss": 0.3934, + "step": 4390 + }, + { + "epoch": 0.056746003598212504, + "grad_norm": 1.3113504894330268, + "learning_rate": 1.891066976184335e-06, + "loss": 0.4029, + "step": 4400 + }, + { + "epoch": 0.05687497178820844, + "grad_norm": 1.319248816180501, + "learning_rate": 1.8953658326885048e-06, + "loss": 0.3913, + "step": 4410 + }, + { + "epoch": 0.057003939978204375, + "grad_norm": 1.304888053250441, + "learning_rate": 1.899664689192675e-06, + "loss": 0.4111, + "step": 4420 + }, + { + "epoch": 0.05713290816820031, + "grad_norm": 1.244344793887255, + "learning_rate": 1.9039635456968448e-06, + "loss": 0.4077, + "step": 4430 + }, + { + "epoch": 0.057261876358196254, + "grad_norm": 1.3398134020610255, + "learning_rate": 1.9082624022010147e-06, + "loss": 0.392, + "step": 4440 + }, + { + "epoch": 0.05739084454819219, + "grad_norm": 1.3391625546076429, + "learning_rate": 1.9125612587051846e-06, + "loss": 0.406, + "step": 4450 + }, + { + "epoch": 0.057519812738188125, + "grad_norm": 1.3250947583267558, + "learning_rate": 1.9168601152093544e-06, + "loss": 0.4004, + "step": 4460 + }, + { + "epoch": 0.05764878092818406, + "grad_norm": 1.4258655059975782, + "learning_rate": 1.9211589717135243e-06, + "loss": 0.3962, + "step": 4470 + }, + { + "epoch": 0.057777749118180004, + "grad_norm": 1.2955039339368446, + "learning_rate": 1.925457828217694e-06, + "loss": 0.3918, + "step": 4480 + }, + { + "epoch": 0.05790671730817594, + "grad_norm": 1.282609092153226, + "learning_rate": 1.929756684721864e-06, + "loss": 0.3968, + "step": 4490 + }, + { + "epoch": 0.058035685498171875, + "grad_norm": 1.3786417536192401, + "learning_rate": 1.934055541226034e-06, + "loss": 0.3969, + "step": 4500 + }, + { + "epoch": 0.05816465368816781, + "grad_norm": 1.2408031423141006, + "learning_rate": 1.938354397730204e-06, + "loss": 0.391, + "step": 4510 + }, + { + "epoch": 0.058293621878163754, + "grad_norm": 1.3441154955639192, + "learning_rate": 1.9426532542343737e-06, + "loss": 0.3952, + "step": 4520 + }, + { + "epoch": 0.05842259006815969, + "grad_norm": 1.3357946327293893, + "learning_rate": 1.9469521107385436e-06, + "loss": 0.4127, + "step": 4530 + }, + { + "epoch": 0.058551558258155625, + "grad_norm": 1.3032637682953134, + "learning_rate": 1.9512509672427134e-06, + "loss": 0.3841, + "step": 4540 + }, + { + "epoch": 0.05868052644815156, + "grad_norm": 1.3365183387252075, + "learning_rate": 1.9555498237468833e-06, + "loss": 0.4007, + "step": 4550 + }, + { + "epoch": 0.058809494638147504, + "grad_norm": 1.2592065858316905, + "learning_rate": 1.959848680251053e-06, + "loss": 0.3793, + "step": 4560 + }, + { + "epoch": 0.05893846282814344, + "grad_norm": 1.4796806806740204, + "learning_rate": 1.964147536755223e-06, + "loss": 0.3919, + "step": 4570 + }, + { + "epoch": 0.059067431018139376, + "grad_norm": 1.3136712758573539, + "learning_rate": 1.968446393259393e-06, + "loss": 0.4049, + "step": 4580 + }, + { + "epoch": 0.05919639920813531, + "grad_norm": 1.3574767397256928, + "learning_rate": 1.972745249763563e-06, + "loss": 0.4192, + "step": 4590 + }, + { + "epoch": 0.059325367398131254, + "grad_norm": 1.3302644480131964, + "learning_rate": 1.9770441062677327e-06, + "loss": 0.3967, + "step": 4600 + }, + { + "epoch": 0.05945433558812719, + "grad_norm": 1.345032188909606, + "learning_rate": 1.9813429627719026e-06, + "loss": 0.3925, + "step": 4610 + }, + { + "epoch": 0.059583303778123126, + "grad_norm": 1.3213752203945572, + "learning_rate": 1.9856418192760725e-06, + "loss": 0.3888, + "step": 4620 + }, + { + "epoch": 0.05971227196811906, + "grad_norm": 1.367879752995254, + "learning_rate": 1.9899406757802423e-06, + "loss": 0.4074, + "step": 4630 + }, + { + "epoch": 0.059841240158115004, + "grad_norm": 1.4218812186746992, + "learning_rate": 1.9942395322844122e-06, + "loss": 0.3882, + "step": 4640 + }, + { + "epoch": 0.05997020834811094, + "grad_norm": 1.465714888245894, + "learning_rate": 1.9985383887885825e-06, + "loss": 0.406, + "step": 4650 + }, + { + "epoch": 0.060099176538106876, + "grad_norm": 1.3744146049055375, + "learning_rate": 2.0028372452927524e-06, + "loss": 0.3951, + "step": 4660 + }, + { + "epoch": 0.06022814472810281, + "grad_norm": 1.4529293715781675, + "learning_rate": 2.0071361017969223e-06, + "loss": 0.3874, + "step": 4670 + }, + { + "epoch": 0.060357112918098754, + "grad_norm": 1.4127240070103093, + "learning_rate": 2.011434958301092e-06, + "loss": 0.4014, + "step": 4680 + }, + { + "epoch": 0.06048608110809469, + "grad_norm": 1.2391130931805385, + "learning_rate": 2.015733814805262e-06, + "loss": 0.3922, + "step": 4690 + }, + { + "epoch": 0.060615049298090626, + "grad_norm": 1.3107792176561432, + "learning_rate": 2.020032671309432e-06, + "loss": 0.3999, + "step": 4700 + }, + { + "epoch": 0.06074401748808656, + "grad_norm": 1.2416001498961835, + "learning_rate": 2.0243315278136018e-06, + "loss": 0.4058, + "step": 4710 + }, + { + "epoch": 0.060872985678082504, + "grad_norm": 1.427724517612662, + "learning_rate": 2.0286303843177717e-06, + "loss": 0.391, + "step": 4720 + }, + { + "epoch": 0.06100195386807844, + "grad_norm": 1.3951120349241324, + "learning_rate": 2.0329292408219415e-06, + "loss": 0.3988, + "step": 4730 + }, + { + "epoch": 0.061130922058074376, + "grad_norm": 1.477951186643124, + "learning_rate": 2.0372280973261114e-06, + "loss": 0.3971, + "step": 4740 + }, + { + "epoch": 0.06125989024807031, + "grad_norm": 1.3561564443566818, + "learning_rate": 2.0415269538302813e-06, + "loss": 0.392, + "step": 4750 + }, + { + "epoch": 0.061388858438066254, + "grad_norm": 1.4067027654614341, + "learning_rate": 2.045825810334451e-06, + "loss": 0.4014, + "step": 4760 + }, + { + "epoch": 0.06151782662806219, + "grad_norm": 1.4410678714239846, + "learning_rate": 2.050124666838621e-06, + "loss": 0.3966, + "step": 4770 + }, + { + "epoch": 0.061646794818058126, + "grad_norm": 1.351829892894227, + "learning_rate": 2.054423523342791e-06, + "loss": 0.3837, + "step": 4780 + }, + { + "epoch": 0.06177576300805406, + "grad_norm": 1.346467224009561, + "learning_rate": 2.058722379846961e-06, + "loss": 0.3888, + "step": 4790 + }, + { + "epoch": 0.06190473119805, + "grad_norm": 1.2516041546813714, + "learning_rate": 2.0630212363511307e-06, + "loss": 0.3797, + "step": 4800 + }, + { + "epoch": 0.06203369938804594, + "grad_norm": 1.3316578179587668, + "learning_rate": 2.0673200928553005e-06, + "loss": 0.4083, + "step": 4810 + }, + { + "epoch": 0.062162667578041876, + "grad_norm": 1.4690109035941026, + "learning_rate": 2.0716189493594704e-06, + "loss": 0.3857, + "step": 4820 + }, + { + "epoch": 0.06229163576803781, + "grad_norm": 1.302380939141067, + "learning_rate": 2.0759178058636403e-06, + "loss": 0.4059, + "step": 4830 + }, + { + "epoch": 0.06242060395803375, + "grad_norm": 1.2858313095997764, + "learning_rate": 2.08021666236781e-06, + "loss": 0.3862, + "step": 4840 + }, + { + "epoch": 0.06254957214802968, + "grad_norm": 1.3167789974425128, + "learning_rate": 2.08451551887198e-06, + "loss": 0.3854, + "step": 4850 + }, + { + "epoch": 0.06267854033802563, + "grad_norm": 1.231929209660233, + "learning_rate": 2.08881437537615e-06, + "loss": 0.3889, + "step": 4860 + }, + { + "epoch": 0.06280750852802157, + "grad_norm": 1.4867361304300608, + "learning_rate": 2.09311323188032e-06, + "loss": 0.3944, + "step": 4870 + }, + { + "epoch": 0.0629364767180175, + "grad_norm": 1.2945191072490356, + "learning_rate": 2.0974120883844897e-06, + "loss": 0.3997, + "step": 4880 + }, + { + "epoch": 0.06306544490801344, + "grad_norm": 1.3011848393040657, + "learning_rate": 2.1017109448886596e-06, + "loss": 0.3753, + "step": 4890 + }, + { + "epoch": 0.06319441309800937, + "grad_norm": 1.3984391031580408, + "learning_rate": 2.1060098013928294e-06, + "loss": 0.3895, + "step": 4900 + }, + { + "epoch": 0.06332338128800531, + "grad_norm": 1.3510261587113053, + "learning_rate": 2.1103086578969993e-06, + "loss": 0.3944, + "step": 4910 + }, + { + "epoch": 0.06345234947800125, + "grad_norm": 1.4665009876357173, + "learning_rate": 2.114607514401169e-06, + "loss": 0.38, + "step": 4920 + }, + { + "epoch": 0.06358131766799718, + "grad_norm": 1.3044095677880156, + "learning_rate": 2.118906370905339e-06, + "loss": 0.3911, + "step": 4930 + }, + { + "epoch": 0.06371028585799313, + "grad_norm": 1.1867547723146021, + "learning_rate": 2.123205227409509e-06, + "loss": 0.3901, + "step": 4940 + }, + { + "epoch": 0.06383925404798907, + "grad_norm": 1.30169501473274, + "learning_rate": 2.127504083913679e-06, + "loss": 0.3808, + "step": 4950 + }, + { + "epoch": 0.063968222237985, + "grad_norm": 1.2734543431672394, + "learning_rate": 2.1318029404178487e-06, + "loss": 0.3878, + "step": 4960 + }, + { + "epoch": 0.06409719042798094, + "grad_norm": 1.289815674977146, + "learning_rate": 2.136101796922019e-06, + "loss": 0.3884, + "step": 4970 + }, + { + "epoch": 0.06422615861797687, + "grad_norm": 1.3365037213763171, + "learning_rate": 2.140400653426189e-06, + "loss": 0.3836, + "step": 4980 + }, + { + "epoch": 0.06435512680797281, + "grad_norm": 1.36782076467337, + "learning_rate": 2.1446995099303588e-06, + "loss": 0.3867, + "step": 4990 + }, + { + "epoch": 0.06448409499796875, + "grad_norm": 1.5151551435450727, + "learning_rate": 2.1489983664345286e-06, + "loss": 0.3965, + "step": 5000 + }, + { + "epoch": 0.06461306318796468, + "grad_norm": 1.491495662437576, + "learning_rate": 2.1532972229386985e-06, + "loss": 0.4017, + "step": 5010 + }, + { + "epoch": 0.06474203137796063, + "grad_norm": 1.3230992014568441, + "learning_rate": 2.1575960794428684e-06, + "loss": 0.3967, + "step": 5020 + }, + { + "epoch": 0.06487099956795657, + "grad_norm": 1.3790236860603313, + "learning_rate": 2.1618949359470383e-06, + "loss": 0.3813, + "step": 5030 + }, + { + "epoch": 0.0649999677579525, + "grad_norm": 1.3024974066613213, + "learning_rate": 2.166193792451208e-06, + "loss": 0.3825, + "step": 5040 + }, + { + "epoch": 0.06512893594794844, + "grad_norm": 1.339225079368569, + "learning_rate": 2.170492648955378e-06, + "loss": 0.3912, + "step": 5050 + }, + { + "epoch": 0.06525790413794437, + "grad_norm": 1.2956414746486442, + "learning_rate": 2.174791505459548e-06, + "loss": 0.384, + "step": 5060 + }, + { + "epoch": 0.06538687232794031, + "grad_norm": 1.2475039129129117, + "learning_rate": 2.1790903619637178e-06, + "loss": 0.3852, + "step": 5070 + }, + { + "epoch": 0.06551584051793626, + "grad_norm": 1.3997453099578772, + "learning_rate": 2.1833892184678876e-06, + "loss": 0.393, + "step": 5080 + }, + { + "epoch": 0.06564480870793218, + "grad_norm": 1.5362962006589231, + "learning_rate": 2.1876880749720575e-06, + "loss": 0.3894, + "step": 5090 + }, + { + "epoch": 0.06577377689792813, + "grad_norm": 1.3059193617626714, + "learning_rate": 2.1919869314762274e-06, + "loss": 0.3862, + "step": 5100 + }, + { + "epoch": 0.06590274508792407, + "grad_norm": 1.426890790630524, + "learning_rate": 2.1962857879803973e-06, + "loss": 0.4038, + "step": 5110 + }, + { + "epoch": 0.06603171327792, + "grad_norm": 1.2799414699527307, + "learning_rate": 2.200584644484567e-06, + "loss": 0.3825, + "step": 5120 + }, + { + "epoch": 0.06616068146791594, + "grad_norm": 1.326855650031259, + "learning_rate": 2.204883500988737e-06, + "loss": 0.3816, + "step": 5130 + }, + { + "epoch": 0.06628964965791187, + "grad_norm": 1.2666375786225674, + "learning_rate": 2.209182357492907e-06, + "loss": 0.4031, + "step": 5140 + }, + { + "epoch": 0.06641861784790781, + "grad_norm": 1.2202544995243805, + "learning_rate": 2.2134812139970768e-06, + "loss": 0.3833, + "step": 5150 + }, + { + "epoch": 0.06654758603790376, + "grad_norm": 1.2597463242206075, + "learning_rate": 2.2177800705012467e-06, + "loss": 0.3758, + "step": 5160 + }, + { + "epoch": 0.06667655422789968, + "grad_norm": 1.320358147928223, + "learning_rate": 2.2220789270054165e-06, + "loss": 0.3981, + "step": 5170 + }, + { + "epoch": 0.06680552241789563, + "grad_norm": 1.305531659768506, + "learning_rate": 2.2263777835095864e-06, + "loss": 0.3807, + "step": 5180 + }, + { + "epoch": 0.06693449060789157, + "grad_norm": 1.2293474645323852, + "learning_rate": 2.2306766400137563e-06, + "loss": 0.3688, + "step": 5190 + }, + { + "epoch": 0.0670634587978875, + "grad_norm": 1.3992596816748006, + "learning_rate": 2.234975496517926e-06, + "loss": 0.3906, + "step": 5200 + }, + { + "epoch": 0.06719242698788344, + "grad_norm": 1.2508744887171068, + "learning_rate": 2.239274353022096e-06, + "loss": 0.3799, + "step": 5210 + }, + { + "epoch": 0.06732139517787937, + "grad_norm": 1.3708133444234765, + "learning_rate": 2.243573209526266e-06, + "loss": 0.4123, + "step": 5220 + }, + { + "epoch": 0.06745036336787531, + "grad_norm": 1.48514967056443, + "learning_rate": 2.247872066030436e-06, + "loss": 0.3824, + "step": 5230 + }, + { + "epoch": 0.06757933155787126, + "grad_norm": 1.303394706413457, + "learning_rate": 2.2521709225346057e-06, + "loss": 0.3916, + "step": 5240 + }, + { + "epoch": 0.06770829974786718, + "grad_norm": 1.3758969130092638, + "learning_rate": 2.2564697790387756e-06, + "loss": 0.3899, + "step": 5250 + }, + { + "epoch": 0.06783726793786313, + "grad_norm": 1.3837383949900564, + "learning_rate": 2.260768635542946e-06, + "loss": 0.3804, + "step": 5260 + }, + { + "epoch": 0.06796623612785907, + "grad_norm": 1.3358419716516012, + "learning_rate": 2.2650674920471157e-06, + "loss": 0.391, + "step": 5270 + }, + { + "epoch": 0.068095204317855, + "grad_norm": 1.3906957217401237, + "learning_rate": 2.2693663485512856e-06, + "loss": 0.3883, + "step": 5280 + }, + { + "epoch": 0.06822417250785094, + "grad_norm": 1.3035847735715467, + "learning_rate": 2.2736652050554555e-06, + "loss": 0.3943, + "step": 5290 + }, + { + "epoch": 0.06835314069784687, + "grad_norm": 1.3914825682367327, + "learning_rate": 2.2779640615596254e-06, + "loss": 0.3751, + "step": 5300 + }, + { + "epoch": 0.06848210888784281, + "grad_norm": 1.4677960740082703, + "learning_rate": 2.2822629180637952e-06, + "loss": 0.3843, + "step": 5310 + }, + { + "epoch": 0.06861107707783876, + "grad_norm": 1.5322429806872027, + "learning_rate": 2.286561774567965e-06, + "loss": 0.3753, + "step": 5320 + }, + { + "epoch": 0.06874004526783468, + "grad_norm": 1.3569850486947395, + "learning_rate": 2.290860631072135e-06, + "loss": 0.3971, + "step": 5330 + }, + { + "epoch": 0.06886901345783063, + "grad_norm": 1.3338071230292594, + "learning_rate": 2.295159487576305e-06, + "loss": 0.3726, + "step": 5340 + }, + { + "epoch": 0.06899798164782657, + "grad_norm": 1.4279519850654574, + "learning_rate": 2.2994583440804747e-06, + "loss": 0.3823, + "step": 5350 + }, + { + "epoch": 0.0691269498378225, + "grad_norm": 1.304405429904558, + "learning_rate": 2.3037572005846446e-06, + "loss": 0.3775, + "step": 5360 + }, + { + "epoch": 0.06925591802781844, + "grad_norm": 1.3439957510045315, + "learning_rate": 2.3080560570888145e-06, + "loss": 0.3945, + "step": 5370 + }, + { + "epoch": 0.06938488621781437, + "grad_norm": 1.4457473188283125, + "learning_rate": 2.3123549135929844e-06, + "loss": 0.3906, + "step": 5380 + }, + { + "epoch": 0.06951385440781031, + "grad_norm": 1.3346683965609567, + "learning_rate": 2.3166537700971547e-06, + "loss": 0.3692, + "step": 5390 + }, + { + "epoch": 0.06964282259780626, + "grad_norm": 1.3552976170189732, + "learning_rate": 2.3209526266013246e-06, + "loss": 0.3783, + "step": 5400 + }, + { + "epoch": 0.06977179078780218, + "grad_norm": 1.260654579599766, + "learning_rate": 2.3252514831054944e-06, + "loss": 0.3803, + "step": 5410 + }, + { + "epoch": 0.06990075897779813, + "grad_norm": 1.3730748456300612, + "learning_rate": 2.3295503396096643e-06, + "loss": 0.3841, + "step": 5420 + }, + { + "epoch": 0.07002972716779407, + "grad_norm": 1.4528097616249929, + "learning_rate": 2.333849196113834e-06, + "loss": 0.3828, + "step": 5430 + }, + { + "epoch": 0.07015869535779, + "grad_norm": 1.4026890091881732, + "learning_rate": 2.338148052618004e-06, + "loss": 0.3786, + "step": 5440 + }, + { + "epoch": 0.07028766354778594, + "grad_norm": 1.3200793620538493, + "learning_rate": 2.342446909122174e-06, + "loss": 0.3967, + "step": 5450 + }, + { + "epoch": 0.07041663173778187, + "grad_norm": 1.2930127512997385, + "learning_rate": 2.346745765626344e-06, + "loss": 0.378, + "step": 5460 + }, + { + "epoch": 0.07054559992777781, + "grad_norm": 1.4710077895043132, + "learning_rate": 2.3510446221305137e-06, + "loss": 0.3793, + "step": 5470 + }, + { + "epoch": 0.07067456811777376, + "grad_norm": 1.2488307494017055, + "learning_rate": 2.3553434786346836e-06, + "loss": 0.3692, + "step": 5480 + }, + { + "epoch": 0.07080353630776968, + "grad_norm": 1.3882003515238694, + "learning_rate": 2.3596423351388534e-06, + "loss": 0.3897, + "step": 5490 + }, + { + "epoch": 0.07093250449776563, + "grad_norm": 1.4300307459852502, + "learning_rate": 2.3639411916430233e-06, + "loss": 0.3943, + "step": 5500 + }, + { + "epoch": 0.07106147268776157, + "grad_norm": 1.1988656879921145, + "learning_rate": 2.368240048147193e-06, + "loss": 0.3883, + "step": 5510 + }, + { + "epoch": 0.0711904408777575, + "grad_norm": 1.3074902781290654, + "learning_rate": 2.372538904651363e-06, + "loss": 0.3806, + "step": 5520 + }, + { + "epoch": 0.07131940906775344, + "grad_norm": 1.3716053196898201, + "learning_rate": 2.376837761155533e-06, + "loss": 0.3889, + "step": 5530 + }, + { + "epoch": 0.07144837725774937, + "grad_norm": 1.4360617871034453, + "learning_rate": 2.381136617659703e-06, + "loss": 0.3758, + "step": 5540 + }, + { + "epoch": 0.07157734544774531, + "grad_norm": 1.3580301208984233, + "learning_rate": 2.3854354741638727e-06, + "loss": 0.3656, + "step": 5550 + }, + { + "epoch": 0.07170631363774126, + "grad_norm": 1.2391607611256508, + "learning_rate": 2.3897343306680426e-06, + "loss": 0.3741, + "step": 5560 + }, + { + "epoch": 0.07183528182773719, + "grad_norm": 1.223415700553054, + "learning_rate": 2.3940331871722125e-06, + "loss": 0.3802, + "step": 5570 + }, + { + "epoch": 0.07196425001773313, + "grad_norm": 1.2142604211731953, + "learning_rate": 2.3983320436763823e-06, + "loss": 0.3854, + "step": 5580 + }, + { + "epoch": 0.07209321820772906, + "grad_norm": 1.366029494485364, + "learning_rate": 2.4026309001805522e-06, + "loss": 0.3708, + "step": 5590 + }, + { + "epoch": 0.072222186397725, + "grad_norm": 1.3060972853244774, + "learning_rate": 2.406929756684722e-06, + "loss": 0.3911, + "step": 5600 + }, + { + "epoch": 0.07235115458772094, + "grad_norm": 1.3519739772026689, + "learning_rate": 2.411228613188892e-06, + "loss": 0.3902, + "step": 5610 + }, + { + "epoch": 0.07248012277771687, + "grad_norm": 1.2444202939821736, + "learning_rate": 2.415527469693062e-06, + "loss": 0.3817, + "step": 5620 + }, + { + "epoch": 0.07260909096771281, + "grad_norm": 1.4197047682556518, + "learning_rate": 2.4198263261972317e-06, + "loss": 0.3815, + "step": 5630 + }, + { + "epoch": 0.07273805915770876, + "grad_norm": 1.3925687206947113, + "learning_rate": 2.4241251827014016e-06, + "loss": 0.3884, + "step": 5640 + }, + { + "epoch": 0.07286702734770469, + "grad_norm": 1.3697137524773433, + "learning_rate": 2.4284240392055715e-06, + "loss": 0.3713, + "step": 5650 + }, + { + "epoch": 0.07299599553770063, + "grad_norm": 1.3694845416330121, + "learning_rate": 2.4327228957097414e-06, + "loss": 0.3775, + "step": 5660 + }, + { + "epoch": 0.07312496372769656, + "grad_norm": 1.5062035917677814, + "learning_rate": 2.4370217522139112e-06, + "loss": 0.3722, + "step": 5670 + }, + { + "epoch": 0.0732539319176925, + "grad_norm": 1.3817683684467297, + "learning_rate": 2.441320608718081e-06, + "loss": 0.3741, + "step": 5680 + }, + { + "epoch": 0.07338290010768844, + "grad_norm": 1.3026247686079375, + "learning_rate": 2.445619465222251e-06, + "loss": 0.3746, + "step": 5690 + }, + { + "epoch": 0.07351186829768437, + "grad_norm": 1.336662455029684, + "learning_rate": 2.4499183217264213e-06, + "loss": 0.3863, + "step": 5700 + }, + { + "epoch": 0.07364083648768031, + "grad_norm": 1.4807887801473234, + "learning_rate": 2.454217178230591e-06, + "loss": 0.4004, + "step": 5710 + }, + { + "epoch": 0.07376980467767626, + "grad_norm": 1.4039352683273238, + "learning_rate": 2.458516034734761e-06, + "loss": 0.3729, + "step": 5720 + }, + { + "epoch": 0.07389877286767219, + "grad_norm": 1.3989998569933584, + "learning_rate": 2.462814891238931e-06, + "loss": 0.3837, + "step": 5730 + }, + { + "epoch": 0.07402774105766813, + "grad_norm": 1.3483844140450243, + "learning_rate": 2.467113747743101e-06, + "loss": 0.3823, + "step": 5740 + }, + { + "epoch": 0.07415670924766406, + "grad_norm": 1.3349013747537413, + "learning_rate": 2.4714126042472707e-06, + "loss": 0.3678, + "step": 5750 + }, + { + "epoch": 0.07428567743766, + "grad_norm": 1.4280571189809204, + "learning_rate": 2.4757114607514405e-06, + "loss": 0.3863, + "step": 5760 + }, + { + "epoch": 0.07441464562765594, + "grad_norm": 1.347715494379059, + "learning_rate": 2.4800103172556104e-06, + "loss": 0.3705, + "step": 5770 + }, + { + "epoch": 0.07454361381765187, + "grad_norm": 1.459231922799646, + "learning_rate": 2.4843091737597803e-06, + "loss": 0.3794, + "step": 5780 + }, + { + "epoch": 0.07467258200764781, + "grad_norm": 1.2952823168885483, + "learning_rate": 2.48860803026395e-06, + "loss": 0.391, + "step": 5790 + }, + { + "epoch": 0.07480155019764376, + "grad_norm": 1.5415265575972321, + "learning_rate": 2.49290688676812e-06, + "loss": 0.3709, + "step": 5800 + }, + { + "epoch": 0.07493051838763969, + "grad_norm": 1.624362620067533, + "learning_rate": 2.49720574327229e-06, + "loss": 0.3686, + "step": 5810 + }, + { + "epoch": 0.07505948657763563, + "grad_norm": 1.4728541940463706, + "learning_rate": 2.50150459977646e-06, + "loss": 0.379, + "step": 5820 + }, + { + "epoch": 0.07518845476763156, + "grad_norm": 1.340199445481173, + "learning_rate": 2.5058034562806293e-06, + "loss": 0.364, + "step": 5830 + }, + { + "epoch": 0.0753174229576275, + "grad_norm": 1.3685375652858587, + "learning_rate": 2.5101023127847996e-06, + "loss": 0.3711, + "step": 5840 + }, + { + "epoch": 0.07544639114762344, + "grad_norm": 1.3214991929401996, + "learning_rate": 2.514401169288969e-06, + "loss": 0.3807, + "step": 5850 + }, + { + "epoch": 0.07557535933761937, + "grad_norm": 1.3104418201909143, + "learning_rate": 2.5187000257931393e-06, + "loss": 0.3784, + "step": 5860 + }, + { + "epoch": 0.07570432752761531, + "grad_norm": 1.4210784698349626, + "learning_rate": 2.5229988822973088e-06, + "loss": 0.3833, + "step": 5870 + }, + { + "epoch": 0.07583329571761126, + "grad_norm": 1.4203561741004969, + "learning_rate": 2.527297738801479e-06, + "loss": 0.378, + "step": 5880 + }, + { + "epoch": 0.07596226390760719, + "grad_norm": 1.3922583404880438, + "learning_rate": 2.5315965953056485e-06, + "loss": 0.3721, + "step": 5890 + }, + { + "epoch": 0.07609123209760313, + "grad_norm": 1.4093862080520043, + "learning_rate": 2.535895451809819e-06, + "loss": 0.3927, + "step": 5900 + }, + { + "epoch": 0.07622020028759906, + "grad_norm": 1.371786159047408, + "learning_rate": 2.5401943083139887e-06, + "loss": 0.3746, + "step": 5910 + }, + { + "epoch": 0.076349168477595, + "grad_norm": 1.3010294968782876, + "learning_rate": 2.5444931648181586e-06, + "loss": 0.3771, + "step": 5920 + }, + { + "epoch": 0.07647813666759094, + "grad_norm": 1.454871634029278, + "learning_rate": 2.5487920213223285e-06, + "loss": 0.3707, + "step": 5930 + }, + { + "epoch": 0.07660710485758687, + "grad_norm": 1.383075729879537, + "learning_rate": 2.5530908778264983e-06, + "loss": 0.3746, + "step": 5940 + }, + { + "epoch": 0.07673607304758281, + "grad_norm": 1.275267958958288, + "learning_rate": 2.557389734330668e-06, + "loss": 0.3592, + "step": 5950 + }, + { + "epoch": 0.07686504123757876, + "grad_norm": 1.4192263230772912, + "learning_rate": 2.561688590834838e-06, + "loss": 0.3711, + "step": 5960 + }, + { + "epoch": 0.07699400942757469, + "grad_norm": 1.4107724257390468, + "learning_rate": 2.565987447339008e-06, + "loss": 0.3872, + "step": 5970 + }, + { + "epoch": 0.07712297761757063, + "grad_norm": 1.4091010909610722, + "learning_rate": 2.570286303843178e-06, + "loss": 0.37, + "step": 5980 + }, + { + "epoch": 0.07725194580756656, + "grad_norm": 1.4427855221580543, + "learning_rate": 2.5745851603473477e-06, + "loss": 0.3673, + "step": 5990 + }, + { + "epoch": 0.0773809139975625, + "grad_norm": 1.5357066433104334, + "learning_rate": 2.5788840168515176e-06, + "loss": 0.3904, + "step": 6000 + }, + { + "epoch": 0.07750988218755844, + "grad_norm": 1.522463773041534, + "learning_rate": 2.5831828733556875e-06, + "loss": 0.3721, + "step": 6010 + }, + { + "epoch": 0.07763885037755437, + "grad_norm": 1.500634167193824, + "learning_rate": 2.5874817298598578e-06, + "loss": 0.3846, + "step": 6020 + }, + { + "epoch": 0.07776781856755031, + "grad_norm": 1.3755182552181837, + "learning_rate": 2.5917805863640272e-06, + "loss": 0.3633, + "step": 6030 + }, + { + "epoch": 0.07789678675754626, + "grad_norm": 1.2228387030510561, + "learning_rate": 2.5960794428681975e-06, + "loss": 0.3801, + "step": 6040 + }, + { + "epoch": 0.07802575494754219, + "grad_norm": 1.325089704401817, + "learning_rate": 2.600378299372367e-06, + "loss": 0.3708, + "step": 6050 + }, + { + "epoch": 0.07815472313753813, + "grad_norm": 1.2038806129250117, + "learning_rate": 2.6046771558765373e-06, + "loss": 0.358, + "step": 6060 + }, + { + "epoch": 0.07828369132753406, + "grad_norm": 1.4955514507352614, + "learning_rate": 2.6089760123807067e-06, + "loss": 0.3765, + "step": 6070 + }, + { + "epoch": 0.07841265951753, + "grad_norm": 1.3190747333940538, + "learning_rate": 2.613274868884877e-06, + "loss": 0.3733, + "step": 6080 + }, + { + "epoch": 0.07854162770752594, + "grad_norm": 1.4228340209713264, + "learning_rate": 2.6175737253890465e-06, + "loss": 0.3746, + "step": 6090 + }, + { + "epoch": 0.07867059589752187, + "grad_norm": 1.323452058839702, + "learning_rate": 2.6218725818932168e-06, + "loss": 0.3809, + "step": 6100 + }, + { + "epoch": 0.07879956408751781, + "grad_norm": 1.4391391339445336, + "learning_rate": 2.6261714383973862e-06, + "loss": 0.3761, + "step": 6110 + }, + { + "epoch": 0.07892853227751376, + "grad_norm": 1.3809560034089097, + "learning_rate": 2.6304702949015565e-06, + "loss": 0.3819, + "step": 6120 + }, + { + "epoch": 0.07905750046750969, + "grad_norm": 1.3763426285077, + "learning_rate": 2.634769151405726e-06, + "loss": 0.3818, + "step": 6130 + }, + { + "epoch": 0.07918646865750563, + "grad_norm": 1.3156905767329257, + "learning_rate": 2.6390680079098963e-06, + "loss": 0.3689, + "step": 6140 + }, + { + "epoch": 0.07931543684750156, + "grad_norm": 1.3444499943946397, + "learning_rate": 2.6433668644140657e-06, + "loss": 0.3849, + "step": 6150 + }, + { + "epoch": 0.0794444050374975, + "grad_norm": 1.26907487303165, + "learning_rate": 2.647665720918236e-06, + "loss": 0.376, + "step": 6160 + }, + { + "epoch": 0.07957337322749344, + "grad_norm": 1.3192232747581722, + "learning_rate": 2.6519645774224055e-06, + "loss": 0.3763, + "step": 6170 + }, + { + "epoch": 0.07970234141748937, + "grad_norm": 1.3386970299951628, + "learning_rate": 2.656263433926576e-06, + "loss": 0.3862, + "step": 6180 + }, + { + "epoch": 0.07983130960748532, + "grad_norm": 1.3620503103101629, + "learning_rate": 2.6605622904307453e-06, + "loss": 0.3809, + "step": 6190 + }, + { + "epoch": 0.07996027779748126, + "grad_norm": 1.3207098652623024, + "learning_rate": 2.6648611469349156e-06, + "loss": 0.3727, + "step": 6200 + }, + { + "epoch": 0.08008924598747719, + "grad_norm": 1.2879602909051369, + "learning_rate": 2.669160003439085e-06, + "loss": 0.3683, + "step": 6210 + }, + { + "epoch": 0.08021821417747313, + "grad_norm": 1.4688210901803258, + "learning_rate": 2.6734588599432553e-06, + "loss": 0.3753, + "step": 6220 + }, + { + "epoch": 0.08034718236746906, + "grad_norm": 1.3858551811801325, + "learning_rate": 2.677757716447425e-06, + "loss": 0.366, + "step": 6230 + }, + { + "epoch": 0.080476150557465, + "grad_norm": 1.475741074824146, + "learning_rate": 2.682056572951595e-06, + "loss": 0.3782, + "step": 6240 + }, + { + "epoch": 0.08060511874746094, + "grad_norm": 1.3540112023573438, + "learning_rate": 2.686355429455765e-06, + "loss": 0.3522, + "step": 6250 + }, + { + "epoch": 0.08073408693745687, + "grad_norm": 1.4029029114663616, + "learning_rate": 2.690654285959935e-06, + "loss": 0.3706, + "step": 6260 + }, + { + "epoch": 0.08086305512745282, + "grad_norm": 1.2853582295546866, + "learning_rate": 2.6949531424641047e-06, + "loss": 0.3558, + "step": 6270 + }, + { + "epoch": 0.08099202331744876, + "grad_norm": 1.286100752948049, + "learning_rate": 2.6992519989682746e-06, + "loss": 0.367, + "step": 6280 + }, + { + "epoch": 0.08112099150744469, + "grad_norm": 1.4352907497630778, + "learning_rate": 2.7035508554724444e-06, + "loss": 0.3893, + "step": 6290 + }, + { + "epoch": 0.08124995969744063, + "grad_norm": 1.5038603742660004, + "learning_rate": 2.7078497119766143e-06, + "loss": 0.3643, + "step": 6300 + }, + { + "epoch": 0.08137892788743656, + "grad_norm": 1.4410020739639082, + "learning_rate": 2.712148568480784e-06, + "loss": 0.3829, + "step": 6310 + }, + { + "epoch": 0.0815078960774325, + "grad_norm": 1.462209086261189, + "learning_rate": 2.716447424984954e-06, + "loss": 0.3702, + "step": 6320 + }, + { + "epoch": 0.08163686426742844, + "grad_norm": 1.4174827521223259, + "learning_rate": 2.720746281489124e-06, + "loss": 0.3772, + "step": 6330 + }, + { + "epoch": 0.08176583245742437, + "grad_norm": 1.3498944621190643, + "learning_rate": 2.7250451379932943e-06, + "loss": 0.379, + "step": 6340 + }, + { + "epoch": 0.08189480064742032, + "grad_norm": 1.313242785314934, + "learning_rate": 2.7293439944974637e-06, + "loss": 0.3803, + "step": 6350 + }, + { + "epoch": 0.08202376883741624, + "grad_norm": 1.2903202881464626, + "learning_rate": 2.733642851001634e-06, + "loss": 0.3813, + "step": 6360 + }, + { + "epoch": 0.08215273702741219, + "grad_norm": 1.3034962877049192, + "learning_rate": 2.7379417075058035e-06, + "loss": 0.3665, + "step": 6370 + }, + { + "epoch": 0.08228170521740813, + "grad_norm": 1.478253322974782, + "learning_rate": 2.7422405640099738e-06, + "loss": 0.3784, + "step": 6380 + }, + { + "epoch": 0.08241067340740406, + "grad_norm": 1.2858781389013083, + "learning_rate": 2.7465394205141432e-06, + "loss": 0.3637, + "step": 6390 + }, + { + "epoch": 0.0825396415974, + "grad_norm": 1.2404785661206437, + "learning_rate": 2.7508382770183135e-06, + "loss": 0.3632, + "step": 6400 + }, + { + "epoch": 0.08266860978739594, + "grad_norm": 1.3727526711204756, + "learning_rate": 2.755137133522483e-06, + "loss": 0.3785, + "step": 6410 + }, + { + "epoch": 0.08279757797739187, + "grad_norm": 1.2774893774001312, + "learning_rate": 2.7594359900266533e-06, + "loss": 0.3577, + "step": 6420 + }, + { + "epoch": 0.08292654616738782, + "grad_norm": 1.447302133582548, + "learning_rate": 2.7637348465308227e-06, + "loss": 0.3745, + "step": 6430 + }, + { + "epoch": 0.08305551435738374, + "grad_norm": 1.2966290149853792, + "learning_rate": 2.768033703034993e-06, + "loss": 0.3734, + "step": 6440 + }, + { + "epoch": 0.08318448254737969, + "grad_norm": 1.3977416770234714, + "learning_rate": 2.7723325595391625e-06, + "loss": 0.3665, + "step": 6450 + }, + { + "epoch": 0.08331345073737563, + "grad_norm": 1.3005344572489252, + "learning_rate": 2.7766314160433328e-06, + "loss": 0.3689, + "step": 6460 + }, + { + "epoch": 0.08344241892737156, + "grad_norm": 1.4835854653873841, + "learning_rate": 2.7809302725475022e-06, + "loss": 0.3722, + "step": 6470 + }, + { + "epoch": 0.0835713871173675, + "grad_norm": 1.435035003481776, + "learning_rate": 2.7852291290516725e-06, + "loss": 0.3765, + "step": 6480 + }, + { + "epoch": 0.08370035530736344, + "grad_norm": 1.3708602568326271, + "learning_rate": 2.789527985555842e-06, + "loss": 0.3685, + "step": 6490 + }, + { + "epoch": 0.08382932349735937, + "grad_norm": 1.3714984566874797, + "learning_rate": 2.7938268420600123e-06, + "loss": 0.3682, + "step": 6500 + }, + { + "epoch": 0.08395829168735532, + "grad_norm": 1.4054636116244275, + "learning_rate": 2.7981256985641817e-06, + "loss": 0.3772, + "step": 6510 + }, + { + "epoch": 0.08408725987735124, + "grad_norm": 1.49868927855396, + "learning_rate": 2.802424555068352e-06, + "loss": 0.3725, + "step": 6520 + }, + { + "epoch": 0.08421622806734719, + "grad_norm": 1.4568778839956569, + "learning_rate": 2.8067234115725215e-06, + "loss": 0.3676, + "step": 6530 + }, + { + "epoch": 0.08434519625734313, + "grad_norm": 1.3139771974111656, + "learning_rate": 2.811022268076692e-06, + "loss": 0.3738, + "step": 6540 + }, + { + "epoch": 0.08447416444733906, + "grad_norm": 1.2654717247479805, + "learning_rate": 2.8153211245808617e-06, + "loss": 0.3729, + "step": 6550 + }, + { + "epoch": 0.084603132637335, + "grad_norm": 1.2324299425867558, + "learning_rate": 2.8196199810850315e-06, + "loss": 0.3625, + "step": 6560 + }, + { + "epoch": 0.08473210082733094, + "grad_norm": 1.3927302223493734, + "learning_rate": 2.8239188375892014e-06, + "loss": 0.377, + "step": 6570 + }, + { + "epoch": 0.08486106901732687, + "grad_norm": 1.3439459885355982, + "learning_rate": 2.8282176940933713e-06, + "loss": 0.3601, + "step": 6580 + }, + { + "epoch": 0.08499003720732282, + "grad_norm": 1.247315265010177, + "learning_rate": 2.832516550597541e-06, + "loss": 0.3823, + "step": 6590 + }, + { + "epoch": 0.08511900539731874, + "grad_norm": 1.2494221677821504, + "learning_rate": 2.836815407101711e-06, + "loss": 0.3601, + "step": 6600 + }, + { + "epoch": 0.08524797358731469, + "grad_norm": 1.3574974368874047, + "learning_rate": 2.841114263605881e-06, + "loss": 0.3825, + "step": 6610 + }, + { + "epoch": 0.08537694177731063, + "grad_norm": 1.4641899185903127, + "learning_rate": 2.845413120110051e-06, + "loss": 0.3697, + "step": 6620 + }, + { + "epoch": 0.08550590996730656, + "grad_norm": 1.4005281190188998, + "learning_rate": 2.8497119766142207e-06, + "loss": 0.376, + "step": 6630 + }, + { + "epoch": 0.0856348781573025, + "grad_norm": 1.2954271758224112, + "learning_rate": 2.854010833118391e-06, + "loss": 0.3599, + "step": 6640 + }, + { + "epoch": 0.08576384634729844, + "grad_norm": 1.3363578417962856, + "learning_rate": 2.8583096896225604e-06, + "loss": 0.3836, + "step": 6650 + }, + { + "epoch": 0.08589281453729437, + "grad_norm": 1.485444675755616, + "learning_rate": 2.8626085461267307e-06, + "loss": 0.351, + "step": 6660 + }, + { + "epoch": 0.08602178272729032, + "grad_norm": 1.2757566308189456, + "learning_rate": 2.8669074026309e-06, + "loss": 0.3794, + "step": 6670 + }, + { + "epoch": 0.08615075091728625, + "grad_norm": 1.3442214724099923, + "learning_rate": 2.8712062591350705e-06, + "loss": 0.3768, + "step": 6680 + }, + { + "epoch": 0.08627971910728219, + "grad_norm": 1.2223882122037917, + "learning_rate": 2.87550511563924e-06, + "loss": 0.3456, + "step": 6690 + }, + { + "epoch": 0.08640868729727813, + "grad_norm": 1.4126388413699305, + "learning_rate": 2.8798039721434102e-06, + "loss": 0.3582, + "step": 6700 + }, + { + "epoch": 0.08653765548727406, + "grad_norm": 1.5534632748237094, + "learning_rate": 2.8841028286475797e-06, + "loss": 0.3756, + "step": 6710 + }, + { + "epoch": 0.08666662367727, + "grad_norm": 1.3603107817597522, + "learning_rate": 2.88840168515175e-06, + "loss": 0.3573, + "step": 6720 + }, + { + "epoch": 0.08679559186726594, + "grad_norm": 1.3324880649476445, + "learning_rate": 2.8927005416559195e-06, + "loss": 0.3731, + "step": 6730 + }, + { + "epoch": 0.08692456005726187, + "grad_norm": 1.49419895821023, + "learning_rate": 2.8969993981600898e-06, + "loss": 0.3657, + "step": 6740 + }, + { + "epoch": 0.08705352824725782, + "grad_norm": 1.3365584245092805, + "learning_rate": 2.901298254664259e-06, + "loss": 0.3732, + "step": 6750 + }, + { + "epoch": 0.08718249643725375, + "grad_norm": 1.4150447682128349, + "learning_rate": 2.9055971111684295e-06, + "loss": 0.3667, + "step": 6760 + }, + { + "epoch": 0.08731146462724969, + "grad_norm": 1.285819764464214, + "learning_rate": 2.909895967672599e-06, + "loss": 0.3678, + "step": 6770 + }, + { + "epoch": 0.08744043281724563, + "grad_norm": 1.4690878313019498, + "learning_rate": 2.9141948241767693e-06, + "loss": 0.3787, + "step": 6780 + }, + { + "epoch": 0.08756940100724156, + "grad_norm": 1.378428545029699, + "learning_rate": 2.9184936806809387e-06, + "loss": 0.3649, + "step": 6790 + }, + { + "epoch": 0.0876983691972375, + "grad_norm": 1.3377898277436537, + "learning_rate": 2.922792537185109e-06, + "loss": 0.3648, + "step": 6800 + }, + { + "epoch": 0.08782733738723345, + "grad_norm": 1.3379674307208442, + "learning_rate": 2.9270913936892785e-06, + "loss": 0.3665, + "step": 6810 + }, + { + "epoch": 0.08795630557722937, + "grad_norm": 1.3958009932003743, + "learning_rate": 2.9313902501934488e-06, + "loss": 0.3648, + "step": 6820 + }, + { + "epoch": 0.08808527376722532, + "grad_norm": 1.3964055749641084, + "learning_rate": 2.9356891066976182e-06, + "loss": 0.357, + "step": 6830 + }, + { + "epoch": 0.08821424195722125, + "grad_norm": 1.4170553415285372, + "learning_rate": 2.9399879632017885e-06, + "loss": 0.3629, + "step": 6840 + }, + { + "epoch": 0.08834321014721719, + "grad_norm": 1.4453088136411534, + "learning_rate": 2.9442868197059584e-06, + "loss": 0.3676, + "step": 6850 + }, + { + "epoch": 0.08847217833721313, + "grad_norm": 1.2880414306765098, + "learning_rate": 2.9485856762101283e-06, + "loss": 0.3703, + "step": 6860 + }, + { + "epoch": 0.08860114652720906, + "grad_norm": 1.3281259495666122, + "learning_rate": 2.952884532714298e-06, + "loss": 0.3641, + "step": 6870 + }, + { + "epoch": 0.088730114717205, + "grad_norm": 1.3792212925134681, + "learning_rate": 2.957183389218468e-06, + "loss": 0.3616, + "step": 6880 + }, + { + "epoch": 0.08885908290720095, + "grad_norm": 1.2677724615779542, + "learning_rate": 2.961482245722638e-06, + "loss": 0.3654, + "step": 6890 + }, + { + "epoch": 0.08898805109719687, + "grad_norm": 1.3621729286494115, + "learning_rate": 2.9657811022268078e-06, + "loss": 0.3486, + "step": 6900 + }, + { + "epoch": 0.08911701928719282, + "grad_norm": 1.28646866948629, + "learning_rate": 2.9700799587309777e-06, + "loss": 0.3769, + "step": 6910 + }, + { + "epoch": 0.08924598747718875, + "grad_norm": 1.3470712658161403, + "learning_rate": 2.9743788152351475e-06, + "loss": 0.3783, + "step": 6920 + }, + { + "epoch": 0.08937495566718469, + "grad_norm": 1.254254167977638, + "learning_rate": 2.9786776717393174e-06, + "loss": 0.3679, + "step": 6930 + }, + { + "epoch": 0.08950392385718063, + "grad_norm": 1.3701613762233085, + "learning_rate": 2.9829765282434873e-06, + "loss": 0.3621, + "step": 6940 + }, + { + "epoch": 0.08963289204717656, + "grad_norm": 1.4175650075413841, + "learning_rate": 2.987275384747657e-06, + "loss": 0.3684, + "step": 6950 + }, + { + "epoch": 0.0897618602371725, + "grad_norm": 1.3654360980784699, + "learning_rate": 2.9915742412518275e-06, + "loss": 0.3605, + "step": 6960 + }, + { + "epoch": 0.08989082842716845, + "grad_norm": 1.3436314106457736, + "learning_rate": 2.995873097755997e-06, + "loss": 0.3693, + "step": 6970 + }, + { + "epoch": 0.09001979661716437, + "grad_norm": 1.4178986834137082, + "learning_rate": 3.0001719542601672e-06, + "loss": 0.3665, + "step": 6980 + }, + { + "epoch": 0.09014876480716032, + "grad_norm": 1.3245702346570374, + "learning_rate": 3.0044708107643367e-06, + "loss": 0.36, + "step": 6990 + }, + { + "epoch": 0.09027773299715625, + "grad_norm": 1.334430104608185, + "learning_rate": 3.008769667268507e-06, + "loss": 0.3588, + "step": 7000 + }, + { + "epoch": 0.09040670118715219, + "grad_norm": 1.3779772677622086, + "learning_rate": 3.0130685237726764e-06, + "loss": 0.3638, + "step": 7010 + }, + { + "epoch": 0.09053566937714813, + "grad_norm": 1.3474598228002677, + "learning_rate": 3.0173673802768467e-06, + "loss": 0.3728, + "step": 7020 + }, + { + "epoch": 0.09066463756714406, + "grad_norm": 1.3982574952161089, + "learning_rate": 3.021666236781016e-06, + "loss": 0.3685, + "step": 7030 + }, + { + "epoch": 0.09079360575714, + "grad_norm": 1.3757511936384357, + "learning_rate": 3.0259650932851865e-06, + "loss": 0.355, + "step": 7040 + }, + { + "epoch": 0.09092257394713595, + "grad_norm": 1.489586273906956, + "learning_rate": 3.030263949789356e-06, + "loss": 0.3568, + "step": 7050 + }, + { + "epoch": 0.09105154213713187, + "grad_norm": 1.3212638519591529, + "learning_rate": 3.0345628062935262e-06, + "loss": 0.3669, + "step": 7060 + }, + { + "epoch": 0.09118051032712782, + "grad_norm": 1.4500030295538573, + "learning_rate": 3.0388616627976957e-06, + "loss": 0.3673, + "step": 7070 + }, + { + "epoch": 0.09130947851712375, + "grad_norm": 1.4119375610608031, + "learning_rate": 3.043160519301866e-06, + "loss": 0.3654, + "step": 7080 + }, + { + "epoch": 0.09143844670711969, + "grad_norm": 1.2456528095130055, + "learning_rate": 3.0474593758060354e-06, + "loss": 0.353, + "step": 7090 + }, + { + "epoch": 0.09156741489711563, + "grad_norm": 1.2679472332763717, + "learning_rate": 3.0517582323102057e-06, + "loss": 0.3545, + "step": 7100 + }, + { + "epoch": 0.09169638308711156, + "grad_norm": 1.4275600850344494, + "learning_rate": 3.056057088814375e-06, + "loss": 0.3721, + "step": 7110 + }, + { + "epoch": 0.0918253512771075, + "grad_norm": 1.4204705758734462, + "learning_rate": 3.0603559453185455e-06, + "loss": 0.3604, + "step": 7120 + }, + { + "epoch": 0.09195431946710345, + "grad_norm": 1.309664899081989, + "learning_rate": 3.064654801822716e-06, + "loss": 0.3754, + "step": 7130 + }, + { + "epoch": 0.09208328765709937, + "grad_norm": 1.3592184178942055, + "learning_rate": 3.0689536583268853e-06, + "loss": 0.357, + "step": 7140 + }, + { + "epoch": 0.09221225584709532, + "grad_norm": 1.4014552209506874, + "learning_rate": 3.0732525148310556e-06, + "loss": 0.3675, + "step": 7150 + }, + { + "epoch": 0.09234122403709125, + "grad_norm": 1.527622327418022, + "learning_rate": 3.077551371335225e-06, + "loss": 0.3486, + "step": 7160 + }, + { + "epoch": 0.09247019222708719, + "grad_norm": 1.3710481513526662, + "learning_rate": 3.0818502278393953e-06, + "loss": 0.3617, + "step": 7170 + }, + { + "epoch": 0.09259916041708313, + "grad_norm": 1.2544264237361034, + "learning_rate": 3.0861490843435648e-06, + "loss": 0.3607, + "step": 7180 + }, + { + "epoch": 0.09272812860707906, + "grad_norm": 1.32469074304581, + "learning_rate": 3.090447940847735e-06, + "loss": 0.3542, + "step": 7190 + }, + { + "epoch": 0.092857096797075, + "grad_norm": 1.3228806198693779, + "learning_rate": 3.0947467973519045e-06, + "loss": 0.3474, + "step": 7200 + }, + { + "epoch": 0.09298606498707093, + "grad_norm": 1.3484781937583443, + "learning_rate": 3.099045653856075e-06, + "loss": 0.3625, + "step": 7210 + }, + { + "epoch": 0.09311503317706687, + "grad_norm": 1.3860442253373613, + "learning_rate": 3.1033445103602443e-06, + "loss": 0.3664, + "step": 7220 + }, + { + "epoch": 0.09324400136706282, + "grad_norm": 1.4114855863157465, + "learning_rate": 3.1076433668644146e-06, + "loss": 0.3698, + "step": 7230 + }, + { + "epoch": 0.09337296955705875, + "grad_norm": 1.5277793703810099, + "learning_rate": 3.111942223368584e-06, + "loss": 0.351, + "step": 7240 + }, + { + "epoch": 0.09350193774705469, + "grad_norm": 1.4914720725304966, + "learning_rate": 3.1162410798727543e-06, + "loss": 0.3565, + "step": 7250 + }, + { + "epoch": 0.09363090593705063, + "grad_norm": 1.4896525596683177, + "learning_rate": 3.1205399363769238e-06, + "loss": 0.3616, + "step": 7260 + }, + { + "epoch": 0.09375987412704656, + "grad_norm": 1.3393854982169349, + "learning_rate": 3.124838792881094e-06, + "loss": 0.3595, + "step": 7270 + }, + { + "epoch": 0.0938888423170425, + "grad_norm": 1.3196894820995209, + "learning_rate": 3.129137649385264e-06, + "loss": 0.3608, + "step": 7280 + }, + { + "epoch": 0.09401781050703843, + "grad_norm": 1.37563550242186, + "learning_rate": 3.133436505889434e-06, + "loss": 0.3522, + "step": 7290 + }, + { + "epoch": 0.09414677869703438, + "grad_norm": 1.3113876290713882, + "learning_rate": 3.1377353623936037e-06, + "loss": 0.3545, + "step": 7300 + }, + { + "epoch": 0.09427574688703032, + "grad_norm": 1.3013829899020222, + "learning_rate": 3.1420342188977736e-06, + "loss": 0.3483, + "step": 7310 + }, + { + "epoch": 0.09440471507702625, + "grad_norm": 1.346094815922994, + "learning_rate": 3.1463330754019435e-06, + "loss": 0.3634, + "step": 7320 + }, + { + "epoch": 0.09453368326702219, + "grad_norm": 1.4317227331803961, + "learning_rate": 3.1506319319061133e-06, + "loss": 0.3494, + "step": 7330 + }, + { + "epoch": 0.09466265145701813, + "grad_norm": 1.2054712383203576, + "learning_rate": 3.1549307884102832e-06, + "loss": 0.3647, + "step": 7340 + }, + { + "epoch": 0.09479161964701406, + "grad_norm": 1.2278154112768265, + "learning_rate": 3.159229644914453e-06, + "loss": 0.3608, + "step": 7350 + }, + { + "epoch": 0.09492058783701, + "grad_norm": 1.314597406105844, + "learning_rate": 3.163528501418623e-06, + "loss": 0.3565, + "step": 7360 + }, + { + "epoch": 0.09504955602700593, + "grad_norm": 1.5673186795406797, + "learning_rate": 3.167827357922793e-06, + "loss": 0.3699, + "step": 7370 + }, + { + "epoch": 0.09517852421700188, + "grad_norm": 1.5138215713147822, + "learning_rate": 3.1721262144269627e-06, + "loss": 0.374, + "step": 7380 + }, + { + "epoch": 0.09530749240699782, + "grad_norm": 1.408899741501263, + "learning_rate": 3.176425070931133e-06, + "loss": 0.3598, + "step": 7390 + }, + { + "epoch": 0.09543646059699375, + "grad_norm": 1.2610977945242632, + "learning_rate": 3.1807239274353025e-06, + "loss": 0.3737, + "step": 7400 + }, + { + "epoch": 0.09556542878698969, + "grad_norm": 1.38062269192855, + "learning_rate": 3.1850227839394728e-06, + "loss": 0.3572, + "step": 7410 + }, + { + "epoch": 0.09569439697698563, + "grad_norm": 1.3033223687849382, + "learning_rate": 3.1893216404436422e-06, + "loss": 0.3779, + "step": 7420 + }, + { + "epoch": 0.09582336516698156, + "grad_norm": 1.311963378327743, + "learning_rate": 3.1936204969478125e-06, + "loss": 0.3567, + "step": 7430 + }, + { + "epoch": 0.0959523333569775, + "grad_norm": 1.2644005252828452, + "learning_rate": 3.197919353451982e-06, + "loss": 0.3542, + "step": 7440 + }, + { + "epoch": 0.09608130154697343, + "grad_norm": 1.300206407468459, + "learning_rate": 3.2022182099561523e-06, + "loss": 0.3465, + "step": 7450 + }, + { + "epoch": 0.09621026973696938, + "grad_norm": 1.3265377212283729, + "learning_rate": 3.2065170664603217e-06, + "loss": 0.3854, + "step": 7460 + }, + { + "epoch": 0.09633923792696532, + "grad_norm": 1.4147178215451364, + "learning_rate": 3.210815922964492e-06, + "loss": 0.3609, + "step": 7470 + }, + { + "epoch": 0.09646820611696125, + "grad_norm": 1.3635764440642786, + "learning_rate": 3.2151147794686615e-06, + "loss": 0.3557, + "step": 7480 + }, + { + "epoch": 0.09659717430695719, + "grad_norm": 1.3830472712833006, + "learning_rate": 3.219413635972832e-06, + "loss": 0.3671, + "step": 7490 + }, + { + "epoch": 0.09672614249695313, + "grad_norm": 1.4800343402834253, + "learning_rate": 3.2237124924770013e-06, + "loss": 0.3743, + "step": 7500 + }, + { + "epoch": 0.09685511068694906, + "grad_norm": 1.55232285975204, + "learning_rate": 3.2280113489811716e-06, + "loss": 0.3744, + "step": 7510 + }, + { + "epoch": 0.096984078876945, + "grad_norm": 1.3015640013468435, + "learning_rate": 3.232310205485341e-06, + "loss": 0.3554, + "step": 7520 + }, + { + "epoch": 0.09711304706694093, + "grad_norm": 1.5528939007496503, + "learning_rate": 3.2366090619895113e-06, + "loss": 0.3533, + "step": 7530 + }, + { + "epoch": 0.09724201525693688, + "grad_norm": 1.3572412678553085, + "learning_rate": 3.2409079184936808e-06, + "loss": 0.3551, + "step": 7540 + }, + { + "epoch": 0.09737098344693282, + "grad_norm": 1.2279412882955034, + "learning_rate": 3.245206774997851e-06, + "loss": 0.3395, + "step": 7550 + }, + { + "epoch": 0.09749995163692875, + "grad_norm": 1.4447605253893394, + "learning_rate": 3.2495056315020205e-06, + "loss": 0.3721, + "step": 7560 + }, + { + "epoch": 0.09762891982692469, + "grad_norm": 1.5211520538690706, + "learning_rate": 3.253804488006191e-06, + "loss": 0.3621, + "step": 7570 + }, + { + "epoch": 0.09775788801692063, + "grad_norm": 1.359370279468202, + "learning_rate": 3.2581033445103603e-06, + "loss": 0.3739, + "step": 7580 + }, + { + "epoch": 0.09788685620691656, + "grad_norm": 1.3937972533577094, + "learning_rate": 3.2624022010145306e-06, + "loss": 0.3682, + "step": 7590 + }, + { + "epoch": 0.0980158243969125, + "grad_norm": 1.3876694523451014, + "learning_rate": 3.2667010575187004e-06, + "loss": 0.3575, + "step": 7600 + }, + { + "epoch": 0.09814479258690843, + "grad_norm": 1.2879126436193575, + "learning_rate": 3.2709999140228703e-06, + "loss": 0.3478, + "step": 7610 + }, + { + "epoch": 0.09827376077690438, + "grad_norm": 1.3541678236256152, + "learning_rate": 3.27529877052704e-06, + "loss": 0.3644, + "step": 7620 + }, + { + "epoch": 0.09840272896690032, + "grad_norm": 1.3322826798658973, + "learning_rate": 3.27959762703121e-06, + "loss": 0.3669, + "step": 7630 + }, + { + "epoch": 0.09853169715689625, + "grad_norm": 1.3063447237769439, + "learning_rate": 3.28389648353538e-06, + "loss": 0.3805, + "step": 7640 + }, + { + "epoch": 0.09866066534689219, + "grad_norm": 1.3408475480202877, + "learning_rate": 3.28819534003955e-06, + "loss": 0.3457, + "step": 7650 + }, + { + "epoch": 0.09878963353688813, + "grad_norm": 1.356559160825962, + "learning_rate": 3.2924941965437197e-06, + "loss": 0.3521, + "step": 7660 + }, + { + "epoch": 0.09891860172688406, + "grad_norm": 1.3385958073972604, + "learning_rate": 3.2967930530478896e-06, + "loss": 0.3582, + "step": 7670 + }, + { + "epoch": 0.09904756991688, + "grad_norm": 1.4726700903236545, + "learning_rate": 3.3010919095520595e-06, + "loss": 0.3583, + "step": 7680 + }, + { + "epoch": 0.09917653810687593, + "grad_norm": 1.1880652610533782, + "learning_rate": 3.3053907660562298e-06, + "loss": 0.363, + "step": 7690 + }, + { + "epoch": 0.09930550629687188, + "grad_norm": 1.3094423677782576, + "learning_rate": 3.3096896225603992e-06, + "loss": 0.3673, + "step": 7700 + }, + { + "epoch": 0.09943447448686782, + "grad_norm": 1.3237206546590072, + "learning_rate": 3.3139884790645695e-06, + "loss": 0.3643, + "step": 7710 + }, + { + "epoch": 0.09956344267686375, + "grad_norm": 1.3300964294815023, + "learning_rate": 3.318287335568739e-06, + "loss": 0.3414, + "step": 7720 + }, + { + "epoch": 0.09969241086685969, + "grad_norm": 1.4457825974574148, + "learning_rate": 3.3225861920729093e-06, + "loss": 0.3648, + "step": 7730 + }, + { + "epoch": 0.09982137905685563, + "grad_norm": 1.2636600650898413, + "learning_rate": 3.3268850485770787e-06, + "loss": 0.3667, + "step": 7740 + }, + { + "epoch": 0.09995034724685156, + "grad_norm": 1.1994322403559867, + "learning_rate": 3.331183905081249e-06, + "loss": 0.358, + "step": 7750 + }, + { + "epoch": 0.1000793154368475, + "grad_norm": 1.3917739496465422, + "learning_rate": 3.3354827615854185e-06, + "loss": 0.3594, + "step": 7760 + }, + { + "epoch": 0.10020828362684343, + "grad_norm": 1.3221822662964575, + "learning_rate": 3.3397816180895888e-06, + "loss": 0.3616, + "step": 7770 + }, + { + "epoch": 0.10033725181683938, + "grad_norm": 1.4224203408427882, + "learning_rate": 3.3440804745937582e-06, + "loss": 0.3514, + "step": 7780 + }, + { + "epoch": 0.10046622000683532, + "grad_norm": 1.4619885248865152, + "learning_rate": 3.3483793310979285e-06, + "loss": 0.3512, + "step": 7790 + }, + { + "epoch": 0.10059518819683125, + "grad_norm": 1.2122578082649924, + "learning_rate": 3.352678187602098e-06, + "loss": 0.3527, + "step": 7800 + }, + { + "epoch": 0.10072415638682719, + "grad_norm": 1.4505843463851877, + "learning_rate": 3.3569770441062683e-06, + "loss": 0.3625, + "step": 7810 + }, + { + "epoch": 0.10085312457682313, + "grad_norm": 1.385452968023475, + "learning_rate": 3.3612759006104377e-06, + "loss": 0.3445, + "step": 7820 + }, + { + "epoch": 0.10098209276681906, + "grad_norm": 1.3613745229337175, + "learning_rate": 3.365574757114608e-06, + "loss": 0.3755, + "step": 7830 + }, + { + "epoch": 0.101111060956815, + "grad_norm": 1.428355033867981, + "learning_rate": 3.3698736136187775e-06, + "loss": 0.3587, + "step": 7840 + }, + { + "epoch": 0.10124002914681093, + "grad_norm": 1.3680434681902536, + "learning_rate": 3.374172470122948e-06, + "loss": 0.3753, + "step": 7850 + }, + { + "epoch": 0.10136899733680688, + "grad_norm": 1.3783203156069979, + "learning_rate": 3.3784713266271172e-06, + "loss": 0.3716, + "step": 7860 + }, + { + "epoch": 0.10149796552680282, + "grad_norm": 1.4073386684045623, + "learning_rate": 3.3827701831312875e-06, + "loss": 0.3556, + "step": 7870 + }, + { + "epoch": 0.10162693371679875, + "grad_norm": 1.4070855088273508, + "learning_rate": 3.387069039635457e-06, + "loss": 0.3593, + "step": 7880 + }, + { + "epoch": 0.10175590190679469, + "grad_norm": 1.4337866679161064, + "learning_rate": 3.3913678961396273e-06, + "loss": 0.3597, + "step": 7890 + }, + { + "epoch": 0.10188487009679063, + "grad_norm": 1.4374970478215678, + "learning_rate": 3.395666752643797e-06, + "loss": 0.3557, + "step": 7900 + }, + { + "epoch": 0.10201383828678656, + "grad_norm": 1.527263385766843, + "learning_rate": 3.399965609147967e-06, + "loss": 0.3577, + "step": 7910 + }, + { + "epoch": 0.1021428064767825, + "grad_norm": 1.3727307998418787, + "learning_rate": 3.404264465652137e-06, + "loss": 0.353, + "step": 7920 + }, + { + "epoch": 0.10227177466677843, + "grad_norm": 1.2912395339945413, + "learning_rate": 3.408563322156307e-06, + "loss": 0.338, + "step": 7930 + }, + { + "epoch": 0.10240074285677438, + "grad_norm": 1.2590358131661998, + "learning_rate": 3.4128621786604767e-06, + "loss": 0.3563, + "step": 7940 + }, + { + "epoch": 0.10252971104677032, + "grad_norm": 1.3036778002754008, + "learning_rate": 3.4171610351646466e-06, + "loss": 0.3503, + "step": 7950 + }, + { + "epoch": 0.10265867923676625, + "grad_norm": 1.4110914296931734, + "learning_rate": 3.4214598916688164e-06, + "loss": 0.3403, + "step": 7960 + }, + { + "epoch": 0.10278764742676219, + "grad_norm": 1.4176938645691344, + "learning_rate": 3.4257587481729863e-06, + "loss": 0.3682, + "step": 7970 + }, + { + "epoch": 0.10291661561675812, + "grad_norm": 1.2657249363982037, + "learning_rate": 3.430057604677156e-06, + "loss": 0.3705, + "step": 7980 + }, + { + "epoch": 0.10304558380675406, + "grad_norm": 1.3302705018560559, + "learning_rate": 3.434356461181326e-06, + "loss": 0.3548, + "step": 7990 + }, + { + "epoch": 0.10317455199675, + "grad_norm": 1.3872840605978156, + "learning_rate": 3.438655317685496e-06, + "loss": 0.3606, + "step": 8000 + }, + { + "epoch": 0.10330352018674593, + "grad_norm": 1.486413656159866, + "learning_rate": 3.4429541741896662e-06, + "loss": 0.3688, + "step": 8010 + }, + { + "epoch": 0.10343248837674188, + "grad_norm": 1.4634738858949878, + "learning_rate": 3.4472530306938357e-06, + "loss": 0.3524, + "step": 8020 + }, + { + "epoch": 0.10356145656673782, + "grad_norm": 1.3589510107285174, + "learning_rate": 3.451551887198006e-06, + "loss": 0.3681, + "step": 8030 + }, + { + "epoch": 0.10369042475673375, + "grad_norm": 1.4552768451997429, + "learning_rate": 3.4558507437021755e-06, + "loss": 0.3337, + "step": 8040 + }, + { + "epoch": 0.10381939294672969, + "grad_norm": 1.43801598462378, + "learning_rate": 3.4601496002063458e-06, + "loss": 0.356, + "step": 8050 + }, + { + "epoch": 0.10394836113672562, + "grad_norm": 1.4862277759744522, + "learning_rate": 3.464448456710515e-06, + "loss": 0.37, + "step": 8060 + }, + { + "epoch": 0.10407732932672156, + "grad_norm": 1.321660848095465, + "learning_rate": 3.4687473132146855e-06, + "loss": 0.3763, + "step": 8070 + }, + { + "epoch": 0.1042062975167175, + "grad_norm": 1.3130720501221818, + "learning_rate": 3.473046169718855e-06, + "loss": 0.352, + "step": 8080 + }, + { + "epoch": 0.10433526570671343, + "grad_norm": 1.562641504223917, + "learning_rate": 3.4773450262230253e-06, + "loss": 0.3488, + "step": 8090 + }, + { + "epoch": 0.10446423389670938, + "grad_norm": 1.366479194591602, + "learning_rate": 3.4816438827271947e-06, + "loss": 0.3591, + "step": 8100 + }, + { + "epoch": 0.10459320208670532, + "grad_norm": 1.4817352552801033, + "learning_rate": 3.485942739231365e-06, + "loss": 0.3496, + "step": 8110 + }, + { + "epoch": 0.10472217027670125, + "grad_norm": 1.3534453272238158, + "learning_rate": 3.4902415957355345e-06, + "loss": 0.355, + "step": 8120 + }, + { + "epoch": 0.10485113846669719, + "grad_norm": 1.3157822756392659, + "learning_rate": 3.4945404522397048e-06, + "loss": 0.3513, + "step": 8130 + }, + { + "epoch": 0.10498010665669312, + "grad_norm": 1.4137792064032548, + "learning_rate": 3.4988393087438742e-06, + "loss": 0.3577, + "step": 8140 + }, + { + "epoch": 0.10510907484668906, + "grad_norm": 1.2420453089123562, + "learning_rate": 3.5031381652480445e-06, + "loss": 0.3471, + "step": 8150 + }, + { + "epoch": 0.105238043036685, + "grad_norm": 1.3957281241099737, + "learning_rate": 3.507437021752214e-06, + "loss": 0.3638, + "step": 8160 + }, + { + "epoch": 0.10536701122668093, + "grad_norm": 1.4205545735760448, + "learning_rate": 3.5117358782563843e-06, + "loss": 0.3605, + "step": 8170 + }, + { + "epoch": 0.10549597941667688, + "grad_norm": 1.395627577962588, + "learning_rate": 3.5160347347605537e-06, + "loss": 0.3431, + "step": 8180 + }, + { + "epoch": 0.10562494760667282, + "grad_norm": 1.3809921073835498, + "learning_rate": 3.520333591264724e-06, + "loss": 0.3411, + "step": 8190 + }, + { + "epoch": 0.10575391579666875, + "grad_norm": 1.3456925172591323, + "learning_rate": 3.5246324477688935e-06, + "loss": 0.3351, + "step": 8200 + }, + { + "epoch": 0.10588288398666469, + "grad_norm": 1.284325279791479, + "learning_rate": 3.5289313042730638e-06, + "loss": 0.3569, + "step": 8210 + }, + { + "epoch": 0.10601185217666062, + "grad_norm": 1.4500019653445755, + "learning_rate": 3.5332301607772337e-06, + "loss": 0.3494, + "step": 8220 + }, + { + "epoch": 0.10614082036665656, + "grad_norm": 1.4081099793958147, + "learning_rate": 3.5375290172814035e-06, + "loss": 0.3519, + "step": 8230 + }, + { + "epoch": 0.1062697885566525, + "grad_norm": 1.4942870471807865, + "learning_rate": 3.5418278737855734e-06, + "loss": 0.348, + "step": 8240 + }, + { + "epoch": 0.10639875674664843, + "grad_norm": 1.2885294800965899, + "learning_rate": 3.5461267302897433e-06, + "loss": 0.3449, + "step": 8250 + }, + { + "epoch": 0.10652772493664438, + "grad_norm": 1.3658238342187146, + "learning_rate": 3.550425586793913e-06, + "loss": 0.3618, + "step": 8260 + }, + { + "epoch": 0.10665669312664032, + "grad_norm": 1.5375813865508698, + "learning_rate": 3.554724443298083e-06, + "loss": 0.3757, + "step": 8270 + }, + { + "epoch": 0.10678566131663625, + "grad_norm": 1.297668409035843, + "learning_rate": 3.559023299802253e-06, + "loss": 0.3447, + "step": 8280 + }, + { + "epoch": 0.10691462950663219, + "grad_norm": 1.48724177940339, + "learning_rate": 3.563322156306423e-06, + "loss": 0.3484, + "step": 8290 + }, + { + "epoch": 0.10704359769662812, + "grad_norm": 1.297460470346068, + "learning_rate": 3.5676210128105927e-06, + "loss": 0.3554, + "step": 8300 + }, + { + "epoch": 0.10717256588662406, + "grad_norm": 1.3533942667925203, + "learning_rate": 3.5719198693147626e-06, + "loss": 0.3653, + "step": 8310 + }, + { + "epoch": 0.10730153407662, + "grad_norm": 1.424711986195457, + "learning_rate": 3.5762187258189324e-06, + "loss": 0.3398, + "step": 8320 + }, + { + "epoch": 0.10743050226661593, + "grad_norm": 1.458673701419987, + "learning_rate": 3.5805175823231027e-06, + "loss": 0.3456, + "step": 8330 + }, + { + "epoch": 0.10755947045661188, + "grad_norm": 1.410311278515434, + "learning_rate": 3.584816438827272e-06, + "loss": 0.3702, + "step": 8340 + }, + { + "epoch": 0.10768843864660782, + "grad_norm": 1.4908726172325717, + "learning_rate": 3.5891152953314425e-06, + "loss": 0.3493, + "step": 8350 + }, + { + "epoch": 0.10781740683660375, + "grad_norm": 1.4267511751312887, + "learning_rate": 3.593414151835612e-06, + "loss": 0.3385, + "step": 8360 + }, + { + "epoch": 0.10794637502659969, + "grad_norm": 1.2405559081688582, + "learning_rate": 3.5977130083397822e-06, + "loss": 0.3437, + "step": 8370 + }, + { + "epoch": 0.10807534321659562, + "grad_norm": 1.3660608121770506, + "learning_rate": 3.6020118648439517e-06, + "loss": 0.3438, + "step": 8380 + }, + { + "epoch": 0.10820431140659156, + "grad_norm": 1.3001694671107664, + "learning_rate": 3.606310721348122e-06, + "loss": 0.3495, + "step": 8390 + }, + { + "epoch": 0.1083332795965875, + "grad_norm": 1.4196145156634437, + "learning_rate": 3.6106095778522914e-06, + "loss": 0.3429, + "step": 8400 + }, + { + "epoch": 0.10846224778658344, + "grad_norm": 1.3267434399799627, + "learning_rate": 3.6149084343564617e-06, + "loss": 0.3458, + "step": 8410 + }, + { + "epoch": 0.10859121597657938, + "grad_norm": 1.3623791801936223, + "learning_rate": 3.619207290860631e-06, + "loss": 0.3401, + "step": 8420 + }, + { + "epoch": 0.10872018416657532, + "grad_norm": 1.4192593044089183, + "learning_rate": 3.6235061473648015e-06, + "loss": 0.3498, + "step": 8430 + }, + { + "epoch": 0.10884915235657125, + "grad_norm": 1.342427979551147, + "learning_rate": 3.627805003868971e-06, + "loss": 0.3559, + "step": 8440 + }, + { + "epoch": 0.10897812054656719, + "grad_norm": 1.2783112434775956, + "learning_rate": 3.6321038603731413e-06, + "loss": 0.3531, + "step": 8450 + }, + { + "epoch": 0.10910708873656312, + "grad_norm": 1.2529163801790275, + "learning_rate": 3.6364027168773107e-06, + "loss": 0.3401, + "step": 8460 + }, + { + "epoch": 0.10923605692655906, + "grad_norm": 1.5187552036393102, + "learning_rate": 3.640701573381481e-06, + "loss": 0.3561, + "step": 8470 + }, + { + "epoch": 0.109365025116555, + "grad_norm": 1.40353741506317, + "learning_rate": 3.6450004298856505e-06, + "loss": 0.3335, + "step": 8480 + }, + { + "epoch": 0.10949399330655094, + "grad_norm": 1.3228877437283275, + "learning_rate": 3.6492992863898208e-06, + "loss": 0.3468, + "step": 8490 + }, + { + "epoch": 0.10962296149654688, + "grad_norm": 1.3316371106709166, + "learning_rate": 3.6535981428939902e-06, + "loss": 0.3468, + "step": 8500 + }, + { + "epoch": 0.10975192968654282, + "grad_norm": 1.3136328159652029, + "learning_rate": 3.6578969993981605e-06, + "loss": 0.3442, + "step": 8510 + }, + { + "epoch": 0.10988089787653875, + "grad_norm": 1.3828196519803948, + "learning_rate": 3.66219585590233e-06, + "loss": 0.3564, + "step": 8520 + }, + { + "epoch": 0.11000986606653469, + "grad_norm": 1.289906646108839, + "learning_rate": 3.6664947124065003e-06, + "loss": 0.3584, + "step": 8530 + }, + { + "epoch": 0.11013883425653062, + "grad_norm": 1.4554555787854808, + "learning_rate": 3.67079356891067e-06, + "loss": 0.3318, + "step": 8540 + }, + { + "epoch": 0.11026780244652656, + "grad_norm": 1.3901610683542918, + "learning_rate": 3.67509242541484e-06, + "loss": 0.3468, + "step": 8550 + }, + { + "epoch": 0.1103967706365225, + "grad_norm": 1.3728897758065879, + "learning_rate": 3.67939128191901e-06, + "loss": 0.3511, + "step": 8560 + }, + { + "epoch": 0.11052573882651844, + "grad_norm": 1.3898850471481747, + "learning_rate": 3.6836901384231798e-06, + "loss": 0.3437, + "step": 8570 + }, + { + "epoch": 0.11065470701651438, + "grad_norm": 1.3710301554061446, + "learning_rate": 3.6879889949273497e-06, + "loss": 0.3671, + "step": 8580 + }, + { + "epoch": 0.11078367520651032, + "grad_norm": 1.3377561911794946, + "learning_rate": 3.6922878514315195e-06, + "loss": 0.3383, + "step": 8590 + }, + { + "epoch": 0.11091264339650625, + "grad_norm": 1.443238339977118, + "learning_rate": 3.6965867079356894e-06, + "loss": 0.3665, + "step": 8600 + }, + { + "epoch": 0.11104161158650219, + "grad_norm": 1.5255212225143433, + "learning_rate": 3.7008855644398593e-06, + "loss": 0.3641, + "step": 8610 + }, + { + "epoch": 0.11117057977649812, + "grad_norm": 1.4315431002660928, + "learning_rate": 3.705184420944029e-06, + "loss": 0.3535, + "step": 8620 + }, + { + "epoch": 0.11129954796649406, + "grad_norm": 1.5374928762291034, + "learning_rate": 3.7094832774481995e-06, + "loss": 0.3477, + "step": 8630 + }, + { + "epoch": 0.11142851615649, + "grad_norm": 1.3836783240866817, + "learning_rate": 3.713782133952369e-06, + "loss": 0.338, + "step": 8640 + }, + { + "epoch": 0.11155748434648594, + "grad_norm": 1.5112126589347485, + "learning_rate": 3.7180809904565392e-06, + "loss": 0.3617, + "step": 8650 + }, + { + "epoch": 0.11168645253648188, + "grad_norm": 1.1838960557533813, + "learning_rate": 3.7223798469607087e-06, + "loss": 0.3293, + "step": 8660 + }, + { + "epoch": 0.11181542072647782, + "grad_norm": 1.2542895628231088, + "learning_rate": 3.726678703464879e-06, + "loss": 0.3658, + "step": 8670 + }, + { + "epoch": 0.11194438891647375, + "grad_norm": 1.5394331574759905, + "learning_rate": 3.7309775599690484e-06, + "loss": 0.3554, + "step": 8680 + }, + { + "epoch": 0.11207335710646969, + "grad_norm": 1.3391576725687042, + "learning_rate": 3.7352764164732187e-06, + "loss": 0.3551, + "step": 8690 + }, + { + "epoch": 0.11220232529646562, + "grad_norm": 1.3367951556134554, + "learning_rate": 3.739575272977388e-06, + "loss": 0.3475, + "step": 8700 + }, + { + "epoch": 0.11233129348646156, + "grad_norm": 1.4767605557748158, + "learning_rate": 3.7438741294815585e-06, + "loss": 0.3612, + "step": 8710 + }, + { + "epoch": 0.1124602616764575, + "grad_norm": 1.4448337357082024, + "learning_rate": 3.748172985985728e-06, + "loss": 0.356, + "step": 8720 + }, + { + "epoch": 0.11258922986645344, + "grad_norm": 1.3696800263827522, + "learning_rate": 3.7524718424898982e-06, + "loss": 0.3458, + "step": 8730 + }, + { + "epoch": 0.11271819805644938, + "grad_norm": 1.3318943396159832, + "learning_rate": 3.7567706989940677e-06, + "loss": 0.3421, + "step": 8740 + }, + { + "epoch": 0.11284716624644531, + "grad_norm": 1.3653363240087075, + "learning_rate": 3.761069555498238e-06, + "loss": 0.3404, + "step": 8750 + }, + { + "epoch": 0.11297613443644125, + "grad_norm": 1.3505071480225503, + "learning_rate": 3.7653684120024074e-06, + "loss": 0.349, + "step": 8760 + }, + { + "epoch": 0.11310510262643719, + "grad_norm": 1.378757100328975, + "learning_rate": 3.7696672685065777e-06, + "loss": 0.3415, + "step": 8770 + }, + { + "epoch": 0.11323407081643312, + "grad_norm": 1.528599747680683, + "learning_rate": 3.773966125010747e-06, + "loss": 0.344, + "step": 8780 + }, + { + "epoch": 0.11336303900642906, + "grad_norm": 1.3582903310394987, + "learning_rate": 3.7782649815149175e-06, + "loss": 0.3466, + "step": 8790 + }, + { + "epoch": 0.11349200719642501, + "grad_norm": 1.2637537862621353, + "learning_rate": 3.782563838019087e-06, + "loss": 0.3344, + "step": 8800 + }, + { + "epoch": 0.11362097538642094, + "grad_norm": 1.2956596171003023, + "learning_rate": 3.7868626945232572e-06, + "loss": 0.349, + "step": 8810 + }, + { + "epoch": 0.11374994357641688, + "grad_norm": 1.2962683959880987, + "learning_rate": 3.7911615510274267e-06, + "loss": 0.3463, + "step": 8820 + }, + { + "epoch": 0.11387891176641281, + "grad_norm": 1.2820100249660265, + "learning_rate": 3.795460407531597e-06, + "loss": 0.3301, + "step": 8830 + }, + { + "epoch": 0.11400787995640875, + "grad_norm": 1.3164171818987531, + "learning_rate": 3.799759264035767e-06, + "loss": 0.3428, + "step": 8840 + }, + { + "epoch": 0.11413684814640469, + "grad_norm": 1.4632437754076066, + "learning_rate": 3.8040581205399368e-06, + "loss": 0.3535, + "step": 8850 + }, + { + "epoch": 0.11426581633640062, + "grad_norm": 1.3360140761384458, + "learning_rate": 3.8083569770441066e-06, + "loss": 0.364, + "step": 8860 + }, + { + "epoch": 0.11439478452639656, + "grad_norm": 1.2594229379668844, + "learning_rate": 3.8126558335482765e-06, + "loss": 0.3337, + "step": 8870 + }, + { + "epoch": 0.11452375271639251, + "grad_norm": 1.3020120991345434, + "learning_rate": 3.816954690052446e-06, + "loss": 0.3546, + "step": 8880 + }, + { + "epoch": 0.11465272090638844, + "grad_norm": 1.5364135470250286, + "learning_rate": 3.821253546556617e-06, + "loss": 0.3438, + "step": 8890 + }, + { + "epoch": 0.11478168909638438, + "grad_norm": 1.2577578020789462, + "learning_rate": 3.825552403060786e-06, + "loss": 0.3465, + "step": 8900 + }, + { + "epoch": 0.11491065728638031, + "grad_norm": 1.344742913363827, + "learning_rate": 3.8298512595649564e-06, + "loss": 0.3655, + "step": 8910 + }, + { + "epoch": 0.11503962547637625, + "grad_norm": 1.4253844336349306, + "learning_rate": 3.834150116069126e-06, + "loss": 0.3521, + "step": 8920 + }, + { + "epoch": 0.1151685936663722, + "grad_norm": 1.234107994374098, + "learning_rate": 3.838448972573296e-06, + "loss": 0.3369, + "step": 8930 + }, + { + "epoch": 0.11529756185636812, + "grad_norm": 1.3637613819212915, + "learning_rate": 3.842747829077466e-06, + "loss": 0.3318, + "step": 8940 + }, + { + "epoch": 0.11542653004636406, + "grad_norm": 1.281120082747489, + "learning_rate": 3.847046685581636e-06, + "loss": 0.3312, + "step": 8950 + }, + { + "epoch": 0.11555549823636001, + "grad_norm": 1.3057094340370676, + "learning_rate": 3.851345542085805e-06, + "loss": 0.3497, + "step": 8960 + }, + { + "epoch": 0.11568446642635594, + "grad_norm": 1.4064732659284713, + "learning_rate": 3.855644398589976e-06, + "loss": 0.3327, + "step": 8970 + }, + { + "epoch": 0.11581343461635188, + "grad_norm": 1.3706857979745517, + "learning_rate": 3.859943255094145e-06, + "loss": 0.3509, + "step": 8980 + }, + { + "epoch": 0.11594240280634781, + "grad_norm": 1.4836803584059282, + "learning_rate": 3.8642421115983155e-06, + "loss": 0.3458, + "step": 8990 + }, + { + "epoch": 0.11607137099634375, + "grad_norm": 1.3859651634626948, + "learning_rate": 3.868540968102485e-06, + "loss": 0.3442, + "step": 9000 + }, + { + "epoch": 0.1162003391863397, + "grad_norm": 1.4925269574191522, + "learning_rate": 3.872839824606655e-06, + "loss": 0.3452, + "step": 9010 + }, + { + "epoch": 0.11632930737633562, + "grad_norm": 1.4573166517663718, + "learning_rate": 3.877138681110825e-06, + "loss": 0.3466, + "step": 9020 + }, + { + "epoch": 0.11645827556633156, + "grad_norm": 1.2418174755582403, + "learning_rate": 3.881437537614995e-06, + "loss": 0.3457, + "step": 9030 + }, + { + "epoch": 0.11658724375632751, + "grad_norm": 1.4044079962433424, + "learning_rate": 3.885736394119164e-06, + "loss": 0.3446, + "step": 9040 + }, + { + "epoch": 0.11671621194632344, + "grad_norm": 1.244184043045155, + "learning_rate": 3.890035250623335e-06, + "loss": 0.3546, + "step": 9050 + }, + { + "epoch": 0.11684518013631938, + "grad_norm": 1.5083164984099635, + "learning_rate": 3.894334107127504e-06, + "loss": 0.335, + "step": 9060 + }, + { + "epoch": 0.11697414832631531, + "grad_norm": 1.309706521280654, + "learning_rate": 3.8986329636316745e-06, + "loss": 0.3449, + "step": 9070 + }, + { + "epoch": 0.11710311651631125, + "grad_norm": 1.2400948789855335, + "learning_rate": 3.902931820135844e-06, + "loss": 0.3431, + "step": 9080 + }, + { + "epoch": 0.1172320847063072, + "grad_norm": 1.369945059497769, + "learning_rate": 3.907230676640014e-06, + "loss": 0.3564, + "step": 9090 + }, + { + "epoch": 0.11736105289630312, + "grad_norm": 1.4015806847034453, + "learning_rate": 3.911529533144184e-06, + "loss": 0.3408, + "step": 9100 + }, + { + "epoch": 0.11749002108629907, + "grad_norm": 1.3781860077844987, + "learning_rate": 3.915828389648354e-06, + "loss": 0.3441, + "step": 9110 + }, + { + "epoch": 0.11761898927629501, + "grad_norm": 1.3937194108484006, + "learning_rate": 3.9201272461525234e-06, + "loss": 0.3379, + "step": 9120 + }, + { + "epoch": 0.11774795746629094, + "grad_norm": 1.2271492005559064, + "learning_rate": 3.924426102656694e-06, + "loss": 0.3359, + "step": 9130 + }, + { + "epoch": 0.11787692565628688, + "grad_norm": 1.277846899789057, + "learning_rate": 3.928724959160863e-06, + "loss": 0.3501, + "step": 9140 + }, + { + "epoch": 0.11800589384628281, + "grad_norm": 1.342696707780398, + "learning_rate": 3.9330238156650335e-06, + "loss": 0.3485, + "step": 9150 + }, + { + "epoch": 0.11813486203627875, + "grad_norm": 1.2890159308505078, + "learning_rate": 3.937322672169203e-06, + "loss": 0.3382, + "step": 9160 + }, + { + "epoch": 0.1182638302262747, + "grad_norm": 1.3203585346246223, + "learning_rate": 3.941621528673373e-06, + "loss": 0.363, + "step": 9170 + }, + { + "epoch": 0.11839279841627062, + "grad_norm": 1.4395622876999323, + "learning_rate": 3.945920385177543e-06, + "loss": 0.3416, + "step": 9180 + }, + { + "epoch": 0.11852176660626657, + "grad_norm": 1.3794403392065466, + "learning_rate": 3.950219241681713e-06, + "loss": 0.339, + "step": 9190 + }, + { + "epoch": 0.11865073479626251, + "grad_norm": 1.1972076429270422, + "learning_rate": 3.9545180981858824e-06, + "loss": 0.3451, + "step": 9200 + }, + { + "epoch": 0.11877970298625844, + "grad_norm": 1.3396899324285398, + "learning_rate": 3.958816954690053e-06, + "loss": 0.3501, + "step": 9210 + }, + { + "epoch": 0.11890867117625438, + "grad_norm": 1.5161561223422286, + "learning_rate": 3.963115811194222e-06, + "loss": 0.3392, + "step": 9220 + }, + { + "epoch": 0.11903763936625031, + "grad_norm": 1.316738842074475, + "learning_rate": 3.9674146676983925e-06, + "loss": 0.3665, + "step": 9230 + }, + { + "epoch": 0.11916660755624625, + "grad_norm": 1.3604633808307909, + "learning_rate": 3.971713524202562e-06, + "loss": 0.3405, + "step": 9240 + }, + { + "epoch": 0.1192955757462422, + "grad_norm": 1.2939680234600548, + "learning_rate": 3.976012380706732e-06, + "loss": 0.3546, + "step": 9250 + }, + { + "epoch": 0.11942454393623812, + "grad_norm": 1.258599022569911, + "learning_rate": 3.980311237210902e-06, + "loss": 0.3419, + "step": 9260 + }, + { + "epoch": 0.11955351212623407, + "grad_norm": 1.4629629854072355, + "learning_rate": 3.984610093715072e-06, + "loss": 0.3512, + "step": 9270 + }, + { + "epoch": 0.11968248031623001, + "grad_norm": 1.4025264932012222, + "learning_rate": 3.9889089502192415e-06, + "loss": 0.3315, + "step": 9280 + }, + { + "epoch": 0.11981144850622594, + "grad_norm": 1.321901871115717, + "learning_rate": 3.993207806723412e-06, + "loss": 0.3486, + "step": 9290 + }, + { + "epoch": 0.11994041669622188, + "grad_norm": 1.3975735434299472, + "learning_rate": 3.997506663227581e-06, + "loss": 0.3448, + "step": 9300 + }, + { + "epoch": 0.12006938488621781, + "grad_norm": 1.4672979555077823, + "learning_rate": 4.0018055197317515e-06, + "loss": 0.3509, + "step": 9310 + }, + { + "epoch": 0.12019835307621375, + "grad_norm": 1.4225096787825824, + "learning_rate": 4.006104376235922e-06, + "loss": 0.3429, + "step": 9320 + }, + { + "epoch": 0.1203273212662097, + "grad_norm": 1.320719771084718, + "learning_rate": 4.010403232740091e-06, + "loss": 0.33, + "step": 9330 + }, + { + "epoch": 0.12045628945620562, + "grad_norm": 1.242187170886375, + "learning_rate": 4.0147020892442616e-06, + "loss": 0.3416, + "step": 9340 + }, + { + "epoch": 0.12058525764620157, + "grad_norm": 1.4650478049047875, + "learning_rate": 4.019000945748431e-06, + "loss": 0.3418, + "step": 9350 + }, + { + "epoch": 0.12071422583619751, + "grad_norm": 1.3601072198060549, + "learning_rate": 4.023299802252601e-06, + "loss": 0.3552, + "step": 9360 + }, + { + "epoch": 0.12084319402619344, + "grad_norm": 1.4789262931920255, + "learning_rate": 4.027598658756771e-06, + "loss": 0.3466, + "step": 9370 + }, + { + "epoch": 0.12097216221618938, + "grad_norm": 1.396325369963004, + "learning_rate": 4.031897515260941e-06, + "loss": 0.3526, + "step": 9380 + }, + { + "epoch": 0.12110113040618531, + "grad_norm": 1.2702081580673756, + "learning_rate": 4.0361963717651105e-06, + "loss": 0.3506, + "step": 9390 + }, + { + "epoch": 0.12123009859618125, + "grad_norm": 1.3410709845026534, + "learning_rate": 4.040495228269281e-06, + "loss": 0.3406, + "step": 9400 + }, + { + "epoch": 0.1213590667861772, + "grad_norm": 1.2929748885113628, + "learning_rate": 4.04479408477345e-06, + "loss": 0.3325, + "step": 9410 + }, + { + "epoch": 0.12148803497617312, + "grad_norm": 1.1923944986989048, + "learning_rate": 4.049092941277621e-06, + "loss": 0.3536, + "step": 9420 + }, + { + "epoch": 0.12161700316616907, + "grad_norm": 1.2847840743582242, + "learning_rate": 4.053391797781791e-06, + "loss": 0.3297, + "step": 9430 + }, + { + "epoch": 0.12174597135616501, + "grad_norm": 1.5421620886744825, + "learning_rate": 4.05769065428596e-06, + "loss": 0.3583, + "step": 9440 + }, + { + "epoch": 0.12187493954616094, + "grad_norm": 1.1791041973665837, + "learning_rate": 4.061989510790131e-06, + "loss": 0.3351, + "step": 9450 + }, + { + "epoch": 0.12200390773615688, + "grad_norm": 1.267664539360928, + "learning_rate": 4.0662883672943e-06, + "loss": 0.3482, + "step": 9460 + }, + { + "epoch": 0.12213287592615281, + "grad_norm": 1.296276306680534, + "learning_rate": 4.07058722379847e-06, + "loss": 0.3364, + "step": 9470 + }, + { + "epoch": 0.12226184411614875, + "grad_norm": 1.4113449464639465, + "learning_rate": 4.07488608030264e-06, + "loss": 0.3435, + "step": 9480 + }, + { + "epoch": 0.1223908123061447, + "grad_norm": 1.3406244398270775, + "learning_rate": 4.07918493680681e-06, + "loss": 0.3392, + "step": 9490 + }, + { + "epoch": 0.12251978049614062, + "grad_norm": 1.2219824969855617, + "learning_rate": 4.08348379331098e-06, + "loss": 0.3271, + "step": 9500 + }, + { + "epoch": 0.12264874868613657, + "grad_norm": 1.3975412519137362, + "learning_rate": 4.08778264981515e-06, + "loss": 0.3526, + "step": 9510 + }, + { + "epoch": 0.12277771687613251, + "grad_norm": 1.3308324932745141, + "learning_rate": 4.092081506319319e-06, + "loss": 0.3361, + "step": 9520 + }, + { + "epoch": 0.12290668506612844, + "grad_norm": 1.2970932792778918, + "learning_rate": 4.09638036282349e-06, + "loss": 0.3465, + "step": 9530 + }, + { + "epoch": 0.12303565325612438, + "grad_norm": 1.494481807734204, + "learning_rate": 4.100679219327659e-06, + "loss": 0.3298, + "step": 9540 + }, + { + "epoch": 0.12316462144612031, + "grad_norm": 1.4527612567980355, + "learning_rate": 4.104978075831829e-06, + "loss": 0.3331, + "step": 9550 + }, + { + "epoch": 0.12329358963611625, + "grad_norm": 1.3604992578516897, + "learning_rate": 4.109276932335999e-06, + "loss": 0.3447, + "step": 9560 + }, + { + "epoch": 0.1234225578261122, + "grad_norm": 1.3552680731145852, + "learning_rate": 4.113575788840169e-06, + "loss": 0.3408, + "step": 9570 + }, + { + "epoch": 0.12355152601610812, + "grad_norm": 1.3479582964878631, + "learning_rate": 4.117874645344339e-06, + "loss": 0.3432, + "step": 9580 + }, + { + "epoch": 0.12368049420610407, + "grad_norm": 1.4493728542922746, + "learning_rate": 4.122173501848509e-06, + "loss": 0.3446, + "step": 9590 + }, + { + "epoch": 0.1238094623961, + "grad_norm": 1.326194267512295, + "learning_rate": 4.126472358352678e-06, + "loss": 0.3238, + "step": 9600 + }, + { + "epoch": 0.12393843058609594, + "grad_norm": 1.330690610095433, + "learning_rate": 4.130771214856849e-06, + "loss": 0.3483, + "step": 9610 + }, + { + "epoch": 0.12406739877609188, + "grad_norm": 1.337270347482971, + "learning_rate": 4.135070071361018e-06, + "loss": 0.3446, + "step": 9620 + }, + { + "epoch": 0.12419636696608781, + "grad_norm": 1.5113189305677874, + "learning_rate": 4.139368927865188e-06, + "loss": 0.3413, + "step": 9630 + }, + { + "epoch": 0.12432533515608375, + "grad_norm": 1.2808515519242465, + "learning_rate": 4.143667784369358e-06, + "loss": 0.3281, + "step": 9640 + }, + { + "epoch": 0.1244543033460797, + "grad_norm": 1.230290729378114, + "learning_rate": 4.147966640873528e-06, + "loss": 0.3502, + "step": 9650 + }, + { + "epoch": 0.12458327153607562, + "grad_norm": 1.337077688644887, + "learning_rate": 4.152265497377698e-06, + "loss": 0.3317, + "step": 9660 + }, + { + "epoch": 0.12471223972607157, + "grad_norm": 1.3905510070970484, + "learning_rate": 4.156564353881868e-06, + "loss": 0.3388, + "step": 9670 + }, + { + "epoch": 0.1248412079160675, + "grad_norm": 1.2651191945219056, + "learning_rate": 4.160863210386037e-06, + "loss": 0.3342, + "step": 9680 + }, + { + "epoch": 0.12497017610606344, + "grad_norm": 1.364977637565327, + "learning_rate": 4.165162066890208e-06, + "loss": 0.3449, + "step": 9690 + }, + { + "epoch": 0.12509914429605937, + "grad_norm": 1.4535897116277667, + "learning_rate": 4.169460923394377e-06, + "loss": 0.3561, + "step": 9700 + }, + { + "epoch": 0.1252281124860553, + "grad_norm": 1.447098775325234, + "learning_rate": 4.1737597798985474e-06, + "loss": 0.3441, + "step": 9710 + }, + { + "epoch": 0.12535708067605125, + "grad_norm": 1.376969548760786, + "learning_rate": 4.178058636402717e-06, + "loss": 0.3336, + "step": 9720 + }, + { + "epoch": 0.1254860488660472, + "grad_norm": 1.3671340439529445, + "learning_rate": 4.182357492906887e-06, + "loss": 0.3455, + "step": 9730 + }, + { + "epoch": 0.12561501705604314, + "grad_norm": 1.3651122415520487, + "learning_rate": 4.186656349411057e-06, + "loss": 0.3468, + "step": 9740 + }, + { + "epoch": 0.12574398524603905, + "grad_norm": 1.3800421172431838, + "learning_rate": 4.190955205915227e-06, + "loss": 0.3495, + "step": 9750 + }, + { + "epoch": 0.125872953436035, + "grad_norm": 1.2879497575037635, + "learning_rate": 4.195254062419396e-06, + "loss": 0.3418, + "step": 9760 + }, + { + "epoch": 0.12600192162603094, + "grad_norm": 1.2607643696674942, + "learning_rate": 4.199552918923567e-06, + "loss": 0.339, + "step": 9770 + }, + { + "epoch": 0.12613088981602688, + "grad_norm": 1.3045451458352262, + "learning_rate": 4.203851775427736e-06, + "loss": 0.338, + "step": 9780 + }, + { + "epoch": 0.12625985800602282, + "grad_norm": 1.4052057473365873, + "learning_rate": 4.2081506319319065e-06, + "loss": 0.3635, + "step": 9790 + }, + { + "epoch": 0.12638882619601874, + "grad_norm": 1.2367004319047614, + "learning_rate": 4.212449488436076e-06, + "loss": 0.3402, + "step": 9800 + }, + { + "epoch": 0.12651779438601468, + "grad_norm": 1.291446878930991, + "learning_rate": 4.216748344940246e-06, + "loss": 0.3403, + "step": 9810 + }, + { + "epoch": 0.12664676257601062, + "grad_norm": 1.2683699674830913, + "learning_rate": 4.221047201444416e-06, + "loss": 0.3455, + "step": 9820 + }, + { + "epoch": 0.12677573076600657, + "grad_norm": 1.3516632549509524, + "learning_rate": 4.225346057948586e-06, + "loss": 0.3486, + "step": 9830 + }, + { + "epoch": 0.1269046989560025, + "grad_norm": 1.2349247340534624, + "learning_rate": 4.229644914452755e-06, + "loss": 0.3444, + "step": 9840 + }, + { + "epoch": 0.12703366714599845, + "grad_norm": 1.3440459167464425, + "learning_rate": 4.233943770956926e-06, + "loss": 0.3384, + "step": 9850 + }, + { + "epoch": 0.12716263533599437, + "grad_norm": 1.3734302211513763, + "learning_rate": 4.238242627461095e-06, + "loss": 0.342, + "step": 9860 + }, + { + "epoch": 0.1272916035259903, + "grad_norm": 1.3305005215881949, + "learning_rate": 4.2425414839652655e-06, + "loss": 0.3388, + "step": 9870 + }, + { + "epoch": 0.12742057171598625, + "grad_norm": 1.3343784862370414, + "learning_rate": 4.246840340469435e-06, + "loss": 0.3371, + "step": 9880 + }, + { + "epoch": 0.1275495399059822, + "grad_norm": 1.4600814160410236, + "learning_rate": 4.251139196973605e-06, + "loss": 0.3539, + "step": 9890 + }, + { + "epoch": 0.12767850809597814, + "grad_norm": 1.3488912396078454, + "learning_rate": 4.255438053477775e-06, + "loss": 0.3457, + "step": 9900 + }, + { + "epoch": 0.12780747628597405, + "grad_norm": 1.3663460144906576, + "learning_rate": 4.259736909981945e-06, + "loss": 0.3546, + "step": 9910 + }, + { + "epoch": 0.12793644447597, + "grad_norm": 1.3259520635651385, + "learning_rate": 4.2640357664861144e-06, + "loss": 0.3403, + "step": 9920 + }, + { + "epoch": 0.12806541266596594, + "grad_norm": 1.2573942274899486, + "learning_rate": 4.268334622990285e-06, + "loss": 0.3253, + "step": 9930 + }, + { + "epoch": 0.12819438085596188, + "grad_norm": 1.3706125309150536, + "learning_rate": 4.272633479494455e-06, + "loss": 0.3336, + "step": 9940 + }, + { + "epoch": 0.12832334904595782, + "grad_norm": 1.4009022882991515, + "learning_rate": 4.2769323359986245e-06, + "loss": 0.3499, + "step": 9950 + }, + { + "epoch": 0.12845231723595374, + "grad_norm": 1.4256933922837072, + "learning_rate": 4.281231192502795e-06, + "loss": 0.3217, + "step": 9960 + }, + { + "epoch": 0.12858128542594968, + "grad_norm": 1.326879196692921, + "learning_rate": 4.285530049006964e-06, + "loss": 0.3327, + "step": 9970 + }, + { + "epoch": 0.12871025361594562, + "grad_norm": 1.290527636736471, + "learning_rate": 4.2898289055111345e-06, + "loss": 0.3355, + "step": 9980 + }, + { + "epoch": 0.12883922180594157, + "grad_norm": 1.4233552323396064, + "learning_rate": 4.294127762015304e-06, + "loss": 0.3409, + "step": 9990 + }, + { + "epoch": 0.1289681899959375, + "grad_norm": 1.4194271204636615, + "learning_rate": 4.298426618519474e-06, + "loss": 0.3329, + "step": 10000 + }, + { + "epoch": 0.12909715818593345, + "grad_norm": 1.3974387041696246, + "learning_rate": 4.302725475023644e-06, + "loss": 0.3385, + "step": 10010 + }, + { + "epoch": 0.12922612637592937, + "grad_norm": 1.4710022196120205, + "learning_rate": 4.307024331527814e-06, + "loss": 0.3475, + "step": 10020 + }, + { + "epoch": 0.1293550945659253, + "grad_norm": 1.3829055409122644, + "learning_rate": 4.3113231880319835e-06, + "loss": 0.3434, + "step": 10030 + }, + { + "epoch": 0.12948406275592125, + "grad_norm": 1.2471520622328853, + "learning_rate": 4.315622044536154e-06, + "loss": 0.328, + "step": 10040 + }, + { + "epoch": 0.1296130309459172, + "grad_norm": 1.4184824964108687, + "learning_rate": 4.319920901040324e-06, + "loss": 0.3305, + "step": 10050 + }, + { + "epoch": 0.12974199913591314, + "grad_norm": 1.3805503513775232, + "learning_rate": 4.3242197575444936e-06, + "loss": 0.3375, + "step": 10060 + }, + { + "epoch": 0.12987096732590905, + "grad_norm": 1.2437096878093128, + "learning_rate": 4.328518614048664e-06, + "loss": 0.3475, + "step": 10070 + }, + { + "epoch": 0.129999935515905, + "grad_norm": 1.3432244451472246, + "learning_rate": 4.332817470552833e-06, + "loss": 0.3189, + "step": 10080 + }, + { + "epoch": 0.13012890370590094, + "grad_norm": 1.3353441765758443, + "learning_rate": 4.337116327057004e-06, + "loss": 0.3271, + "step": 10090 + }, + { + "epoch": 0.13025787189589688, + "grad_norm": 1.393099585681416, + "learning_rate": 4.341415183561173e-06, + "loss": 0.3258, + "step": 10100 + }, + { + "epoch": 0.13038684008589282, + "grad_norm": 1.3869945970408073, + "learning_rate": 4.345714040065343e-06, + "loss": 0.3342, + "step": 10110 + }, + { + "epoch": 0.13051580827588874, + "grad_norm": 1.6284299654279453, + "learning_rate": 4.350012896569513e-06, + "loss": 0.3395, + "step": 10120 + }, + { + "epoch": 0.13064477646588468, + "grad_norm": 1.1848813430583054, + "learning_rate": 4.354311753073683e-06, + "loss": 0.3372, + "step": 10130 + }, + { + "epoch": 0.13077374465588062, + "grad_norm": 1.3366316024296399, + "learning_rate": 4.3586106095778526e-06, + "loss": 0.3338, + "step": 10140 + }, + { + "epoch": 0.13090271284587657, + "grad_norm": 1.427479118993049, + "learning_rate": 4.362909466082023e-06, + "loss": 0.3395, + "step": 10150 + }, + { + "epoch": 0.1310316810358725, + "grad_norm": 1.333531394883805, + "learning_rate": 4.367208322586192e-06, + "loss": 0.3363, + "step": 10160 + }, + { + "epoch": 0.13116064922586845, + "grad_norm": 1.2151532032315657, + "learning_rate": 4.371507179090363e-06, + "loss": 0.3255, + "step": 10170 + }, + { + "epoch": 0.13128961741586437, + "grad_norm": 1.3636680839127353, + "learning_rate": 4.375806035594532e-06, + "loss": 0.35, + "step": 10180 + }, + { + "epoch": 0.1314185856058603, + "grad_norm": 1.4370218626784164, + "learning_rate": 4.380104892098702e-06, + "loss": 0.3239, + "step": 10190 + }, + { + "epoch": 0.13154755379585625, + "grad_norm": 1.458832810255691, + "learning_rate": 4.384403748602872e-06, + "loss": 0.3338, + "step": 10200 + }, + { + "epoch": 0.1316765219858522, + "grad_norm": 1.2915150705414238, + "learning_rate": 4.388702605107042e-06, + "loss": 0.3481, + "step": 10210 + }, + { + "epoch": 0.13180549017584814, + "grad_norm": 1.3651443435906767, + "learning_rate": 4.393001461611212e-06, + "loss": 0.3383, + "step": 10220 + }, + { + "epoch": 0.13193445836584405, + "grad_norm": 1.3472894085535303, + "learning_rate": 4.397300318115382e-06, + "loss": 0.3488, + "step": 10230 + }, + { + "epoch": 0.13206342655584, + "grad_norm": 1.3365301685731537, + "learning_rate": 4.401599174619551e-06, + "loss": 0.3387, + "step": 10240 + }, + { + "epoch": 0.13219239474583594, + "grad_norm": 1.3522750675222108, + "learning_rate": 4.405898031123722e-06, + "loss": 0.3314, + "step": 10250 + }, + { + "epoch": 0.13232136293583188, + "grad_norm": 1.3734335633719137, + "learning_rate": 4.410196887627891e-06, + "loss": 0.3266, + "step": 10260 + }, + { + "epoch": 0.13245033112582782, + "grad_norm": 1.4564794894632853, + "learning_rate": 4.414495744132061e-06, + "loss": 0.3451, + "step": 10270 + }, + { + "epoch": 0.13257929931582374, + "grad_norm": 1.28605227356771, + "learning_rate": 4.418794600636231e-06, + "loss": 0.3351, + "step": 10280 + }, + { + "epoch": 0.13270826750581968, + "grad_norm": 1.4675159613536184, + "learning_rate": 4.423093457140401e-06, + "loss": 0.3415, + "step": 10290 + }, + { + "epoch": 0.13283723569581563, + "grad_norm": 1.291990799565025, + "learning_rate": 4.427392313644571e-06, + "loss": 0.3413, + "step": 10300 + }, + { + "epoch": 0.13296620388581157, + "grad_norm": 1.5192793173038268, + "learning_rate": 4.431691170148741e-06, + "loss": 0.3414, + "step": 10310 + }, + { + "epoch": 0.1330951720758075, + "grad_norm": 1.3826556012131843, + "learning_rate": 4.43599002665291e-06, + "loss": 0.3323, + "step": 10320 + }, + { + "epoch": 0.13322414026580343, + "grad_norm": 1.3235065689549292, + "learning_rate": 4.440288883157081e-06, + "loss": 0.3447, + "step": 10330 + }, + { + "epoch": 0.13335310845579937, + "grad_norm": 1.4340352506725609, + "learning_rate": 4.44458773966125e-06, + "loss": 0.3425, + "step": 10340 + }, + { + "epoch": 0.1334820766457953, + "grad_norm": 1.3748182548870602, + "learning_rate": 4.44888659616542e-06, + "loss": 0.3444, + "step": 10350 + }, + { + "epoch": 0.13361104483579125, + "grad_norm": 1.5134509462107968, + "learning_rate": 4.45318545266959e-06, + "loss": 0.3497, + "step": 10360 + }, + { + "epoch": 0.1337400130257872, + "grad_norm": 1.4397297807036513, + "learning_rate": 4.45748430917376e-06, + "loss": 0.3315, + "step": 10370 + }, + { + "epoch": 0.13386898121578314, + "grad_norm": 1.3990006349487674, + "learning_rate": 4.46178316567793e-06, + "loss": 0.3191, + "step": 10380 + }, + { + "epoch": 0.13399794940577905, + "grad_norm": 1.3254173300687517, + "learning_rate": 4.4660820221821e-06, + "loss": 0.3248, + "step": 10390 + }, + { + "epoch": 0.134126917595775, + "grad_norm": 1.3021496250297722, + "learning_rate": 4.470380878686269e-06, + "loss": 0.3467, + "step": 10400 + }, + { + "epoch": 0.13425588578577094, + "grad_norm": 1.3803408959643835, + "learning_rate": 4.47467973519044e-06, + "loss": 0.3373, + "step": 10410 + }, + { + "epoch": 0.13438485397576688, + "grad_norm": 1.2976253196690593, + "learning_rate": 4.478978591694609e-06, + "loss": 0.3294, + "step": 10420 + }, + { + "epoch": 0.13451382216576283, + "grad_norm": 1.2714167705464618, + "learning_rate": 4.4832774481987794e-06, + "loss": 0.3347, + "step": 10430 + }, + { + "epoch": 0.13464279035575874, + "grad_norm": 1.471165139261598, + "learning_rate": 4.487576304702949e-06, + "loss": 0.3333, + "step": 10440 + }, + { + "epoch": 0.13477175854575468, + "grad_norm": 1.4422242290382596, + "learning_rate": 4.491875161207119e-06, + "loss": 0.3338, + "step": 10450 + }, + { + "epoch": 0.13490072673575063, + "grad_norm": 1.4692688772387992, + "learning_rate": 4.496174017711289e-06, + "loss": 0.3372, + "step": 10460 + }, + { + "epoch": 0.13502969492574657, + "grad_norm": 1.570100233973871, + "learning_rate": 4.500472874215459e-06, + "loss": 0.3536, + "step": 10470 + }, + { + "epoch": 0.1351586631157425, + "grad_norm": 1.5005474150970892, + "learning_rate": 4.504771730719628e-06, + "loss": 0.3395, + "step": 10480 + }, + { + "epoch": 0.13528763130573843, + "grad_norm": 1.3341915715863828, + "learning_rate": 4.509070587223799e-06, + "loss": 0.338, + "step": 10490 + }, + { + "epoch": 0.13541659949573437, + "grad_norm": 1.3417229360466398, + "learning_rate": 4.513369443727968e-06, + "loss": 0.3286, + "step": 10500 + }, + { + "epoch": 0.1355455676857303, + "grad_norm": 1.4169112373773352, + "learning_rate": 4.5176683002321384e-06, + "loss": 0.3464, + "step": 10510 + }, + { + "epoch": 0.13567453587572625, + "grad_norm": 1.2478133702811394, + "learning_rate": 4.521967156736308e-06, + "loss": 0.3447, + "step": 10520 + }, + { + "epoch": 0.1358035040657222, + "grad_norm": 1.5237455500337804, + "learning_rate": 4.526266013240478e-06, + "loss": 0.3307, + "step": 10530 + }, + { + "epoch": 0.13593247225571814, + "grad_norm": 1.3500793040132264, + "learning_rate": 4.530564869744648e-06, + "loss": 0.3281, + "step": 10540 + }, + { + "epoch": 0.13606144044571405, + "grad_norm": 1.3462184529336159, + "learning_rate": 4.534863726248818e-06, + "loss": 0.3278, + "step": 10550 + }, + { + "epoch": 0.13619040863571, + "grad_norm": 1.3933221099647652, + "learning_rate": 4.539162582752987e-06, + "loss": 0.3247, + "step": 10560 + }, + { + "epoch": 0.13631937682570594, + "grad_norm": 1.3106463238287276, + "learning_rate": 4.543461439257158e-06, + "loss": 0.3199, + "step": 10570 + }, + { + "epoch": 0.13644834501570188, + "grad_norm": 1.355952185212378, + "learning_rate": 4.547760295761328e-06, + "loss": 0.3434, + "step": 10580 + }, + { + "epoch": 0.13657731320569783, + "grad_norm": 1.3676929699568066, + "learning_rate": 4.5520591522654975e-06, + "loss": 0.3462, + "step": 10590 + }, + { + "epoch": 0.13670628139569374, + "grad_norm": 1.3500389748413397, + "learning_rate": 4.556358008769668e-06, + "loss": 0.3395, + "step": 10600 + }, + { + "epoch": 0.13683524958568968, + "grad_norm": 1.4192420308837148, + "learning_rate": 4.560656865273837e-06, + "loss": 0.3286, + "step": 10610 + }, + { + "epoch": 0.13696421777568563, + "grad_norm": 1.3963510791148332, + "learning_rate": 4.5649557217780075e-06, + "loss": 0.3325, + "step": 10620 + }, + { + "epoch": 0.13709318596568157, + "grad_norm": 1.3987885829512818, + "learning_rate": 4.569254578282177e-06, + "loss": 0.3328, + "step": 10630 + }, + { + "epoch": 0.1372221541556775, + "grad_norm": 1.379951151476853, + "learning_rate": 4.573553434786347e-06, + "loss": 0.3375, + "step": 10640 + }, + { + "epoch": 0.13735112234567343, + "grad_norm": 1.3481808625431375, + "learning_rate": 4.577852291290517e-06, + "loss": 0.3213, + "step": 10650 + }, + { + "epoch": 0.13748009053566937, + "grad_norm": 1.3500682098652406, + "learning_rate": 4.582151147794687e-06, + "loss": 0.3296, + "step": 10660 + }, + { + "epoch": 0.1376090587256653, + "grad_norm": 1.3341092102729464, + "learning_rate": 4.586450004298857e-06, + "loss": 0.3418, + "step": 10670 + }, + { + "epoch": 0.13773802691566125, + "grad_norm": 1.4377975632595623, + "learning_rate": 4.590748860803027e-06, + "loss": 0.333, + "step": 10680 + }, + { + "epoch": 0.1378669951056572, + "grad_norm": 1.445921598651917, + "learning_rate": 4.595047717307197e-06, + "loss": 0.3345, + "step": 10690 + }, + { + "epoch": 0.13799596329565314, + "grad_norm": 1.2555435285505487, + "learning_rate": 4.5993465738113665e-06, + "loss": 0.3323, + "step": 10700 + }, + { + "epoch": 0.13812493148564906, + "grad_norm": 1.3143887373463445, + "learning_rate": 4.603645430315537e-06, + "loss": 0.3411, + "step": 10710 + }, + { + "epoch": 0.138253899675645, + "grad_norm": 1.2299147979530716, + "learning_rate": 4.607944286819706e-06, + "loss": 0.338, + "step": 10720 + }, + { + "epoch": 0.13838286786564094, + "grad_norm": 1.4066386848173695, + "learning_rate": 4.612243143323877e-06, + "loss": 0.3234, + "step": 10730 + }, + { + "epoch": 0.13851183605563688, + "grad_norm": 1.3689194806264633, + "learning_rate": 4.616541999828046e-06, + "loss": 0.3248, + "step": 10740 + }, + { + "epoch": 0.13864080424563283, + "grad_norm": 1.2964298481670395, + "learning_rate": 4.620840856332216e-06, + "loss": 0.3352, + "step": 10750 + }, + { + "epoch": 0.13876977243562874, + "grad_norm": 1.403204369290487, + "learning_rate": 4.625139712836386e-06, + "loss": 0.331, + "step": 10760 + }, + { + "epoch": 0.13889874062562468, + "grad_norm": 1.335640567592727, + "learning_rate": 4.629438569340556e-06, + "loss": 0.3222, + "step": 10770 + }, + { + "epoch": 0.13902770881562063, + "grad_norm": 1.3472022426543935, + "learning_rate": 4.6337374258447255e-06, + "loss": 0.3291, + "step": 10780 + }, + { + "epoch": 0.13915667700561657, + "grad_norm": 1.4133300415862151, + "learning_rate": 4.638036282348896e-06, + "loss": 0.3115, + "step": 10790 + }, + { + "epoch": 0.1392856451956125, + "grad_norm": 1.5118018062526504, + "learning_rate": 4.642335138853065e-06, + "loss": 0.3403, + "step": 10800 + }, + { + "epoch": 0.13941461338560843, + "grad_norm": 1.4946245597897636, + "learning_rate": 4.646633995357236e-06, + "loss": 0.3261, + "step": 10810 + }, + { + "epoch": 0.13954358157560437, + "grad_norm": 1.227669687636268, + "learning_rate": 4.650932851861405e-06, + "loss": 0.3182, + "step": 10820 + }, + { + "epoch": 0.1396725497656003, + "grad_norm": 1.3500704918194504, + "learning_rate": 4.655231708365575e-06, + "loss": 0.3328, + "step": 10830 + }, + { + "epoch": 0.13980151795559626, + "grad_norm": 1.2814227399895082, + "learning_rate": 4.659530564869745e-06, + "loss": 0.3274, + "step": 10840 + }, + { + "epoch": 0.1399304861455922, + "grad_norm": 1.3916795473616972, + "learning_rate": 4.663829421373915e-06, + "loss": 0.3426, + "step": 10850 + }, + { + "epoch": 0.14005945433558814, + "grad_norm": 1.5036437031495882, + "learning_rate": 4.6681282778780846e-06, + "loss": 0.3071, + "step": 10860 + }, + { + "epoch": 0.14018842252558406, + "grad_norm": 1.393244567539567, + "learning_rate": 4.672427134382255e-06, + "loss": 0.3399, + "step": 10870 + }, + { + "epoch": 0.14031739071558, + "grad_norm": 1.4499834267193632, + "learning_rate": 4.676725990886424e-06, + "loss": 0.3401, + "step": 10880 + }, + { + "epoch": 0.14044635890557594, + "grad_norm": 1.3334370853074706, + "learning_rate": 4.681024847390595e-06, + "loss": 0.3313, + "step": 10890 + }, + { + "epoch": 0.14057532709557188, + "grad_norm": 1.440739416509355, + "learning_rate": 4.685323703894765e-06, + "loss": 0.3256, + "step": 10900 + }, + { + "epoch": 0.14070429528556783, + "grad_norm": 1.4024078845421166, + "learning_rate": 4.689622560398934e-06, + "loss": 0.3164, + "step": 10910 + }, + { + "epoch": 0.14083326347556374, + "grad_norm": 1.3548262761491472, + "learning_rate": 4.693921416903105e-06, + "loss": 0.3438, + "step": 10920 + }, + { + "epoch": 0.14096223166555968, + "grad_norm": 1.3269782248560265, + "learning_rate": 4.698220273407274e-06, + "loss": 0.3351, + "step": 10930 + }, + { + "epoch": 0.14109119985555563, + "grad_norm": 1.4949607511113412, + "learning_rate": 4.702519129911444e-06, + "loss": 0.3595, + "step": 10940 + }, + { + "epoch": 0.14122016804555157, + "grad_norm": 1.435994530736605, + "learning_rate": 4.706817986415614e-06, + "loss": 0.3341, + "step": 10950 + }, + { + "epoch": 0.1413491362355475, + "grad_norm": 1.3305587053780605, + "learning_rate": 4.711116842919784e-06, + "loss": 0.3219, + "step": 10960 + }, + { + "epoch": 0.14147810442554343, + "grad_norm": 1.4310611096451984, + "learning_rate": 4.715415699423954e-06, + "loss": 0.3321, + "step": 10970 + }, + { + "epoch": 0.14160707261553937, + "grad_norm": 1.3740354182801013, + "learning_rate": 4.719714555928124e-06, + "loss": 0.3287, + "step": 10980 + }, + { + "epoch": 0.1417360408055353, + "grad_norm": 1.267184149178491, + "learning_rate": 4.724013412432293e-06, + "loss": 0.3278, + "step": 10990 + }, + { + "epoch": 0.14186500899553126, + "grad_norm": 1.3767599584293906, + "learning_rate": 4.728312268936464e-06, + "loss": 0.3311, + "step": 11000 + }, + { + "epoch": 0.1419939771855272, + "grad_norm": 1.3732269092996634, + "learning_rate": 4.732611125440633e-06, + "loss": 0.3229, + "step": 11010 + }, + { + "epoch": 0.14212294537552314, + "grad_norm": 1.4220346151732377, + "learning_rate": 4.7369099819448034e-06, + "loss": 0.3362, + "step": 11020 + }, + { + "epoch": 0.14225191356551906, + "grad_norm": 1.352417102524133, + "learning_rate": 4.741208838448973e-06, + "loss": 0.3322, + "step": 11030 + }, + { + "epoch": 0.142380881755515, + "grad_norm": 1.1803240081114141, + "learning_rate": 4.745507694953143e-06, + "loss": 0.3259, + "step": 11040 + }, + { + "epoch": 0.14250984994551094, + "grad_norm": 1.291665480019225, + "learning_rate": 4.749806551457313e-06, + "loss": 0.3206, + "step": 11050 + }, + { + "epoch": 0.14263881813550688, + "grad_norm": 1.2617459609667476, + "learning_rate": 4.754105407961483e-06, + "loss": 0.3345, + "step": 11060 + }, + { + "epoch": 0.14276778632550283, + "grad_norm": 1.316201195695493, + "learning_rate": 4.758404264465652e-06, + "loss": 0.3278, + "step": 11070 + }, + { + "epoch": 0.14289675451549874, + "grad_norm": 1.3854514420735833, + "learning_rate": 4.762703120969823e-06, + "loss": 0.3384, + "step": 11080 + }, + { + "epoch": 0.14302572270549468, + "grad_norm": 1.3670673486215337, + "learning_rate": 4.767001977473992e-06, + "loss": 0.332, + "step": 11090 + }, + { + "epoch": 0.14315469089549063, + "grad_norm": 1.2188537659161591, + "learning_rate": 4.7713008339781624e-06, + "loss": 0.3281, + "step": 11100 + }, + { + "epoch": 0.14328365908548657, + "grad_norm": 1.3245008792490403, + "learning_rate": 4.775599690482332e-06, + "loss": 0.3418, + "step": 11110 + }, + { + "epoch": 0.1434126272754825, + "grad_norm": 1.2347166559607634, + "learning_rate": 4.779898546986502e-06, + "loss": 0.3208, + "step": 11120 + }, + { + "epoch": 0.14354159546547843, + "grad_norm": 1.3095327785995208, + "learning_rate": 4.784197403490672e-06, + "loss": 0.3308, + "step": 11130 + }, + { + "epoch": 0.14367056365547437, + "grad_norm": 1.2912437292499024, + "learning_rate": 4.788496259994842e-06, + "loss": 0.3359, + "step": 11140 + }, + { + "epoch": 0.1437995318454703, + "grad_norm": 1.3332272516530677, + "learning_rate": 4.792795116499011e-06, + "loss": 0.3119, + "step": 11150 + }, + { + "epoch": 0.14392850003546626, + "grad_norm": 1.2823306996134354, + "learning_rate": 4.797093973003182e-06, + "loss": 0.32, + "step": 11160 + }, + { + "epoch": 0.1440574682254622, + "grad_norm": 1.2697166061098195, + "learning_rate": 4.801392829507351e-06, + "loss": 0.3249, + "step": 11170 + }, + { + "epoch": 0.1441864364154581, + "grad_norm": 1.44750599032534, + "learning_rate": 4.8056916860115215e-06, + "loss": 0.3347, + "step": 11180 + }, + { + "epoch": 0.14431540460545406, + "grad_norm": 1.2908066911933767, + "learning_rate": 4.809990542515691e-06, + "loss": 0.337, + "step": 11190 + }, + { + "epoch": 0.14444437279545, + "grad_norm": 1.4302753809835693, + "learning_rate": 4.814289399019861e-06, + "loss": 0.333, + "step": 11200 + }, + { + "epoch": 0.14457334098544594, + "grad_norm": 1.5333832590548024, + "learning_rate": 4.818588255524031e-06, + "loss": 0.307, + "step": 11210 + }, + { + "epoch": 0.14470230917544188, + "grad_norm": 1.3560075264345426, + "learning_rate": 4.822887112028201e-06, + "loss": 0.3188, + "step": 11220 + }, + { + "epoch": 0.14483127736543783, + "grad_norm": 1.3920787302427595, + "learning_rate": 4.8271859685323704e-06, + "loss": 0.3321, + "step": 11230 + }, + { + "epoch": 0.14496024555543374, + "grad_norm": 1.506054067468425, + "learning_rate": 4.831484825036541e-06, + "loss": 0.3167, + "step": 11240 + }, + { + "epoch": 0.14508921374542968, + "grad_norm": 1.2579198231573836, + "learning_rate": 4.83578368154071e-06, + "loss": 0.3398, + "step": 11250 + }, + { + "epoch": 0.14521818193542563, + "grad_norm": 1.3673977324707576, + "learning_rate": 4.8400825380448805e-06, + "loss": 0.3261, + "step": 11260 + }, + { + "epoch": 0.14534715012542157, + "grad_norm": 1.2469580934010205, + "learning_rate": 4.84438139454905e-06, + "loss": 0.3322, + "step": 11270 + }, + { + "epoch": 0.1454761183154175, + "grad_norm": 1.9598556017052575, + "learning_rate": 4.84868025105322e-06, + "loss": 0.3294, + "step": 11280 + }, + { + "epoch": 0.14560508650541343, + "grad_norm": 1.4232213705124048, + "learning_rate": 4.85297910755739e-06, + "loss": 0.315, + "step": 11290 + }, + { + "epoch": 0.14573405469540937, + "grad_norm": 1.2012577374844482, + "learning_rate": 4.85727796406156e-06, + "loss": 0.3295, + "step": 11300 + }, + { + "epoch": 0.1458630228854053, + "grad_norm": 1.429342667012249, + "learning_rate": 4.86157682056573e-06, + "loss": 0.3331, + "step": 11310 + }, + { + "epoch": 0.14599199107540126, + "grad_norm": 1.3967545133939896, + "learning_rate": 4.8658756770699e-06, + "loss": 0.3262, + "step": 11320 + }, + { + "epoch": 0.1461209592653972, + "grad_norm": 1.2987955028843707, + "learning_rate": 4.87017453357407e-06, + "loss": 0.3246, + "step": 11330 + }, + { + "epoch": 0.14624992745539311, + "grad_norm": 1.3388182361457994, + "learning_rate": 4.8744733900782395e-06, + "loss": 0.3289, + "step": 11340 + }, + { + "epoch": 0.14637889564538906, + "grad_norm": 1.329510214160768, + "learning_rate": 4.87877224658241e-06, + "loss": 0.3319, + "step": 11350 + }, + { + "epoch": 0.146507863835385, + "grad_norm": 1.428417772277415, + "learning_rate": 4.883071103086579e-06, + "loss": 0.3339, + "step": 11360 + }, + { + "epoch": 0.14663683202538094, + "grad_norm": 1.4093564879708858, + "learning_rate": 4.8873699595907495e-06, + "loss": 0.3338, + "step": 11370 + }, + { + "epoch": 0.14676580021537688, + "grad_norm": 1.4273008886452716, + "learning_rate": 4.891668816094919e-06, + "loss": 0.3426, + "step": 11380 + }, + { + "epoch": 0.14689476840537283, + "grad_norm": 1.305686034991687, + "learning_rate": 4.895967672599089e-06, + "loss": 0.324, + "step": 11390 + }, + { + "epoch": 0.14702373659536874, + "grad_norm": 1.3991780389534902, + "learning_rate": 4.900266529103259e-06, + "loss": 0.3325, + "step": 11400 + }, + { + "epoch": 0.14715270478536469, + "grad_norm": 1.3227183292173716, + "learning_rate": 4.904565385607429e-06, + "loss": 0.3258, + "step": 11410 + }, + { + "epoch": 0.14728167297536063, + "grad_norm": 1.3840410938845151, + "learning_rate": 4.908864242111599e-06, + "loss": 0.3364, + "step": 11420 + }, + { + "epoch": 0.14741064116535657, + "grad_norm": 1.429386086679869, + "learning_rate": 4.913163098615769e-06, + "loss": 0.3282, + "step": 11430 + }, + { + "epoch": 0.1475396093553525, + "grad_norm": 1.4796886127615856, + "learning_rate": 4.917461955119939e-06, + "loss": 0.3368, + "step": 11440 + }, + { + "epoch": 0.14766857754534843, + "grad_norm": 1.2879712968223778, + "learning_rate": 4.9217608116241086e-06, + "loss": 0.3171, + "step": 11450 + }, + { + "epoch": 0.14779754573534437, + "grad_norm": 1.3079892860407145, + "learning_rate": 4.926059668128279e-06, + "loss": 0.3146, + "step": 11460 + }, + { + "epoch": 0.1479265139253403, + "grad_norm": 1.250293630831423, + "learning_rate": 4.930358524632448e-06, + "loss": 0.3379, + "step": 11470 + }, + { + "epoch": 0.14805548211533626, + "grad_norm": 1.2859177385705625, + "learning_rate": 4.934657381136619e-06, + "loss": 0.3144, + "step": 11480 + }, + { + "epoch": 0.1481844503053322, + "grad_norm": 1.314532732186083, + "learning_rate": 4.938956237640788e-06, + "loss": 0.3123, + "step": 11490 + }, + { + "epoch": 0.14831341849532811, + "grad_norm": 1.3283120591890525, + "learning_rate": 4.943255094144958e-06, + "loss": 0.3315, + "step": 11500 + }, + { + "epoch": 0.14844238668532406, + "grad_norm": 1.4268869657954564, + "learning_rate": 4.947553950649128e-06, + "loss": 0.3202, + "step": 11510 + }, + { + "epoch": 0.14857135487532, + "grad_norm": 1.4445316531878767, + "learning_rate": 4.951852807153298e-06, + "loss": 0.3305, + "step": 11520 + }, + { + "epoch": 0.14870032306531594, + "grad_norm": 1.2530466172629642, + "learning_rate": 4.956151663657468e-06, + "loss": 0.3295, + "step": 11530 + }, + { + "epoch": 0.14882929125531189, + "grad_norm": 1.405659440742632, + "learning_rate": 4.960450520161638e-06, + "loss": 0.3178, + "step": 11540 + }, + { + "epoch": 0.14895825944530783, + "grad_norm": 1.3745516538451408, + "learning_rate": 4.964749376665807e-06, + "loss": 0.317, + "step": 11550 + }, + { + "epoch": 0.14908722763530374, + "grad_norm": 1.3679203896700876, + "learning_rate": 4.969048233169978e-06, + "loss": 0.3304, + "step": 11560 + }, + { + "epoch": 0.14921619582529969, + "grad_norm": 1.3723539906906026, + "learning_rate": 4.973347089674147e-06, + "loss": 0.3322, + "step": 11570 + }, + { + "epoch": 0.14934516401529563, + "grad_norm": 1.3887264516242903, + "learning_rate": 4.977645946178317e-06, + "loss": 0.3142, + "step": 11580 + }, + { + "epoch": 0.14947413220529157, + "grad_norm": 1.3528603784258184, + "learning_rate": 4.981944802682487e-06, + "loss": 0.3263, + "step": 11590 + }, + { + "epoch": 0.1496031003952875, + "grad_norm": 1.3165532511739166, + "learning_rate": 4.986243659186657e-06, + "loss": 0.3297, + "step": 11600 + }, + { + "epoch": 0.14973206858528343, + "grad_norm": 1.363498789104129, + "learning_rate": 4.990542515690827e-06, + "loss": 0.3335, + "step": 11610 + }, + { + "epoch": 0.14986103677527937, + "grad_norm": 1.3776677622631481, + "learning_rate": 4.994841372194997e-06, + "loss": 0.3161, + "step": 11620 + }, + { + "epoch": 0.14999000496527531, + "grad_norm": 1.4456957913965445, + "learning_rate": 4.999140228699166e-06, + "loss": 0.3344, + "step": 11630 + }, + { + "epoch": 0.15011897315527126, + "grad_norm": 1.3542885421117228, + "learning_rate": 5.003439085203337e-06, + "loss": 0.3325, + "step": 11640 + }, + { + "epoch": 0.1502479413452672, + "grad_norm": 1.4514144515324536, + "learning_rate": 5.007737941707507e-06, + "loss": 0.3256, + "step": 11650 + }, + { + "epoch": 0.15037690953526311, + "grad_norm": 1.3170983254706912, + "learning_rate": 5.0120367982116756e-06, + "loss": 0.3241, + "step": 11660 + }, + { + "epoch": 0.15050587772525906, + "grad_norm": 1.271995009722916, + "learning_rate": 5.016335654715846e-06, + "loss": 0.3112, + "step": 11670 + }, + { + "epoch": 0.150634845915255, + "grad_norm": 1.0933473653037034, + "learning_rate": 5.020634511220016e-06, + "loss": 0.3237, + "step": 11680 + }, + { + "epoch": 0.15076381410525094, + "grad_norm": 1.3275880860778329, + "learning_rate": 5.0249333677241865e-06, + "loss": 0.3281, + "step": 11690 + }, + { + "epoch": 0.15089278229524689, + "grad_norm": 1.3187256005079107, + "learning_rate": 5.029232224228355e-06, + "loss": 0.3229, + "step": 11700 + }, + { + "epoch": 0.15102175048524283, + "grad_norm": 1.2292784059131128, + "learning_rate": 5.033531080732525e-06, + "loss": 0.3252, + "step": 11710 + }, + { + "epoch": 0.15115071867523874, + "grad_norm": 1.4169786096635109, + "learning_rate": 5.037829937236696e-06, + "loss": 0.3155, + "step": 11720 + }, + { + "epoch": 0.15127968686523469, + "grad_norm": 1.4906543098913045, + "learning_rate": 5.042128793740866e-06, + "loss": 0.3196, + "step": 11730 + }, + { + "epoch": 0.15140865505523063, + "grad_norm": 1.2750355193899936, + "learning_rate": 5.0464276502450346e-06, + "loss": 0.3194, + "step": 11740 + }, + { + "epoch": 0.15153762324522657, + "grad_norm": 1.3426631315140145, + "learning_rate": 5.050726506749205e-06, + "loss": 0.3081, + "step": 11750 + }, + { + "epoch": 0.15166659143522251, + "grad_norm": 1.3347891174360382, + "learning_rate": 5.055025363253375e-06, + "loss": 0.3258, + "step": 11760 + }, + { + "epoch": 0.15179555962521843, + "grad_norm": 1.5023659863131664, + "learning_rate": 5.0593242197575455e-06, + "loss": 0.3192, + "step": 11770 + }, + { + "epoch": 0.15192452781521437, + "grad_norm": 1.3777748509554197, + "learning_rate": 5.063623076261714e-06, + "loss": 0.3173, + "step": 11780 + }, + { + "epoch": 0.15205349600521031, + "grad_norm": 1.3690226738635045, + "learning_rate": 5.067921932765884e-06, + "loss": 0.3319, + "step": 11790 + }, + { + "epoch": 0.15218246419520626, + "grad_norm": 1.4628779474852218, + "learning_rate": 5.072220789270055e-06, + "loss": 0.3153, + "step": 11800 + }, + { + "epoch": 0.1523114323852022, + "grad_norm": 1.526925084919704, + "learning_rate": 5.076519645774225e-06, + "loss": 0.334, + "step": 11810 + }, + { + "epoch": 0.15244040057519812, + "grad_norm": 1.4048042321961212, + "learning_rate": 5.0808185022783944e-06, + "loss": 0.3222, + "step": 11820 + }, + { + "epoch": 0.15256936876519406, + "grad_norm": 1.2622470617340869, + "learning_rate": 5.085117358782564e-06, + "loss": 0.3203, + "step": 11830 + }, + { + "epoch": 0.15269833695519, + "grad_norm": 1.3774143491033966, + "learning_rate": 5.089416215286734e-06, + "loss": 0.3359, + "step": 11840 + }, + { + "epoch": 0.15282730514518594, + "grad_norm": 1.4886401451476017, + "learning_rate": 5.0937150717909045e-06, + "loss": 0.3228, + "step": 11850 + }, + { + "epoch": 0.15295627333518189, + "grad_norm": 1.3157815233402461, + "learning_rate": 5.098013928295074e-06, + "loss": 0.3307, + "step": 11860 + }, + { + "epoch": 0.15308524152517783, + "grad_norm": 1.298940218424006, + "learning_rate": 5.102312784799243e-06, + "loss": 0.3281, + "step": 11870 + }, + { + "epoch": 0.15321420971517374, + "grad_norm": 1.4261165362470003, + "learning_rate": 5.106611641303414e-06, + "loss": 0.3191, + "step": 11880 + }, + { + "epoch": 0.1533431779051697, + "grad_norm": 1.2853951560127757, + "learning_rate": 5.110910497807584e-06, + "loss": 0.3102, + "step": 11890 + }, + { + "epoch": 0.15347214609516563, + "grad_norm": 1.4783909911714253, + "learning_rate": 5.1152093543117534e-06, + "loss": 0.3261, + "step": 11900 + }, + { + "epoch": 0.15360111428516157, + "grad_norm": 1.3258433049867582, + "learning_rate": 5.119508210815923e-06, + "loss": 0.3251, + "step": 11910 + }, + { + "epoch": 0.15373008247515751, + "grad_norm": 1.3002377093973276, + "learning_rate": 5.123807067320093e-06, + "loss": 0.328, + "step": 11920 + }, + { + "epoch": 0.15385905066515343, + "grad_norm": 1.6481520320932677, + "learning_rate": 5.1281059238242635e-06, + "loss": 0.3364, + "step": 11930 + }, + { + "epoch": 0.15398801885514937, + "grad_norm": 1.461763253576694, + "learning_rate": 5.132404780328433e-06, + "loss": 0.3146, + "step": 11940 + }, + { + "epoch": 0.15411698704514531, + "grad_norm": 1.2683731035758852, + "learning_rate": 5.136703636832603e-06, + "loss": 0.3228, + "step": 11950 + }, + { + "epoch": 0.15424595523514126, + "grad_norm": 1.434456635778629, + "learning_rate": 5.141002493336773e-06, + "loss": 0.3246, + "step": 11960 + }, + { + "epoch": 0.1543749234251372, + "grad_norm": 1.3943121307239854, + "learning_rate": 5.145301349840943e-06, + "loss": 0.3175, + "step": 11970 + }, + { + "epoch": 0.15450389161513312, + "grad_norm": 1.2996173037588696, + "learning_rate": 5.1496002063451125e-06, + "loss": 0.3343, + "step": 11980 + }, + { + "epoch": 0.15463285980512906, + "grad_norm": 1.3262087822318964, + "learning_rate": 5.153899062849283e-06, + "loss": 0.3135, + "step": 11990 + }, + { + "epoch": 0.154761827995125, + "grad_norm": 1.4083463159705227, + "learning_rate": 5.158197919353452e-06, + "loss": 0.3202, + "step": 12000 + }, + { + "epoch": 0.15489079618512094, + "grad_norm": 1.2682188921807356, + "learning_rate": 5.1624967758576225e-06, + "loss": 0.3196, + "step": 12010 + }, + { + "epoch": 0.1550197643751169, + "grad_norm": 1.3811229827898994, + "learning_rate": 5.166795632361792e-06, + "loss": 0.3245, + "step": 12020 + }, + { + "epoch": 0.1551487325651128, + "grad_norm": 1.3919183407682665, + "learning_rate": 5.171094488865962e-06, + "loss": 0.315, + "step": 12030 + }, + { + "epoch": 0.15527770075510874, + "grad_norm": 1.449868876095192, + "learning_rate": 5.1753933453701326e-06, + "loss": 0.3321, + "step": 12040 + }, + { + "epoch": 0.1554066689451047, + "grad_norm": 1.2601951170972745, + "learning_rate": 5.179692201874302e-06, + "loss": 0.3083, + "step": 12050 + }, + { + "epoch": 0.15553563713510063, + "grad_norm": 1.4947430594449083, + "learning_rate": 5.1839910583784715e-06, + "loss": 0.3403, + "step": 12060 + }, + { + "epoch": 0.15566460532509657, + "grad_norm": 1.3479650789982915, + "learning_rate": 5.188289914882642e-06, + "loss": 0.3309, + "step": 12070 + }, + { + "epoch": 0.15579357351509251, + "grad_norm": 1.3166561901087752, + "learning_rate": 5.192588771386812e-06, + "loss": 0.323, + "step": 12080 + }, + { + "epoch": 0.15592254170508843, + "grad_norm": 1.370182008173817, + "learning_rate": 5.1968876278909815e-06, + "loss": 0.3379, + "step": 12090 + }, + { + "epoch": 0.15605150989508437, + "grad_norm": 1.3541276577412646, + "learning_rate": 5.201186484395151e-06, + "loss": 0.3098, + "step": 12100 + }, + { + "epoch": 0.15618047808508032, + "grad_norm": 1.2951837124658763, + "learning_rate": 5.205485340899321e-06, + "loss": 0.3315, + "step": 12110 + }, + { + "epoch": 0.15630944627507626, + "grad_norm": 1.4048290655992721, + "learning_rate": 5.209784197403492e-06, + "loss": 0.3147, + "step": 12120 + }, + { + "epoch": 0.1564384144650722, + "grad_norm": 1.3958357452484929, + "learning_rate": 5.214083053907661e-06, + "loss": 0.3187, + "step": 12130 + }, + { + "epoch": 0.15656738265506812, + "grad_norm": 1.314441199920928, + "learning_rate": 5.2183819104118305e-06, + "loss": 0.316, + "step": 12140 + }, + { + "epoch": 0.15669635084506406, + "grad_norm": 1.2506385446474377, + "learning_rate": 5.222680766916001e-06, + "loss": 0.3054, + "step": 12150 + }, + { + "epoch": 0.15682531903506, + "grad_norm": 1.4477592002060413, + "learning_rate": 5.226979623420171e-06, + "loss": 0.331, + "step": 12160 + }, + { + "epoch": 0.15695428722505594, + "grad_norm": 1.2525085497787685, + "learning_rate": 5.231278479924341e-06, + "loss": 0.3082, + "step": 12170 + }, + { + "epoch": 0.1570832554150519, + "grad_norm": 1.3282408542344069, + "learning_rate": 5.23557733642851e-06, + "loss": 0.3172, + "step": 12180 + }, + { + "epoch": 0.1572122236050478, + "grad_norm": 1.329726683339463, + "learning_rate": 5.23987619293268e-06, + "loss": 0.3213, + "step": 12190 + }, + { + "epoch": 0.15734119179504374, + "grad_norm": 1.2302740750740204, + "learning_rate": 5.244175049436851e-06, + "loss": 0.3265, + "step": 12200 + }, + { + "epoch": 0.1574701599850397, + "grad_norm": 1.2823493384823188, + "learning_rate": 5.248473905941021e-06, + "loss": 0.32, + "step": 12210 + }, + { + "epoch": 0.15759912817503563, + "grad_norm": 1.3477495853247172, + "learning_rate": 5.2527727624451895e-06, + "loss": 0.318, + "step": 12220 + }, + { + "epoch": 0.15772809636503157, + "grad_norm": 1.370068488800554, + "learning_rate": 5.25707161894936e-06, + "loss": 0.3258, + "step": 12230 + }, + { + "epoch": 0.15785706455502752, + "grad_norm": 1.259428143284222, + "learning_rate": 5.26137047545353e-06, + "loss": 0.3304, + "step": 12240 + }, + { + "epoch": 0.15798603274502343, + "grad_norm": 1.339929134315048, + "learning_rate": 5.2656693319577e-06, + "loss": 0.3163, + "step": 12250 + }, + { + "epoch": 0.15811500093501937, + "grad_norm": 1.269622789044558, + "learning_rate": 5.269968188461869e-06, + "loss": 0.3311, + "step": 12260 + }, + { + "epoch": 0.15824396912501532, + "grad_norm": 1.4106100580119936, + "learning_rate": 5.274267044966039e-06, + "loss": 0.3197, + "step": 12270 + }, + { + "epoch": 0.15837293731501126, + "grad_norm": 1.447690924299512, + "learning_rate": 5.27856590147021e-06, + "loss": 0.3171, + "step": 12280 + }, + { + "epoch": 0.1585019055050072, + "grad_norm": 1.547970144158794, + "learning_rate": 5.28286475797438e-06, + "loss": 0.3112, + "step": 12290 + }, + { + "epoch": 0.15863087369500312, + "grad_norm": 1.4372681318442477, + "learning_rate": 5.2871636144785485e-06, + "loss": 0.3482, + "step": 12300 + }, + { + "epoch": 0.15875984188499906, + "grad_norm": 1.274573212051248, + "learning_rate": 5.291462470982719e-06, + "loss": 0.3203, + "step": 12310 + }, + { + "epoch": 0.158888810074995, + "grad_norm": 1.4404032562579254, + "learning_rate": 5.295761327486889e-06, + "loss": 0.3065, + "step": 12320 + }, + { + "epoch": 0.15901777826499094, + "grad_norm": 1.3290974946001322, + "learning_rate": 5.3000601839910594e-06, + "loss": 0.3261, + "step": 12330 + }, + { + "epoch": 0.1591467464549869, + "grad_norm": 1.2911611620951848, + "learning_rate": 5.304359040495228e-06, + "loss": 0.3094, + "step": 12340 + }, + { + "epoch": 0.1592757146449828, + "grad_norm": 1.3683903770968915, + "learning_rate": 5.308657896999398e-06, + "loss": 0.3162, + "step": 12350 + }, + { + "epoch": 0.15940468283497874, + "grad_norm": 1.3863966970596018, + "learning_rate": 5.312956753503569e-06, + "loss": 0.3238, + "step": 12360 + }, + { + "epoch": 0.1595336510249747, + "grad_norm": 1.395658694549991, + "learning_rate": 5.317255610007739e-06, + "loss": 0.3227, + "step": 12370 + }, + { + "epoch": 0.15966261921497063, + "grad_norm": 1.3657755786833965, + "learning_rate": 5.3215544665119075e-06, + "loss": 0.3398, + "step": 12380 + }, + { + "epoch": 0.15979158740496657, + "grad_norm": 1.3575213967856268, + "learning_rate": 5.325853323016078e-06, + "loss": 0.312, + "step": 12390 + }, + { + "epoch": 0.15992055559496252, + "grad_norm": 1.4198545857438163, + "learning_rate": 5.330152179520248e-06, + "loss": 0.3187, + "step": 12400 + }, + { + "epoch": 0.16004952378495843, + "grad_norm": 1.344563164171641, + "learning_rate": 5.3344510360244184e-06, + "loss": 0.3246, + "step": 12410 + }, + { + "epoch": 0.16017849197495437, + "grad_norm": 1.3484206290934382, + "learning_rate": 5.338749892528587e-06, + "loss": 0.3264, + "step": 12420 + }, + { + "epoch": 0.16030746016495032, + "grad_norm": 1.2837338226770088, + "learning_rate": 5.343048749032757e-06, + "loss": 0.318, + "step": 12430 + }, + { + "epoch": 0.16043642835494626, + "grad_norm": 1.2678826325751005, + "learning_rate": 5.347347605536928e-06, + "loss": 0.3241, + "step": 12440 + }, + { + "epoch": 0.1605653965449422, + "grad_norm": 1.291769231481168, + "learning_rate": 5.351646462041098e-06, + "loss": 0.3182, + "step": 12450 + }, + { + "epoch": 0.16069436473493812, + "grad_norm": 1.2857840190331782, + "learning_rate": 5.355945318545267e-06, + "loss": 0.3195, + "step": 12460 + }, + { + "epoch": 0.16082333292493406, + "grad_norm": 1.3422421307086205, + "learning_rate": 5.360244175049437e-06, + "loss": 0.3234, + "step": 12470 + }, + { + "epoch": 0.16095230111493, + "grad_norm": 1.5113766708269012, + "learning_rate": 5.364543031553607e-06, + "loss": 0.3124, + "step": 12480 + }, + { + "epoch": 0.16108126930492594, + "grad_norm": 1.2308885528194624, + "learning_rate": 5.3688418880577775e-06, + "loss": 0.3229, + "step": 12490 + }, + { + "epoch": 0.1612102374949219, + "grad_norm": 1.4141518669555029, + "learning_rate": 5.373140744561947e-06, + "loss": 0.3362, + "step": 12500 + }, + { + "epoch": 0.1613392056849178, + "grad_norm": 1.2589160897920826, + "learning_rate": 5.377439601066116e-06, + "loss": 0.3213, + "step": 12510 + }, + { + "epoch": 0.16146817387491375, + "grad_norm": 1.3124632942215693, + "learning_rate": 5.381738457570287e-06, + "loss": 0.3201, + "step": 12520 + }, + { + "epoch": 0.1615971420649097, + "grad_norm": 1.3307719675057932, + "learning_rate": 5.386037314074457e-06, + "loss": 0.3192, + "step": 12530 + }, + { + "epoch": 0.16172611025490563, + "grad_norm": 1.3416400311585457, + "learning_rate": 5.390336170578626e-06, + "loss": 0.322, + "step": 12540 + }, + { + "epoch": 0.16185507844490157, + "grad_norm": 1.274100880137548, + "learning_rate": 5.394635027082796e-06, + "loss": 0.3066, + "step": 12550 + }, + { + "epoch": 0.16198404663489752, + "grad_norm": 1.391564197145899, + "learning_rate": 5.398933883586966e-06, + "loss": 0.3227, + "step": 12560 + }, + { + "epoch": 0.16211301482489343, + "grad_norm": 1.4288934445258195, + "learning_rate": 5.4032327400911365e-06, + "loss": 0.3097, + "step": 12570 + }, + { + "epoch": 0.16224198301488937, + "grad_norm": 1.2965222852255363, + "learning_rate": 5.407531596595306e-06, + "loss": 0.3155, + "step": 12580 + }, + { + "epoch": 0.16237095120488532, + "grad_norm": 1.3093836200293996, + "learning_rate": 5.411830453099476e-06, + "loss": 0.3147, + "step": 12590 + }, + { + "epoch": 0.16249991939488126, + "grad_norm": 1.5844066221303743, + "learning_rate": 5.416129309603646e-06, + "loss": 0.3434, + "step": 12600 + }, + { + "epoch": 0.1626288875848772, + "grad_norm": 1.3243004327556507, + "learning_rate": 5.420428166107816e-06, + "loss": 0.3107, + "step": 12610 + }, + { + "epoch": 0.16275785577487312, + "grad_norm": 1.452364506722362, + "learning_rate": 5.4247270226119854e-06, + "loss": 0.3254, + "step": 12620 + }, + { + "epoch": 0.16288682396486906, + "grad_norm": 1.468542898988025, + "learning_rate": 5.429025879116156e-06, + "loss": 0.3275, + "step": 12630 + }, + { + "epoch": 0.163015792154865, + "grad_norm": 1.4747534093219419, + "learning_rate": 5.433324735620325e-06, + "loss": 0.3264, + "step": 12640 + }, + { + "epoch": 0.16314476034486095, + "grad_norm": 1.2480784585297775, + "learning_rate": 5.4376235921244955e-06, + "loss": 0.3044, + "step": 12650 + }, + { + "epoch": 0.1632737285348569, + "grad_norm": 1.2435578244264631, + "learning_rate": 5.441922448628665e-06, + "loss": 0.3137, + "step": 12660 + }, + { + "epoch": 0.1634026967248528, + "grad_norm": 1.363137160534876, + "learning_rate": 5.446221305132835e-06, + "loss": 0.3024, + "step": 12670 + }, + { + "epoch": 0.16353166491484875, + "grad_norm": 1.4329031018098575, + "learning_rate": 5.4505201616370055e-06, + "loss": 0.3181, + "step": 12680 + }, + { + "epoch": 0.1636606331048447, + "grad_norm": 1.3075260032241116, + "learning_rate": 5.454819018141175e-06, + "loss": 0.3226, + "step": 12690 + }, + { + "epoch": 0.16378960129484063, + "grad_norm": 1.35373957243452, + "learning_rate": 5.4591178746453445e-06, + "loss": 0.3187, + "step": 12700 + }, + { + "epoch": 0.16391856948483657, + "grad_norm": 1.3115342617341212, + "learning_rate": 5.463416731149515e-06, + "loss": 0.3155, + "step": 12710 + }, + { + "epoch": 0.1640475376748325, + "grad_norm": 1.4134080888162261, + "learning_rate": 5.467715587653685e-06, + "loss": 0.3282, + "step": 12720 + }, + { + "epoch": 0.16417650586482843, + "grad_norm": 1.3524136789547034, + "learning_rate": 5.4720144441578545e-06, + "loss": 0.3084, + "step": 12730 + }, + { + "epoch": 0.16430547405482437, + "grad_norm": 1.362082003042397, + "learning_rate": 5.476313300662024e-06, + "loss": 0.3229, + "step": 12740 + }, + { + "epoch": 0.16443444224482032, + "grad_norm": 1.2097042071455466, + "learning_rate": 5.480612157166194e-06, + "loss": 0.3168, + "step": 12750 + }, + { + "epoch": 0.16456341043481626, + "grad_norm": 1.2687210279342762, + "learning_rate": 5.4849110136703646e-06, + "loss": 0.3135, + "step": 12760 + }, + { + "epoch": 0.1646923786248122, + "grad_norm": 1.328521019801466, + "learning_rate": 5.489209870174535e-06, + "loss": 0.3222, + "step": 12770 + }, + { + "epoch": 0.16482134681480812, + "grad_norm": 1.3748494197177843, + "learning_rate": 5.4935087266787035e-06, + "loss": 0.3163, + "step": 12780 + }, + { + "epoch": 0.16495031500480406, + "grad_norm": 1.2471392549266924, + "learning_rate": 5.497807583182874e-06, + "loss": 0.3236, + "step": 12790 + }, + { + "epoch": 0.1650792831948, + "grad_norm": 1.211059390761258, + "learning_rate": 5.502106439687044e-06, + "loss": 0.3159, + "step": 12800 + }, + { + "epoch": 0.16520825138479595, + "grad_norm": 1.2775886876128089, + "learning_rate": 5.506405296191214e-06, + "loss": 0.3281, + "step": 12810 + }, + { + "epoch": 0.1653372195747919, + "grad_norm": 1.2842063874680816, + "learning_rate": 5.510704152695383e-06, + "loss": 0.3115, + "step": 12820 + }, + { + "epoch": 0.1654661877647878, + "grad_norm": 1.393965754700936, + "learning_rate": 5.515003009199553e-06, + "loss": 0.3189, + "step": 12830 + }, + { + "epoch": 0.16559515595478375, + "grad_norm": 1.4393511889935606, + "learning_rate": 5.5193018657037236e-06, + "loss": 0.3295, + "step": 12840 + }, + { + "epoch": 0.1657241241447797, + "grad_norm": 1.3193911569066097, + "learning_rate": 5.523600722207894e-06, + "loss": 0.3249, + "step": 12850 + }, + { + "epoch": 0.16585309233477563, + "grad_norm": 1.3191929148170773, + "learning_rate": 5.5278995787120625e-06, + "loss": 0.3164, + "step": 12860 + }, + { + "epoch": 0.16598206052477157, + "grad_norm": 1.3545902388698219, + "learning_rate": 5.532198435216233e-06, + "loss": 0.323, + "step": 12870 + }, + { + "epoch": 0.1661110287147675, + "grad_norm": 1.349934268530799, + "learning_rate": 5.536497291720403e-06, + "loss": 0.3197, + "step": 12880 + }, + { + "epoch": 0.16623999690476343, + "grad_norm": 1.453528660822824, + "learning_rate": 5.540796148224573e-06, + "loss": 0.3237, + "step": 12890 + }, + { + "epoch": 0.16636896509475937, + "grad_norm": 1.3951815040358266, + "learning_rate": 5.545095004728742e-06, + "loss": 0.3283, + "step": 12900 + }, + { + "epoch": 0.16649793328475532, + "grad_norm": 1.310129400328561, + "learning_rate": 5.549393861232912e-06, + "loss": 0.3055, + "step": 12910 + }, + { + "epoch": 0.16662690147475126, + "grad_norm": 1.4622725961398306, + "learning_rate": 5.553692717737083e-06, + "loss": 0.3101, + "step": 12920 + }, + { + "epoch": 0.1667558696647472, + "grad_norm": 1.2795044794282273, + "learning_rate": 5.557991574241253e-06, + "loss": 0.3322, + "step": 12930 + }, + { + "epoch": 0.16688483785474312, + "grad_norm": 1.4609397771860895, + "learning_rate": 5.5622904307454215e-06, + "loss": 0.3298, + "step": 12940 + }, + { + "epoch": 0.16701380604473906, + "grad_norm": 1.334841570720802, + "learning_rate": 5.566589287249592e-06, + "loss": 0.3154, + "step": 12950 + }, + { + "epoch": 0.167142774234735, + "grad_norm": 1.2975611642786835, + "learning_rate": 5.570888143753762e-06, + "loss": 0.3035, + "step": 12960 + }, + { + "epoch": 0.16727174242473095, + "grad_norm": 1.3590611964352763, + "learning_rate": 5.575187000257932e-06, + "loss": 0.3115, + "step": 12970 + }, + { + "epoch": 0.1674007106147269, + "grad_norm": 1.2954079827235616, + "learning_rate": 5.579485856762101e-06, + "loss": 0.3144, + "step": 12980 + }, + { + "epoch": 0.1675296788047228, + "grad_norm": 1.4012101335273892, + "learning_rate": 5.583784713266271e-06, + "loss": 0.3185, + "step": 12990 + }, + { + "epoch": 0.16765864699471875, + "grad_norm": 1.3455246182931897, + "learning_rate": 5.588083569770442e-06, + "loss": 0.3055, + "step": 13000 + }, + { + "epoch": 0.1677876151847147, + "grad_norm": 1.3412522034322598, + "learning_rate": 5.592382426274612e-06, + "loss": 0.3208, + "step": 13010 + }, + { + "epoch": 0.16791658337471063, + "grad_norm": 1.2318423997826868, + "learning_rate": 5.5966812827787805e-06, + "loss": 0.3119, + "step": 13020 + }, + { + "epoch": 0.16804555156470657, + "grad_norm": 1.2496795122742417, + "learning_rate": 5.600980139282951e-06, + "loss": 0.3225, + "step": 13030 + }, + { + "epoch": 0.1681745197547025, + "grad_norm": 1.509856786458958, + "learning_rate": 5.605278995787121e-06, + "loss": 0.324, + "step": 13040 + }, + { + "epoch": 0.16830348794469843, + "grad_norm": 1.484052323581062, + "learning_rate": 5.609577852291291e-06, + "loss": 0.3147, + "step": 13050 + }, + { + "epoch": 0.16843245613469437, + "grad_norm": 1.2621191880285123, + "learning_rate": 5.61387670879546e-06, + "loss": 0.3023, + "step": 13060 + }, + { + "epoch": 0.16856142432469032, + "grad_norm": 1.408610702748505, + "learning_rate": 5.61817556529963e-06, + "loss": 0.3278, + "step": 13070 + }, + { + "epoch": 0.16869039251468626, + "grad_norm": 1.1892753377794247, + "learning_rate": 5.622474421803801e-06, + "loss": 0.32, + "step": 13080 + }, + { + "epoch": 0.1688193607046822, + "grad_norm": 1.3771395389029761, + "learning_rate": 5.626773278307971e-06, + "loss": 0.3297, + "step": 13090 + }, + { + "epoch": 0.16894832889467812, + "grad_norm": 1.2198069611640652, + "learning_rate": 5.63107213481214e-06, + "loss": 0.3157, + "step": 13100 + }, + { + "epoch": 0.16907729708467406, + "grad_norm": 1.2548026950043654, + "learning_rate": 5.63537099131631e-06, + "loss": 0.3235, + "step": 13110 + }, + { + "epoch": 0.16920626527467, + "grad_norm": 1.4021628264788533, + "learning_rate": 5.63966984782048e-06, + "loss": 0.3104, + "step": 13120 + }, + { + "epoch": 0.16933523346466595, + "grad_norm": 1.3482623376956397, + "learning_rate": 5.6439687043246504e-06, + "loss": 0.3172, + "step": 13130 + }, + { + "epoch": 0.1694642016546619, + "grad_norm": 1.3051238419260613, + "learning_rate": 5.64826756082882e-06, + "loss": 0.3179, + "step": 13140 + }, + { + "epoch": 0.1695931698446578, + "grad_norm": 1.4515208912750106, + "learning_rate": 5.652566417332989e-06, + "loss": 0.3034, + "step": 13150 + }, + { + "epoch": 0.16972213803465375, + "grad_norm": 1.2411631562735586, + "learning_rate": 5.65686527383716e-06, + "loss": 0.3112, + "step": 13160 + }, + { + "epoch": 0.1698511062246497, + "grad_norm": 1.43718105724712, + "learning_rate": 5.66116413034133e-06, + "loss": 0.3103, + "step": 13170 + }, + { + "epoch": 0.16998007441464563, + "grad_norm": 1.4487731912696495, + "learning_rate": 5.665462986845499e-06, + "loss": 0.3197, + "step": 13180 + }, + { + "epoch": 0.17010904260464157, + "grad_norm": 1.4312873119955998, + "learning_rate": 5.66976184334967e-06, + "loss": 0.3105, + "step": 13190 + }, + { + "epoch": 0.1702380107946375, + "grad_norm": 1.2877076496235063, + "learning_rate": 5.674060699853839e-06, + "loss": 0.3099, + "step": 13200 + }, + { + "epoch": 0.17036697898463343, + "grad_norm": 1.2690890712152085, + "learning_rate": 5.6783595563580094e-06, + "loss": 0.3202, + "step": 13210 + }, + { + "epoch": 0.17049594717462938, + "grad_norm": 1.3458666311814287, + "learning_rate": 5.682658412862179e-06, + "loss": 0.3014, + "step": 13220 + }, + { + "epoch": 0.17062491536462532, + "grad_norm": 1.3901832906873242, + "learning_rate": 5.686957269366349e-06, + "loss": 0.3119, + "step": 13230 + }, + { + "epoch": 0.17075388355462126, + "grad_norm": 1.4176672291647803, + "learning_rate": 5.691256125870519e-06, + "loss": 0.3232, + "step": 13240 + }, + { + "epoch": 0.1708828517446172, + "grad_norm": 1.3743797561129538, + "learning_rate": 5.695554982374689e-06, + "loss": 0.3161, + "step": 13250 + }, + { + "epoch": 0.17101181993461312, + "grad_norm": 1.269606532643558, + "learning_rate": 5.699853838878858e-06, + "loss": 0.2994, + "step": 13260 + }, + { + "epoch": 0.17114078812460906, + "grad_norm": 1.4412296495664763, + "learning_rate": 5.704152695383029e-06, + "loss": 0.3154, + "step": 13270 + }, + { + "epoch": 0.171269756314605, + "grad_norm": 1.2717059308035896, + "learning_rate": 5.708451551887198e-06, + "loss": 0.3064, + "step": 13280 + }, + { + "epoch": 0.17139872450460095, + "grad_norm": 1.3823820954464396, + "learning_rate": 5.7127504083913685e-06, + "loss": 0.3, + "step": 13290 + }, + { + "epoch": 0.1715276926945969, + "grad_norm": 1.334501472451656, + "learning_rate": 5.717049264895538e-06, + "loss": 0.3075, + "step": 13300 + }, + { + "epoch": 0.1716566608845928, + "grad_norm": 1.342316195024784, + "learning_rate": 5.721348121399708e-06, + "loss": 0.301, + "step": 13310 + }, + { + "epoch": 0.17178562907458875, + "grad_norm": 1.3203483643000442, + "learning_rate": 5.7256469779038785e-06, + "loss": 0.3285, + "step": 13320 + }, + { + "epoch": 0.1719145972645847, + "grad_norm": 1.400774256976589, + "learning_rate": 5.729945834408048e-06, + "loss": 0.2996, + "step": 13330 + }, + { + "epoch": 0.17204356545458063, + "grad_norm": 1.2889388855712853, + "learning_rate": 5.734244690912217e-06, + "loss": 0.3069, + "step": 13340 + }, + { + "epoch": 0.17217253364457658, + "grad_norm": 1.4649422417682278, + "learning_rate": 5.738543547416388e-06, + "loss": 0.3209, + "step": 13350 + }, + { + "epoch": 0.1723015018345725, + "grad_norm": 1.3097672082571, + "learning_rate": 5.742842403920558e-06, + "loss": 0.3132, + "step": 13360 + }, + { + "epoch": 0.17243047002456843, + "grad_norm": 1.2785824063597615, + "learning_rate": 5.7471412604247275e-06, + "loss": 0.3001, + "step": 13370 + }, + { + "epoch": 0.17255943821456438, + "grad_norm": 1.4275544989379876, + "learning_rate": 5.751440116928897e-06, + "loss": 0.3107, + "step": 13380 + }, + { + "epoch": 0.17268840640456032, + "grad_norm": 1.395535600974424, + "learning_rate": 5.755738973433067e-06, + "loss": 0.3162, + "step": 13390 + }, + { + "epoch": 0.17281737459455626, + "grad_norm": 1.2368077142844507, + "learning_rate": 5.7600378299372375e-06, + "loss": 0.3161, + "step": 13400 + }, + { + "epoch": 0.1729463427845522, + "grad_norm": 1.3310682525127528, + "learning_rate": 5.764336686441408e-06, + "loss": 0.3144, + "step": 13410 + }, + { + "epoch": 0.17307531097454812, + "grad_norm": 1.3457525807215804, + "learning_rate": 5.7686355429455764e-06, + "loss": 0.3257, + "step": 13420 + }, + { + "epoch": 0.17320427916454406, + "grad_norm": 1.4035512682069837, + "learning_rate": 5.772934399449747e-06, + "loss": 0.3072, + "step": 13430 + }, + { + "epoch": 0.17333324735454, + "grad_norm": 1.4178892721312117, + "learning_rate": 5.777233255953917e-06, + "loss": 0.3092, + "step": 13440 + }, + { + "epoch": 0.17346221554453595, + "grad_norm": 1.323342131950906, + "learning_rate": 5.781532112458087e-06, + "loss": 0.3142, + "step": 13450 + }, + { + "epoch": 0.1735911837345319, + "grad_norm": 1.4399871873549788, + "learning_rate": 5.785830968962256e-06, + "loss": 0.3179, + "step": 13460 + }, + { + "epoch": 0.1737201519245278, + "grad_norm": 1.1514181754899906, + "learning_rate": 5.790129825466426e-06, + "loss": 0.3071, + "step": 13470 + }, + { + "epoch": 0.17384912011452375, + "grad_norm": 1.5232030909634735, + "learning_rate": 5.7944286819705965e-06, + "loss": 0.2913, + "step": 13480 + }, + { + "epoch": 0.1739780883045197, + "grad_norm": 1.2709239384832443, + "learning_rate": 5.798727538474767e-06, + "loss": 0.3073, + "step": 13490 + }, + { + "epoch": 0.17410705649451563, + "grad_norm": 1.3600009896107075, + "learning_rate": 5.8030263949789355e-06, + "loss": 0.3085, + "step": 13500 + }, + { + "epoch": 0.17423602468451158, + "grad_norm": 1.3479720280073144, + "learning_rate": 5.807325251483106e-06, + "loss": 0.3161, + "step": 13510 + }, + { + "epoch": 0.1743649928745075, + "grad_norm": 1.1920165823806552, + "learning_rate": 5.811624107987276e-06, + "loss": 0.3294, + "step": 13520 + }, + { + "epoch": 0.17449396106450343, + "grad_norm": 1.4127508097424404, + "learning_rate": 5.815922964491446e-06, + "loss": 0.3136, + "step": 13530 + }, + { + "epoch": 0.17462292925449938, + "grad_norm": 1.3201770217712359, + "learning_rate": 5.820221820995615e-06, + "loss": 0.3268, + "step": 13540 + }, + { + "epoch": 0.17475189744449532, + "grad_norm": 1.369729535810165, + "learning_rate": 5.824520677499785e-06, + "loss": 0.3097, + "step": 13550 + }, + { + "epoch": 0.17488086563449126, + "grad_norm": 1.2954191447800607, + "learning_rate": 5.8288195340039556e-06, + "loss": 0.3214, + "step": 13560 + }, + { + "epoch": 0.17500983382448718, + "grad_norm": 1.3296355972301364, + "learning_rate": 5.833118390508126e-06, + "loss": 0.3207, + "step": 13570 + }, + { + "epoch": 0.17513880201448312, + "grad_norm": 1.4140863919914424, + "learning_rate": 5.8374172470122945e-06, + "loss": 0.2996, + "step": 13580 + }, + { + "epoch": 0.17526777020447906, + "grad_norm": 1.21344907754274, + "learning_rate": 5.841716103516465e-06, + "loss": 0.3083, + "step": 13590 + }, + { + "epoch": 0.175396738394475, + "grad_norm": 1.3092919518699773, + "learning_rate": 5.846014960020635e-06, + "loss": 0.3116, + "step": 13600 + }, + { + "epoch": 0.17552570658447095, + "grad_norm": 1.3193607960491744, + "learning_rate": 5.850313816524805e-06, + "loss": 0.3211, + "step": 13610 + }, + { + "epoch": 0.1756546747744669, + "grad_norm": 1.1969409256350487, + "learning_rate": 5.854612673028974e-06, + "loss": 0.3172, + "step": 13620 + }, + { + "epoch": 0.1757836429644628, + "grad_norm": 1.4807848671716592, + "learning_rate": 5.858911529533144e-06, + "loss": 0.3072, + "step": 13630 + }, + { + "epoch": 0.17591261115445875, + "grad_norm": 1.3627695577256627, + "learning_rate": 5.8632103860373146e-06, + "loss": 0.3065, + "step": 13640 + }, + { + "epoch": 0.1760415793444547, + "grad_norm": 1.3943888174858277, + "learning_rate": 5.867509242541485e-06, + "loss": 0.3176, + "step": 13650 + }, + { + "epoch": 0.17617054753445063, + "grad_norm": 1.2249720153604322, + "learning_rate": 5.8718080990456535e-06, + "loss": 0.3111, + "step": 13660 + }, + { + "epoch": 0.17629951572444658, + "grad_norm": 1.3773935438802094, + "learning_rate": 5.876106955549824e-06, + "loss": 0.3139, + "step": 13670 + }, + { + "epoch": 0.1764284839144425, + "grad_norm": 1.3911416514921087, + "learning_rate": 5.880405812053994e-06, + "loss": 0.3114, + "step": 13680 + }, + { + "epoch": 0.17655745210443843, + "grad_norm": 1.409507826205411, + "learning_rate": 5.884704668558164e-06, + "loss": 0.3135, + "step": 13690 + }, + { + "epoch": 0.17668642029443438, + "grad_norm": 1.3072624461713513, + "learning_rate": 5.889003525062334e-06, + "loss": 0.3089, + "step": 13700 + }, + { + "epoch": 0.17681538848443032, + "grad_norm": 1.4585148261045533, + "learning_rate": 5.893302381566503e-06, + "loss": 0.3019, + "step": 13710 + }, + { + "epoch": 0.17694435667442626, + "grad_norm": 1.2027651272408029, + "learning_rate": 5.897601238070674e-06, + "loss": 0.3043, + "step": 13720 + }, + { + "epoch": 0.17707332486442218, + "grad_norm": 1.3381538497022156, + "learning_rate": 5.901900094574844e-06, + "loss": 0.3259, + "step": 13730 + }, + { + "epoch": 0.17720229305441812, + "grad_norm": 1.2511399886434418, + "learning_rate": 5.906198951079013e-06, + "loss": 0.3163, + "step": 13740 + }, + { + "epoch": 0.17733126124441406, + "grad_norm": 1.3546829221255425, + "learning_rate": 5.910497807583183e-06, + "loss": 0.3158, + "step": 13750 + }, + { + "epoch": 0.17746022943441, + "grad_norm": 1.441259717962409, + "learning_rate": 5.914796664087353e-06, + "loss": 0.3231, + "step": 13760 + }, + { + "epoch": 0.17758919762440595, + "grad_norm": 1.252538504227043, + "learning_rate": 5.919095520591523e-06, + "loss": 0.3049, + "step": 13770 + }, + { + "epoch": 0.1777181658144019, + "grad_norm": 1.3428466604784, + "learning_rate": 5.923394377095693e-06, + "loss": 0.2971, + "step": 13780 + }, + { + "epoch": 0.1778471340043978, + "grad_norm": 1.282767089132665, + "learning_rate": 5.927693233599862e-06, + "loss": 0.3073, + "step": 13790 + }, + { + "epoch": 0.17797610219439375, + "grad_norm": 1.2702844845917893, + "learning_rate": 5.931992090104033e-06, + "loss": 0.3199, + "step": 13800 + }, + { + "epoch": 0.1781050703843897, + "grad_norm": 1.3527627500672608, + "learning_rate": 5.936290946608203e-06, + "loss": 0.3233, + "step": 13810 + }, + { + "epoch": 0.17823403857438563, + "grad_norm": 1.3324651468014013, + "learning_rate": 5.940589803112372e-06, + "loss": 0.311, + "step": 13820 + }, + { + "epoch": 0.17836300676438158, + "grad_norm": 1.3466440388704721, + "learning_rate": 5.944888659616543e-06, + "loss": 0.3087, + "step": 13830 + }, + { + "epoch": 0.1784919749543775, + "grad_norm": 1.3481554611959239, + "learning_rate": 5.949187516120712e-06, + "loss": 0.3101, + "step": 13840 + }, + { + "epoch": 0.17862094314437343, + "grad_norm": 1.4297441362434138, + "learning_rate": 5.953486372624882e-06, + "loss": 0.3139, + "step": 13850 + }, + { + "epoch": 0.17874991133436938, + "grad_norm": 1.2024770229506248, + "learning_rate": 5.957785229129052e-06, + "loss": 0.3114, + "step": 13860 + }, + { + "epoch": 0.17887887952436532, + "grad_norm": 1.3134097475475905, + "learning_rate": 5.962084085633222e-06, + "loss": 0.3036, + "step": 13870 + }, + { + "epoch": 0.17900784771436126, + "grad_norm": 1.30846264146052, + "learning_rate": 5.966382942137392e-06, + "loss": 0.3077, + "step": 13880 + }, + { + "epoch": 0.17913681590435718, + "grad_norm": 1.4125340737856444, + "learning_rate": 5.970681798641562e-06, + "loss": 0.3126, + "step": 13890 + }, + { + "epoch": 0.17926578409435312, + "grad_norm": 1.3166447314152077, + "learning_rate": 5.974980655145731e-06, + "loss": 0.3205, + "step": 13900 + }, + { + "epoch": 0.17939475228434906, + "grad_norm": 1.4856179352257952, + "learning_rate": 5.979279511649902e-06, + "loss": 0.3168, + "step": 13910 + }, + { + "epoch": 0.179523720474345, + "grad_norm": 1.3236879945782711, + "learning_rate": 5.983578368154072e-06, + "loss": 0.3263, + "step": 13920 + }, + { + "epoch": 0.17965268866434095, + "grad_norm": 1.4577267641055447, + "learning_rate": 5.9878772246582414e-06, + "loss": 0.3148, + "step": 13930 + }, + { + "epoch": 0.1797816568543369, + "grad_norm": 1.3964284285334887, + "learning_rate": 5.992176081162411e-06, + "loss": 0.3252, + "step": 13940 + }, + { + "epoch": 0.1799106250443328, + "grad_norm": 1.2724108952701974, + "learning_rate": 5.996474937666581e-06, + "loss": 0.3036, + "step": 13950 + }, + { + "epoch": 0.18003959323432875, + "grad_norm": 1.2844750749739857, + "learning_rate": 6.0007737941707515e-06, + "loss": 0.3039, + "step": 13960 + }, + { + "epoch": 0.1801685614243247, + "grad_norm": 1.2272154958372161, + "learning_rate": 6.005072650674921e-06, + "loss": 0.3153, + "step": 13970 + }, + { + "epoch": 0.18029752961432063, + "grad_norm": 1.368554628585163, + "learning_rate": 6.00937150717909e-06, + "loss": 0.3123, + "step": 13980 + }, + { + "epoch": 0.18042649780431658, + "grad_norm": 1.283093580826319, + "learning_rate": 6.013670363683261e-06, + "loss": 0.3053, + "step": 13990 + }, + { + "epoch": 0.1805554659943125, + "grad_norm": 1.3458026350544756, + "learning_rate": 6.017969220187431e-06, + "loss": 0.3208, + "step": 14000 + }, + { + "epoch": 0.18068443418430843, + "grad_norm": 1.3265247316705104, + "learning_rate": 6.0222680766916004e-06, + "loss": 0.3176, + "step": 14010 + }, + { + "epoch": 0.18081340237430438, + "grad_norm": 1.3747796336951168, + "learning_rate": 6.02656693319577e-06, + "loss": 0.306, + "step": 14020 + }, + { + "epoch": 0.18094237056430032, + "grad_norm": 1.284328305689498, + "learning_rate": 6.03086578969994e-06, + "loss": 0.3192, + "step": 14030 + }, + { + "epoch": 0.18107133875429626, + "grad_norm": 1.2047936498271155, + "learning_rate": 6.0351646462041105e-06, + "loss": 0.3069, + "step": 14040 + }, + { + "epoch": 0.18120030694429218, + "grad_norm": 1.34056014464582, + "learning_rate": 6.039463502708281e-06, + "loss": 0.3187, + "step": 14050 + }, + { + "epoch": 0.18132927513428812, + "grad_norm": 1.308007777426076, + "learning_rate": 6.043762359212449e-06, + "loss": 0.3208, + "step": 14060 + }, + { + "epoch": 0.18145824332428406, + "grad_norm": 1.1948201347267673, + "learning_rate": 6.04806121571662e-06, + "loss": 0.3026, + "step": 14070 + }, + { + "epoch": 0.18158721151428, + "grad_norm": 1.299781174678577, + "learning_rate": 6.05236007222079e-06, + "loss": 0.3193, + "step": 14080 + }, + { + "epoch": 0.18171617970427595, + "grad_norm": 1.2796186388587303, + "learning_rate": 6.05665892872496e-06, + "loss": 0.311, + "step": 14090 + }, + { + "epoch": 0.1818451478942719, + "grad_norm": 1.448507602780041, + "learning_rate": 6.060957785229129e-06, + "loss": 0.3141, + "step": 14100 + }, + { + "epoch": 0.1819741160842678, + "grad_norm": 1.2473103647777937, + "learning_rate": 6.065256641733299e-06, + "loss": 0.3105, + "step": 14110 + }, + { + "epoch": 0.18210308427426375, + "grad_norm": 1.4091284410180105, + "learning_rate": 6.0695554982374695e-06, + "loss": 0.2999, + "step": 14120 + }, + { + "epoch": 0.1822320524642597, + "grad_norm": 1.3007424302898023, + "learning_rate": 6.07385435474164e-06, + "loss": 0.3086, + "step": 14130 + }, + { + "epoch": 0.18236102065425563, + "grad_norm": 1.2338500020008412, + "learning_rate": 6.078153211245808e-06, + "loss": 0.3168, + "step": 14140 + }, + { + "epoch": 0.18248998884425158, + "grad_norm": 1.2908620871572327, + "learning_rate": 6.082452067749979e-06, + "loss": 0.2924, + "step": 14150 + }, + { + "epoch": 0.1826189570342475, + "grad_norm": 1.2698976862691356, + "learning_rate": 6.086750924254149e-06, + "loss": 0.3181, + "step": 14160 + }, + { + "epoch": 0.18274792522424343, + "grad_norm": 1.3896075523252471, + "learning_rate": 6.091049780758319e-06, + "loss": 0.3103, + "step": 14170 + }, + { + "epoch": 0.18287689341423938, + "grad_norm": 1.3156772656342586, + "learning_rate": 6.095348637262488e-06, + "loss": 0.3027, + "step": 14180 + }, + { + "epoch": 0.18300586160423532, + "grad_norm": 1.3654423309480077, + "learning_rate": 6.099647493766658e-06, + "loss": 0.3286, + "step": 14190 + }, + { + "epoch": 0.18313482979423126, + "grad_norm": 1.1440392321020667, + "learning_rate": 6.1039463502708285e-06, + "loss": 0.2943, + "step": 14200 + }, + { + "epoch": 0.18326379798422718, + "grad_norm": 1.3353278583837342, + "learning_rate": 6.108245206774999e-06, + "loss": 0.3003, + "step": 14210 + }, + { + "epoch": 0.18339276617422312, + "grad_norm": 1.2863074330931117, + "learning_rate": 6.1125440632791674e-06, + "loss": 0.299, + "step": 14220 + }, + { + "epoch": 0.18352173436421906, + "grad_norm": 1.3420415051154961, + "learning_rate": 6.116842919783338e-06, + "loss": 0.3146, + "step": 14230 + }, + { + "epoch": 0.183650702554215, + "grad_norm": 1.3538392904095218, + "learning_rate": 6.121141776287508e-06, + "loss": 0.3193, + "step": 14240 + }, + { + "epoch": 0.18377967074421095, + "grad_norm": 1.3970362449154934, + "learning_rate": 6.125440632791678e-06, + "loss": 0.3183, + "step": 14250 + }, + { + "epoch": 0.1839086389342069, + "grad_norm": 1.3126623269984363, + "learning_rate": 6.129739489295847e-06, + "loss": 0.3139, + "step": 14260 + }, + { + "epoch": 0.1840376071242028, + "grad_norm": 1.2741086435556417, + "learning_rate": 6.134038345800017e-06, + "loss": 0.3213, + "step": 14270 + }, + { + "epoch": 0.18416657531419875, + "grad_norm": 1.4173793142139577, + "learning_rate": 6.1383372023041875e-06, + "loss": 0.3177, + "step": 14280 + }, + { + "epoch": 0.1842955435041947, + "grad_norm": 1.2373743812929547, + "learning_rate": 6.142636058808358e-06, + "loss": 0.299, + "step": 14290 + }, + { + "epoch": 0.18442451169419063, + "grad_norm": 1.3129105296809243, + "learning_rate": 6.1469349153125265e-06, + "loss": 0.3131, + "step": 14300 + }, + { + "epoch": 0.18455347988418658, + "grad_norm": 1.4116794724544603, + "learning_rate": 6.151233771816697e-06, + "loss": 0.3127, + "step": 14310 + }, + { + "epoch": 0.1846824480741825, + "grad_norm": 1.3426759800371948, + "learning_rate": 6.155532628320867e-06, + "loss": 0.2901, + "step": 14320 + }, + { + "epoch": 0.18481141626417844, + "grad_norm": 1.3480875895917033, + "learning_rate": 6.159831484825037e-06, + "loss": 0.3162, + "step": 14330 + }, + { + "epoch": 0.18494038445417438, + "grad_norm": 1.3195625791551628, + "learning_rate": 6.164130341329207e-06, + "loss": 0.3136, + "step": 14340 + }, + { + "epoch": 0.18506935264417032, + "grad_norm": 1.2992511932532818, + "learning_rate": 6.168429197833376e-06, + "loss": 0.3326, + "step": 14350 + }, + { + "epoch": 0.18519832083416626, + "grad_norm": 1.358557781122178, + "learning_rate": 6.1727280543375466e-06, + "loss": 0.3129, + "step": 14360 + }, + { + "epoch": 0.18532728902416218, + "grad_norm": 1.47709939970727, + "learning_rate": 6.177026910841717e-06, + "loss": 0.3043, + "step": 14370 + }, + { + "epoch": 0.18545625721415812, + "grad_norm": 1.762837458189061, + "learning_rate": 6.181325767345886e-06, + "loss": 0.2977, + "step": 14380 + }, + { + "epoch": 0.18558522540415406, + "grad_norm": 1.1335596221994382, + "learning_rate": 6.185624623850056e-06, + "loss": 0.3156, + "step": 14390 + }, + { + "epoch": 0.18571419359415, + "grad_norm": 1.3291663318283198, + "learning_rate": 6.189923480354226e-06, + "loss": 0.3223, + "step": 14400 + }, + { + "epoch": 0.18584316178414595, + "grad_norm": 1.4158607970607802, + "learning_rate": 6.194222336858396e-06, + "loss": 0.2962, + "step": 14410 + }, + { + "epoch": 0.18597212997414186, + "grad_norm": 1.4791441805270762, + "learning_rate": 6.198521193362566e-06, + "loss": 0.31, + "step": 14420 + }, + { + "epoch": 0.1861010981641378, + "grad_norm": 1.2879501410892167, + "learning_rate": 6.202820049866735e-06, + "loss": 0.2961, + "step": 14430 + }, + { + "epoch": 0.18623006635413375, + "grad_norm": 1.2594078722762687, + "learning_rate": 6.2071189063709056e-06, + "loss": 0.3082, + "step": 14440 + }, + { + "epoch": 0.1863590345441297, + "grad_norm": 1.3244619668708302, + "learning_rate": 6.211417762875076e-06, + "loss": 0.2984, + "step": 14450 + }, + { + "epoch": 0.18648800273412564, + "grad_norm": 1.2577909227170763, + "learning_rate": 6.215716619379245e-06, + "loss": 0.3023, + "step": 14460 + }, + { + "epoch": 0.18661697092412158, + "grad_norm": 1.4655096718385434, + "learning_rate": 6.220015475883416e-06, + "loss": 0.3045, + "step": 14470 + }, + { + "epoch": 0.1867459391141175, + "grad_norm": 1.2834689093440799, + "learning_rate": 6.224314332387585e-06, + "loss": 0.2948, + "step": 14480 + }, + { + "epoch": 0.18687490730411344, + "grad_norm": 1.2790142751024065, + "learning_rate": 6.228613188891755e-06, + "loss": 0.3158, + "step": 14490 + }, + { + "epoch": 0.18700387549410938, + "grad_norm": 1.2882520304671488, + "learning_rate": 6.232912045395925e-06, + "loss": 0.2902, + "step": 14500 + }, + { + "epoch": 0.18713284368410532, + "grad_norm": 1.276185586195502, + "learning_rate": 6.237210901900095e-06, + "loss": 0.3159, + "step": 14510 + }, + { + "epoch": 0.18726181187410126, + "grad_norm": 1.285485945980715, + "learning_rate": 6.241509758404265e-06, + "loss": 0.3128, + "step": 14520 + }, + { + "epoch": 0.18739078006409718, + "grad_norm": 1.3026232762917391, + "learning_rate": 6.245808614908435e-06, + "loss": 0.3022, + "step": 14530 + }, + { + "epoch": 0.18751974825409312, + "grad_norm": 1.2754755124506274, + "learning_rate": 6.250107471412604e-06, + "loss": 0.3031, + "step": 14540 + }, + { + "epoch": 0.18764871644408906, + "grad_norm": 1.32987312989979, + "learning_rate": 6.254406327916775e-06, + "loss": 0.3069, + "step": 14550 + }, + { + "epoch": 0.187777684634085, + "grad_norm": 1.457484724218453, + "learning_rate": 6.258705184420945e-06, + "loss": 0.3023, + "step": 14560 + }, + { + "epoch": 0.18790665282408095, + "grad_norm": 1.5361086595061622, + "learning_rate": 6.263004040925114e-06, + "loss": 0.3149, + "step": 14570 + }, + { + "epoch": 0.18803562101407686, + "grad_norm": 1.4935771807775458, + "learning_rate": 6.267302897429284e-06, + "loss": 0.2969, + "step": 14580 + }, + { + "epoch": 0.1881645892040728, + "grad_norm": 1.2335093851383063, + "learning_rate": 6.271601753933454e-06, + "loss": 0.2994, + "step": 14590 + }, + { + "epoch": 0.18829355739406875, + "grad_norm": 1.3541955345474355, + "learning_rate": 6.2759006104376245e-06, + "loss": 0.3041, + "step": 14600 + }, + { + "epoch": 0.1884225255840647, + "grad_norm": 1.3722754068497043, + "learning_rate": 6.280199466941794e-06, + "loss": 0.3065, + "step": 14610 + }, + { + "epoch": 0.18855149377406064, + "grad_norm": 1.274174057459246, + "learning_rate": 6.284498323445963e-06, + "loss": 0.2985, + "step": 14620 + }, + { + "epoch": 0.18868046196405658, + "grad_norm": 1.180307362403386, + "learning_rate": 6.288797179950134e-06, + "loss": 0.3001, + "step": 14630 + }, + { + "epoch": 0.1888094301540525, + "grad_norm": 1.304710594807531, + "learning_rate": 6.293096036454304e-06, + "loss": 0.3023, + "step": 14640 + }, + { + "epoch": 0.18893839834404844, + "grad_norm": 1.3370955591706724, + "learning_rate": 6.297394892958473e-06, + "loss": 0.303, + "step": 14650 + }, + { + "epoch": 0.18906736653404438, + "grad_norm": 1.483001910538981, + "learning_rate": 6.301693749462644e-06, + "loss": 0.3196, + "step": 14660 + }, + { + "epoch": 0.18919633472404032, + "grad_norm": 1.2627796666211537, + "learning_rate": 6.305992605966813e-06, + "loss": 0.3048, + "step": 14670 + }, + { + "epoch": 0.18932530291403626, + "grad_norm": 1.3218874082052892, + "learning_rate": 6.3102914624709835e-06, + "loss": 0.3112, + "step": 14680 + }, + { + "epoch": 0.18945427110403218, + "grad_norm": 1.4492632847183249, + "learning_rate": 6.314590318975154e-06, + "loss": 0.3133, + "step": 14690 + }, + { + "epoch": 0.18958323929402812, + "grad_norm": 1.388043125662553, + "learning_rate": 6.318889175479323e-06, + "loss": 0.3114, + "step": 14700 + }, + { + "epoch": 0.18971220748402406, + "grad_norm": 1.305228123910064, + "learning_rate": 6.323188031983493e-06, + "loss": 0.31, + "step": 14710 + }, + { + "epoch": 0.18984117567402, + "grad_norm": 1.2000460265451833, + "learning_rate": 6.327486888487663e-06, + "loss": 0.294, + "step": 14720 + }, + { + "epoch": 0.18997014386401595, + "grad_norm": 1.2389539063809223, + "learning_rate": 6.331785744991833e-06, + "loss": 0.3097, + "step": 14730 + }, + { + "epoch": 0.19009911205401187, + "grad_norm": 1.3529063433595376, + "learning_rate": 6.336084601496003e-06, + "loss": 0.2974, + "step": 14740 + }, + { + "epoch": 0.1902280802440078, + "grad_norm": 1.1611615691679422, + "learning_rate": 6.340383458000172e-06, + "loss": 0.3095, + "step": 14750 + }, + { + "epoch": 0.19035704843400375, + "grad_norm": 1.2695177026129323, + "learning_rate": 6.3446823145043425e-06, + "loss": 0.3045, + "step": 14760 + }, + { + "epoch": 0.1904860166239997, + "grad_norm": 1.41666460523003, + "learning_rate": 6.348981171008513e-06, + "loss": 0.3066, + "step": 14770 + }, + { + "epoch": 0.19061498481399564, + "grad_norm": 1.2594426964890524, + "learning_rate": 6.353280027512683e-06, + "loss": 0.2896, + "step": 14780 + }, + { + "epoch": 0.19074395300399158, + "grad_norm": 1.4600266849285648, + "learning_rate": 6.357578884016852e-06, + "loss": 0.3079, + "step": 14790 + }, + { + "epoch": 0.1908729211939875, + "grad_norm": 1.326385231517558, + "learning_rate": 6.361877740521022e-06, + "loss": 0.3094, + "step": 14800 + }, + { + "epoch": 0.19100188938398344, + "grad_norm": 1.2590778358046415, + "learning_rate": 6.366176597025192e-06, + "loss": 0.2959, + "step": 14810 + }, + { + "epoch": 0.19113085757397938, + "grad_norm": 1.2996982815256495, + "learning_rate": 6.370475453529363e-06, + "loss": 0.306, + "step": 14820 + }, + { + "epoch": 0.19125982576397532, + "grad_norm": 1.4894276917979454, + "learning_rate": 6.374774310033531e-06, + "loss": 0.2926, + "step": 14830 + }, + { + "epoch": 0.19138879395397126, + "grad_norm": 1.438868847542591, + "learning_rate": 6.3790731665377015e-06, + "loss": 0.2945, + "step": 14840 + }, + { + "epoch": 0.19151776214396718, + "grad_norm": 1.2594139348305422, + "learning_rate": 6.383372023041872e-06, + "loss": 0.3062, + "step": 14850 + }, + { + "epoch": 0.19164673033396312, + "grad_norm": 1.3082435537052313, + "learning_rate": 6.387670879546042e-06, + "loss": 0.3038, + "step": 14860 + }, + { + "epoch": 0.19177569852395907, + "grad_norm": 1.3212586344128237, + "learning_rate": 6.391969736050211e-06, + "loss": 0.3192, + "step": 14870 + }, + { + "epoch": 0.191904666713955, + "grad_norm": 1.4853171234064029, + "learning_rate": 6.396268592554381e-06, + "loss": 0.3097, + "step": 14880 + }, + { + "epoch": 0.19203363490395095, + "grad_norm": 1.160200326310731, + "learning_rate": 6.400567449058551e-06, + "loss": 0.2942, + "step": 14890 + }, + { + "epoch": 0.19216260309394687, + "grad_norm": 1.3361112350484718, + "learning_rate": 6.404866305562722e-06, + "loss": 0.3002, + "step": 14900 + }, + { + "epoch": 0.1922915712839428, + "grad_norm": 1.3873247042574093, + "learning_rate": 6.40916516206689e-06, + "loss": 0.3014, + "step": 14910 + }, + { + "epoch": 0.19242053947393875, + "grad_norm": 1.283417547413056, + "learning_rate": 6.4134640185710605e-06, + "loss": 0.303, + "step": 14920 + }, + { + "epoch": 0.1925495076639347, + "grad_norm": 1.3778547279299542, + "learning_rate": 6.417762875075231e-06, + "loss": 0.3153, + "step": 14930 + }, + { + "epoch": 0.19267847585393064, + "grad_norm": 1.2853890547033164, + "learning_rate": 6.422061731579401e-06, + "loss": 0.3145, + "step": 14940 + }, + { + "epoch": 0.19280744404392658, + "grad_norm": 1.280641713645326, + "learning_rate": 6.42636058808357e-06, + "loss": 0.2999, + "step": 14950 + }, + { + "epoch": 0.1929364122339225, + "grad_norm": 1.272513256950387, + "learning_rate": 6.43065944458774e-06, + "loss": 0.2979, + "step": 14960 + }, + { + "epoch": 0.19306538042391844, + "grad_norm": 1.3228207772078777, + "learning_rate": 6.43495830109191e-06, + "loss": 0.3032, + "step": 14970 + }, + { + "epoch": 0.19319434861391438, + "grad_norm": 1.2582203067882118, + "learning_rate": 6.439257157596081e-06, + "loss": 0.3022, + "step": 14980 + }, + { + "epoch": 0.19332331680391032, + "grad_norm": 1.4070373451347846, + "learning_rate": 6.443556014100249e-06, + "loss": 0.3183, + "step": 14990 + }, + { + "epoch": 0.19345228499390626, + "grad_norm": 1.4980699040705554, + "learning_rate": 6.4478548706044195e-06, + "loss": 0.3171, + "step": 15000 + }, + { + "epoch": 0.19358125318390218, + "grad_norm": 1.170692625982384, + "learning_rate": 6.45215372710859e-06, + "loss": 0.2895, + "step": 15010 + }, + { + "epoch": 0.19371022137389812, + "grad_norm": 1.4115052109482888, + "learning_rate": 6.45645258361276e-06, + "loss": 0.3235, + "step": 15020 + }, + { + "epoch": 0.19383918956389407, + "grad_norm": 1.3872078003450474, + "learning_rate": 6.460751440116929e-06, + "loss": 0.3077, + "step": 15030 + }, + { + "epoch": 0.19396815775389, + "grad_norm": 1.2755927710745638, + "learning_rate": 6.465050296621099e-06, + "loss": 0.308, + "step": 15040 + }, + { + "epoch": 0.19409712594388595, + "grad_norm": 1.4056327825427504, + "learning_rate": 6.469349153125269e-06, + "loss": 0.3165, + "step": 15050 + }, + { + "epoch": 0.19422609413388187, + "grad_norm": 1.2024181384589632, + "learning_rate": 6.47364800962944e-06, + "loss": 0.3083, + "step": 15060 + }, + { + "epoch": 0.1943550623238778, + "grad_norm": 1.1570874908352633, + "learning_rate": 6.477946866133609e-06, + "loss": 0.3038, + "step": 15070 + }, + { + "epoch": 0.19448403051387375, + "grad_norm": 1.4552860694702754, + "learning_rate": 6.4822457226377785e-06, + "loss": 0.317, + "step": 15080 + }, + { + "epoch": 0.1946129987038697, + "grad_norm": 1.2458893391972587, + "learning_rate": 6.486544579141949e-06, + "loss": 0.3119, + "step": 15090 + }, + { + "epoch": 0.19474196689386564, + "grad_norm": 1.3382324889931325, + "learning_rate": 6.490843435646119e-06, + "loss": 0.2967, + "step": 15100 + }, + { + "epoch": 0.19487093508386155, + "grad_norm": 1.3110996751763855, + "learning_rate": 6.495142292150289e-06, + "loss": 0.3134, + "step": 15110 + }, + { + "epoch": 0.1949999032738575, + "grad_norm": 1.4489928565508354, + "learning_rate": 6.499441148654458e-06, + "loss": 0.3057, + "step": 15120 + }, + { + "epoch": 0.19512887146385344, + "grad_norm": 1.40590429204342, + "learning_rate": 6.503740005158628e-06, + "loss": 0.3147, + "step": 15130 + }, + { + "epoch": 0.19525783965384938, + "grad_norm": 1.3387436371488997, + "learning_rate": 6.508038861662799e-06, + "loss": 0.2854, + "step": 15140 + }, + { + "epoch": 0.19538680784384532, + "grad_norm": 1.2496997962606302, + "learning_rate": 6.512337718166968e-06, + "loss": 0.3221, + "step": 15150 + }, + { + "epoch": 0.19551577603384127, + "grad_norm": 1.2967006098514693, + "learning_rate": 6.5166365746711376e-06, + "loss": 0.2984, + "step": 15160 + }, + { + "epoch": 0.19564474422383718, + "grad_norm": 1.39409172271497, + "learning_rate": 6.520935431175308e-06, + "loss": 0.3071, + "step": 15170 + }, + { + "epoch": 0.19577371241383312, + "grad_norm": 1.2192096770022545, + "learning_rate": 6.525234287679478e-06, + "loss": 0.305, + "step": 15180 + }, + { + "epoch": 0.19590268060382907, + "grad_norm": 1.3568355799967555, + "learning_rate": 6.529533144183648e-06, + "loss": 0.3063, + "step": 15190 + }, + { + "epoch": 0.196031648793825, + "grad_norm": 1.4388614904413803, + "learning_rate": 6.533832000687818e-06, + "loss": 0.3076, + "step": 15200 + }, + { + "epoch": 0.19616061698382095, + "grad_norm": 1.2284462269170837, + "learning_rate": 6.538130857191987e-06, + "loss": 0.2999, + "step": 15210 + }, + { + "epoch": 0.19628958517381687, + "grad_norm": 1.2733815647704765, + "learning_rate": 6.542429713696158e-06, + "loss": 0.2946, + "step": 15220 + }, + { + "epoch": 0.1964185533638128, + "grad_norm": 1.35558697458718, + "learning_rate": 6.546728570200327e-06, + "loss": 0.3133, + "step": 15230 + }, + { + "epoch": 0.19654752155380875, + "grad_norm": 1.3598835403064726, + "learning_rate": 6.5510274267044974e-06, + "loss": 0.2987, + "step": 15240 + }, + { + "epoch": 0.1966764897438047, + "grad_norm": 1.3147718223320222, + "learning_rate": 6.555326283208667e-06, + "loss": 0.3061, + "step": 15250 + }, + { + "epoch": 0.19680545793380064, + "grad_norm": 1.3508248348390046, + "learning_rate": 6.559625139712837e-06, + "loss": 0.2854, + "step": 15260 + }, + { + "epoch": 0.19693442612379655, + "grad_norm": 1.1734062124820155, + "learning_rate": 6.563923996217007e-06, + "loss": 0.3046, + "step": 15270 + }, + { + "epoch": 0.1970633943137925, + "grad_norm": 1.3275144152484093, + "learning_rate": 6.568222852721177e-06, + "loss": 0.3002, + "step": 15280 + }, + { + "epoch": 0.19719236250378844, + "grad_norm": 1.3212729013306659, + "learning_rate": 6.572521709225347e-06, + "loss": 0.3012, + "step": 15290 + }, + { + "epoch": 0.19732133069378438, + "grad_norm": 1.280820622536084, + "learning_rate": 6.576820565729517e-06, + "loss": 0.3024, + "step": 15300 + }, + { + "epoch": 0.19745029888378032, + "grad_norm": 1.3025814473326223, + "learning_rate": 6.581119422233686e-06, + "loss": 0.3031, + "step": 15310 + }, + { + "epoch": 0.19757926707377627, + "grad_norm": 1.156258171851669, + "learning_rate": 6.5854182787378564e-06, + "loss": 0.2838, + "step": 15320 + }, + { + "epoch": 0.19770823526377218, + "grad_norm": 1.3436651117352678, + "learning_rate": 6.589717135242027e-06, + "loss": 0.2952, + "step": 15330 + }, + { + "epoch": 0.19783720345376812, + "grad_norm": 1.337777928728206, + "learning_rate": 6.594015991746196e-06, + "loss": 0.2957, + "step": 15340 + }, + { + "epoch": 0.19796617164376407, + "grad_norm": 1.3321059569237903, + "learning_rate": 6.598314848250366e-06, + "loss": 0.317, + "step": 15350 + }, + { + "epoch": 0.19809513983376, + "grad_norm": 1.2701563489174437, + "learning_rate": 6.602613704754536e-06, + "loss": 0.2955, + "step": 15360 + }, + { + "epoch": 0.19822410802375595, + "grad_norm": 1.488258866301267, + "learning_rate": 6.606912561258706e-06, + "loss": 0.3068, + "step": 15370 + }, + { + "epoch": 0.19835307621375187, + "grad_norm": 1.2872128891150685, + "learning_rate": 6.611211417762876e-06, + "loss": 0.3144, + "step": 15380 + }, + { + "epoch": 0.1984820444037478, + "grad_norm": 1.338593239457764, + "learning_rate": 6.615510274267045e-06, + "loss": 0.2936, + "step": 15390 + }, + { + "epoch": 0.19861101259374375, + "grad_norm": 1.4369442830002193, + "learning_rate": 6.6198091307712155e-06, + "loss": 0.3069, + "step": 15400 + }, + { + "epoch": 0.1987399807837397, + "grad_norm": 1.4460237566808514, + "learning_rate": 6.624107987275386e-06, + "loss": 0.3024, + "step": 15410 + }, + { + "epoch": 0.19886894897373564, + "grad_norm": 1.290558091071899, + "learning_rate": 6.628406843779556e-06, + "loss": 0.2976, + "step": 15420 + }, + { + "epoch": 0.19899791716373155, + "grad_norm": 1.4213307729601785, + "learning_rate": 6.632705700283725e-06, + "loss": 0.2995, + "step": 15430 + }, + { + "epoch": 0.1991268853537275, + "grad_norm": 1.2600088395526083, + "learning_rate": 6.637004556787895e-06, + "loss": 0.3018, + "step": 15440 + }, + { + "epoch": 0.19925585354372344, + "grad_norm": 1.4338382363616888, + "learning_rate": 6.641303413292065e-06, + "loss": 0.2962, + "step": 15450 + }, + { + "epoch": 0.19938482173371938, + "grad_norm": 1.1295989128856059, + "learning_rate": 6.6456022697962356e-06, + "loss": 0.3011, + "step": 15460 + }, + { + "epoch": 0.19951378992371532, + "grad_norm": 1.4781435504320044, + "learning_rate": 6.649901126300404e-06, + "loss": 0.2929, + "step": 15470 + }, + { + "epoch": 0.19964275811371127, + "grad_norm": 1.433788755297971, + "learning_rate": 6.6541999828045745e-06, + "loss": 0.3088, + "step": 15480 + }, + { + "epoch": 0.19977172630370718, + "grad_norm": 1.4419477751551946, + "learning_rate": 6.658498839308745e-06, + "loss": 0.3026, + "step": 15490 + }, + { + "epoch": 0.19990069449370312, + "grad_norm": 1.3795591391739308, + "learning_rate": 6.662797695812915e-06, + "loss": 0.2979, + "step": 15500 + }, + { + "epoch": 0.20002966268369907, + "grad_norm": 1.3228342103692279, + "learning_rate": 6.667096552317084e-06, + "loss": 0.3199, + "step": 15510 + }, + { + "epoch": 0.200158630873695, + "grad_norm": 1.2517412403955022, + "learning_rate": 6.671395408821254e-06, + "loss": 0.3101, + "step": 15520 + }, + { + "epoch": 0.20028759906369095, + "grad_norm": 1.3130657919068285, + "learning_rate": 6.675694265325424e-06, + "loss": 0.2863, + "step": 15530 + }, + { + "epoch": 0.20041656725368687, + "grad_norm": 1.2619256468447915, + "learning_rate": 6.679993121829595e-06, + "loss": 0.2974, + "step": 15540 + }, + { + "epoch": 0.2005455354436828, + "grad_norm": 1.2529468633236664, + "learning_rate": 6.684291978333763e-06, + "loss": 0.2971, + "step": 15550 + }, + { + "epoch": 0.20067450363367875, + "grad_norm": 1.2757598938249237, + "learning_rate": 6.6885908348379335e-06, + "loss": 0.2999, + "step": 15560 + }, + { + "epoch": 0.2008034718236747, + "grad_norm": 1.3501862672634632, + "learning_rate": 6.692889691342104e-06, + "loss": 0.3016, + "step": 15570 + }, + { + "epoch": 0.20093244001367064, + "grad_norm": 1.3355046196397216, + "learning_rate": 6.697188547846274e-06, + "loss": 0.3079, + "step": 15580 + }, + { + "epoch": 0.20106140820366655, + "grad_norm": 1.2736381572842514, + "learning_rate": 6.701487404350443e-06, + "loss": 0.2956, + "step": 15590 + }, + { + "epoch": 0.2011903763936625, + "grad_norm": 1.3347694049900272, + "learning_rate": 6.705786260854613e-06, + "loss": 0.2971, + "step": 15600 + }, + { + "epoch": 0.20131934458365844, + "grad_norm": 1.3105517956560986, + "learning_rate": 6.710085117358783e-06, + "loss": 0.3022, + "step": 15610 + }, + { + "epoch": 0.20144831277365438, + "grad_norm": 1.1904104686490233, + "learning_rate": 6.714383973862954e-06, + "loss": 0.3032, + "step": 15620 + }, + { + "epoch": 0.20157728096365032, + "grad_norm": 1.4297515950319637, + "learning_rate": 6.718682830367122e-06, + "loss": 0.3081, + "step": 15630 + }, + { + "epoch": 0.20170624915364627, + "grad_norm": 1.259856972638246, + "learning_rate": 6.7229816868712925e-06, + "loss": 0.3, + "step": 15640 + }, + { + "epoch": 0.20183521734364218, + "grad_norm": 1.3733755515934347, + "learning_rate": 6.727280543375463e-06, + "loss": 0.3156, + "step": 15650 + }, + { + "epoch": 0.20196418553363812, + "grad_norm": 1.2550810237784893, + "learning_rate": 6.731579399879633e-06, + "loss": 0.2981, + "step": 15660 + }, + { + "epoch": 0.20209315372363407, + "grad_norm": 1.2478163360846315, + "learning_rate": 6.735878256383802e-06, + "loss": 0.286, + "step": 15670 + }, + { + "epoch": 0.20222212191363, + "grad_norm": 1.2460031594165493, + "learning_rate": 6.740177112887972e-06, + "loss": 0.3052, + "step": 15680 + }, + { + "epoch": 0.20235109010362595, + "grad_norm": 1.4408786698076557, + "learning_rate": 6.744475969392142e-06, + "loss": 0.3015, + "step": 15690 + }, + { + "epoch": 0.20248005829362187, + "grad_norm": 1.2285267523503143, + "learning_rate": 6.748774825896313e-06, + "loss": 0.3122, + "step": 15700 + }, + { + "epoch": 0.2026090264836178, + "grad_norm": 1.3565371169044205, + "learning_rate": 6.753073682400482e-06, + "loss": 0.3157, + "step": 15710 + }, + { + "epoch": 0.20273799467361375, + "grad_norm": 1.3585112765431397, + "learning_rate": 6.7573725389046515e-06, + "loss": 0.2959, + "step": 15720 + }, + { + "epoch": 0.2028669628636097, + "grad_norm": 1.2824979761041597, + "learning_rate": 6.761671395408822e-06, + "loss": 0.3092, + "step": 15730 + }, + { + "epoch": 0.20299593105360564, + "grad_norm": 1.2815899850303427, + "learning_rate": 6.765970251912992e-06, + "loss": 0.2904, + "step": 15740 + }, + { + "epoch": 0.20312489924360155, + "grad_norm": 1.2420954687152699, + "learning_rate": 6.7702691084171616e-06, + "loss": 0.3164, + "step": 15750 + }, + { + "epoch": 0.2032538674335975, + "grad_norm": 1.2395862517304395, + "learning_rate": 6.774567964921331e-06, + "loss": 0.2924, + "step": 15760 + }, + { + "epoch": 0.20338283562359344, + "grad_norm": 1.3874773726184686, + "learning_rate": 6.778866821425501e-06, + "loss": 0.2931, + "step": 15770 + }, + { + "epoch": 0.20351180381358938, + "grad_norm": 1.376883668096418, + "learning_rate": 6.783165677929672e-06, + "loss": 0.301, + "step": 15780 + }, + { + "epoch": 0.20364077200358532, + "grad_norm": 1.1160860252802356, + "learning_rate": 6.787464534433841e-06, + "loss": 0.3074, + "step": 15790 + }, + { + "epoch": 0.20376974019358127, + "grad_norm": 1.4671141811961637, + "learning_rate": 6.791763390938011e-06, + "loss": 0.3013, + "step": 15800 + }, + { + "epoch": 0.20389870838357718, + "grad_norm": 1.3942158150485278, + "learning_rate": 6.796062247442181e-06, + "loss": 0.2993, + "step": 15810 + }, + { + "epoch": 0.20402767657357312, + "grad_norm": 1.3391503597226795, + "learning_rate": 6.800361103946351e-06, + "loss": 0.2892, + "step": 15820 + }, + { + "epoch": 0.20415664476356907, + "grad_norm": 1.4262144611413574, + "learning_rate": 6.804659960450521e-06, + "loss": 0.3085, + "step": 15830 + }, + { + "epoch": 0.204285612953565, + "grad_norm": 1.3410715638925481, + "learning_rate": 6.808958816954691e-06, + "loss": 0.295, + "step": 15840 + }, + { + "epoch": 0.20441458114356095, + "grad_norm": 1.2898789367004715, + "learning_rate": 6.81325767345886e-06, + "loss": 0.2855, + "step": 15850 + }, + { + "epoch": 0.20454354933355687, + "grad_norm": 1.3000266768251312, + "learning_rate": 6.817556529963031e-06, + "loss": 0.3124, + "step": 15860 + }, + { + "epoch": 0.2046725175235528, + "grad_norm": 1.274431634022074, + "learning_rate": 6.8218553864672e-06, + "loss": 0.296, + "step": 15870 + }, + { + "epoch": 0.20480148571354875, + "grad_norm": 1.4285802450356646, + "learning_rate": 6.82615424297137e-06, + "loss": 0.2966, + "step": 15880 + }, + { + "epoch": 0.2049304539035447, + "grad_norm": 1.233488965761431, + "learning_rate": 6.83045309947554e-06, + "loss": 0.2892, + "step": 15890 + }, + { + "epoch": 0.20505942209354064, + "grad_norm": 1.2940738872242101, + "learning_rate": 6.83475195597971e-06, + "loss": 0.3075, + "step": 15900 + }, + { + "epoch": 0.20518839028353655, + "grad_norm": 1.2389894651291524, + "learning_rate": 6.83905081248388e-06, + "loss": 0.2981, + "step": 15910 + }, + { + "epoch": 0.2053173584735325, + "grad_norm": 1.3459494765424744, + "learning_rate": 6.84334966898805e-06, + "loss": 0.2967, + "step": 15920 + }, + { + "epoch": 0.20544632666352844, + "grad_norm": 1.5294996223956492, + "learning_rate": 6.84764852549222e-06, + "loss": 0.3084, + "step": 15930 + }, + { + "epoch": 0.20557529485352438, + "grad_norm": 1.1428922937571209, + "learning_rate": 6.85194738199639e-06, + "loss": 0.305, + "step": 15940 + }, + { + "epoch": 0.20570426304352032, + "grad_norm": 1.197890118713601, + "learning_rate": 6.856246238500559e-06, + "loss": 0.2983, + "step": 15950 + }, + { + "epoch": 0.20583323123351624, + "grad_norm": 1.168126527166433, + "learning_rate": 6.860545095004729e-06, + "loss": 0.2947, + "step": 15960 + }, + { + "epoch": 0.20596219942351218, + "grad_norm": 1.2964272504158163, + "learning_rate": 6.8648439515089e-06, + "loss": 0.2821, + "step": 15970 + }, + { + "epoch": 0.20609116761350812, + "grad_norm": 1.3705599437532672, + "learning_rate": 6.869142808013069e-06, + "loss": 0.2909, + "step": 15980 + }, + { + "epoch": 0.20622013580350407, + "grad_norm": 1.2064735437522032, + "learning_rate": 6.873441664517239e-06, + "loss": 0.3004, + "step": 15990 + }, + { + "epoch": 0.2063491039935, + "grad_norm": 1.2880029438811744, + "learning_rate": 6.877740521021409e-06, + "loss": 0.2765, + "step": 16000 + }, + { + "epoch": 0.20647807218349595, + "grad_norm": 1.2691630053068672, + "learning_rate": 6.882039377525579e-06, + "loss": 0.2975, + "step": 16010 + }, + { + "epoch": 0.20660704037349187, + "grad_norm": 1.364566380864311, + "learning_rate": 6.8863382340297495e-06, + "loss": 0.312, + "step": 16020 + }, + { + "epoch": 0.2067360085634878, + "grad_norm": 1.2855209595018693, + "learning_rate": 6.890637090533918e-06, + "loss": 0.3041, + "step": 16030 + }, + { + "epoch": 0.20686497675348375, + "grad_norm": 1.3118042793682152, + "learning_rate": 6.8949359470380884e-06, + "loss": 0.315, + "step": 16040 + }, + { + "epoch": 0.2069939449434797, + "grad_norm": 1.4188526862347413, + "learning_rate": 6.899234803542259e-06, + "loss": 0.2972, + "step": 16050 + }, + { + "epoch": 0.20712291313347564, + "grad_norm": 1.3674895690413693, + "learning_rate": 6.903533660046429e-06, + "loss": 0.3084, + "step": 16060 + }, + { + "epoch": 0.20725188132347155, + "grad_norm": 1.314492648493741, + "learning_rate": 6.907832516550598e-06, + "loss": 0.2928, + "step": 16070 + }, + { + "epoch": 0.2073808495134675, + "grad_norm": 1.240403489393904, + "learning_rate": 6.912131373054768e-06, + "loss": 0.2854, + "step": 16080 + }, + { + "epoch": 0.20750981770346344, + "grad_norm": 1.2721654736163621, + "learning_rate": 6.916430229558938e-06, + "loss": 0.3084, + "step": 16090 + }, + { + "epoch": 0.20763878589345938, + "grad_norm": 1.353075062575082, + "learning_rate": 6.9207290860631085e-06, + "loss": 0.3103, + "step": 16100 + }, + { + "epoch": 0.20776775408345532, + "grad_norm": 1.2820429242937212, + "learning_rate": 6.925027942567277e-06, + "loss": 0.2959, + "step": 16110 + }, + { + "epoch": 0.20789672227345124, + "grad_norm": 1.2929396233899955, + "learning_rate": 6.9293267990714474e-06, + "loss": 0.2933, + "step": 16120 + }, + { + "epoch": 0.20802569046344718, + "grad_norm": 1.4332901316101416, + "learning_rate": 6.933625655575618e-06, + "loss": 0.3042, + "step": 16130 + }, + { + "epoch": 0.20815465865344313, + "grad_norm": 1.3413116580615279, + "learning_rate": 6.937924512079788e-06, + "loss": 0.2883, + "step": 16140 + }, + { + "epoch": 0.20828362684343907, + "grad_norm": 1.3500572460482618, + "learning_rate": 6.942223368583957e-06, + "loss": 0.2997, + "step": 16150 + }, + { + "epoch": 0.208412595033435, + "grad_norm": 1.2266899292190658, + "learning_rate": 6.946522225088127e-06, + "loss": 0.3086, + "step": 16160 + }, + { + "epoch": 0.20854156322343095, + "grad_norm": 1.2970500529822486, + "learning_rate": 6.950821081592297e-06, + "loss": 0.2958, + "step": 16170 + }, + { + "epoch": 0.20867053141342687, + "grad_norm": 1.397515131760756, + "learning_rate": 6.9551199380964675e-06, + "loss": 0.3005, + "step": 16180 + }, + { + "epoch": 0.2087994996034228, + "grad_norm": 1.3307065420739737, + "learning_rate": 6.959418794600636e-06, + "loss": 0.3021, + "step": 16190 + }, + { + "epoch": 0.20892846779341875, + "grad_norm": 1.3911677263042355, + "learning_rate": 6.9637176511048065e-06, + "loss": 0.2976, + "step": 16200 + }, + { + "epoch": 0.2090574359834147, + "grad_norm": 1.268616743063648, + "learning_rate": 6.968016507608977e-06, + "loss": 0.3047, + "step": 16210 + }, + { + "epoch": 0.20918640417341064, + "grad_norm": 1.3294867073525403, + "learning_rate": 6.972315364113147e-06, + "loss": 0.2984, + "step": 16220 + }, + { + "epoch": 0.20931537236340655, + "grad_norm": 1.3234681486084894, + "learning_rate": 6.976614220617316e-06, + "loss": 0.2964, + "step": 16230 + }, + { + "epoch": 0.2094443405534025, + "grad_norm": 1.4042722138671686, + "learning_rate": 6.980913077121486e-06, + "loss": 0.2866, + "step": 16240 + }, + { + "epoch": 0.20957330874339844, + "grad_norm": 1.1748813797943205, + "learning_rate": 6.985211933625656e-06, + "loss": 0.2977, + "step": 16250 + }, + { + "epoch": 0.20970227693339438, + "grad_norm": 1.3561263833914412, + "learning_rate": 6.9895107901298266e-06, + "loss": 0.3004, + "step": 16260 + }, + { + "epoch": 0.20983124512339033, + "grad_norm": 1.336631716946766, + "learning_rate": 6.993809646633995e-06, + "loss": 0.2986, + "step": 16270 + }, + { + "epoch": 0.20996021331338624, + "grad_norm": 1.4941886286264914, + "learning_rate": 6.9981085031381655e-06, + "loss": 0.3054, + "step": 16280 + }, + { + "epoch": 0.21008918150338218, + "grad_norm": 1.2207646204794638, + "learning_rate": 7.002407359642336e-06, + "loss": 0.3018, + "step": 16290 + }, + { + "epoch": 0.21021814969337813, + "grad_norm": 1.2692386945290102, + "learning_rate": 7.006706216146506e-06, + "loss": 0.2998, + "step": 16300 + }, + { + "epoch": 0.21034711788337407, + "grad_norm": 1.465461240341572, + "learning_rate": 7.011005072650675e-06, + "loss": 0.3108, + "step": 16310 + }, + { + "epoch": 0.21047608607337, + "grad_norm": 1.43386089823541, + "learning_rate": 7.015303929154845e-06, + "loss": 0.3096, + "step": 16320 + }, + { + "epoch": 0.21060505426336595, + "grad_norm": 1.3507212957024675, + "learning_rate": 7.019602785659015e-06, + "loss": 0.2894, + "step": 16330 + }, + { + "epoch": 0.21073402245336187, + "grad_norm": 1.218687450147542, + "learning_rate": 7.023901642163186e-06, + "loss": 0.2963, + "step": 16340 + }, + { + "epoch": 0.2108629906433578, + "grad_norm": 1.3247838424586684, + "learning_rate": 7.028200498667355e-06, + "loss": 0.2894, + "step": 16350 + }, + { + "epoch": 0.21099195883335375, + "grad_norm": 1.2601625711476099, + "learning_rate": 7.0324993551715245e-06, + "loss": 0.3083, + "step": 16360 + }, + { + "epoch": 0.2111209270233497, + "grad_norm": 1.2693603734371537, + "learning_rate": 7.036798211675695e-06, + "loss": 0.3066, + "step": 16370 + }, + { + "epoch": 0.21124989521334564, + "grad_norm": 1.3164417842991063, + "learning_rate": 7.041097068179865e-06, + "loss": 0.2908, + "step": 16380 + }, + { + "epoch": 0.21137886340334155, + "grad_norm": 1.2858167785809367, + "learning_rate": 7.0453959246840345e-06, + "loss": 0.2867, + "step": 16390 + }, + { + "epoch": 0.2115078315933375, + "grad_norm": 1.4904406445024956, + "learning_rate": 7.049694781188204e-06, + "loss": 0.2906, + "step": 16400 + }, + { + "epoch": 0.21163679978333344, + "grad_norm": 1.2175560776455594, + "learning_rate": 7.053993637692374e-06, + "loss": 0.2856, + "step": 16410 + }, + { + "epoch": 0.21176576797332938, + "grad_norm": 1.3934463671871093, + "learning_rate": 7.058292494196545e-06, + "loss": 0.3049, + "step": 16420 + }, + { + "epoch": 0.21189473616332533, + "grad_norm": 1.396555827658244, + "learning_rate": 7.062591350700714e-06, + "loss": 0.2929, + "step": 16430 + }, + { + "epoch": 0.21202370435332124, + "grad_norm": 1.1133272676621557, + "learning_rate": 7.066890207204884e-06, + "loss": 0.2959, + "step": 16440 + }, + { + "epoch": 0.21215267254331718, + "grad_norm": 1.325259732318582, + "learning_rate": 7.071189063709054e-06, + "loss": 0.3094, + "step": 16450 + }, + { + "epoch": 0.21228164073331313, + "grad_norm": 1.385478945375366, + "learning_rate": 7.075487920213224e-06, + "loss": 0.2995, + "step": 16460 + }, + { + "epoch": 0.21241060892330907, + "grad_norm": 1.3146933517741441, + "learning_rate": 7.0797867767173936e-06, + "loss": 0.3079, + "step": 16470 + }, + { + "epoch": 0.212539577113305, + "grad_norm": 1.3046708592124148, + "learning_rate": 7.084085633221564e-06, + "loss": 0.298, + "step": 16480 + }, + { + "epoch": 0.21266854530330095, + "grad_norm": 1.3722719000362271, + "learning_rate": 7.088384489725733e-06, + "loss": 0.288, + "step": 16490 + }, + { + "epoch": 0.21279751349329687, + "grad_norm": 1.2106880304771286, + "learning_rate": 7.092683346229904e-06, + "loss": 0.2999, + "step": 16500 + }, + { + "epoch": 0.2129264816832928, + "grad_norm": 1.3165636855428664, + "learning_rate": 7.096982202734073e-06, + "loss": 0.2954, + "step": 16510 + }, + { + "epoch": 0.21305544987328875, + "grad_norm": 1.3815824288183909, + "learning_rate": 7.101281059238243e-06, + "loss": 0.2945, + "step": 16520 + }, + { + "epoch": 0.2131844180632847, + "grad_norm": 1.2551461980824148, + "learning_rate": 7.105579915742413e-06, + "loss": 0.2801, + "step": 16530 + }, + { + "epoch": 0.21331338625328064, + "grad_norm": 1.261874140510263, + "learning_rate": 7.109878772246583e-06, + "loss": 0.2936, + "step": 16540 + }, + { + "epoch": 0.21344235444327656, + "grad_norm": 1.4340248316233726, + "learning_rate": 7.1141776287507526e-06, + "loss": 0.2909, + "step": 16550 + }, + { + "epoch": 0.2135713226332725, + "grad_norm": 1.3836094059303847, + "learning_rate": 7.118476485254923e-06, + "loss": 0.29, + "step": 16560 + }, + { + "epoch": 0.21370029082326844, + "grad_norm": 1.3130641259417306, + "learning_rate": 7.122775341759093e-06, + "loss": 0.2901, + "step": 16570 + }, + { + "epoch": 0.21382925901326438, + "grad_norm": 1.2833332402882747, + "learning_rate": 7.127074198263263e-06, + "loss": 0.3088, + "step": 16580 + }, + { + "epoch": 0.21395822720326033, + "grad_norm": 1.3903110761783661, + "learning_rate": 7.131373054767432e-06, + "loss": 0.2936, + "step": 16590 + }, + { + "epoch": 0.21408719539325624, + "grad_norm": 1.2897473957963543, + "learning_rate": 7.135671911271602e-06, + "loss": 0.3085, + "step": 16600 + }, + { + "epoch": 0.21421616358325218, + "grad_norm": 1.3074638044944924, + "learning_rate": 7.139970767775773e-06, + "loss": 0.2865, + "step": 16610 + }, + { + "epoch": 0.21434513177324813, + "grad_norm": 1.4288796065089513, + "learning_rate": 7.144269624279942e-06, + "loss": 0.2867, + "step": 16620 + }, + { + "epoch": 0.21447409996324407, + "grad_norm": 1.281738052731776, + "learning_rate": 7.148568480784112e-06, + "loss": 0.2778, + "step": 16630 + }, + { + "epoch": 0.21460306815324, + "grad_norm": 1.2004007800710987, + "learning_rate": 7.152867337288282e-06, + "loss": 0.2819, + "step": 16640 + }, + { + "epoch": 0.21473203634323595, + "grad_norm": 1.2873277750499665, + "learning_rate": 7.157166193792452e-06, + "loss": 0.2995, + "step": 16650 + }, + { + "epoch": 0.21486100453323187, + "grad_norm": 1.3952221346686833, + "learning_rate": 7.1614650502966225e-06, + "loss": 0.3002, + "step": 16660 + }, + { + "epoch": 0.2149899727232278, + "grad_norm": 1.2377994575062343, + "learning_rate": 7.165763906800791e-06, + "loss": 0.3009, + "step": 16670 + }, + { + "epoch": 0.21511894091322376, + "grad_norm": 1.2586214601305068, + "learning_rate": 7.170062763304961e-06, + "loss": 0.3024, + "step": 16680 + }, + { + "epoch": 0.2152479091032197, + "grad_norm": 1.3497636370141706, + "learning_rate": 7.174361619809132e-06, + "loss": 0.2981, + "step": 16690 + }, + { + "epoch": 0.21537687729321564, + "grad_norm": 1.2176810454230758, + "learning_rate": 7.178660476313302e-06, + "loss": 0.3111, + "step": 16700 + }, + { + "epoch": 0.21550584548321156, + "grad_norm": 1.3146361547590302, + "learning_rate": 7.182959332817471e-06, + "loss": 0.2948, + "step": 16710 + }, + { + "epoch": 0.2156348136732075, + "grad_norm": 1.3755604660530707, + "learning_rate": 7.187258189321641e-06, + "loss": 0.3037, + "step": 16720 + }, + { + "epoch": 0.21576378186320344, + "grad_norm": 1.2459785310145082, + "learning_rate": 7.191557045825811e-06, + "loss": 0.2964, + "step": 16730 + }, + { + "epoch": 0.21589275005319938, + "grad_norm": 1.2750033617950414, + "learning_rate": 7.1958559023299815e-06, + "loss": 0.2968, + "step": 16740 + }, + { + "epoch": 0.21602171824319533, + "grad_norm": 1.4972843447076185, + "learning_rate": 7.20015475883415e-06, + "loss": 0.2995, + "step": 16750 + }, + { + "epoch": 0.21615068643319124, + "grad_norm": 1.1799346292454855, + "learning_rate": 7.20445361533832e-06, + "loss": 0.2981, + "step": 16760 + }, + { + "epoch": 0.21627965462318718, + "grad_norm": 1.3964431174325125, + "learning_rate": 7.208752471842491e-06, + "loss": 0.2919, + "step": 16770 + }, + { + "epoch": 0.21640862281318313, + "grad_norm": 1.2813909816489817, + "learning_rate": 7.213051328346661e-06, + "loss": 0.2981, + "step": 16780 + }, + { + "epoch": 0.21653759100317907, + "grad_norm": 1.290983997870129, + "learning_rate": 7.21735018485083e-06, + "loss": 0.3022, + "step": 16790 + }, + { + "epoch": 0.216666559193175, + "grad_norm": 1.2608908771113778, + "learning_rate": 7.221649041355e-06, + "loss": 0.2834, + "step": 16800 + }, + { + "epoch": 0.21679552738317093, + "grad_norm": 1.247130345593866, + "learning_rate": 7.22594789785917e-06, + "loss": 0.285, + "step": 16810 + }, + { + "epoch": 0.21692449557316687, + "grad_norm": 1.3001565524150576, + "learning_rate": 7.2302467543633405e-06, + "loss": 0.309, + "step": 16820 + }, + { + "epoch": 0.2170534637631628, + "grad_norm": 1.2081715429792579, + "learning_rate": 7.234545610867509e-06, + "loss": 0.2959, + "step": 16830 + }, + { + "epoch": 0.21718243195315876, + "grad_norm": 1.345906826793327, + "learning_rate": 7.2388444673716794e-06, + "loss": 0.2908, + "step": 16840 + }, + { + "epoch": 0.2173114001431547, + "grad_norm": 1.3372239442220357, + "learning_rate": 7.24314332387585e-06, + "loss": 0.3118, + "step": 16850 + }, + { + "epoch": 0.21744036833315064, + "grad_norm": 1.2002558714659655, + "learning_rate": 7.24744218038002e-06, + "loss": 0.2984, + "step": 16860 + }, + { + "epoch": 0.21756933652314656, + "grad_norm": 1.1654809787659244, + "learning_rate": 7.251741036884189e-06, + "loss": 0.2854, + "step": 16870 + }, + { + "epoch": 0.2176983047131425, + "grad_norm": 1.1532894824174624, + "learning_rate": 7.256039893388359e-06, + "loss": 0.2834, + "step": 16880 + }, + { + "epoch": 0.21782727290313844, + "grad_norm": 1.2657452539915095, + "learning_rate": 7.260338749892529e-06, + "loss": 0.2982, + "step": 16890 + }, + { + "epoch": 0.21795624109313438, + "grad_norm": 1.443119613358703, + "learning_rate": 7.2646376063966995e-06, + "loss": 0.2771, + "step": 16900 + }, + { + "epoch": 0.21808520928313033, + "grad_norm": 1.2473566345288456, + "learning_rate": 7.268936462900868e-06, + "loss": 0.3067, + "step": 16910 + }, + { + "epoch": 0.21821417747312624, + "grad_norm": 1.2437068175952077, + "learning_rate": 7.2732353194050384e-06, + "loss": 0.3037, + "step": 16920 + }, + { + "epoch": 0.21834314566312218, + "grad_norm": 1.4545640846392915, + "learning_rate": 7.277534175909209e-06, + "loss": 0.2976, + "step": 16930 + }, + { + "epoch": 0.21847211385311813, + "grad_norm": 1.5107386949726815, + "learning_rate": 7.281833032413379e-06, + "loss": 0.2985, + "step": 16940 + }, + { + "epoch": 0.21860108204311407, + "grad_norm": 1.378918425581999, + "learning_rate": 7.2861318889175485e-06, + "loss": 0.3115, + "step": 16950 + }, + { + "epoch": 0.21873005023311, + "grad_norm": 1.2847143674261463, + "learning_rate": 7.290430745421718e-06, + "loss": 0.2931, + "step": 16960 + }, + { + "epoch": 0.21885901842310593, + "grad_norm": 1.2421544855925473, + "learning_rate": 7.294729601925888e-06, + "loss": 0.2879, + "step": 16970 + }, + { + "epoch": 0.21898798661310187, + "grad_norm": 1.3452029979312095, + "learning_rate": 7.2990284584300585e-06, + "loss": 0.3174, + "step": 16980 + }, + { + "epoch": 0.2191169548030978, + "grad_norm": 1.2365318510456624, + "learning_rate": 7.303327314934228e-06, + "loss": 0.2905, + "step": 16990 + }, + { + "epoch": 0.21924592299309376, + "grad_norm": 1.2482846128019796, + "learning_rate": 7.3076261714383975e-06, + "loss": 0.2922, + "step": 17000 + }, + { + "epoch": 0.2193748911830897, + "grad_norm": 1.1372230175761844, + "learning_rate": 7.311925027942568e-06, + "loss": 0.3007, + "step": 17010 + }, + { + "epoch": 0.21950385937308564, + "grad_norm": 1.2565521043986316, + "learning_rate": 7.316223884446738e-06, + "loss": 0.2882, + "step": 17020 + }, + { + "epoch": 0.21963282756308156, + "grad_norm": 1.358989544239753, + "learning_rate": 7.3205227409509075e-06, + "loss": 0.2843, + "step": 17030 + }, + { + "epoch": 0.2197617957530775, + "grad_norm": 1.234672873283731, + "learning_rate": 7.324821597455077e-06, + "loss": 0.2832, + "step": 17040 + }, + { + "epoch": 0.21989076394307344, + "grad_norm": 1.2266395300331026, + "learning_rate": 7.329120453959247e-06, + "loss": 0.2936, + "step": 17050 + }, + { + "epoch": 0.22001973213306938, + "grad_norm": 1.3034603326113834, + "learning_rate": 7.3334193104634176e-06, + "loss": 0.2999, + "step": 17060 + }, + { + "epoch": 0.22014870032306533, + "grad_norm": 1.1772154027231174, + "learning_rate": 7.337718166967587e-06, + "loss": 0.2866, + "step": 17070 + }, + { + "epoch": 0.22027766851306124, + "grad_norm": 1.3268605003372258, + "learning_rate": 7.342017023471757e-06, + "loss": 0.2979, + "step": 17080 + }, + { + "epoch": 0.22040663670305718, + "grad_norm": 1.3087208034085551, + "learning_rate": 7.346315879975927e-06, + "loss": 0.2866, + "step": 17090 + }, + { + "epoch": 0.22053560489305313, + "grad_norm": 1.2633688739850146, + "learning_rate": 7.350614736480097e-06, + "loss": 0.2926, + "step": 17100 + }, + { + "epoch": 0.22066457308304907, + "grad_norm": 1.3435440934832008, + "learning_rate": 7.3549135929842665e-06, + "loss": 0.2963, + "step": 17110 + }, + { + "epoch": 0.220793541273045, + "grad_norm": 1.3175772633913636, + "learning_rate": 7.359212449488437e-06, + "loss": 0.2914, + "step": 17120 + }, + { + "epoch": 0.22092250946304093, + "grad_norm": 1.224981171864648, + "learning_rate": 7.363511305992606e-06, + "loss": 0.3018, + "step": 17130 + }, + { + "epoch": 0.22105147765303687, + "grad_norm": 1.2514605190364363, + "learning_rate": 7.367810162496777e-06, + "loss": 0.3032, + "step": 17140 + }, + { + "epoch": 0.2211804458430328, + "grad_norm": 1.2688431972383145, + "learning_rate": 7.372109019000946e-06, + "loss": 0.2938, + "step": 17150 + }, + { + "epoch": 0.22130941403302876, + "grad_norm": 1.2503861341421876, + "learning_rate": 7.376407875505116e-06, + "loss": 0.299, + "step": 17160 + }, + { + "epoch": 0.2214383822230247, + "grad_norm": 1.3149008910187123, + "learning_rate": 7.380706732009287e-06, + "loss": 0.2987, + "step": 17170 + }, + { + "epoch": 0.22156735041302064, + "grad_norm": 1.3811828990441501, + "learning_rate": 7.385005588513456e-06, + "loss": 0.2953, + "step": 17180 + }, + { + "epoch": 0.22169631860301656, + "grad_norm": 1.3141821446166853, + "learning_rate": 7.3893044450176255e-06, + "loss": 0.3025, + "step": 17190 + }, + { + "epoch": 0.2218252867930125, + "grad_norm": 1.3657682586134334, + "learning_rate": 7.393603301521796e-06, + "loss": 0.284, + "step": 17200 + }, + { + "epoch": 0.22195425498300844, + "grad_norm": 1.2136271749970826, + "learning_rate": 7.397902158025966e-06, + "loss": 0.2964, + "step": 17210 + }, + { + "epoch": 0.22208322317300438, + "grad_norm": 1.2718696601071564, + "learning_rate": 7.402201014530136e-06, + "loss": 0.2988, + "step": 17220 + }, + { + "epoch": 0.22221219136300033, + "grad_norm": 1.3148930691916054, + "learning_rate": 7.406499871034305e-06, + "loss": 0.2933, + "step": 17230 + }, + { + "epoch": 0.22234115955299624, + "grad_norm": 1.243495868689076, + "learning_rate": 7.410798727538475e-06, + "loss": 0.2783, + "step": 17240 + }, + { + "epoch": 0.22247012774299219, + "grad_norm": 1.304604889589705, + "learning_rate": 7.415097584042646e-06, + "loss": 0.3111, + "step": 17250 + }, + { + "epoch": 0.22259909593298813, + "grad_norm": 1.2702169913761796, + "learning_rate": 7.419396440546815e-06, + "loss": 0.2833, + "step": 17260 + }, + { + "epoch": 0.22272806412298407, + "grad_norm": 1.420657449160827, + "learning_rate": 7.4236952970509846e-06, + "loss": 0.3065, + "step": 17270 + }, + { + "epoch": 0.22285703231298, + "grad_norm": 1.18902838680512, + "learning_rate": 7.427994153555155e-06, + "loss": 0.286, + "step": 17280 + }, + { + "epoch": 0.22298600050297593, + "grad_norm": 1.2423075731139976, + "learning_rate": 7.432293010059325e-06, + "loss": 0.2916, + "step": 17290 + }, + { + "epoch": 0.22311496869297187, + "grad_norm": 1.3775749325444884, + "learning_rate": 7.4365918665634955e-06, + "loss": 0.3011, + "step": 17300 + }, + { + "epoch": 0.22324393688296781, + "grad_norm": 1.4720441561150426, + "learning_rate": 7.440890723067664e-06, + "loss": 0.2806, + "step": 17310 + }, + { + "epoch": 0.22337290507296376, + "grad_norm": 1.2426393985004611, + "learning_rate": 7.445189579571834e-06, + "loss": 0.2987, + "step": 17320 + }, + { + "epoch": 0.2235018732629597, + "grad_norm": 1.3505261853503019, + "learning_rate": 7.449488436076005e-06, + "loss": 0.295, + "step": 17330 + }, + { + "epoch": 0.22363084145295564, + "grad_norm": 1.1775358621153826, + "learning_rate": 7.453787292580175e-06, + "loss": 0.3032, + "step": 17340 + }, + { + "epoch": 0.22375980964295156, + "grad_norm": 1.2257404355483241, + "learning_rate": 7.4580861490843436e-06, + "loss": 0.2967, + "step": 17350 + }, + { + "epoch": 0.2238887778329475, + "grad_norm": 1.3025061795977946, + "learning_rate": 7.462385005588514e-06, + "loss": 0.2934, + "step": 17360 + }, + { + "epoch": 0.22401774602294344, + "grad_norm": 1.2618731753397896, + "learning_rate": 7.466683862092684e-06, + "loss": 0.3062, + "step": 17370 + }, + { + "epoch": 0.22414671421293939, + "grad_norm": 1.3419831194418326, + "learning_rate": 7.4709827185968545e-06, + "loss": 0.3049, + "step": 17380 + }, + { + "epoch": 0.22427568240293533, + "grad_norm": 1.25077176234192, + "learning_rate": 7.475281575101023e-06, + "loss": 0.2937, + "step": 17390 + }, + { + "epoch": 0.22440465059293124, + "grad_norm": 1.2581196036608258, + "learning_rate": 7.479580431605193e-06, + "loss": 0.295, + "step": 17400 + }, + { + "epoch": 0.22453361878292719, + "grad_norm": 1.3012016812599199, + "learning_rate": 7.483879288109364e-06, + "loss": 0.3119, + "step": 17410 + }, + { + "epoch": 0.22466258697292313, + "grad_norm": 1.2952591613944242, + "learning_rate": 7.488178144613534e-06, + "loss": 0.2964, + "step": 17420 + }, + { + "epoch": 0.22479155516291907, + "grad_norm": 1.30852161994169, + "learning_rate": 7.492477001117703e-06, + "loss": 0.2927, + "step": 17430 + }, + { + "epoch": 0.224920523352915, + "grad_norm": 1.3184662192397194, + "learning_rate": 7.496775857621873e-06, + "loss": 0.2946, + "step": 17440 + }, + { + "epoch": 0.22504949154291093, + "grad_norm": 1.2605072807111588, + "learning_rate": 7.501074714126043e-06, + "loss": 0.2784, + "step": 17450 + }, + { + "epoch": 0.22517845973290687, + "grad_norm": 1.2640253481789356, + "learning_rate": 7.5053735706302135e-06, + "loss": 0.3025, + "step": 17460 + }, + { + "epoch": 0.22530742792290281, + "grad_norm": 1.236895830563133, + "learning_rate": 7.509672427134382e-06, + "loss": 0.2986, + "step": 17470 + }, + { + "epoch": 0.22543639611289876, + "grad_norm": 1.3367371889634299, + "learning_rate": 7.513971283638552e-06, + "loss": 0.2961, + "step": 17480 + }, + { + "epoch": 0.2255653643028947, + "grad_norm": 1.201448194696756, + "learning_rate": 7.518270140142723e-06, + "loss": 0.3071, + "step": 17490 + }, + { + "epoch": 0.22569433249289061, + "grad_norm": 1.2488082621308911, + "learning_rate": 7.522568996646893e-06, + "loss": 0.287, + "step": 17500 + }, + { + "epoch": 0.22582330068288656, + "grad_norm": 1.3119646824388198, + "learning_rate": 7.526867853151062e-06, + "loss": 0.3034, + "step": 17510 + }, + { + "epoch": 0.2259522688728825, + "grad_norm": 1.2403042202188796, + "learning_rate": 7.531166709655232e-06, + "loss": 0.2897, + "step": 17520 + }, + { + "epoch": 0.22608123706287844, + "grad_norm": 1.3498388886359982, + "learning_rate": 7.535465566159402e-06, + "loss": 0.2929, + "step": 17530 + }, + { + "epoch": 0.22621020525287439, + "grad_norm": 1.2149183735870699, + "learning_rate": 7.5397644226635725e-06, + "loss": 0.2903, + "step": 17540 + }, + { + "epoch": 0.22633917344287033, + "grad_norm": 1.399591695817459, + "learning_rate": 7.544063279167741e-06, + "loss": 0.2753, + "step": 17550 + }, + { + "epoch": 0.22646814163286624, + "grad_norm": 1.2974619397539071, + "learning_rate": 7.548362135671911e-06, + "loss": 0.2907, + "step": 17560 + }, + { + "epoch": 0.22659710982286219, + "grad_norm": 1.3357731442691705, + "learning_rate": 7.552660992176082e-06, + "loss": 0.2899, + "step": 17570 + }, + { + "epoch": 0.22672607801285813, + "grad_norm": 1.4881528930450358, + "learning_rate": 7.556959848680252e-06, + "loss": 0.299, + "step": 17580 + }, + { + "epoch": 0.22685504620285407, + "grad_norm": 1.1080247238751602, + "learning_rate": 7.5612587051844215e-06, + "loss": 0.3062, + "step": 17590 + }, + { + "epoch": 0.22698401439285001, + "grad_norm": 1.2240741212474318, + "learning_rate": 7.565557561688591e-06, + "loss": 0.285, + "step": 17600 + }, + { + "epoch": 0.22711298258284593, + "grad_norm": 1.3228905455445648, + "learning_rate": 7.569856418192761e-06, + "loss": 0.2853, + "step": 17610 + }, + { + "epoch": 0.22724195077284187, + "grad_norm": 1.3293431397795739, + "learning_rate": 7.5741552746969315e-06, + "loss": 0.2904, + "step": 17620 + }, + { + "epoch": 0.22737091896283781, + "grad_norm": 1.354344208698406, + "learning_rate": 7.578454131201101e-06, + "loss": 0.2843, + "step": 17630 + }, + { + "epoch": 0.22749988715283376, + "grad_norm": 1.2690307995110368, + "learning_rate": 7.5827529877052704e-06, + "loss": 0.2878, + "step": 17640 + }, + { + "epoch": 0.2276288553428297, + "grad_norm": 1.2926539599727682, + "learning_rate": 7.587051844209441e-06, + "loss": 0.2748, + "step": 17650 + }, + { + "epoch": 0.22775782353282562, + "grad_norm": 1.2430787068205547, + "learning_rate": 7.591350700713611e-06, + "loss": 0.2928, + "step": 17660 + }, + { + "epoch": 0.22788679172282156, + "grad_norm": 1.2547282697001623, + "learning_rate": 7.5956495572177805e-06, + "loss": 0.2897, + "step": 17670 + }, + { + "epoch": 0.2280157599128175, + "grad_norm": 1.191801032495836, + "learning_rate": 7.59994841372195e-06, + "loss": 0.2737, + "step": 17680 + }, + { + "epoch": 0.22814472810281344, + "grad_norm": 1.1584266466820694, + "learning_rate": 7.60424727022612e-06, + "loss": 0.3069, + "step": 17690 + }, + { + "epoch": 0.22827369629280939, + "grad_norm": 1.3639801551996764, + "learning_rate": 7.6085461267302905e-06, + "loss": 0.2913, + "step": 17700 + }, + { + "epoch": 0.22840266448280533, + "grad_norm": 1.242644344128678, + "learning_rate": 7.61284498323446e-06, + "loss": 0.308, + "step": 17710 + }, + { + "epoch": 0.22853163267280124, + "grad_norm": 1.2418526405977381, + "learning_rate": 7.61714383973863e-06, + "loss": 0.288, + "step": 17720 + }, + { + "epoch": 0.2286606008627972, + "grad_norm": 1.3325357555425235, + "learning_rate": 7.6214426962428e-06, + "loss": 0.2926, + "step": 17730 + }, + { + "epoch": 0.22878956905279313, + "grad_norm": 1.2627838993192413, + "learning_rate": 7.62574155274697e-06, + "loss": 0.3012, + "step": 17740 + }, + { + "epoch": 0.22891853724278907, + "grad_norm": 1.2084786912410193, + "learning_rate": 7.630040409251139e-06, + "loss": 0.2871, + "step": 17750 + }, + { + "epoch": 0.22904750543278501, + "grad_norm": 1.3922441423645007, + "learning_rate": 7.634339265755309e-06, + "loss": 0.297, + "step": 17760 + }, + { + "epoch": 0.22917647362278093, + "grad_norm": 1.1248150673477693, + "learning_rate": 7.63863812225948e-06, + "loss": 0.2881, + "step": 17770 + }, + { + "epoch": 0.22930544181277687, + "grad_norm": 1.2694650999210007, + "learning_rate": 7.64293697876365e-06, + "loss": 0.2859, + "step": 17780 + }, + { + "epoch": 0.22943441000277282, + "grad_norm": 1.2618068135237563, + "learning_rate": 7.647235835267818e-06, + "loss": 0.3008, + "step": 17790 + }, + { + "epoch": 0.22956337819276876, + "grad_norm": 1.2230984944337049, + "learning_rate": 7.651534691771988e-06, + "loss": 0.2914, + "step": 17800 + }, + { + "epoch": 0.2296923463827647, + "grad_norm": 1.3262781200872553, + "learning_rate": 7.655833548276159e-06, + "loss": 0.2967, + "step": 17810 + }, + { + "epoch": 0.22982131457276062, + "grad_norm": 1.1647574690237659, + "learning_rate": 7.660132404780329e-06, + "loss": 0.2935, + "step": 17820 + }, + { + "epoch": 0.22995028276275656, + "grad_norm": 1.1721999799893454, + "learning_rate": 7.664431261284498e-06, + "loss": 0.2963, + "step": 17830 + }, + { + "epoch": 0.2300792509527525, + "grad_norm": 1.2793124256467354, + "learning_rate": 7.668730117788668e-06, + "loss": 0.2896, + "step": 17840 + }, + { + "epoch": 0.23020821914274844, + "grad_norm": 1.3153642104675347, + "learning_rate": 7.673028974292838e-06, + "loss": 0.2998, + "step": 17850 + }, + { + "epoch": 0.2303371873327444, + "grad_norm": 1.1913909111215304, + "learning_rate": 7.677327830797009e-06, + "loss": 0.2754, + "step": 17860 + }, + { + "epoch": 0.23046615552274033, + "grad_norm": 1.2148885753177183, + "learning_rate": 7.681626687301177e-06, + "loss": 0.2861, + "step": 17870 + }, + { + "epoch": 0.23059512371273624, + "grad_norm": 1.2177709846839728, + "learning_rate": 7.685925543805347e-06, + "loss": 0.2877, + "step": 17880 + }, + { + "epoch": 0.2307240919027322, + "grad_norm": 1.1324906479175487, + "learning_rate": 7.690224400309518e-06, + "loss": 0.3011, + "step": 17890 + }, + { + "epoch": 0.23085306009272813, + "grad_norm": 1.2569551980265337, + "learning_rate": 7.694523256813688e-06, + "loss": 0.2813, + "step": 17900 + }, + { + "epoch": 0.23098202828272407, + "grad_norm": 1.3698474949078159, + "learning_rate": 7.698822113317858e-06, + "loss": 0.2927, + "step": 17910 + }, + { + "epoch": 0.23111099647272002, + "grad_norm": 1.2004680092781315, + "learning_rate": 7.703120969822027e-06, + "loss": 0.2913, + "step": 17920 + }, + { + "epoch": 0.23123996466271593, + "grad_norm": 1.2560671676381387, + "learning_rate": 7.707419826326197e-06, + "loss": 0.2848, + "step": 17930 + }, + { + "epoch": 0.23136893285271187, + "grad_norm": 1.326741747573823, + "learning_rate": 7.711718682830368e-06, + "loss": 0.2889, + "step": 17940 + }, + { + "epoch": 0.23149790104270782, + "grad_norm": 1.3190428965149712, + "learning_rate": 7.716017539334538e-06, + "loss": 0.2755, + "step": 17950 + }, + { + "epoch": 0.23162686923270376, + "grad_norm": 1.2225719346296722, + "learning_rate": 7.720316395838706e-06, + "loss": 0.3078, + "step": 17960 + }, + { + "epoch": 0.2317558374226997, + "grad_norm": 1.2772877104801212, + "learning_rate": 7.724615252342877e-06, + "loss": 0.2874, + "step": 17970 + }, + { + "epoch": 0.23188480561269562, + "grad_norm": 1.252054221010894, + "learning_rate": 7.728914108847047e-06, + "loss": 0.2963, + "step": 17980 + }, + { + "epoch": 0.23201377380269156, + "grad_norm": 1.2117338739946701, + "learning_rate": 7.733212965351217e-06, + "loss": 0.2993, + "step": 17990 + }, + { + "epoch": 0.2321427419926875, + "grad_norm": 1.2042579947601169, + "learning_rate": 7.737511821855388e-06, + "loss": 0.3049, + "step": 18000 + }, + { + "epoch": 0.23227171018268344, + "grad_norm": 1.098967891972267, + "learning_rate": 7.741810678359556e-06, + "loss": 0.2767, + "step": 18010 + }, + { + "epoch": 0.2324006783726794, + "grad_norm": 1.2438190315325028, + "learning_rate": 7.746109534863727e-06, + "loss": 0.2996, + "step": 18020 + }, + { + "epoch": 0.23252964656267533, + "grad_norm": 1.3447951660539388, + "learning_rate": 7.750408391367897e-06, + "loss": 0.2934, + "step": 18030 + }, + { + "epoch": 0.23265861475267124, + "grad_norm": 1.3485640273167407, + "learning_rate": 7.754707247872067e-06, + "loss": 0.2825, + "step": 18040 + }, + { + "epoch": 0.2327875829426672, + "grad_norm": 1.1989664571825738, + "learning_rate": 7.759006104376236e-06, + "loss": 0.3013, + "step": 18050 + }, + { + "epoch": 0.23291655113266313, + "grad_norm": 1.1776664848240315, + "learning_rate": 7.763304960880406e-06, + "loss": 0.2899, + "step": 18060 + }, + { + "epoch": 0.23304551932265907, + "grad_norm": 1.2435894116802964, + "learning_rate": 7.767603817384576e-06, + "loss": 0.2815, + "step": 18070 + }, + { + "epoch": 0.23317448751265502, + "grad_norm": 1.159585999974489, + "learning_rate": 7.771902673888747e-06, + "loss": 0.2799, + "step": 18080 + }, + { + "epoch": 0.23330345570265093, + "grad_norm": 1.3359705511712672, + "learning_rate": 7.776201530392915e-06, + "loss": 0.2841, + "step": 18090 + }, + { + "epoch": 0.23343242389264687, + "grad_norm": 1.243507904264857, + "learning_rate": 7.780500386897086e-06, + "loss": 0.2987, + "step": 18100 + }, + { + "epoch": 0.23356139208264282, + "grad_norm": 1.2646732686255466, + "learning_rate": 7.784799243401256e-06, + "loss": 0.296, + "step": 18110 + }, + { + "epoch": 0.23369036027263876, + "grad_norm": 1.3690530403566803, + "learning_rate": 7.789098099905426e-06, + "loss": 0.3, + "step": 18120 + }, + { + "epoch": 0.2338193284626347, + "grad_norm": 1.158618520414578, + "learning_rate": 7.793396956409597e-06, + "loss": 0.2788, + "step": 18130 + }, + { + "epoch": 0.23394829665263062, + "grad_norm": 1.2729138654064363, + "learning_rate": 7.797695812913765e-06, + "loss": 0.2824, + "step": 18140 + }, + { + "epoch": 0.23407726484262656, + "grad_norm": 1.346107954771874, + "learning_rate": 7.801994669417935e-06, + "loss": 0.3041, + "step": 18150 + }, + { + "epoch": 0.2342062330326225, + "grad_norm": 1.1841614433254317, + "learning_rate": 7.806293525922106e-06, + "loss": 0.2854, + "step": 18160 + }, + { + "epoch": 0.23433520122261844, + "grad_norm": 1.3940509498500442, + "learning_rate": 7.810592382426276e-06, + "loss": 0.2921, + "step": 18170 + }, + { + "epoch": 0.2344641694126144, + "grad_norm": 1.3608941872087634, + "learning_rate": 7.814891238930445e-06, + "loss": 0.2882, + "step": 18180 + }, + { + "epoch": 0.23459313760261033, + "grad_norm": 1.2815269118471413, + "learning_rate": 7.819190095434615e-06, + "loss": 0.2793, + "step": 18190 + }, + { + "epoch": 0.23472210579260624, + "grad_norm": 1.2069142879348322, + "learning_rate": 7.823488951938785e-06, + "loss": 0.2985, + "step": 18200 + }, + { + "epoch": 0.2348510739826022, + "grad_norm": 1.148598800756478, + "learning_rate": 7.827787808442956e-06, + "loss": 0.2729, + "step": 18210 + }, + { + "epoch": 0.23498004217259813, + "grad_norm": 1.249477128373098, + "learning_rate": 7.832086664947126e-06, + "loss": 0.3, + "step": 18220 + }, + { + "epoch": 0.23510901036259407, + "grad_norm": 1.3496507491166934, + "learning_rate": 7.836385521451294e-06, + "loss": 0.2867, + "step": 18230 + }, + { + "epoch": 0.23523797855259002, + "grad_norm": 1.2886469026510818, + "learning_rate": 7.840684377955465e-06, + "loss": 0.2806, + "step": 18240 + }, + { + "epoch": 0.23536694674258593, + "grad_norm": 1.2909406347645438, + "learning_rate": 7.844983234459635e-06, + "loss": 0.291, + "step": 18250 + }, + { + "epoch": 0.23549591493258187, + "grad_norm": 1.2647737413615852, + "learning_rate": 7.849282090963805e-06, + "loss": 0.2863, + "step": 18260 + }, + { + "epoch": 0.23562488312257782, + "grad_norm": 1.2295643320795004, + "learning_rate": 7.853580947467974e-06, + "loss": 0.2809, + "step": 18270 + }, + { + "epoch": 0.23575385131257376, + "grad_norm": 1.330371138746088, + "learning_rate": 7.857879803972144e-06, + "loss": 0.2947, + "step": 18280 + }, + { + "epoch": 0.2358828195025697, + "grad_norm": 1.4741018516466977, + "learning_rate": 7.862178660476315e-06, + "loss": 0.2995, + "step": 18290 + }, + { + "epoch": 0.23601178769256562, + "grad_norm": 1.1871074265705828, + "learning_rate": 7.866477516980485e-06, + "loss": 0.2799, + "step": 18300 + }, + { + "epoch": 0.23614075588256156, + "grad_norm": 1.3933001269057674, + "learning_rate": 7.870776373484653e-06, + "loss": 0.2762, + "step": 18310 + }, + { + "epoch": 0.2362697240725575, + "grad_norm": 1.3925539950606556, + "learning_rate": 7.875075229988824e-06, + "loss": 0.2823, + "step": 18320 + }, + { + "epoch": 0.23639869226255344, + "grad_norm": 1.327496729832962, + "learning_rate": 7.879374086492994e-06, + "loss": 0.2946, + "step": 18330 + }, + { + "epoch": 0.2365276604525494, + "grad_norm": 1.1079960549366876, + "learning_rate": 7.883672942997164e-06, + "loss": 0.2842, + "step": 18340 + }, + { + "epoch": 0.2366566286425453, + "grad_norm": 1.2064328227923904, + "learning_rate": 7.887971799501333e-06, + "loss": 0.2795, + "step": 18350 + }, + { + "epoch": 0.23678559683254125, + "grad_norm": 1.3089714744847347, + "learning_rate": 7.892270656005503e-06, + "loss": 0.2919, + "step": 18360 + }, + { + "epoch": 0.2369145650225372, + "grad_norm": 1.357651698066711, + "learning_rate": 7.896569512509674e-06, + "loss": 0.2855, + "step": 18370 + }, + { + "epoch": 0.23704353321253313, + "grad_norm": 1.4009436211649593, + "learning_rate": 7.900868369013844e-06, + "loss": 0.2891, + "step": 18380 + }, + { + "epoch": 0.23717250140252907, + "grad_norm": 1.3499041353195145, + "learning_rate": 7.905167225518014e-06, + "loss": 0.3138, + "step": 18390 + }, + { + "epoch": 0.23730146959252502, + "grad_norm": 1.4060616101817913, + "learning_rate": 7.909466082022183e-06, + "loss": 0.2856, + "step": 18400 + }, + { + "epoch": 0.23743043778252093, + "grad_norm": 1.1859566164709396, + "learning_rate": 7.913764938526353e-06, + "loss": 0.2899, + "step": 18410 + }, + { + "epoch": 0.23755940597251687, + "grad_norm": 1.2693021926911248, + "learning_rate": 7.918063795030523e-06, + "loss": 0.3094, + "step": 18420 + }, + { + "epoch": 0.23768837416251282, + "grad_norm": 1.1324450728066169, + "learning_rate": 7.922362651534694e-06, + "loss": 0.2757, + "step": 18430 + }, + { + "epoch": 0.23781734235250876, + "grad_norm": 1.2141540821160246, + "learning_rate": 7.926661508038862e-06, + "loss": 0.284, + "step": 18440 + }, + { + "epoch": 0.2379463105425047, + "grad_norm": 1.3588340822097489, + "learning_rate": 7.930960364543033e-06, + "loss": 0.2967, + "step": 18450 + }, + { + "epoch": 0.23807527873250062, + "grad_norm": 1.1584500923598295, + "learning_rate": 7.935259221047203e-06, + "loss": 0.287, + "step": 18460 + }, + { + "epoch": 0.23820424692249656, + "grad_norm": 1.234027382101662, + "learning_rate": 7.939558077551373e-06, + "loss": 0.2813, + "step": 18470 + }, + { + "epoch": 0.2383332151124925, + "grad_norm": 1.320820734718266, + "learning_rate": 7.943856934055542e-06, + "loss": 0.2922, + "step": 18480 + }, + { + "epoch": 0.23846218330248845, + "grad_norm": 1.3496674418457222, + "learning_rate": 7.948155790559712e-06, + "loss": 0.28, + "step": 18490 + }, + { + "epoch": 0.2385911514924844, + "grad_norm": 1.3520700170630229, + "learning_rate": 7.952454647063882e-06, + "loss": 0.2788, + "step": 18500 + }, + { + "epoch": 0.2387201196824803, + "grad_norm": 1.2392464240905257, + "learning_rate": 7.956753503568053e-06, + "loss": 0.2817, + "step": 18510 + }, + { + "epoch": 0.23884908787247625, + "grad_norm": 1.2860257064831695, + "learning_rate": 7.961052360072221e-06, + "loss": 0.288, + "step": 18520 + }, + { + "epoch": 0.2389780560624722, + "grad_norm": 1.213095527565705, + "learning_rate": 7.965351216576392e-06, + "loss": 0.2935, + "step": 18530 + }, + { + "epoch": 0.23910702425246813, + "grad_norm": 1.1751040621916076, + "learning_rate": 7.969650073080562e-06, + "loss": 0.2901, + "step": 18540 + }, + { + "epoch": 0.23923599244246407, + "grad_norm": 1.2352621525301901, + "learning_rate": 7.973948929584732e-06, + "loss": 0.2906, + "step": 18550 + }, + { + "epoch": 0.23936496063246002, + "grad_norm": 1.5074956753755673, + "learning_rate": 7.9782477860889e-06, + "loss": 0.2995, + "step": 18560 + }, + { + "epoch": 0.23949392882245593, + "grad_norm": 1.1419601658888165, + "learning_rate": 7.982546642593071e-06, + "loss": 0.2776, + "step": 18570 + }, + { + "epoch": 0.23962289701245187, + "grad_norm": 1.294606340483662, + "learning_rate": 7.986845499097241e-06, + "loss": 0.2845, + "step": 18580 + }, + { + "epoch": 0.23975186520244782, + "grad_norm": 1.2713586728403945, + "learning_rate": 7.991144355601412e-06, + "loss": 0.3082, + "step": 18590 + }, + { + "epoch": 0.23988083339244376, + "grad_norm": 1.151306687919376, + "learning_rate": 7.99544321210558e-06, + "loss": 0.2937, + "step": 18600 + }, + { + "epoch": 0.2400098015824397, + "grad_norm": 1.2911704812011875, + "learning_rate": 7.99974206860975e-06, + "loss": 0.2907, + "step": 18610 + }, + { + "epoch": 0.24013876977243562, + "grad_norm": 1.3340950361630424, + "learning_rate": 8.004040925113921e-06, + "loss": 0.281, + "step": 18620 + }, + { + "epoch": 0.24026773796243156, + "grad_norm": 1.1311092669116853, + "learning_rate": 8.008339781618091e-06, + "loss": 0.2783, + "step": 18630 + }, + { + "epoch": 0.2403967061524275, + "grad_norm": 1.2132789299420876, + "learning_rate": 8.01263863812226e-06, + "loss": 0.2936, + "step": 18640 + }, + { + "epoch": 0.24052567434242345, + "grad_norm": 1.2522341302518152, + "learning_rate": 8.01693749462643e-06, + "loss": 0.2897, + "step": 18650 + }, + { + "epoch": 0.2406546425324194, + "grad_norm": 1.136981989708504, + "learning_rate": 8.0212363511306e-06, + "loss": 0.2886, + "step": 18660 + }, + { + "epoch": 0.2407836107224153, + "grad_norm": 1.2675685357515152, + "learning_rate": 8.02553520763477e-06, + "loss": 0.2895, + "step": 18670 + }, + { + "epoch": 0.24091257891241125, + "grad_norm": 1.2257642849554915, + "learning_rate": 8.02983406413894e-06, + "loss": 0.2763, + "step": 18680 + }, + { + "epoch": 0.2410415471024072, + "grad_norm": 1.1621540037971416, + "learning_rate": 8.03413292064311e-06, + "loss": 0.2922, + "step": 18690 + }, + { + "epoch": 0.24117051529240313, + "grad_norm": 1.2685249253892024, + "learning_rate": 8.03843177714728e-06, + "loss": 0.302, + "step": 18700 + }, + { + "epoch": 0.24129948348239907, + "grad_norm": 1.1984541406991516, + "learning_rate": 8.04273063365145e-06, + "loss": 0.2865, + "step": 18710 + }, + { + "epoch": 0.24142845167239502, + "grad_norm": 1.1948734715966862, + "learning_rate": 8.047029490155619e-06, + "loss": 0.2972, + "step": 18720 + }, + { + "epoch": 0.24155741986239093, + "grad_norm": 1.2438073244310637, + "learning_rate": 8.051328346659789e-06, + "loss": 0.2765, + "step": 18730 + }, + { + "epoch": 0.24168638805238687, + "grad_norm": 1.307208724463049, + "learning_rate": 8.05562720316396e-06, + "loss": 0.2847, + "step": 18740 + }, + { + "epoch": 0.24181535624238282, + "grad_norm": 1.4041033808050838, + "learning_rate": 8.05992605966813e-06, + "loss": 0.2892, + "step": 18750 + }, + { + "epoch": 0.24194432443237876, + "grad_norm": 1.16590794596037, + "learning_rate": 8.064224916172298e-06, + "loss": 0.2861, + "step": 18760 + }, + { + "epoch": 0.2420732926223747, + "grad_norm": 1.2422536567722187, + "learning_rate": 8.068523772676469e-06, + "loss": 0.2835, + "step": 18770 + }, + { + "epoch": 0.24220226081237062, + "grad_norm": 1.2426454636433444, + "learning_rate": 8.072822629180639e-06, + "loss": 0.2787, + "step": 18780 + }, + { + "epoch": 0.24233122900236656, + "grad_norm": 1.4203451378890177, + "learning_rate": 8.07712148568481e-06, + "loss": 0.2833, + "step": 18790 + }, + { + "epoch": 0.2424601971923625, + "grad_norm": 1.3419960267790643, + "learning_rate": 8.081420342188978e-06, + "loss": 0.2963, + "step": 18800 + }, + { + "epoch": 0.24258916538235845, + "grad_norm": 1.256991929663097, + "learning_rate": 8.085719198693148e-06, + "loss": 0.2941, + "step": 18810 + }, + { + "epoch": 0.2427181335723544, + "grad_norm": 1.5689217264171509, + "learning_rate": 8.090018055197318e-06, + "loss": 0.2861, + "step": 18820 + }, + { + "epoch": 0.2428471017623503, + "grad_norm": 1.2677855332794534, + "learning_rate": 8.094316911701489e-06, + "loss": 0.2874, + "step": 18830 + }, + { + "epoch": 0.24297606995234625, + "grad_norm": 1.3455610665612918, + "learning_rate": 8.098615768205657e-06, + "loss": 0.278, + "step": 18840 + }, + { + "epoch": 0.2431050381423422, + "grad_norm": 1.2955888091162693, + "learning_rate": 8.102914624709828e-06, + "loss": 0.2922, + "step": 18850 + }, + { + "epoch": 0.24323400633233813, + "grad_norm": 1.2348484397773112, + "learning_rate": 8.107213481213998e-06, + "loss": 0.2788, + "step": 18860 + }, + { + "epoch": 0.24336297452233407, + "grad_norm": 1.2689772191553643, + "learning_rate": 8.111512337718168e-06, + "loss": 0.2793, + "step": 18870 + }, + { + "epoch": 0.24349194271233002, + "grad_norm": 1.2385855817244693, + "learning_rate": 8.115811194222337e-06, + "loss": 0.3006, + "step": 18880 + }, + { + "epoch": 0.24362091090232593, + "grad_norm": 1.1381845620003543, + "learning_rate": 8.120110050726507e-06, + "loss": 0.2974, + "step": 18890 + }, + { + "epoch": 0.24374987909232187, + "grad_norm": 1.4271994246124733, + "learning_rate": 8.124408907230677e-06, + "loss": 0.2829, + "step": 18900 + }, + { + "epoch": 0.24387884728231782, + "grad_norm": 1.3570794797624273, + "learning_rate": 8.128707763734848e-06, + "loss": 0.2883, + "step": 18910 + }, + { + "epoch": 0.24400781547231376, + "grad_norm": 1.271764300096347, + "learning_rate": 8.133006620239016e-06, + "loss": 0.3025, + "step": 18920 + }, + { + "epoch": 0.2441367836623097, + "grad_norm": 1.1546396507964125, + "learning_rate": 8.137305476743187e-06, + "loss": 0.284, + "step": 18930 + }, + { + "epoch": 0.24426575185230562, + "grad_norm": 1.23005885316267, + "learning_rate": 8.141604333247357e-06, + "loss": 0.2835, + "step": 18940 + }, + { + "epoch": 0.24439472004230156, + "grad_norm": 1.2415370981715446, + "learning_rate": 8.145903189751527e-06, + "loss": 0.2794, + "step": 18950 + }, + { + "epoch": 0.2445236882322975, + "grad_norm": 1.1158453990719377, + "learning_rate": 8.150202046255696e-06, + "loss": 0.2973, + "step": 18960 + }, + { + "epoch": 0.24465265642229345, + "grad_norm": 1.2240523458202968, + "learning_rate": 8.154500902759866e-06, + "loss": 0.2791, + "step": 18970 + }, + { + "epoch": 0.2447816246122894, + "grad_norm": 1.2339871595279668, + "learning_rate": 8.158799759264036e-06, + "loss": 0.2804, + "step": 18980 + }, + { + "epoch": 0.2449105928022853, + "grad_norm": 1.229242446968727, + "learning_rate": 8.163098615768207e-06, + "loss": 0.2819, + "step": 18990 + }, + { + "epoch": 0.24503956099228125, + "grad_norm": 1.2506553907828355, + "learning_rate": 8.167397472272375e-06, + "loss": 0.302, + "step": 19000 + }, + { + "epoch": 0.2451685291822772, + "grad_norm": 1.1036333139051577, + "learning_rate": 8.171696328776546e-06, + "loss": 0.284, + "step": 19010 + }, + { + "epoch": 0.24529749737227313, + "grad_norm": 1.1379453186359625, + "learning_rate": 8.175995185280716e-06, + "loss": 0.2954, + "step": 19020 + }, + { + "epoch": 0.24542646556226907, + "grad_norm": 1.223831613024789, + "learning_rate": 8.180294041784886e-06, + "loss": 0.277, + "step": 19030 + }, + { + "epoch": 0.24555543375226502, + "grad_norm": 1.371507728896123, + "learning_rate": 8.184592898289055e-06, + "loss": 0.2853, + "step": 19040 + }, + { + "epoch": 0.24568440194226093, + "grad_norm": 1.3864374883238593, + "learning_rate": 8.188891754793225e-06, + "loss": 0.2783, + "step": 19050 + }, + { + "epoch": 0.24581337013225688, + "grad_norm": 1.1832718770876551, + "learning_rate": 8.193190611297395e-06, + "loss": 0.2783, + "step": 19060 + }, + { + "epoch": 0.24594233832225282, + "grad_norm": 1.20512229291949, + "learning_rate": 8.197489467801566e-06, + "loss": 0.2861, + "step": 19070 + }, + { + "epoch": 0.24607130651224876, + "grad_norm": 1.1885105114380456, + "learning_rate": 8.201788324305734e-06, + "loss": 0.2761, + "step": 19080 + }, + { + "epoch": 0.2462002747022447, + "grad_norm": 1.1368099856798808, + "learning_rate": 8.206087180809905e-06, + "loss": 0.2785, + "step": 19090 + }, + { + "epoch": 0.24632924289224062, + "grad_norm": 1.1626191966447994, + "learning_rate": 8.210386037314075e-06, + "loss": 0.2927, + "step": 19100 + }, + { + "epoch": 0.24645821108223656, + "grad_norm": 1.203663311662906, + "learning_rate": 8.214684893818245e-06, + "loss": 0.2943, + "step": 19110 + }, + { + "epoch": 0.2465871792722325, + "grad_norm": 1.2357310478802148, + "learning_rate": 8.218983750322414e-06, + "loss": 0.2906, + "step": 19120 + }, + { + "epoch": 0.24671614746222845, + "grad_norm": 1.1950816927461245, + "learning_rate": 8.223282606826584e-06, + "loss": 0.2856, + "step": 19130 + }, + { + "epoch": 0.2468451156522244, + "grad_norm": 1.2528051125550614, + "learning_rate": 8.227581463330755e-06, + "loss": 0.2905, + "step": 19140 + }, + { + "epoch": 0.2469740838422203, + "grad_norm": 1.298585512293197, + "learning_rate": 8.231880319834925e-06, + "loss": 0.3061, + "step": 19150 + }, + { + "epoch": 0.24710305203221625, + "grad_norm": 1.2909441978499794, + "learning_rate": 8.236179176339093e-06, + "loss": 0.2794, + "step": 19160 + }, + { + "epoch": 0.2472320202222122, + "grad_norm": 1.2015173940813568, + "learning_rate": 8.240478032843264e-06, + "loss": 0.28, + "step": 19170 + }, + { + "epoch": 0.24736098841220813, + "grad_norm": 1.3480235335760804, + "learning_rate": 8.244776889347434e-06, + "loss": 0.2802, + "step": 19180 + }, + { + "epoch": 0.24748995660220408, + "grad_norm": 1.219286892078629, + "learning_rate": 8.249075745851604e-06, + "loss": 0.2801, + "step": 19190 + }, + { + "epoch": 0.2476189247922, + "grad_norm": 1.147465592720777, + "learning_rate": 8.253374602355773e-06, + "loss": 0.2793, + "step": 19200 + }, + { + "epoch": 0.24774789298219593, + "grad_norm": 1.2346851729302435, + "learning_rate": 8.257673458859943e-06, + "loss": 0.2857, + "step": 19210 + }, + { + "epoch": 0.24787686117219188, + "grad_norm": 1.1974564177154212, + "learning_rate": 8.261972315364114e-06, + "loss": 0.2973, + "step": 19220 + }, + { + "epoch": 0.24800582936218782, + "grad_norm": 1.3168332868853208, + "learning_rate": 8.266271171868284e-06, + "loss": 0.285, + "step": 19230 + }, + { + "epoch": 0.24813479755218376, + "grad_norm": 1.4640169250803623, + "learning_rate": 8.270570028372452e-06, + "loss": 0.2989, + "step": 19240 + }, + { + "epoch": 0.2482637657421797, + "grad_norm": 1.1532139627683748, + "learning_rate": 8.274868884876623e-06, + "loss": 0.2934, + "step": 19250 + }, + { + "epoch": 0.24839273393217562, + "grad_norm": 1.2134642172881973, + "learning_rate": 8.279167741380793e-06, + "loss": 0.2978, + "step": 19260 + }, + { + "epoch": 0.24852170212217156, + "grad_norm": 1.3795551447897958, + "learning_rate": 8.283466597884963e-06, + "loss": 0.3025, + "step": 19270 + }, + { + "epoch": 0.2486506703121675, + "grad_norm": 1.3330987079374044, + "learning_rate": 8.287765454389134e-06, + "loss": 0.2891, + "step": 19280 + }, + { + "epoch": 0.24877963850216345, + "grad_norm": 1.1506719865569885, + "learning_rate": 8.292064310893302e-06, + "loss": 0.2889, + "step": 19290 + }, + { + "epoch": 0.2489086066921594, + "grad_norm": 1.2338414162782352, + "learning_rate": 8.296363167397473e-06, + "loss": 0.2881, + "step": 19300 + }, + { + "epoch": 0.2490375748821553, + "grad_norm": 1.2318089066311284, + "learning_rate": 8.300662023901643e-06, + "loss": 0.2924, + "step": 19310 + }, + { + "epoch": 0.24916654307215125, + "grad_norm": 1.298636965374088, + "learning_rate": 8.304960880405813e-06, + "loss": 0.2846, + "step": 19320 + }, + { + "epoch": 0.2492955112621472, + "grad_norm": 1.2057167129806432, + "learning_rate": 8.309259736909982e-06, + "loss": 0.2858, + "step": 19330 + }, + { + "epoch": 0.24942447945214313, + "grad_norm": 1.2225799108658006, + "learning_rate": 8.313558593414152e-06, + "loss": 0.3001, + "step": 19340 + }, + { + "epoch": 0.24955344764213908, + "grad_norm": 2.3145419893367767, + "learning_rate": 8.317857449918322e-06, + "loss": 0.2996, + "step": 19350 + }, + { + "epoch": 0.249682415832135, + "grad_norm": 1.2078191411266483, + "learning_rate": 8.322156306422493e-06, + "loss": 0.2836, + "step": 19360 + }, + { + "epoch": 0.24981138402213093, + "grad_norm": 1.2496797772292942, + "learning_rate": 8.326455162926663e-06, + "loss": 0.278, + "step": 19370 + }, + { + "epoch": 0.24994035221212688, + "grad_norm": 1.321574873176443, + "learning_rate": 8.330754019430832e-06, + "loss": 0.2953, + "step": 19380 + }, + { + "epoch": 0.2500693204021228, + "grad_norm": 1.2886807746907973, + "learning_rate": 8.335052875935002e-06, + "loss": 0.2846, + "step": 19390 + }, + { + "epoch": 0.25019828859211873, + "grad_norm": 1.2776982382546946, + "learning_rate": 8.339351732439172e-06, + "loss": 0.2795, + "step": 19400 + }, + { + "epoch": 0.2503272567821147, + "grad_norm": 1.3125035759884989, + "learning_rate": 8.343650588943342e-06, + "loss": 0.2824, + "step": 19410 + }, + { + "epoch": 0.2504562249721106, + "grad_norm": 1.317843354582878, + "learning_rate": 8.347949445447511e-06, + "loss": 0.2868, + "step": 19420 + }, + { + "epoch": 0.2505851931621066, + "grad_norm": 1.181131575854692, + "learning_rate": 8.352248301951681e-06, + "loss": 0.2806, + "step": 19430 + }, + { + "epoch": 0.2507141613521025, + "grad_norm": 1.2085380796951843, + "learning_rate": 8.356547158455852e-06, + "loss": 0.2939, + "step": 19440 + }, + { + "epoch": 0.2508431295420984, + "grad_norm": 1.2381214005456835, + "learning_rate": 8.360846014960022e-06, + "loss": 0.2692, + "step": 19450 + }, + { + "epoch": 0.2509720977320944, + "grad_norm": 1.386634173896935, + "learning_rate": 8.36514487146419e-06, + "loss": 0.2941, + "step": 19460 + }, + { + "epoch": 0.2511010659220903, + "grad_norm": 1.2735483937866776, + "learning_rate": 8.369443727968361e-06, + "loss": 0.2878, + "step": 19470 + }, + { + "epoch": 0.2512300341120863, + "grad_norm": 1.1758718625019053, + "learning_rate": 8.373742584472531e-06, + "loss": 0.2709, + "step": 19480 + }, + { + "epoch": 0.2513590023020822, + "grad_norm": 1.7424245001443797, + "learning_rate": 8.378041440976701e-06, + "loss": 0.2901, + "step": 19490 + }, + { + "epoch": 0.2514879704920781, + "grad_norm": 1.225005347493721, + "learning_rate": 8.382340297480872e-06, + "loss": 0.2853, + "step": 19500 + }, + { + "epoch": 0.2516169386820741, + "grad_norm": 1.1730707750421145, + "learning_rate": 8.38663915398504e-06, + "loss": 0.2932, + "step": 19510 + }, + { + "epoch": 0.25174590687207, + "grad_norm": 1.226612182023169, + "learning_rate": 8.39093801048921e-06, + "loss": 0.2888, + "step": 19520 + }, + { + "epoch": 0.25187487506206596, + "grad_norm": 1.1757890930971377, + "learning_rate": 8.395236866993381e-06, + "loss": 0.2804, + "step": 19530 + }, + { + "epoch": 0.2520038432520619, + "grad_norm": 1.2190004555397214, + "learning_rate": 8.399535723497551e-06, + "loss": 0.2812, + "step": 19540 + }, + { + "epoch": 0.2521328114420578, + "grad_norm": 1.1386623695013711, + "learning_rate": 8.40383458000172e-06, + "loss": 0.2921, + "step": 19550 + }, + { + "epoch": 0.25226177963205376, + "grad_norm": 1.2159088791081838, + "learning_rate": 8.40813343650589e-06, + "loss": 0.2882, + "step": 19560 + }, + { + "epoch": 0.2523907478220497, + "grad_norm": 1.1860089192469567, + "learning_rate": 8.41243229301006e-06, + "loss": 0.2842, + "step": 19570 + }, + { + "epoch": 0.25251971601204565, + "grad_norm": 1.2205932275812463, + "learning_rate": 8.41673114951423e-06, + "loss": 0.2737, + "step": 19580 + }, + { + "epoch": 0.25264868420204156, + "grad_norm": 1.2621007117592313, + "learning_rate": 8.421030006018401e-06, + "loss": 0.2963, + "step": 19590 + }, + { + "epoch": 0.2527776523920375, + "grad_norm": 1.2655948893666678, + "learning_rate": 8.42532886252257e-06, + "loss": 0.2926, + "step": 19600 + }, + { + "epoch": 0.25290662058203345, + "grad_norm": 1.2685975622974486, + "learning_rate": 8.42962771902674e-06, + "loss": 0.2997, + "step": 19610 + }, + { + "epoch": 0.25303558877202936, + "grad_norm": 1.2314076081833185, + "learning_rate": 8.43392657553091e-06, + "loss": 0.283, + "step": 19620 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.283576798719345, + "learning_rate": 8.43822543203508e-06, + "loss": 0.2897, + "step": 19630 + }, + { + "epoch": 0.25329352515202125, + "grad_norm": 1.2245398550783653, + "learning_rate": 8.44252428853925e-06, + "loss": 0.2844, + "step": 19640 + }, + { + "epoch": 0.2534224933420172, + "grad_norm": 1.0215000125278846, + "learning_rate": 8.44682314504342e-06, + "loss": 0.2864, + "step": 19650 + }, + { + "epoch": 0.25355146153201313, + "grad_norm": 1.1792645918366267, + "learning_rate": 8.45112200154759e-06, + "loss": 0.2914, + "step": 19660 + }, + { + "epoch": 0.25368042972200905, + "grad_norm": 1.2435324942948567, + "learning_rate": 8.45542085805176e-06, + "loss": 0.293, + "step": 19670 + }, + { + "epoch": 0.253809397912005, + "grad_norm": 1.2334093257680032, + "learning_rate": 8.459719714555929e-06, + "loss": 0.2966, + "step": 19680 + }, + { + "epoch": 0.25393836610200093, + "grad_norm": 1.2226645145711807, + "learning_rate": 8.464018571060099e-06, + "loss": 0.2838, + "step": 19690 + }, + { + "epoch": 0.2540673342919969, + "grad_norm": 1.2608990305581504, + "learning_rate": 8.46831742756427e-06, + "loss": 0.2903, + "step": 19700 + }, + { + "epoch": 0.2541963024819928, + "grad_norm": 1.3512991433479562, + "learning_rate": 8.47261628406844e-06, + "loss": 0.2934, + "step": 19710 + }, + { + "epoch": 0.25432527067198873, + "grad_norm": 1.1926329354146776, + "learning_rate": 8.476915140572608e-06, + "loss": 0.2886, + "step": 19720 + }, + { + "epoch": 0.2544542388619847, + "grad_norm": 1.2487077889786702, + "learning_rate": 8.481213997076779e-06, + "loss": 0.2788, + "step": 19730 + }, + { + "epoch": 0.2545832070519806, + "grad_norm": 1.2634122327280128, + "learning_rate": 8.485512853580949e-06, + "loss": 0.2874, + "step": 19740 + }, + { + "epoch": 0.2547121752419766, + "grad_norm": 1.1358825367808432, + "learning_rate": 8.489811710085119e-06, + "loss": 0.2821, + "step": 19750 + }, + { + "epoch": 0.2548411434319725, + "grad_norm": 1.4077321981456088, + "learning_rate": 8.494110566589288e-06, + "loss": 0.2949, + "step": 19760 + }, + { + "epoch": 0.2549701116219684, + "grad_norm": 1.3044053729976146, + "learning_rate": 8.498409423093458e-06, + "loss": 0.2924, + "step": 19770 + }, + { + "epoch": 0.2550990798119644, + "grad_norm": 1.3010543279402214, + "learning_rate": 8.502708279597628e-06, + "loss": 0.291, + "step": 19780 + }, + { + "epoch": 0.2552280480019603, + "grad_norm": 1.2624333468546856, + "learning_rate": 8.507007136101799e-06, + "loss": 0.2988, + "step": 19790 + }, + { + "epoch": 0.2553570161919563, + "grad_norm": 1.288714976900743, + "learning_rate": 8.511305992605967e-06, + "loss": 0.2985, + "step": 19800 + }, + { + "epoch": 0.2554859843819522, + "grad_norm": 1.1073594238863536, + "learning_rate": 8.515604849110138e-06, + "loss": 0.2917, + "step": 19810 + }, + { + "epoch": 0.2556149525719481, + "grad_norm": 1.1656643977120347, + "learning_rate": 8.519903705614308e-06, + "loss": 0.2959, + "step": 19820 + }, + { + "epoch": 0.2557439207619441, + "grad_norm": 1.2428917035875398, + "learning_rate": 8.524202562118478e-06, + "loss": 0.288, + "step": 19830 + }, + { + "epoch": 0.25587288895194, + "grad_norm": 1.1491359571602522, + "learning_rate": 8.528501418622647e-06, + "loss": 0.2808, + "step": 19840 + }, + { + "epoch": 0.25600185714193596, + "grad_norm": 1.2887565514153623, + "learning_rate": 8.532800275126817e-06, + "loss": 0.2988, + "step": 19850 + }, + { + "epoch": 0.2561308253319319, + "grad_norm": 1.2292164836139563, + "learning_rate": 8.537099131630987e-06, + "loss": 0.2843, + "step": 19860 + }, + { + "epoch": 0.2562597935219278, + "grad_norm": 1.3712676500219088, + "learning_rate": 8.541397988135158e-06, + "loss": 0.2797, + "step": 19870 + }, + { + "epoch": 0.25638876171192376, + "grad_norm": 1.1254888579057158, + "learning_rate": 8.545696844639326e-06, + "loss": 0.2871, + "step": 19880 + }, + { + "epoch": 0.2565177299019197, + "grad_norm": 1.1153640780307903, + "learning_rate": 8.549995701143497e-06, + "loss": 0.2804, + "step": 19890 + }, + { + "epoch": 0.25664669809191565, + "grad_norm": 1.2164704293570947, + "learning_rate": 8.554294557647667e-06, + "loss": 0.2748, + "step": 19900 + }, + { + "epoch": 0.25677566628191156, + "grad_norm": 1.338066038948949, + "learning_rate": 8.558593414151837e-06, + "loss": 0.2935, + "step": 19910 + }, + { + "epoch": 0.2569046344719075, + "grad_norm": 1.1508204052300093, + "learning_rate": 8.562892270656006e-06, + "loss": 0.2823, + "step": 19920 + }, + { + "epoch": 0.25703360266190345, + "grad_norm": 1.1909604418257662, + "learning_rate": 8.567191127160176e-06, + "loss": 0.296, + "step": 19930 + }, + { + "epoch": 0.25716257085189936, + "grad_norm": 1.186109847967728, + "learning_rate": 8.571489983664346e-06, + "loss": 0.2998, + "step": 19940 + }, + { + "epoch": 0.25729153904189533, + "grad_norm": 1.0856386935954159, + "learning_rate": 8.575788840168517e-06, + "loss": 0.2819, + "step": 19950 + }, + { + "epoch": 0.25742050723189125, + "grad_norm": 1.2694391711786401, + "learning_rate": 8.580087696672685e-06, + "loss": 0.2826, + "step": 19960 + }, + { + "epoch": 0.25754947542188716, + "grad_norm": 1.4054952671119731, + "learning_rate": 8.584386553176856e-06, + "loss": 0.2905, + "step": 19970 + }, + { + "epoch": 0.25767844361188313, + "grad_norm": 1.297236938728875, + "learning_rate": 8.588685409681026e-06, + "loss": 0.3024, + "step": 19980 + }, + { + "epoch": 0.25780741180187905, + "grad_norm": 1.1176842744569309, + "learning_rate": 8.592984266185196e-06, + "loss": 0.2735, + "step": 19990 + }, + { + "epoch": 0.257936379991875, + "grad_norm": 1.2999987605026604, + "learning_rate": 8.597283122689365e-06, + "loss": 0.2853, + "step": 20000 + }, + { + "epoch": 0.25806534818187093, + "grad_norm": 1.3026613498779023, + "learning_rate": 8.601581979193535e-06, + "loss": 0.3006, + "step": 20010 + }, + { + "epoch": 0.2581943163718669, + "grad_norm": 1.2123020159496443, + "learning_rate": 8.605880835697705e-06, + "loss": 0.2825, + "step": 20020 + }, + { + "epoch": 0.2583232845618628, + "grad_norm": 1.2577319439647334, + "learning_rate": 8.610179692201876e-06, + "loss": 0.2808, + "step": 20030 + }, + { + "epoch": 0.25845225275185874, + "grad_norm": 1.1289072495825287, + "learning_rate": 8.614478548706044e-06, + "loss": 0.2968, + "step": 20040 + }, + { + "epoch": 0.2585812209418547, + "grad_norm": 1.18886789564288, + "learning_rate": 8.618777405210215e-06, + "loss": 0.2848, + "step": 20050 + }, + { + "epoch": 0.2587101891318506, + "grad_norm": 1.267998605091728, + "learning_rate": 8.623076261714385e-06, + "loss": 0.2806, + "step": 20060 + }, + { + "epoch": 0.2588391573218466, + "grad_norm": 1.254193352964064, + "learning_rate": 8.627375118218555e-06, + "loss": 0.2847, + "step": 20070 + }, + { + "epoch": 0.2589681255118425, + "grad_norm": 1.1560067442526734, + "learning_rate": 8.631673974722724e-06, + "loss": 0.2771, + "step": 20080 + }, + { + "epoch": 0.2590970937018384, + "grad_norm": 1.3817058502174704, + "learning_rate": 8.635972831226894e-06, + "loss": 0.2942, + "step": 20090 + }, + { + "epoch": 0.2592260618918344, + "grad_norm": 1.102303536256828, + "learning_rate": 8.640271687731064e-06, + "loss": 0.2868, + "step": 20100 + }, + { + "epoch": 0.2593550300818303, + "grad_norm": 1.2675701018124577, + "learning_rate": 8.644570544235235e-06, + "loss": 0.2823, + "step": 20110 + }, + { + "epoch": 0.2594839982718263, + "grad_norm": 1.2077451473199072, + "learning_rate": 8.648869400739403e-06, + "loss": 0.2717, + "step": 20120 + }, + { + "epoch": 0.2596129664618222, + "grad_norm": 1.1999299273691135, + "learning_rate": 8.653168257243574e-06, + "loss": 0.2857, + "step": 20130 + }, + { + "epoch": 0.2597419346518181, + "grad_norm": 1.1871644295615396, + "learning_rate": 8.657467113747744e-06, + "loss": 0.2785, + "step": 20140 + }, + { + "epoch": 0.2598709028418141, + "grad_norm": 1.2120912452620194, + "learning_rate": 8.661765970251914e-06, + "loss": 0.282, + "step": 20150 + }, + { + "epoch": 0.25999987103181, + "grad_norm": 1.1621574888261181, + "learning_rate": 8.666064826756083e-06, + "loss": 0.2931, + "step": 20160 + }, + { + "epoch": 0.26012883922180596, + "grad_norm": 1.155059072758355, + "learning_rate": 8.670363683260253e-06, + "loss": 0.2665, + "step": 20170 + }, + { + "epoch": 0.2602578074118019, + "grad_norm": 1.3264500445448038, + "learning_rate": 8.674662539764423e-06, + "loss": 0.2954, + "step": 20180 + }, + { + "epoch": 0.2603867756017978, + "grad_norm": 1.2329389647231392, + "learning_rate": 8.678961396268594e-06, + "loss": 0.2783, + "step": 20190 + }, + { + "epoch": 0.26051574379179376, + "grad_norm": 1.1132171766947143, + "learning_rate": 8.683260252772762e-06, + "loss": 0.284, + "step": 20200 + }, + { + "epoch": 0.2606447119817897, + "grad_norm": 1.2355341451267183, + "learning_rate": 8.687559109276933e-06, + "loss": 0.2953, + "step": 20210 + }, + { + "epoch": 0.26077368017178565, + "grad_norm": 1.1736828159754347, + "learning_rate": 8.691857965781103e-06, + "loss": 0.2935, + "step": 20220 + }, + { + "epoch": 0.26090264836178156, + "grad_norm": 1.275279493805125, + "learning_rate": 8.696156822285273e-06, + "loss": 0.2967, + "step": 20230 + }, + { + "epoch": 0.2610316165517775, + "grad_norm": 1.2141552979491508, + "learning_rate": 8.700455678789442e-06, + "loss": 0.2737, + "step": 20240 + }, + { + "epoch": 0.26116058474177345, + "grad_norm": 1.2397821402870755, + "learning_rate": 8.704754535293612e-06, + "loss": 0.2837, + "step": 20250 + }, + { + "epoch": 0.26128955293176936, + "grad_norm": 1.2508906707499274, + "learning_rate": 8.709053391797782e-06, + "loss": 0.2884, + "step": 20260 + }, + { + "epoch": 0.26141852112176533, + "grad_norm": 1.2342650277684195, + "learning_rate": 8.713352248301953e-06, + "loss": 0.2821, + "step": 20270 + }, + { + "epoch": 0.26154748931176125, + "grad_norm": 1.1231472439146872, + "learning_rate": 8.717651104806121e-06, + "loss": 0.2818, + "step": 20280 + }, + { + "epoch": 0.26167645750175716, + "grad_norm": 1.0930649675443693, + "learning_rate": 8.721949961310292e-06, + "loss": 0.2905, + "step": 20290 + }, + { + "epoch": 0.26180542569175314, + "grad_norm": 1.2710818982849228, + "learning_rate": 8.726248817814462e-06, + "loss": 0.2672, + "step": 20300 + }, + { + "epoch": 0.26193439388174905, + "grad_norm": 1.1556875785532978, + "learning_rate": 8.730547674318632e-06, + "loss": 0.2853, + "step": 20310 + }, + { + "epoch": 0.262063362071745, + "grad_norm": 1.1689864681560225, + "learning_rate": 8.7348465308228e-06, + "loss": 0.2863, + "step": 20320 + }, + { + "epoch": 0.26219233026174094, + "grad_norm": 1.2212511186461743, + "learning_rate": 8.739145387326971e-06, + "loss": 0.2841, + "step": 20330 + }, + { + "epoch": 0.2623212984517369, + "grad_norm": 1.206598821026787, + "learning_rate": 8.743444243831141e-06, + "loss": 0.2903, + "step": 20340 + }, + { + "epoch": 0.2624502666417328, + "grad_norm": 1.2097823374060623, + "learning_rate": 8.747743100335312e-06, + "loss": 0.269, + "step": 20350 + }, + { + "epoch": 0.26257923483172874, + "grad_norm": 1.2300608823951056, + "learning_rate": 8.75204195683948e-06, + "loss": 0.2927, + "step": 20360 + }, + { + "epoch": 0.2627082030217247, + "grad_norm": 1.1444051058466016, + "learning_rate": 8.75634081334365e-06, + "loss": 0.3054, + "step": 20370 + }, + { + "epoch": 0.2628371712117206, + "grad_norm": 1.1468357235038826, + "learning_rate": 8.760639669847821e-06, + "loss": 0.2857, + "step": 20380 + }, + { + "epoch": 0.2629661394017166, + "grad_norm": 1.3851175974364054, + "learning_rate": 8.764938526351991e-06, + "loss": 0.2696, + "step": 20390 + }, + { + "epoch": 0.2630951075917125, + "grad_norm": 1.209154413565608, + "learning_rate": 8.76923738285616e-06, + "loss": 0.2874, + "step": 20400 + }, + { + "epoch": 0.2632240757817084, + "grad_norm": 1.2410232107340085, + "learning_rate": 8.77353623936033e-06, + "loss": 0.2744, + "step": 20410 + }, + { + "epoch": 0.2633530439717044, + "grad_norm": 1.2403753805343432, + "learning_rate": 8.7778350958645e-06, + "loss": 0.2936, + "step": 20420 + }, + { + "epoch": 0.2634820121617003, + "grad_norm": 1.4635409714861864, + "learning_rate": 8.78213395236867e-06, + "loss": 0.287, + "step": 20430 + }, + { + "epoch": 0.2636109803516963, + "grad_norm": 1.2299725293734034, + "learning_rate": 8.78643280887284e-06, + "loss": 0.2855, + "step": 20440 + }, + { + "epoch": 0.2637399485416922, + "grad_norm": 1.4027013212836423, + "learning_rate": 8.79073166537701e-06, + "loss": 0.284, + "step": 20450 + }, + { + "epoch": 0.2638689167316881, + "grad_norm": 1.3962729913888834, + "learning_rate": 8.79503052188118e-06, + "loss": 0.2812, + "step": 20460 + }, + { + "epoch": 0.2639978849216841, + "grad_norm": 1.238595262301501, + "learning_rate": 8.79932937838535e-06, + "loss": 0.2788, + "step": 20470 + }, + { + "epoch": 0.26412685311168, + "grad_norm": 1.1711665858674576, + "learning_rate": 8.803628234889519e-06, + "loss": 0.2714, + "step": 20480 + }, + { + "epoch": 0.26425582130167596, + "grad_norm": 1.1866471558545448, + "learning_rate": 8.80792709139369e-06, + "loss": 0.275, + "step": 20490 + }, + { + "epoch": 0.2643847894916719, + "grad_norm": 1.2397160542008445, + "learning_rate": 8.81222594789786e-06, + "loss": 0.2862, + "step": 20500 + }, + { + "epoch": 0.2645137576816678, + "grad_norm": 1.205611147861744, + "learning_rate": 8.81652480440203e-06, + "loss": 0.2757, + "step": 20510 + }, + { + "epoch": 0.26464272587166376, + "grad_norm": 1.2351593707893316, + "learning_rate": 8.8208236609062e-06, + "loss": 0.3002, + "step": 20520 + }, + { + "epoch": 0.2647716940616597, + "grad_norm": 1.1216482200092903, + "learning_rate": 8.825122517410369e-06, + "loss": 0.2848, + "step": 20530 + }, + { + "epoch": 0.26490066225165565, + "grad_norm": 1.1802897146795834, + "learning_rate": 8.829421373914539e-06, + "loss": 0.287, + "step": 20540 + }, + { + "epoch": 0.26502963044165156, + "grad_norm": 1.2636470604702297, + "learning_rate": 8.83372023041871e-06, + "loss": 0.2865, + "step": 20550 + }, + { + "epoch": 0.2651585986316475, + "grad_norm": 1.164047381471185, + "learning_rate": 8.83801908692288e-06, + "loss": 0.2834, + "step": 20560 + }, + { + "epoch": 0.26528756682164345, + "grad_norm": 1.1066993515487356, + "learning_rate": 8.842317943427048e-06, + "loss": 0.2807, + "step": 20570 + }, + { + "epoch": 0.26541653501163937, + "grad_norm": 1.2293830295401413, + "learning_rate": 8.846616799931218e-06, + "loss": 0.2761, + "step": 20580 + }, + { + "epoch": 0.26554550320163534, + "grad_norm": 1.2220262180857921, + "learning_rate": 8.850915656435389e-06, + "loss": 0.2941, + "step": 20590 + }, + { + "epoch": 0.26567447139163125, + "grad_norm": 1.2137351238205865, + "learning_rate": 8.855214512939559e-06, + "loss": 0.2694, + "step": 20600 + }, + { + "epoch": 0.26580343958162717, + "grad_norm": 1.2455307420349133, + "learning_rate": 8.859513369443728e-06, + "loss": 0.2697, + "step": 20610 + }, + { + "epoch": 0.26593240777162314, + "grad_norm": 1.1499469138727907, + "learning_rate": 8.863812225947898e-06, + "loss": 0.2902, + "step": 20620 + }, + { + "epoch": 0.26606137596161905, + "grad_norm": 1.157927635443526, + "learning_rate": 8.868111082452068e-06, + "loss": 0.2774, + "step": 20630 + }, + { + "epoch": 0.266190344151615, + "grad_norm": 1.1868444063590229, + "learning_rate": 8.872409938956239e-06, + "loss": 0.2857, + "step": 20640 + }, + { + "epoch": 0.26631931234161094, + "grad_norm": 1.2776496844518346, + "learning_rate": 8.876708795460409e-06, + "loss": 0.2722, + "step": 20650 + }, + { + "epoch": 0.26644828053160685, + "grad_norm": 1.2613203331527763, + "learning_rate": 8.881007651964577e-06, + "loss": 0.2913, + "step": 20660 + }, + { + "epoch": 0.2665772487216028, + "grad_norm": 1.2696849493153943, + "learning_rate": 8.885306508468748e-06, + "loss": 0.2863, + "step": 20670 + }, + { + "epoch": 0.26670621691159874, + "grad_norm": 1.19627056817796, + "learning_rate": 8.889605364972918e-06, + "loss": 0.2784, + "step": 20680 + }, + { + "epoch": 0.2668351851015947, + "grad_norm": 1.2195284202618188, + "learning_rate": 8.893904221477088e-06, + "loss": 0.2704, + "step": 20690 + }, + { + "epoch": 0.2669641532915906, + "grad_norm": 1.2834880148001058, + "learning_rate": 8.898203077981257e-06, + "loss": 0.2868, + "step": 20700 + }, + { + "epoch": 0.2670931214815866, + "grad_norm": 1.3763256699152262, + "learning_rate": 8.902501934485427e-06, + "loss": 0.2825, + "step": 20710 + }, + { + "epoch": 0.2672220896715825, + "grad_norm": 1.2264414061452666, + "learning_rate": 8.906800790989598e-06, + "loss": 0.2906, + "step": 20720 + }, + { + "epoch": 0.2673510578615784, + "grad_norm": 1.23583452092494, + "learning_rate": 8.911099647493768e-06, + "loss": 0.2818, + "step": 20730 + }, + { + "epoch": 0.2674800260515744, + "grad_norm": 1.165081028679378, + "learning_rate": 8.915398503997938e-06, + "loss": 0.2967, + "step": 20740 + }, + { + "epoch": 0.2676089942415703, + "grad_norm": 1.0918013016002255, + "learning_rate": 8.919697360502107e-06, + "loss": 0.2886, + "step": 20750 + }, + { + "epoch": 0.2677379624315663, + "grad_norm": 1.093261694175882, + "learning_rate": 8.923996217006277e-06, + "loss": 0.2761, + "step": 20760 + }, + { + "epoch": 0.2678669306215622, + "grad_norm": 1.2239908194525724, + "learning_rate": 8.928295073510447e-06, + "loss": 0.2853, + "step": 20770 + }, + { + "epoch": 0.2679958988115581, + "grad_norm": 1.2379232876167885, + "learning_rate": 8.932593930014618e-06, + "loss": 0.2932, + "step": 20780 + }, + { + "epoch": 0.2681248670015541, + "grad_norm": 1.253560565535808, + "learning_rate": 8.936892786518786e-06, + "loss": 0.2912, + "step": 20790 + }, + { + "epoch": 0.26825383519155, + "grad_norm": 1.2097973118352037, + "learning_rate": 8.941191643022957e-06, + "loss": 0.2875, + "step": 20800 + }, + { + "epoch": 0.26838280338154596, + "grad_norm": 1.365578056443223, + "learning_rate": 8.945490499527127e-06, + "loss": 0.2863, + "step": 20810 + }, + { + "epoch": 0.2685117715715419, + "grad_norm": 1.2215778937400015, + "learning_rate": 8.949789356031297e-06, + "loss": 0.2902, + "step": 20820 + }, + { + "epoch": 0.2686407397615378, + "grad_norm": 1.1957719062534027, + "learning_rate": 8.954088212535468e-06, + "loss": 0.2799, + "step": 20830 + }, + { + "epoch": 0.26876970795153377, + "grad_norm": 1.267880785655731, + "learning_rate": 8.958387069039636e-06, + "loss": 0.2856, + "step": 20840 + }, + { + "epoch": 0.2688986761415297, + "grad_norm": 1.0947311787395735, + "learning_rate": 8.962685925543806e-06, + "loss": 0.2804, + "step": 20850 + }, + { + "epoch": 0.26902764433152565, + "grad_norm": 1.2005575490246796, + "learning_rate": 8.966984782047977e-06, + "loss": 0.2844, + "step": 20860 + }, + { + "epoch": 0.26915661252152157, + "grad_norm": 1.3269232387933196, + "learning_rate": 8.971283638552147e-06, + "loss": 0.267, + "step": 20870 + }, + { + "epoch": 0.2692855807115175, + "grad_norm": 1.2756101630267378, + "learning_rate": 8.975582495056316e-06, + "loss": 0.2756, + "step": 20880 + }, + { + "epoch": 0.26941454890151345, + "grad_norm": 1.2317624016420095, + "learning_rate": 8.979881351560486e-06, + "loss": 0.281, + "step": 20890 + }, + { + "epoch": 0.26954351709150937, + "grad_norm": 1.287595844290043, + "learning_rate": 8.984180208064656e-06, + "loss": 0.2809, + "step": 20900 + }, + { + "epoch": 0.26967248528150534, + "grad_norm": 1.2230039348346866, + "learning_rate": 8.988479064568827e-06, + "loss": 0.2941, + "step": 20910 + }, + { + "epoch": 0.26980145347150125, + "grad_norm": 1.1163236003840142, + "learning_rate": 8.992777921072995e-06, + "loss": 0.2828, + "step": 20920 + }, + { + "epoch": 0.26993042166149717, + "grad_norm": 1.247272276933754, + "learning_rate": 8.997076777577165e-06, + "loss": 0.2777, + "step": 20930 + }, + { + "epoch": 0.27005938985149314, + "grad_norm": 1.2988998732307813, + "learning_rate": 9.001375634081336e-06, + "loss": 0.288, + "step": 20940 + }, + { + "epoch": 0.27018835804148905, + "grad_norm": 1.185791571324246, + "learning_rate": 9.005674490585506e-06, + "loss": 0.2817, + "step": 20950 + }, + { + "epoch": 0.270317326231485, + "grad_norm": 1.293802835839815, + "learning_rate": 9.009973347089675e-06, + "loss": 0.2982, + "step": 20960 + }, + { + "epoch": 0.27044629442148094, + "grad_norm": 1.1900237426612796, + "learning_rate": 9.014272203593845e-06, + "loss": 0.298, + "step": 20970 + }, + { + "epoch": 0.27057526261147685, + "grad_norm": 1.200847644334586, + "learning_rate": 9.018571060098015e-06, + "loss": 0.2786, + "step": 20980 + }, + { + "epoch": 0.2707042308014728, + "grad_norm": 1.22818529400226, + "learning_rate": 9.022869916602186e-06, + "loss": 0.2632, + "step": 20990 + }, + { + "epoch": 0.27083319899146874, + "grad_norm": 1.1427626624893759, + "learning_rate": 9.027168773106354e-06, + "loss": 0.2889, + "step": 21000 + }, + { + "epoch": 0.2709621671814647, + "grad_norm": 1.2176454544178041, + "learning_rate": 9.031467629610524e-06, + "loss": 0.273, + "step": 21010 + }, + { + "epoch": 0.2710911353714606, + "grad_norm": 1.4573491951668753, + "learning_rate": 9.035766486114695e-06, + "loss": 0.2809, + "step": 21020 + }, + { + "epoch": 0.2712201035614566, + "grad_norm": 1.2760141332126025, + "learning_rate": 9.040065342618865e-06, + "loss": 0.2888, + "step": 21030 + }, + { + "epoch": 0.2713490717514525, + "grad_norm": 1.1339034482580137, + "learning_rate": 9.044364199123034e-06, + "loss": 0.288, + "step": 21040 + }, + { + "epoch": 0.2714780399414484, + "grad_norm": 1.1573759073428282, + "learning_rate": 9.048663055627204e-06, + "loss": 0.2861, + "step": 21050 + }, + { + "epoch": 0.2716070081314444, + "grad_norm": 1.1462213895183548, + "learning_rate": 9.052961912131374e-06, + "loss": 0.295, + "step": 21060 + }, + { + "epoch": 0.2717359763214403, + "grad_norm": 1.1351520439993774, + "learning_rate": 9.057260768635545e-06, + "loss": 0.2764, + "step": 21070 + }, + { + "epoch": 0.2718649445114363, + "grad_norm": 1.2271977194040862, + "learning_rate": 9.061559625139713e-06, + "loss": 0.2935, + "step": 21080 + }, + { + "epoch": 0.2719939127014322, + "grad_norm": 1.2369477295234643, + "learning_rate": 9.065858481643883e-06, + "loss": 0.2904, + "step": 21090 + }, + { + "epoch": 0.2721228808914281, + "grad_norm": 1.1655752242377082, + "learning_rate": 9.070157338148054e-06, + "loss": 0.282, + "step": 21100 + }, + { + "epoch": 0.2722518490814241, + "grad_norm": 1.2531691762126498, + "learning_rate": 9.074456194652224e-06, + "loss": 0.29, + "step": 21110 + }, + { + "epoch": 0.27238081727142, + "grad_norm": 1.1489293341102707, + "learning_rate": 9.078755051156393e-06, + "loss": 0.2773, + "step": 21120 + }, + { + "epoch": 0.27250978546141597, + "grad_norm": 1.1113023519888163, + "learning_rate": 9.083053907660563e-06, + "loss": 0.2829, + "step": 21130 + }, + { + "epoch": 0.2726387536514119, + "grad_norm": 1.270566449119434, + "learning_rate": 9.087352764164733e-06, + "loss": 0.2647, + "step": 21140 + }, + { + "epoch": 0.2727677218414078, + "grad_norm": 1.1850571234880527, + "learning_rate": 9.091651620668904e-06, + "loss": 0.2792, + "step": 21150 + }, + { + "epoch": 0.27289669003140377, + "grad_norm": 1.0971059120467224, + "learning_rate": 9.095950477173072e-06, + "loss": 0.2771, + "step": 21160 + }, + { + "epoch": 0.2730256582213997, + "grad_norm": 1.2578179421446596, + "learning_rate": 9.100249333677242e-06, + "loss": 0.2833, + "step": 21170 + }, + { + "epoch": 0.27315462641139565, + "grad_norm": 1.0683992858338076, + "learning_rate": 9.104548190181413e-06, + "loss": 0.2813, + "step": 21180 + }, + { + "epoch": 0.27328359460139157, + "grad_norm": 1.2720087808353058, + "learning_rate": 9.108847046685583e-06, + "loss": 0.2751, + "step": 21190 + }, + { + "epoch": 0.2734125627913875, + "grad_norm": 1.054246987505351, + "learning_rate": 9.113145903189752e-06, + "loss": 0.2944, + "step": 21200 + }, + { + "epoch": 0.27354153098138345, + "grad_norm": 1.2275663954823475, + "learning_rate": 9.117444759693922e-06, + "loss": 0.2753, + "step": 21210 + }, + { + "epoch": 0.27367049917137937, + "grad_norm": 1.2522204141369504, + "learning_rate": 9.121743616198092e-06, + "loss": 0.2818, + "step": 21220 + }, + { + "epoch": 0.27379946736137534, + "grad_norm": 1.0780410639343434, + "learning_rate": 9.126042472702263e-06, + "loss": 0.2773, + "step": 21230 + }, + { + "epoch": 0.27392843555137125, + "grad_norm": 1.1681112878182158, + "learning_rate": 9.130341329206431e-06, + "loss": 0.2781, + "step": 21240 + }, + { + "epoch": 0.27405740374136717, + "grad_norm": 1.2520601538753158, + "learning_rate": 9.134640185710602e-06, + "loss": 0.2698, + "step": 21250 + }, + { + "epoch": 0.27418637193136314, + "grad_norm": 1.1821564057180374, + "learning_rate": 9.138939042214772e-06, + "loss": 0.2911, + "step": 21260 + }, + { + "epoch": 0.27431534012135905, + "grad_norm": 1.2978275285244147, + "learning_rate": 9.143237898718942e-06, + "loss": 0.2795, + "step": 21270 + }, + { + "epoch": 0.274444308311355, + "grad_norm": 1.244051444175893, + "learning_rate": 9.14753675522311e-06, + "loss": 0.2962, + "step": 21280 + }, + { + "epoch": 0.27457327650135094, + "grad_norm": 1.2400246463046598, + "learning_rate": 9.151835611727281e-06, + "loss": 0.2912, + "step": 21290 + }, + { + "epoch": 0.27470224469134685, + "grad_norm": 1.199145594778179, + "learning_rate": 9.156134468231451e-06, + "loss": 0.2775, + "step": 21300 + }, + { + "epoch": 0.2748312128813428, + "grad_norm": 1.3324595179489782, + "learning_rate": 9.160433324735622e-06, + "loss": 0.2949, + "step": 21310 + }, + { + "epoch": 0.27496018107133874, + "grad_norm": 1.397257922648067, + "learning_rate": 9.16473218123979e-06, + "loss": 0.2933, + "step": 21320 + }, + { + "epoch": 0.2750891492613347, + "grad_norm": 1.0732358060600455, + "learning_rate": 9.16903103774396e-06, + "loss": 0.2651, + "step": 21330 + }, + { + "epoch": 0.2752181174513306, + "grad_norm": 1.1959744280111106, + "learning_rate": 9.17332989424813e-06, + "loss": 0.279, + "step": 21340 + }, + { + "epoch": 0.2753470856413266, + "grad_norm": 1.1878163658115553, + "learning_rate": 9.177628750752301e-06, + "loss": 0.2718, + "step": 21350 + }, + { + "epoch": 0.2754760538313225, + "grad_norm": 1.1042410995958363, + "learning_rate": 9.18192760725647e-06, + "loss": 0.2888, + "step": 21360 + }, + { + "epoch": 0.2756050220213184, + "grad_norm": 1.1652257720958945, + "learning_rate": 9.18622646376064e-06, + "loss": 0.2838, + "step": 21370 + }, + { + "epoch": 0.2757339902113144, + "grad_norm": 1.248276792708013, + "learning_rate": 9.19052532026481e-06, + "loss": 0.2767, + "step": 21380 + }, + { + "epoch": 0.2758629584013103, + "grad_norm": 1.221047931007732, + "learning_rate": 9.19482417676898e-06, + "loss": 0.2883, + "step": 21390 + }, + { + "epoch": 0.2759919265913063, + "grad_norm": 1.185300263813856, + "learning_rate": 9.19912303327315e-06, + "loss": 0.2857, + "step": 21400 + }, + { + "epoch": 0.2761208947813022, + "grad_norm": 1.2123785062509558, + "learning_rate": 9.20342188977732e-06, + "loss": 0.283, + "step": 21410 + }, + { + "epoch": 0.2762498629712981, + "grad_norm": 1.0858969717015163, + "learning_rate": 9.20772074628149e-06, + "loss": 0.2791, + "step": 21420 + }, + { + "epoch": 0.2763788311612941, + "grad_norm": 1.216136430048943, + "learning_rate": 9.21201960278566e-06, + "loss": 0.2909, + "step": 21430 + }, + { + "epoch": 0.27650779935129, + "grad_norm": 1.285818074514038, + "learning_rate": 9.216318459289829e-06, + "loss": 0.2796, + "step": 21440 + }, + { + "epoch": 0.27663676754128597, + "grad_norm": 1.1484693832227757, + "learning_rate": 9.220617315793999e-06, + "loss": 0.2826, + "step": 21450 + }, + { + "epoch": 0.2767657357312819, + "grad_norm": 1.2031042822728257, + "learning_rate": 9.22491617229817e-06, + "loss": 0.2716, + "step": 21460 + }, + { + "epoch": 0.2768947039212778, + "grad_norm": 1.270206785490928, + "learning_rate": 9.22921502880234e-06, + "loss": 0.285, + "step": 21470 + }, + { + "epoch": 0.27702367211127377, + "grad_norm": 1.1534782826569874, + "learning_rate": 9.233513885306508e-06, + "loss": 0.2845, + "step": 21480 + }, + { + "epoch": 0.2771526403012697, + "grad_norm": 1.2032711764953776, + "learning_rate": 9.237812741810679e-06, + "loss": 0.2755, + "step": 21490 + }, + { + "epoch": 0.27728160849126565, + "grad_norm": 1.2789586626318112, + "learning_rate": 9.242111598314849e-06, + "loss": 0.2952, + "step": 21500 + }, + { + "epoch": 0.27741057668126157, + "grad_norm": 1.314193972427406, + "learning_rate": 9.246410454819019e-06, + "loss": 0.2837, + "step": 21510 + }, + { + "epoch": 0.2775395448712575, + "grad_norm": 1.2223977755884248, + "learning_rate": 9.250709311323188e-06, + "loss": 0.2888, + "step": 21520 + }, + { + "epoch": 0.27766851306125345, + "grad_norm": 1.2208596094449302, + "learning_rate": 9.255008167827358e-06, + "loss": 0.2973, + "step": 21530 + }, + { + "epoch": 0.27779748125124937, + "grad_norm": 1.175275237188252, + "learning_rate": 9.259307024331528e-06, + "loss": 0.2882, + "step": 21540 + }, + { + "epoch": 0.27792644944124534, + "grad_norm": 1.156724297852319, + "learning_rate": 9.263605880835699e-06, + "loss": 0.3016, + "step": 21550 + }, + { + "epoch": 0.27805541763124125, + "grad_norm": 1.3356842352626797, + "learning_rate": 9.267904737339867e-06, + "loss": 0.2981, + "step": 21560 + }, + { + "epoch": 0.27818438582123717, + "grad_norm": 1.0882257532782433, + "learning_rate": 9.272203593844038e-06, + "loss": 0.2805, + "step": 21570 + }, + { + "epoch": 0.27831335401123314, + "grad_norm": 1.2179092037597767, + "learning_rate": 9.276502450348208e-06, + "loss": 0.279, + "step": 21580 + }, + { + "epoch": 0.27844232220122905, + "grad_norm": 1.1470665517166188, + "learning_rate": 9.280801306852378e-06, + "loss": 0.2964, + "step": 21590 + }, + { + "epoch": 0.278571290391225, + "grad_norm": 1.1522178772480791, + "learning_rate": 9.285100163356547e-06, + "loss": 0.2787, + "step": 21600 + }, + { + "epoch": 0.27870025858122094, + "grad_norm": 1.3442731636894611, + "learning_rate": 9.289399019860717e-06, + "loss": 0.2784, + "step": 21610 + }, + { + "epoch": 0.27882922677121685, + "grad_norm": 1.1377533951063967, + "learning_rate": 9.293697876364887e-06, + "loss": 0.2798, + "step": 21620 + }, + { + "epoch": 0.2789581949612128, + "grad_norm": 1.2104610177274986, + "learning_rate": 9.297996732869058e-06, + "loss": 0.2859, + "step": 21630 + }, + { + "epoch": 0.27908716315120874, + "grad_norm": 1.13104357407801, + "learning_rate": 9.302295589373226e-06, + "loss": 0.2674, + "step": 21640 + }, + { + "epoch": 0.2792161313412047, + "grad_norm": 1.3142843652051963, + "learning_rate": 9.306594445877397e-06, + "loss": 0.2659, + "step": 21650 + }, + { + "epoch": 0.2793450995312006, + "grad_norm": 1.2223541657691692, + "learning_rate": 9.310893302381567e-06, + "loss": 0.2725, + "step": 21660 + }, + { + "epoch": 0.27947406772119654, + "grad_norm": 1.141089506142478, + "learning_rate": 9.315192158885737e-06, + "loss": 0.2863, + "step": 21670 + }, + { + "epoch": 0.2796030359111925, + "grad_norm": 1.187024076806071, + "learning_rate": 9.319491015389906e-06, + "loss": 0.2945, + "step": 21680 + }, + { + "epoch": 0.2797320041011884, + "grad_norm": 1.211221281582722, + "learning_rate": 9.323789871894076e-06, + "loss": 0.2755, + "step": 21690 + }, + { + "epoch": 0.2798609722911844, + "grad_norm": 1.1846754071209848, + "learning_rate": 9.328088728398246e-06, + "loss": 0.2995, + "step": 21700 + }, + { + "epoch": 0.2799899404811803, + "grad_norm": 1.1475907065407485, + "learning_rate": 9.332387584902417e-06, + "loss": 0.2754, + "step": 21710 + }, + { + "epoch": 0.2801189086711763, + "grad_norm": 1.0596189251632486, + "learning_rate": 9.336686441406585e-06, + "loss": 0.2746, + "step": 21720 + }, + { + "epoch": 0.2802478768611722, + "grad_norm": 1.1948623317189693, + "learning_rate": 9.340985297910756e-06, + "loss": 0.2758, + "step": 21730 + }, + { + "epoch": 0.2803768450511681, + "grad_norm": 1.1029956256231974, + "learning_rate": 9.345284154414926e-06, + "loss": 0.2784, + "step": 21740 + }, + { + "epoch": 0.2805058132411641, + "grad_norm": 1.0833838033299559, + "learning_rate": 9.349583010919096e-06, + "loss": 0.2973, + "step": 21750 + }, + { + "epoch": 0.28063478143116, + "grad_norm": 1.180629245233867, + "learning_rate": 9.353881867423266e-06, + "loss": 0.2809, + "step": 21760 + }, + { + "epoch": 0.28076374962115597, + "grad_norm": 1.0979031485226363, + "learning_rate": 9.358180723927435e-06, + "loss": 0.2814, + "step": 21770 + }, + { + "epoch": 0.2808927178111519, + "grad_norm": 1.2623731398994473, + "learning_rate": 9.362479580431605e-06, + "loss": 0.2763, + "step": 21780 + }, + { + "epoch": 0.2810216860011478, + "grad_norm": 1.1972286048539262, + "learning_rate": 9.366778436935776e-06, + "loss": 0.2712, + "step": 21790 + }, + { + "epoch": 0.28115065419114377, + "grad_norm": 1.2747317503473066, + "learning_rate": 9.371077293439946e-06, + "loss": 0.2828, + "step": 21800 + }, + { + "epoch": 0.2812796223811397, + "grad_norm": 1.3548673203843096, + "learning_rate": 9.375376149944115e-06, + "loss": 0.2844, + "step": 21810 + }, + { + "epoch": 0.28140859057113565, + "grad_norm": 1.263568171518427, + "learning_rate": 9.379675006448285e-06, + "loss": 0.284, + "step": 21820 + }, + { + "epoch": 0.28153755876113157, + "grad_norm": 1.3130364845022229, + "learning_rate": 9.383973862952455e-06, + "loss": 0.2709, + "step": 21830 + }, + { + "epoch": 0.2816665269511275, + "grad_norm": 1.218575004670508, + "learning_rate": 9.388272719456626e-06, + "loss": 0.2867, + "step": 21840 + }, + { + "epoch": 0.28179549514112345, + "grad_norm": 1.2404616155500094, + "learning_rate": 9.392571575960794e-06, + "loss": 0.2905, + "step": 21850 + }, + { + "epoch": 0.28192446333111937, + "grad_norm": 1.110173990161952, + "learning_rate": 9.396870432464964e-06, + "loss": 0.2722, + "step": 21860 + }, + { + "epoch": 0.28205343152111534, + "grad_norm": 1.2683199162704026, + "learning_rate": 9.401169288969135e-06, + "loss": 0.2874, + "step": 21870 + }, + { + "epoch": 0.28218239971111125, + "grad_norm": 1.2327737392860185, + "learning_rate": 9.405468145473305e-06, + "loss": 0.2688, + "step": 21880 + }, + { + "epoch": 0.28231136790110717, + "grad_norm": 1.0706940046066795, + "learning_rate": 9.409767001977475e-06, + "loss": 0.2778, + "step": 21890 + }, + { + "epoch": 0.28244033609110314, + "grad_norm": 1.3209525282034944, + "learning_rate": 9.414065858481644e-06, + "loss": 0.2674, + "step": 21900 + }, + { + "epoch": 0.28256930428109905, + "grad_norm": 1.139257118662385, + "learning_rate": 9.418364714985814e-06, + "loss": 0.2928, + "step": 21910 + }, + { + "epoch": 0.282698272471095, + "grad_norm": 1.0486454016493014, + "learning_rate": 9.422663571489985e-06, + "loss": 0.2922, + "step": 21920 + }, + { + "epoch": 0.28282724066109094, + "grad_norm": 1.3247455356614048, + "learning_rate": 9.426962427994155e-06, + "loss": 0.2841, + "step": 21930 + }, + { + "epoch": 0.28295620885108685, + "grad_norm": 1.0856540681020783, + "learning_rate": 9.431261284498323e-06, + "loss": 0.2813, + "step": 21940 + }, + { + "epoch": 0.2830851770410828, + "grad_norm": 1.2357856584421811, + "learning_rate": 9.435560141002494e-06, + "loss": 0.2931, + "step": 21950 + }, + { + "epoch": 0.28321414523107874, + "grad_norm": 1.3081891717930922, + "learning_rate": 9.439858997506664e-06, + "loss": 0.2962, + "step": 21960 + }, + { + "epoch": 0.2833431134210747, + "grad_norm": 1.063129666278427, + "learning_rate": 9.444157854010834e-06, + "loss": 0.2661, + "step": 21970 + }, + { + "epoch": 0.2834720816110706, + "grad_norm": 1.1800072861179047, + "learning_rate": 9.448456710515005e-06, + "loss": 0.2728, + "step": 21980 + }, + { + "epoch": 0.28360104980106654, + "grad_norm": 1.193995149367534, + "learning_rate": 9.452755567019173e-06, + "loss": 0.2874, + "step": 21990 + }, + { + "epoch": 0.2837300179910625, + "grad_norm": 1.09856861169371, + "learning_rate": 9.457054423523344e-06, + "loss": 0.2846, + "step": 22000 + }, + { + "epoch": 0.2838589861810584, + "grad_norm": 1.232420931399586, + "learning_rate": 9.461353280027514e-06, + "loss": 0.2811, + "step": 22010 + }, + { + "epoch": 0.2839879543710544, + "grad_norm": 1.1567258336574615, + "learning_rate": 9.465652136531684e-06, + "loss": 0.262, + "step": 22020 + }, + { + "epoch": 0.2841169225610503, + "grad_norm": 1.044033404298731, + "learning_rate": 9.469950993035853e-06, + "loss": 0.2684, + "step": 22030 + }, + { + "epoch": 0.2842458907510463, + "grad_norm": 1.1172435343769962, + "learning_rate": 9.474249849540023e-06, + "loss": 0.2732, + "step": 22040 + }, + { + "epoch": 0.2843748589410422, + "grad_norm": 1.3354804216174583, + "learning_rate": 9.478548706044193e-06, + "loss": 0.2873, + "step": 22050 + }, + { + "epoch": 0.2845038271310381, + "grad_norm": 1.2018392234632953, + "learning_rate": 9.482847562548364e-06, + "loss": 0.2849, + "step": 22060 + }, + { + "epoch": 0.2846327953210341, + "grad_norm": 1.0821413457807356, + "learning_rate": 9.487146419052532e-06, + "loss": 0.2711, + "step": 22070 + }, + { + "epoch": 0.28476176351103, + "grad_norm": 1.2854825637793779, + "learning_rate": 9.491445275556703e-06, + "loss": 0.3015, + "step": 22080 + }, + { + "epoch": 0.28489073170102597, + "grad_norm": 1.1266733020196291, + "learning_rate": 9.495744132060873e-06, + "loss": 0.2591, + "step": 22090 + }, + { + "epoch": 0.2850196998910219, + "grad_norm": 1.2550272152112982, + "learning_rate": 9.500042988565043e-06, + "loss": 0.2774, + "step": 22100 + }, + { + "epoch": 0.2851486680810178, + "grad_norm": 1.1731931839382064, + "learning_rate": 9.504341845069213e-06, + "loss": 0.2788, + "step": 22110 + }, + { + "epoch": 0.28527763627101377, + "grad_norm": 1.2840124274433717, + "learning_rate": 9.508640701573382e-06, + "loss": 0.2845, + "step": 22120 + }, + { + "epoch": 0.2854066044610097, + "grad_norm": 1.1295343605142227, + "learning_rate": 9.512939558077552e-06, + "loss": 0.282, + "step": 22130 + }, + { + "epoch": 0.28553557265100565, + "grad_norm": 1.3475595031509453, + "learning_rate": 9.517238414581723e-06, + "loss": 0.2736, + "step": 22140 + }, + { + "epoch": 0.28566454084100157, + "grad_norm": 1.1576001121318074, + "learning_rate": 9.521537271085893e-06, + "loss": 0.2752, + "step": 22150 + }, + { + "epoch": 0.2857935090309975, + "grad_norm": 1.2327548369517694, + "learning_rate": 9.525836127590062e-06, + "loss": 0.2882, + "step": 22160 + }, + { + "epoch": 0.28592247722099345, + "grad_norm": 1.1806980741694246, + "learning_rate": 9.530134984094232e-06, + "loss": 0.2878, + "step": 22170 + }, + { + "epoch": 0.28605144541098937, + "grad_norm": 1.1145935835542917, + "learning_rate": 9.534433840598402e-06, + "loss": 0.2791, + "step": 22180 + }, + { + "epoch": 0.28618041360098534, + "grad_norm": 1.1638340783201668, + "learning_rate": 9.538732697102572e-06, + "loss": 0.2745, + "step": 22190 + }, + { + "epoch": 0.28630938179098125, + "grad_norm": 1.2575341257978008, + "learning_rate": 9.543031553606743e-06, + "loss": 0.2899, + "step": 22200 + }, + { + "epoch": 0.28643834998097717, + "grad_norm": 1.150088416562124, + "learning_rate": 9.547330410110911e-06, + "loss": 0.2878, + "step": 22210 + }, + { + "epoch": 0.28656731817097314, + "grad_norm": 1.1558092954098842, + "learning_rate": 9.551629266615082e-06, + "loss": 0.2823, + "step": 22220 + }, + { + "epoch": 0.28669628636096905, + "grad_norm": 1.1651035152269371, + "learning_rate": 9.555928123119252e-06, + "loss": 0.2792, + "step": 22230 + }, + { + "epoch": 0.286825254550965, + "grad_norm": 1.2481477002755126, + "learning_rate": 9.560226979623422e-06, + "loss": 0.2892, + "step": 22240 + }, + { + "epoch": 0.28695422274096094, + "grad_norm": 1.1304812355356062, + "learning_rate": 9.564525836127591e-06, + "loss": 0.2718, + "step": 22250 + }, + { + "epoch": 0.28708319093095686, + "grad_norm": 1.1993802129058662, + "learning_rate": 9.568824692631761e-06, + "loss": 0.2947, + "step": 22260 + }, + { + "epoch": 0.2872121591209528, + "grad_norm": 1.2062541554842374, + "learning_rate": 9.573123549135931e-06, + "loss": 0.2814, + "step": 22270 + }, + { + "epoch": 0.28734112731094874, + "grad_norm": 1.181550615850118, + "learning_rate": 9.577422405640102e-06, + "loss": 0.2757, + "step": 22280 + }, + { + "epoch": 0.2874700955009447, + "grad_norm": 1.1590602056844252, + "learning_rate": 9.58172126214427e-06, + "loss": 0.2774, + "step": 22290 + }, + { + "epoch": 0.2875990636909406, + "grad_norm": 1.2005919026081853, + "learning_rate": 9.58602011864844e-06, + "loss": 0.2842, + "step": 22300 + }, + { + "epoch": 0.28772803188093654, + "grad_norm": 1.19093037093619, + "learning_rate": 9.590318975152611e-06, + "loss": 0.2842, + "step": 22310 + }, + { + "epoch": 0.2878570000709325, + "grad_norm": 1.1817410106306439, + "learning_rate": 9.594617831656781e-06, + "loss": 0.282, + "step": 22320 + }, + { + "epoch": 0.2879859682609284, + "grad_norm": 1.2840484893856738, + "learning_rate": 9.59891668816095e-06, + "loss": 0.2917, + "step": 22330 + }, + { + "epoch": 0.2881149364509244, + "grad_norm": 1.2449830626881067, + "learning_rate": 9.60321554466512e-06, + "loss": 0.295, + "step": 22340 + }, + { + "epoch": 0.2882439046409203, + "grad_norm": 1.1609273926694317, + "learning_rate": 9.60751440116929e-06, + "loss": 0.2787, + "step": 22350 + }, + { + "epoch": 0.2883728728309162, + "grad_norm": 1.1822698254476696, + "learning_rate": 9.61181325767346e-06, + "loss": 0.2708, + "step": 22360 + }, + { + "epoch": 0.2885018410209122, + "grad_norm": 1.1253286887828886, + "learning_rate": 9.61611211417763e-06, + "loss": 0.2722, + "step": 22370 + }, + { + "epoch": 0.2886308092109081, + "grad_norm": 1.1186491986423357, + "learning_rate": 9.6204109706818e-06, + "loss": 0.2958, + "step": 22380 + }, + { + "epoch": 0.2887597774009041, + "grad_norm": 1.1647584244637905, + "learning_rate": 9.62470982718597e-06, + "loss": 0.271, + "step": 22390 + }, + { + "epoch": 0.2888887455909, + "grad_norm": 1.2645683823191636, + "learning_rate": 9.62900868369014e-06, + "loss": 0.2911, + "step": 22400 + }, + { + "epoch": 0.28901771378089597, + "grad_norm": 1.2018348539960506, + "learning_rate": 9.633307540194309e-06, + "loss": 0.2696, + "step": 22410 + }, + { + "epoch": 0.2891466819708919, + "grad_norm": 1.2150093452577548, + "learning_rate": 9.63760639669848e-06, + "loss": 0.2781, + "step": 22420 + }, + { + "epoch": 0.2892756501608878, + "grad_norm": 1.1242505937760865, + "learning_rate": 9.64190525320265e-06, + "loss": 0.272, + "step": 22430 + }, + { + "epoch": 0.28940461835088377, + "grad_norm": 1.1559249055343914, + "learning_rate": 9.64620410970682e-06, + "loss": 0.2706, + "step": 22440 + }, + { + "epoch": 0.2895335865408797, + "grad_norm": 1.164425790121461, + "learning_rate": 9.650502966210988e-06, + "loss": 0.2634, + "step": 22450 + }, + { + "epoch": 0.28966255473087565, + "grad_norm": 1.2069875187126045, + "learning_rate": 9.654801822715159e-06, + "loss": 0.2892, + "step": 22460 + }, + { + "epoch": 0.28979152292087157, + "grad_norm": 1.2416672807432032, + "learning_rate": 9.659100679219329e-06, + "loss": 0.2771, + "step": 22470 + }, + { + "epoch": 0.2899204911108675, + "grad_norm": 1.1613782118881804, + "learning_rate": 9.6633995357235e-06, + "loss": 0.2821, + "step": 22480 + }, + { + "epoch": 0.29004945930086345, + "grad_norm": 1.1702781496071437, + "learning_rate": 9.667698392227668e-06, + "loss": 0.2809, + "step": 22490 + }, + { + "epoch": 0.29017842749085937, + "grad_norm": 1.1348508636635686, + "learning_rate": 9.671997248731838e-06, + "loss": 0.269, + "step": 22500 + }, + { + "epoch": 0.29030739568085534, + "grad_norm": 1.2685620194434213, + "learning_rate": 9.676296105236009e-06, + "loss": 0.2786, + "step": 22510 + }, + { + "epoch": 0.29043636387085126, + "grad_norm": 1.092818222877725, + "learning_rate": 9.680594961740179e-06, + "loss": 0.2882, + "step": 22520 + }, + { + "epoch": 0.29056533206084717, + "grad_norm": 1.2274047776058503, + "learning_rate": 9.684893818244347e-06, + "loss": 0.2639, + "step": 22530 + }, + { + "epoch": 0.29069430025084314, + "grad_norm": 1.3273868665439823, + "learning_rate": 9.689192674748518e-06, + "loss": 0.2663, + "step": 22540 + }, + { + "epoch": 0.29082326844083906, + "grad_norm": 1.2612225373009214, + "learning_rate": 9.693491531252688e-06, + "loss": 0.2785, + "step": 22550 + }, + { + "epoch": 0.290952236630835, + "grad_norm": 1.2337708226786188, + "learning_rate": 9.697790387756858e-06, + "loss": 0.2764, + "step": 22560 + }, + { + "epoch": 0.29108120482083094, + "grad_norm": 1.2047417679478365, + "learning_rate": 9.702089244261027e-06, + "loss": 0.2895, + "step": 22570 + }, + { + "epoch": 0.29121017301082686, + "grad_norm": 1.2611850587175972, + "learning_rate": 9.706388100765197e-06, + "loss": 0.2752, + "step": 22580 + }, + { + "epoch": 0.2913391412008228, + "grad_norm": 1.2494041245065894, + "learning_rate": 9.710686957269368e-06, + "loss": 0.2686, + "step": 22590 + }, + { + "epoch": 0.29146810939081874, + "grad_norm": 1.143635622509657, + "learning_rate": 9.714985813773538e-06, + "loss": 0.2763, + "step": 22600 + }, + { + "epoch": 0.2915970775808147, + "grad_norm": 1.2284657204794953, + "learning_rate": 9.719284670277706e-06, + "loss": 0.2701, + "step": 22610 + }, + { + "epoch": 0.2917260457708106, + "grad_norm": 1.1077858999045296, + "learning_rate": 9.723583526781877e-06, + "loss": 0.2813, + "step": 22620 + }, + { + "epoch": 0.29185501396080654, + "grad_norm": 1.2110272100527848, + "learning_rate": 9.727882383286047e-06, + "loss": 0.2774, + "step": 22630 + }, + { + "epoch": 0.2919839821508025, + "grad_norm": 1.2166777644252944, + "learning_rate": 9.732181239790217e-06, + "loss": 0.2791, + "step": 22640 + }, + { + "epoch": 0.2921129503407984, + "grad_norm": 1.230257614375728, + "learning_rate": 9.736480096294386e-06, + "loss": 0.2821, + "step": 22650 + }, + { + "epoch": 0.2922419185307944, + "grad_norm": 1.3034414143419635, + "learning_rate": 9.740778952798556e-06, + "loss": 0.2755, + "step": 22660 + }, + { + "epoch": 0.2923708867207903, + "grad_norm": 1.2584940260480157, + "learning_rate": 9.745077809302727e-06, + "loss": 0.2796, + "step": 22670 + }, + { + "epoch": 0.29249985491078623, + "grad_norm": 1.198619107602622, + "learning_rate": 9.749376665806897e-06, + "loss": 0.2764, + "step": 22680 + }, + { + "epoch": 0.2926288231007822, + "grad_norm": 1.3264061091769426, + "learning_rate": 9.753675522311065e-06, + "loss": 0.2765, + "step": 22690 + }, + { + "epoch": 0.2927577912907781, + "grad_norm": 1.2491534285762413, + "learning_rate": 9.757974378815236e-06, + "loss": 0.2674, + "step": 22700 + }, + { + "epoch": 0.2928867594807741, + "grad_norm": 1.1547247396549576, + "learning_rate": 9.762273235319406e-06, + "loss": 0.2724, + "step": 22710 + }, + { + "epoch": 0.29301572767077, + "grad_norm": 1.1338607621424075, + "learning_rate": 9.766572091823576e-06, + "loss": 0.2735, + "step": 22720 + }, + { + "epoch": 0.29314469586076597, + "grad_norm": 1.1299865317173339, + "learning_rate": 9.770870948327745e-06, + "loss": 0.2653, + "step": 22730 + }, + { + "epoch": 0.2932736640507619, + "grad_norm": 1.1879099301400116, + "learning_rate": 9.775169804831915e-06, + "loss": 0.2692, + "step": 22740 + }, + { + "epoch": 0.2934026322407578, + "grad_norm": 1.1298729869719888, + "learning_rate": 9.779468661336086e-06, + "loss": 0.284, + "step": 22750 + }, + { + "epoch": 0.29353160043075377, + "grad_norm": 1.0832289494704708, + "learning_rate": 9.783767517840256e-06, + "loss": 0.2785, + "step": 22760 + }, + { + "epoch": 0.2936605686207497, + "grad_norm": 1.2176890707780077, + "learning_rate": 9.788066374344424e-06, + "loss": 0.2825, + "step": 22770 + }, + { + "epoch": 0.29378953681074566, + "grad_norm": 1.042960358998339, + "learning_rate": 9.792365230848595e-06, + "loss": 0.2752, + "step": 22780 + }, + { + "epoch": 0.29391850500074157, + "grad_norm": 1.107156033969775, + "learning_rate": 9.796664087352765e-06, + "loss": 0.2798, + "step": 22790 + }, + { + "epoch": 0.2940474731907375, + "grad_norm": 1.3166845916535972, + "learning_rate": 9.800962943856935e-06, + "loss": 0.2845, + "step": 22800 + }, + { + "epoch": 0.29417644138073346, + "grad_norm": 1.1645197506049303, + "learning_rate": 9.805261800361104e-06, + "loss": 0.2905, + "step": 22810 + }, + { + "epoch": 0.29430540957072937, + "grad_norm": 1.027341921659574, + "learning_rate": 9.809560656865274e-06, + "loss": 0.2877, + "step": 22820 + }, + { + "epoch": 0.29443437776072534, + "grad_norm": 1.0991903128724985, + "learning_rate": 9.813859513369445e-06, + "loss": 0.2792, + "step": 22830 + }, + { + "epoch": 0.29456334595072126, + "grad_norm": 1.3639593800002376, + "learning_rate": 9.818158369873615e-06, + "loss": 0.2635, + "step": 22840 + }, + { + "epoch": 0.29469231414071717, + "grad_norm": 1.0592833408152713, + "learning_rate": 9.822457226377784e-06, + "loss": 0.2725, + "step": 22850 + }, + { + "epoch": 0.29482128233071314, + "grad_norm": 1.1058465587257449, + "learning_rate": 9.826756082881954e-06, + "loss": 0.2872, + "step": 22860 + }, + { + "epoch": 0.29495025052070906, + "grad_norm": 1.2275242666232766, + "learning_rate": 9.831054939386124e-06, + "loss": 0.2953, + "step": 22870 + }, + { + "epoch": 0.295079218710705, + "grad_norm": 1.3667508139730988, + "learning_rate": 9.835353795890294e-06, + "loss": 0.2696, + "step": 22880 + }, + { + "epoch": 0.29520818690070094, + "grad_norm": 1.1720545023038187, + "learning_rate": 9.839652652394463e-06, + "loss": 0.2656, + "step": 22890 + }, + { + "epoch": 0.29533715509069686, + "grad_norm": 1.156181055143942, + "learning_rate": 9.843951508898633e-06, + "loss": 0.2779, + "step": 22900 + }, + { + "epoch": 0.2954661232806928, + "grad_norm": 1.2388626707867918, + "learning_rate": 9.848250365402804e-06, + "loss": 0.2689, + "step": 22910 + }, + { + "epoch": 0.29559509147068874, + "grad_norm": 1.149817871181753, + "learning_rate": 9.852549221906974e-06, + "loss": 0.2731, + "step": 22920 + }, + { + "epoch": 0.2957240596606847, + "grad_norm": 1.0770628479933961, + "learning_rate": 9.856848078411143e-06, + "loss": 0.288, + "step": 22930 + }, + { + "epoch": 0.2958530278506806, + "grad_norm": 1.1447500539549695, + "learning_rate": 9.861146934915313e-06, + "loss": 0.2775, + "step": 22940 + }, + { + "epoch": 0.29598199604067654, + "grad_norm": 1.0807888270621466, + "learning_rate": 9.865445791419483e-06, + "loss": 0.2796, + "step": 22950 + }, + { + "epoch": 0.2961109642306725, + "grad_norm": 1.1970724811095383, + "learning_rate": 9.869744647923653e-06, + "loss": 0.2855, + "step": 22960 + }, + { + "epoch": 0.29623993242066843, + "grad_norm": 1.1875827500294314, + "learning_rate": 9.874043504427822e-06, + "loss": 0.2862, + "step": 22970 + }, + { + "epoch": 0.2963689006106644, + "grad_norm": 1.2537973527374333, + "learning_rate": 9.878342360931992e-06, + "loss": 0.2662, + "step": 22980 + }, + { + "epoch": 0.2964978688006603, + "grad_norm": 1.3073063152966682, + "learning_rate": 9.882641217436163e-06, + "loss": 0.301, + "step": 22990 + }, + { + "epoch": 0.29662683699065623, + "grad_norm": 1.161931206593462, + "learning_rate": 9.886940073940333e-06, + "loss": 0.2739, + "step": 23000 + }, + { + "epoch": 0.2967558051806522, + "grad_norm": 1.272196417001723, + "learning_rate": 9.891238930444502e-06, + "loss": 0.2901, + "step": 23010 + }, + { + "epoch": 0.2968847733706481, + "grad_norm": 1.0698666921272075, + "learning_rate": 9.895537786948672e-06, + "loss": 0.2755, + "step": 23020 + }, + { + "epoch": 0.2970137415606441, + "grad_norm": 1.1642879816814284, + "learning_rate": 9.899836643452842e-06, + "loss": 0.2742, + "step": 23030 + }, + { + "epoch": 0.29714270975064, + "grad_norm": 1.2580254376920512, + "learning_rate": 9.904135499957012e-06, + "loss": 0.2936, + "step": 23040 + }, + { + "epoch": 0.2972716779406359, + "grad_norm": 1.3210664972385742, + "learning_rate": 9.908434356461181e-06, + "loss": 0.2703, + "step": 23050 + }, + { + "epoch": 0.2974006461306319, + "grad_norm": 1.0985485202234628, + "learning_rate": 9.912733212965351e-06, + "loss": 0.2742, + "step": 23060 + }, + { + "epoch": 0.2975296143206278, + "grad_norm": 1.1300384210398053, + "learning_rate": 9.917032069469522e-06, + "loss": 0.2786, + "step": 23070 + }, + { + "epoch": 0.29765858251062377, + "grad_norm": 1.122006947733289, + "learning_rate": 9.921330925973692e-06, + "loss": 0.2699, + "step": 23080 + }, + { + "epoch": 0.2977875507006197, + "grad_norm": 1.2045987501505424, + "learning_rate": 9.92562978247786e-06, + "loss": 0.2795, + "step": 23090 + }, + { + "epoch": 0.29791651889061566, + "grad_norm": 1.1869083540274046, + "learning_rate": 9.929928638982031e-06, + "loss": 0.2846, + "step": 23100 + }, + { + "epoch": 0.29804548708061157, + "grad_norm": 1.0374717414885586, + "learning_rate": 9.934227495486201e-06, + "loss": 0.2838, + "step": 23110 + }, + { + "epoch": 0.2981744552706075, + "grad_norm": 1.1701220548621551, + "learning_rate": 9.938526351990371e-06, + "loss": 0.2738, + "step": 23120 + }, + { + "epoch": 0.29830342346060346, + "grad_norm": 1.0505386860119001, + "learning_rate": 9.942825208494542e-06, + "loss": 0.2803, + "step": 23130 + }, + { + "epoch": 0.29843239165059937, + "grad_norm": 1.058258125620788, + "learning_rate": 9.94712406499871e-06, + "loss": 0.2666, + "step": 23140 + }, + { + "epoch": 0.29856135984059534, + "grad_norm": 1.1443877452308955, + "learning_rate": 9.95142292150288e-06, + "loss": 0.2886, + "step": 23150 + }, + { + "epoch": 0.29869032803059126, + "grad_norm": 1.0968934615561674, + "learning_rate": 9.955721778007051e-06, + "loss": 0.2719, + "step": 23160 + }, + { + "epoch": 0.29881929622058717, + "grad_norm": 1.0907307632516856, + "learning_rate": 9.960020634511221e-06, + "loss": 0.2763, + "step": 23170 + }, + { + "epoch": 0.29894826441058314, + "grad_norm": 1.120518362187853, + "learning_rate": 9.96431949101539e-06, + "loss": 0.281, + "step": 23180 + }, + { + "epoch": 0.29907723260057906, + "grad_norm": 1.2930827092590833, + "learning_rate": 9.96861834751956e-06, + "loss": 0.2853, + "step": 23190 + }, + { + "epoch": 0.299206200790575, + "grad_norm": 1.081189935440256, + "learning_rate": 9.97291720402373e-06, + "loss": 0.2754, + "step": 23200 + }, + { + "epoch": 0.29933516898057094, + "grad_norm": 1.2139783159206845, + "learning_rate": 9.9772160605279e-06, + "loss": 0.2633, + "step": 23210 + }, + { + "epoch": 0.29946413717056686, + "grad_norm": 1.2384282508134852, + "learning_rate": 9.98151491703207e-06, + "loss": 0.2793, + "step": 23220 + }, + { + "epoch": 0.29959310536056283, + "grad_norm": 1.2211761698314172, + "learning_rate": 9.98581377353624e-06, + "loss": 0.2716, + "step": 23230 + }, + { + "epoch": 0.29972207355055874, + "grad_norm": 1.2543921141287973, + "learning_rate": 9.99011263004041e-06, + "loss": 0.2703, + "step": 23240 + }, + { + "epoch": 0.2998510417405547, + "grad_norm": 1.150359781923331, + "learning_rate": 9.99441148654458e-06, + "loss": 0.2821, + "step": 23250 + }, + { + "epoch": 0.29998000993055063, + "grad_norm": 1.2255040782151485, + "learning_rate": 9.99871034304875e-06, + "loss": 0.2746, + "step": 23260 + }, + { + "epoch": 0.30010897812054654, + "grad_norm": 1.0435899089759002, + "learning_rate": 9.999999972415245e-06, + "loss": 0.2573, + "step": 23270 + }, + { + "epoch": 0.3002379463105425, + "grad_norm": 1.1584545729899447, + "learning_rate": 9.999999837306232e-06, + "loss": 0.288, + "step": 23280 + }, + { + "epoch": 0.30036691450053843, + "grad_norm": 1.1639247930652108, + "learning_rate": 9.99999958960638e-06, + "loss": 0.2821, + "step": 23290 + }, + { + "epoch": 0.3004958826905344, + "grad_norm": 1.2196306293447634, + "learning_rate": 9.999999229315693e-06, + "loss": 0.2761, + "step": 23300 + }, + { + "epoch": 0.3006248508805303, + "grad_norm": 1.2305947408744555, + "learning_rate": 9.99999875643418e-06, + "loss": 0.2809, + "step": 23310 + }, + { + "epoch": 0.30075381907052623, + "grad_norm": 1.2345177012032913, + "learning_rate": 9.999998170961852e-06, + "loss": 0.2935, + "step": 23320 + }, + { + "epoch": 0.3008827872605222, + "grad_norm": 1.1527022578235058, + "learning_rate": 9.99999747289872e-06, + "loss": 0.2794, + "step": 23330 + }, + { + "epoch": 0.3010117554505181, + "grad_norm": 1.1987902840716815, + "learning_rate": 9.999996662244802e-06, + "loss": 0.2928, + "step": 23340 + }, + { + "epoch": 0.3011407236405141, + "grad_norm": 1.2138580744991225, + "learning_rate": 9.999995739000116e-06, + "loss": 0.2807, + "step": 23350 + }, + { + "epoch": 0.30126969183051, + "grad_norm": 1.1850244045468046, + "learning_rate": 9.999994703164682e-06, + "loss": 0.278, + "step": 23360 + }, + { + "epoch": 0.3013986600205059, + "grad_norm": 1.0648483187109195, + "learning_rate": 9.999993554738521e-06, + "loss": 0.2711, + "step": 23370 + }, + { + "epoch": 0.3015276282105019, + "grad_norm": 1.1115652107234713, + "learning_rate": 9.999992293721663e-06, + "loss": 0.2771, + "step": 23380 + }, + { + "epoch": 0.3016565964004978, + "grad_norm": 1.1322695329447907, + "learning_rate": 9.999990920114135e-06, + "loss": 0.2829, + "step": 23390 + }, + { + "epoch": 0.30178556459049377, + "grad_norm": 1.1694002098693241, + "learning_rate": 9.999989433915969e-06, + "loss": 0.281, + "step": 23400 + }, + { + "epoch": 0.3019145327804897, + "grad_norm": 1.1250156382470344, + "learning_rate": 9.999987835127196e-06, + "loss": 0.2792, + "step": 23410 + }, + { + "epoch": 0.30204350097048566, + "grad_norm": 1.2077625870776936, + "learning_rate": 9.999986123747854e-06, + "loss": 0.2667, + "step": 23420 + }, + { + "epoch": 0.30217246916048157, + "grad_norm": 1.0833185675236288, + "learning_rate": 9.999984299777978e-06, + "loss": 0.262, + "step": 23430 + }, + { + "epoch": 0.3023014373504775, + "grad_norm": 1.2188915632448398, + "learning_rate": 9.999982363217614e-06, + "loss": 0.2782, + "step": 23440 + }, + { + "epoch": 0.30243040554047346, + "grad_norm": 1.0653747957942916, + "learning_rate": 9.999980314066802e-06, + "loss": 0.2801, + "step": 23450 + }, + { + "epoch": 0.30255937373046937, + "grad_norm": 1.2052271688875718, + "learning_rate": 9.999978152325591e-06, + "loss": 0.2836, + "step": 23460 + }, + { + "epoch": 0.30268834192046534, + "grad_norm": 1.1403686216073647, + "learning_rate": 9.999975877994028e-06, + "loss": 0.2777, + "step": 23470 + }, + { + "epoch": 0.30281731011046126, + "grad_norm": 1.0207226843394555, + "learning_rate": 9.999973491072164e-06, + "loss": 0.2636, + "step": 23480 + }, + { + "epoch": 0.3029462783004572, + "grad_norm": 1.0401312856194493, + "learning_rate": 9.999970991560053e-06, + "loss": 0.2869, + "step": 23490 + }, + { + "epoch": 0.30307524649045314, + "grad_norm": 1.1243214000723498, + "learning_rate": 9.999968379457752e-06, + "loss": 0.2835, + "step": 23500 + }, + { + "epoch": 0.30320421468044906, + "grad_norm": 1.0183776132775324, + "learning_rate": 9.999965654765318e-06, + "loss": 0.2574, + "step": 23510 + }, + { + "epoch": 0.30333318287044503, + "grad_norm": 1.2584129315101846, + "learning_rate": 9.999962817482816e-06, + "loss": 0.264, + "step": 23520 + }, + { + "epoch": 0.30346215106044094, + "grad_norm": 1.1913566451613478, + "learning_rate": 9.999959867610305e-06, + "loss": 0.2749, + "step": 23530 + }, + { + "epoch": 0.30359111925043686, + "grad_norm": 1.2358338203623336, + "learning_rate": 9.999956805147855e-06, + "loss": 0.275, + "step": 23540 + }, + { + "epoch": 0.30372008744043283, + "grad_norm": 1.3152051688553368, + "learning_rate": 9.999953630095533e-06, + "loss": 0.2803, + "step": 23550 + }, + { + "epoch": 0.30384905563042874, + "grad_norm": 1.1874319757538565, + "learning_rate": 9.999950342453412e-06, + "loss": 0.2853, + "step": 23560 + }, + { + "epoch": 0.3039780238204247, + "grad_norm": 1.251667102893108, + "learning_rate": 9.999946942221566e-06, + "loss": 0.2749, + "step": 23570 + }, + { + "epoch": 0.30410699201042063, + "grad_norm": 1.161921202016273, + "learning_rate": 9.999943429400071e-06, + "loss": 0.2787, + "step": 23580 + }, + { + "epoch": 0.30423596020041654, + "grad_norm": 1.1374399183705008, + "learning_rate": 9.999939803989007e-06, + "loss": 0.2586, + "step": 23590 + }, + { + "epoch": 0.3043649283904125, + "grad_norm": 1.0380093677457525, + "learning_rate": 9.999936065988453e-06, + "loss": 0.2724, + "step": 23600 + }, + { + "epoch": 0.30449389658040843, + "grad_norm": 1.1329880897124016, + "learning_rate": 9.999932215398496e-06, + "loss": 0.2886, + "step": 23610 + }, + { + "epoch": 0.3046228647704044, + "grad_norm": 1.2223150867239883, + "learning_rate": 9.999928252219219e-06, + "loss": 0.2674, + "step": 23620 + }, + { + "epoch": 0.3047518329604003, + "grad_norm": 1.2076706497422938, + "learning_rate": 9.999924176450717e-06, + "loss": 0.2868, + "step": 23630 + }, + { + "epoch": 0.30488080115039623, + "grad_norm": 1.1494187894772705, + "learning_rate": 9.999919988093077e-06, + "loss": 0.2789, + "step": 23640 + }, + { + "epoch": 0.3050097693403922, + "grad_norm": 1.2737018309487693, + "learning_rate": 9.999915687146395e-06, + "loss": 0.2809, + "step": 23650 + }, + { + "epoch": 0.3051387375303881, + "grad_norm": 1.1526012234185814, + "learning_rate": 9.99991127361077e-06, + "loss": 0.2979, + "step": 23660 + }, + { + "epoch": 0.3052677057203841, + "grad_norm": 1.2291741608351698, + "learning_rate": 9.999906747486297e-06, + "loss": 0.2889, + "step": 23670 + }, + { + "epoch": 0.30539667391038, + "grad_norm": 1.1524442961936803, + "learning_rate": 9.99990210877308e-06, + "loss": 0.2736, + "step": 23680 + }, + { + "epoch": 0.3055256421003759, + "grad_norm": 1.3813155382701154, + "learning_rate": 9.999897357471225e-06, + "loss": 0.2792, + "step": 23690 + }, + { + "epoch": 0.3056546102903719, + "grad_norm": 1.1953021350810658, + "learning_rate": 9.999892493580835e-06, + "loss": 0.2756, + "step": 23700 + }, + { + "epoch": 0.3057835784803678, + "grad_norm": 1.1566572286151524, + "learning_rate": 9.999887517102025e-06, + "loss": 0.2708, + "step": 23710 + }, + { + "epoch": 0.30591254667036377, + "grad_norm": 1.0808400180732254, + "learning_rate": 9.999882428034903e-06, + "loss": 0.273, + "step": 23720 + }, + { + "epoch": 0.3060415148603597, + "grad_norm": 1.1011281550876577, + "learning_rate": 9.999877226379583e-06, + "loss": 0.2712, + "step": 23730 + }, + { + "epoch": 0.30617048305035566, + "grad_norm": 1.3034489329101753, + "learning_rate": 9.999871912136187e-06, + "loss": 0.2828, + "step": 23740 + }, + { + "epoch": 0.3062994512403516, + "grad_norm": 1.220359881725738, + "learning_rate": 9.99986648530483e-06, + "loss": 0.2922, + "step": 23750 + }, + { + "epoch": 0.3064284194303475, + "grad_norm": 1.2023327886926736, + "learning_rate": 9.999860945885635e-06, + "loss": 0.2945, + "step": 23760 + }, + { + "epoch": 0.30655738762034346, + "grad_norm": 1.072957158651106, + "learning_rate": 9.999855293878729e-06, + "loss": 0.2631, + "step": 23770 + }, + { + "epoch": 0.3066863558103394, + "grad_norm": 1.1726202942527275, + "learning_rate": 9.999849529284237e-06, + "loss": 0.2876, + "step": 23780 + }, + { + "epoch": 0.30681532400033534, + "grad_norm": 1.1720249037314747, + "learning_rate": 9.99984365210229e-06, + "loss": 0.2789, + "step": 23790 + }, + { + "epoch": 0.30694429219033126, + "grad_norm": 1.0778055697367743, + "learning_rate": 9.999837662333019e-06, + "loss": 0.262, + "step": 23800 + }, + { + "epoch": 0.3070732603803272, + "grad_norm": 1.2556467943056895, + "learning_rate": 9.99983155997656e-06, + "loss": 0.2869, + "step": 23810 + }, + { + "epoch": 0.30720222857032314, + "grad_norm": 1.1904383238583216, + "learning_rate": 9.99982534503305e-06, + "loss": 0.2851, + "step": 23820 + }, + { + "epoch": 0.30733119676031906, + "grad_norm": 1.1714129391212682, + "learning_rate": 9.99981901750263e-06, + "loss": 0.2935, + "step": 23830 + }, + { + "epoch": 0.30746016495031503, + "grad_norm": 1.001150463293014, + "learning_rate": 9.999812577385441e-06, + "loss": 0.2785, + "step": 23840 + }, + { + "epoch": 0.30758913314031094, + "grad_norm": 1.136316838159614, + "learning_rate": 9.999806024681629e-06, + "loss": 0.2731, + "step": 23850 + }, + { + "epoch": 0.30771810133030686, + "grad_norm": 1.1401154685933492, + "learning_rate": 9.99979935939134e-06, + "loss": 0.2569, + "step": 23860 + }, + { + "epoch": 0.30784706952030283, + "grad_norm": 1.1589335313733484, + "learning_rate": 9.999792581514727e-06, + "loss": 0.28, + "step": 23870 + }, + { + "epoch": 0.30797603771029874, + "grad_norm": 1.1050329747323056, + "learning_rate": 9.999785691051941e-06, + "loss": 0.275, + "step": 23880 + }, + { + "epoch": 0.3081050059002947, + "grad_norm": 1.0524183686952342, + "learning_rate": 9.999778688003137e-06, + "loss": 0.2812, + "step": 23890 + }, + { + "epoch": 0.30823397409029063, + "grad_norm": 1.2240011210812725, + "learning_rate": 9.999771572368471e-06, + "loss": 0.2869, + "step": 23900 + }, + { + "epoch": 0.30836294228028654, + "grad_norm": 1.2108371430152685, + "learning_rate": 9.999764344148106e-06, + "loss": 0.2853, + "step": 23910 + }, + { + "epoch": 0.3084919104702825, + "grad_norm": 1.194523286654479, + "learning_rate": 9.999757003342203e-06, + "loss": 0.2799, + "step": 23920 + }, + { + "epoch": 0.30862087866027843, + "grad_norm": 1.213400630581267, + "learning_rate": 9.99974954995093e-06, + "loss": 0.2932, + "step": 23930 + }, + { + "epoch": 0.3087498468502744, + "grad_norm": 1.0689798854743657, + "learning_rate": 9.999741983974452e-06, + "loss": 0.2787, + "step": 23940 + }, + { + "epoch": 0.3088788150402703, + "grad_norm": 1.026890609465518, + "learning_rate": 9.99973430541294e-06, + "loss": 0.2703, + "step": 23950 + }, + { + "epoch": 0.30900778323026623, + "grad_norm": 1.1394104999813885, + "learning_rate": 9.999726514266567e-06, + "loss": 0.2658, + "step": 23960 + }, + { + "epoch": 0.3091367514202622, + "grad_norm": 1.1900971535635034, + "learning_rate": 9.99971861053551e-06, + "loss": 0.2828, + "step": 23970 + }, + { + "epoch": 0.3092657196102581, + "grad_norm": 1.263369074892342, + "learning_rate": 9.999710594219944e-06, + "loss": 0.2789, + "step": 23980 + }, + { + "epoch": 0.3093946878002541, + "grad_norm": 1.0459818696232668, + "learning_rate": 9.99970246532005e-06, + "loss": 0.2858, + "step": 23990 + }, + { + "epoch": 0.30952365599025, + "grad_norm": 1.2298150113336817, + "learning_rate": 9.999694223836014e-06, + "loss": 0.2697, + "step": 24000 + }, + { + "epoch": 0.3096526241802459, + "grad_norm": 1.149307223402714, + "learning_rate": 9.99968586976802e-06, + "loss": 0.2712, + "step": 24010 + }, + { + "epoch": 0.3097815923702419, + "grad_norm": 1.2150842891673104, + "learning_rate": 9.999677403116257e-06, + "loss": 0.2735, + "step": 24020 + }, + { + "epoch": 0.3099105605602378, + "grad_norm": 1.111807133588439, + "learning_rate": 9.999668823880912e-06, + "loss": 0.2874, + "step": 24030 + }, + { + "epoch": 0.3100395287502338, + "grad_norm": 1.0175199971619346, + "learning_rate": 9.999660132062181e-06, + "loss": 0.2891, + "step": 24040 + }, + { + "epoch": 0.3101684969402297, + "grad_norm": 1.117754438297475, + "learning_rate": 9.99965132766026e-06, + "loss": 0.2847, + "step": 24050 + }, + { + "epoch": 0.3102974651302256, + "grad_norm": 1.1069737685244978, + "learning_rate": 9.999642410675347e-06, + "loss": 0.2885, + "step": 24060 + }, + { + "epoch": 0.3104264333202216, + "grad_norm": 1.1390234400911605, + "learning_rate": 9.999633381107643e-06, + "loss": 0.2678, + "step": 24070 + }, + { + "epoch": 0.3105554015102175, + "grad_norm": 1.152571410195124, + "learning_rate": 9.99962423895735e-06, + "loss": 0.2858, + "step": 24080 + }, + { + "epoch": 0.31068436970021346, + "grad_norm": 0.9687270555586781, + "learning_rate": 9.999614984224675e-06, + "loss": 0.2739, + "step": 24090 + }, + { + "epoch": 0.3108133378902094, + "grad_norm": 1.0977035848881387, + "learning_rate": 9.999605616909825e-06, + "loss": 0.2651, + "step": 24100 + }, + { + "epoch": 0.31094230608020534, + "grad_norm": 1.2876444274654024, + "learning_rate": 9.999596137013013e-06, + "loss": 0.2832, + "step": 24110 + }, + { + "epoch": 0.31107127427020126, + "grad_norm": 1.0811569944067856, + "learning_rate": 9.999586544534453e-06, + "loss": 0.2626, + "step": 24120 + }, + { + "epoch": 0.3112002424601972, + "grad_norm": 1.1351949174844647, + "learning_rate": 9.999576839474357e-06, + "loss": 0.2779, + "step": 24130 + }, + { + "epoch": 0.31132921065019314, + "grad_norm": 1.2140380309031018, + "learning_rate": 9.999567021832945e-06, + "loss": 0.2639, + "step": 24140 + }, + { + "epoch": 0.31145817884018906, + "grad_norm": 1.1712477033016073, + "learning_rate": 9.99955709161044e-06, + "loss": 0.2713, + "step": 24150 + }, + { + "epoch": 0.31158714703018503, + "grad_norm": 1.1508901485495302, + "learning_rate": 9.999547048807068e-06, + "loss": 0.2663, + "step": 24160 + }, + { + "epoch": 0.31171611522018094, + "grad_norm": 1.2513236295071128, + "learning_rate": 9.99953689342305e-06, + "loss": 0.2761, + "step": 24170 + }, + { + "epoch": 0.31184508341017686, + "grad_norm": 1.0843224812943448, + "learning_rate": 9.999526625458616e-06, + "loss": 0.2876, + "step": 24180 + }, + { + "epoch": 0.31197405160017283, + "grad_norm": 1.09233723373721, + "learning_rate": 9.999516244913997e-06, + "loss": 0.2907, + "step": 24190 + }, + { + "epoch": 0.31210301979016875, + "grad_norm": 1.0929399059451035, + "learning_rate": 9.999505751789429e-06, + "loss": 0.274, + "step": 24200 + }, + { + "epoch": 0.3122319879801647, + "grad_norm": 1.0839273421038733, + "learning_rate": 9.999495146085146e-06, + "loss": 0.2681, + "step": 24210 + }, + { + "epoch": 0.31236095617016063, + "grad_norm": 1.20864991108686, + "learning_rate": 9.999484427801387e-06, + "loss": 0.2602, + "step": 24220 + }, + { + "epoch": 0.31248992436015655, + "grad_norm": 1.1374672254329006, + "learning_rate": 9.999473596938395e-06, + "loss": 0.2799, + "step": 24230 + }, + { + "epoch": 0.3126188925501525, + "grad_norm": 1.1800856281269172, + "learning_rate": 9.999462653496413e-06, + "loss": 0.2621, + "step": 24240 + }, + { + "epoch": 0.31274786074014843, + "grad_norm": 1.1217748019994138, + "learning_rate": 9.999451597475686e-06, + "loss": 0.2887, + "step": 24250 + }, + { + "epoch": 0.3128768289301444, + "grad_norm": 1.1572541600433333, + "learning_rate": 9.999440428876467e-06, + "loss": 0.2739, + "step": 24260 + }, + { + "epoch": 0.3130057971201403, + "grad_norm": 1.2053322397294464, + "learning_rate": 9.999429147699e-06, + "loss": 0.278, + "step": 24270 + }, + { + "epoch": 0.31313476531013623, + "grad_norm": 1.1704169656227303, + "learning_rate": 9.999417753943548e-06, + "loss": 0.2824, + "step": 24280 + }, + { + "epoch": 0.3132637335001322, + "grad_norm": 1.12530276772706, + "learning_rate": 9.99940624761036e-06, + "loss": 0.2836, + "step": 24290 + }, + { + "epoch": 0.3133927016901281, + "grad_norm": 1.2820508400873327, + "learning_rate": 9.999394628699702e-06, + "loss": 0.2805, + "step": 24300 + }, + { + "epoch": 0.3135216698801241, + "grad_norm": 1.113967785587326, + "learning_rate": 9.999382897211832e-06, + "loss": 0.2779, + "step": 24310 + }, + { + "epoch": 0.31365063807012, + "grad_norm": 1.1782257660309223, + "learning_rate": 9.999371053147011e-06, + "loss": 0.2759, + "step": 24320 + }, + { + "epoch": 0.3137796062601159, + "grad_norm": 1.1493105195994564, + "learning_rate": 9.999359096505512e-06, + "loss": 0.2729, + "step": 24330 + }, + { + "epoch": 0.3139085744501119, + "grad_norm": 1.1325986678309226, + "learning_rate": 9.9993470272876e-06, + "loss": 0.2702, + "step": 24340 + }, + { + "epoch": 0.3140375426401078, + "grad_norm": 1.2166994868441636, + "learning_rate": 9.999334845493547e-06, + "loss": 0.2784, + "step": 24350 + }, + { + "epoch": 0.3141665108301038, + "grad_norm": 1.066641130383277, + "learning_rate": 9.99932255112363e-06, + "loss": 0.281, + "step": 24360 + }, + { + "epoch": 0.3142954790200997, + "grad_norm": 1.1568532172491939, + "learning_rate": 9.999310144178123e-06, + "loss": 0.2859, + "step": 24370 + }, + { + "epoch": 0.3144244472100956, + "grad_norm": 1.153967645928006, + "learning_rate": 9.999297624657308e-06, + "loss": 0.2624, + "step": 24380 + }, + { + "epoch": 0.3145534154000916, + "grad_norm": 1.1255533853040345, + "learning_rate": 9.999284992561463e-06, + "loss": 0.2865, + "step": 24390 + }, + { + "epoch": 0.3146823835900875, + "grad_norm": 1.0015341116634533, + "learning_rate": 9.999272247890876e-06, + "loss": 0.2769, + "step": 24400 + }, + { + "epoch": 0.31481135178008346, + "grad_norm": 1.3717043179374822, + "learning_rate": 9.999259390645834e-06, + "loss": 0.279, + "step": 24410 + }, + { + "epoch": 0.3149403199700794, + "grad_norm": 0.9438602573102757, + "learning_rate": 9.999246420826624e-06, + "loss": 0.2709, + "step": 24420 + }, + { + "epoch": 0.31506928816007534, + "grad_norm": 1.1315258194239566, + "learning_rate": 9.99923333843354e-06, + "loss": 0.2766, + "step": 24430 + }, + { + "epoch": 0.31519825635007126, + "grad_norm": 1.0625205054728075, + "learning_rate": 9.999220143466875e-06, + "loss": 0.2696, + "step": 24440 + }, + { + "epoch": 0.3153272245400672, + "grad_norm": 1.2203468462588478, + "learning_rate": 9.99920683592693e-06, + "loss": 0.2745, + "step": 24450 + }, + { + "epoch": 0.31545619273006315, + "grad_norm": 1.0382811280839985, + "learning_rate": 9.999193415814e-06, + "loss": 0.2661, + "step": 24460 + }, + { + "epoch": 0.31558516092005906, + "grad_norm": 1.1116521605519452, + "learning_rate": 9.999179883128389e-06, + "loss": 0.2856, + "step": 24470 + }, + { + "epoch": 0.31571412911005503, + "grad_norm": 1.1258106435478192, + "learning_rate": 9.999166237870402e-06, + "loss": 0.2729, + "step": 24480 + }, + { + "epoch": 0.31584309730005095, + "grad_norm": 1.042619966116267, + "learning_rate": 9.999152480040344e-06, + "loss": 0.2803, + "step": 24490 + }, + { + "epoch": 0.31597206549004686, + "grad_norm": 1.3558782138736252, + "learning_rate": 9.99913860963853e-06, + "loss": 0.2803, + "step": 24500 + }, + { + "epoch": 0.31610103368004283, + "grad_norm": 1.0730482617811734, + "learning_rate": 9.999124626665267e-06, + "loss": 0.2875, + "step": 24510 + }, + { + "epoch": 0.31623000187003875, + "grad_norm": 1.1172266196029255, + "learning_rate": 9.999110531120872e-06, + "loss": 0.2812, + "step": 24520 + }, + { + "epoch": 0.3163589700600347, + "grad_norm": 1.202353990976486, + "learning_rate": 9.999096323005663e-06, + "loss": 0.2786, + "step": 24530 + }, + { + "epoch": 0.31648793825003063, + "grad_norm": 1.2390411721724852, + "learning_rate": 9.99908200231996e-06, + "loss": 0.2792, + "step": 24540 + }, + { + "epoch": 0.31661690644002655, + "grad_norm": 1.293352756302388, + "learning_rate": 9.999067569064085e-06, + "loss": 0.264, + "step": 24550 + }, + { + "epoch": 0.3167458746300225, + "grad_norm": 1.0831937340664384, + "learning_rate": 9.999053023238363e-06, + "loss": 0.2632, + "step": 24560 + }, + { + "epoch": 0.31687484282001843, + "grad_norm": 1.1768898093079943, + "learning_rate": 9.99903836484312e-06, + "loss": 0.2678, + "step": 24570 + }, + { + "epoch": 0.3170038110100144, + "grad_norm": 1.165981192074534, + "learning_rate": 9.999023593878689e-06, + "loss": 0.2728, + "step": 24580 + }, + { + "epoch": 0.3171327792000103, + "grad_norm": 1.2245575799748512, + "learning_rate": 9.9990087103454e-06, + "loss": 0.2866, + "step": 24590 + }, + { + "epoch": 0.31726174739000623, + "grad_norm": 1.0909877713116958, + "learning_rate": 9.99899371424359e-06, + "loss": 0.2889, + "step": 24600 + }, + { + "epoch": 0.3173907155800022, + "grad_norm": 1.1406888431365638, + "learning_rate": 9.998978605573595e-06, + "loss": 0.2771, + "step": 24610 + }, + { + "epoch": 0.3175196837699981, + "grad_norm": 1.0900928170753725, + "learning_rate": 9.998963384335758e-06, + "loss": 0.2889, + "step": 24620 + }, + { + "epoch": 0.3176486519599941, + "grad_norm": 1.1596834670428846, + "learning_rate": 9.998948050530418e-06, + "loss": 0.256, + "step": 24630 + }, + { + "epoch": 0.31777762014999, + "grad_norm": 1.067204668874767, + "learning_rate": 9.998932604157923e-06, + "loss": 0.2655, + "step": 24640 + }, + { + "epoch": 0.3179065883399859, + "grad_norm": 1.106757865292681, + "learning_rate": 9.998917045218618e-06, + "loss": 0.2681, + "step": 24650 + }, + { + "epoch": 0.3180355565299819, + "grad_norm": 1.0961985030283468, + "learning_rate": 9.998901373712859e-06, + "loss": 0.2706, + "step": 24660 + }, + { + "epoch": 0.3181645247199778, + "grad_norm": 1.4984904190790527, + "learning_rate": 9.998885589640992e-06, + "loss": 0.2699, + "step": 24670 + }, + { + "epoch": 0.3182934929099738, + "grad_norm": 1.041919088725257, + "learning_rate": 9.998869693003379e-06, + "loss": 0.2545, + "step": 24680 + }, + { + "epoch": 0.3184224610999697, + "grad_norm": 1.0683075421879515, + "learning_rate": 9.998853683800372e-06, + "loss": 0.2817, + "step": 24690 + }, + { + "epoch": 0.3185514292899656, + "grad_norm": 1.203226988556553, + "learning_rate": 9.998837562032335e-06, + "loss": 0.2744, + "step": 24700 + }, + { + "epoch": 0.3186803974799616, + "grad_norm": 1.078868188195533, + "learning_rate": 9.998821327699631e-06, + "loss": 0.2664, + "step": 24710 + }, + { + "epoch": 0.3188093656699575, + "grad_norm": 1.1749113722904585, + "learning_rate": 9.998804980802624e-06, + "loss": 0.2626, + "step": 24720 + }, + { + "epoch": 0.31893833385995346, + "grad_norm": 1.2340034188676354, + "learning_rate": 9.998788521341683e-06, + "loss": 0.2615, + "step": 24730 + }, + { + "epoch": 0.3190673020499494, + "grad_norm": 1.1802924845709335, + "learning_rate": 9.998771949317179e-06, + "loss": 0.2789, + "step": 24740 + }, + { + "epoch": 0.3191962702399453, + "grad_norm": 1.0787513848014132, + "learning_rate": 9.998755264729482e-06, + "loss": 0.2742, + "step": 24750 + }, + { + "epoch": 0.31932523842994126, + "grad_norm": 1.290232404007386, + "learning_rate": 9.998738467578974e-06, + "loss": 0.2776, + "step": 24760 + }, + { + "epoch": 0.3194542066199372, + "grad_norm": 1.124579250589611, + "learning_rate": 9.998721557866028e-06, + "loss": 0.2732, + "step": 24770 + }, + { + "epoch": 0.31958317480993315, + "grad_norm": 1.1021556896601932, + "learning_rate": 9.998704535591025e-06, + "loss": 0.2687, + "step": 24780 + }, + { + "epoch": 0.31971214299992906, + "grad_norm": 1.2210609799075136, + "learning_rate": 9.998687400754353e-06, + "loss": 0.2711, + "step": 24790 + }, + { + "epoch": 0.31984111118992503, + "grad_norm": 1.0275790910469778, + "learning_rate": 9.998670153356393e-06, + "loss": 0.2876, + "step": 24800 + }, + { + "epoch": 0.31997007937992095, + "grad_norm": 1.1151283060128951, + "learning_rate": 9.998652793397534e-06, + "loss": 0.266, + "step": 24810 + }, + { + "epoch": 0.32009904756991686, + "grad_norm": 1.1097852855406611, + "learning_rate": 9.99863532087817e-06, + "loss": 0.2662, + "step": 24820 + }, + { + "epoch": 0.32022801575991283, + "grad_norm": 1.1673008786445749, + "learning_rate": 9.998617735798691e-06, + "loss": 0.2705, + "step": 24830 + }, + { + "epoch": 0.32035698394990875, + "grad_norm": 1.1826612773991065, + "learning_rate": 9.998600038159494e-06, + "loss": 0.2769, + "step": 24840 + }, + { + "epoch": 0.3204859521399047, + "grad_norm": 1.1611203973099704, + "learning_rate": 9.998582227960978e-06, + "loss": 0.2776, + "step": 24850 + }, + { + "epoch": 0.32061492032990063, + "grad_norm": 1.3075411914134802, + "learning_rate": 9.998564305203544e-06, + "loss": 0.2806, + "step": 24860 + }, + { + "epoch": 0.32074388851989655, + "grad_norm": 1.096250938150204, + "learning_rate": 9.998546269887594e-06, + "loss": 0.2765, + "step": 24870 + }, + { + "epoch": 0.3208728567098925, + "grad_norm": 1.1554379730668798, + "learning_rate": 9.998528122013538e-06, + "loss": 0.2705, + "step": 24880 + }, + { + "epoch": 0.32100182489988843, + "grad_norm": 0.9674761202036238, + "learning_rate": 9.99850986158178e-06, + "loss": 0.2559, + "step": 24890 + }, + { + "epoch": 0.3211307930898844, + "grad_norm": 1.1196240672072095, + "learning_rate": 9.998491488592737e-06, + "loss": 0.2657, + "step": 24900 + }, + { + "epoch": 0.3212597612798803, + "grad_norm": 1.2272300238173677, + "learning_rate": 9.998473003046815e-06, + "loss": 0.2728, + "step": 24910 + }, + { + "epoch": 0.32138872946987623, + "grad_norm": 1.1340304273525263, + "learning_rate": 9.998454404944436e-06, + "loss": 0.2758, + "step": 24920 + }, + { + "epoch": 0.3215176976598722, + "grad_norm": 1.2972482254572577, + "learning_rate": 9.998435694286017e-06, + "loss": 0.2587, + "step": 24930 + }, + { + "epoch": 0.3216466658498681, + "grad_norm": 1.0828925572769408, + "learning_rate": 9.99841687107198e-06, + "loss": 0.2812, + "step": 24940 + }, + { + "epoch": 0.3217756340398641, + "grad_norm": 1.0841919698126719, + "learning_rate": 9.998397935302748e-06, + "loss": 0.2828, + "step": 24950 + }, + { + "epoch": 0.32190460222986, + "grad_norm": 1.0647222681467319, + "learning_rate": 9.998378886978749e-06, + "loss": 0.2757, + "step": 24960 + }, + { + "epoch": 0.3220335704198559, + "grad_norm": 1.096847448517318, + "learning_rate": 9.99835972610041e-06, + "loss": 0.2682, + "step": 24970 + }, + { + "epoch": 0.3221625386098519, + "grad_norm": 1.1008475263381448, + "learning_rate": 9.998340452668163e-06, + "loss": 0.2761, + "step": 24980 + }, + { + "epoch": 0.3222915067998478, + "grad_norm": 1.0677531528902156, + "learning_rate": 9.998321066682441e-06, + "loss": 0.2744, + "step": 24990 + }, + { + "epoch": 0.3224204749898438, + "grad_norm": 1.1307017961433103, + "learning_rate": 9.998301568143685e-06, + "loss": 0.2711, + "step": 25000 + }, + { + "epoch": 0.3225494431798397, + "grad_norm": 1.0957142515292937, + "learning_rate": 9.998281957052327e-06, + "loss": 0.275, + "step": 25010 + }, + { + "epoch": 0.3226784113698356, + "grad_norm": 1.0743110296611684, + "learning_rate": 9.998262233408816e-06, + "loss": 0.2613, + "step": 25020 + }, + { + "epoch": 0.3228073795598316, + "grad_norm": 1.1014667095823159, + "learning_rate": 9.998242397213588e-06, + "loss": 0.2798, + "step": 25030 + }, + { + "epoch": 0.3229363477498275, + "grad_norm": 1.1105101283465788, + "learning_rate": 9.998222448467098e-06, + "loss": 0.2736, + "step": 25040 + }, + { + "epoch": 0.32306531593982346, + "grad_norm": 1.1554257119053137, + "learning_rate": 9.998202387169791e-06, + "loss": 0.2838, + "step": 25050 + }, + { + "epoch": 0.3231942841298194, + "grad_norm": 1.0841778872157795, + "learning_rate": 9.998182213322118e-06, + "loss": 0.2824, + "step": 25060 + }, + { + "epoch": 0.3233232523198153, + "grad_norm": 1.2059559465242349, + "learning_rate": 9.998161926924534e-06, + "loss": 0.277, + "step": 25070 + }, + { + "epoch": 0.32345222050981126, + "grad_norm": 1.110924881623183, + "learning_rate": 9.998141527977498e-06, + "loss": 0.2791, + "step": 25080 + }, + { + "epoch": 0.3235811886998072, + "grad_norm": 1.0869186471766537, + "learning_rate": 9.998121016481466e-06, + "loss": 0.285, + "step": 25090 + }, + { + "epoch": 0.32371015688980315, + "grad_norm": 1.0540705487112891, + "learning_rate": 9.998100392436904e-06, + "loss": 0.2798, + "step": 25100 + }, + { + "epoch": 0.32383912507979906, + "grad_norm": 1.2200675819595213, + "learning_rate": 9.998079655844271e-06, + "loss": 0.2806, + "step": 25110 + }, + { + "epoch": 0.32396809326979503, + "grad_norm": 1.1079809735586479, + "learning_rate": 9.998058806704038e-06, + "loss": 0.284, + "step": 25120 + }, + { + "epoch": 0.32409706145979095, + "grad_norm": 1.167983581793647, + "learning_rate": 9.998037845016674e-06, + "loss": 0.278, + "step": 25130 + }, + { + "epoch": 0.32422602964978686, + "grad_norm": 1.1035736111781347, + "learning_rate": 9.99801677078265e-06, + "loss": 0.2881, + "step": 25140 + }, + { + "epoch": 0.32435499783978283, + "grad_norm": 1.0436975224943583, + "learning_rate": 9.99799558400244e-06, + "loss": 0.263, + "step": 25150 + }, + { + "epoch": 0.32448396602977875, + "grad_norm": 1.215598637572157, + "learning_rate": 9.997974284676524e-06, + "loss": 0.2743, + "step": 25160 + }, + { + "epoch": 0.3246129342197747, + "grad_norm": 1.1946584104882738, + "learning_rate": 9.997952872805378e-06, + "loss": 0.2737, + "step": 25170 + }, + { + "epoch": 0.32474190240977063, + "grad_norm": 1.057987235111208, + "learning_rate": 9.997931348389487e-06, + "loss": 0.277, + "step": 25180 + }, + { + "epoch": 0.32487087059976655, + "grad_norm": 1.1432077221586983, + "learning_rate": 9.997909711429333e-06, + "loss": 0.269, + "step": 25190 + }, + { + "epoch": 0.3249998387897625, + "grad_norm": 1.1822752253343185, + "learning_rate": 9.997887961925405e-06, + "loss": 0.2695, + "step": 25200 + }, + { + "epoch": 0.32512880697975843, + "grad_norm": 1.0799621764620773, + "learning_rate": 9.997866099878193e-06, + "loss": 0.2639, + "step": 25210 + }, + { + "epoch": 0.3252577751697544, + "grad_norm": 1.1528059119445189, + "learning_rate": 9.997844125288189e-06, + "loss": 0.2828, + "step": 25220 + }, + { + "epoch": 0.3253867433597503, + "grad_norm": 1.265652822447665, + "learning_rate": 9.997822038155886e-06, + "loss": 0.2737, + "step": 25230 + }, + { + "epoch": 0.32551571154974623, + "grad_norm": 1.129935982737682, + "learning_rate": 9.997799838481785e-06, + "loss": 0.2743, + "step": 25240 + }, + { + "epoch": 0.3256446797397422, + "grad_norm": 1.1801329201767892, + "learning_rate": 9.997777526266383e-06, + "loss": 0.2734, + "step": 25250 + }, + { + "epoch": 0.3257736479297381, + "grad_norm": 1.1410242199959937, + "learning_rate": 9.997755101510183e-06, + "loss": 0.2662, + "step": 25260 + }, + { + "epoch": 0.3259026161197341, + "grad_norm": 1.0652519089027008, + "learning_rate": 9.997732564213691e-06, + "loss": 0.2737, + "step": 25270 + }, + { + "epoch": 0.32603158430973, + "grad_norm": 1.123492606999291, + "learning_rate": 9.997709914377412e-06, + "loss": 0.2671, + "step": 25280 + }, + { + "epoch": 0.3261605524997259, + "grad_norm": 1.0977519343108795, + "learning_rate": 9.99768715200186e-06, + "loss": 0.2783, + "step": 25290 + }, + { + "epoch": 0.3262895206897219, + "grad_norm": 1.2543133675408324, + "learning_rate": 9.997664277087544e-06, + "loss": 0.2752, + "step": 25300 + }, + { + "epoch": 0.3264184888797178, + "grad_norm": 1.0508172442625956, + "learning_rate": 9.99764128963498e-06, + "loss": 0.2686, + "step": 25310 + }, + { + "epoch": 0.3265474570697138, + "grad_norm": 1.1979499587114082, + "learning_rate": 9.997618189644687e-06, + "loss": 0.28, + "step": 25320 + }, + { + "epoch": 0.3266764252597097, + "grad_norm": 1.0809016106868203, + "learning_rate": 9.997594977117185e-06, + "loss": 0.2804, + "step": 25330 + }, + { + "epoch": 0.3268053934497056, + "grad_norm": 1.1911674962010528, + "learning_rate": 9.997571652052994e-06, + "loss": 0.2654, + "step": 25340 + }, + { + "epoch": 0.3269343616397016, + "grad_norm": 1.0165547348423347, + "learning_rate": 9.997548214452642e-06, + "loss": 0.2752, + "step": 25350 + }, + { + "epoch": 0.3270633298296975, + "grad_norm": 1.1845572132479132, + "learning_rate": 9.997524664316656e-06, + "loss": 0.2659, + "step": 25360 + }, + { + "epoch": 0.32719229801969346, + "grad_norm": 1.0510501865852386, + "learning_rate": 9.997501001645566e-06, + "loss": 0.2621, + "step": 25370 + }, + { + "epoch": 0.3273212662096894, + "grad_norm": 1.2947698836728772, + "learning_rate": 9.997477226439907e-06, + "loss": 0.294, + "step": 25380 + }, + { + "epoch": 0.3274502343996853, + "grad_norm": 1.1359347597290501, + "learning_rate": 9.99745333870021e-06, + "loss": 0.2656, + "step": 25390 + }, + { + "epoch": 0.32757920258968126, + "grad_norm": 1.071832260340353, + "learning_rate": 9.997429338427016e-06, + "loss": 0.272, + "step": 25400 + }, + { + "epoch": 0.3277081707796772, + "grad_norm": 1.1826133009651507, + "learning_rate": 9.997405225620865e-06, + "loss": 0.2699, + "step": 25410 + }, + { + "epoch": 0.32783713896967315, + "grad_norm": 1.1178771680730424, + "learning_rate": 9.9973810002823e-06, + "loss": 0.2798, + "step": 25420 + }, + { + "epoch": 0.32796610715966906, + "grad_norm": 1.0497900065986525, + "learning_rate": 9.997356662411867e-06, + "loss": 0.2575, + "step": 25430 + }, + { + "epoch": 0.328095075349665, + "grad_norm": 1.0890892982983533, + "learning_rate": 9.997332212010113e-06, + "loss": 0.269, + "step": 25440 + }, + { + "epoch": 0.32822404353966095, + "grad_norm": 1.05625073826168, + "learning_rate": 9.997307649077587e-06, + "loss": 0.2658, + "step": 25450 + }, + { + "epoch": 0.32835301172965686, + "grad_norm": 1.262166049874743, + "learning_rate": 9.997282973614846e-06, + "loss": 0.2812, + "step": 25460 + }, + { + "epoch": 0.32848197991965283, + "grad_norm": 1.0274280840955745, + "learning_rate": 9.997258185622442e-06, + "loss": 0.2667, + "step": 25470 + }, + { + "epoch": 0.32861094810964875, + "grad_norm": 1.0885017056380284, + "learning_rate": 9.997233285100938e-06, + "loss": 0.2727, + "step": 25480 + }, + { + "epoch": 0.3287399162996447, + "grad_norm": 1.1350583343431904, + "learning_rate": 9.997208272050889e-06, + "loss": 0.2679, + "step": 25490 + }, + { + "epoch": 0.32886888448964063, + "grad_norm": 1.1859185911712835, + "learning_rate": 9.997183146472862e-06, + "loss": 0.2741, + "step": 25500 + }, + { + "epoch": 0.32899785267963655, + "grad_norm": 1.1308880893880209, + "learning_rate": 9.997157908367422e-06, + "loss": 0.2793, + "step": 25510 + }, + { + "epoch": 0.3291268208696325, + "grad_norm": 1.0892664349560484, + "learning_rate": 9.997132557735135e-06, + "loss": 0.2686, + "step": 25520 + }, + { + "epoch": 0.32925578905962843, + "grad_norm": 1.080214907547656, + "learning_rate": 9.997107094576575e-06, + "loss": 0.2588, + "step": 25530 + }, + { + "epoch": 0.3293847572496244, + "grad_norm": 1.0960827663750876, + "learning_rate": 9.997081518892313e-06, + "loss": 0.2622, + "step": 25540 + }, + { + "epoch": 0.3295137254396203, + "grad_norm": 1.0700479834569945, + "learning_rate": 9.997055830682928e-06, + "loss": 0.2623, + "step": 25550 + }, + { + "epoch": 0.32964269362961623, + "grad_norm": 1.1664793016725268, + "learning_rate": 9.997030029948996e-06, + "loss": 0.2693, + "step": 25560 + }, + { + "epoch": 0.3297716618196122, + "grad_norm": 1.174847779596889, + "learning_rate": 9.997004116691098e-06, + "loss": 0.26, + "step": 25570 + }, + { + "epoch": 0.3299006300096081, + "grad_norm": 1.0863940685631148, + "learning_rate": 9.996978090909818e-06, + "loss": 0.2635, + "step": 25580 + }, + { + "epoch": 0.3300295981996041, + "grad_norm": 1.0446780507141171, + "learning_rate": 9.996951952605742e-06, + "loss": 0.2567, + "step": 25590 + }, + { + "epoch": 0.3301585663896, + "grad_norm": 1.1200500170000467, + "learning_rate": 9.996925701779459e-06, + "loss": 0.2819, + "step": 25600 + }, + { + "epoch": 0.3302875345795959, + "grad_norm": 1.142071378672708, + "learning_rate": 9.99689933843156e-06, + "loss": 0.268, + "step": 25610 + }, + { + "epoch": 0.3304165027695919, + "grad_norm": 1.0970458590381416, + "learning_rate": 9.996872862562638e-06, + "loss": 0.2719, + "step": 25620 + }, + { + "epoch": 0.3305454709595878, + "grad_norm": 0.9881577952822763, + "learning_rate": 9.996846274173289e-06, + "loss": 0.2808, + "step": 25630 + }, + { + "epoch": 0.3306744391495838, + "grad_norm": 1.166300101158555, + "learning_rate": 9.996819573264114e-06, + "loss": 0.2667, + "step": 25640 + }, + { + "epoch": 0.3308034073395797, + "grad_norm": 1.2100213569869216, + "learning_rate": 9.996792759835712e-06, + "loss": 0.2836, + "step": 25650 + }, + { + "epoch": 0.3309323755295756, + "grad_norm": 1.2261427883288736, + "learning_rate": 9.996765833888686e-06, + "loss": 0.2525, + "step": 25660 + }, + { + "epoch": 0.3310613437195716, + "grad_norm": 1.0773992826274477, + "learning_rate": 9.996738795423646e-06, + "loss": 0.2796, + "step": 25670 + }, + { + "epoch": 0.3311903119095675, + "grad_norm": 1.0405874391143646, + "learning_rate": 9.996711644441196e-06, + "loss": 0.2561, + "step": 25680 + }, + { + "epoch": 0.33131928009956346, + "grad_norm": 1.166604658594094, + "learning_rate": 9.996684380941951e-06, + "loss": 0.2848, + "step": 25690 + }, + { + "epoch": 0.3314482482895594, + "grad_norm": 1.0335049922159614, + "learning_rate": 9.996657004926526e-06, + "loss": 0.2644, + "step": 25700 + }, + { + "epoch": 0.3315772164795553, + "grad_norm": 1.2136983196290245, + "learning_rate": 9.996629516395533e-06, + "loss": 0.2813, + "step": 25710 + }, + { + "epoch": 0.33170618466955126, + "grad_norm": 1.0700310349915114, + "learning_rate": 9.996601915349595e-06, + "loss": 0.2695, + "step": 25720 + }, + { + "epoch": 0.3318351528595472, + "grad_norm": 1.1814120154736591, + "learning_rate": 9.99657420178933e-06, + "loss": 0.271, + "step": 25730 + }, + { + "epoch": 0.33196412104954315, + "grad_norm": 1.1445913898637061, + "learning_rate": 9.996546375715366e-06, + "loss": 0.2734, + "step": 25740 + }, + { + "epoch": 0.33209308923953906, + "grad_norm": 1.0522236519747583, + "learning_rate": 9.996518437128326e-06, + "loss": 0.2743, + "step": 25750 + }, + { + "epoch": 0.332222057429535, + "grad_norm": 1.156061410169608, + "learning_rate": 9.996490386028841e-06, + "loss": 0.2668, + "step": 25760 + }, + { + "epoch": 0.33235102561953095, + "grad_norm": 1.1056260264320201, + "learning_rate": 9.996462222417543e-06, + "loss": 0.2757, + "step": 25770 + }, + { + "epoch": 0.33247999380952686, + "grad_norm": 1.147147406774383, + "learning_rate": 9.996433946295063e-06, + "loss": 0.2791, + "step": 25780 + }, + { + "epoch": 0.33260896199952283, + "grad_norm": 1.149580057237515, + "learning_rate": 9.996405557662044e-06, + "loss": 0.2747, + "step": 25790 + }, + { + "epoch": 0.33273793018951875, + "grad_norm": 1.0488387269537693, + "learning_rate": 9.996377056519119e-06, + "loss": 0.2639, + "step": 25800 + }, + { + "epoch": 0.3328668983795147, + "grad_norm": 1.2193825984136197, + "learning_rate": 9.996348442866932e-06, + "loss": 0.2714, + "step": 25810 + }, + { + "epoch": 0.33299586656951063, + "grad_norm": 1.1463365723019017, + "learning_rate": 9.996319716706128e-06, + "loss": 0.2755, + "step": 25820 + }, + { + "epoch": 0.33312483475950655, + "grad_norm": 1.1458758470282082, + "learning_rate": 9.996290878037355e-06, + "loss": 0.2655, + "step": 25830 + }, + { + "epoch": 0.3332538029495025, + "grad_norm": 1.2166949702441616, + "learning_rate": 9.996261926861259e-06, + "loss": 0.2778, + "step": 25840 + }, + { + "epoch": 0.33338277113949843, + "grad_norm": 1.099879145067688, + "learning_rate": 9.996232863178495e-06, + "loss": 0.2536, + "step": 25850 + }, + { + "epoch": 0.3335117393294944, + "grad_norm": 1.1336772663041188, + "learning_rate": 9.996203686989717e-06, + "loss": 0.2797, + "step": 25860 + }, + { + "epoch": 0.3336407075194903, + "grad_norm": 1.0819400354122493, + "learning_rate": 9.99617439829558e-06, + "loss": 0.2701, + "step": 25870 + }, + { + "epoch": 0.33376967570948624, + "grad_norm": 1.08489135401852, + "learning_rate": 9.996144997096744e-06, + "loss": 0.2687, + "step": 25880 + }, + { + "epoch": 0.3338986438994822, + "grad_norm": 1.1684372887241328, + "learning_rate": 9.996115483393873e-06, + "loss": 0.2614, + "step": 25890 + }, + { + "epoch": 0.3340276120894781, + "grad_norm": 1.134739543149857, + "learning_rate": 9.99608585718763e-06, + "loss": 0.271, + "step": 25900 + }, + { + "epoch": 0.3341565802794741, + "grad_norm": 1.1753382008803892, + "learning_rate": 9.996056118478683e-06, + "loss": 0.254, + "step": 25910 + }, + { + "epoch": 0.33428554846947, + "grad_norm": 1.0887882301559384, + "learning_rate": 9.9960262672677e-06, + "loss": 0.2645, + "step": 25920 + }, + { + "epoch": 0.3344145166594659, + "grad_norm": 1.068007023971528, + "learning_rate": 9.995996303555356e-06, + "loss": 0.2689, + "step": 25930 + }, + { + "epoch": 0.3345434848494619, + "grad_norm": 1.1509516394300667, + "learning_rate": 9.995966227342324e-06, + "loss": 0.2835, + "step": 25940 + }, + { + "epoch": 0.3346724530394578, + "grad_norm": 1.1127073766881757, + "learning_rate": 9.99593603862928e-06, + "loss": 0.2707, + "step": 25950 + }, + { + "epoch": 0.3348014212294538, + "grad_norm": 1.1052947536346953, + "learning_rate": 9.995905737416906e-06, + "loss": 0.2742, + "step": 25960 + }, + { + "epoch": 0.3349303894194497, + "grad_norm": 1.0711022989271184, + "learning_rate": 9.995875323705884e-06, + "loss": 0.2764, + "step": 25970 + }, + { + "epoch": 0.3350593576094456, + "grad_norm": 1.0042102628999403, + "learning_rate": 9.995844797496899e-06, + "loss": 0.2736, + "step": 25980 + }, + { + "epoch": 0.3351883257994416, + "grad_norm": 1.1418048595045056, + "learning_rate": 9.995814158790636e-06, + "loss": 0.2634, + "step": 25990 + }, + { + "epoch": 0.3353172939894375, + "grad_norm": 1.1182612706943276, + "learning_rate": 9.995783407587787e-06, + "loss": 0.2677, + "step": 26000 + }, + { + "epoch": 0.33544626217943346, + "grad_norm": 1.1051932335293544, + "learning_rate": 9.995752543889046e-06, + "loss": 0.2817, + "step": 26010 + }, + { + "epoch": 0.3355752303694294, + "grad_norm": 1.0893695020008067, + "learning_rate": 9.995721567695104e-06, + "loss": 0.2546, + "step": 26020 + }, + { + "epoch": 0.3357041985594253, + "grad_norm": 1.1574938863215056, + "learning_rate": 9.995690479006662e-06, + "loss": 0.2665, + "step": 26030 + }, + { + "epoch": 0.33583316674942126, + "grad_norm": 1.149148061811642, + "learning_rate": 9.995659277824417e-06, + "loss": 0.2687, + "step": 26040 + }, + { + "epoch": 0.3359621349394172, + "grad_norm": 1.2262124800094023, + "learning_rate": 9.995627964149074e-06, + "loss": 0.2678, + "step": 26050 + }, + { + "epoch": 0.33609110312941315, + "grad_norm": 1.143644495525986, + "learning_rate": 9.995596537981339e-06, + "loss": 0.266, + "step": 26060 + }, + { + "epoch": 0.33622007131940906, + "grad_norm": 1.0674458516370782, + "learning_rate": 9.995564999321919e-06, + "loss": 0.284, + "step": 26070 + }, + { + "epoch": 0.336349039509405, + "grad_norm": 1.115244640908993, + "learning_rate": 9.99553334817152e-06, + "loss": 0.2529, + "step": 26080 + }, + { + "epoch": 0.33647800769940095, + "grad_norm": 1.133555791797022, + "learning_rate": 9.99550158453086e-06, + "loss": 0.2717, + "step": 26090 + }, + { + "epoch": 0.33660697588939686, + "grad_norm": 1.0070681756822042, + "learning_rate": 9.995469708400652e-06, + "loss": 0.2592, + "step": 26100 + }, + { + "epoch": 0.33673594407939283, + "grad_norm": 1.133052778989379, + "learning_rate": 9.995437719781613e-06, + "loss": 0.2656, + "step": 26110 + }, + { + "epoch": 0.33686491226938875, + "grad_norm": 1.0733215233585667, + "learning_rate": 9.995405618674465e-06, + "loss": 0.2689, + "step": 26120 + }, + { + "epoch": 0.3369938804593847, + "grad_norm": 1.171086186906235, + "learning_rate": 9.995373405079931e-06, + "loss": 0.2642, + "step": 26130 + }, + { + "epoch": 0.33712284864938064, + "grad_norm": 1.1036990940089737, + "learning_rate": 9.995341078998736e-06, + "loss": 0.2705, + "step": 26140 + }, + { + "epoch": 0.33725181683937655, + "grad_norm": 1.1154998178086866, + "learning_rate": 9.995308640431607e-06, + "loss": 0.2658, + "step": 26150 + }, + { + "epoch": 0.3373807850293725, + "grad_norm": 1.1626268868823515, + "learning_rate": 9.995276089379276e-06, + "loss": 0.2876, + "step": 26160 + }, + { + "epoch": 0.33750975321936844, + "grad_norm": 1.1489352775056334, + "learning_rate": 9.995243425842473e-06, + "loss": 0.2823, + "step": 26170 + }, + { + "epoch": 0.3376387214093644, + "grad_norm": 1.1125304798599474, + "learning_rate": 9.995210649821937e-06, + "loss": 0.2706, + "step": 26180 + }, + { + "epoch": 0.3377676895993603, + "grad_norm": 1.1989717858905191, + "learning_rate": 9.995177761318404e-06, + "loss": 0.2807, + "step": 26190 + }, + { + "epoch": 0.33789665778935624, + "grad_norm": 1.088137680920595, + "learning_rate": 9.995144760332615e-06, + "loss": 0.285, + "step": 26200 + }, + { + "epoch": 0.3380256259793522, + "grad_norm": 1.1485288588456521, + "learning_rate": 9.995111646865315e-06, + "loss": 0.268, + "step": 26210 + }, + { + "epoch": 0.3381545941693481, + "grad_norm": 1.0849041105130868, + "learning_rate": 9.995078420917246e-06, + "loss": 0.263, + "step": 26220 + }, + { + "epoch": 0.3382835623593441, + "grad_norm": 1.0691741840700562, + "learning_rate": 9.995045082489159e-06, + "loss": 0.259, + "step": 26230 + }, + { + "epoch": 0.33841253054934, + "grad_norm": 1.066604572996577, + "learning_rate": 9.995011631581803e-06, + "loss": 0.2673, + "step": 26240 + }, + { + "epoch": 0.3385414987393359, + "grad_norm": 1.1144961343759583, + "learning_rate": 9.994978068195934e-06, + "loss": 0.2708, + "step": 26250 + }, + { + "epoch": 0.3386704669293319, + "grad_norm": 1.1244731399576975, + "learning_rate": 9.994944392332305e-06, + "loss": 0.2713, + "step": 26260 + }, + { + "epoch": 0.3387994351193278, + "grad_norm": 1.0594916479583196, + "learning_rate": 9.994910603991675e-06, + "loss": 0.259, + "step": 26270 + }, + { + "epoch": 0.3389284033093238, + "grad_norm": 1.0742946182649187, + "learning_rate": 9.994876703174805e-06, + "loss": 0.2842, + "step": 26280 + }, + { + "epoch": 0.3390573714993197, + "grad_norm": 1.0571074804589284, + "learning_rate": 9.99484268988246e-06, + "loss": 0.2728, + "step": 26290 + }, + { + "epoch": 0.3391863396893156, + "grad_norm": 1.0512388395855663, + "learning_rate": 9.994808564115403e-06, + "loss": 0.2617, + "step": 26300 + }, + { + "epoch": 0.3393153078793116, + "grad_norm": 1.0045114380047688, + "learning_rate": 9.994774325874403e-06, + "loss": 0.2644, + "step": 26310 + }, + { + "epoch": 0.3394442760693075, + "grad_norm": 1.2008328992032804, + "learning_rate": 9.994739975160234e-06, + "loss": 0.2589, + "step": 26320 + }, + { + "epoch": 0.33957324425930346, + "grad_norm": 1.2137775504448811, + "learning_rate": 9.994705511973666e-06, + "loss": 0.27, + "step": 26330 + }, + { + "epoch": 0.3397022124492994, + "grad_norm": 1.1196155816284488, + "learning_rate": 9.994670936315477e-06, + "loss": 0.2659, + "step": 26340 + }, + { + "epoch": 0.3398311806392953, + "grad_norm": 1.0710626408476775, + "learning_rate": 9.994636248186445e-06, + "loss": 0.269, + "step": 26350 + }, + { + "epoch": 0.33996014882929126, + "grad_norm": 1.0428372357790792, + "learning_rate": 9.994601447587351e-06, + "loss": 0.266, + "step": 26360 + }, + { + "epoch": 0.3400891170192872, + "grad_norm": 1.1326426691357547, + "learning_rate": 9.994566534518979e-06, + "loss": 0.2655, + "step": 26370 + }, + { + "epoch": 0.34021808520928315, + "grad_norm": 1.123101197301001, + "learning_rate": 9.994531508982112e-06, + "loss": 0.2734, + "step": 26380 + }, + { + "epoch": 0.34034705339927906, + "grad_norm": 1.0602384533232714, + "learning_rate": 9.994496370977545e-06, + "loss": 0.2793, + "step": 26390 + }, + { + "epoch": 0.340476021589275, + "grad_norm": 1.1624584337317718, + "learning_rate": 9.994461120506065e-06, + "loss": 0.2935, + "step": 26400 + }, + { + "epoch": 0.34060498977927095, + "grad_norm": 1.038680108948017, + "learning_rate": 9.994425757568467e-06, + "loss": 0.2665, + "step": 26410 + }, + { + "epoch": 0.34073395796926687, + "grad_norm": 1.122409779461526, + "learning_rate": 9.994390282165546e-06, + "loss": 0.2684, + "step": 26420 + }, + { + "epoch": 0.34086292615926284, + "grad_norm": 1.0593498967827257, + "learning_rate": 9.994354694298104e-06, + "loss": 0.2529, + "step": 26430 + }, + { + "epoch": 0.34099189434925875, + "grad_norm": 0.9480784910143117, + "learning_rate": 9.994318993966939e-06, + "loss": 0.2568, + "step": 26440 + }, + { + "epoch": 0.34112086253925467, + "grad_norm": 1.0577159401036738, + "learning_rate": 9.994283181172855e-06, + "loss": 0.2625, + "step": 26450 + }, + { + "epoch": 0.34124983072925064, + "grad_norm": 1.3436724736492274, + "learning_rate": 9.99424725591666e-06, + "loss": 0.2747, + "step": 26460 + }, + { + "epoch": 0.34137879891924655, + "grad_norm": 1.2750251877328593, + "learning_rate": 9.994211218199162e-06, + "loss": 0.2732, + "step": 26470 + }, + { + "epoch": 0.3415077671092425, + "grad_norm": 1.062184348016505, + "learning_rate": 9.994175068021174e-06, + "loss": 0.2708, + "step": 26480 + }, + { + "epoch": 0.34163673529923844, + "grad_norm": 1.172596315769572, + "learning_rate": 9.99413880538351e-06, + "loss": 0.2829, + "step": 26490 + }, + { + "epoch": 0.3417657034892344, + "grad_norm": 1.1233876062714856, + "learning_rate": 9.994102430286983e-06, + "loss": 0.2634, + "step": 26500 + }, + { + "epoch": 0.3418946716792303, + "grad_norm": 1.0739575699565742, + "learning_rate": 9.994065942732416e-06, + "loss": 0.2715, + "step": 26510 + }, + { + "epoch": 0.34202363986922624, + "grad_norm": 1.2358027435517724, + "learning_rate": 9.994029342720629e-06, + "loss": 0.2541, + "step": 26520 + }, + { + "epoch": 0.3421526080592222, + "grad_norm": 1.2308682514901126, + "learning_rate": 9.993992630252446e-06, + "loss": 0.2743, + "step": 26530 + }, + { + "epoch": 0.3422815762492181, + "grad_norm": 1.1314253402142687, + "learning_rate": 9.993955805328695e-06, + "loss": 0.2751, + "step": 26540 + }, + { + "epoch": 0.3424105444392141, + "grad_norm": 1.167957430716449, + "learning_rate": 9.993918867950206e-06, + "loss": 0.2811, + "step": 26550 + }, + { + "epoch": 0.34253951262921, + "grad_norm": 1.175412203273185, + "learning_rate": 9.993881818117806e-06, + "loss": 0.2639, + "step": 26560 + }, + { + "epoch": 0.3426684808192059, + "grad_norm": 1.0881302653320528, + "learning_rate": 9.993844655832335e-06, + "loss": 0.2505, + "step": 26570 + }, + { + "epoch": 0.3427974490092019, + "grad_norm": 0.9923012816025671, + "learning_rate": 9.993807381094627e-06, + "loss": 0.256, + "step": 26580 + }, + { + "epoch": 0.3429264171991978, + "grad_norm": 1.1425399491167825, + "learning_rate": 9.99376999390552e-06, + "loss": 0.2846, + "step": 26590 + }, + { + "epoch": 0.3430553853891938, + "grad_norm": 1.1152835141917252, + "learning_rate": 9.993732494265859e-06, + "loss": 0.2664, + "step": 26600 + }, + { + "epoch": 0.3431843535791897, + "grad_norm": 1.0947926324095145, + "learning_rate": 9.993694882176487e-06, + "loss": 0.2657, + "step": 26610 + }, + { + "epoch": 0.3433133217691856, + "grad_norm": 1.0746229364458197, + "learning_rate": 9.99365715763825e-06, + "loss": 0.2711, + "step": 26620 + }, + { + "epoch": 0.3434422899591816, + "grad_norm": 1.0686034304399659, + "learning_rate": 9.993619320651999e-06, + "loss": 0.2899, + "step": 26630 + }, + { + "epoch": 0.3435712581491775, + "grad_norm": 1.1251136037838134, + "learning_rate": 9.993581371218586e-06, + "loss": 0.2686, + "step": 26640 + }, + { + "epoch": 0.34370022633917346, + "grad_norm": 1.0532209257204728, + "learning_rate": 9.993543309338864e-06, + "loss": 0.2787, + "step": 26650 + }, + { + "epoch": 0.3438291945291694, + "grad_norm": 1.1639591218062197, + "learning_rate": 9.993505135013691e-06, + "loss": 0.2723, + "step": 26660 + }, + { + "epoch": 0.3439581627191653, + "grad_norm": 1.1052568399209322, + "learning_rate": 9.993466848243929e-06, + "loss": 0.2716, + "step": 26670 + }, + { + "epoch": 0.34408713090916127, + "grad_norm": 1.1662034907692154, + "learning_rate": 9.993428449030436e-06, + "loss": 0.2754, + "step": 26680 + }, + { + "epoch": 0.3442160990991572, + "grad_norm": 1.063274636828944, + "learning_rate": 9.993389937374078e-06, + "loss": 0.2805, + "step": 26690 + }, + { + "epoch": 0.34434506728915315, + "grad_norm": 1.077330273743686, + "learning_rate": 9.993351313275723e-06, + "loss": 0.2567, + "step": 26700 + }, + { + "epoch": 0.34447403547914907, + "grad_norm": 1.1139799954084204, + "learning_rate": 9.99331257673624e-06, + "loss": 0.2811, + "step": 26710 + }, + { + "epoch": 0.344603003669145, + "grad_norm": 1.098784142082267, + "learning_rate": 9.993273727756503e-06, + "loss": 0.2643, + "step": 26720 + }, + { + "epoch": 0.34473197185914095, + "grad_norm": 1.1102892111864935, + "learning_rate": 9.993234766337385e-06, + "loss": 0.2739, + "step": 26730 + }, + { + "epoch": 0.34486094004913687, + "grad_norm": 1.1874215547797182, + "learning_rate": 9.993195692479764e-06, + "loss": 0.2729, + "step": 26740 + }, + { + "epoch": 0.34498990823913284, + "grad_norm": 1.1060611279930688, + "learning_rate": 9.99315650618452e-06, + "loss": 0.2798, + "step": 26750 + }, + { + "epoch": 0.34511887642912875, + "grad_norm": 1.2198013841586073, + "learning_rate": 9.993117207452535e-06, + "loss": 0.2638, + "step": 26760 + }, + { + "epoch": 0.34524784461912467, + "grad_norm": 1.0043919218006503, + "learning_rate": 9.993077796284694e-06, + "loss": 0.2627, + "step": 26770 + }, + { + "epoch": 0.34537681280912064, + "grad_norm": 1.0719448363820874, + "learning_rate": 9.993038272681886e-06, + "loss": 0.2576, + "step": 26780 + }, + { + "epoch": 0.34550578099911655, + "grad_norm": 1.09899379131294, + "learning_rate": 9.992998636644997e-06, + "loss": 0.2826, + "step": 26790 + }, + { + "epoch": 0.3456347491891125, + "grad_norm": 1.2424259777748328, + "learning_rate": 9.992958888174924e-06, + "loss": 0.2744, + "step": 26800 + }, + { + "epoch": 0.34576371737910844, + "grad_norm": 1.0733984596467543, + "learning_rate": 9.99291902727256e-06, + "loss": 0.2732, + "step": 26810 + }, + { + "epoch": 0.3458926855691044, + "grad_norm": 0.9723980480886966, + "learning_rate": 9.9928790539388e-06, + "loss": 0.253, + "step": 26820 + }, + { + "epoch": 0.3460216537591003, + "grad_norm": 1.0647243444582344, + "learning_rate": 9.99283896817455e-06, + "loss": 0.2667, + "step": 26830 + }, + { + "epoch": 0.34615062194909624, + "grad_norm": 1.1196014622792285, + "learning_rate": 9.99279876998071e-06, + "loss": 0.2801, + "step": 26840 + }, + { + "epoch": 0.3462795901390922, + "grad_norm": 1.290878380090146, + "learning_rate": 9.992758459358184e-06, + "loss": 0.2795, + "step": 26850 + }, + { + "epoch": 0.3464085583290881, + "grad_norm": 1.3110797406381363, + "learning_rate": 9.992718036307879e-06, + "loss": 0.2805, + "step": 26860 + }, + { + "epoch": 0.3465375265190841, + "grad_norm": 1.0766493739567955, + "learning_rate": 9.992677500830707e-06, + "loss": 0.2646, + "step": 26870 + }, + { + "epoch": 0.34666649470908, + "grad_norm": 1.144759496795657, + "learning_rate": 9.992636852927582e-06, + "loss": 0.2823, + "step": 26880 + }, + { + "epoch": 0.3467954628990759, + "grad_norm": 1.1131787589714517, + "learning_rate": 9.992596092599416e-06, + "loss": 0.2638, + "step": 26890 + }, + { + "epoch": 0.3469244310890719, + "grad_norm": 1.0955209983130103, + "learning_rate": 9.99255521984713e-06, + "loss": 0.2577, + "step": 26900 + }, + { + "epoch": 0.3470533992790678, + "grad_norm": 1.0869709297824188, + "learning_rate": 9.992514234671641e-06, + "loss": 0.269, + "step": 26910 + }, + { + "epoch": 0.3471823674690638, + "grad_norm": 1.0612902138068754, + "learning_rate": 9.992473137073877e-06, + "loss": 0.2696, + "step": 26920 + }, + { + "epoch": 0.3473113356590597, + "grad_norm": 1.1581357974121556, + "learning_rate": 9.99243192705476e-06, + "loss": 0.2594, + "step": 26930 + }, + { + "epoch": 0.3474403038490556, + "grad_norm": 1.0651263191136566, + "learning_rate": 9.992390604615217e-06, + "loss": 0.2649, + "step": 26940 + }, + { + "epoch": 0.3475692720390516, + "grad_norm": 1.0653003032286907, + "learning_rate": 9.99234916975618e-06, + "loss": 0.2796, + "step": 26950 + }, + { + "epoch": 0.3476982402290475, + "grad_norm": 1.156835031842906, + "learning_rate": 9.992307622478582e-06, + "loss": 0.2791, + "step": 26960 + }, + { + "epoch": 0.34782720841904347, + "grad_norm": 0.9823616483075562, + "learning_rate": 9.992265962783359e-06, + "loss": 0.2676, + "step": 26970 + }, + { + "epoch": 0.3479561766090394, + "grad_norm": 1.0628227041964897, + "learning_rate": 9.99222419067145e-06, + "loss": 0.2578, + "step": 26980 + }, + { + "epoch": 0.3480851447990353, + "grad_norm": 1.0894855196009041, + "learning_rate": 9.99218230614379e-06, + "loss": 0.256, + "step": 26990 + }, + { + "epoch": 0.34821411298903127, + "grad_norm": 1.0044730242228008, + "learning_rate": 9.99214030920133e-06, + "loss": 0.2603, + "step": 27000 + }, + { + "epoch": 0.3483430811790272, + "grad_norm": 1.1526919692223045, + "learning_rate": 9.992098199845011e-06, + "loss": 0.2816, + "step": 27010 + }, + { + "epoch": 0.34847204936902315, + "grad_norm": 1.147657639276662, + "learning_rate": 9.992055978075782e-06, + "loss": 0.2682, + "step": 27020 + }, + { + "epoch": 0.34860101755901907, + "grad_norm": 1.126482077248931, + "learning_rate": 9.992013643894593e-06, + "loss": 0.2721, + "step": 27030 + }, + { + "epoch": 0.348729985749015, + "grad_norm": 1.108406661062782, + "learning_rate": 9.9919711973024e-06, + "loss": 0.2624, + "step": 27040 + }, + { + "epoch": 0.34885895393901095, + "grad_norm": 1.159378497237613, + "learning_rate": 9.991928638300158e-06, + "loss": 0.2618, + "step": 27050 + }, + { + "epoch": 0.34898792212900687, + "grad_norm": 1.0869033897183935, + "learning_rate": 9.991885966888823e-06, + "loss": 0.2691, + "step": 27060 + }, + { + "epoch": 0.34911689031900284, + "grad_norm": 1.0997037320447194, + "learning_rate": 9.991843183069359e-06, + "loss": 0.2811, + "step": 27070 + }, + { + "epoch": 0.34924585850899875, + "grad_norm": 1.1707050118506082, + "learning_rate": 9.991800286842725e-06, + "loss": 0.2626, + "step": 27080 + }, + { + "epoch": 0.34937482669899467, + "grad_norm": 1.1240374922547878, + "learning_rate": 9.991757278209891e-06, + "loss": 0.2608, + "step": 27090 + }, + { + "epoch": 0.34950379488899064, + "grad_norm": 1.0315938414455592, + "learning_rate": 9.991714157171825e-06, + "loss": 0.2811, + "step": 27100 + }, + { + "epoch": 0.34963276307898655, + "grad_norm": 1.03778658186293, + "learning_rate": 9.991670923729498e-06, + "loss": 0.2771, + "step": 27110 + }, + { + "epoch": 0.3497617312689825, + "grad_norm": 1.0578920682938273, + "learning_rate": 9.99162757788388e-06, + "loss": 0.263, + "step": 27120 + }, + { + "epoch": 0.34989069945897844, + "grad_norm": 1.0365971119062225, + "learning_rate": 9.991584119635952e-06, + "loss": 0.2643, + "step": 27130 + }, + { + "epoch": 0.35001966764897435, + "grad_norm": 1.1592094563059345, + "learning_rate": 9.991540548986688e-06, + "loss": 0.2725, + "step": 27140 + }, + { + "epoch": 0.3501486358389703, + "grad_norm": 1.197317302898826, + "learning_rate": 9.991496865937076e-06, + "loss": 0.2648, + "step": 27150 + }, + { + "epoch": 0.35027760402896624, + "grad_norm": 1.1074761029121805, + "learning_rate": 9.991453070488092e-06, + "loss": 0.2785, + "step": 27160 + }, + { + "epoch": 0.3504065722189622, + "grad_norm": 1.057711489049587, + "learning_rate": 9.991409162640726e-06, + "loss": 0.2565, + "step": 27170 + }, + { + "epoch": 0.3505355404089581, + "grad_norm": 1.1592430762077748, + "learning_rate": 9.991365142395966e-06, + "loss": 0.2586, + "step": 27180 + }, + { + "epoch": 0.3506645085989541, + "grad_norm": 1.0930108858337124, + "learning_rate": 9.991321009754804e-06, + "loss": 0.2741, + "step": 27190 + }, + { + "epoch": 0.35079347678895, + "grad_norm": 1.0611277976835864, + "learning_rate": 9.991276764718233e-06, + "loss": 0.2696, + "step": 27200 + }, + { + "epoch": 0.3509224449789459, + "grad_norm": 1.122221717710148, + "learning_rate": 9.991232407287249e-06, + "loss": 0.2722, + "step": 27210 + }, + { + "epoch": 0.3510514131689419, + "grad_norm": 0.9986844320416697, + "learning_rate": 9.991187937462852e-06, + "loss": 0.2564, + "step": 27220 + }, + { + "epoch": 0.3511803813589378, + "grad_norm": 1.1677639854611233, + "learning_rate": 9.991143355246043e-06, + "loss": 0.2597, + "step": 27230 + }, + { + "epoch": 0.3513093495489338, + "grad_norm": 1.137788357305074, + "learning_rate": 9.991098660637824e-06, + "loss": 0.2736, + "step": 27240 + }, + { + "epoch": 0.3514383177389297, + "grad_norm": 1.0333319348933236, + "learning_rate": 9.991053853639204e-06, + "loss": 0.2651, + "step": 27250 + }, + { + "epoch": 0.3515672859289256, + "grad_norm": 1.0554746494675795, + "learning_rate": 9.991008934251191e-06, + "loss": 0.2672, + "step": 27260 + }, + { + "epoch": 0.3516962541189216, + "grad_norm": 1.1078725071317286, + "learning_rate": 9.990963902474798e-06, + "loss": 0.2703, + "step": 27270 + }, + { + "epoch": 0.3518252223089175, + "grad_norm": 1.1165665423594326, + "learning_rate": 9.990918758311037e-06, + "loss": 0.2672, + "step": 27280 + }, + { + "epoch": 0.35195419049891347, + "grad_norm": 0.9807220772433175, + "learning_rate": 9.990873501760924e-06, + "loss": 0.2684, + "step": 27290 + }, + { + "epoch": 0.3520831586889094, + "grad_norm": 1.1193652044121112, + "learning_rate": 9.99082813282548e-06, + "loss": 0.2602, + "step": 27300 + }, + { + "epoch": 0.3522121268789053, + "grad_norm": 1.0958249371792275, + "learning_rate": 9.990782651505725e-06, + "loss": 0.2766, + "step": 27310 + }, + { + "epoch": 0.35234109506890127, + "grad_norm": 1.00310439273859, + "learning_rate": 9.990737057802686e-06, + "loss": 0.2556, + "step": 27320 + }, + { + "epoch": 0.3524700632588972, + "grad_norm": 1.0327074689557234, + "learning_rate": 9.990691351717385e-06, + "loss": 0.2742, + "step": 27330 + }, + { + "epoch": 0.35259903144889315, + "grad_norm": 1.120295590410789, + "learning_rate": 9.990645533250856e-06, + "loss": 0.2608, + "step": 27340 + }, + { + "epoch": 0.35272799963888907, + "grad_norm": 1.108428445122089, + "learning_rate": 9.990599602404128e-06, + "loss": 0.268, + "step": 27350 + }, + { + "epoch": 0.352856967828885, + "grad_norm": 1.0253536112974646, + "learning_rate": 9.990553559178235e-06, + "loss": 0.256, + "step": 27360 + }, + { + "epoch": 0.35298593601888095, + "grad_norm": 1.014684194193353, + "learning_rate": 9.990507403574217e-06, + "loss": 0.2624, + "step": 27370 + }, + { + "epoch": 0.35311490420887687, + "grad_norm": 1.114889858294156, + "learning_rate": 9.99046113559311e-06, + "loss": 0.2687, + "step": 27380 + }, + { + "epoch": 0.35324387239887284, + "grad_norm": 1.0243675211305334, + "learning_rate": 9.990414755235958e-06, + "loss": 0.2698, + "step": 27390 + }, + { + "epoch": 0.35337284058886875, + "grad_norm": 0.9999124226072316, + "learning_rate": 9.990368262503802e-06, + "loss": 0.2647, + "step": 27400 + }, + { + "epoch": 0.35350180877886467, + "grad_norm": 1.040240559361812, + "learning_rate": 9.990321657397692e-06, + "loss": 0.2626, + "step": 27410 + }, + { + "epoch": 0.35363077696886064, + "grad_norm": 1.018268624740003, + "learning_rate": 9.990274939918679e-06, + "loss": 0.2689, + "step": 27420 + }, + { + "epoch": 0.35375974515885655, + "grad_norm": 1.0920168594062696, + "learning_rate": 9.990228110067811e-06, + "loss": 0.2674, + "step": 27430 + }, + { + "epoch": 0.3538887133488525, + "grad_norm": 1.1118266188902906, + "learning_rate": 9.990181167846144e-06, + "loss": 0.2695, + "step": 27440 + }, + { + "epoch": 0.35401768153884844, + "grad_norm": 1.0029769930229386, + "learning_rate": 9.990134113254737e-06, + "loss": 0.2629, + "step": 27450 + }, + { + "epoch": 0.35414664972884435, + "grad_norm": 1.1226759964449533, + "learning_rate": 9.990086946294646e-06, + "loss": 0.2597, + "step": 27460 + }, + { + "epoch": 0.3542756179188403, + "grad_norm": 1.0079615014461003, + "learning_rate": 9.990039666966935e-06, + "loss": 0.26, + "step": 27470 + }, + { + "epoch": 0.35440458610883624, + "grad_norm": 1.2764503189586156, + "learning_rate": 9.989992275272671e-06, + "loss": 0.275, + "step": 27480 + }, + { + "epoch": 0.3545335542988322, + "grad_norm": 1.2037793735387148, + "learning_rate": 9.989944771212917e-06, + "loss": 0.2729, + "step": 27490 + }, + { + "epoch": 0.3546625224888281, + "grad_norm": 1.0700107502160698, + "learning_rate": 9.989897154788746e-06, + "loss": 0.2656, + "step": 27500 + }, + { + "epoch": 0.3547914906788241, + "grad_norm": 1.1793112440616256, + "learning_rate": 9.989849426001227e-06, + "loss": 0.2599, + "step": 27510 + }, + { + "epoch": 0.35492045886882, + "grad_norm": 1.1002893072814455, + "learning_rate": 9.989801584851437e-06, + "loss": 0.2745, + "step": 27520 + }, + { + "epoch": 0.3550494270588159, + "grad_norm": 1.0111743201256904, + "learning_rate": 9.989753631340453e-06, + "loss": 0.2681, + "step": 27530 + }, + { + "epoch": 0.3551783952488119, + "grad_norm": 1.1077728046903617, + "learning_rate": 9.989705565469355e-06, + "loss": 0.2672, + "step": 27540 + }, + { + "epoch": 0.3553073634388078, + "grad_norm": 1.1330829798702613, + "learning_rate": 9.989657387239226e-06, + "loss": 0.2604, + "step": 27550 + }, + { + "epoch": 0.3554363316288038, + "grad_norm": 1.2663473194107233, + "learning_rate": 9.989609096651148e-06, + "loss": 0.2825, + "step": 27560 + }, + { + "epoch": 0.3555652998187997, + "grad_norm": 1.1185314400202753, + "learning_rate": 9.989560693706212e-06, + "loss": 0.2692, + "step": 27570 + }, + { + "epoch": 0.3556942680087956, + "grad_norm": 1.1259618168901022, + "learning_rate": 9.989512178405506e-06, + "loss": 0.2715, + "step": 27580 + }, + { + "epoch": 0.3558232361987916, + "grad_norm": 1.090349159375847, + "learning_rate": 9.989463550750121e-06, + "loss": 0.2535, + "step": 27590 + }, + { + "epoch": 0.3559522043887875, + "grad_norm": 1.153173588239139, + "learning_rate": 9.989414810741156e-06, + "loss": 0.2664, + "step": 27600 + }, + { + "epoch": 0.35608117257878347, + "grad_norm": 1.0418675929259649, + "learning_rate": 9.989365958379705e-06, + "loss": 0.2535, + "step": 27610 + }, + { + "epoch": 0.3562101407687794, + "grad_norm": 1.1488531084047833, + "learning_rate": 9.98931699366687e-06, + "loss": 0.2779, + "step": 27620 + }, + { + "epoch": 0.3563391089587753, + "grad_norm": 1.1788613492915365, + "learning_rate": 9.989267916603752e-06, + "loss": 0.2819, + "step": 27630 + }, + { + "epoch": 0.35646807714877127, + "grad_norm": 1.2395311193345535, + "learning_rate": 9.989218727191458e-06, + "loss": 0.2712, + "step": 27640 + }, + { + "epoch": 0.3565970453387672, + "grad_norm": 1.0262417171907177, + "learning_rate": 9.989169425431094e-06, + "loss": 0.2608, + "step": 27650 + }, + { + "epoch": 0.35672601352876315, + "grad_norm": 1.0679421690051987, + "learning_rate": 9.989120011323772e-06, + "loss": 0.2644, + "step": 27660 + }, + { + "epoch": 0.35685498171875907, + "grad_norm": 1.0960832914881824, + "learning_rate": 9.989070484870604e-06, + "loss": 0.2792, + "step": 27670 + }, + { + "epoch": 0.356983949908755, + "grad_norm": 1.1303477317148742, + "learning_rate": 9.989020846072703e-06, + "loss": 0.277, + "step": 27680 + }, + { + "epoch": 0.35711291809875095, + "grad_norm": 1.0809669614492694, + "learning_rate": 9.98897109493119e-06, + "loss": 0.2557, + "step": 27690 + }, + { + "epoch": 0.35724188628874687, + "grad_norm": 1.1065580445387868, + "learning_rate": 9.988921231447183e-06, + "loss": 0.2734, + "step": 27700 + }, + { + "epoch": 0.35737085447874284, + "grad_norm": 0.9962814107268495, + "learning_rate": 9.988871255621808e-06, + "loss": 0.2633, + "step": 27710 + }, + { + "epoch": 0.35749982266873875, + "grad_norm": 1.1342033514010261, + "learning_rate": 9.988821167456187e-06, + "loss": 0.2728, + "step": 27720 + }, + { + "epoch": 0.35762879085873467, + "grad_norm": 0.9867340635681329, + "learning_rate": 9.988770966951448e-06, + "loss": 0.2741, + "step": 27730 + }, + { + "epoch": 0.35775775904873064, + "grad_norm": 1.052950310677346, + "learning_rate": 9.988720654108723e-06, + "loss": 0.268, + "step": 27740 + }, + { + "epoch": 0.35788672723872655, + "grad_norm": 1.079516257853287, + "learning_rate": 9.988670228929144e-06, + "loss": 0.2692, + "step": 27750 + }, + { + "epoch": 0.3580156954287225, + "grad_norm": 0.997180205477902, + "learning_rate": 9.988619691413848e-06, + "loss": 0.2636, + "step": 27760 + }, + { + "epoch": 0.35814466361871844, + "grad_norm": 0.9923298583901718, + "learning_rate": 9.98856904156397e-06, + "loss": 0.2597, + "step": 27770 + }, + { + "epoch": 0.35827363180871435, + "grad_norm": 1.1650264194741866, + "learning_rate": 9.988518279380655e-06, + "loss": 0.2743, + "step": 27780 + }, + { + "epoch": 0.3584025999987103, + "grad_norm": 1.0410643334225944, + "learning_rate": 9.988467404865043e-06, + "loss": 0.2721, + "step": 27790 + }, + { + "epoch": 0.35853156818870624, + "grad_norm": 1.023882017114823, + "learning_rate": 9.98841641801828e-06, + "loss": 0.2687, + "step": 27800 + }, + { + "epoch": 0.3586605363787022, + "grad_norm": 1.0738554518310546, + "learning_rate": 9.988365318841514e-06, + "loss": 0.2735, + "step": 27810 + }, + { + "epoch": 0.3587895045686981, + "grad_norm": 0.9736490195063426, + "learning_rate": 9.988314107335897e-06, + "loss": 0.2575, + "step": 27820 + }, + { + "epoch": 0.35891847275869404, + "grad_norm": 1.0162076979670902, + "learning_rate": 9.98826278350258e-06, + "loss": 0.2671, + "step": 27830 + }, + { + "epoch": 0.35904744094869, + "grad_norm": 1.020194384472499, + "learning_rate": 9.98821134734272e-06, + "loss": 0.2586, + "step": 27840 + }, + { + "epoch": 0.3591764091386859, + "grad_norm": 1.0146721035931012, + "learning_rate": 9.988159798857474e-06, + "loss": 0.2683, + "step": 27850 + }, + { + "epoch": 0.3593053773286819, + "grad_norm": 1.2129217598782986, + "learning_rate": 9.988108138048005e-06, + "loss": 0.2799, + "step": 27860 + }, + { + "epoch": 0.3594343455186778, + "grad_norm": 1.1234268351070644, + "learning_rate": 9.988056364915475e-06, + "loss": 0.284, + "step": 27870 + }, + { + "epoch": 0.3595633137086738, + "grad_norm": 1.1635029765201244, + "learning_rate": 9.98800447946105e-06, + "loss": 0.2624, + "step": 27880 + }, + { + "epoch": 0.3596922818986697, + "grad_norm": 1.0759603522896504, + "learning_rate": 9.987952481685899e-06, + "loss": 0.2804, + "step": 27890 + }, + { + "epoch": 0.3598212500886656, + "grad_norm": 1.0695087623100341, + "learning_rate": 9.987900371591192e-06, + "loss": 0.2601, + "step": 27900 + }, + { + "epoch": 0.3599502182786616, + "grad_norm": 1.194541657625318, + "learning_rate": 9.987848149178103e-06, + "loss": 0.2687, + "step": 27910 + }, + { + "epoch": 0.3600791864686575, + "grad_norm": 1.0319616348913725, + "learning_rate": 9.987795814447807e-06, + "loss": 0.2711, + "step": 27920 + }, + { + "epoch": 0.36020815465865347, + "grad_norm": 1.092073296166564, + "learning_rate": 9.987743367401483e-06, + "loss": 0.284, + "step": 27930 + }, + { + "epoch": 0.3603371228486494, + "grad_norm": 1.1266651989689858, + "learning_rate": 9.987690808040313e-06, + "loss": 0.2872, + "step": 27940 + }, + { + "epoch": 0.3604660910386453, + "grad_norm": 1.0881489989320836, + "learning_rate": 9.987638136365479e-06, + "loss": 0.2625, + "step": 27950 + }, + { + "epoch": 0.36059505922864127, + "grad_norm": 1.093528423023056, + "learning_rate": 9.987585352378167e-06, + "loss": 0.266, + "step": 27960 + }, + { + "epoch": 0.3607240274186372, + "grad_norm": 1.13041594915124, + "learning_rate": 9.987532456079567e-06, + "loss": 0.2575, + "step": 27970 + }, + { + "epoch": 0.36085299560863315, + "grad_norm": 1.0335627222535413, + "learning_rate": 9.98747944747087e-06, + "loss": 0.2757, + "step": 27980 + }, + { + "epoch": 0.36098196379862907, + "grad_norm": 0.9923394675752558, + "learning_rate": 9.98742632655327e-06, + "loss": 0.2658, + "step": 27990 + }, + { + "epoch": 0.361110931988625, + "grad_norm": 1.1179611608405968, + "learning_rate": 9.987373093327962e-06, + "loss": 0.2729, + "step": 28000 + }, + { + "epoch": 0.36123990017862095, + "grad_norm": 1.0897993336362501, + "learning_rate": 9.987319747796143e-06, + "loss": 0.265, + "step": 28010 + }, + { + "epoch": 0.36136886836861687, + "grad_norm": 1.0795022653131836, + "learning_rate": 9.987266289959016e-06, + "loss": 0.2582, + "step": 28020 + }, + { + "epoch": 0.36149783655861284, + "grad_norm": 1.1486587240685744, + "learning_rate": 9.987212719817788e-06, + "loss": 0.2681, + "step": 28030 + }, + { + "epoch": 0.36162680474860875, + "grad_norm": 1.0658410218088008, + "learning_rate": 9.987159037373661e-06, + "loss": 0.2731, + "step": 28040 + }, + { + "epoch": 0.36175577293860467, + "grad_norm": 1.1374642541794648, + "learning_rate": 9.987105242627846e-06, + "loss": 0.264, + "step": 28050 + }, + { + "epoch": 0.36188474112860064, + "grad_norm": 1.0768117963766684, + "learning_rate": 9.987051335581552e-06, + "loss": 0.2704, + "step": 28060 + }, + { + "epoch": 0.36201370931859655, + "grad_norm": 0.9165549760264449, + "learning_rate": 9.986997316235996e-06, + "loss": 0.2663, + "step": 28070 + }, + { + "epoch": 0.3621426775085925, + "grad_norm": 1.2346591072654325, + "learning_rate": 9.986943184592391e-06, + "loss": 0.2846, + "step": 28080 + }, + { + "epoch": 0.36227164569858844, + "grad_norm": 1.1327000693709337, + "learning_rate": 9.98688894065196e-06, + "loss": 0.2687, + "step": 28090 + }, + { + "epoch": 0.36240061388858436, + "grad_norm": 1.0505552840469443, + "learning_rate": 9.98683458441592e-06, + "loss": 0.2693, + "step": 28100 + }, + { + "epoch": 0.3625295820785803, + "grad_norm": 0.9274076397616974, + "learning_rate": 9.986780115885499e-06, + "loss": 0.2552, + "step": 28110 + }, + { + "epoch": 0.36265855026857624, + "grad_norm": 1.1084205452099145, + "learning_rate": 9.986725535061921e-06, + "loss": 0.2759, + "step": 28120 + }, + { + "epoch": 0.3627875184585722, + "grad_norm": 0.933818264352436, + "learning_rate": 9.986670841946415e-06, + "loss": 0.2721, + "step": 28130 + }, + { + "epoch": 0.3629164866485681, + "grad_norm": 1.0825604066064092, + "learning_rate": 9.986616036540216e-06, + "loss": 0.2806, + "step": 28140 + }, + { + "epoch": 0.36304545483856404, + "grad_norm": 1.056613934504238, + "learning_rate": 9.986561118844554e-06, + "loss": 0.2787, + "step": 28150 + }, + { + "epoch": 0.36317442302856, + "grad_norm": 1.0932288540182626, + "learning_rate": 9.986506088860666e-06, + "loss": 0.2645, + "step": 28160 + }, + { + "epoch": 0.3633033912185559, + "grad_norm": 0.9908928310042554, + "learning_rate": 9.986450946589795e-06, + "loss": 0.2578, + "step": 28170 + }, + { + "epoch": 0.3634323594085519, + "grad_norm": 1.0538048528168316, + "learning_rate": 9.986395692033178e-06, + "loss": 0.2568, + "step": 28180 + }, + { + "epoch": 0.3635613275985478, + "grad_norm": 1.1353782338285767, + "learning_rate": 9.986340325192063e-06, + "loss": 0.2659, + "step": 28190 + }, + { + "epoch": 0.3636902957885438, + "grad_norm": 1.0004512765683684, + "learning_rate": 9.986284846067693e-06, + "loss": 0.2618, + "step": 28200 + }, + { + "epoch": 0.3638192639785397, + "grad_norm": 1.0515101451160227, + "learning_rate": 9.986229254661322e-06, + "loss": 0.2602, + "step": 28210 + }, + { + "epoch": 0.3639482321685356, + "grad_norm": 1.0494640415208893, + "learning_rate": 9.986173550974197e-06, + "loss": 0.2814, + "step": 28220 + }, + { + "epoch": 0.3640772003585316, + "grad_norm": 1.014494081749498, + "learning_rate": 9.986117735007575e-06, + "loss": 0.2716, + "step": 28230 + }, + { + "epoch": 0.3642061685485275, + "grad_norm": 1.3289111996621454, + "learning_rate": 9.986061806762713e-06, + "loss": 0.2624, + "step": 28240 + }, + { + "epoch": 0.36433513673852347, + "grad_norm": 1.1363225258002536, + "learning_rate": 9.986005766240869e-06, + "loss": 0.2583, + "step": 28250 + }, + { + "epoch": 0.3644641049285194, + "grad_norm": 1.0643818556512967, + "learning_rate": 9.985949613443308e-06, + "loss": 0.2621, + "step": 28260 + }, + { + "epoch": 0.3645930731185153, + "grad_norm": 1.2956852101932141, + "learning_rate": 9.98589334837129e-06, + "loss": 0.2737, + "step": 28270 + }, + { + "epoch": 0.36472204130851127, + "grad_norm": 1.0561453415042423, + "learning_rate": 9.985836971026086e-06, + "loss": 0.2678, + "step": 28280 + }, + { + "epoch": 0.3648510094985072, + "grad_norm": 1.15973840967292, + "learning_rate": 9.985780481408962e-06, + "loss": 0.2874, + "step": 28290 + }, + { + "epoch": 0.36497997768850315, + "grad_norm": 1.140822152955673, + "learning_rate": 9.985723879521192e-06, + "loss": 0.2536, + "step": 28300 + }, + { + "epoch": 0.36510894587849907, + "grad_norm": 1.1336263619833729, + "learning_rate": 9.98566716536405e-06, + "loss": 0.2713, + "step": 28310 + }, + { + "epoch": 0.365237914068495, + "grad_norm": 1.0755193901377562, + "learning_rate": 9.985610338938813e-06, + "loss": 0.2598, + "step": 28320 + }, + { + "epoch": 0.36536688225849095, + "grad_norm": 0.9734880314946623, + "learning_rate": 9.985553400246764e-06, + "loss": 0.2751, + "step": 28330 + }, + { + "epoch": 0.36549585044848687, + "grad_norm": 1.1642887400679816, + "learning_rate": 9.98549634928918e-06, + "loss": 0.2631, + "step": 28340 + }, + { + "epoch": 0.36562481863848284, + "grad_norm": 1.086377955462144, + "learning_rate": 9.985439186067347e-06, + "loss": 0.266, + "step": 28350 + }, + { + "epoch": 0.36575378682847876, + "grad_norm": 1.0232592645831566, + "learning_rate": 9.985381910582554e-06, + "loss": 0.2691, + "step": 28360 + }, + { + "epoch": 0.36588275501847467, + "grad_norm": 1.0519442912373733, + "learning_rate": 9.98532452283609e-06, + "loss": 0.2686, + "step": 28370 + }, + { + "epoch": 0.36601172320847064, + "grad_norm": 1.033660718981243, + "learning_rate": 9.985267022829248e-06, + "loss": 0.2656, + "step": 28380 + }, + { + "epoch": 0.36614069139846656, + "grad_norm": 1.0835234864058907, + "learning_rate": 9.98520941056332e-06, + "loss": 0.2613, + "step": 28390 + }, + { + "epoch": 0.3662696595884625, + "grad_norm": 1.176351934887179, + "learning_rate": 9.985151686039607e-06, + "loss": 0.2674, + "step": 28400 + }, + { + "epoch": 0.36639862777845844, + "grad_norm": 1.0360551706120025, + "learning_rate": 9.985093849259407e-06, + "loss": 0.2659, + "step": 28410 + }, + { + "epoch": 0.36652759596845436, + "grad_norm": 1.109234816906115, + "learning_rate": 9.98503590022402e-06, + "loss": 0.2742, + "step": 28420 + }, + { + "epoch": 0.3666565641584503, + "grad_norm": 1.113306264351864, + "learning_rate": 9.984977838934755e-06, + "loss": 0.2743, + "step": 28430 + }, + { + "epoch": 0.36678553234844624, + "grad_norm": 1.0842497592024085, + "learning_rate": 9.984919665392919e-06, + "loss": 0.26, + "step": 28440 + }, + { + "epoch": 0.3669145005384422, + "grad_norm": 1.142495388178321, + "learning_rate": 9.98486137959982e-06, + "loss": 0.2684, + "step": 28450 + }, + { + "epoch": 0.3670434687284381, + "grad_norm": 1.2034762227979596, + "learning_rate": 9.98480298155677e-06, + "loss": 0.2617, + "step": 28460 + }, + { + "epoch": 0.36717243691843404, + "grad_norm": 1.111412174383252, + "learning_rate": 9.984744471265086e-06, + "loss": 0.2646, + "step": 28470 + }, + { + "epoch": 0.36730140510843, + "grad_norm": 0.9825003355454277, + "learning_rate": 9.984685848726085e-06, + "loss": 0.2579, + "step": 28480 + }, + { + "epoch": 0.3674303732984259, + "grad_norm": 1.054341258238923, + "learning_rate": 9.984627113941088e-06, + "loss": 0.2591, + "step": 28490 + }, + { + "epoch": 0.3675593414884219, + "grad_norm": 1.0916109274259502, + "learning_rate": 9.984568266911413e-06, + "loss": 0.2695, + "step": 28500 + }, + { + "epoch": 0.3676883096784178, + "grad_norm": 0.9795504883127661, + "learning_rate": 9.984509307638392e-06, + "loss": 0.2533, + "step": 28510 + }, + { + "epoch": 0.3678172778684138, + "grad_norm": 0.9887294056330189, + "learning_rate": 9.984450236123349e-06, + "loss": 0.2771, + "step": 28520 + }, + { + "epoch": 0.3679462460584097, + "grad_norm": 1.0597173436025342, + "learning_rate": 9.984391052367613e-06, + "loss": 0.2659, + "step": 28530 + }, + { + "epoch": 0.3680752142484056, + "grad_norm": 1.1293078407464328, + "learning_rate": 9.984331756372519e-06, + "loss": 0.2773, + "step": 28540 + }, + { + "epoch": 0.3682041824384016, + "grad_norm": 1.1717756551769607, + "learning_rate": 9.984272348139402e-06, + "loss": 0.2664, + "step": 28550 + }, + { + "epoch": 0.3683331506283975, + "grad_norm": 1.0033899963600161, + "learning_rate": 9.984212827669597e-06, + "loss": 0.2546, + "step": 28560 + }, + { + "epoch": 0.36846211881839347, + "grad_norm": 1.0074955564911403, + "learning_rate": 9.984153194964448e-06, + "loss": 0.2654, + "step": 28570 + }, + { + "epoch": 0.3685910870083894, + "grad_norm": 1.026655384329462, + "learning_rate": 9.984093450025295e-06, + "loss": 0.2592, + "step": 28580 + }, + { + "epoch": 0.3687200551983853, + "grad_norm": 1.0953086348073566, + "learning_rate": 9.984033592853486e-06, + "loss": 0.2806, + "step": 28590 + }, + { + "epoch": 0.36884902338838127, + "grad_norm": 1.1237249839518455, + "learning_rate": 9.983973623450367e-06, + "loss": 0.2702, + "step": 28600 + }, + { + "epoch": 0.3689779915783772, + "grad_norm": 0.9663351908911127, + "learning_rate": 9.98391354181729e-06, + "loss": 0.2848, + "step": 28610 + }, + { + "epoch": 0.36910695976837316, + "grad_norm": 1.0567903865087525, + "learning_rate": 9.983853347955606e-06, + "loss": 0.2501, + "step": 28620 + }, + { + "epoch": 0.36923592795836907, + "grad_norm": 1.1327141854539835, + "learning_rate": 9.98379304186667e-06, + "loss": 0.2608, + "step": 28630 + }, + { + "epoch": 0.369364896148365, + "grad_norm": 1.1051886083881752, + "learning_rate": 9.983732623551842e-06, + "loss": 0.2648, + "step": 28640 + }, + { + "epoch": 0.36949386433836096, + "grad_norm": 1.0783584994441053, + "learning_rate": 9.983672093012482e-06, + "loss": 0.2608, + "step": 28650 + }, + { + "epoch": 0.36962283252835687, + "grad_norm": 1.0536290152213545, + "learning_rate": 9.983611450249952e-06, + "loss": 0.2583, + "step": 28660 + }, + { + "epoch": 0.36975180071835284, + "grad_norm": 1.1442419529934682, + "learning_rate": 9.983550695265621e-06, + "loss": 0.2607, + "step": 28670 + }, + { + "epoch": 0.36988076890834876, + "grad_norm": 1.1094895843531498, + "learning_rate": 9.983489828060853e-06, + "loss": 0.2586, + "step": 28680 + }, + { + "epoch": 0.37000973709834467, + "grad_norm": 1.0448775257270886, + "learning_rate": 9.98342884863702e-06, + "loss": 0.2551, + "step": 28690 + }, + { + "epoch": 0.37013870528834064, + "grad_norm": 1.0658327226751005, + "learning_rate": 9.983367756995493e-06, + "loss": 0.2858, + "step": 28700 + }, + { + "epoch": 0.37026767347833656, + "grad_norm": 1.0156598222401165, + "learning_rate": 9.983306553137652e-06, + "loss": 0.2684, + "step": 28710 + }, + { + "epoch": 0.3703966416683325, + "grad_norm": 1.1072304993812732, + "learning_rate": 9.983245237064873e-06, + "loss": 0.2712, + "step": 28720 + }, + { + "epoch": 0.37052560985832844, + "grad_norm": 1.1491748818906944, + "learning_rate": 9.983183808778537e-06, + "loss": 0.2627, + "step": 28730 + }, + { + "epoch": 0.37065457804832436, + "grad_norm": 1.0586549881543013, + "learning_rate": 9.983122268280027e-06, + "loss": 0.2574, + "step": 28740 + }, + { + "epoch": 0.3707835462383203, + "grad_norm": 1.1175348820287578, + "learning_rate": 9.983060615570728e-06, + "loss": 0.2662, + "step": 28750 + }, + { + "epoch": 0.37091251442831624, + "grad_norm": 1.0416997386594962, + "learning_rate": 9.98299885065203e-06, + "loss": 0.2612, + "step": 28760 + }, + { + "epoch": 0.3710414826183122, + "grad_norm": 1.0906036650609252, + "learning_rate": 9.982936973525323e-06, + "loss": 0.2798, + "step": 28770 + }, + { + "epoch": 0.3711704508083081, + "grad_norm": 1.1722894356656852, + "learning_rate": 9.982874984191999e-06, + "loss": 0.2586, + "step": 28780 + }, + { + "epoch": 0.37129941899830404, + "grad_norm": 1.162253310377609, + "learning_rate": 9.982812882653456e-06, + "loss": 0.2542, + "step": 28790 + }, + { + "epoch": 0.3714283871883, + "grad_norm": 1.0435231708477328, + "learning_rate": 9.982750668911094e-06, + "loss": 0.2596, + "step": 28800 + }, + { + "epoch": 0.37155735537829593, + "grad_norm": 1.1917886102422737, + "learning_rate": 9.98268834296631e-06, + "loss": 0.2688, + "step": 28810 + }, + { + "epoch": 0.3716863235682919, + "grad_norm": 1.2036849669688519, + "learning_rate": 9.982625904820506e-06, + "loss": 0.2762, + "step": 28820 + }, + { + "epoch": 0.3718152917582878, + "grad_norm": 1.1337113226624085, + "learning_rate": 9.982563354475095e-06, + "loss": 0.2685, + "step": 28830 + }, + { + "epoch": 0.37194425994828373, + "grad_norm": 1.1434624940662883, + "learning_rate": 9.982500691931479e-06, + "loss": 0.2566, + "step": 28840 + }, + { + "epoch": 0.3720732281382797, + "grad_norm": 1.0565802317957393, + "learning_rate": 9.982437917191074e-06, + "loss": 0.2603, + "step": 28850 + }, + { + "epoch": 0.3722021963282756, + "grad_norm": 1.0241393485952814, + "learning_rate": 9.98237503025529e-06, + "loss": 0.2809, + "step": 28860 + }, + { + "epoch": 0.3723311645182716, + "grad_norm": 1.0067586242599524, + "learning_rate": 9.982312031125542e-06, + "loss": 0.2623, + "step": 28870 + }, + { + "epoch": 0.3724601327082675, + "grad_norm": 1.0128917246733207, + "learning_rate": 9.982248919803254e-06, + "loss": 0.2514, + "step": 28880 + }, + { + "epoch": 0.37258910089826347, + "grad_norm": 0.9946862061264281, + "learning_rate": 9.982185696289844e-06, + "loss": 0.2668, + "step": 28890 + }, + { + "epoch": 0.3727180690882594, + "grad_norm": 1.2015721258263712, + "learning_rate": 9.982122360586734e-06, + "loss": 0.2541, + "step": 28900 + }, + { + "epoch": 0.3728470372782553, + "grad_norm": 1.1901753144947402, + "learning_rate": 9.982058912695354e-06, + "loss": 0.2805, + "step": 28910 + }, + { + "epoch": 0.37297600546825127, + "grad_norm": 1.0917118017588305, + "learning_rate": 9.981995352617129e-06, + "loss": 0.2628, + "step": 28920 + }, + { + "epoch": 0.3731049736582472, + "grad_norm": 0.992018115181419, + "learning_rate": 9.981931680353492e-06, + "loss": 0.2643, + "step": 28930 + }, + { + "epoch": 0.37323394184824316, + "grad_norm": 1.1095228342439294, + "learning_rate": 9.981867895905877e-06, + "loss": 0.2632, + "step": 28940 + }, + { + "epoch": 0.37336291003823907, + "grad_norm": 1.1358123927894974, + "learning_rate": 9.98180399927572e-06, + "loss": 0.2657, + "step": 28950 + }, + { + "epoch": 0.373491878228235, + "grad_norm": 1.0796113421495166, + "learning_rate": 9.981739990464461e-06, + "loss": 0.2657, + "step": 28960 + }, + { + "epoch": 0.37362084641823096, + "grad_norm": 1.050047053506503, + "learning_rate": 9.981675869473538e-06, + "loss": 0.2573, + "step": 28970 + }, + { + "epoch": 0.37374981460822687, + "grad_norm": 1.032196097802054, + "learning_rate": 9.981611636304399e-06, + "loss": 0.261, + "step": 28980 + }, + { + "epoch": 0.37387878279822284, + "grad_norm": 1.1515634764713232, + "learning_rate": 9.981547290958487e-06, + "loss": 0.2636, + "step": 28990 + }, + { + "epoch": 0.37400775098821876, + "grad_norm": 1.0669838645608913, + "learning_rate": 9.981482833437252e-06, + "loss": 0.2586, + "step": 29000 + }, + { + "epoch": 0.37413671917821467, + "grad_norm": 1.082684531235811, + "learning_rate": 9.981418263742148e-06, + "loss": 0.2731, + "step": 29010 + }, + { + "epoch": 0.37426568736821064, + "grad_norm": 1.0170445026560093, + "learning_rate": 9.981353581874627e-06, + "loss": 0.263, + "step": 29020 + }, + { + "epoch": 0.37439465555820656, + "grad_norm": 1.0996610532108784, + "learning_rate": 9.981288787836143e-06, + "loss": 0.2594, + "step": 29030 + }, + { + "epoch": 0.3745236237482025, + "grad_norm": 0.9741376292577005, + "learning_rate": 9.98122388162816e-06, + "loss": 0.265, + "step": 29040 + }, + { + "epoch": 0.37465259193819844, + "grad_norm": 1.0654354494509999, + "learning_rate": 9.981158863252136e-06, + "loss": 0.2476, + "step": 29050 + }, + { + "epoch": 0.37478156012819436, + "grad_norm": 0.9879561355749601, + "learning_rate": 9.981093732709535e-06, + "loss": 0.2505, + "step": 29060 + }, + { + "epoch": 0.37491052831819033, + "grad_norm": 1.0796601961346148, + "learning_rate": 9.981028490001826e-06, + "loss": 0.2543, + "step": 29070 + }, + { + "epoch": 0.37503949650818624, + "grad_norm": 0.9900632047944773, + "learning_rate": 9.980963135130477e-06, + "loss": 0.2651, + "step": 29080 + }, + { + "epoch": 0.3751684646981822, + "grad_norm": 1.1072018686089213, + "learning_rate": 9.980897668096959e-06, + "loss": 0.2711, + "step": 29090 + }, + { + "epoch": 0.37529743288817813, + "grad_norm": 1.0215183659675509, + "learning_rate": 9.980832088902747e-06, + "loss": 0.2646, + "step": 29100 + }, + { + "epoch": 0.37542640107817404, + "grad_norm": 1.0009373775142267, + "learning_rate": 9.980766397549318e-06, + "loss": 0.2564, + "step": 29110 + }, + { + "epoch": 0.37555536926817, + "grad_norm": 1.0710025527755203, + "learning_rate": 9.980700594038149e-06, + "loss": 0.2688, + "step": 29120 + }, + { + "epoch": 0.37568433745816593, + "grad_norm": 1.178349817457476, + "learning_rate": 9.980634678370726e-06, + "loss": 0.2626, + "step": 29130 + }, + { + "epoch": 0.3758133056481619, + "grad_norm": 1.055234703552256, + "learning_rate": 9.980568650548529e-06, + "loss": 0.2489, + "step": 29140 + }, + { + "epoch": 0.3759422738381578, + "grad_norm": 1.095125390702857, + "learning_rate": 9.980502510573047e-06, + "loss": 0.2803, + "step": 29150 + }, + { + "epoch": 0.37607124202815373, + "grad_norm": 0.9529351550211221, + "learning_rate": 9.98043625844577e-06, + "loss": 0.2655, + "step": 29160 + }, + { + "epoch": 0.3762002102181497, + "grad_norm": 1.1754064346252977, + "learning_rate": 9.980369894168186e-06, + "loss": 0.2703, + "step": 29170 + }, + { + "epoch": 0.3763291784081456, + "grad_norm": 1.017025584731662, + "learning_rate": 9.980303417741796e-06, + "loss": 0.2677, + "step": 29180 + }, + { + "epoch": 0.3764581465981416, + "grad_norm": 1.0556559166754422, + "learning_rate": 9.98023682916809e-06, + "loss": 0.2626, + "step": 29190 + }, + { + "epoch": 0.3765871147881375, + "grad_norm": 1.050583646886929, + "learning_rate": 9.980170128448575e-06, + "loss": 0.2712, + "step": 29200 + }, + { + "epoch": 0.37671608297813347, + "grad_norm": 0.9710155798328216, + "learning_rate": 9.980103315584743e-06, + "loss": 0.2585, + "step": 29210 + }, + { + "epoch": 0.3768450511681294, + "grad_norm": 1.0629335228913612, + "learning_rate": 9.980036390578108e-06, + "loss": 0.278, + "step": 29220 + }, + { + "epoch": 0.3769740193581253, + "grad_norm": 1.0309962586035766, + "learning_rate": 9.979969353430172e-06, + "loss": 0.2635, + "step": 29230 + }, + { + "epoch": 0.37710298754812127, + "grad_norm": 1.018468438870126, + "learning_rate": 9.979902204142446e-06, + "loss": 0.2582, + "step": 29240 + }, + { + "epoch": 0.3772319557381172, + "grad_norm": 1.088234395274512, + "learning_rate": 9.979834942716442e-06, + "loss": 0.2539, + "step": 29250 + }, + { + "epoch": 0.37736092392811316, + "grad_norm": 1.0315759946210796, + "learning_rate": 9.979767569153674e-06, + "loss": 0.2639, + "step": 29260 + }, + { + "epoch": 0.37748989211810907, + "grad_norm": 1.0437445064483653, + "learning_rate": 9.97970008345566e-06, + "loss": 0.2811, + "step": 29270 + }, + { + "epoch": 0.377618860308105, + "grad_norm": 1.0670841668374416, + "learning_rate": 9.979632485623919e-06, + "loss": 0.2532, + "step": 29280 + }, + { + "epoch": 0.37774782849810096, + "grad_norm": 1.0911307364233098, + "learning_rate": 9.97956477565997e-06, + "loss": 0.2707, + "step": 29290 + }, + { + "epoch": 0.37787679668809687, + "grad_norm": 1.021182348589321, + "learning_rate": 9.979496953565346e-06, + "loss": 0.2646, + "step": 29300 + }, + { + "epoch": 0.37800576487809284, + "grad_norm": 1.0243945012232902, + "learning_rate": 9.979429019341566e-06, + "loss": 0.2588, + "step": 29310 + }, + { + "epoch": 0.37813473306808876, + "grad_norm": 1.0345877065899347, + "learning_rate": 9.979360972990165e-06, + "loss": 0.2629, + "step": 29320 + }, + { + "epoch": 0.3782637012580847, + "grad_norm": 1.1142592068470436, + "learning_rate": 9.979292814512673e-06, + "loss": 0.2547, + "step": 29330 + }, + { + "epoch": 0.37839266944808064, + "grad_norm": 1.164141671684023, + "learning_rate": 9.979224543910625e-06, + "loss": 0.2672, + "step": 29340 + }, + { + "epoch": 0.37852163763807656, + "grad_norm": 1.0328279202182002, + "learning_rate": 9.979156161185558e-06, + "loss": 0.2606, + "step": 29350 + }, + { + "epoch": 0.37865060582807253, + "grad_norm": 1.1874114102878561, + "learning_rate": 9.979087666339012e-06, + "loss": 0.2588, + "step": 29360 + }, + { + "epoch": 0.37877957401806844, + "grad_norm": 1.0930143048407595, + "learning_rate": 9.97901905937253e-06, + "loss": 0.2783, + "step": 29370 + }, + { + "epoch": 0.37890854220806436, + "grad_norm": 0.9878521685272389, + "learning_rate": 9.978950340287657e-06, + "loss": 0.2667, + "step": 29380 + }, + { + "epoch": 0.37903751039806033, + "grad_norm": 1.0227712848581976, + "learning_rate": 9.97888150908594e-06, + "loss": 0.2727, + "step": 29390 + }, + { + "epoch": 0.37916647858805624, + "grad_norm": 1.045041815493671, + "learning_rate": 9.97881256576893e-06, + "loss": 0.2428, + "step": 29400 + }, + { + "epoch": 0.3792954467780522, + "grad_norm": 1.1045644965610009, + "learning_rate": 9.978743510338177e-06, + "loss": 0.2579, + "step": 29410 + }, + { + "epoch": 0.37942441496804813, + "grad_norm": 1.0003840041451535, + "learning_rate": 9.978674342795239e-06, + "loss": 0.2661, + "step": 29420 + }, + { + "epoch": 0.37955338315804404, + "grad_norm": 1.0436682785722213, + "learning_rate": 9.978605063141668e-06, + "loss": 0.2963, + "step": 29430 + }, + { + "epoch": 0.37968235134804, + "grad_norm": 1.0755390274082408, + "learning_rate": 9.978535671379033e-06, + "loss": 0.2624, + "step": 29440 + }, + { + "epoch": 0.37981131953803593, + "grad_norm": 1.047614573552154, + "learning_rate": 9.97846616750889e-06, + "loss": 0.2601, + "step": 29450 + }, + { + "epoch": 0.3799402877280319, + "grad_norm": 1.1350591256326505, + "learning_rate": 9.978396551532803e-06, + "loss": 0.2626, + "step": 29460 + }, + { + "epoch": 0.3800692559180278, + "grad_norm": 1.0212312769203655, + "learning_rate": 9.978326823452345e-06, + "loss": 0.2587, + "step": 29470 + }, + { + "epoch": 0.38019822410802373, + "grad_norm": 1.0883138126608798, + "learning_rate": 9.978256983269083e-06, + "loss": 0.2592, + "step": 29480 + }, + { + "epoch": 0.3803271922980197, + "grad_norm": 1.0064960354226824, + "learning_rate": 9.97818703098459e-06, + "loss": 0.2602, + "step": 29490 + }, + { + "epoch": 0.3804561604880156, + "grad_norm": 1.0659642105022913, + "learning_rate": 9.978116966600442e-06, + "loss": 0.2566, + "step": 29500 + }, + { + "epoch": 0.3805851286780116, + "grad_norm": 1.060720368775864, + "learning_rate": 9.978046790118214e-06, + "loss": 0.2695, + "step": 29510 + }, + { + "epoch": 0.3807140968680075, + "grad_norm": 1.0206256464010044, + "learning_rate": 9.97797650153949e-06, + "loss": 0.2588, + "step": 29520 + }, + { + "epoch": 0.3808430650580034, + "grad_norm": 1.1412393722200913, + "learning_rate": 9.97790610086585e-06, + "loss": 0.2585, + "step": 29530 + }, + { + "epoch": 0.3809720332479994, + "grad_norm": 1.0761153538234158, + "learning_rate": 9.977835588098884e-06, + "loss": 0.2716, + "step": 29540 + }, + { + "epoch": 0.3811010014379953, + "grad_norm": 0.9460031781370098, + "learning_rate": 9.977764963240172e-06, + "loss": 0.2681, + "step": 29550 + }, + { + "epoch": 0.38122996962799127, + "grad_norm": 0.99894783545229, + "learning_rate": 9.977694226291311e-06, + "loss": 0.2627, + "step": 29560 + }, + { + "epoch": 0.3813589378179872, + "grad_norm": 0.9884448115576078, + "learning_rate": 9.97762337725389e-06, + "loss": 0.2496, + "step": 29570 + }, + { + "epoch": 0.38148790600798316, + "grad_norm": 1.0102853544841086, + "learning_rate": 9.977552416129506e-06, + "loss": 0.2628, + "step": 29580 + }, + { + "epoch": 0.3816168741979791, + "grad_norm": 1.109218522347383, + "learning_rate": 9.977481342919756e-06, + "loss": 0.2625, + "step": 29590 + }, + { + "epoch": 0.381745842387975, + "grad_norm": 1.0883558536754547, + "learning_rate": 9.977410157626241e-06, + "loss": 0.2649, + "step": 29600 + }, + { + "epoch": 0.38187481057797096, + "grad_norm": 1.1191848388149181, + "learning_rate": 9.977338860250567e-06, + "loss": 0.2584, + "step": 29610 + }, + { + "epoch": 0.3820037787679669, + "grad_norm": 1.0853286674950755, + "learning_rate": 9.977267450794334e-06, + "loss": 0.2569, + "step": 29620 + }, + { + "epoch": 0.38213274695796284, + "grad_norm": 0.9985307973067001, + "learning_rate": 9.977195929259153e-06, + "loss": 0.2607, + "step": 29630 + }, + { + "epoch": 0.38226171514795876, + "grad_norm": 0.9993982182213839, + "learning_rate": 9.977124295646634e-06, + "loss": 0.2697, + "step": 29640 + }, + { + "epoch": 0.3823906833379547, + "grad_norm": 1.054824075146435, + "learning_rate": 9.977052549958391e-06, + "loss": 0.2551, + "step": 29650 + }, + { + "epoch": 0.38251965152795064, + "grad_norm": 1.1545743003653657, + "learning_rate": 9.97698069219604e-06, + "loss": 0.2689, + "step": 29660 + }, + { + "epoch": 0.38264861971794656, + "grad_norm": 1.0953925792913868, + "learning_rate": 9.976908722361196e-06, + "loss": 0.2714, + "step": 29670 + }, + { + "epoch": 0.38277758790794253, + "grad_norm": 1.0136906849912983, + "learning_rate": 9.97683664045548e-06, + "loss": 0.2532, + "step": 29680 + }, + { + "epoch": 0.38290655609793844, + "grad_norm": 1.096082167108456, + "learning_rate": 9.97676444648052e-06, + "loss": 0.2684, + "step": 29690 + }, + { + "epoch": 0.38303552428793436, + "grad_norm": 1.0306111341766893, + "learning_rate": 9.976692140437938e-06, + "loss": 0.2674, + "step": 29700 + }, + { + "epoch": 0.38316449247793033, + "grad_norm": 1.0625565897491742, + "learning_rate": 9.97661972232936e-06, + "loss": 0.2503, + "step": 29710 + }, + { + "epoch": 0.38329346066792624, + "grad_norm": 1.0369779990319687, + "learning_rate": 9.976547192156421e-06, + "loss": 0.2803, + "step": 29720 + }, + { + "epoch": 0.3834224288579222, + "grad_norm": 0.9616925385010496, + "learning_rate": 9.976474549920751e-06, + "loss": 0.2679, + "step": 29730 + }, + { + "epoch": 0.38355139704791813, + "grad_norm": 1.1690314353016675, + "learning_rate": 9.97640179562399e-06, + "loss": 0.2535, + "step": 29740 + }, + { + "epoch": 0.38368036523791405, + "grad_norm": 1.1450408521461826, + "learning_rate": 9.97632892926777e-06, + "loss": 0.2466, + "step": 29750 + }, + { + "epoch": 0.38380933342791, + "grad_norm": 1.1169180404743477, + "learning_rate": 9.97625595085374e-06, + "loss": 0.2603, + "step": 29760 + }, + { + "epoch": 0.38393830161790593, + "grad_norm": 1.1152374811297499, + "learning_rate": 9.976182860383535e-06, + "loss": 0.2527, + "step": 29770 + }, + { + "epoch": 0.3840672698079019, + "grad_norm": 1.052736438995604, + "learning_rate": 9.976109657858805e-06, + "loss": 0.2649, + "step": 29780 + }, + { + "epoch": 0.3841962379978978, + "grad_norm": 0.9992623121322249, + "learning_rate": 9.9760363432812e-06, + "loss": 0.2557, + "step": 29790 + }, + { + "epoch": 0.38432520618789373, + "grad_norm": 1.1082106059560761, + "learning_rate": 9.975962916652369e-06, + "loss": 0.2685, + "step": 29800 + }, + { + "epoch": 0.3844541743778897, + "grad_norm": 1.1035866724994443, + "learning_rate": 9.975889377973964e-06, + "loss": 0.2723, + "step": 29810 + }, + { + "epoch": 0.3845831425678856, + "grad_norm": 1.0094532138615175, + "learning_rate": 9.975815727247641e-06, + "loss": 0.2448, + "step": 29820 + }, + { + "epoch": 0.3847121107578816, + "grad_norm": 0.9784196375845463, + "learning_rate": 9.97574196447506e-06, + "loss": 0.2731, + "step": 29830 + }, + { + "epoch": 0.3848410789478775, + "grad_norm": 0.9376014751850893, + "learning_rate": 9.975668089657884e-06, + "loss": 0.2646, + "step": 29840 + }, + { + "epoch": 0.3849700471378734, + "grad_norm": 1.0743977211186078, + "learning_rate": 9.975594102797774e-06, + "loss": 0.2607, + "step": 29850 + }, + { + "epoch": 0.3850990153278694, + "grad_norm": 0.9833734939174894, + "learning_rate": 9.975520003896395e-06, + "loss": 0.2621, + "step": 29860 + }, + { + "epoch": 0.3852279835178653, + "grad_norm": 1.0272818147566163, + "learning_rate": 9.975445792955417e-06, + "loss": 0.244, + "step": 29870 + }, + { + "epoch": 0.3853569517078613, + "grad_norm": 1.079573243826737, + "learning_rate": 9.975371469976512e-06, + "loss": 0.2735, + "step": 29880 + }, + { + "epoch": 0.3854859198978572, + "grad_norm": 1.0133511513227416, + "learning_rate": 9.97529703496135e-06, + "loss": 0.2703, + "step": 29890 + }, + { + "epoch": 0.38561488808785316, + "grad_norm": 1.0922737456784484, + "learning_rate": 9.975222487911614e-06, + "loss": 0.2588, + "step": 29900 + }, + { + "epoch": 0.3857438562778491, + "grad_norm": 1.0781873927225816, + "learning_rate": 9.975147828828975e-06, + "loss": 0.2697, + "step": 29910 + }, + { + "epoch": 0.385872824467845, + "grad_norm": 1.101113574041866, + "learning_rate": 9.975073057715119e-06, + "loss": 0.2721, + "step": 29920 + }, + { + "epoch": 0.38600179265784096, + "grad_norm": 1.1622975484647955, + "learning_rate": 9.974998174571728e-06, + "loss": 0.2653, + "step": 29930 + }, + { + "epoch": 0.3861307608478369, + "grad_norm": 0.9990454444921801, + "learning_rate": 9.974923179400489e-06, + "loss": 0.2576, + "step": 29940 + }, + { + "epoch": 0.38625972903783284, + "grad_norm": 1.0423116379938955, + "learning_rate": 9.97484807220309e-06, + "loss": 0.263, + "step": 29950 + }, + { + "epoch": 0.38638869722782876, + "grad_norm": 1.063311636434383, + "learning_rate": 9.974772852981222e-06, + "loss": 0.2504, + "step": 29960 + }, + { + "epoch": 0.3865176654178247, + "grad_norm": 1.1954349024893411, + "learning_rate": 9.974697521736581e-06, + "loss": 0.2739, + "step": 29970 + }, + { + "epoch": 0.38664663360782064, + "grad_norm": 1.0153344094952703, + "learning_rate": 9.974622078470861e-06, + "loss": 0.2667, + "step": 29980 + }, + { + "epoch": 0.38677560179781656, + "grad_norm": 1.1693255219264198, + "learning_rate": 9.974546523185761e-06, + "loss": 0.2652, + "step": 29990 + }, + { + "epoch": 0.38690456998781253, + "grad_norm": 0.9601085176347178, + "learning_rate": 9.974470855882983e-06, + "loss": 0.2615, + "step": 30000 + }, + { + "epoch": 0.38703353817780844, + "grad_norm": 1.039875300444438, + "learning_rate": 9.974395076564232e-06, + "loss": 0.2721, + "step": 30010 + }, + { + "epoch": 0.38716250636780436, + "grad_norm": 1.0147612576700156, + "learning_rate": 9.974319185231212e-06, + "loss": 0.2515, + "step": 30020 + }, + { + "epoch": 0.38729147455780033, + "grad_norm": 1.0306084319149755, + "learning_rate": 9.974243181885632e-06, + "loss": 0.2581, + "step": 30030 + }, + { + "epoch": 0.38742044274779625, + "grad_norm": 1.028075654584565, + "learning_rate": 9.974167066529205e-06, + "loss": 0.2561, + "step": 30040 + }, + { + "epoch": 0.3875494109377922, + "grad_norm": 1.0086249981379831, + "learning_rate": 9.974090839163648e-06, + "loss": 0.253, + "step": 30050 + }, + { + "epoch": 0.38767837912778813, + "grad_norm": 0.9700864836141215, + "learning_rate": 9.97401449979067e-06, + "loss": 0.2559, + "step": 30060 + }, + { + "epoch": 0.38780734731778405, + "grad_norm": 1.0464684295618776, + "learning_rate": 9.973938048411997e-06, + "loss": 0.2574, + "step": 30070 + }, + { + "epoch": 0.38793631550778, + "grad_norm": 1.0403009171627062, + "learning_rate": 9.973861485029346e-06, + "loss": 0.2548, + "step": 30080 + }, + { + "epoch": 0.38806528369777593, + "grad_norm": 1.0992804202567923, + "learning_rate": 9.973784809644443e-06, + "loss": 0.2535, + "step": 30090 + }, + { + "epoch": 0.3881942518877719, + "grad_norm": 0.9850008410477408, + "learning_rate": 9.973708022259014e-06, + "loss": 0.2579, + "step": 30100 + }, + { + "epoch": 0.3883232200777678, + "grad_norm": 1.07548737503066, + "learning_rate": 9.973631122874789e-06, + "loss": 0.256, + "step": 30110 + }, + { + "epoch": 0.38845218826776373, + "grad_norm": 1.1458546170836281, + "learning_rate": 9.9735541114935e-06, + "loss": 0.2522, + "step": 30120 + }, + { + "epoch": 0.3885811564577597, + "grad_norm": 0.9689941215474571, + "learning_rate": 9.973476988116877e-06, + "loss": 0.2785, + "step": 30130 + }, + { + "epoch": 0.3887101246477556, + "grad_norm": 1.0275532353724262, + "learning_rate": 9.973399752746663e-06, + "loss": 0.2513, + "step": 30140 + }, + { + "epoch": 0.3888390928377516, + "grad_norm": 1.0426317177624658, + "learning_rate": 9.973322405384593e-06, + "loss": 0.2777, + "step": 30150 + }, + { + "epoch": 0.3889680610277475, + "grad_norm": 0.924225817647003, + "learning_rate": 9.97324494603241e-06, + "loss": 0.2548, + "step": 30160 + }, + { + "epoch": 0.3890970292177434, + "grad_norm": 1.1080545996607607, + "learning_rate": 9.973167374691857e-06, + "loss": 0.255, + "step": 30170 + }, + { + "epoch": 0.3892259974077394, + "grad_norm": 0.9968248627894808, + "learning_rate": 9.973089691364684e-06, + "loss": 0.2619, + "step": 30180 + }, + { + "epoch": 0.3893549655977353, + "grad_norm": 0.9936858724053794, + "learning_rate": 9.973011896052635e-06, + "loss": 0.2563, + "step": 30190 + }, + { + "epoch": 0.3894839337877313, + "grad_norm": 1.0345918651128805, + "learning_rate": 9.972933988757468e-06, + "loss": 0.2742, + "step": 30200 + }, + { + "epoch": 0.3896129019777272, + "grad_norm": 1.0740958083755088, + "learning_rate": 9.972855969480932e-06, + "loss": 0.2728, + "step": 30210 + }, + { + "epoch": 0.3897418701677231, + "grad_norm": 1.0142915741938057, + "learning_rate": 9.972777838224788e-06, + "loss": 0.2546, + "step": 30220 + }, + { + "epoch": 0.3898708383577191, + "grad_norm": 0.8948786089300921, + "learning_rate": 9.972699594990793e-06, + "loss": 0.2504, + "step": 30230 + }, + { + "epoch": 0.389999806547715, + "grad_norm": 1.1645930504480377, + "learning_rate": 9.972621239780708e-06, + "loss": 0.2554, + "step": 30240 + }, + { + "epoch": 0.39012877473771096, + "grad_norm": 0.8751144445565747, + "learning_rate": 9.972542772596301e-06, + "loss": 0.2568, + "step": 30250 + }, + { + "epoch": 0.3902577429277069, + "grad_norm": 1.0164877296446826, + "learning_rate": 9.972464193439335e-06, + "loss": 0.2677, + "step": 30260 + }, + { + "epoch": 0.39038671111770284, + "grad_norm": 1.0276915916870681, + "learning_rate": 9.972385502311582e-06, + "loss": 0.2682, + "step": 30270 + }, + { + "epoch": 0.39051567930769876, + "grad_norm": 1.1146305141705533, + "learning_rate": 9.972306699214813e-06, + "loss": 0.2639, + "step": 30280 + }, + { + "epoch": 0.3906446474976947, + "grad_norm": 1.105148880237084, + "learning_rate": 9.972227784150802e-06, + "loss": 0.2608, + "step": 30290 + }, + { + "epoch": 0.39077361568769065, + "grad_norm": 1.0250479661788128, + "learning_rate": 9.972148757121327e-06, + "loss": 0.267, + "step": 30300 + }, + { + "epoch": 0.39090258387768656, + "grad_norm": 1.0569054591151836, + "learning_rate": 9.972069618128166e-06, + "loss": 0.2522, + "step": 30310 + }, + { + "epoch": 0.39103155206768253, + "grad_norm": 0.9536695553862793, + "learning_rate": 9.971990367173104e-06, + "loss": 0.2646, + "step": 30320 + }, + { + "epoch": 0.39116052025767845, + "grad_norm": 1.1021695206130875, + "learning_rate": 9.971911004257923e-06, + "loss": 0.2737, + "step": 30330 + }, + { + "epoch": 0.39128948844767436, + "grad_norm": 1.0874917269569921, + "learning_rate": 9.971831529384413e-06, + "loss": 0.2732, + "step": 30340 + }, + { + "epoch": 0.39141845663767033, + "grad_norm": 1.1138562060740862, + "learning_rate": 9.97175194255436e-06, + "loss": 0.2737, + "step": 30350 + }, + { + "epoch": 0.39154742482766625, + "grad_norm": 1.0597786693841504, + "learning_rate": 9.971672243769557e-06, + "loss": 0.2657, + "step": 30360 + }, + { + "epoch": 0.3916763930176622, + "grad_norm": 1.0448898834588092, + "learning_rate": 9.9715924330318e-06, + "loss": 0.2725, + "step": 30370 + }, + { + "epoch": 0.39180536120765813, + "grad_norm": 1.0270367131438682, + "learning_rate": 9.971512510342886e-06, + "loss": 0.253, + "step": 30380 + }, + { + "epoch": 0.39193432939765405, + "grad_norm": 1.0800555858815204, + "learning_rate": 9.971432475704615e-06, + "loss": 0.2694, + "step": 30390 + }, + { + "epoch": 0.39206329758765, + "grad_norm": 1.12880142749909, + "learning_rate": 9.971352329118786e-06, + "loss": 0.2708, + "step": 30400 + }, + { + "epoch": 0.39219226577764593, + "grad_norm": 1.0222982666191558, + "learning_rate": 9.971272070587208e-06, + "loss": 0.2614, + "step": 30410 + }, + { + "epoch": 0.3923212339676419, + "grad_norm": 1.0818185083410128, + "learning_rate": 9.971191700111687e-06, + "loss": 0.2416, + "step": 30420 + }, + { + "epoch": 0.3924502021576378, + "grad_norm": 1.1761456104819827, + "learning_rate": 9.971111217694032e-06, + "loss": 0.2642, + "step": 30430 + }, + { + "epoch": 0.39257917034763373, + "grad_norm": 1.0954387889714436, + "learning_rate": 9.971030623336056e-06, + "loss": 0.2681, + "step": 30440 + }, + { + "epoch": 0.3927081385376297, + "grad_norm": 0.9511535699046259, + "learning_rate": 9.970949917039574e-06, + "loss": 0.2524, + "step": 30450 + }, + { + "epoch": 0.3928371067276256, + "grad_norm": 0.954798070467782, + "learning_rate": 9.970869098806402e-06, + "loss": 0.2741, + "step": 30460 + }, + { + "epoch": 0.3929660749176216, + "grad_norm": 1.0424660470882774, + "learning_rate": 9.97078816863836e-06, + "loss": 0.2551, + "step": 30470 + }, + { + "epoch": 0.3930950431076175, + "grad_norm": 1.0813221107835091, + "learning_rate": 9.970707126537271e-06, + "loss": 0.2713, + "step": 30480 + }, + { + "epoch": 0.3932240112976134, + "grad_norm": 1.0736736580630182, + "learning_rate": 9.970625972504962e-06, + "loss": 0.254, + "step": 30490 + }, + { + "epoch": 0.3933529794876094, + "grad_norm": 0.9885421732159474, + "learning_rate": 9.970544706543257e-06, + "loss": 0.2411, + "step": 30500 + }, + { + "epoch": 0.3934819476776053, + "grad_norm": 1.0880471868559052, + "learning_rate": 9.970463328653989e-06, + "loss": 0.2671, + "step": 30510 + }, + { + "epoch": 0.3936109158676013, + "grad_norm": 1.1234373396852833, + "learning_rate": 9.970381838838988e-06, + "loss": 0.26, + "step": 30520 + }, + { + "epoch": 0.3937398840575972, + "grad_norm": 1.0177298094349427, + "learning_rate": 9.97030023710009e-06, + "loss": 0.2534, + "step": 30530 + }, + { + "epoch": 0.3938688522475931, + "grad_norm": 1.077028409047899, + "learning_rate": 9.970218523439132e-06, + "loss": 0.2737, + "step": 30540 + }, + { + "epoch": 0.3939978204375891, + "grad_norm": 1.1103666704889288, + "learning_rate": 9.970136697857956e-06, + "loss": 0.258, + "step": 30550 + }, + { + "epoch": 0.394126788627585, + "grad_norm": 1.0180294821350422, + "learning_rate": 9.970054760358402e-06, + "loss": 0.2563, + "step": 30560 + }, + { + "epoch": 0.39425575681758096, + "grad_norm": 1.045828475824024, + "learning_rate": 9.969972710942318e-06, + "loss": 0.2479, + "step": 30570 + }, + { + "epoch": 0.3943847250075769, + "grad_norm": 1.0520369027165304, + "learning_rate": 9.969890549611548e-06, + "loss": 0.2667, + "step": 30580 + }, + { + "epoch": 0.39451369319757285, + "grad_norm": 1.0318962608848046, + "learning_rate": 9.969808276367942e-06, + "loss": 0.2604, + "step": 30590 + }, + { + "epoch": 0.39464266138756876, + "grad_norm": 1.0897223961352398, + "learning_rate": 9.969725891213358e-06, + "loss": 0.2577, + "step": 30600 + }, + { + "epoch": 0.3947716295775647, + "grad_norm": 1.0515555926574391, + "learning_rate": 9.969643394149646e-06, + "loss": 0.2574, + "step": 30610 + }, + { + "epoch": 0.39490059776756065, + "grad_norm": 1.0298055380647633, + "learning_rate": 9.969560785178667e-06, + "loss": 0.2615, + "step": 30620 + }, + { + "epoch": 0.39502956595755656, + "grad_norm": 1.107935540747185, + "learning_rate": 9.969478064302277e-06, + "loss": 0.264, + "step": 30630 + }, + { + "epoch": 0.39515853414755253, + "grad_norm": 1.0078573077308142, + "learning_rate": 9.969395231522344e-06, + "loss": 0.2595, + "step": 30640 + }, + { + "epoch": 0.39528750233754845, + "grad_norm": 1.076789155539305, + "learning_rate": 9.969312286840729e-06, + "loss": 0.2669, + "step": 30650 + }, + { + "epoch": 0.39541647052754436, + "grad_norm": 1.0395661264138962, + "learning_rate": 9.969229230259302e-06, + "loss": 0.2733, + "step": 30660 + }, + { + "epoch": 0.39554543871754033, + "grad_norm": 1.0417693173234166, + "learning_rate": 9.969146061779934e-06, + "loss": 0.2638, + "step": 30670 + }, + { + "epoch": 0.39567440690753625, + "grad_norm": 1.065507696045954, + "learning_rate": 9.969062781404493e-06, + "loss": 0.274, + "step": 30680 + }, + { + "epoch": 0.3958033750975322, + "grad_norm": 1.1173108704552157, + "learning_rate": 9.968979389134859e-06, + "loss": 0.2488, + "step": 30690 + }, + { + "epoch": 0.39593234328752813, + "grad_norm": 1.036027831232659, + "learning_rate": 9.96889588497291e-06, + "loss": 0.2705, + "step": 30700 + }, + { + "epoch": 0.39606131147752405, + "grad_norm": 0.98628790267994, + "learning_rate": 9.968812268920524e-06, + "loss": 0.2642, + "step": 30710 + }, + { + "epoch": 0.39619027966752, + "grad_norm": 0.9556626447703208, + "learning_rate": 9.968728540979584e-06, + "loss": 0.2431, + "step": 30720 + }, + { + "epoch": 0.39631924785751593, + "grad_norm": 1.0776636895089275, + "learning_rate": 9.968644701151976e-06, + "loss": 0.2596, + "step": 30730 + }, + { + "epoch": 0.3964482160475119, + "grad_norm": 0.9336588197358138, + "learning_rate": 9.96856074943959e-06, + "loss": 0.2685, + "step": 30740 + }, + { + "epoch": 0.3965771842375078, + "grad_norm": 1.0293464866117885, + "learning_rate": 9.968476685844314e-06, + "loss": 0.2675, + "step": 30750 + }, + { + "epoch": 0.39670615242750373, + "grad_norm": 1.0527813693853263, + "learning_rate": 9.968392510368043e-06, + "loss": 0.2767, + "step": 30760 + }, + { + "epoch": 0.3968351206174997, + "grad_norm": 1.0852814963163715, + "learning_rate": 9.968308223012668e-06, + "loss": 0.2689, + "step": 30770 + }, + { + "epoch": 0.3969640888074956, + "grad_norm": 1.0615259638358225, + "learning_rate": 9.968223823780092e-06, + "loss": 0.2656, + "step": 30780 + }, + { + "epoch": 0.3970930569974916, + "grad_norm": 1.0287408567601932, + "learning_rate": 9.968139312672213e-06, + "loss": 0.2655, + "step": 30790 + }, + { + "epoch": 0.3972220251874875, + "grad_norm": 1.1325060570825192, + "learning_rate": 9.968054689690934e-06, + "loss": 0.2575, + "step": 30800 + }, + { + "epoch": 0.3973509933774834, + "grad_norm": 1.0253156942746864, + "learning_rate": 9.967969954838161e-06, + "loss": 0.2596, + "step": 30810 + }, + { + "epoch": 0.3974799615674794, + "grad_norm": 0.9847348758886942, + "learning_rate": 9.967885108115803e-06, + "loss": 0.2546, + "step": 30820 + }, + { + "epoch": 0.3976089297574753, + "grad_norm": 0.9974060696938025, + "learning_rate": 9.96780014952577e-06, + "loss": 0.2408, + "step": 30830 + }, + { + "epoch": 0.3977378979474713, + "grad_norm": 0.9637855639539669, + "learning_rate": 9.967715079069976e-06, + "loss": 0.2603, + "step": 30840 + }, + { + "epoch": 0.3978668661374672, + "grad_norm": 1.1109038056838352, + "learning_rate": 9.967629896750332e-06, + "loss": 0.2546, + "step": 30850 + }, + { + "epoch": 0.3979958343274631, + "grad_norm": 1.0101134375042604, + "learning_rate": 9.967544602568762e-06, + "loss": 0.2649, + "step": 30860 + }, + { + "epoch": 0.3981248025174591, + "grad_norm": 1.1100888956667823, + "learning_rate": 9.967459196527185e-06, + "loss": 0.2586, + "step": 30870 + }, + { + "epoch": 0.398253770707455, + "grad_norm": 1.0968619600172689, + "learning_rate": 9.96737367862752e-06, + "loss": 0.2591, + "step": 30880 + }, + { + "epoch": 0.39838273889745096, + "grad_norm": 1.137693056445304, + "learning_rate": 9.967288048871701e-06, + "loss": 0.2609, + "step": 30890 + }, + { + "epoch": 0.3985117070874469, + "grad_norm": 1.0353003176537354, + "learning_rate": 9.96720230726165e-06, + "loss": 0.2572, + "step": 30900 + }, + { + "epoch": 0.39864067527744285, + "grad_norm": 1.0155139705392073, + "learning_rate": 9.967116453799298e-06, + "loss": 0.26, + "step": 30910 + }, + { + "epoch": 0.39876964346743876, + "grad_norm": 1.0416438208550958, + "learning_rate": 9.967030488486579e-06, + "loss": 0.2595, + "step": 30920 + }, + { + "epoch": 0.3988986116574347, + "grad_norm": 1.1548451194773084, + "learning_rate": 9.96694441132543e-06, + "loss": 0.2647, + "step": 30930 + }, + { + "epoch": 0.39902757984743065, + "grad_norm": 0.985047148101906, + "learning_rate": 9.96685822231779e-06, + "loss": 0.2648, + "step": 30940 + }, + { + "epoch": 0.39915654803742656, + "grad_norm": 1.041091565504057, + "learning_rate": 9.966771921465596e-06, + "loss": 0.2716, + "step": 30950 + }, + { + "epoch": 0.39928551622742253, + "grad_norm": 1.0479448109408291, + "learning_rate": 9.966685508770795e-06, + "loss": 0.2728, + "step": 30960 + }, + { + "epoch": 0.39941448441741845, + "grad_norm": 1.0923264960805912, + "learning_rate": 9.966598984235332e-06, + "loss": 0.2591, + "step": 30970 + }, + { + "epoch": 0.39954345260741436, + "grad_norm": 1.006484673200835, + "learning_rate": 9.966512347861154e-06, + "loss": 0.2706, + "step": 30980 + }, + { + "epoch": 0.39967242079741033, + "grad_norm": 1.0352725231693545, + "learning_rate": 9.966425599650214e-06, + "loss": 0.2545, + "step": 30990 + }, + { + "epoch": 0.39980138898740625, + "grad_norm": 1.0696255345568195, + "learning_rate": 9.966338739604462e-06, + "loss": 0.2578, + "step": 31000 + }, + { + "epoch": 0.3999303571774022, + "grad_norm": 1.0380944186167038, + "learning_rate": 9.966251767725858e-06, + "loss": 0.2573, + "step": 31010 + }, + { + "epoch": 0.40005932536739813, + "grad_norm": 1.0455170081056844, + "learning_rate": 9.966164684016357e-06, + "loss": 0.2687, + "step": 31020 + }, + { + "epoch": 0.40018829355739405, + "grad_norm": 0.9935839474307238, + "learning_rate": 9.966077488477924e-06, + "loss": 0.2557, + "step": 31030 + }, + { + "epoch": 0.40031726174739, + "grad_norm": 0.9776774946649284, + "learning_rate": 9.965990181112519e-06, + "loss": 0.2509, + "step": 31040 + }, + { + "epoch": 0.40044622993738593, + "grad_norm": 0.9582734465972417, + "learning_rate": 9.965902761922109e-06, + "loss": 0.2614, + "step": 31050 + }, + { + "epoch": 0.4005751981273819, + "grad_norm": 1.1298108858300928, + "learning_rate": 9.965815230908662e-06, + "loss": 0.2573, + "step": 31060 + }, + { + "epoch": 0.4007041663173778, + "grad_norm": 0.9930210172245599, + "learning_rate": 9.96572758807415e-06, + "loss": 0.2597, + "step": 31070 + }, + { + "epoch": 0.40083313450737373, + "grad_norm": 1.062962802053586, + "learning_rate": 9.965639833420547e-06, + "loss": 0.2615, + "step": 31080 + }, + { + "epoch": 0.4009621026973697, + "grad_norm": 1.1455452867139289, + "learning_rate": 9.965551966949827e-06, + "loss": 0.2666, + "step": 31090 + }, + { + "epoch": 0.4010910708873656, + "grad_norm": 0.9311025593682992, + "learning_rate": 9.96546398866397e-06, + "loss": 0.269, + "step": 31100 + }, + { + "epoch": 0.4012200390773616, + "grad_norm": 1.0082856012002832, + "learning_rate": 9.965375898564957e-06, + "loss": 0.271, + "step": 31110 + }, + { + "epoch": 0.4013490072673575, + "grad_norm": 1.0309414289595613, + "learning_rate": 9.965287696654772e-06, + "loss": 0.2768, + "step": 31120 + }, + { + "epoch": 0.4014779754573534, + "grad_norm": 0.9353058182867384, + "learning_rate": 9.965199382935403e-06, + "loss": 0.25, + "step": 31130 + }, + { + "epoch": 0.4016069436473494, + "grad_norm": 1.0374078480969953, + "learning_rate": 9.965110957408833e-06, + "loss": 0.2538, + "step": 31140 + }, + { + "epoch": 0.4017359118373453, + "grad_norm": 0.9404575281755976, + "learning_rate": 9.965022420077058e-06, + "loss": 0.2723, + "step": 31150 + }, + { + "epoch": 0.4018648800273413, + "grad_norm": 1.123622281296022, + "learning_rate": 9.964933770942069e-06, + "loss": 0.2498, + "step": 31160 + }, + { + "epoch": 0.4019938482173372, + "grad_norm": 1.071675002237236, + "learning_rate": 9.964845010005864e-06, + "loss": 0.2789, + "step": 31170 + }, + { + "epoch": 0.4021228164073331, + "grad_norm": 0.9835898550673275, + "learning_rate": 9.964756137270442e-06, + "loss": 0.2576, + "step": 31180 + }, + { + "epoch": 0.4022517845973291, + "grad_norm": 1.0295620963618888, + "learning_rate": 9.964667152737804e-06, + "loss": 0.2709, + "step": 31190 + }, + { + "epoch": 0.402380752787325, + "grad_norm": 1.0397598760051796, + "learning_rate": 9.964578056409952e-06, + "loss": 0.2603, + "step": 31200 + }, + { + "epoch": 0.40250972097732096, + "grad_norm": 1.1014150290648876, + "learning_rate": 9.964488848288894e-06, + "loss": 0.2639, + "step": 31210 + }, + { + "epoch": 0.4026386891673169, + "grad_norm": 1.119399486178015, + "learning_rate": 9.964399528376638e-06, + "loss": 0.251, + "step": 31220 + }, + { + "epoch": 0.4027676573573128, + "grad_norm": 1.06142959731974, + "learning_rate": 9.964310096675195e-06, + "loss": 0.2631, + "step": 31230 + }, + { + "epoch": 0.40289662554730876, + "grad_norm": 1.0610118017358465, + "learning_rate": 9.96422055318658e-06, + "loss": 0.2642, + "step": 31240 + }, + { + "epoch": 0.4030255937373047, + "grad_norm": 0.9331334053210637, + "learning_rate": 9.96413089791281e-06, + "loss": 0.2471, + "step": 31250 + }, + { + "epoch": 0.40315456192730065, + "grad_norm": 1.1405486869829888, + "learning_rate": 9.964041130855903e-06, + "loss": 0.2527, + "step": 31260 + }, + { + "epoch": 0.40328353011729656, + "grad_norm": 1.0229913808920317, + "learning_rate": 9.963951252017878e-06, + "loss": 0.2705, + "step": 31270 + }, + { + "epoch": 0.40341249830729253, + "grad_norm": 0.9224906565902371, + "learning_rate": 9.963861261400762e-06, + "loss": 0.2579, + "step": 31280 + }, + { + "epoch": 0.40354146649728845, + "grad_norm": 1.1045367007493894, + "learning_rate": 9.963771159006582e-06, + "loss": 0.2555, + "step": 31290 + }, + { + "epoch": 0.40367043468728436, + "grad_norm": 1.0990013929219582, + "learning_rate": 9.963680944837364e-06, + "loss": 0.2719, + "step": 31300 + }, + { + "epoch": 0.40379940287728033, + "grad_norm": 0.9602938304217662, + "learning_rate": 9.96359061889514e-06, + "loss": 0.2608, + "step": 31310 + }, + { + "epoch": 0.40392837106727625, + "grad_norm": 1.0533169587509479, + "learning_rate": 9.963500181181947e-06, + "loss": 0.27, + "step": 31320 + }, + { + "epoch": 0.4040573392572722, + "grad_norm": 1.0652715135244308, + "learning_rate": 9.963409631699816e-06, + "loss": 0.262, + "step": 31330 + }, + { + "epoch": 0.40418630744726813, + "grad_norm": 1.0862968342159143, + "learning_rate": 9.963318970450793e-06, + "loss": 0.2573, + "step": 31340 + }, + { + "epoch": 0.40431527563726405, + "grad_norm": 1.167186769625873, + "learning_rate": 9.963228197436913e-06, + "loss": 0.2623, + "step": 31350 + }, + { + "epoch": 0.40444424382726, + "grad_norm": 1.0712730016155894, + "learning_rate": 9.963137312660224e-06, + "loss": 0.2623, + "step": 31360 + }, + { + "epoch": 0.40457321201725593, + "grad_norm": 1.0183361804347353, + "learning_rate": 9.963046316122772e-06, + "loss": 0.2565, + "step": 31370 + }, + { + "epoch": 0.4047021802072519, + "grad_norm": 1.0501433615635212, + "learning_rate": 9.962955207826604e-06, + "loss": 0.2581, + "step": 31380 + }, + { + "epoch": 0.4048311483972478, + "grad_norm": 1.0343745403216877, + "learning_rate": 9.962863987773775e-06, + "loss": 0.2622, + "step": 31390 + }, + { + "epoch": 0.40496011658724373, + "grad_norm": 1.0924966498329063, + "learning_rate": 9.962772655966337e-06, + "loss": 0.2545, + "step": 31400 + }, + { + "epoch": 0.4050890847772397, + "grad_norm": 1.2880608210017916, + "learning_rate": 9.962681212406346e-06, + "loss": 0.2571, + "step": 31410 + }, + { + "epoch": 0.4052180529672356, + "grad_norm": 1.0185517717567945, + "learning_rate": 9.962589657095861e-06, + "loss": 0.2565, + "step": 31420 + }, + { + "epoch": 0.4053470211572316, + "grad_norm": 1.0210848072428789, + "learning_rate": 9.962497990036945e-06, + "loss": 0.261, + "step": 31430 + }, + { + "epoch": 0.4054759893472275, + "grad_norm": 1.059367095197627, + "learning_rate": 9.962406211231663e-06, + "loss": 0.26, + "step": 31440 + }, + { + "epoch": 0.4056049575372234, + "grad_norm": 1.0544710824946892, + "learning_rate": 9.96231432068208e-06, + "loss": 0.2602, + "step": 31450 + }, + { + "epoch": 0.4057339257272194, + "grad_norm": 1.0323972016932224, + "learning_rate": 9.962222318390267e-06, + "loss": 0.2597, + "step": 31460 + }, + { + "epoch": 0.4058628939172153, + "grad_norm": 1.019754582289751, + "learning_rate": 9.962130204358294e-06, + "loss": 0.2565, + "step": 31470 + }, + { + "epoch": 0.4059918621072113, + "grad_norm": 1.043080594432345, + "learning_rate": 9.962037978588233e-06, + "loss": 0.249, + "step": 31480 + }, + { + "epoch": 0.4061208302972072, + "grad_norm": 1.0325298955666835, + "learning_rate": 9.961945641082167e-06, + "loss": 0.2392, + "step": 31490 + }, + { + "epoch": 0.4062497984872031, + "grad_norm": 0.9898558346775008, + "learning_rate": 9.96185319184217e-06, + "loss": 0.2706, + "step": 31500 + }, + { + "epoch": 0.4063787666771991, + "grad_norm": 1.0861404451422, + "learning_rate": 9.961760630870324e-06, + "loss": 0.25, + "step": 31510 + }, + { + "epoch": 0.406507734867195, + "grad_norm": 1.0599269777195286, + "learning_rate": 9.961667958168718e-06, + "loss": 0.2673, + "step": 31520 + }, + { + "epoch": 0.40663670305719096, + "grad_norm": 1.0401658451615967, + "learning_rate": 9.961575173739434e-06, + "loss": 0.2626, + "step": 31530 + }, + { + "epoch": 0.4067656712471869, + "grad_norm": 1.024650734610949, + "learning_rate": 9.961482277584562e-06, + "loss": 0.2404, + "step": 31540 + }, + { + "epoch": 0.4068946394371828, + "grad_norm": 1.0423451259215388, + "learning_rate": 9.961389269706196e-06, + "loss": 0.2538, + "step": 31550 + }, + { + "epoch": 0.40702360762717876, + "grad_norm": 1.0242026116551408, + "learning_rate": 9.961296150106428e-06, + "loss": 0.2576, + "step": 31560 + }, + { + "epoch": 0.4071525758171747, + "grad_norm": 0.9429834916585094, + "learning_rate": 9.961202918787359e-06, + "loss": 0.2513, + "step": 31570 + }, + { + "epoch": 0.40728154400717065, + "grad_norm": 1.0095415041832687, + "learning_rate": 9.961109575751083e-06, + "loss": 0.2471, + "step": 31580 + }, + { + "epoch": 0.40741051219716656, + "grad_norm": 1.1637812074820135, + "learning_rate": 9.961016120999704e-06, + "loss": 0.2651, + "step": 31590 + }, + { + "epoch": 0.40753948038716253, + "grad_norm": 0.9456683555663327, + "learning_rate": 9.960922554535327e-06, + "loss": 0.2567, + "step": 31600 + }, + { + "epoch": 0.40766844857715845, + "grad_norm": 1.079766832437417, + "learning_rate": 9.96082887636006e-06, + "loss": 0.2649, + "step": 31610 + }, + { + "epoch": 0.40779741676715436, + "grad_norm": 0.9307375633299716, + "learning_rate": 9.96073508647601e-06, + "loss": 0.2613, + "step": 31620 + }, + { + "epoch": 0.40792638495715033, + "grad_norm": 1.0378329525972507, + "learning_rate": 9.960641184885291e-06, + "loss": 0.2689, + "step": 31630 + }, + { + "epoch": 0.40805535314714625, + "grad_norm": 0.9787889362645597, + "learning_rate": 9.960547171590016e-06, + "loss": 0.2551, + "step": 31640 + }, + { + "epoch": 0.4081843213371422, + "grad_norm": 1.1202801517667458, + "learning_rate": 9.960453046592303e-06, + "loss": 0.2628, + "step": 31650 + }, + { + "epoch": 0.40831328952713813, + "grad_norm": 1.0283950490084683, + "learning_rate": 9.960358809894271e-06, + "loss": 0.2654, + "step": 31660 + }, + { + "epoch": 0.40844225771713405, + "grad_norm": 1.1458700613889643, + "learning_rate": 9.960264461498044e-06, + "loss": 0.2662, + "step": 31670 + }, + { + "epoch": 0.40857122590713, + "grad_norm": 1.031074276318322, + "learning_rate": 9.960170001405742e-06, + "loss": 0.2639, + "step": 31680 + }, + { + "epoch": 0.40870019409712594, + "grad_norm": 0.9891933216798026, + "learning_rate": 9.960075429619496e-06, + "loss": 0.263, + "step": 31690 + }, + { + "epoch": 0.4088291622871219, + "grad_norm": 1.0947490287556363, + "learning_rate": 9.959980746141435e-06, + "loss": 0.2497, + "step": 31700 + }, + { + "epoch": 0.4089581304771178, + "grad_norm": 1.10403277392881, + "learning_rate": 9.95988595097369e-06, + "loss": 0.2562, + "step": 31710 + }, + { + "epoch": 0.40908709866711374, + "grad_norm": 0.8997553175167037, + "learning_rate": 9.959791044118397e-06, + "loss": 0.2525, + "step": 31720 + }, + { + "epoch": 0.4092160668571097, + "grad_norm": 1.0104856703021208, + "learning_rate": 9.959696025577691e-06, + "loss": 0.2506, + "step": 31730 + }, + { + "epoch": 0.4093450350471056, + "grad_norm": 1.0446744323740327, + "learning_rate": 9.959600895353714e-06, + "loss": 0.243, + "step": 31740 + }, + { + "epoch": 0.4094740032371016, + "grad_norm": 1.0079867965688303, + "learning_rate": 9.959505653448606e-06, + "loss": 0.2669, + "step": 31750 + }, + { + "epoch": 0.4096029714270975, + "grad_norm": 1.0283907656309614, + "learning_rate": 9.959410299864514e-06, + "loss": 0.2507, + "step": 31760 + }, + { + "epoch": 0.4097319396170934, + "grad_norm": 0.9773795603015645, + "learning_rate": 9.959314834603582e-06, + "loss": 0.2509, + "step": 31770 + }, + { + "epoch": 0.4098609078070894, + "grad_norm": 0.8721971570845644, + "learning_rate": 9.959219257667964e-06, + "loss": 0.2568, + "step": 31780 + }, + { + "epoch": 0.4099898759970853, + "grad_norm": 1.234814496952523, + "learning_rate": 9.959123569059808e-06, + "loss": 0.265, + "step": 31790 + }, + { + "epoch": 0.4101188441870813, + "grad_norm": 0.9648581659999719, + "learning_rate": 9.959027768781272e-06, + "loss": 0.2662, + "step": 31800 + }, + { + "epoch": 0.4102478123770772, + "grad_norm": 1.048019596451411, + "learning_rate": 9.958931856834511e-06, + "loss": 0.2602, + "step": 31810 + }, + { + "epoch": 0.4103767805670731, + "grad_norm": 1.091428049809515, + "learning_rate": 9.958835833221685e-06, + "loss": 0.2679, + "step": 31820 + }, + { + "epoch": 0.4105057487570691, + "grad_norm": 0.9999141584818492, + "learning_rate": 9.958739697944959e-06, + "loss": 0.2622, + "step": 31830 + }, + { + "epoch": 0.410634716947065, + "grad_norm": 1.025591115299325, + "learning_rate": 9.958643451006493e-06, + "loss": 0.2712, + "step": 31840 + }, + { + "epoch": 0.41076368513706096, + "grad_norm": 1.049136528316131, + "learning_rate": 9.958547092408459e-06, + "loss": 0.2642, + "step": 31850 + }, + { + "epoch": 0.4108926533270569, + "grad_norm": 1.0955455792362485, + "learning_rate": 9.958450622153024e-06, + "loss": 0.246, + "step": 31860 + }, + { + "epoch": 0.4110216215170528, + "grad_norm": 1.0302358030430718, + "learning_rate": 9.958354040242361e-06, + "loss": 0.2649, + "step": 31870 + }, + { + "epoch": 0.41115058970704876, + "grad_norm": 1.0328190824872605, + "learning_rate": 9.958257346678645e-06, + "loss": 0.2573, + "step": 31880 + }, + { + "epoch": 0.4112795578970447, + "grad_norm": 0.9001408095811982, + "learning_rate": 9.958160541464054e-06, + "loss": 0.2453, + "step": 31890 + }, + { + "epoch": 0.41140852608704065, + "grad_norm": 1.0731544972729272, + "learning_rate": 9.958063624600766e-06, + "loss": 0.2449, + "step": 31900 + }, + { + "epoch": 0.41153749427703656, + "grad_norm": 1.0445317778954903, + "learning_rate": 9.957966596090967e-06, + "loss": 0.2615, + "step": 31910 + }, + { + "epoch": 0.4116664624670325, + "grad_norm": 1.06750190558539, + "learning_rate": 9.957869455936838e-06, + "loss": 0.2541, + "step": 31920 + }, + { + "epoch": 0.41179543065702845, + "grad_norm": 0.9486090515832721, + "learning_rate": 9.957772204140568e-06, + "loss": 0.2403, + "step": 31930 + }, + { + "epoch": 0.41192439884702436, + "grad_norm": 1.056400304903925, + "learning_rate": 9.957674840704347e-06, + "loss": 0.265, + "step": 31940 + }, + { + "epoch": 0.41205336703702033, + "grad_norm": 1.0327596158370442, + "learning_rate": 9.957577365630367e-06, + "loss": 0.2639, + "step": 31950 + }, + { + "epoch": 0.41218233522701625, + "grad_norm": 1.002447926464062, + "learning_rate": 9.957479778920824e-06, + "loss": 0.2546, + "step": 31960 + }, + { + "epoch": 0.4123113034170122, + "grad_norm": 1.0307031050891529, + "learning_rate": 9.957382080577914e-06, + "loss": 0.2562, + "step": 31970 + }, + { + "epoch": 0.41244027160700814, + "grad_norm": 1.0472890726560975, + "learning_rate": 9.957284270603838e-06, + "loss": 0.265, + "step": 31980 + }, + { + "epoch": 0.41256923979700405, + "grad_norm": 1.1273411150220127, + "learning_rate": 9.957186349000797e-06, + "loss": 0.2675, + "step": 31990 + }, + { + "epoch": 0.412698207987, + "grad_norm": 1.13241013715791, + "learning_rate": 9.957088315770998e-06, + "loss": 0.2683, + "step": 32000 + }, + { + "epoch": 0.41282717617699594, + "grad_norm": 0.9705208549785529, + "learning_rate": 9.956990170916648e-06, + "loss": 0.2526, + "step": 32010 + }, + { + "epoch": 0.4129561443669919, + "grad_norm": 0.9494076451352634, + "learning_rate": 9.956891914439956e-06, + "loss": 0.2477, + "step": 32020 + }, + { + "epoch": 0.4130851125569878, + "grad_norm": 0.9572173848593895, + "learning_rate": 9.956793546343137e-06, + "loss": 0.2551, + "step": 32030 + }, + { + "epoch": 0.41321408074698374, + "grad_norm": 0.9814198042550032, + "learning_rate": 9.956695066628405e-06, + "loss": 0.2651, + "step": 32040 + }, + { + "epoch": 0.4133430489369797, + "grad_norm": 0.9958361186235721, + "learning_rate": 9.956596475297976e-06, + "loss": 0.2643, + "step": 32050 + }, + { + "epoch": 0.4134720171269756, + "grad_norm": 1.055033197059246, + "learning_rate": 9.95649777235407e-06, + "loss": 0.2454, + "step": 32060 + }, + { + "epoch": 0.4136009853169716, + "grad_norm": 1.0540776072616065, + "learning_rate": 9.956398957798912e-06, + "loss": 0.2546, + "step": 32070 + }, + { + "epoch": 0.4137299535069675, + "grad_norm": 0.9935472379480939, + "learning_rate": 9.956300031634725e-06, + "loss": 0.2658, + "step": 32080 + }, + { + "epoch": 0.4138589216969634, + "grad_norm": 1.0242983943819894, + "learning_rate": 9.956200993863738e-06, + "loss": 0.2437, + "step": 32090 + }, + { + "epoch": 0.4139878898869594, + "grad_norm": 0.9539796958695472, + "learning_rate": 9.956101844488181e-06, + "loss": 0.2567, + "step": 32100 + }, + { + "epoch": 0.4141168580769553, + "grad_norm": 0.9531336602285932, + "learning_rate": 9.956002583510285e-06, + "loss": 0.2476, + "step": 32110 + }, + { + "epoch": 0.4142458262669513, + "grad_norm": 1.0477593160569323, + "learning_rate": 9.955903210932287e-06, + "loss": 0.2543, + "step": 32120 + }, + { + "epoch": 0.4143747944569472, + "grad_norm": 0.9986228568964087, + "learning_rate": 9.955803726756426e-06, + "loss": 0.2532, + "step": 32130 + }, + { + "epoch": 0.4145037626469431, + "grad_norm": 1.0831113717029406, + "learning_rate": 9.955704130984937e-06, + "loss": 0.2591, + "step": 32140 + }, + { + "epoch": 0.4146327308369391, + "grad_norm": 1.3025133567563179, + "learning_rate": 9.955604423620068e-06, + "loss": 0.2642, + "step": 32150 + }, + { + "epoch": 0.414761699026935, + "grad_norm": 1.0654915971381742, + "learning_rate": 9.955504604664064e-06, + "loss": 0.2465, + "step": 32160 + }, + { + "epoch": 0.41489066721693096, + "grad_norm": 1.1117678705750973, + "learning_rate": 9.95540467411917e-06, + "loss": 0.2614, + "step": 32170 + }, + { + "epoch": 0.4150196354069269, + "grad_norm": 0.9900665046274721, + "learning_rate": 9.955304631987635e-06, + "loss": 0.2556, + "step": 32180 + }, + { + "epoch": 0.4151486035969228, + "grad_norm": 1.06898585977533, + "learning_rate": 9.955204478271716e-06, + "loss": 0.257, + "step": 32190 + }, + { + "epoch": 0.41527757178691876, + "grad_norm": 1.183372493729429, + "learning_rate": 9.955104212973666e-06, + "loss": 0.2553, + "step": 32200 + }, + { + "epoch": 0.4154065399769147, + "grad_norm": 1.1176212331523485, + "learning_rate": 9.955003836095743e-06, + "loss": 0.2628, + "step": 32210 + }, + { + "epoch": 0.41553550816691065, + "grad_norm": 1.0467417348798738, + "learning_rate": 9.954903347640208e-06, + "loss": 0.2542, + "step": 32220 + }, + { + "epoch": 0.41566447635690656, + "grad_norm": 1.1238333180632958, + "learning_rate": 9.954802747609323e-06, + "loss": 0.2667, + "step": 32230 + }, + { + "epoch": 0.4157934445469025, + "grad_norm": 1.1105087943948841, + "learning_rate": 9.954702036005353e-06, + "loss": 0.2668, + "step": 32240 + }, + { + "epoch": 0.41592241273689845, + "grad_norm": 1.0261692213383278, + "learning_rate": 9.954601212830568e-06, + "loss": 0.2622, + "step": 32250 + }, + { + "epoch": 0.41605138092689437, + "grad_norm": 1.0191009575537815, + "learning_rate": 9.954500278087236e-06, + "loss": 0.2555, + "step": 32260 + }, + { + "epoch": 0.41618034911689034, + "grad_norm": 1.1057370841619782, + "learning_rate": 9.95439923177763e-06, + "loss": 0.264, + "step": 32270 + }, + { + "epoch": 0.41630931730688625, + "grad_norm": 1.0408672001915036, + "learning_rate": 9.954298073904026e-06, + "loss": 0.2496, + "step": 32280 + }, + { + "epoch": 0.4164382854968822, + "grad_norm": 0.9851635653932541, + "learning_rate": 9.954196804468702e-06, + "loss": 0.2618, + "step": 32290 + }, + { + "epoch": 0.41656725368687814, + "grad_norm": 1.0565231036509992, + "learning_rate": 9.954095423473939e-06, + "loss": 0.2459, + "step": 32300 + }, + { + "epoch": 0.41669622187687405, + "grad_norm": 1.0931310177156477, + "learning_rate": 9.953993930922019e-06, + "loss": 0.2649, + "step": 32310 + }, + { + "epoch": 0.41682519006687, + "grad_norm": 1.0455494279338098, + "learning_rate": 9.953892326815228e-06, + "loss": 0.2511, + "step": 32320 + }, + { + "epoch": 0.41695415825686594, + "grad_norm": 0.9856632155842016, + "learning_rate": 9.953790611155853e-06, + "loss": 0.2614, + "step": 32330 + }, + { + "epoch": 0.4170831264468619, + "grad_norm": 1.1393633144667632, + "learning_rate": 9.953688783946186e-06, + "loss": 0.2609, + "step": 32340 + }, + { + "epoch": 0.4172120946368578, + "grad_norm": 1.0170446414486738, + "learning_rate": 9.953586845188517e-06, + "loss": 0.2544, + "step": 32350 + }, + { + "epoch": 0.41734106282685374, + "grad_norm": 1.010341619736438, + "learning_rate": 9.953484794885144e-06, + "loss": 0.2555, + "step": 32360 + }, + { + "epoch": 0.4174700310168497, + "grad_norm": 0.9708133308477978, + "learning_rate": 9.953382633038367e-06, + "loss": 0.2492, + "step": 32370 + }, + { + "epoch": 0.4175989992068456, + "grad_norm": 0.9500475085494561, + "learning_rate": 9.953280359650481e-06, + "loss": 0.256, + "step": 32380 + }, + { + "epoch": 0.4177279673968416, + "grad_norm": 1.0001782217535158, + "learning_rate": 9.953177974723793e-06, + "loss": 0.2602, + "step": 32390 + }, + { + "epoch": 0.4178569355868375, + "grad_norm": 0.962957077068685, + "learning_rate": 9.953075478260607e-06, + "loss": 0.2392, + "step": 32400 + }, + { + "epoch": 0.4179859037768334, + "grad_norm": 1.0140187999926253, + "learning_rate": 9.952972870263232e-06, + "loss": 0.2541, + "step": 32410 + }, + { + "epoch": 0.4181148719668294, + "grad_norm": 1.0798386906932194, + "learning_rate": 9.952870150733979e-06, + "loss": 0.2606, + "step": 32420 + }, + { + "epoch": 0.4182438401568253, + "grad_norm": 1.0615524405480656, + "learning_rate": 9.952767319675158e-06, + "loss": 0.2564, + "step": 32430 + }, + { + "epoch": 0.4183728083468213, + "grad_norm": 1.0932060298837685, + "learning_rate": 9.95266437708909e-06, + "loss": 0.2536, + "step": 32440 + }, + { + "epoch": 0.4185017765368172, + "grad_norm": 0.9549549558951366, + "learning_rate": 9.952561322978088e-06, + "loss": 0.2592, + "step": 32450 + }, + { + "epoch": 0.4186307447268131, + "grad_norm": 1.0361430498691333, + "learning_rate": 9.952458157344473e-06, + "loss": 0.2698, + "step": 32460 + }, + { + "epoch": 0.4187597129168091, + "grad_norm": 0.9973093231075535, + "learning_rate": 9.95235488019057e-06, + "loss": 0.2572, + "step": 32470 + }, + { + "epoch": 0.418888681106805, + "grad_norm": 1.0830650465348828, + "learning_rate": 9.952251491518705e-06, + "loss": 0.2566, + "step": 32480 + }, + { + "epoch": 0.41901764929680096, + "grad_norm": 1.0748314968003483, + "learning_rate": 9.952147991331204e-06, + "loss": 0.2559, + "step": 32490 + }, + { + "epoch": 0.4191466174867969, + "grad_norm": 1.0289227569945993, + "learning_rate": 9.952044379630402e-06, + "loss": 0.2595, + "step": 32500 + }, + { + "epoch": 0.4192755856767928, + "grad_norm": 1.014848072372354, + "learning_rate": 9.951940656418624e-06, + "loss": 0.2551, + "step": 32510 + }, + { + "epoch": 0.41940455386678877, + "grad_norm": 1.073442297604253, + "learning_rate": 9.951836821698214e-06, + "loss": 0.2841, + "step": 32520 + }, + { + "epoch": 0.4195335220567847, + "grad_norm": 0.9501472345812052, + "learning_rate": 9.951732875471507e-06, + "loss": 0.2733, + "step": 32530 + }, + { + "epoch": 0.41966249024678065, + "grad_norm": 1.1856203002002428, + "learning_rate": 9.951628817740842e-06, + "loss": 0.2579, + "step": 32540 + }, + { + "epoch": 0.41979145843677657, + "grad_norm": 0.9975318484782243, + "learning_rate": 9.951524648508564e-06, + "loss": 0.2583, + "step": 32550 + }, + { + "epoch": 0.4199204266267725, + "grad_norm": 1.460790855739462, + "learning_rate": 9.951420367777019e-06, + "loss": 0.2669, + "step": 32560 + }, + { + "epoch": 0.42004939481676845, + "grad_norm": 0.9134035283445403, + "learning_rate": 9.951315975548554e-06, + "loss": 0.2478, + "step": 32570 + }, + { + "epoch": 0.42017836300676437, + "grad_norm": 1.0931084590385478, + "learning_rate": 9.951211471825521e-06, + "loss": 0.2453, + "step": 32580 + }, + { + "epoch": 0.42030733119676034, + "grad_norm": 0.9988351334930977, + "learning_rate": 9.951106856610273e-06, + "loss": 0.2489, + "step": 32590 + }, + { + "epoch": 0.42043629938675625, + "grad_norm": 1.0813315821187854, + "learning_rate": 9.951002129905163e-06, + "loss": 0.2569, + "step": 32600 + }, + { + "epoch": 0.42056526757675217, + "grad_norm": 1.0005537011242345, + "learning_rate": 9.950897291712553e-06, + "loss": 0.2523, + "step": 32610 + }, + { + "epoch": 0.42069423576674814, + "grad_norm": 1.0370815142766772, + "learning_rate": 9.950792342034802e-06, + "loss": 0.2618, + "step": 32620 + }, + { + "epoch": 0.42082320395674405, + "grad_norm": 1.1639299939230139, + "learning_rate": 9.950687280874274e-06, + "loss": 0.2616, + "step": 32630 + }, + { + "epoch": 0.42095217214674, + "grad_norm": 1.0071377036008675, + "learning_rate": 9.950582108233335e-06, + "loss": 0.2536, + "step": 32640 + }, + { + "epoch": 0.42108114033673594, + "grad_norm": 1.0271353029163637, + "learning_rate": 9.950476824114352e-06, + "loss": 0.2537, + "step": 32650 + }, + { + "epoch": 0.4212101085267319, + "grad_norm": 1.0649048450572363, + "learning_rate": 9.950371428519697e-06, + "loss": 0.2472, + "step": 32660 + }, + { + "epoch": 0.4213390767167278, + "grad_norm": 1.0370875343733057, + "learning_rate": 9.95026592145174e-06, + "loss": 0.2596, + "step": 32670 + }, + { + "epoch": 0.42146804490672374, + "grad_norm": 0.9664649609792595, + "learning_rate": 9.950160302912862e-06, + "loss": 0.2577, + "step": 32680 + }, + { + "epoch": 0.4215970130967197, + "grad_norm": 1.0406416959646265, + "learning_rate": 9.950054572905439e-06, + "loss": 0.2552, + "step": 32690 + }, + { + "epoch": 0.4217259812867156, + "grad_norm": 0.9804354932988228, + "learning_rate": 9.949948731431851e-06, + "loss": 0.2532, + "step": 32700 + }, + { + "epoch": 0.4218549494767116, + "grad_norm": 1.0144895504987068, + "learning_rate": 9.949842778494483e-06, + "loss": 0.2751, + "step": 32710 + }, + { + "epoch": 0.4219839176667075, + "grad_norm": 1.0018918637983214, + "learning_rate": 9.94973671409572e-06, + "loss": 0.2628, + "step": 32720 + }, + { + "epoch": 0.4221128858567034, + "grad_norm": 1.1202286251082978, + "learning_rate": 9.94963053823795e-06, + "loss": 0.2595, + "step": 32730 + }, + { + "epoch": 0.4222418540466994, + "grad_norm": 1.0098962470700508, + "learning_rate": 9.949524250923564e-06, + "loss": 0.2654, + "step": 32740 + }, + { + "epoch": 0.4223708222366953, + "grad_norm": 1.1742306598187193, + "learning_rate": 9.949417852154955e-06, + "loss": 0.2541, + "step": 32750 + }, + { + "epoch": 0.4224997904266913, + "grad_norm": 0.9788337857989674, + "learning_rate": 9.94931134193452e-06, + "loss": 0.2536, + "step": 32760 + }, + { + "epoch": 0.4226287586166872, + "grad_norm": 0.9860876654470118, + "learning_rate": 9.949204720264659e-06, + "loss": 0.262, + "step": 32770 + }, + { + "epoch": 0.4227577268066831, + "grad_norm": 1.1045572484209918, + "learning_rate": 9.949097987147768e-06, + "loss": 0.257, + "step": 32780 + }, + { + "epoch": 0.4228866949966791, + "grad_norm": 1.0648219245241977, + "learning_rate": 9.948991142586255e-06, + "loss": 0.2608, + "step": 32790 + }, + { + "epoch": 0.423015663186675, + "grad_norm": 0.8900290470264586, + "learning_rate": 9.948884186582524e-06, + "loss": 0.2608, + "step": 32800 + }, + { + "epoch": 0.42314463137667097, + "grad_norm": 1.0903662639019736, + "learning_rate": 9.948777119138983e-06, + "loss": 0.2579, + "step": 32810 + }, + { + "epoch": 0.4232735995666669, + "grad_norm": 0.9713567689862556, + "learning_rate": 9.948669940258044e-06, + "loss": 0.2617, + "step": 32820 + }, + { + "epoch": 0.4234025677566628, + "grad_norm": 0.9392829080976393, + "learning_rate": 9.948562649942122e-06, + "loss": 0.2675, + "step": 32830 + }, + { + "epoch": 0.42353153594665877, + "grad_norm": 1.0912338902114056, + "learning_rate": 9.948455248193628e-06, + "loss": 0.2556, + "step": 32840 + }, + { + "epoch": 0.4236605041366547, + "grad_norm": 1.0286051775037952, + "learning_rate": 9.948347735014988e-06, + "loss": 0.2611, + "step": 32850 + }, + { + "epoch": 0.42378947232665065, + "grad_norm": 1.049028740584834, + "learning_rate": 9.948240110408615e-06, + "loss": 0.2525, + "step": 32860 + }, + { + "epoch": 0.42391844051664657, + "grad_norm": 1.0487928325069436, + "learning_rate": 9.948132374376938e-06, + "loss": 0.2529, + "step": 32870 + }, + { + "epoch": 0.4240474087066425, + "grad_norm": 0.8975503106757313, + "learning_rate": 9.94802452692238e-06, + "loss": 0.2375, + "step": 32880 + }, + { + "epoch": 0.42417637689663845, + "grad_norm": 1.0121730268824296, + "learning_rate": 9.947916568047371e-06, + "loss": 0.2713, + "step": 32890 + }, + { + "epoch": 0.42430534508663437, + "grad_norm": 1.0163664906432708, + "learning_rate": 9.947808497754341e-06, + "loss": 0.2698, + "step": 32900 + }, + { + "epoch": 0.42443431327663034, + "grad_norm": 1.036033260484487, + "learning_rate": 9.947700316045726e-06, + "loss": 0.2457, + "step": 32910 + }, + { + "epoch": 0.42456328146662625, + "grad_norm": 1.0871395594632314, + "learning_rate": 9.947592022923958e-06, + "loss": 0.2737, + "step": 32920 + }, + { + "epoch": 0.42469224965662217, + "grad_norm": 0.9754044844980161, + "learning_rate": 9.94748361839148e-06, + "loss": 0.2514, + "step": 32930 + }, + { + "epoch": 0.42482121784661814, + "grad_norm": 0.9401249703399758, + "learning_rate": 9.947375102450731e-06, + "loss": 0.2511, + "step": 32940 + }, + { + "epoch": 0.42495018603661405, + "grad_norm": 1.101066034547543, + "learning_rate": 9.947266475104154e-06, + "loss": 0.2536, + "step": 32950 + }, + { + "epoch": 0.42507915422661, + "grad_norm": 0.9084413018289362, + "learning_rate": 9.947157736354196e-06, + "loss": 0.2405, + "step": 32960 + }, + { + "epoch": 0.42520812241660594, + "grad_norm": 1.0971366367709454, + "learning_rate": 9.947048886203303e-06, + "loss": 0.2589, + "step": 32970 + }, + { + "epoch": 0.4253370906066019, + "grad_norm": 1.0071066939176272, + "learning_rate": 9.946939924653931e-06, + "loss": 0.2476, + "step": 32980 + }, + { + "epoch": 0.4254660587965978, + "grad_norm": 0.9424456309099735, + "learning_rate": 9.94683085170853e-06, + "loss": 0.2434, + "step": 32990 + }, + { + "epoch": 0.42559502698659374, + "grad_norm": 1.0872171368912995, + "learning_rate": 9.946721667369556e-06, + "loss": 0.2618, + "step": 33000 + }, + { + "epoch": 0.4257239951765897, + "grad_norm": 0.9546503510846437, + "learning_rate": 9.94661237163947e-06, + "loss": 0.2652, + "step": 33010 + }, + { + "epoch": 0.4258529633665856, + "grad_norm": 1.0485027740314206, + "learning_rate": 9.946502964520733e-06, + "loss": 0.2588, + "step": 33020 + }, + { + "epoch": 0.4259819315565816, + "grad_norm": 1.1274860718214754, + "learning_rate": 9.946393446015806e-06, + "loss": 0.2564, + "step": 33030 + }, + { + "epoch": 0.4261108997465775, + "grad_norm": 0.9988473419214325, + "learning_rate": 9.946283816127156e-06, + "loss": 0.2494, + "step": 33040 + }, + { + "epoch": 0.4262398679365734, + "grad_norm": 1.0357254175149986, + "learning_rate": 9.946174074857254e-06, + "loss": 0.2718, + "step": 33050 + }, + { + "epoch": 0.4263688361265694, + "grad_norm": 1.000164023640495, + "learning_rate": 9.94606422220857e-06, + "loss": 0.2657, + "step": 33060 + }, + { + "epoch": 0.4264978043165653, + "grad_norm": 0.8999418797040433, + "learning_rate": 9.945954258183574e-06, + "loss": 0.2592, + "step": 33070 + }, + { + "epoch": 0.4266267725065613, + "grad_norm": 0.9863912418107262, + "learning_rate": 9.945844182784747e-06, + "loss": 0.257, + "step": 33080 + }, + { + "epoch": 0.4267557406965572, + "grad_norm": 1.1398337117849517, + "learning_rate": 9.945733996014568e-06, + "loss": 0.2656, + "step": 33090 + }, + { + "epoch": 0.4268847088865531, + "grad_norm": 0.9823700036758573, + "learning_rate": 9.945623697875514e-06, + "loss": 0.2515, + "step": 33100 + }, + { + "epoch": 0.4270136770765491, + "grad_norm": 1.0399861161412354, + "learning_rate": 9.945513288370072e-06, + "loss": 0.2765, + "step": 33110 + }, + { + "epoch": 0.427142645266545, + "grad_norm": 1.122684863962125, + "learning_rate": 9.945402767500726e-06, + "loss": 0.2602, + "step": 33120 + }, + { + "epoch": 0.42727161345654097, + "grad_norm": 0.9761084855017945, + "learning_rate": 9.945292135269967e-06, + "loss": 0.2583, + "step": 33130 + }, + { + "epoch": 0.4274005816465369, + "grad_norm": 0.9945540096264359, + "learning_rate": 9.945181391680284e-06, + "loss": 0.2536, + "step": 33140 + }, + { + "epoch": 0.4275295498365328, + "grad_norm": 1.071323755238467, + "learning_rate": 9.945070536734172e-06, + "loss": 0.2652, + "step": 33150 + }, + { + "epoch": 0.42765851802652877, + "grad_norm": 1.050583461774323, + "learning_rate": 9.94495957043413e-06, + "loss": 0.2522, + "step": 33160 + }, + { + "epoch": 0.4277874862165247, + "grad_norm": 0.9812062462534977, + "learning_rate": 9.94484849278265e-06, + "loss": 0.2584, + "step": 33170 + }, + { + "epoch": 0.42791645440652065, + "grad_norm": 0.9670050370378394, + "learning_rate": 9.944737303782238e-06, + "loss": 0.2539, + "step": 33180 + }, + { + "epoch": 0.42804542259651657, + "grad_norm": 0.9641798513638268, + "learning_rate": 9.944626003435399e-06, + "loss": 0.2591, + "step": 33190 + }, + { + "epoch": 0.4281743907865125, + "grad_norm": 1.0164670584238993, + "learning_rate": 9.944514591744636e-06, + "loss": 0.2537, + "step": 33200 + }, + { + "epoch": 0.42830335897650845, + "grad_norm": 1.036472509097168, + "learning_rate": 9.94440306871246e-06, + "loss": 0.2641, + "step": 33210 + }, + { + "epoch": 0.42843232716650437, + "grad_norm": 1.0505260763752309, + "learning_rate": 9.94429143434138e-06, + "loss": 0.2496, + "step": 33220 + }, + { + "epoch": 0.42856129535650034, + "grad_norm": 1.1855264189942232, + "learning_rate": 9.944179688633913e-06, + "loss": 0.2595, + "step": 33230 + }, + { + "epoch": 0.42869026354649625, + "grad_norm": 1.0330606079433626, + "learning_rate": 9.944067831592572e-06, + "loss": 0.258, + "step": 33240 + }, + { + "epoch": 0.42881923173649217, + "grad_norm": 0.9676374001655393, + "learning_rate": 9.943955863219876e-06, + "loss": 0.2642, + "step": 33250 + }, + { + "epoch": 0.42894819992648814, + "grad_norm": 0.9972200088793209, + "learning_rate": 9.943843783518348e-06, + "loss": 0.2493, + "step": 33260 + }, + { + "epoch": 0.42907716811648405, + "grad_norm": 1.1010414151798056, + "learning_rate": 9.943731592490513e-06, + "loss": 0.2715, + "step": 33270 + }, + { + "epoch": 0.42920613630648, + "grad_norm": 1.0891909423328712, + "learning_rate": 9.943619290138894e-06, + "loss": 0.2563, + "step": 33280 + }, + { + "epoch": 0.42933510449647594, + "grad_norm": 1.0153043160699895, + "learning_rate": 9.943506876466023e-06, + "loss": 0.2597, + "step": 33290 + }, + { + "epoch": 0.4294640726864719, + "grad_norm": 0.9505686323077417, + "learning_rate": 9.94339435147443e-06, + "loss": 0.245, + "step": 33300 + }, + { + "epoch": 0.4295930408764678, + "grad_norm": 0.939910067895544, + "learning_rate": 9.943281715166648e-06, + "loss": 0.247, + "step": 33310 + }, + { + "epoch": 0.42972200906646374, + "grad_norm": 0.9938410497657928, + "learning_rate": 9.943168967545214e-06, + "loss": 0.2537, + "step": 33320 + }, + { + "epoch": 0.4298509772564597, + "grad_norm": 1.0302870972694067, + "learning_rate": 9.943056108612668e-06, + "loss": 0.2652, + "step": 33330 + }, + { + "epoch": 0.4299799454464556, + "grad_norm": 0.964872277102367, + "learning_rate": 9.94294313837155e-06, + "loss": 0.2615, + "step": 33340 + }, + { + "epoch": 0.4301089136364516, + "grad_norm": 1.0055299763828371, + "learning_rate": 9.942830056824404e-06, + "loss": 0.2607, + "step": 33350 + }, + { + "epoch": 0.4302378818264475, + "grad_norm": 1.0740437230888442, + "learning_rate": 9.942716863973777e-06, + "loss": 0.2536, + "step": 33360 + }, + { + "epoch": 0.4303668500164434, + "grad_norm": 1.0605756891251898, + "learning_rate": 9.942603559822215e-06, + "loss": 0.2732, + "step": 33370 + }, + { + "epoch": 0.4304958182064394, + "grad_norm": 0.979474737008603, + "learning_rate": 9.942490144372276e-06, + "loss": 0.2644, + "step": 33380 + }, + { + "epoch": 0.4306247863964353, + "grad_norm": 1.1398926290991227, + "learning_rate": 9.942376617626507e-06, + "loss": 0.2626, + "step": 33390 + }, + { + "epoch": 0.4307537545864313, + "grad_norm": 1.036447659899763, + "learning_rate": 9.94226297958747e-06, + "loss": 0.2567, + "step": 33400 + }, + { + "epoch": 0.4308827227764272, + "grad_norm": 0.8941400916159677, + "learning_rate": 9.942149230257718e-06, + "loss": 0.2521, + "step": 33410 + }, + { + "epoch": 0.4310116909664231, + "grad_norm": 1.0181240038051231, + "learning_rate": 9.942035369639818e-06, + "loss": 0.2489, + "step": 33420 + }, + { + "epoch": 0.4311406591564191, + "grad_norm": 0.8893329986933249, + "learning_rate": 9.94192139773633e-06, + "loss": 0.2519, + "step": 33430 + }, + { + "epoch": 0.431269627346415, + "grad_norm": 0.9906264195979232, + "learning_rate": 9.941807314549823e-06, + "loss": 0.2511, + "step": 33440 + }, + { + "epoch": 0.43139859553641097, + "grad_norm": 0.9421977321684293, + "learning_rate": 9.941693120082864e-06, + "loss": 0.2612, + "step": 33450 + }, + { + "epoch": 0.4315275637264069, + "grad_norm": 1.0098765006818795, + "learning_rate": 9.941578814338026e-06, + "loss": 0.2379, + "step": 33460 + }, + { + "epoch": 0.4316565319164028, + "grad_norm": 0.9298318497072916, + "learning_rate": 9.941464397317883e-06, + "loss": 0.2541, + "step": 33470 + }, + { + "epoch": 0.43178550010639877, + "grad_norm": 0.9898456440869758, + "learning_rate": 9.941349869025009e-06, + "loss": 0.2553, + "step": 33480 + }, + { + "epoch": 0.4319144682963947, + "grad_norm": 1.0840359180844596, + "learning_rate": 9.941235229461987e-06, + "loss": 0.2522, + "step": 33490 + }, + { + "epoch": 0.43204343648639065, + "grad_norm": 0.9590879071903561, + "learning_rate": 9.941120478631395e-06, + "loss": 0.2646, + "step": 33500 + }, + { + "epoch": 0.43217240467638657, + "grad_norm": 0.9708443334748015, + "learning_rate": 9.941005616535818e-06, + "loss": 0.2603, + "step": 33510 + }, + { + "epoch": 0.4323013728663825, + "grad_norm": 1.0253231994486736, + "learning_rate": 9.940890643177842e-06, + "loss": 0.2612, + "step": 33520 + }, + { + "epoch": 0.43243034105637845, + "grad_norm": 1.0068452837700799, + "learning_rate": 9.94077555856006e-06, + "loss": 0.2552, + "step": 33530 + }, + { + "epoch": 0.43255930924637437, + "grad_norm": 1.0204598697793243, + "learning_rate": 9.940660362685058e-06, + "loss": 0.2554, + "step": 33540 + }, + { + "epoch": 0.43268827743637034, + "grad_norm": 0.9796215604747004, + "learning_rate": 9.940545055555431e-06, + "loss": 0.2492, + "step": 33550 + }, + { + "epoch": 0.43281724562636625, + "grad_norm": 0.9887080235172232, + "learning_rate": 9.940429637173779e-06, + "loss": 0.2585, + "step": 33560 + }, + { + "epoch": 0.43294621381636217, + "grad_norm": 1.0707532324791202, + "learning_rate": 9.940314107542697e-06, + "loss": 0.2621, + "step": 33570 + }, + { + "epoch": 0.43307518200635814, + "grad_norm": 0.9654473800369429, + "learning_rate": 9.940198466664789e-06, + "loss": 0.2592, + "step": 33580 + }, + { + "epoch": 0.43320415019635405, + "grad_norm": 1.011322758936162, + "learning_rate": 9.940082714542658e-06, + "loss": 0.2544, + "step": 33590 + }, + { + "epoch": 0.43333311838635, + "grad_norm": 0.955941338479415, + "learning_rate": 9.939966851178911e-06, + "loss": 0.2556, + "step": 33600 + }, + { + "epoch": 0.43346208657634594, + "grad_norm": 1.0332360177877304, + "learning_rate": 9.939850876576156e-06, + "loss": 0.2511, + "step": 33610 + }, + { + "epoch": 0.43359105476634185, + "grad_norm": 1.0051895537815414, + "learning_rate": 9.939734790737007e-06, + "loss": 0.2641, + "step": 33620 + }, + { + "epoch": 0.4337200229563378, + "grad_norm": 1.0299479819223012, + "learning_rate": 9.939618593664076e-06, + "loss": 0.269, + "step": 33630 + }, + { + "epoch": 0.43384899114633374, + "grad_norm": 1.0118259566753078, + "learning_rate": 9.939502285359978e-06, + "loss": 0.2559, + "step": 33640 + }, + { + "epoch": 0.4339779593363297, + "grad_norm": 1.0445416549613347, + "learning_rate": 9.939385865827335e-06, + "loss": 0.2584, + "step": 33650 + }, + { + "epoch": 0.4341069275263256, + "grad_norm": 0.953744591763211, + "learning_rate": 9.939269335068768e-06, + "loss": 0.248, + "step": 33660 + }, + { + "epoch": 0.4342358957163216, + "grad_norm": 0.9543489825856618, + "learning_rate": 9.939152693086901e-06, + "loss": 0.2619, + "step": 33670 + }, + { + "epoch": 0.4343648639063175, + "grad_norm": 1.013391428952192, + "learning_rate": 9.93903593988436e-06, + "loss": 0.2504, + "step": 33680 + }, + { + "epoch": 0.4344938320963134, + "grad_norm": 1.000125291762917, + "learning_rate": 9.938919075463774e-06, + "loss": 0.2602, + "step": 33690 + }, + { + "epoch": 0.4346228002863094, + "grad_norm": 0.9949450857758886, + "learning_rate": 9.938802099827774e-06, + "loss": 0.2488, + "step": 33700 + }, + { + "epoch": 0.4347517684763053, + "grad_norm": 0.9775847403164419, + "learning_rate": 9.938685012978995e-06, + "loss": 0.254, + "step": 33710 + }, + { + "epoch": 0.4348807366663013, + "grad_norm": 1.0666320665418765, + "learning_rate": 9.938567814920075e-06, + "loss": 0.2587, + "step": 33720 + }, + { + "epoch": 0.4350097048562972, + "grad_norm": 0.9638682816613265, + "learning_rate": 9.93845050565365e-06, + "loss": 0.2507, + "step": 33730 + }, + { + "epoch": 0.4351386730462931, + "grad_norm": 1.1031172791597152, + "learning_rate": 9.938333085182363e-06, + "loss": 0.2647, + "step": 33740 + }, + { + "epoch": 0.4352676412362891, + "grad_norm": 1.0159495440653559, + "learning_rate": 9.93821555350886e-06, + "loss": 0.2414, + "step": 33750 + }, + { + "epoch": 0.435396609426285, + "grad_norm": 1.0685805564957453, + "learning_rate": 9.938097910635785e-06, + "loss": 0.2532, + "step": 33760 + }, + { + "epoch": 0.43552557761628097, + "grad_norm": 0.9356694093823558, + "learning_rate": 9.937980156565788e-06, + "loss": 0.2523, + "step": 33770 + }, + { + "epoch": 0.4356545458062769, + "grad_norm": 1.0021615461482904, + "learning_rate": 9.937862291301518e-06, + "loss": 0.2529, + "step": 33780 + }, + { + "epoch": 0.4357835139962728, + "grad_norm": 1.1044445124402769, + "learning_rate": 9.937744314845635e-06, + "loss": 0.2576, + "step": 33790 + }, + { + "epoch": 0.43591248218626877, + "grad_norm": 0.8578068794658134, + "learning_rate": 9.93762622720079e-06, + "loss": 0.2471, + "step": 33800 + }, + { + "epoch": 0.4360414503762647, + "grad_norm": 0.9812393862769625, + "learning_rate": 9.937508028369647e-06, + "loss": 0.2562, + "step": 33810 + }, + { + "epoch": 0.43617041856626065, + "grad_norm": 1.054319863102044, + "learning_rate": 9.937389718354862e-06, + "loss": 0.247, + "step": 33820 + }, + { + "epoch": 0.43629938675625657, + "grad_norm": 1.0317688662261562, + "learning_rate": 9.937271297159103e-06, + "loss": 0.2574, + "step": 33830 + }, + { + "epoch": 0.4364283549462525, + "grad_norm": 1.127796795301134, + "learning_rate": 9.937152764785037e-06, + "loss": 0.2535, + "step": 33840 + }, + { + "epoch": 0.43655732313624845, + "grad_norm": 1.0314251218963901, + "learning_rate": 9.937034121235332e-06, + "loss": 0.2549, + "step": 33850 + }, + { + "epoch": 0.43668629132624437, + "grad_norm": 0.8548596793990345, + "learning_rate": 9.936915366512658e-06, + "loss": 0.267, + "step": 33860 + }, + { + "epoch": 0.43681525951624034, + "grad_norm": 1.013096101756939, + "learning_rate": 9.936796500619693e-06, + "loss": 0.255, + "step": 33870 + }, + { + "epoch": 0.43694422770623625, + "grad_norm": 0.9818873365591816, + "learning_rate": 9.936677523559108e-06, + "loss": 0.2466, + "step": 33880 + }, + { + "epoch": 0.43707319589623217, + "grad_norm": 0.9286068999848136, + "learning_rate": 9.936558435333587e-06, + "loss": 0.2598, + "step": 33890 + }, + { + "epoch": 0.43720216408622814, + "grad_norm": 1.1041634332415107, + "learning_rate": 9.93643923594581e-06, + "loss": 0.2629, + "step": 33900 + }, + { + "epoch": 0.43733113227622405, + "grad_norm": 0.969246794849981, + "learning_rate": 9.93631992539846e-06, + "loss": 0.2525, + "step": 33910 + }, + { + "epoch": 0.43746010046622, + "grad_norm": 1.0654485944563514, + "learning_rate": 9.936200503694227e-06, + "loss": 0.2586, + "step": 33920 + }, + { + "epoch": 0.43758906865621594, + "grad_norm": 0.9947550145268647, + "learning_rate": 9.936080970835798e-06, + "loss": 0.2532, + "step": 33930 + }, + { + "epoch": 0.43771803684621186, + "grad_norm": 0.9312271223844886, + "learning_rate": 9.935961326825865e-06, + "loss": 0.2622, + "step": 33940 + }, + { + "epoch": 0.4378470050362078, + "grad_norm": 0.995299670743341, + "learning_rate": 9.93584157166712e-06, + "loss": 0.2413, + "step": 33950 + }, + { + "epoch": 0.43797597322620374, + "grad_norm": 1.051880249010302, + "learning_rate": 9.935721705362263e-06, + "loss": 0.2504, + "step": 33960 + }, + { + "epoch": 0.4381049414161997, + "grad_norm": 1.0036673317151865, + "learning_rate": 9.93560172791399e-06, + "loss": 0.2721, + "step": 33970 + }, + { + "epoch": 0.4382339096061956, + "grad_norm": 1.0632798059902884, + "learning_rate": 9.935481639325007e-06, + "loss": 0.2543, + "step": 33980 + }, + { + "epoch": 0.4383628777961916, + "grad_norm": 0.9442682535398514, + "learning_rate": 9.935361439598014e-06, + "loss": 0.256, + "step": 33990 + }, + { + "epoch": 0.4384918459861875, + "grad_norm": 0.9919790656997097, + "learning_rate": 9.935241128735718e-06, + "loss": 0.2478, + "step": 34000 + }, + { + "epoch": 0.4386208141761834, + "grad_norm": 1.054073518926339, + "learning_rate": 9.935120706740832e-06, + "loss": 0.26, + "step": 34010 + }, + { + "epoch": 0.4387497823661794, + "grad_norm": 1.166551095801195, + "learning_rate": 9.935000173616063e-06, + "loss": 0.2499, + "step": 34020 + }, + { + "epoch": 0.4388787505561753, + "grad_norm": 0.9458146328049207, + "learning_rate": 9.93487952936413e-06, + "loss": 0.2534, + "step": 34030 + }, + { + "epoch": 0.4390077187461713, + "grad_norm": 0.9928607805670863, + "learning_rate": 9.934758773987743e-06, + "loss": 0.2547, + "step": 34040 + }, + { + "epoch": 0.4391366869361672, + "grad_norm": 1.008616773485554, + "learning_rate": 9.934637907489628e-06, + "loss": 0.2594, + "step": 34050 + }, + { + "epoch": 0.4392656551261631, + "grad_norm": 0.983008606288569, + "learning_rate": 9.934516929872503e-06, + "loss": 0.261, + "step": 34060 + }, + { + "epoch": 0.4393946233161591, + "grad_norm": 1.0896535349663967, + "learning_rate": 9.934395841139093e-06, + "loss": 0.2501, + "step": 34070 + }, + { + "epoch": 0.439523591506155, + "grad_norm": 1.092602419781799, + "learning_rate": 9.934274641292125e-06, + "loss": 0.254, + "step": 34080 + }, + { + "epoch": 0.43965255969615097, + "grad_norm": 1.0420157586360659, + "learning_rate": 9.934153330334329e-06, + "loss": 0.2472, + "step": 34090 + }, + { + "epoch": 0.4397815278861469, + "grad_norm": 0.9581047666235121, + "learning_rate": 9.934031908268433e-06, + "loss": 0.2496, + "step": 34100 + }, + { + "epoch": 0.4399104960761428, + "grad_norm": 0.9254388429683742, + "learning_rate": 9.933910375097176e-06, + "loss": 0.2575, + "step": 34110 + }, + { + "epoch": 0.44003946426613877, + "grad_norm": 1.017803975398279, + "learning_rate": 9.933788730823293e-06, + "loss": 0.2568, + "step": 34120 + }, + { + "epoch": 0.4401684324561347, + "grad_norm": 1.0255766235445387, + "learning_rate": 9.93366697544952e-06, + "loss": 0.2667, + "step": 34130 + }, + { + "epoch": 0.44029740064613065, + "grad_norm": 0.9950794268728116, + "learning_rate": 9.933545108978604e-06, + "loss": 0.2709, + "step": 34140 + }, + { + "epoch": 0.44042636883612657, + "grad_norm": 0.9710601855243456, + "learning_rate": 9.933423131413287e-06, + "loss": 0.2504, + "step": 34150 + }, + { + "epoch": 0.4405553370261225, + "grad_norm": 1.1539849456676898, + "learning_rate": 9.933301042756314e-06, + "loss": 0.2524, + "step": 34160 + }, + { + "epoch": 0.44068430521611845, + "grad_norm": 0.961437177275812, + "learning_rate": 9.933178843010435e-06, + "loss": 0.2529, + "step": 34170 + }, + { + "epoch": 0.44081327340611437, + "grad_norm": 0.9947939319202963, + "learning_rate": 9.933056532178405e-06, + "loss": 0.2557, + "step": 34180 + }, + { + "epoch": 0.44094224159611034, + "grad_norm": 0.925428459336166, + "learning_rate": 9.932934110262972e-06, + "loss": 0.2543, + "step": 34190 + }, + { + "epoch": 0.44107120978610626, + "grad_norm": 0.919362617064776, + "learning_rate": 9.932811577266898e-06, + "loss": 0.2383, + "step": 34200 + }, + { + "epoch": 0.44120017797610217, + "grad_norm": 1.0779784081556363, + "learning_rate": 9.932688933192942e-06, + "loss": 0.2659, + "step": 34210 + }, + { + "epoch": 0.44132914616609814, + "grad_norm": 1.025506547248025, + "learning_rate": 9.932566178043861e-06, + "loss": 0.2564, + "step": 34220 + }, + { + "epoch": 0.44145811435609406, + "grad_norm": 1.0346657526430865, + "learning_rate": 9.932443311822424e-06, + "loss": 0.2622, + "step": 34230 + }, + { + "epoch": 0.44158708254609, + "grad_norm": 0.9860782095834458, + "learning_rate": 9.932320334531396e-06, + "loss": 0.2463, + "step": 34240 + }, + { + "epoch": 0.44171605073608594, + "grad_norm": 1.096562739885167, + "learning_rate": 9.932197246173548e-06, + "loss": 0.2594, + "step": 34250 + }, + { + "epoch": 0.44184501892608186, + "grad_norm": 1.1072465172387567, + "learning_rate": 9.932074046751648e-06, + "loss": 0.2707, + "step": 34260 + }, + { + "epoch": 0.4419739871160778, + "grad_norm": 1.0440800114069815, + "learning_rate": 9.931950736268474e-06, + "loss": 0.2631, + "step": 34270 + }, + { + "epoch": 0.44210295530607374, + "grad_norm": 1.0635259680596136, + "learning_rate": 9.931827314726798e-06, + "loss": 0.2573, + "step": 34280 + }, + { + "epoch": 0.4422319234960697, + "grad_norm": 1.0943852265401182, + "learning_rate": 9.931703782129405e-06, + "loss": 0.251, + "step": 34290 + }, + { + "epoch": 0.4423608916860656, + "grad_norm": 0.9852133458235934, + "learning_rate": 9.931580138479074e-06, + "loss": 0.2673, + "step": 34300 + }, + { + "epoch": 0.44248985987606154, + "grad_norm": 0.9615154749461482, + "learning_rate": 9.931456383778588e-06, + "loss": 0.2511, + "step": 34310 + }, + { + "epoch": 0.4426188280660575, + "grad_norm": 0.9951971819056497, + "learning_rate": 9.931332518030736e-06, + "loss": 0.2509, + "step": 34320 + }, + { + "epoch": 0.4427477962560534, + "grad_norm": 0.9750192095248269, + "learning_rate": 9.931208541238305e-06, + "loss": 0.2454, + "step": 34330 + }, + { + "epoch": 0.4428767644460494, + "grad_norm": 1.0214055534697475, + "learning_rate": 9.931084453404089e-06, + "loss": 0.2557, + "step": 34340 + }, + { + "epoch": 0.4430057326360453, + "grad_norm": 1.025632021080092, + "learning_rate": 9.93096025453088e-06, + "loss": 0.2578, + "step": 34350 + }, + { + "epoch": 0.4431347008260413, + "grad_norm": 1.0529563820660728, + "learning_rate": 9.930835944621477e-06, + "loss": 0.2469, + "step": 34360 + }, + { + "epoch": 0.4432636690160372, + "grad_norm": 0.9164743239952148, + "learning_rate": 9.930711523678678e-06, + "loss": 0.2388, + "step": 34370 + }, + { + "epoch": 0.4433926372060331, + "grad_norm": 1.3043619010687961, + "learning_rate": 9.930586991705285e-06, + "loss": 0.2619, + "step": 34380 + }, + { + "epoch": 0.4435216053960291, + "grad_norm": 0.9233310303002209, + "learning_rate": 9.9304623487041e-06, + "loss": 0.2717, + "step": 34390 + }, + { + "epoch": 0.443650573586025, + "grad_norm": 0.9531539614691944, + "learning_rate": 9.930337594677934e-06, + "loss": 0.251, + "step": 34400 + }, + { + "epoch": 0.44377954177602097, + "grad_norm": 1.0049085542935798, + "learning_rate": 9.930212729629593e-06, + "loss": 0.2579, + "step": 34410 + }, + { + "epoch": 0.4439085099660169, + "grad_norm": 0.9958168466887537, + "learning_rate": 9.93008775356189e-06, + "loss": 0.2497, + "step": 34420 + }, + { + "epoch": 0.4440374781560128, + "grad_norm": 1.0469230866810555, + "learning_rate": 9.92996266647764e-06, + "loss": 0.2475, + "step": 34430 + }, + { + "epoch": 0.44416644634600877, + "grad_norm": 1.1009974999416854, + "learning_rate": 9.929837468379657e-06, + "loss": 0.2539, + "step": 34440 + }, + { + "epoch": 0.4442954145360047, + "grad_norm": 0.9477526812263241, + "learning_rate": 9.929712159270763e-06, + "loss": 0.2508, + "step": 34450 + }, + { + "epoch": 0.44442438272600066, + "grad_norm": 0.9841270259973276, + "learning_rate": 9.929586739153778e-06, + "loss": 0.258, + "step": 34460 + }, + { + "epoch": 0.44455335091599657, + "grad_norm": 0.9970387644200288, + "learning_rate": 9.929461208031528e-06, + "loss": 0.2491, + "step": 34470 + }, + { + "epoch": 0.4446823191059925, + "grad_norm": 0.8627306744058607, + "learning_rate": 9.929335565906836e-06, + "loss": 0.2505, + "step": 34480 + }, + { + "epoch": 0.44481128729598846, + "grad_norm": 0.8936544945633312, + "learning_rate": 9.929209812782537e-06, + "loss": 0.2487, + "step": 34490 + }, + { + "epoch": 0.44494025548598437, + "grad_norm": 0.8428280719234351, + "learning_rate": 9.92908394866146e-06, + "loss": 0.2451, + "step": 34500 + }, + { + "epoch": 0.44506922367598034, + "grad_norm": 0.9958591139604266, + "learning_rate": 9.928957973546435e-06, + "loss": 0.2615, + "step": 34510 + }, + { + "epoch": 0.44519819186597626, + "grad_norm": 1.0191414952879505, + "learning_rate": 9.928831887440307e-06, + "loss": 0.2594, + "step": 34520 + }, + { + "epoch": 0.44532716005597217, + "grad_norm": 1.0264556588892904, + "learning_rate": 9.928705690345908e-06, + "loss": 0.2507, + "step": 34530 + }, + { + "epoch": 0.44545612824596814, + "grad_norm": 1.0207433398057282, + "learning_rate": 9.928579382266084e-06, + "loss": 0.2772, + "step": 34540 + }, + { + "epoch": 0.44558509643596406, + "grad_norm": 1.0281862057151534, + "learning_rate": 9.928452963203677e-06, + "loss": 0.2665, + "step": 34550 + }, + { + "epoch": 0.44571406462596, + "grad_norm": 1.0508450853114768, + "learning_rate": 9.928326433161535e-06, + "loss": 0.2452, + "step": 34560 + }, + { + "epoch": 0.44584303281595594, + "grad_norm": 0.9819741135525012, + "learning_rate": 9.928199792142505e-06, + "loss": 0.2721, + "step": 34570 + }, + { + "epoch": 0.44597200100595186, + "grad_norm": 0.9531596953340363, + "learning_rate": 9.928073040149441e-06, + "loss": 0.2425, + "step": 34580 + }, + { + "epoch": 0.4461009691959478, + "grad_norm": 1.0138433397531743, + "learning_rate": 9.927946177185197e-06, + "loss": 0.2458, + "step": 34590 + }, + { + "epoch": 0.44622993738594374, + "grad_norm": 0.9729504795679644, + "learning_rate": 9.92781920325263e-06, + "loss": 0.2425, + "step": 34600 + }, + { + "epoch": 0.4463589055759397, + "grad_norm": 1.0356396162532866, + "learning_rate": 9.927692118354599e-06, + "loss": 0.2588, + "step": 34610 + }, + { + "epoch": 0.44648787376593563, + "grad_norm": 1.156490656579711, + "learning_rate": 9.927564922493963e-06, + "loss": 0.2693, + "step": 34620 + }, + { + "epoch": 0.44661684195593154, + "grad_norm": 1.0338186407168164, + "learning_rate": 9.927437615673593e-06, + "loss": 0.2543, + "step": 34630 + }, + { + "epoch": 0.4467458101459275, + "grad_norm": 1.2073959628294506, + "learning_rate": 9.927310197896347e-06, + "loss": 0.2529, + "step": 34640 + }, + { + "epoch": 0.44687477833592343, + "grad_norm": 1.101806066615438, + "learning_rate": 9.9271826691651e-06, + "loss": 0.2631, + "step": 34650 + }, + { + "epoch": 0.4470037465259194, + "grad_norm": 1.0881536788872763, + "learning_rate": 9.927055029482722e-06, + "loss": 0.2576, + "step": 34660 + }, + { + "epoch": 0.4471327147159153, + "grad_norm": 1.048031912665452, + "learning_rate": 9.926927278852088e-06, + "loss": 0.2462, + "step": 34670 + }, + { + "epoch": 0.4472616829059113, + "grad_norm": 0.9643121499095895, + "learning_rate": 9.926799417276072e-06, + "loss": 0.2595, + "step": 34680 + }, + { + "epoch": 0.4473906510959072, + "grad_norm": 1.0034761155078862, + "learning_rate": 9.926671444757558e-06, + "loss": 0.2632, + "step": 34690 + }, + { + "epoch": 0.4475196192859031, + "grad_norm": 0.9923083982962867, + "learning_rate": 9.926543361299423e-06, + "loss": 0.2565, + "step": 34700 + }, + { + "epoch": 0.4476485874758991, + "grad_norm": 1.1324858309025783, + "learning_rate": 9.926415166904554e-06, + "loss": 0.256, + "step": 34710 + }, + { + "epoch": 0.447777555665895, + "grad_norm": 0.9170986096022146, + "learning_rate": 9.926286861575837e-06, + "loss": 0.2497, + "step": 34720 + }, + { + "epoch": 0.44790652385589097, + "grad_norm": 1.0122824486826958, + "learning_rate": 9.92615844531616e-06, + "loss": 0.2683, + "step": 34730 + }, + { + "epoch": 0.4480354920458869, + "grad_norm": 1.0597352235446638, + "learning_rate": 9.926029918128418e-06, + "loss": 0.2679, + "step": 34740 + }, + { + "epoch": 0.4481644602358828, + "grad_norm": 0.9779322242564965, + "learning_rate": 9.925901280015501e-06, + "loss": 0.2624, + "step": 34750 + }, + { + "epoch": 0.44829342842587877, + "grad_norm": 1.0049891546693803, + "learning_rate": 9.925772530980308e-06, + "loss": 0.2494, + "step": 34760 + }, + { + "epoch": 0.4484223966158747, + "grad_norm": 1.05321286539323, + "learning_rate": 9.92564367102574e-06, + "loss": 0.2506, + "step": 34770 + }, + { + "epoch": 0.44855136480587066, + "grad_norm": 1.1013222207119064, + "learning_rate": 9.925514700154695e-06, + "loss": 0.2516, + "step": 34780 + }, + { + "epoch": 0.44868033299586657, + "grad_norm": 0.9500706698624213, + "learning_rate": 9.925385618370078e-06, + "loss": 0.2442, + "step": 34790 + }, + { + "epoch": 0.4488093011858625, + "grad_norm": 0.9725545293303118, + "learning_rate": 9.9252564256748e-06, + "loss": 0.2561, + "step": 34800 + }, + { + "epoch": 0.44893826937585846, + "grad_norm": 1.004695975091749, + "learning_rate": 9.925127122071762e-06, + "loss": 0.2573, + "step": 34810 + }, + { + "epoch": 0.44906723756585437, + "grad_norm": 0.9852533721274186, + "learning_rate": 9.924997707563881e-06, + "loss": 0.2485, + "step": 34820 + }, + { + "epoch": 0.44919620575585034, + "grad_norm": 1.1683370790631291, + "learning_rate": 9.924868182154072e-06, + "loss": 0.2646, + "step": 34830 + }, + { + "epoch": 0.44932517394584626, + "grad_norm": 1.0396859496512798, + "learning_rate": 9.92473854584525e-06, + "loss": 0.2452, + "step": 34840 + }, + { + "epoch": 0.44945414213584217, + "grad_norm": 2.338019650099273, + "learning_rate": 9.924608798640335e-06, + "loss": 0.257, + "step": 34850 + }, + { + "epoch": 0.44958311032583814, + "grad_norm": 1.073601130885756, + "learning_rate": 9.924478940542248e-06, + "loss": 0.2593, + "step": 34860 + }, + { + "epoch": 0.44971207851583406, + "grad_norm": 0.9752696873439761, + "learning_rate": 9.924348971553913e-06, + "loss": 0.257, + "step": 34870 + }, + { + "epoch": 0.44984104670583, + "grad_norm": 0.8937873997753403, + "learning_rate": 9.924218891678256e-06, + "loss": 0.2431, + "step": 34880 + }, + { + "epoch": 0.44997001489582594, + "grad_norm": 1.1056363057272953, + "learning_rate": 9.924088700918206e-06, + "loss": 0.263, + "step": 34890 + }, + { + "epoch": 0.45009898308582186, + "grad_norm": 0.9640276556486458, + "learning_rate": 9.923958399276698e-06, + "loss": 0.2552, + "step": 34900 + }, + { + "epoch": 0.45022795127581783, + "grad_norm": 1.01590436417853, + "learning_rate": 9.923827986756662e-06, + "loss": 0.2542, + "step": 34910 + }, + { + "epoch": 0.45035691946581374, + "grad_norm": 0.9755551672786363, + "learning_rate": 9.923697463361039e-06, + "loss": 0.2543, + "step": 34920 + }, + { + "epoch": 0.4504858876558097, + "grad_norm": 0.8191047876078132, + "learning_rate": 9.923566829092764e-06, + "loss": 0.2476, + "step": 34930 + }, + { + "epoch": 0.45061485584580563, + "grad_norm": 0.8831316550753602, + "learning_rate": 9.92343608395478e-06, + "loss": 0.2516, + "step": 34940 + }, + { + "epoch": 0.45074382403580154, + "grad_norm": 0.9515223213683172, + "learning_rate": 9.923305227950031e-06, + "loss": 0.2528, + "step": 34950 + }, + { + "epoch": 0.4508727922257975, + "grad_norm": 1.0937141438671545, + "learning_rate": 9.923174261081466e-06, + "loss": 0.2504, + "step": 34960 + }, + { + "epoch": 0.45100176041579343, + "grad_norm": 0.9920348178748054, + "learning_rate": 9.92304318335203e-06, + "loss": 0.249, + "step": 34970 + }, + { + "epoch": 0.4511307286057894, + "grad_norm": 1.0165463019138454, + "learning_rate": 9.922911994764678e-06, + "loss": 0.2506, + "step": 34980 + }, + { + "epoch": 0.4512596967957853, + "grad_norm": 1.1456371957468998, + "learning_rate": 9.922780695322363e-06, + "loss": 0.2467, + "step": 34990 + }, + { + "epoch": 0.45138866498578123, + "grad_norm": 1.0316528962863094, + "learning_rate": 9.922649285028041e-06, + "loss": 0.2437, + "step": 35000 + }, + { + "epoch": 0.4515176331757772, + "grad_norm": 1.1146256142282567, + "learning_rate": 9.922517763884671e-06, + "loss": 0.2692, + "step": 35010 + }, + { + "epoch": 0.4516466013657731, + "grad_norm": 0.9838908460945042, + "learning_rate": 9.922386131895217e-06, + "loss": 0.2499, + "step": 35020 + }, + { + "epoch": 0.4517755695557691, + "grad_norm": 0.9380720419426755, + "learning_rate": 9.92225438906264e-06, + "loss": 0.2461, + "step": 35030 + }, + { + "epoch": 0.451904537745765, + "grad_norm": 0.8813594236913865, + "learning_rate": 9.922122535389908e-06, + "loss": 0.2504, + "step": 35040 + }, + { + "epoch": 0.45203350593576097, + "grad_norm": 1.0265461140438645, + "learning_rate": 9.921990570879991e-06, + "loss": 0.2548, + "step": 35050 + }, + { + "epoch": 0.4521624741257569, + "grad_norm": 1.083384038694887, + "learning_rate": 9.92185849553586e-06, + "loss": 0.2748, + "step": 35060 + }, + { + "epoch": 0.4522914423157528, + "grad_norm": 0.9588713201004373, + "learning_rate": 9.92172630936049e-06, + "loss": 0.2534, + "step": 35070 + }, + { + "epoch": 0.45242041050574877, + "grad_norm": 1.020321314683428, + "learning_rate": 9.921594012356854e-06, + "loss": 0.2562, + "step": 35080 + }, + { + "epoch": 0.4525493786957447, + "grad_norm": 1.0988848167852783, + "learning_rate": 9.921461604527933e-06, + "loss": 0.247, + "step": 35090 + }, + { + "epoch": 0.45267834688574066, + "grad_norm": 0.9973589256426211, + "learning_rate": 9.92132908587671e-06, + "loss": 0.2452, + "step": 35100 + }, + { + "epoch": 0.45280731507573657, + "grad_norm": 1.035480121462215, + "learning_rate": 9.92119645640617e-06, + "loss": 0.2735, + "step": 35110 + }, + { + "epoch": 0.4529362832657325, + "grad_norm": 1.0842238981020222, + "learning_rate": 9.921063716119295e-06, + "loss": 0.2648, + "step": 35120 + }, + { + "epoch": 0.45306525145572846, + "grad_norm": 1.0238955990792031, + "learning_rate": 9.920930865019078e-06, + "loss": 0.2417, + "step": 35130 + }, + { + "epoch": 0.45319421964572437, + "grad_norm": 0.963259327741174, + "learning_rate": 9.92079790310851e-06, + "loss": 0.2542, + "step": 35140 + }, + { + "epoch": 0.45332318783572034, + "grad_norm": 1.1007155836479787, + "learning_rate": 9.920664830390582e-06, + "loss": 0.2688, + "step": 35150 + }, + { + "epoch": 0.45345215602571626, + "grad_norm": 1.1112232071441448, + "learning_rate": 9.920531646868297e-06, + "loss": 0.2508, + "step": 35160 + }, + { + "epoch": 0.4535811242157122, + "grad_norm": 0.9852840352389094, + "learning_rate": 9.920398352544647e-06, + "loss": 0.26, + "step": 35170 + }, + { + "epoch": 0.45371009240570814, + "grad_norm": 0.8540167086016879, + "learning_rate": 9.920264947422637e-06, + "loss": 0.2572, + "step": 35180 + }, + { + "epoch": 0.45383906059570406, + "grad_norm": 0.9511210283008235, + "learning_rate": 9.920131431505272e-06, + "loss": 0.2449, + "step": 35190 + }, + { + "epoch": 0.45396802878570003, + "grad_norm": 1.0027388299762936, + "learning_rate": 9.919997804795556e-06, + "loss": 0.2375, + "step": 35200 + }, + { + "epoch": 0.45409699697569594, + "grad_norm": 0.9800918711859702, + "learning_rate": 9.919864067296498e-06, + "loss": 0.2454, + "step": 35210 + }, + { + "epoch": 0.45422596516569186, + "grad_norm": 0.8994680976463185, + "learning_rate": 9.919730219011112e-06, + "loss": 0.2434, + "step": 35220 + }, + { + "epoch": 0.45435493335568783, + "grad_norm": 0.9385141238672335, + "learning_rate": 9.91959625994241e-06, + "loss": 0.248, + "step": 35230 + }, + { + "epoch": 0.45448390154568374, + "grad_norm": 0.9763276447055061, + "learning_rate": 9.919462190093409e-06, + "loss": 0.2587, + "step": 35240 + }, + { + "epoch": 0.4546128697356797, + "grad_norm": 1.0734481810402616, + "learning_rate": 9.919328009467131e-06, + "loss": 0.2517, + "step": 35250 + }, + { + "epoch": 0.45474183792567563, + "grad_norm": 1.0245039168544414, + "learning_rate": 9.919193718066591e-06, + "loss": 0.2406, + "step": 35260 + }, + { + "epoch": 0.45487080611567154, + "grad_norm": 0.968142485417027, + "learning_rate": 9.91905931589482e-06, + "loss": 0.2568, + "step": 35270 + }, + { + "epoch": 0.4549997743056675, + "grad_norm": 1.0664122500834559, + "learning_rate": 9.91892480295484e-06, + "loss": 0.2625, + "step": 35280 + }, + { + "epoch": 0.45512874249566343, + "grad_norm": 1.112186973452789, + "learning_rate": 9.91879017924968e-06, + "loss": 0.2624, + "step": 35290 + }, + { + "epoch": 0.4552577106856594, + "grad_norm": 1.0386034793399428, + "learning_rate": 9.918655444782373e-06, + "loss": 0.2475, + "step": 35300 + }, + { + "epoch": 0.4553866788756553, + "grad_norm": 1.1267797336536554, + "learning_rate": 9.918520599555954e-06, + "loss": 0.2637, + "step": 35310 + }, + { + "epoch": 0.45551564706565123, + "grad_norm": 1.028491459972319, + "learning_rate": 9.918385643573457e-06, + "loss": 0.2451, + "step": 35320 + }, + { + "epoch": 0.4556446152556472, + "grad_norm": 0.9632708731230119, + "learning_rate": 9.918250576837922e-06, + "loss": 0.2714, + "step": 35330 + }, + { + "epoch": 0.4557735834456431, + "grad_norm": 0.9692866528756221, + "learning_rate": 9.91811539935239e-06, + "loss": 0.2489, + "step": 35340 + }, + { + "epoch": 0.4559025516356391, + "grad_norm": 0.9256683455822678, + "learning_rate": 9.917980111119909e-06, + "loss": 0.2466, + "step": 35350 + }, + { + "epoch": 0.456031519825635, + "grad_norm": 1.0043302467647226, + "learning_rate": 9.917844712143518e-06, + "loss": 0.2609, + "step": 35360 + }, + { + "epoch": 0.45616048801563097, + "grad_norm": 1.0130732279579522, + "learning_rate": 9.917709202426271e-06, + "loss": 0.2488, + "step": 35370 + }, + { + "epoch": 0.4562894562056269, + "grad_norm": 0.9106771588676357, + "learning_rate": 9.917573581971218e-06, + "loss": 0.2587, + "step": 35380 + }, + { + "epoch": 0.4564184243956228, + "grad_norm": 1.0463621944365942, + "learning_rate": 9.917437850781416e-06, + "loss": 0.2639, + "step": 35390 + }, + { + "epoch": 0.45654739258561877, + "grad_norm": 0.9330252179525772, + "learning_rate": 9.917302008859915e-06, + "loss": 0.2478, + "step": 35400 + }, + { + "epoch": 0.4566763607756147, + "grad_norm": 0.9393475307768016, + "learning_rate": 9.91716605620978e-06, + "loss": 0.2478, + "step": 35410 + }, + { + "epoch": 0.45680532896561066, + "grad_norm": 1.0110836961740206, + "learning_rate": 9.917029992834068e-06, + "loss": 0.2472, + "step": 35420 + }, + { + "epoch": 0.4569342971556066, + "grad_norm": 1.034613534972213, + "learning_rate": 9.916893818735846e-06, + "loss": 0.2501, + "step": 35430 + }, + { + "epoch": 0.4570632653456025, + "grad_norm": 1.0521863757568148, + "learning_rate": 9.916757533918177e-06, + "loss": 0.2319, + "step": 35440 + }, + { + "epoch": 0.45719223353559846, + "grad_norm": 1.0422105805359851, + "learning_rate": 9.916621138384135e-06, + "loss": 0.2507, + "step": 35450 + }, + { + "epoch": 0.4573212017255944, + "grad_norm": 1.0766066045264817, + "learning_rate": 9.916484632136787e-06, + "loss": 0.2453, + "step": 35460 + }, + { + "epoch": 0.45745016991559034, + "grad_norm": 1.0649596476155156, + "learning_rate": 9.91634801517921e-06, + "loss": 0.2611, + "step": 35470 + }, + { + "epoch": 0.45757913810558626, + "grad_norm": 0.8765838956794602, + "learning_rate": 9.916211287514478e-06, + "loss": 0.2646, + "step": 35480 + }, + { + "epoch": 0.4577081062955822, + "grad_norm": 1.03906507053887, + "learning_rate": 9.91607444914567e-06, + "loss": 0.2461, + "step": 35490 + }, + { + "epoch": 0.45783707448557814, + "grad_norm": 0.9336409566655415, + "learning_rate": 9.915937500075869e-06, + "loss": 0.2505, + "step": 35500 + }, + { + "epoch": 0.45796604267557406, + "grad_norm": 1.057087973804893, + "learning_rate": 9.915800440308157e-06, + "loss": 0.2514, + "step": 35510 + }, + { + "epoch": 0.45809501086557003, + "grad_norm": 0.9642771403412982, + "learning_rate": 9.915663269845621e-06, + "loss": 0.2566, + "step": 35520 + }, + { + "epoch": 0.45822397905556594, + "grad_norm": 1.0159950446162054, + "learning_rate": 9.91552598869135e-06, + "loss": 0.2589, + "step": 35530 + }, + { + "epoch": 0.45835294724556186, + "grad_norm": 0.9897322289694123, + "learning_rate": 9.915388596848436e-06, + "loss": 0.2491, + "step": 35540 + }, + { + "epoch": 0.45848191543555783, + "grad_norm": 0.9726180288601649, + "learning_rate": 9.91525109431997e-06, + "loss": 0.2436, + "step": 35550 + }, + { + "epoch": 0.45861088362555374, + "grad_norm": 0.9143707354808833, + "learning_rate": 9.915113481109054e-06, + "loss": 0.2425, + "step": 35560 + }, + { + "epoch": 0.4587398518155497, + "grad_norm": 1.048432569954603, + "learning_rate": 9.914975757218778e-06, + "loss": 0.2479, + "step": 35570 + }, + { + "epoch": 0.45886882000554563, + "grad_norm": 0.9975730640114668, + "learning_rate": 9.914837922652252e-06, + "loss": 0.2565, + "step": 35580 + }, + { + "epoch": 0.45899778819554155, + "grad_norm": 1.064524701737371, + "learning_rate": 9.914699977412576e-06, + "loss": 0.2402, + "step": 35590 + }, + { + "epoch": 0.4591267563855375, + "grad_norm": 1.053876908489012, + "learning_rate": 9.914561921502857e-06, + "loss": 0.2503, + "step": 35600 + }, + { + "epoch": 0.45925572457553343, + "grad_norm": 1.0660217775834875, + "learning_rate": 9.914423754926204e-06, + "loss": 0.2655, + "step": 35610 + }, + { + "epoch": 0.4593846927655294, + "grad_norm": 1.0610677088111355, + "learning_rate": 9.914285477685726e-06, + "loss": 0.2562, + "step": 35620 + }, + { + "epoch": 0.4595136609555253, + "grad_norm": 0.9937632015519036, + "learning_rate": 9.914147089784538e-06, + "loss": 0.2574, + "step": 35630 + }, + { + "epoch": 0.45964262914552123, + "grad_norm": 0.9975968739617775, + "learning_rate": 9.914008591225757e-06, + "loss": 0.2657, + "step": 35640 + }, + { + "epoch": 0.4597715973355172, + "grad_norm": 0.9109433567928921, + "learning_rate": 9.913869982012501e-06, + "loss": 0.252, + "step": 35650 + }, + { + "epoch": 0.4599005655255131, + "grad_norm": 1.0762290513683208, + "learning_rate": 9.913731262147893e-06, + "loss": 0.2658, + "step": 35660 + }, + { + "epoch": 0.4600295337155091, + "grad_norm": 0.9366725210244885, + "learning_rate": 9.913592431635053e-06, + "loss": 0.2576, + "step": 35670 + }, + { + "epoch": 0.460158501905505, + "grad_norm": 1.043441516438102, + "learning_rate": 9.913453490477109e-06, + "loss": 0.2625, + "step": 35680 + }, + { + "epoch": 0.460287470095501, + "grad_norm": 1.0466499417117325, + "learning_rate": 9.913314438677193e-06, + "loss": 0.2648, + "step": 35690 + }, + { + "epoch": 0.4604164382854969, + "grad_norm": 1.0092250615487726, + "learning_rate": 9.913175276238432e-06, + "loss": 0.2524, + "step": 35700 + }, + { + "epoch": 0.4605454064754928, + "grad_norm": 0.932909982672479, + "learning_rate": 9.913036003163961e-06, + "loss": 0.2536, + "step": 35710 + }, + { + "epoch": 0.4606743746654888, + "grad_norm": 0.9859698718947328, + "learning_rate": 9.912896619456916e-06, + "loss": 0.2631, + "step": 35720 + }, + { + "epoch": 0.4608033428554847, + "grad_norm": 1.0455098676143608, + "learning_rate": 9.912757125120436e-06, + "loss": 0.2578, + "step": 35730 + }, + { + "epoch": 0.46093231104548066, + "grad_norm": 0.9208286266032583, + "learning_rate": 9.912617520157661e-06, + "loss": 0.2675, + "step": 35740 + }, + { + "epoch": 0.4610612792354766, + "grad_norm": 1.1240182635205183, + "learning_rate": 9.912477804571739e-06, + "loss": 0.2552, + "step": 35750 + }, + { + "epoch": 0.4611902474254725, + "grad_norm": 0.9125367648905179, + "learning_rate": 9.912337978365808e-06, + "loss": 0.2465, + "step": 35760 + }, + { + "epoch": 0.46131921561546846, + "grad_norm": 0.9730751925423389, + "learning_rate": 9.912198041543025e-06, + "loss": 0.2595, + "step": 35770 + }, + { + "epoch": 0.4614481838054644, + "grad_norm": 1.0495038654907938, + "learning_rate": 9.912057994106536e-06, + "loss": 0.2567, + "step": 35780 + }, + { + "epoch": 0.46157715199546034, + "grad_norm": 0.985624187975414, + "learning_rate": 9.911917836059497e-06, + "loss": 0.2455, + "step": 35790 + }, + { + "epoch": 0.46170612018545626, + "grad_norm": 1.0183829628087, + "learning_rate": 9.911777567405063e-06, + "loss": 0.2538, + "step": 35800 + }, + { + "epoch": 0.4618350883754522, + "grad_norm": 0.9673161000721497, + "learning_rate": 9.911637188146392e-06, + "loss": 0.2562, + "step": 35810 + }, + { + "epoch": 0.46196405656544814, + "grad_norm": 0.9398414689524591, + "learning_rate": 9.911496698286647e-06, + "loss": 0.2482, + "step": 35820 + }, + { + "epoch": 0.46209302475544406, + "grad_norm": 0.9811572446270094, + "learning_rate": 9.91135609782899e-06, + "loss": 0.2791, + "step": 35830 + }, + { + "epoch": 0.46222199294544003, + "grad_norm": 0.9434640685415607, + "learning_rate": 9.91121538677659e-06, + "loss": 0.2653, + "step": 35840 + }, + { + "epoch": 0.46235096113543594, + "grad_norm": 0.9310147160158554, + "learning_rate": 9.91107456513261e-06, + "loss": 0.2553, + "step": 35850 + }, + { + "epoch": 0.46247992932543186, + "grad_norm": 0.9482827773568008, + "learning_rate": 9.910933632900224e-06, + "loss": 0.2462, + "step": 35860 + }, + { + "epoch": 0.46260889751542783, + "grad_norm": 1.04078406574512, + "learning_rate": 9.910792590082607e-06, + "loss": 0.238, + "step": 35870 + }, + { + "epoch": 0.46273786570542375, + "grad_norm": 1.1188247644304865, + "learning_rate": 9.910651436682934e-06, + "loss": 0.2616, + "step": 35880 + }, + { + "epoch": 0.4628668338954197, + "grad_norm": 1.0494586996531547, + "learning_rate": 9.910510172704382e-06, + "loss": 0.2527, + "step": 35890 + }, + { + "epoch": 0.46299580208541563, + "grad_norm": 1.100374653549574, + "learning_rate": 9.910368798150134e-06, + "loss": 0.2579, + "step": 35900 + }, + { + "epoch": 0.46312477027541155, + "grad_norm": 1.037491147971748, + "learning_rate": 9.910227313023373e-06, + "loss": 0.2504, + "step": 35910 + }, + { + "epoch": 0.4632537384654075, + "grad_norm": 0.9978856424737705, + "learning_rate": 9.910085717327284e-06, + "loss": 0.2606, + "step": 35920 + }, + { + "epoch": 0.46338270665540343, + "grad_norm": 0.9904348540208086, + "learning_rate": 9.909944011065057e-06, + "loss": 0.2606, + "step": 35930 + }, + { + "epoch": 0.4635116748453994, + "grad_norm": 0.9495754702525732, + "learning_rate": 9.909802194239883e-06, + "loss": 0.2577, + "step": 35940 + }, + { + "epoch": 0.4636406430353953, + "grad_norm": 1.0880225879003507, + "learning_rate": 9.909660266854952e-06, + "loss": 0.2524, + "step": 35950 + }, + { + "epoch": 0.46376961122539123, + "grad_norm": 1.0114045983244195, + "learning_rate": 9.909518228913466e-06, + "loss": 0.2592, + "step": 35960 + }, + { + "epoch": 0.4638985794153872, + "grad_norm": 0.95406524209346, + "learning_rate": 9.909376080418617e-06, + "loss": 0.2544, + "step": 35970 + }, + { + "epoch": 0.4640275476053831, + "grad_norm": 1.0760830069246636, + "learning_rate": 9.90923382137361e-06, + "loss": 0.2583, + "step": 35980 + }, + { + "epoch": 0.4641565157953791, + "grad_norm": 1.1070176456133975, + "learning_rate": 9.909091451781648e-06, + "loss": 0.267, + "step": 35990 + }, + { + "epoch": 0.464285483985375, + "grad_norm": 1.063713663222204, + "learning_rate": 9.908948971645935e-06, + "loss": 0.2475, + "step": 36000 + }, + { + "epoch": 0.4644144521753709, + "grad_norm": 0.9644524952422713, + "learning_rate": 9.908806380969682e-06, + "loss": 0.2575, + "step": 36010 + }, + { + "epoch": 0.4645434203653669, + "grad_norm": 0.9886313297858637, + "learning_rate": 9.9086636797561e-06, + "loss": 0.2393, + "step": 36020 + }, + { + "epoch": 0.4646723885553628, + "grad_norm": 1.042267808018364, + "learning_rate": 9.908520868008398e-06, + "loss": 0.2509, + "step": 36030 + }, + { + "epoch": 0.4648013567453588, + "grad_norm": 0.9359702695854003, + "learning_rate": 9.908377945729794e-06, + "loss": 0.247, + "step": 36040 + }, + { + "epoch": 0.4649303249353547, + "grad_norm": 1.0027631120698643, + "learning_rate": 9.908234912923509e-06, + "loss": 0.2536, + "step": 36050 + }, + { + "epoch": 0.46505929312535066, + "grad_norm": 1.0019514223125952, + "learning_rate": 9.908091769592763e-06, + "loss": 0.2593, + "step": 36060 + }, + { + "epoch": 0.4651882613153466, + "grad_norm": 1.1132225108584717, + "learning_rate": 9.907948515740776e-06, + "loss": 0.2581, + "step": 36070 + }, + { + "epoch": 0.4653172295053425, + "grad_norm": 0.972127396486019, + "learning_rate": 9.907805151370777e-06, + "loss": 0.2305, + "step": 36080 + }, + { + "epoch": 0.46544619769533846, + "grad_norm": 0.9487803738299422, + "learning_rate": 9.907661676485994e-06, + "loss": 0.2557, + "step": 36090 + }, + { + "epoch": 0.4655751658853344, + "grad_norm": 1.063897452391119, + "learning_rate": 9.907518091089656e-06, + "loss": 0.2682, + "step": 36100 + }, + { + "epoch": 0.46570413407533034, + "grad_norm": 0.9322333884551508, + "learning_rate": 9.907374395184999e-06, + "loss": 0.2601, + "step": 36110 + }, + { + "epoch": 0.46583310226532626, + "grad_norm": 0.9505309153237329, + "learning_rate": 9.907230588775256e-06, + "loss": 0.2454, + "step": 36120 + }, + { + "epoch": 0.4659620704553222, + "grad_norm": 1.0689533619290013, + "learning_rate": 9.907086671863666e-06, + "loss": 0.2585, + "step": 36130 + }, + { + "epoch": 0.46609103864531815, + "grad_norm": 1.0547089641350402, + "learning_rate": 9.906942644453473e-06, + "loss": 0.2585, + "step": 36140 + }, + { + "epoch": 0.46622000683531406, + "grad_norm": 1.1118221127916743, + "learning_rate": 9.906798506547914e-06, + "loss": 0.2567, + "step": 36150 + }, + { + "epoch": 0.46634897502531003, + "grad_norm": 0.9805067346276121, + "learning_rate": 9.906654258150238e-06, + "loss": 0.268, + "step": 36160 + }, + { + "epoch": 0.46647794321530595, + "grad_norm": 1.097273200412436, + "learning_rate": 9.906509899263696e-06, + "loss": 0.2656, + "step": 36170 + }, + { + "epoch": 0.46660691140530186, + "grad_norm": 1.0690503330213779, + "learning_rate": 9.906365429891536e-06, + "loss": 0.243, + "step": 36180 + }, + { + "epoch": 0.46673587959529783, + "grad_norm": 1.0035180819660898, + "learning_rate": 9.90622085003701e-06, + "loss": 0.2719, + "step": 36190 + }, + { + "epoch": 0.46686484778529375, + "grad_norm": 1.0452639852517398, + "learning_rate": 9.906076159703376e-06, + "loss": 0.2563, + "step": 36200 + }, + { + "epoch": 0.4669938159752897, + "grad_norm": 0.9918675392830285, + "learning_rate": 9.90593135889389e-06, + "loss": 0.2371, + "step": 36210 + }, + { + "epoch": 0.46712278416528563, + "grad_norm": 1.0204664068001117, + "learning_rate": 9.905786447611814e-06, + "loss": 0.2464, + "step": 36220 + }, + { + "epoch": 0.46725175235528155, + "grad_norm": 0.9646362763606525, + "learning_rate": 9.905641425860411e-06, + "loss": 0.2645, + "step": 36230 + }, + { + "epoch": 0.4673807205452775, + "grad_norm": 1.0198987045446195, + "learning_rate": 9.905496293642947e-06, + "loss": 0.25, + "step": 36240 + }, + { + "epoch": 0.46750968873527343, + "grad_norm": 0.9577003228040449, + "learning_rate": 9.90535105096269e-06, + "loss": 0.2557, + "step": 36250 + }, + { + "epoch": 0.4676386569252694, + "grad_norm": 0.8633000818954037, + "learning_rate": 9.90520569782291e-06, + "loss": 0.2517, + "step": 36260 + }, + { + "epoch": 0.4677676251152653, + "grad_norm": 1.0564350543540024, + "learning_rate": 9.90506023422688e-06, + "loss": 0.2565, + "step": 36270 + }, + { + "epoch": 0.46789659330526123, + "grad_norm": 0.9361305852494664, + "learning_rate": 9.904914660177876e-06, + "loss": 0.2457, + "step": 36280 + }, + { + "epoch": 0.4680255614952572, + "grad_norm": 0.9721319635664172, + "learning_rate": 9.904768975679176e-06, + "loss": 0.2487, + "step": 36290 + }, + { + "epoch": 0.4681545296852531, + "grad_norm": 0.9766762965629748, + "learning_rate": 9.904623180734059e-06, + "loss": 0.2491, + "step": 36300 + }, + { + "epoch": 0.4682834978752491, + "grad_norm": 1.0002316319319606, + "learning_rate": 9.90447727534581e-06, + "loss": 0.2449, + "step": 36310 + }, + { + "epoch": 0.468412466065245, + "grad_norm": 1.0404310895906674, + "learning_rate": 9.904331259517716e-06, + "loss": 0.263, + "step": 36320 + }, + { + "epoch": 0.4685414342552409, + "grad_norm": 0.9995757852791797, + "learning_rate": 9.904185133253061e-06, + "loss": 0.2561, + "step": 36330 + }, + { + "epoch": 0.4686704024452369, + "grad_norm": 1.0278874782890384, + "learning_rate": 9.90403889655514e-06, + "loss": 0.253, + "step": 36340 + }, + { + "epoch": 0.4687993706352328, + "grad_norm": 0.8953196433779457, + "learning_rate": 9.90389254942724e-06, + "loss": 0.2413, + "step": 36350 + }, + { + "epoch": 0.4689283388252288, + "grad_norm": 0.9250360049237686, + "learning_rate": 9.903746091872663e-06, + "loss": 0.2546, + "step": 36360 + }, + { + "epoch": 0.4690573070152247, + "grad_norm": 0.9077586056504775, + "learning_rate": 9.903599523894704e-06, + "loss": 0.2541, + "step": 36370 + }, + { + "epoch": 0.46918627520522066, + "grad_norm": 1.0712613287899915, + "learning_rate": 9.903452845496661e-06, + "loss": 0.2643, + "step": 36380 + }, + { + "epoch": 0.4693152433952166, + "grad_norm": 0.97021947428643, + "learning_rate": 9.903306056681843e-06, + "loss": 0.2541, + "step": 36390 + }, + { + "epoch": 0.4694442115852125, + "grad_norm": 0.9933844050466477, + "learning_rate": 9.903159157453548e-06, + "loss": 0.2652, + "step": 36400 + }, + { + "epoch": 0.46957317977520846, + "grad_norm": 0.920498688349586, + "learning_rate": 9.90301214781509e-06, + "loss": 0.2585, + "step": 36410 + }, + { + "epoch": 0.4697021479652044, + "grad_norm": 1.101616345519633, + "learning_rate": 9.902865027769776e-06, + "loss": 0.2511, + "step": 36420 + }, + { + "epoch": 0.46983111615520035, + "grad_norm": 1.1091027170709384, + "learning_rate": 9.90271779732092e-06, + "loss": 0.2633, + "step": 36430 + }, + { + "epoch": 0.46996008434519626, + "grad_norm": 1.0586621405772276, + "learning_rate": 9.902570456471839e-06, + "loss": 0.2602, + "step": 36440 + }, + { + "epoch": 0.4700890525351922, + "grad_norm": 1.021097704201302, + "learning_rate": 9.902423005225846e-06, + "loss": 0.2577, + "step": 36450 + }, + { + "epoch": 0.47021802072518815, + "grad_norm": 0.9897811556287183, + "learning_rate": 9.902275443586267e-06, + "loss": 0.2638, + "step": 36460 + }, + { + "epoch": 0.47034698891518406, + "grad_norm": 1.0989442046743838, + "learning_rate": 9.90212777155642e-06, + "loss": 0.2459, + "step": 36470 + }, + { + "epoch": 0.47047595710518003, + "grad_norm": 0.9914853363553272, + "learning_rate": 9.901979989139635e-06, + "loss": 0.2477, + "step": 36480 + }, + { + "epoch": 0.47060492529517595, + "grad_norm": 1.009039040427602, + "learning_rate": 9.901832096339237e-06, + "loss": 0.2533, + "step": 36490 + }, + { + "epoch": 0.47073389348517186, + "grad_norm": 0.9952147697563133, + "learning_rate": 9.901684093158554e-06, + "loss": 0.2596, + "step": 36500 + }, + { + "epoch": 0.47086286167516783, + "grad_norm": 0.9385446939335162, + "learning_rate": 9.901535979600924e-06, + "loss": 0.2553, + "step": 36510 + }, + { + "epoch": 0.47099182986516375, + "grad_norm": 0.9530962503910515, + "learning_rate": 9.90138775566968e-06, + "loss": 0.2508, + "step": 36520 + }, + { + "epoch": 0.4711207980551597, + "grad_norm": 0.9897587073551867, + "learning_rate": 9.901239421368159e-06, + "loss": 0.247, + "step": 36530 + }, + { + "epoch": 0.47124976624515563, + "grad_norm": 0.9984401257327742, + "learning_rate": 9.9010909766997e-06, + "loss": 0.2527, + "step": 36540 + }, + { + "epoch": 0.47137873443515155, + "grad_norm": 0.9202377485033715, + "learning_rate": 9.900942421667648e-06, + "loss": 0.2621, + "step": 36550 + }, + { + "epoch": 0.4715077026251475, + "grad_norm": 0.9507774649076949, + "learning_rate": 9.900793756275348e-06, + "loss": 0.246, + "step": 36560 + }, + { + "epoch": 0.47163667081514343, + "grad_norm": 1.032865318749517, + "learning_rate": 9.900644980526148e-06, + "loss": 0.2673, + "step": 36570 + }, + { + "epoch": 0.4717656390051394, + "grad_norm": 1.109857444988876, + "learning_rate": 9.900496094423394e-06, + "loss": 0.244, + "step": 36580 + }, + { + "epoch": 0.4718946071951353, + "grad_norm": 0.9710524274834058, + "learning_rate": 9.900347097970446e-06, + "loss": 0.2586, + "step": 36590 + }, + { + "epoch": 0.47202357538513123, + "grad_norm": 1.021966289638806, + "learning_rate": 9.900197991170654e-06, + "loss": 0.251, + "step": 36600 + }, + { + "epoch": 0.4721525435751272, + "grad_norm": 1.018530870227059, + "learning_rate": 9.900048774027375e-06, + "loss": 0.2573, + "step": 36610 + }, + { + "epoch": 0.4722815117651231, + "grad_norm": 0.8885197024948756, + "learning_rate": 9.899899446543973e-06, + "loss": 0.2508, + "step": 36620 + }, + { + "epoch": 0.4724104799551191, + "grad_norm": 1.027306027940243, + "learning_rate": 9.899750008723807e-06, + "loss": 0.2443, + "step": 36630 + }, + { + "epoch": 0.472539448145115, + "grad_norm": 0.9649168707172453, + "learning_rate": 9.899600460570244e-06, + "loss": 0.2541, + "step": 36640 + }, + { + "epoch": 0.4726684163351109, + "grad_norm": 0.9091840513463451, + "learning_rate": 9.89945080208665e-06, + "loss": 0.2501, + "step": 36650 + }, + { + "epoch": 0.4727973845251069, + "grad_norm": 0.8983088092324332, + "learning_rate": 9.899301033276397e-06, + "loss": 0.2682, + "step": 36660 + }, + { + "epoch": 0.4729263527151028, + "grad_norm": 0.9309696679344036, + "learning_rate": 9.899151154142857e-06, + "loss": 0.2434, + "step": 36670 + }, + { + "epoch": 0.4730553209050988, + "grad_norm": 1.0486201870720004, + "learning_rate": 9.899001164689404e-06, + "loss": 0.2671, + "step": 36680 + }, + { + "epoch": 0.4731842890950947, + "grad_norm": 1.0050426332103906, + "learning_rate": 9.898851064919416e-06, + "loss": 0.2502, + "step": 36690 + }, + { + "epoch": 0.4733132572850906, + "grad_norm": 0.9853056425720045, + "learning_rate": 9.898700854836273e-06, + "loss": 0.2505, + "step": 36700 + }, + { + "epoch": 0.4734422254750866, + "grad_norm": 1.0312533762919784, + "learning_rate": 9.898550534443356e-06, + "loss": 0.2399, + "step": 36710 + }, + { + "epoch": 0.4735711936650825, + "grad_norm": 1.052944280654305, + "learning_rate": 9.898400103744055e-06, + "loss": 0.2578, + "step": 36720 + }, + { + "epoch": 0.47370016185507846, + "grad_norm": 0.9782778268034301, + "learning_rate": 9.898249562741751e-06, + "loss": 0.2496, + "step": 36730 + }, + { + "epoch": 0.4738291300450744, + "grad_norm": 1.001950712392258, + "learning_rate": 9.898098911439837e-06, + "loss": 0.2661, + "step": 36740 + }, + { + "epoch": 0.47395809823507035, + "grad_norm": 0.9761479387567846, + "learning_rate": 9.897948149841706e-06, + "loss": 0.2608, + "step": 36750 + }, + { + "epoch": 0.47408706642506626, + "grad_norm": 0.9625600419511048, + "learning_rate": 9.897797277950752e-06, + "loss": 0.2404, + "step": 36760 + }, + { + "epoch": 0.4742160346150622, + "grad_norm": 0.9972658643324813, + "learning_rate": 9.89764629577037e-06, + "loss": 0.2386, + "step": 36770 + }, + { + "epoch": 0.47434500280505815, + "grad_norm": 1.0392287258115973, + "learning_rate": 9.897495203303966e-06, + "loss": 0.2584, + "step": 36780 + }, + { + "epoch": 0.47447397099505406, + "grad_norm": 1.0438675316876784, + "learning_rate": 9.897344000554937e-06, + "loss": 0.2622, + "step": 36790 + }, + { + "epoch": 0.47460293918505003, + "grad_norm": 0.9377351358056684, + "learning_rate": 9.89719268752669e-06, + "loss": 0.263, + "step": 36800 + }, + { + "epoch": 0.47473190737504595, + "grad_norm": 0.9091677630651656, + "learning_rate": 9.897041264222628e-06, + "loss": 0.2567, + "step": 36810 + }, + { + "epoch": 0.47486087556504186, + "grad_norm": 0.9591815286792765, + "learning_rate": 9.896889730646168e-06, + "loss": 0.2466, + "step": 36820 + }, + { + "epoch": 0.47498984375503783, + "grad_norm": 0.9375268068066621, + "learning_rate": 9.89673808680072e-06, + "loss": 0.2444, + "step": 36830 + }, + { + "epoch": 0.47511881194503375, + "grad_norm": 0.9242124792059764, + "learning_rate": 9.896586332689694e-06, + "loss": 0.2547, + "step": 36840 + }, + { + "epoch": 0.4752477801350297, + "grad_norm": 0.9805423275262048, + "learning_rate": 9.89643446831651e-06, + "loss": 0.2583, + "step": 36850 + }, + { + "epoch": 0.47537674832502563, + "grad_norm": 1.0241635570490908, + "learning_rate": 9.896282493684593e-06, + "loss": 0.2538, + "step": 36860 + }, + { + "epoch": 0.47550571651502155, + "grad_norm": 0.9330345490853744, + "learning_rate": 9.896130408797357e-06, + "loss": 0.2431, + "step": 36870 + }, + { + "epoch": 0.4756346847050175, + "grad_norm": 1.0410388973636175, + "learning_rate": 9.895978213658231e-06, + "loss": 0.2493, + "step": 36880 + }, + { + "epoch": 0.47576365289501343, + "grad_norm": 1.0281439335044704, + "learning_rate": 9.89582590827064e-06, + "loss": 0.2749, + "step": 36890 + }, + { + "epoch": 0.4758926210850094, + "grad_norm": 1.0619934878696395, + "learning_rate": 9.895673492638017e-06, + "loss": 0.2505, + "step": 36900 + }, + { + "epoch": 0.4760215892750053, + "grad_norm": 0.9298454615805509, + "learning_rate": 9.895520966763792e-06, + "loss": 0.2506, + "step": 36910 + }, + { + "epoch": 0.47615055746500123, + "grad_norm": 0.8783274426999221, + "learning_rate": 9.895368330651399e-06, + "loss": 0.252, + "step": 36920 + }, + { + "epoch": 0.4762795256549972, + "grad_norm": 1.088693877414871, + "learning_rate": 9.895215584304277e-06, + "loss": 0.266, + "step": 36930 + }, + { + "epoch": 0.4764084938449931, + "grad_norm": 0.8582323822496589, + "learning_rate": 9.895062727725862e-06, + "loss": 0.2554, + "step": 36940 + }, + { + "epoch": 0.4765374620349891, + "grad_norm": 1.0290018881170682, + "learning_rate": 9.8949097609196e-06, + "loss": 0.2614, + "step": 36950 + }, + { + "epoch": 0.476666430224985, + "grad_norm": 0.9681246402155635, + "learning_rate": 9.894756683888933e-06, + "loss": 0.2645, + "step": 36960 + }, + { + "epoch": 0.4767953984149809, + "grad_norm": 0.8829862076458598, + "learning_rate": 9.894603496637311e-06, + "loss": 0.2395, + "step": 36970 + }, + { + "epoch": 0.4769243666049769, + "grad_norm": 1.004156521972252, + "learning_rate": 9.89445019916818e-06, + "loss": 0.2663, + "step": 36980 + }, + { + "epoch": 0.4770533347949728, + "grad_norm": 1.069583470862624, + "learning_rate": 9.894296791484994e-06, + "loss": 0.2542, + "step": 36990 + }, + { + "epoch": 0.4771823029849688, + "grad_norm": 0.8985991105171007, + "learning_rate": 9.894143273591207e-06, + "loss": 0.2517, + "step": 37000 + }, + { + "epoch": 0.4773112711749647, + "grad_norm": 1.0309457151130956, + "learning_rate": 9.893989645490275e-06, + "loss": 0.2459, + "step": 37010 + }, + { + "epoch": 0.4774402393649606, + "grad_norm": 0.9774565331247882, + "learning_rate": 9.893835907185659e-06, + "loss": 0.2402, + "step": 37020 + }, + { + "epoch": 0.4775692075549566, + "grad_norm": 0.9356286184113481, + "learning_rate": 9.89368205868082e-06, + "loss": 0.2591, + "step": 37030 + }, + { + "epoch": 0.4776981757449525, + "grad_norm": 0.8825937781459303, + "learning_rate": 9.893528099979223e-06, + "loss": 0.2635, + "step": 37040 + }, + { + "epoch": 0.47782714393494846, + "grad_norm": 0.940295461693295, + "learning_rate": 9.893374031084333e-06, + "loss": 0.2367, + "step": 37050 + }, + { + "epoch": 0.4779561121249444, + "grad_norm": 1.0353999452857074, + "learning_rate": 9.893219851999622e-06, + "loss": 0.2644, + "step": 37060 + }, + { + "epoch": 0.47808508031494035, + "grad_norm": 0.9938102313463624, + "learning_rate": 9.89306556272856e-06, + "loss": 0.2378, + "step": 37070 + }, + { + "epoch": 0.47821404850493626, + "grad_norm": 1.0912166516854174, + "learning_rate": 9.892911163274623e-06, + "loss": 0.2668, + "step": 37080 + }, + { + "epoch": 0.4783430166949322, + "grad_norm": 0.9008221551714682, + "learning_rate": 9.892756653641286e-06, + "loss": 0.2484, + "step": 37090 + }, + { + "epoch": 0.47847198488492815, + "grad_norm": 0.964410162917058, + "learning_rate": 9.892602033832029e-06, + "loss": 0.2604, + "step": 37100 + }, + { + "epoch": 0.47860095307492406, + "grad_norm": 1.0522188536410941, + "learning_rate": 9.892447303850334e-06, + "loss": 0.2512, + "step": 37110 + }, + { + "epoch": 0.47872992126492003, + "grad_norm": 0.986790848459005, + "learning_rate": 9.892292463699686e-06, + "loss": 0.2535, + "step": 37120 + }, + { + "epoch": 0.47885888945491595, + "grad_norm": 0.9780354952444847, + "learning_rate": 9.892137513383568e-06, + "loss": 0.2492, + "step": 37130 + }, + { + "epoch": 0.47898785764491186, + "grad_norm": 1.0893255645943611, + "learning_rate": 9.891982452905475e-06, + "loss": 0.2509, + "step": 37140 + }, + { + "epoch": 0.47911682583490783, + "grad_norm": 0.9995412757251472, + "learning_rate": 9.891827282268893e-06, + "loss": 0.2694, + "step": 37150 + }, + { + "epoch": 0.47924579402490375, + "grad_norm": 0.9956632851740452, + "learning_rate": 9.891672001477321e-06, + "loss": 0.2636, + "step": 37160 + }, + { + "epoch": 0.4793747622148997, + "grad_norm": 0.9722776900581066, + "learning_rate": 9.89151661053425e-06, + "loss": 0.2444, + "step": 37170 + }, + { + "epoch": 0.47950373040489563, + "grad_norm": 0.9358692615497582, + "learning_rate": 9.891361109443186e-06, + "loss": 0.2468, + "step": 37180 + }, + { + "epoch": 0.47963269859489155, + "grad_norm": 1.0357375106621627, + "learning_rate": 9.891205498207625e-06, + "loss": 0.2564, + "step": 37190 + }, + { + "epoch": 0.4797616667848875, + "grad_norm": 1.022049813573373, + "learning_rate": 9.891049776831075e-06, + "loss": 0.2531, + "step": 37200 + }, + { + "epoch": 0.47989063497488343, + "grad_norm": 1.0375420029031435, + "learning_rate": 9.89089394531704e-06, + "loss": 0.2517, + "step": 37210 + }, + { + "epoch": 0.4800196031648794, + "grad_norm": 0.9732677640834052, + "learning_rate": 9.890738003669029e-06, + "loss": 0.255, + "step": 37220 + }, + { + "epoch": 0.4801485713548753, + "grad_norm": 0.9443530762609894, + "learning_rate": 9.890581951890554e-06, + "loss": 0.2541, + "step": 37230 + }, + { + "epoch": 0.48027753954487123, + "grad_norm": 0.9809771445556009, + "learning_rate": 9.89042578998513e-06, + "loss": 0.2613, + "step": 37240 + }, + { + "epoch": 0.4804065077348672, + "grad_norm": 0.9681249117521551, + "learning_rate": 9.890269517956274e-06, + "loss": 0.2439, + "step": 37250 + }, + { + "epoch": 0.4805354759248631, + "grad_norm": 1.0332837944323083, + "learning_rate": 9.890113135807502e-06, + "loss": 0.2534, + "step": 37260 + }, + { + "epoch": 0.4806644441148591, + "grad_norm": 0.8713444350399612, + "learning_rate": 9.889956643542338e-06, + "loss": 0.2566, + "step": 37270 + }, + { + "epoch": 0.480793412304855, + "grad_norm": 0.9179638984461272, + "learning_rate": 9.889800041164305e-06, + "loss": 0.2564, + "step": 37280 + }, + { + "epoch": 0.4809223804948509, + "grad_norm": 1.0134037968057976, + "learning_rate": 9.88964332867693e-06, + "loss": 0.2544, + "step": 37290 + }, + { + "epoch": 0.4810513486848469, + "grad_norm": 0.9459802093771872, + "learning_rate": 9.88948650608374e-06, + "loss": 0.2521, + "step": 37300 + }, + { + "epoch": 0.4811803168748428, + "grad_norm": 0.9722576290543797, + "learning_rate": 9.889329573388269e-06, + "loss": 0.2348, + "step": 37310 + }, + { + "epoch": 0.4813092850648388, + "grad_norm": 0.9586061463832217, + "learning_rate": 9.889172530594049e-06, + "loss": 0.2445, + "step": 37320 + }, + { + "epoch": 0.4814382532548347, + "grad_norm": 1.0723965733676906, + "learning_rate": 9.889015377704616e-06, + "loss": 0.2507, + "step": 37330 + }, + { + "epoch": 0.4815672214448306, + "grad_norm": 0.9520886023905797, + "learning_rate": 9.888858114723512e-06, + "loss": 0.2529, + "step": 37340 + }, + { + "epoch": 0.4816961896348266, + "grad_norm": 1.0694546370143077, + "learning_rate": 9.888700741654274e-06, + "loss": 0.2429, + "step": 37350 + }, + { + "epoch": 0.4818251578248225, + "grad_norm": 0.9680300165449306, + "learning_rate": 9.888543258500449e-06, + "loss": 0.2461, + "step": 37360 + }, + { + "epoch": 0.48195412601481846, + "grad_norm": 1.016614331338847, + "learning_rate": 9.888385665265581e-06, + "loss": 0.2558, + "step": 37370 + }, + { + "epoch": 0.4820830942048144, + "grad_norm": 0.9331630918832154, + "learning_rate": 9.88822796195322e-06, + "loss": 0.2303, + "step": 37380 + }, + { + "epoch": 0.4822120623948103, + "grad_norm": 0.9942187902837819, + "learning_rate": 9.888070148566917e-06, + "loss": 0.2493, + "step": 37390 + }, + { + "epoch": 0.48234103058480626, + "grad_norm": 0.9764458597130593, + "learning_rate": 9.887912225110224e-06, + "loss": 0.2481, + "step": 37400 + }, + { + "epoch": 0.4824699987748022, + "grad_norm": 0.9651134532747011, + "learning_rate": 9.8877541915867e-06, + "loss": 0.2496, + "step": 37410 + }, + { + "epoch": 0.48259896696479815, + "grad_norm": 0.9705013680310497, + "learning_rate": 9.887596047999903e-06, + "loss": 0.2514, + "step": 37420 + }, + { + "epoch": 0.48272793515479406, + "grad_norm": 1.0054879574661986, + "learning_rate": 9.887437794353392e-06, + "loss": 0.2541, + "step": 37430 + }, + { + "epoch": 0.48285690334479003, + "grad_norm": 0.9716519491917971, + "learning_rate": 9.887279430650734e-06, + "loss": 0.2547, + "step": 37440 + }, + { + "epoch": 0.48298587153478595, + "grad_norm": 0.9847532855631429, + "learning_rate": 9.88712095689549e-06, + "loss": 0.241, + "step": 37450 + }, + { + "epoch": 0.48311483972478186, + "grad_norm": 1.0365093215973862, + "learning_rate": 9.886962373091233e-06, + "loss": 0.2588, + "step": 37460 + }, + { + "epoch": 0.48324380791477783, + "grad_norm": 1.054824473541715, + "learning_rate": 9.886803679241533e-06, + "loss": 0.2669, + "step": 37470 + }, + { + "epoch": 0.48337277610477375, + "grad_norm": 0.9797760848895896, + "learning_rate": 9.886644875349963e-06, + "loss": 0.2463, + "step": 37480 + }, + { + "epoch": 0.4835017442947697, + "grad_norm": 1.0183416813599024, + "learning_rate": 9.8864859614201e-06, + "loss": 0.2672, + "step": 37490 + }, + { + "epoch": 0.48363071248476563, + "grad_norm": 1.0261524054193765, + "learning_rate": 9.886326937455519e-06, + "loss": 0.2321, + "step": 37500 + }, + { + "epoch": 0.48375968067476155, + "grad_norm": 0.9984704468217062, + "learning_rate": 9.886167803459806e-06, + "loss": 0.2414, + "step": 37510 + }, + { + "epoch": 0.4838886488647575, + "grad_norm": 0.9668238476282075, + "learning_rate": 9.886008559436539e-06, + "loss": 0.2538, + "step": 37520 + }, + { + "epoch": 0.48401761705475344, + "grad_norm": 0.9555117765658276, + "learning_rate": 9.88584920538931e-06, + "loss": 0.2452, + "step": 37530 + }, + { + "epoch": 0.4841465852447494, + "grad_norm": 0.9272735359693732, + "learning_rate": 9.885689741321703e-06, + "loss": 0.2569, + "step": 37540 + }, + { + "epoch": 0.4842755534347453, + "grad_norm": 0.9461875211701518, + "learning_rate": 9.885530167237308e-06, + "loss": 0.2555, + "step": 37550 + }, + { + "epoch": 0.48440452162474124, + "grad_norm": 1.1061587783948346, + "learning_rate": 9.885370483139722e-06, + "loss": 0.2415, + "step": 37560 + }, + { + "epoch": 0.4845334898147372, + "grad_norm": 0.9442866786231736, + "learning_rate": 9.885210689032538e-06, + "loss": 0.2541, + "step": 37570 + }, + { + "epoch": 0.4846624580047331, + "grad_norm": 1.0217756617056388, + "learning_rate": 9.885050784919357e-06, + "loss": 0.2427, + "step": 37580 + }, + { + "epoch": 0.4847914261947291, + "grad_norm": 0.9545625461256485, + "learning_rate": 9.884890770803777e-06, + "loss": 0.2625, + "step": 37590 + }, + { + "epoch": 0.484920394384725, + "grad_norm": 0.967888245138264, + "learning_rate": 9.884730646689402e-06, + "loss": 0.2449, + "step": 37600 + }, + { + "epoch": 0.4850493625747209, + "grad_norm": 0.9769438464160112, + "learning_rate": 9.88457041257984e-06, + "loss": 0.2486, + "step": 37610 + }, + { + "epoch": 0.4851783307647169, + "grad_norm": 1.0043746681567698, + "learning_rate": 9.884410068478694e-06, + "loss": 0.2388, + "step": 37620 + }, + { + "epoch": 0.4853072989547128, + "grad_norm": 1.168224631304636, + "learning_rate": 9.88424961438958e-06, + "loss": 0.2563, + "step": 37630 + }, + { + "epoch": 0.4854362671447088, + "grad_norm": 0.8936933082226899, + "learning_rate": 9.884089050316107e-06, + "loss": 0.2608, + "step": 37640 + }, + { + "epoch": 0.4855652353347047, + "grad_norm": 0.9442619858905371, + "learning_rate": 9.883928376261895e-06, + "loss": 0.2494, + "step": 37650 + }, + { + "epoch": 0.4856942035247006, + "grad_norm": 0.95449972433338, + "learning_rate": 9.883767592230558e-06, + "loss": 0.2614, + "step": 37660 + }, + { + "epoch": 0.4858231717146966, + "grad_norm": 0.9732015359865938, + "learning_rate": 9.883606698225719e-06, + "loss": 0.2418, + "step": 37670 + }, + { + "epoch": 0.4859521399046925, + "grad_norm": 0.94928445280278, + "learning_rate": 9.883445694251e-06, + "loss": 0.2465, + "step": 37680 + }, + { + "epoch": 0.48608110809468846, + "grad_norm": 0.9845029722756844, + "learning_rate": 9.883284580310026e-06, + "loss": 0.2471, + "step": 37690 + }, + { + "epoch": 0.4862100762846844, + "grad_norm": 0.9692417460587318, + "learning_rate": 9.883123356406426e-06, + "loss": 0.2512, + "step": 37700 + }, + { + "epoch": 0.4863390444746803, + "grad_norm": 0.9468094327956881, + "learning_rate": 9.882962022543832e-06, + "loss": 0.2466, + "step": 37710 + }, + { + "epoch": 0.48646801266467626, + "grad_norm": 0.8524275284965793, + "learning_rate": 9.882800578725872e-06, + "loss": 0.2649, + "step": 37720 + }, + { + "epoch": 0.4865969808546722, + "grad_norm": 1.030664881734248, + "learning_rate": 9.882639024956187e-06, + "loss": 0.2625, + "step": 37730 + }, + { + "epoch": 0.48672594904466815, + "grad_norm": 0.8821894924307443, + "learning_rate": 9.882477361238411e-06, + "loss": 0.2555, + "step": 37740 + }, + { + "epoch": 0.48685491723466406, + "grad_norm": 0.9096767612864478, + "learning_rate": 9.882315587576188e-06, + "loss": 0.25, + "step": 37750 + }, + { + "epoch": 0.48698388542466003, + "grad_norm": 1.0436201878981197, + "learning_rate": 9.882153703973156e-06, + "loss": 0.25, + "step": 37760 + }, + { + "epoch": 0.48711285361465595, + "grad_norm": 1.0201904196797258, + "learning_rate": 9.881991710432965e-06, + "loss": 0.2593, + "step": 37770 + }, + { + "epoch": 0.48724182180465186, + "grad_norm": 0.9519033808443147, + "learning_rate": 9.881829606959261e-06, + "loss": 0.2493, + "step": 37780 + }, + { + "epoch": 0.48737078999464784, + "grad_norm": 0.9461781050876321, + "learning_rate": 9.881667393555692e-06, + "loss": 0.2552, + "step": 37790 + }, + { + "epoch": 0.48749975818464375, + "grad_norm": 1.0081497395252093, + "learning_rate": 9.881505070225915e-06, + "loss": 0.2486, + "step": 37800 + }, + { + "epoch": 0.4876287263746397, + "grad_norm": 1.009501355171364, + "learning_rate": 9.881342636973582e-06, + "loss": 0.2574, + "step": 37810 + }, + { + "epoch": 0.48775769456463564, + "grad_norm": 0.8595459393873157, + "learning_rate": 9.881180093802351e-06, + "loss": 0.2551, + "step": 37820 + }, + { + "epoch": 0.48788666275463155, + "grad_norm": 0.8696850781714097, + "learning_rate": 9.881017440715885e-06, + "loss": 0.2414, + "step": 37830 + }, + { + "epoch": 0.4880156309446275, + "grad_norm": 0.9575079468131353, + "learning_rate": 9.880854677717844e-06, + "loss": 0.2369, + "step": 37840 + }, + { + "epoch": 0.48814459913462344, + "grad_norm": 1.1070239350275022, + "learning_rate": 9.880691804811893e-06, + "loss": 0.265, + "step": 37850 + }, + { + "epoch": 0.4882735673246194, + "grad_norm": 0.9664289839232388, + "learning_rate": 9.8805288220017e-06, + "loss": 0.2609, + "step": 37860 + }, + { + "epoch": 0.4884025355146153, + "grad_norm": 1.0052892850608237, + "learning_rate": 9.880365729290935e-06, + "loss": 0.2548, + "step": 37870 + }, + { + "epoch": 0.48853150370461124, + "grad_norm": 0.9686324887127248, + "learning_rate": 9.880202526683272e-06, + "loss": 0.2556, + "step": 37880 + }, + { + "epoch": 0.4886604718946072, + "grad_norm": 1.0491245546718342, + "learning_rate": 9.880039214182384e-06, + "loss": 0.2497, + "step": 37890 + }, + { + "epoch": 0.4887894400846031, + "grad_norm": 0.8957919636977184, + "learning_rate": 9.87987579179195e-06, + "loss": 0.2415, + "step": 37900 + }, + { + "epoch": 0.4889184082745991, + "grad_norm": 0.9338858306524038, + "learning_rate": 9.879712259515651e-06, + "loss": 0.2489, + "step": 37910 + }, + { + "epoch": 0.489047376464595, + "grad_norm": 0.9402099731593937, + "learning_rate": 9.879548617357167e-06, + "loss": 0.2574, + "step": 37920 + }, + { + "epoch": 0.4891763446545909, + "grad_norm": 1.0086093105343632, + "learning_rate": 9.879384865320182e-06, + "loss": 0.244, + "step": 37930 + }, + { + "epoch": 0.4893053128445869, + "grad_norm": 1.0033402813205574, + "learning_rate": 9.879221003408385e-06, + "loss": 0.2447, + "step": 37940 + }, + { + "epoch": 0.4894342810345828, + "grad_norm": 1.0587214727157281, + "learning_rate": 9.879057031625468e-06, + "loss": 0.2491, + "step": 37950 + }, + { + "epoch": 0.4895632492245788, + "grad_norm": 0.9066467138132893, + "learning_rate": 9.878892949975122e-06, + "loss": 0.2562, + "step": 37960 + }, + { + "epoch": 0.4896922174145747, + "grad_norm": 1.0684194445982085, + "learning_rate": 9.87872875846104e-06, + "loss": 0.2553, + "step": 37970 + }, + { + "epoch": 0.4898211856045706, + "grad_norm": 1.0048358929289154, + "learning_rate": 9.87856445708692e-06, + "loss": 0.2607, + "step": 37980 + }, + { + "epoch": 0.4899501537945666, + "grad_norm": 0.996729064531958, + "learning_rate": 9.878400045856463e-06, + "loss": 0.2426, + "step": 37990 + }, + { + "epoch": 0.4900791219845625, + "grad_norm": 0.9757966559875958, + "learning_rate": 9.87823552477337e-06, + "loss": 0.2564, + "step": 38000 + }, + { + "epoch": 0.49020809017455846, + "grad_norm": 1.0405997784343275, + "learning_rate": 9.878070893841347e-06, + "loss": 0.2487, + "step": 38010 + }, + { + "epoch": 0.4903370583645544, + "grad_norm": 0.9956055632721584, + "learning_rate": 9.8779061530641e-06, + "loss": 0.2435, + "step": 38020 + }, + { + "epoch": 0.4904660265545503, + "grad_norm": 0.9485313107893931, + "learning_rate": 9.877741302445339e-06, + "loss": 0.2489, + "step": 38030 + }, + { + "epoch": 0.49059499474454626, + "grad_norm": 1.013093661241454, + "learning_rate": 9.877576341988778e-06, + "loss": 0.2718, + "step": 38040 + }, + { + "epoch": 0.4907239629345422, + "grad_norm": 0.9322597107536735, + "learning_rate": 9.877411271698127e-06, + "loss": 0.2451, + "step": 38050 + }, + { + "epoch": 0.49085293112453815, + "grad_norm": 0.9960440590160282, + "learning_rate": 9.877246091577106e-06, + "loss": 0.2491, + "step": 38060 + }, + { + "epoch": 0.49098189931453406, + "grad_norm": 1.010954758620975, + "learning_rate": 9.877080801629436e-06, + "loss": 0.2626, + "step": 38070 + }, + { + "epoch": 0.49111086750453004, + "grad_norm": 1.0159166386347196, + "learning_rate": 9.876915401858837e-06, + "loss": 0.2502, + "step": 38080 + }, + { + "epoch": 0.49123983569452595, + "grad_norm": 1.0197797461112477, + "learning_rate": 9.876749892269034e-06, + "loss": 0.2357, + "step": 38090 + }, + { + "epoch": 0.49136880388452187, + "grad_norm": 1.004180550413299, + "learning_rate": 9.876584272863754e-06, + "loss": 0.2697, + "step": 38100 + }, + { + "epoch": 0.49149777207451784, + "grad_norm": 0.8907807732831091, + "learning_rate": 9.876418543646727e-06, + "loss": 0.2481, + "step": 38110 + }, + { + "epoch": 0.49162674026451375, + "grad_norm": 0.9057510629632465, + "learning_rate": 9.876252704621683e-06, + "loss": 0.2488, + "step": 38120 + }, + { + "epoch": 0.4917557084545097, + "grad_norm": 0.9672344222385316, + "learning_rate": 9.876086755792358e-06, + "loss": 0.2466, + "step": 38130 + }, + { + "epoch": 0.49188467664450564, + "grad_norm": 1.0068338615041803, + "learning_rate": 9.87592069716249e-06, + "loss": 0.2636, + "step": 38140 + }, + { + "epoch": 0.49201364483450155, + "grad_norm": 0.8903056427344773, + "learning_rate": 9.875754528735815e-06, + "loss": 0.2577, + "step": 38150 + }, + { + "epoch": 0.4921426130244975, + "grad_norm": 0.9719965961918381, + "learning_rate": 9.875588250516077e-06, + "loss": 0.2764, + "step": 38160 + }, + { + "epoch": 0.49227158121449344, + "grad_norm": 0.9252969562175295, + "learning_rate": 9.87542186250702e-06, + "loss": 0.2353, + "step": 38170 + }, + { + "epoch": 0.4924005494044894, + "grad_norm": 0.9697930135589018, + "learning_rate": 9.875255364712392e-06, + "loss": 0.241, + "step": 38180 + }, + { + "epoch": 0.4925295175944853, + "grad_norm": 0.914427535428268, + "learning_rate": 9.87508875713594e-06, + "loss": 0.2396, + "step": 38190 + }, + { + "epoch": 0.49265848578448124, + "grad_norm": 1.009335219872601, + "learning_rate": 9.874922039781417e-06, + "loss": 0.255, + "step": 38200 + }, + { + "epoch": 0.4927874539744772, + "grad_norm": 0.9052519090392817, + "learning_rate": 9.874755212652578e-06, + "loss": 0.2453, + "step": 38210 + }, + { + "epoch": 0.4929164221644731, + "grad_norm": 0.9906996068052759, + "learning_rate": 9.874588275753176e-06, + "loss": 0.2529, + "step": 38220 + }, + { + "epoch": 0.4930453903544691, + "grad_norm": 0.9223402108010338, + "learning_rate": 9.874421229086975e-06, + "loss": 0.2509, + "step": 38230 + }, + { + "epoch": 0.493174358544465, + "grad_norm": 0.9568625727801879, + "learning_rate": 9.874254072657731e-06, + "loss": 0.2438, + "step": 38240 + }, + { + "epoch": 0.4933033267344609, + "grad_norm": 0.9139645421862022, + "learning_rate": 9.874086806469215e-06, + "loss": 0.2656, + "step": 38250 + }, + { + "epoch": 0.4934322949244569, + "grad_norm": 1.0653464808449982, + "learning_rate": 9.873919430525188e-06, + "loss": 0.2527, + "step": 38260 + }, + { + "epoch": 0.4935612631144528, + "grad_norm": 1.0553360979346587, + "learning_rate": 9.873751944829421e-06, + "loss": 0.2411, + "step": 38270 + }, + { + "epoch": 0.4936902313044488, + "grad_norm": 1.065708218931649, + "learning_rate": 9.873584349385686e-06, + "loss": 0.2473, + "step": 38280 + }, + { + "epoch": 0.4938191994944447, + "grad_norm": 0.9235267596898328, + "learning_rate": 9.873416644197753e-06, + "loss": 0.253, + "step": 38290 + }, + { + "epoch": 0.4939481676844406, + "grad_norm": 0.9626773484998232, + "learning_rate": 9.873248829269405e-06, + "loss": 0.2561, + "step": 38300 + }, + { + "epoch": 0.4940771358744366, + "grad_norm": 1.0187007741516891, + "learning_rate": 9.873080904604415e-06, + "loss": 0.2372, + "step": 38310 + }, + { + "epoch": 0.4942061040644325, + "grad_norm": 1.0661088908786358, + "learning_rate": 9.872912870206569e-06, + "loss": 0.2638, + "step": 38320 + }, + { + "epoch": 0.49433507225442846, + "grad_norm": 1.0381583911063068, + "learning_rate": 9.872744726079648e-06, + "loss": 0.253, + "step": 38330 + }, + { + "epoch": 0.4944640404444244, + "grad_norm": 0.9474436526075407, + "learning_rate": 9.872576472227438e-06, + "loss": 0.2451, + "step": 38340 + }, + { + "epoch": 0.4945930086344203, + "grad_norm": 0.9584282963514135, + "learning_rate": 9.87240810865373e-06, + "loss": 0.2583, + "step": 38350 + }, + { + "epoch": 0.49472197682441627, + "grad_norm": 0.9912010472541701, + "learning_rate": 9.872239635362314e-06, + "loss": 0.2412, + "step": 38360 + }, + { + "epoch": 0.4948509450144122, + "grad_norm": 0.940464375762335, + "learning_rate": 9.872071052356982e-06, + "loss": 0.249, + "step": 38370 + }, + { + "epoch": 0.49497991320440815, + "grad_norm": 0.9126346101844712, + "learning_rate": 9.871902359641534e-06, + "loss": 0.2437, + "step": 38380 + }, + { + "epoch": 0.49510888139440407, + "grad_norm": 0.94258420801555, + "learning_rate": 9.871733557219766e-06, + "loss": 0.2564, + "step": 38390 + }, + { + "epoch": 0.4952378495844, + "grad_norm": 0.9106373275367022, + "learning_rate": 9.87156464509548e-06, + "loss": 0.2573, + "step": 38400 + }, + { + "epoch": 0.49536681777439595, + "grad_norm": 0.9844824527344014, + "learning_rate": 9.871395623272477e-06, + "loss": 0.2524, + "step": 38410 + }, + { + "epoch": 0.49549578596439187, + "grad_norm": 1.0163631334718635, + "learning_rate": 9.871226491754567e-06, + "loss": 0.2452, + "step": 38420 + }, + { + "epoch": 0.49562475415438784, + "grad_norm": 0.9274343662088299, + "learning_rate": 9.871057250545558e-06, + "loss": 0.2462, + "step": 38430 + }, + { + "epoch": 0.49575372234438375, + "grad_norm": 0.9423607989296187, + "learning_rate": 9.870887899649257e-06, + "loss": 0.2447, + "step": 38440 + }, + { + "epoch": 0.4958826905343797, + "grad_norm": 1.0253685398866514, + "learning_rate": 9.870718439069482e-06, + "loss": 0.2538, + "step": 38450 + }, + { + "epoch": 0.49601165872437564, + "grad_norm": 0.9916310145081249, + "learning_rate": 9.870548868810045e-06, + "loss": 0.2458, + "step": 38460 + }, + { + "epoch": 0.49614062691437155, + "grad_norm": 0.923830041508614, + "learning_rate": 9.87037918887477e-06, + "loss": 0.2592, + "step": 38470 + }, + { + "epoch": 0.4962695951043675, + "grad_norm": 1.0569614471236939, + "learning_rate": 9.870209399267473e-06, + "loss": 0.2567, + "step": 38480 + }, + { + "epoch": 0.49639856329436344, + "grad_norm": 0.9807744668808899, + "learning_rate": 9.870039499991979e-06, + "loss": 0.2469, + "step": 38490 + }, + { + "epoch": 0.4965275314843594, + "grad_norm": 1.0571886030417141, + "learning_rate": 9.869869491052113e-06, + "loss": 0.2391, + "step": 38500 + }, + { + "epoch": 0.4966564996743553, + "grad_norm": 0.9438643902393892, + "learning_rate": 9.869699372451705e-06, + "loss": 0.2502, + "step": 38510 + }, + { + "epoch": 0.49678546786435124, + "grad_norm": 0.9507080030010143, + "learning_rate": 9.869529144194585e-06, + "loss": 0.2467, + "step": 38520 + }, + { + "epoch": 0.4969144360543472, + "grad_norm": 0.9883548241823823, + "learning_rate": 9.869358806284585e-06, + "loss": 0.2435, + "step": 38530 + }, + { + "epoch": 0.4970434042443431, + "grad_norm": 0.9437267145981928, + "learning_rate": 9.869188358725542e-06, + "loss": 0.25, + "step": 38540 + }, + { + "epoch": 0.4971723724343391, + "grad_norm": 0.98344497847579, + "learning_rate": 9.869017801521294e-06, + "loss": 0.2576, + "step": 38550 + }, + { + "epoch": 0.497301340624335, + "grad_norm": 1.0236412699000432, + "learning_rate": 9.86884713467568e-06, + "loss": 0.2475, + "step": 38560 + }, + { + "epoch": 0.4974303088143309, + "grad_norm": 1.085159438378335, + "learning_rate": 9.868676358192548e-06, + "loss": 0.2562, + "step": 38570 + }, + { + "epoch": 0.4975592770043269, + "grad_norm": 0.8724172261639498, + "learning_rate": 9.868505472075738e-06, + "loss": 0.2584, + "step": 38580 + }, + { + "epoch": 0.4976882451943228, + "grad_norm": 0.9339893645031764, + "learning_rate": 9.8683344763291e-06, + "loss": 0.23, + "step": 38590 + }, + { + "epoch": 0.4978172133843188, + "grad_norm": 1.0561936631196964, + "learning_rate": 9.868163370956485e-06, + "loss": 0.2441, + "step": 38600 + }, + { + "epoch": 0.4979461815743147, + "grad_norm": 0.979547014598374, + "learning_rate": 9.867992155961745e-06, + "loss": 0.2526, + "step": 38610 + }, + { + "epoch": 0.4980751497643106, + "grad_norm": 1.0299167075219449, + "learning_rate": 9.867820831348738e-06, + "loss": 0.2465, + "step": 38620 + }, + { + "epoch": 0.4982041179543066, + "grad_norm": 1.0940425076031046, + "learning_rate": 9.867649397121317e-06, + "loss": 0.2616, + "step": 38630 + }, + { + "epoch": 0.4983330861443025, + "grad_norm": 0.9263585620941457, + "learning_rate": 9.867477853283348e-06, + "loss": 0.2465, + "step": 38640 + }, + { + "epoch": 0.49846205433429847, + "grad_norm": 0.9807995686229578, + "learning_rate": 9.86730619983869e-06, + "loss": 0.2495, + "step": 38650 + }, + { + "epoch": 0.4985910225242944, + "grad_norm": 0.9477006705168806, + "learning_rate": 9.867134436791212e-06, + "loss": 0.2369, + "step": 38660 + }, + { + "epoch": 0.4987199907142903, + "grad_norm": 1.0588526538920777, + "learning_rate": 9.866962564144778e-06, + "loss": 0.2622, + "step": 38670 + }, + { + "epoch": 0.49884895890428627, + "grad_norm": 1.1207867762650754, + "learning_rate": 9.866790581903259e-06, + "loss": 0.2697, + "step": 38680 + }, + { + "epoch": 0.4989779270942822, + "grad_norm": 0.9941004415488379, + "learning_rate": 9.866618490070528e-06, + "loss": 0.2502, + "step": 38690 + }, + { + "epoch": 0.49910689528427815, + "grad_norm": 1.0049236136323476, + "learning_rate": 9.866446288650461e-06, + "loss": 0.2418, + "step": 38700 + }, + { + "epoch": 0.49923586347427407, + "grad_norm": 0.965083056942395, + "learning_rate": 9.866273977646936e-06, + "loss": 0.2399, + "step": 38710 + }, + { + "epoch": 0.49936483166427, + "grad_norm": 0.8894800905014574, + "learning_rate": 9.866101557063831e-06, + "loss": 0.2455, + "step": 38720 + }, + { + "epoch": 0.49949379985426595, + "grad_norm": 1.028400098610984, + "learning_rate": 9.865929026905032e-06, + "loss": 0.2699, + "step": 38730 + }, + { + "epoch": 0.49962276804426187, + "grad_norm": 0.9639367177146427, + "learning_rate": 9.865756387174421e-06, + "loss": 0.2518, + "step": 38740 + }, + { + "epoch": 0.49975173623425784, + "grad_norm": 0.8578081498095746, + "learning_rate": 9.865583637875887e-06, + "loss": 0.2543, + "step": 38750 + }, + { + "epoch": 0.49988070442425375, + "grad_norm": 0.9853222099167577, + "learning_rate": 9.865410779013318e-06, + "loss": 0.2536, + "step": 38760 + }, + { + "epoch": 0.5000096726142497, + "grad_norm": 0.9287364226908571, + "learning_rate": 9.86523781059061e-06, + "loss": 0.2623, + "step": 38770 + }, + { + "epoch": 0.5001386408042456, + "grad_norm": 1.0448935424228265, + "learning_rate": 9.865064732611654e-06, + "loss": 0.2591, + "step": 38780 + }, + { + "epoch": 0.5002676089942416, + "grad_norm": 1.0116812424889041, + "learning_rate": 9.86489154508035e-06, + "loss": 0.252, + "step": 38790 + }, + { + "epoch": 0.5003965771842375, + "grad_norm": 0.9777465341563111, + "learning_rate": 9.864718248000598e-06, + "loss": 0.2444, + "step": 38800 + }, + { + "epoch": 0.5005255453742334, + "grad_norm": 0.9533684818314664, + "learning_rate": 9.864544841376297e-06, + "loss": 0.2483, + "step": 38810 + }, + { + "epoch": 0.5006545135642294, + "grad_norm": 1.0414172096009435, + "learning_rate": 9.864371325211358e-06, + "loss": 0.2476, + "step": 38820 + }, + { + "epoch": 0.5007834817542253, + "grad_norm": 0.9557351886213648, + "learning_rate": 9.864197699509684e-06, + "loss": 0.2449, + "step": 38830 + }, + { + "epoch": 0.5009124499442212, + "grad_norm": 1.0132757828191763, + "learning_rate": 9.864023964275184e-06, + "loss": 0.2445, + "step": 38840 + }, + { + "epoch": 0.5010414181342172, + "grad_norm": 1.0020595383944848, + "learning_rate": 9.86385011951177e-06, + "loss": 0.2548, + "step": 38850 + }, + { + "epoch": 0.5011703863242132, + "grad_norm": 2.3652665726061177, + "learning_rate": 9.86367616522336e-06, + "loss": 0.2485, + "step": 38860 + }, + { + "epoch": 0.501299354514209, + "grad_norm": 1.0257796211937875, + "learning_rate": 9.863502101413871e-06, + "loss": 0.2606, + "step": 38870 + }, + { + "epoch": 0.501428322704205, + "grad_norm": 0.9734443888986921, + "learning_rate": 9.86332792808722e-06, + "loss": 0.2467, + "step": 38880 + }, + { + "epoch": 0.501557290894201, + "grad_norm": 0.9977214665231453, + "learning_rate": 9.86315364524733e-06, + "loss": 0.2537, + "step": 38890 + }, + { + "epoch": 0.5016862590841968, + "grad_norm": 1.002644090808672, + "learning_rate": 9.862979252898123e-06, + "loss": 0.2445, + "step": 38900 + }, + { + "epoch": 0.5018152272741928, + "grad_norm": 0.9513171881702124, + "learning_rate": 9.86280475104353e-06, + "loss": 0.2457, + "step": 38910 + }, + { + "epoch": 0.5019441954641888, + "grad_norm": 0.8818424815016022, + "learning_rate": 9.862630139687481e-06, + "loss": 0.2397, + "step": 38920 + }, + { + "epoch": 0.5020731636541846, + "grad_norm": 1.0064864090300614, + "learning_rate": 9.862455418833905e-06, + "loss": 0.2525, + "step": 38930 + }, + { + "epoch": 0.5022021318441806, + "grad_norm": 0.9337112135227915, + "learning_rate": 9.862280588486737e-06, + "loss": 0.2486, + "step": 38940 + }, + { + "epoch": 0.5023311000341766, + "grad_norm": 1.042260322663899, + "learning_rate": 9.862105648649913e-06, + "loss": 0.2523, + "step": 38950 + }, + { + "epoch": 0.5024600682241726, + "grad_norm": 1.0619949276374714, + "learning_rate": 9.861930599327376e-06, + "loss": 0.243, + "step": 38960 + }, + { + "epoch": 0.5025890364141684, + "grad_norm": 1.016063669884332, + "learning_rate": 9.861755440523062e-06, + "loss": 0.2511, + "step": 38970 + }, + { + "epoch": 0.5027180046041644, + "grad_norm": 1.0019267579130715, + "learning_rate": 9.861580172240922e-06, + "loss": 0.2571, + "step": 38980 + }, + { + "epoch": 0.5028469727941604, + "grad_norm": 1.0250335414938987, + "learning_rate": 9.861404794484896e-06, + "loss": 0.2464, + "step": 38990 + }, + { + "epoch": 0.5029759409841562, + "grad_norm": 0.9736618920519513, + "learning_rate": 9.861229307258939e-06, + "loss": 0.251, + "step": 39000 + }, + { + "epoch": 0.5031049091741522, + "grad_norm": 0.9233884397029466, + "learning_rate": 9.861053710566999e-06, + "loss": 0.2307, + "step": 39010 + }, + { + "epoch": 0.5032338773641482, + "grad_norm": 0.9462248062863299, + "learning_rate": 9.86087800441303e-06, + "loss": 0.2597, + "step": 39020 + }, + { + "epoch": 0.5033628455541441, + "grad_norm": 1.019318006996316, + "learning_rate": 9.860702188800992e-06, + "loss": 0.2584, + "step": 39030 + }, + { + "epoch": 0.50349181374414, + "grad_norm": 1.0569794872159495, + "learning_rate": 9.860526263734843e-06, + "loss": 0.2475, + "step": 39040 + }, + { + "epoch": 0.503620781934136, + "grad_norm": 1.0380512872662528, + "learning_rate": 9.86035022921854e-06, + "loss": 0.2468, + "step": 39050 + }, + { + "epoch": 0.5037497501241319, + "grad_norm": 0.9185530588716446, + "learning_rate": 9.860174085256051e-06, + "loss": 0.2409, + "step": 39060 + }, + { + "epoch": 0.5038787183141278, + "grad_norm": 0.8932531071240539, + "learning_rate": 9.859997831851343e-06, + "loss": 0.2576, + "step": 39070 + }, + { + "epoch": 0.5040076865041238, + "grad_norm": 1.070433362822675, + "learning_rate": 9.859821469008383e-06, + "loss": 0.2448, + "step": 39080 + }, + { + "epoch": 0.5041366546941197, + "grad_norm": 1.0589457046955975, + "learning_rate": 9.859644996731143e-06, + "loss": 0.249, + "step": 39090 + }, + { + "epoch": 0.5042656228841156, + "grad_norm": 0.9601794575036477, + "learning_rate": 9.859468415023598e-06, + "loss": 0.2508, + "step": 39100 + }, + { + "epoch": 0.5043945910741116, + "grad_norm": 0.936550180091959, + "learning_rate": 9.859291723889721e-06, + "loss": 0.245, + "step": 39110 + }, + { + "epoch": 0.5045235592641075, + "grad_norm": 1.0070623905587033, + "learning_rate": 9.859114923333494e-06, + "loss": 0.2385, + "step": 39120 + }, + { + "epoch": 0.5046525274541035, + "grad_norm": 0.949904521291178, + "learning_rate": 9.8589380133589e-06, + "loss": 0.2283, + "step": 39130 + }, + { + "epoch": 0.5047814956440994, + "grad_norm": 0.9674713626580295, + "learning_rate": 9.858760993969916e-06, + "loss": 0.2439, + "step": 39140 + }, + { + "epoch": 0.5049104638340953, + "grad_norm": 0.9260235272842193, + "learning_rate": 9.858583865170532e-06, + "loss": 0.24, + "step": 39150 + }, + { + "epoch": 0.5050394320240913, + "grad_norm": 1.0118144694335764, + "learning_rate": 9.858406626964738e-06, + "loss": 0.26, + "step": 39160 + }, + { + "epoch": 0.5051684002140872, + "grad_norm": 0.9503044135457748, + "learning_rate": 9.858229279356524e-06, + "loss": 0.2508, + "step": 39170 + }, + { + "epoch": 0.5052973684040831, + "grad_norm": 0.9681863524717759, + "learning_rate": 9.858051822349883e-06, + "loss": 0.2472, + "step": 39180 + }, + { + "epoch": 0.5054263365940791, + "grad_norm": 1.1084442410106967, + "learning_rate": 9.857874255948811e-06, + "loss": 0.248, + "step": 39190 + }, + { + "epoch": 0.505555304784075, + "grad_norm": 0.9088582308494438, + "learning_rate": 9.857696580157307e-06, + "loss": 0.2484, + "step": 39200 + }, + { + "epoch": 0.5056842729740709, + "grad_norm": 0.9873505567301285, + "learning_rate": 9.85751879497937e-06, + "loss": 0.2672, + "step": 39210 + }, + { + "epoch": 0.5058132411640669, + "grad_norm": 0.996076548270587, + "learning_rate": 9.857340900419009e-06, + "loss": 0.255, + "step": 39220 + }, + { + "epoch": 0.5059422093540629, + "grad_norm": 1.0022171145126701, + "learning_rate": 9.85716289648022e-06, + "loss": 0.245, + "step": 39230 + }, + { + "epoch": 0.5060711775440587, + "grad_norm": 1.076671711321503, + "learning_rate": 9.856984783167022e-06, + "loss": 0.2471, + "step": 39240 + }, + { + "epoch": 0.5062001457340547, + "grad_norm": 0.945471631372074, + "learning_rate": 9.85680656048342e-06, + "loss": 0.2436, + "step": 39250 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.9342219832302879, + "learning_rate": 9.856628228433429e-06, + "loss": 0.2404, + "step": 39260 + }, + { + "epoch": 0.5064580821140465, + "grad_norm": 1.0103656050101697, + "learning_rate": 9.856449787021061e-06, + "loss": 0.2533, + "step": 39270 + }, + { + "epoch": 0.5065870503040425, + "grad_norm": 0.9001492834926093, + "learning_rate": 9.85627123625034e-06, + "loss": 0.2344, + "step": 39280 + }, + { + "epoch": 0.5067160184940385, + "grad_norm": 0.9317481487921713, + "learning_rate": 9.856092576125282e-06, + "loss": 0.2511, + "step": 39290 + }, + { + "epoch": 0.5068449866840344, + "grad_norm": 0.9792813409601265, + "learning_rate": 9.855913806649913e-06, + "loss": 0.2534, + "step": 39300 + }, + { + "epoch": 0.5069739548740303, + "grad_norm": 0.951551274423583, + "learning_rate": 9.855734927828258e-06, + "loss": 0.2446, + "step": 39310 + }, + { + "epoch": 0.5071029230640263, + "grad_norm": 0.9591851547929697, + "learning_rate": 9.855555939664343e-06, + "loss": 0.2387, + "step": 39320 + }, + { + "epoch": 0.5072318912540222, + "grad_norm": 0.9434317295144206, + "learning_rate": 9.855376842162201e-06, + "loss": 0.2594, + "step": 39330 + }, + { + "epoch": 0.5073608594440181, + "grad_norm": 0.9957282644175821, + "learning_rate": 9.855197635325863e-06, + "loss": 0.2479, + "step": 39340 + }, + { + "epoch": 0.5074898276340141, + "grad_norm": 0.9112458055605422, + "learning_rate": 9.855018319159366e-06, + "loss": 0.2457, + "step": 39350 + }, + { + "epoch": 0.50761879582401, + "grad_norm": 0.9599937529576529, + "learning_rate": 9.854838893666748e-06, + "loss": 0.2348, + "step": 39360 + }, + { + "epoch": 0.5077477640140059, + "grad_norm": 1.0526739704246564, + "learning_rate": 9.854659358852046e-06, + "loss": 0.2458, + "step": 39370 + }, + { + "epoch": 0.5078767322040019, + "grad_norm": 0.9349287316005279, + "learning_rate": 9.854479714719307e-06, + "loss": 0.2446, + "step": 39380 + }, + { + "epoch": 0.5080057003939978, + "grad_norm": 0.9251158348988339, + "learning_rate": 9.854299961272574e-06, + "loss": 0.2509, + "step": 39390 + }, + { + "epoch": 0.5081346685839938, + "grad_norm": 0.9698827275741573, + "learning_rate": 9.854120098515896e-06, + "loss": 0.2545, + "step": 39400 + }, + { + "epoch": 0.5082636367739897, + "grad_norm": 0.9842427491001255, + "learning_rate": 9.853940126453322e-06, + "loss": 0.251, + "step": 39410 + }, + { + "epoch": 0.5083926049639856, + "grad_norm": 0.8691093412292062, + "learning_rate": 9.853760045088905e-06, + "loss": 0.2484, + "step": 39420 + }, + { + "epoch": 0.5085215731539816, + "grad_norm": 0.9771186694764409, + "learning_rate": 9.8535798544267e-06, + "loss": 0.2645, + "step": 39430 + }, + { + "epoch": 0.5086505413439775, + "grad_norm": 0.9095854875811233, + "learning_rate": 9.853399554470766e-06, + "loss": 0.252, + "step": 39440 + }, + { + "epoch": 0.5087795095339734, + "grad_norm": 0.892767510832129, + "learning_rate": 9.853219145225162e-06, + "loss": 0.2322, + "step": 39450 + }, + { + "epoch": 0.5089084777239694, + "grad_norm": 1.0377281784349985, + "learning_rate": 9.853038626693947e-06, + "loss": 0.2432, + "step": 39460 + }, + { + "epoch": 0.5090374459139653, + "grad_norm": 0.9242073459855717, + "learning_rate": 9.852857998881193e-06, + "loss": 0.2386, + "step": 39470 + }, + { + "epoch": 0.5091664141039612, + "grad_norm": 0.969765899632922, + "learning_rate": 9.852677261790962e-06, + "loss": 0.2492, + "step": 39480 + }, + { + "epoch": 0.5092953822939572, + "grad_norm": 0.8690829643782537, + "learning_rate": 9.852496415427326e-06, + "loss": 0.2519, + "step": 39490 + }, + { + "epoch": 0.5094243504839532, + "grad_norm": 1.0665134870399369, + "learning_rate": 9.852315459794356e-06, + "loss": 0.2447, + "step": 39500 + }, + { + "epoch": 0.509553318673949, + "grad_norm": 0.965991993621723, + "learning_rate": 9.85213439489613e-06, + "loss": 0.2455, + "step": 39510 + }, + { + "epoch": 0.509682286863945, + "grad_norm": 1.0329340761997197, + "learning_rate": 9.851953220736718e-06, + "loss": 0.2378, + "step": 39520 + }, + { + "epoch": 0.509811255053941, + "grad_norm": 0.9825925870719147, + "learning_rate": 9.851771937320209e-06, + "loss": 0.2527, + "step": 39530 + }, + { + "epoch": 0.5099402232439368, + "grad_norm": 0.9091271693782592, + "learning_rate": 9.851590544650679e-06, + "loss": 0.2465, + "step": 39540 + }, + { + "epoch": 0.5100691914339328, + "grad_norm": 0.8781363169788023, + "learning_rate": 9.851409042732216e-06, + "loss": 0.244, + "step": 39550 + }, + { + "epoch": 0.5101981596239288, + "grad_norm": 0.8899894269616447, + "learning_rate": 9.851227431568904e-06, + "loss": 0.2433, + "step": 39560 + }, + { + "epoch": 0.5103271278139246, + "grad_norm": 0.9602387299483034, + "learning_rate": 9.851045711164833e-06, + "loss": 0.2555, + "step": 39570 + }, + { + "epoch": 0.5104560960039206, + "grad_norm": 1.0460769435908002, + "learning_rate": 9.850863881524098e-06, + "loss": 0.2526, + "step": 39580 + }, + { + "epoch": 0.5105850641939166, + "grad_norm": 0.8371178874633063, + "learning_rate": 9.85068194265079e-06, + "loss": 0.2416, + "step": 39590 + }, + { + "epoch": 0.5107140323839126, + "grad_norm": 0.8990949945041542, + "learning_rate": 9.850499894549008e-06, + "loss": 0.2445, + "step": 39600 + }, + { + "epoch": 0.5108430005739084, + "grad_norm": 1.0024108650601267, + "learning_rate": 9.850317737222852e-06, + "loss": 0.2632, + "step": 39610 + }, + { + "epoch": 0.5109719687639044, + "grad_norm": 0.9546280056031732, + "learning_rate": 9.850135470676422e-06, + "loss": 0.2661, + "step": 39620 + }, + { + "epoch": 0.5111009369539004, + "grad_norm": 0.8629145333097055, + "learning_rate": 9.849953094913822e-06, + "loss": 0.2446, + "step": 39630 + }, + { + "epoch": 0.5112299051438962, + "grad_norm": 1.0301781228828761, + "learning_rate": 9.849770609939162e-06, + "loss": 0.2521, + "step": 39640 + }, + { + "epoch": 0.5113588733338922, + "grad_norm": 0.9427217303502637, + "learning_rate": 9.849588015756548e-06, + "loss": 0.2542, + "step": 39650 + }, + { + "epoch": 0.5114878415238882, + "grad_norm": 0.8886280904454347, + "learning_rate": 9.84940531237009e-06, + "loss": 0.2565, + "step": 39660 + }, + { + "epoch": 0.5116168097138841, + "grad_norm": 0.9502541761432136, + "learning_rate": 9.849222499783909e-06, + "loss": 0.2537, + "step": 39670 + }, + { + "epoch": 0.51174577790388, + "grad_norm": 0.9300577888344668, + "learning_rate": 9.849039578002115e-06, + "loss": 0.25, + "step": 39680 + }, + { + "epoch": 0.511874746093876, + "grad_norm": 1.0132647239476174, + "learning_rate": 9.848856547028829e-06, + "loss": 0.2518, + "step": 39690 + }, + { + "epoch": 0.5120037142838719, + "grad_norm": 0.9482673221316995, + "learning_rate": 9.848673406868174e-06, + "loss": 0.2549, + "step": 39700 + }, + { + "epoch": 0.5121326824738678, + "grad_norm": 0.9525846612400984, + "learning_rate": 9.848490157524272e-06, + "loss": 0.2411, + "step": 39710 + }, + { + "epoch": 0.5122616506638638, + "grad_norm": 0.8654059522755508, + "learning_rate": 9.848306799001253e-06, + "loss": 0.2365, + "step": 39720 + }, + { + "epoch": 0.5123906188538597, + "grad_norm": 1.0887226117538407, + "learning_rate": 9.84812333130324e-06, + "loss": 0.2507, + "step": 39730 + }, + { + "epoch": 0.5125195870438556, + "grad_norm": 0.9061472932428686, + "learning_rate": 9.84793975443437e-06, + "loss": 0.2484, + "step": 39740 + }, + { + "epoch": 0.5126485552338516, + "grad_norm": 0.9383751035828214, + "learning_rate": 9.847756068398775e-06, + "loss": 0.2501, + "step": 39750 + }, + { + "epoch": 0.5127775234238475, + "grad_norm": 0.94178652528898, + "learning_rate": 9.84757227320059e-06, + "loss": 0.2324, + "step": 39760 + }, + { + "epoch": 0.5129064916138435, + "grad_norm": 0.9711769406133304, + "learning_rate": 9.847388368843953e-06, + "loss": 0.2598, + "step": 39770 + }, + { + "epoch": 0.5130354598038394, + "grad_norm": 0.9470323294494338, + "learning_rate": 9.847204355333009e-06, + "loss": 0.2443, + "step": 39780 + }, + { + "epoch": 0.5131644279938353, + "grad_norm": 0.8752819106081544, + "learning_rate": 9.847020232671897e-06, + "loss": 0.2527, + "step": 39790 + }, + { + "epoch": 0.5132933961838313, + "grad_norm": 0.9689044479855962, + "learning_rate": 9.846836000864769e-06, + "loss": 0.2627, + "step": 39800 + }, + { + "epoch": 0.5134223643738272, + "grad_norm": 0.9332088910838681, + "learning_rate": 9.846651659915767e-06, + "loss": 0.2602, + "step": 39810 + }, + { + "epoch": 0.5135513325638231, + "grad_norm": 0.9152322485910015, + "learning_rate": 9.846467209829044e-06, + "loss": 0.2422, + "step": 39820 + }, + { + "epoch": 0.5136803007538191, + "grad_norm": 1.0480369666057276, + "learning_rate": 9.846282650608758e-06, + "loss": 0.2701, + "step": 39830 + }, + { + "epoch": 0.513809268943815, + "grad_norm": 0.998210615772693, + "learning_rate": 9.84609798225906e-06, + "loss": 0.2474, + "step": 39840 + }, + { + "epoch": 0.5139382371338109, + "grad_norm": 1.0695372108729717, + "learning_rate": 9.845913204784107e-06, + "loss": 0.2432, + "step": 39850 + }, + { + "epoch": 0.5140672053238069, + "grad_norm": 1.0059386501120429, + "learning_rate": 9.845728318188065e-06, + "loss": 0.2586, + "step": 39860 + }, + { + "epoch": 0.5141961735138029, + "grad_norm": 0.8948598390393857, + "learning_rate": 9.845543322475096e-06, + "loss": 0.2577, + "step": 39870 + }, + { + "epoch": 0.5143251417037987, + "grad_norm": 1.0159856140886259, + "learning_rate": 9.845358217649365e-06, + "loss": 0.2477, + "step": 39880 + }, + { + "epoch": 0.5144541098937947, + "grad_norm": 0.9785898059365556, + "learning_rate": 9.845173003715039e-06, + "loss": 0.2481, + "step": 39890 + }, + { + "epoch": 0.5145830780837907, + "grad_norm": 0.9281301384848573, + "learning_rate": 9.844987680676289e-06, + "loss": 0.2474, + "step": 39900 + }, + { + "epoch": 0.5147120462737865, + "grad_norm": 0.9489657517210545, + "learning_rate": 9.844802248537288e-06, + "loss": 0.2566, + "step": 39910 + }, + { + "epoch": 0.5148410144637825, + "grad_norm": 0.9107766619821369, + "learning_rate": 9.844616707302214e-06, + "loss": 0.2486, + "step": 39920 + }, + { + "epoch": 0.5149699826537785, + "grad_norm": 0.8193388266329334, + "learning_rate": 9.844431056975244e-06, + "loss": 0.2299, + "step": 39930 + }, + { + "epoch": 0.5150989508437743, + "grad_norm": 0.9486805748136645, + "learning_rate": 9.844245297560558e-06, + "loss": 0.2553, + "step": 39940 + }, + { + "epoch": 0.5152279190337703, + "grad_norm": 0.9359762801301628, + "learning_rate": 9.844059429062338e-06, + "loss": 0.2423, + "step": 39950 + }, + { + "epoch": 0.5153568872237663, + "grad_norm": 1.0205575296444886, + "learning_rate": 9.843873451484768e-06, + "loss": 0.2572, + "step": 39960 + }, + { + "epoch": 0.5154858554137622, + "grad_norm": 0.9746084131382406, + "learning_rate": 9.843687364832042e-06, + "loss": 0.2281, + "step": 39970 + }, + { + "epoch": 0.5156148236037581, + "grad_norm": 1.046691967135704, + "learning_rate": 9.843501169108345e-06, + "loss": 0.257, + "step": 39980 + }, + { + "epoch": 0.5157437917937541, + "grad_norm": 1.0444245018589038, + "learning_rate": 9.843314864317872e-06, + "loss": 0.2502, + "step": 39990 + }, + { + "epoch": 0.51587275998375, + "grad_norm": 0.9649084435892149, + "learning_rate": 9.843128450464816e-06, + "loss": 0.2497, + "step": 40000 + }, + { + "epoch": 0.5160017281737459, + "grad_norm": 0.9060360940260852, + "learning_rate": 9.842941927553377e-06, + "loss": 0.2366, + "step": 40010 + }, + { + "epoch": 0.5161306963637419, + "grad_norm": 0.9767099833773452, + "learning_rate": 9.842755295587756e-06, + "loss": 0.2468, + "step": 40020 + }, + { + "epoch": 0.5162596645537378, + "grad_norm": 1.0462956023720857, + "learning_rate": 9.842568554572151e-06, + "loss": 0.2482, + "step": 40030 + }, + { + "epoch": 0.5163886327437338, + "grad_norm": 0.8752825857825729, + "learning_rate": 9.842381704510772e-06, + "loss": 0.2509, + "step": 40040 + }, + { + "epoch": 0.5165176009337297, + "grad_norm": 0.9317016570522267, + "learning_rate": 9.842194745407825e-06, + "loss": 0.2501, + "step": 40050 + }, + { + "epoch": 0.5166465691237256, + "grad_norm": 0.9977325963423793, + "learning_rate": 9.842007677267518e-06, + "loss": 0.2575, + "step": 40060 + }, + { + "epoch": 0.5167755373137216, + "grad_norm": 0.9322835665301493, + "learning_rate": 9.841820500094067e-06, + "loss": 0.252, + "step": 40070 + }, + { + "epoch": 0.5169045055037175, + "grad_norm": 0.9370469164070977, + "learning_rate": 9.841633213891683e-06, + "loss": 0.2471, + "step": 40080 + }, + { + "epoch": 0.5170334736937134, + "grad_norm": 0.9169355793553984, + "learning_rate": 9.841445818664587e-06, + "loss": 0.2403, + "step": 40090 + }, + { + "epoch": 0.5171624418837094, + "grad_norm": 0.9914034763649426, + "learning_rate": 9.841258314416995e-06, + "loss": 0.2319, + "step": 40100 + }, + { + "epoch": 0.5172914100737053, + "grad_norm": 0.9455048320909505, + "learning_rate": 9.841070701153133e-06, + "loss": 0.2391, + "step": 40110 + }, + { + "epoch": 0.5174203782637012, + "grad_norm": 0.917682643376077, + "learning_rate": 9.840882978877224e-06, + "loss": 0.2443, + "step": 40120 + }, + { + "epoch": 0.5175493464536972, + "grad_norm": 1.0187376970658308, + "learning_rate": 9.840695147593494e-06, + "loss": 0.2502, + "step": 40130 + }, + { + "epoch": 0.5176783146436932, + "grad_norm": 0.9397652806759473, + "learning_rate": 9.840507207306175e-06, + "loss": 0.2446, + "step": 40140 + }, + { + "epoch": 0.517807282833689, + "grad_norm": 0.9482030172749035, + "learning_rate": 9.840319158019497e-06, + "loss": 0.2547, + "step": 40150 + }, + { + "epoch": 0.517936251023685, + "grad_norm": 0.9264826574216056, + "learning_rate": 9.840130999737697e-06, + "loss": 0.2494, + "step": 40160 + }, + { + "epoch": 0.518065219213681, + "grad_norm": 0.9735107306181665, + "learning_rate": 9.839942732465008e-06, + "loss": 0.2458, + "step": 40170 + }, + { + "epoch": 0.5181941874036768, + "grad_norm": 0.9412842115369839, + "learning_rate": 9.839754356205672e-06, + "loss": 0.2406, + "step": 40180 + }, + { + "epoch": 0.5183231555936728, + "grad_norm": 0.9224281929489373, + "learning_rate": 9.839565870963933e-06, + "loss": 0.2488, + "step": 40190 + }, + { + "epoch": 0.5184521237836688, + "grad_norm": 0.9012161188068285, + "learning_rate": 9.83937727674403e-06, + "loss": 0.2364, + "step": 40200 + }, + { + "epoch": 0.5185810919736646, + "grad_norm": 0.9470308744536405, + "learning_rate": 9.839188573550213e-06, + "loss": 0.2405, + "step": 40210 + }, + { + "epoch": 0.5187100601636606, + "grad_norm": 1.0144210593523975, + "learning_rate": 9.838999761386735e-06, + "loss": 0.2569, + "step": 40220 + }, + { + "epoch": 0.5188390283536566, + "grad_norm": 0.9490332344456045, + "learning_rate": 9.83881084025784e-06, + "loss": 0.2634, + "step": 40230 + }, + { + "epoch": 0.5189679965436526, + "grad_norm": 0.9583294691735054, + "learning_rate": 9.838621810167786e-06, + "loss": 0.2471, + "step": 40240 + }, + { + "epoch": 0.5190969647336484, + "grad_norm": 0.931093221124386, + "learning_rate": 9.838432671120831e-06, + "loss": 0.2487, + "step": 40250 + }, + { + "epoch": 0.5192259329236444, + "grad_norm": 0.9143735636468109, + "learning_rate": 9.838243423121232e-06, + "loss": 0.2499, + "step": 40260 + }, + { + "epoch": 0.5193549011136404, + "grad_norm": 0.974199205549812, + "learning_rate": 9.838054066173252e-06, + "loss": 0.2536, + "step": 40270 + }, + { + "epoch": 0.5194838693036362, + "grad_norm": 0.9254109829532693, + "learning_rate": 9.837864600281153e-06, + "loss": 0.2505, + "step": 40280 + }, + { + "epoch": 0.5196128374936322, + "grad_norm": 0.8968078619462659, + "learning_rate": 9.837675025449202e-06, + "loss": 0.2445, + "step": 40290 + }, + { + "epoch": 0.5197418056836282, + "grad_norm": 1.0909615241014117, + "learning_rate": 9.837485341681669e-06, + "loss": 0.2531, + "step": 40300 + }, + { + "epoch": 0.5198707738736241, + "grad_norm": 0.9267534073708735, + "learning_rate": 9.837295548982825e-06, + "loss": 0.2521, + "step": 40310 + }, + { + "epoch": 0.51999974206362, + "grad_norm": 1.021048903404838, + "learning_rate": 9.837105647356946e-06, + "loss": 0.2476, + "step": 40320 + }, + { + "epoch": 0.520128710253616, + "grad_norm": 0.9550607281024512, + "learning_rate": 9.836915636808303e-06, + "loss": 0.2521, + "step": 40330 + }, + { + "epoch": 0.5202576784436119, + "grad_norm": 0.997644580237307, + "learning_rate": 9.836725517341177e-06, + "loss": 0.2543, + "step": 40340 + }, + { + "epoch": 0.5203866466336078, + "grad_norm": 1.0171186757390747, + "learning_rate": 9.836535288959849e-06, + "loss": 0.2491, + "step": 40350 + }, + { + "epoch": 0.5205156148236038, + "grad_norm": 0.9219172543572488, + "learning_rate": 9.836344951668606e-06, + "loss": 0.2546, + "step": 40360 + }, + { + "epoch": 0.5206445830135997, + "grad_norm": 0.9713132784063438, + "learning_rate": 9.836154505471729e-06, + "loss": 0.2462, + "step": 40370 + }, + { + "epoch": 0.5207735512035956, + "grad_norm": 0.954787617383152, + "learning_rate": 9.83596395037351e-06, + "loss": 0.2451, + "step": 40380 + }, + { + "epoch": 0.5209025193935916, + "grad_norm": 0.9673000081252474, + "learning_rate": 9.835773286378239e-06, + "loss": 0.2546, + "step": 40390 + }, + { + "epoch": 0.5210314875835875, + "grad_norm": 0.9581011379207697, + "learning_rate": 9.835582513490208e-06, + "loss": 0.2573, + "step": 40400 + }, + { + "epoch": 0.5211604557735835, + "grad_norm": 0.8254280409224775, + "learning_rate": 9.835391631713714e-06, + "loss": 0.2514, + "step": 40410 + }, + { + "epoch": 0.5212894239635794, + "grad_norm": 1.0988366818924258, + "learning_rate": 9.835200641053056e-06, + "loss": 0.2507, + "step": 40420 + }, + { + "epoch": 0.5214183921535753, + "grad_norm": 0.9332118650681739, + "learning_rate": 9.835009541512532e-06, + "loss": 0.2499, + "step": 40430 + }, + { + "epoch": 0.5215473603435713, + "grad_norm": 0.9179393904335741, + "learning_rate": 9.83481833309645e-06, + "loss": 0.257, + "step": 40440 + }, + { + "epoch": 0.5216763285335672, + "grad_norm": 0.9203157015577693, + "learning_rate": 9.83462701580911e-06, + "loss": 0.2419, + "step": 40450 + }, + { + "epoch": 0.5218052967235631, + "grad_norm": 0.9631171961506498, + "learning_rate": 9.834435589654825e-06, + "loss": 0.2594, + "step": 40460 + }, + { + "epoch": 0.5219342649135591, + "grad_norm": 0.9251340119728468, + "learning_rate": 9.834244054637903e-06, + "loss": 0.2438, + "step": 40470 + }, + { + "epoch": 0.522063233103555, + "grad_norm": 0.9171560315561714, + "learning_rate": 9.834052410762658e-06, + "loss": 0.2536, + "step": 40480 + }, + { + "epoch": 0.5221922012935509, + "grad_norm": 1.0189335599120113, + "learning_rate": 9.833860658033405e-06, + "loss": 0.2608, + "step": 40490 + }, + { + "epoch": 0.5223211694835469, + "grad_norm": 1.0577080636730853, + "learning_rate": 9.833668796454461e-06, + "loss": 0.2481, + "step": 40500 + }, + { + "epoch": 0.5224501376735429, + "grad_norm": 1.033301939968273, + "learning_rate": 9.833476826030148e-06, + "loss": 0.2609, + "step": 40510 + }, + { + "epoch": 0.5225791058635387, + "grad_norm": 0.9716993163012156, + "learning_rate": 9.83328474676479e-06, + "loss": 0.2397, + "step": 40520 + }, + { + "epoch": 0.5227080740535347, + "grad_norm": 1.0884272676922702, + "learning_rate": 9.833092558662707e-06, + "loss": 0.2396, + "step": 40530 + }, + { + "epoch": 0.5228370422435307, + "grad_norm": 0.9284535739493239, + "learning_rate": 9.832900261728231e-06, + "loss": 0.2457, + "step": 40540 + }, + { + "epoch": 0.5229660104335265, + "grad_norm": 0.917217243908441, + "learning_rate": 9.832707855965693e-06, + "loss": 0.2397, + "step": 40550 + }, + { + "epoch": 0.5230949786235225, + "grad_norm": 0.9335312628386642, + "learning_rate": 9.832515341379423e-06, + "loss": 0.2474, + "step": 40560 + }, + { + "epoch": 0.5232239468135185, + "grad_norm": 0.9476335678949792, + "learning_rate": 9.832322717973757e-06, + "loss": 0.2389, + "step": 40570 + }, + { + "epoch": 0.5233529150035143, + "grad_norm": 0.9709588275235478, + "learning_rate": 9.832129985753032e-06, + "loss": 0.2616, + "step": 40580 + }, + { + "epoch": 0.5234818831935103, + "grad_norm": 0.9792428897883328, + "learning_rate": 9.831937144721589e-06, + "loss": 0.2503, + "step": 40590 + }, + { + "epoch": 0.5236108513835063, + "grad_norm": 0.9708182559868389, + "learning_rate": 9.83174419488377e-06, + "loss": 0.2425, + "step": 40600 + }, + { + "epoch": 0.5237398195735022, + "grad_norm": 0.9815177945617685, + "learning_rate": 9.831551136243922e-06, + "loss": 0.2411, + "step": 40610 + }, + { + "epoch": 0.5238687877634981, + "grad_norm": 0.8649940290344903, + "learning_rate": 9.831357968806388e-06, + "loss": 0.2415, + "step": 40620 + }, + { + "epoch": 0.5239977559534941, + "grad_norm": 0.9115520623747634, + "learning_rate": 9.831164692575518e-06, + "loss": 0.2411, + "step": 40630 + }, + { + "epoch": 0.52412672414349, + "grad_norm": 0.8201230619285619, + "learning_rate": 9.83097130755567e-06, + "loss": 0.2438, + "step": 40640 + }, + { + "epoch": 0.5242556923334859, + "grad_norm": 0.9853588580204461, + "learning_rate": 9.830777813751193e-06, + "loss": 0.2549, + "step": 40650 + }, + { + "epoch": 0.5243846605234819, + "grad_norm": 0.9425852924298647, + "learning_rate": 9.830584211166445e-06, + "loss": 0.2482, + "step": 40660 + }, + { + "epoch": 0.5245136287134778, + "grad_norm": 0.9733568055524976, + "learning_rate": 9.830390499805788e-06, + "loss": 0.2389, + "step": 40670 + }, + { + "epoch": 0.5246425969034738, + "grad_norm": 0.9632796724648064, + "learning_rate": 9.830196679673582e-06, + "loss": 0.2537, + "step": 40680 + }, + { + "epoch": 0.5247715650934697, + "grad_norm": 1.1261267609910715, + "learning_rate": 9.830002750774193e-06, + "loss": 0.2467, + "step": 40690 + }, + { + "epoch": 0.5249005332834656, + "grad_norm": 1.0505344350789074, + "learning_rate": 9.829808713111985e-06, + "loss": 0.2452, + "step": 40700 + }, + { + "epoch": 0.5250295014734616, + "grad_norm": 0.9270577630619522, + "learning_rate": 9.829614566691332e-06, + "loss": 0.2463, + "step": 40710 + }, + { + "epoch": 0.5251584696634575, + "grad_norm": 0.9369403141849948, + "learning_rate": 9.829420311516603e-06, + "loss": 0.2459, + "step": 40720 + }, + { + "epoch": 0.5252874378534534, + "grad_norm": 0.9726098774019121, + "learning_rate": 9.82922594759217e-06, + "loss": 0.2431, + "step": 40730 + }, + { + "epoch": 0.5254164060434494, + "grad_norm": 0.9651441269913145, + "learning_rate": 9.829031474922412e-06, + "loss": 0.2496, + "step": 40740 + }, + { + "epoch": 0.5255453742334453, + "grad_norm": 0.8726036542540152, + "learning_rate": 9.82883689351171e-06, + "loss": 0.2404, + "step": 40750 + }, + { + "epoch": 0.5256743424234412, + "grad_norm": 0.8722226260609744, + "learning_rate": 9.828642203364445e-06, + "loss": 0.2342, + "step": 40760 + }, + { + "epoch": 0.5258033106134372, + "grad_norm": 0.9346048716573724, + "learning_rate": 9.828447404484998e-06, + "loss": 0.2423, + "step": 40770 + }, + { + "epoch": 0.5259322788034332, + "grad_norm": 0.9498493186618248, + "learning_rate": 9.828252496877759e-06, + "loss": 0.2547, + "step": 40780 + }, + { + "epoch": 0.526061246993429, + "grad_norm": 0.901884568556941, + "learning_rate": 9.828057480547115e-06, + "loss": 0.2438, + "step": 40790 + }, + { + "epoch": 0.526190215183425, + "grad_norm": 0.8878849423457682, + "learning_rate": 9.827862355497457e-06, + "loss": 0.26, + "step": 40800 + }, + { + "epoch": 0.526319183373421, + "grad_norm": 0.9565026681979064, + "learning_rate": 9.82766712173318e-06, + "loss": 0.2518, + "step": 40810 + }, + { + "epoch": 0.5264481515634168, + "grad_norm": 0.9744305616674096, + "learning_rate": 9.827471779258681e-06, + "loss": 0.243, + "step": 40820 + }, + { + "epoch": 0.5265771197534128, + "grad_norm": 0.9896130579305912, + "learning_rate": 9.827276328078357e-06, + "loss": 0.2476, + "step": 40830 + }, + { + "epoch": 0.5267060879434088, + "grad_norm": 0.8885301404077831, + "learning_rate": 9.82708076819661e-06, + "loss": 0.2518, + "step": 40840 + }, + { + "epoch": 0.5268350561334046, + "grad_norm": 1.001385073600405, + "learning_rate": 9.826885099617844e-06, + "loss": 0.2426, + "step": 40850 + }, + { + "epoch": 0.5269640243234006, + "grad_norm": 0.9590400989002945, + "learning_rate": 9.826689322346466e-06, + "loss": 0.249, + "step": 40860 + }, + { + "epoch": 0.5270929925133966, + "grad_norm": 0.9659064914561489, + "learning_rate": 9.82649343638688e-06, + "loss": 0.2396, + "step": 40870 + }, + { + "epoch": 0.5272219607033926, + "grad_norm": 0.8622358748962574, + "learning_rate": 9.826297441743502e-06, + "loss": 0.2578, + "step": 40880 + }, + { + "epoch": 0.5273509288933884, + "grad_norm": 0.9558439462853411, + "learning_rate": 9.826101338420745e-06, + "loss": 0.2556, + "step": 40890 + }, + { + "epoch": 0.5274798970833844, + "grad_norm": 0.9187962386395071, + "learning_rate": 9.825905126423024e-06, + "loss": 0.2602, + "step": 40900 + }, + { + "epoch": 0.5276088652733804, + "grad_norm": 0.9079269850254281, + "learning_rate": 9.825708805754755e-06, + "loss": 0.2562, + "step": 40910 + }, + { + "epoch": 0.5277378334633762, + "grad_norm": 0.9436704577004169, + "learning_rate": 9.825512376420361e-06, + "loss": 0.2329, + "step": 40920 + }, + { + "epoch": 0.5278668016533722, + "grad_norm": 0.9456738199420223, + "learning_rate": 9.825315838424265e-06, + "loss": 0.2483, + "step": 40930 + }, + { + "epoch": 0.5279957698433682, + "grad_norm": 1.0102328668675, + "learning_rate": 9.825119191770894e-06, + "loss": 0.2544, + "step": 40940 + }, + { + "epoch": 0.528124738033364, + "grad_norm": 0.8682063892722085, + "learning_rate": 9.824922436464675e-06, + "loss": 0.2562, + "step": 40950 + }, + { + "epoch": 0.52825370622336, + "grad_norm": 0.9463751581347757, + "learning_rate": 9.824725572510036e-06, + "loss": 0.2426, + "step": 40960 + }, + { + "epoch": 0.528382674413356, + "grad_norm": 0.9432362390366009, + "learning_rate": 9.824528599911414e-06, + "loss": 0.2427, + "step": 40970 + }, + { + "epoch": 0.5285116426033519, + "grad_norm": 0.8775643916200405, + "learning_rate": 9.824331518673242e-06, + "loss": 0.2498, + "step": 40980 + }, + { + "epoch": 0.5286406107933478, + "grad_norm": 1.0278246035986895, + "learning_rate": 9.82413432879996e-06, + "loss": 0.244, + "step": 40990 + }, + { + "epoch": 0.5287695789833438, + "grad_norm": 1.0221086217695146, + "learning_rate": 9.823937030296004e-06, + "loss": 0.2487, + "step": 41000 + }, + { + "epoch": 0.5288985471733397, + "grad_norm": 1.1044173603238225, + "learning_rate": 9.823739623165823e-06, + "loss": 0.2444, + "step": 41010 + }, + { + "epoch": 0.5290275153633356, + "grad_norm": 0.9218840758412403, + "learning_rate": 9.823542107413859e-06, + "loss": 0.2447, + "step": 41020 + }, + { + "epoch": 0.5291564835533316, + "grad_norm": 0.920512604750753, + "learning_rate": 9.823344483044558e-06, + "loss": 0.2474, + "step": 41030 + }, + { + "epoch": 0.5292854517433275, + "grad_norm": 0.9153742208666776, + "learning_rate": 9.823146750062374e-06, + "loss": 0.2468, + "step": 41040 + }, + { + "epoch": 0.5294144199333235, + "grad_norm": 0.9514580623304584, + "learning_rate": 9.822948908471757e-06, + "loss": 0.2392, + "step": 41050 + }, + { + "epoch": 0.5295433881233194, + "grad_norm": 0.8193563567223353, + "learning_rate": 9.822750958277162e-06, + "loss": 0.2524, + "step": 41060 + }, + { + "epoch": 0.5296723563133153, + "grad_norm": 1.044778979012698, + "learning_rate": 9.822552899483047e-06, + "loss": 0.2533, + "step": 41070 + }, + { + "epoch": 0.5298013245033113, + "grad_norm": 0.9956627473575341, + "learning_rate": 9.822354732093873e-06, + "loss": 0.2568, + "step": 41080 + }, + { + "epoch": 0.5299302926933072, + "grad_norm": 0.8643743017757861, + "learning_rate": 9.8221564561141e-06, + "loss": 0.2258, + "step": 41090 + }, + { + "epoch": 0.5300592608833031, + "grad_norm": 0.9629859566045763, + "learning_rate": 9.821958071548194e-06, + "loss": 0.2376, + "step": 41100 + }, + { + "epoch": 0.5301882290732991, + "grad_norm": 1.0312138194118712, + "learning_rate": 9.821759578400625e-06, + "loss": 0.2531, + "step": 41110 + }, + { + "epoch": 0.530317197263295, + "grad_norm": 0.9471938565748129, + "learning_rate": 9.821560976675856e-06, + "loss": 0.2381, + "step": 41120 + }, + { + "epoch": 0.5304461654532909, + "grad_norm": 0.939825435420217, + "learning_rate": 9.821362266378367e-06, + "loss": 0.2487, + "step": 41130 + }, + { + "epoch": 0.5305751336432869, + "grad_norm": 1.0524861010782367, + "learning_rate": 9.821163447512628e-06, + "loss": 0.251, + "step": 41140 + }, + { + "epoch": 0.5307041018332829, + "grad_norm": 0.9203122015994619, + "learning_rate": 9.820964520083116e-06, + "loss": 0.237, + "step": 41150 + }, + { + "epoch": 0.5308330700232787, + "grad_norm": 0.886550407859046, + "learning_rate": 9.820765484094312e-06, + "loss": 0.2547, + "step": 41160 + }, + { + "epoch": 0.5309620382132747, + "grad_norm": 1.0556653132176321, + "learning_rate": 9.820566339550696e-06, + "loss": 0.2358, + "step": 41170 + }, + { + "epoch": 0.5310910064032707, + "grad_norm": 0.9899568739776987, + "learning_rate": 9.820367086456754e-06, + "loss": 0.2475, + "step": 41180 + }, + { + "epoch": 0.5312199745932665, + "grad_norm": 0.9677556162275793, + "learning_rate": 9.820167724816975e-06, + "loss": 0.2494, + "step": 41190 + }, + { + "epoch": 0.5313489427832625, + "grad_norm": 0.9250757815298681, + "learning_rate": 9.819968254635842e-06, + "loss": 0.249, + "step": 41200 + }, + { + "epoch": 0.5314779109732585, + "grad_norm": 0.9810628766593514, + "learning_rate": 9.819768675917853e-06, + "loss": 0.2528, + "step": 41210 + }, + { + "epoch": 0.5316068791632543, + "grad_norm": 0.9953777038521714, + "learning_rate": 9.8195689886675e-06, + "loss": 0.2529, + "step": 41220 + }, + { + "epoch": 0.5317358473532503, + "grad_norm": 0.9224260348719416, + "learning_rate": 9.819369192889278e-06, + "loss": 0.2449, + "step": 41230 + }, + { + "epoch": 0.5318648155432463, + "grad_norm": 0.9417515593036636, + "learning_rate": 9.819169288587687e-06, + "loss": 0.2484, + "step": 41240 + }, + { + "epoch": 0.5319937837332422, + "grad_norm": 0.9189291973407948, + "learning_rate": 9.81896927576723e-06, + "loss": 0.2448, + "step": 41250 + }, + { + "epoch": 0.5321227519232381, + "grad_norm": 0.9407082507978803, + "learning_rate": 9.818769154432407e-06, + "loss": 0.2522, + "step": 41260 + }, + { + "epoch": 0.5322517201132341, + "grad_norm": 0.9169309764753671, + "learning_rate": 9.81856892458773e-06, + "loss": 0.2395, + "step": 41270 + }, + { + "epoch": 0.53238068830323, + "grad_norm": 0.9331598741664852, + "learning_rate": 9.818368586237703e-06, + "loss": 0.2371, + "step": 41280 + }, + { + "epoch": 0.5325096564932259, + "grad_norm": 0.9192842427406218, + "learning_rate": 9.818168139386842e-06, + "loss": 0.2394, + "step": 41290 + }, + { + "epoch": 0.5326386246832219, + "grad_norm": 0.890473760959125, + "learning_rate": 9.817967584039654e-06, + "loss": 0.2402, + "step": 41300 + }, + { + "epoch": 0.5327675928732178, + "grad_norm": 0.9877064461774135, + "learning_rate": 9.81776692020066e-06, + "loss": 0.2544, + "step": 41310 + }, + { + "epoch": 0.5328965610632137, + "grad_norm": 0.9531762085170075, + "learning_rate": 9.817566147874378e-06, + "loss": 0.2499, + "step": 41320 + }, + { + "epoch": 0.5330255292532097, + "grad_norm": 0.9439194788484977, + "learning_rate": 9.817365267065329e-06, + "loss": 0.2488, + "step": 41330 + }, + { + "epoch": 0.5331544974432056, + "grad_norm": 0.8967142608825334, + "learning_rate": 9.817164277778036e-06, + "loss": 0.2572, + "step": 41340 + }, + { + "epoch": 0.5332834656332016, + "grad_norm": 0.9810511735126811, + "learning_rate": 9.816963180017024e-06, + "loss": 0.2392, + "step": 41350 + }, + { + "epoch": 0.5334124338231975, + "grad_norm": 0.9170301979897586, + "learning_rate": 9.81676197378682e-06, + "loss": 0.2423, + "step": 41360 + }, + { + "epoch": 0.5335414020131934, + "grad_norm": 0.9038502773458117, + "learning_rate": 9.81656065909196e-06, + "loss": 0.2408, + "step": 41370 + }, + { + "epoch": 0.5336703702031894, + "grad_norm": 0.9689131127420577, + "learning_rate": 9.816359235936974e-06, + "loss": 0.2491, + "step": 41380 + }, + { + "epoch": 0.5337993383931853, + "grad_norm": 0.8277998839279082, + "learning_rate": 9.816157704326397e-06, + "loss": 0.2451, + "step": 41390 + }, + { + "epoch": 0.5339283065831812, + "grad_norm": 0.9859069199743815, + "learning_rate": 9.81595606426477e-06, + "loss": 0.2634, + "step": 41400 + }, + { + "epoch": 0.5340572747731772, + "grad_norm": 0.9968715401491365, + "learning_rate": 9.81575431575663e-06, + "loss": 0.244, + "step": 41410 + }, + { + "epoch": 0.5341862429631732, + "grad_norm": 0.9853312775697504, + "learning_rate": 9.815552458806521e-06, + "loss": 0.2549, + "step": 41420 + }, + { + "epoch": 0.534315211153169, + "grad_norm": 1.0209783623033721, + "learning_rate": 9.81535049341899e-06, + "loss": 0.2466, + "step": 41430 + }, + { + "epoch": 0.534444179343165, + "grad_norm": 0.9726945968485315, + "learning_rate": 9.815148419598584e-06, + "loss": 0.2522, + "step": 41440 + }, + { + "epoch": 0.534573147533161, + "grad_norm": 1.0601073502747353, + "learning_rate": 9.814946237349854e-06, + "loss": 0.2551, + "step": 41450 + }, + { + "epoch": 0.5347021157231568, + "grad_norm": 0.9533317424812562, + "learning_rate": 9.814743946677351e-06, + "loss": 0.2359, + "step": 41460 + }, + { + "epoch": 0.5348310839131528, + "grad_norm": 0.8967540711574918, + "learning_rate": 9.814541547585632e-06, + "loss": 0.2494, + "step": 41470 + }, + { + "epoch": 0.5349600521031488, + "grad_norm": 1.0877684456602874, + "learning_rate": 9.814339040079253e-06, + "loss": 0.2575, + "step": 41480 + }, + { + "epoch": 0.5350890202931446, + "grad_norm": 0.9504923827545081, + "learning_rate": 9.814136424162775e-06, + "loss": 0.2272, + "step": 41490 + }, + { + "epoch": 0.5352179884831406, + "grad_norm": 1.0453131511784661, + "learning_rate": 9.813933699840762e-06, + "loss": 0.2592, + "step": 41500 + }, + { + "epoch": 0.5353469566731366, + "grad_norm": 0.9573778197676389, + "learning_rate": 9.813730867117777e-06, + "loss": 0.2388, + "step": 41510 + }, + { + "epoch": 0.5354759248631326, + "grad_norm": 1.0246185075030685, + "learning_rate": 9.813527925998387e-06, + "loss": 0.2486, + "step": 41520 + }, + { + "epoch": 0.5356048930531284, + "grad_norm": 1.0141117253344958, + "learning_rate": 9.813324876487165e-06, + "loss": 0.2494, + "step": 41530 + }, + { + "epoch": 0.5357338612431244, + "grad_norm": 0.9409766862957728, + "learning_rate": 9.81312171858868e-06, + "loss": 0.2392, + "step": 41540 + }, + { + "epoch": 0.5358628294331204, + "grad_norm": 0.9523528477227108, + "learning_rate": 9.812918452307508e-06, + "loss": 0.241, + "step": 41550 + }, + { + "epoch": 0.5359917976231162, + "grad_norm": 0.9568317314218908, + "learning_rate": 9.812715077648225e-06, + "loss": 0.2674, + "step": 41560 + }, + { + "epoch": 0.5361207658131122, + "grad_norm": 0.8751026896071591, + "learning_rate": 9.812511594615413e-06, + "loss": 0.2406, + "step": 41570 + }, + { + "epoch": 0.5362497340031082, + "grad_norm": 0.9826575681920294, + "learning_rate": 9.812308003213653e-06, + "loss": 0.2379, + "step": 41580 + }, + { + "epoch": 0.536378702193104, + "grad_norm": 0.8184358892120426, + "learning_rate": 9.812104303447527e-06, + "loss": 0.2411, + "step": 41590 + }, + { + "epoch": 0.5365076703831, + "grad_norm": 1.0580943825641396, + "learning_rate": 9.811900495321627e-06, + "loss": 0.2496, + "step": 41600 + }, + { + "epoch": 0.536636638573096, + "grad_norm": 0.966019559179978, + "learning_rate": 9.811696578840538e-06, + "loss": 0.2414, + "step": 41610 + }, + { + "epoch": 0.5367656067630919, + "grad_norm": 0.9637294212085182, + "learning_rate": 9.811492554008854e-06, + "loss": 0.2456, + "step": 41620 + }, + { + "epoch": 0.5368945749530878, + "grad_norm": 0.9835442697858102, + "learning_rate": 9.811288420831168e-06, + "loss": 0.2469, + "step": 41630 + }, + { + "epoch": 0.5370235431430838, + "grad_norm": 0.8650232351160106, + "learning_rate": 9.811084179312079e-06, + "loss": 0.2356, + "step": 41640 + }, + { + "epoch": 0.5371525113330797, + "grad_norm": 0.9107052001649185, + "learning_rate": 9.810879829456184e-06, + "loss": 0.2514, + "step": 41650 + }, + { + "epoch": 0.5372814795230756, + "grad_norm": 1.0243010205730525, + "learning_rate": 9.810675371268084e-06, + "loss": 0.2563, + "step": 41660 + }, + { + "epoch": 0.5374104477130716, + "grad_norm": 1.0238999754248292, + "learning_rate": 9.810470804752386e-06, + "loss": 0.2385, + "step": 41670 + }, + { + "epoch": 0.5375394159030675, + "grad_norm": 0.9689707054616434, + "learning_rate": 9.810266129913692e-06, + "loss": 0.2366, + "step": 41680 + }, + { + "epoch": 0.5376683840930635, + "grad_norm": 1.0194102263889846, + "learning_rate": 9.810061346756616e-06, + "loss": 0.2509, + "step": 41690 + }, + { + "epoch": 0.5377973522830594, + "grad_norm": 0.9439631812227381, + "learning_rate": 9.809856455285766e-06, + "loss": 0.2358, + "step": 41700 + }, + { + "epoch": 0.5379263204730553, + "grad_norm": 0.9355785511583855, + "learning_rate": 9.809651455505757e-06, + "loss": 0.2446, + "step": 41710 + }, + { + "epoch": 0.5380552886630513, + "grad_norm": 0.8576732973786734, + "learning_rate": 9.809446347421203e-06, + "loss": 0.249, + "step": 41720 + }, + { + "epoch": 0.5381842568530472, + "grad_norm": 0.8913039818419353, + "learning_rate": 9.809241131036727e-06, + "loss": 0.2439, + "step": 41730 + }, + { + "epoch": 0.5383132250430431, + "grad_norm": 0.9461647428889427, + "learning_rate": 9.809035806356947e-06, + "loss": 0.2609, + "step": 41740 + }, + { + "epoch": 0.5384421932330391, + "grad_norm": 0.9436594041340047, + "learning_rate": 9.808830373386486e-06, + "loss": 0.2339, + "step": 41750 + }, + { + "epoch": 0.538571161423035, + "grad_norm": 0.9505724245933216, + "learning_rate": 9.808624832129972e-06, + "loss": 0.2404, + "step": 41760 + }, + { + "epoch": 0.5387001296130309, + "grad_norm": 0.9264636984987731, + "learning_rate": 9.808419182592032e-06, + "loss": 0.2445, + "step": 41770 + }, + { + "epoch": 0.5388290978030269, + "grad_norm": 1.034497438956668, + "learning_rate": 9.8082134247773e-06, + "loss": 0.241, + "step": 41780 + }, + { + "epoch": 0.5389580659930229, + "grad_norm": 0.8722572588355331, + "learning_rate": 9.808007558690405e-06, + "loss": 0.2502, + "step": 41790 + }, + { + "epoch": 0.5390870341830187, + "grad_norm": 0.8561604143634948, + "learning_rate": 9.807801584335983e-06, + "loss": 0.2407, + "step": 41800 + }, + { + "epoch": 0.5392160023730147, + "grad_norm": 0.9806558785473772, + "learning_rate": 9.807595501718674e-06, + "loss": 0.2379, + "step": 41810 + }, + { + "epoch": 0.5393449705630107, + "grad_norm": 0.9416447971768356, + "learning_rate": 9.80738931084312e-06, + "loss": 0.2511, + "step": 41820 + }, + { + "epoch": 0.5394739387530065, + "grad_norm": 0.9830110996768509, + "learning_rate": 9.807183011713961e-06, + "loss": 0.2459, + "step": 41830 + }, + { + "epoch": 0.5396029069430025, + "grad_norm": 1.033865111209831, + "learning_rate": 9.806976604335843e-06, + "loss": 0.2545, + "step": 41840 + }, + { + "epoch": 0.5397318751329985, + "grad_norm": 0.8982691538838986, + "learning_rate": 9.806770088713415e-06, + "loss": 0.2479, + "step": 41850 + }, + { + "epoch": 0.5398608433229943, + "grad_norm": 0.9754548264750197, + "learning_rate": 9.806563464851328e-06, + "loss": 0.2574, + "step": 41860 + }, + { + "epoch": 0.5399898115129903, + "grad_norm": 0.9939701672766588, + "learning_rate": 9.806356732754235e-06, + "loss": 0.2493, + "step": 41870 + }, + { + "epoch": 0.5401187797029863, + "grad_norm": 0.846491370944675, + "learning_rate": 9.806149892426786e-06, + "loss": 0.2429, + "step": 41880 + }, + { + "epoch": 0.5402477478929822, + "grad_norm": 0.9005477242910553, + "learning_rate": 9.805942943873645e-06, + "loss": 0.2517, + "step": 41890 + }, + { + "epoch": 0.5403767160829781, + "grad_norm": 0.9686633310186694, + "learning_rate": 9.80573588709947e-06, + "loss": 0.2633, + "step": 41900 + }, + { + "epoch": 0.5405056842729741, + "grad_norm": 1.0335666533763823, + "learning_rate": 9.805528722108923e-06, + "loss": 0.2415, + "step": 41910 + }, + { + "epoch": 0.54063465246297, + "grad_norm": 0.9632041670453223, + "learning_rate": 9.805321448906669e-06, + "loss": 0.2453, + "step": 41920 + }, + { + "epoch": 0.5407636206529659, + "grad_norm": 0.9641097090486273, + "learning_rate": 9.805114067497376e-06, + "loss": 0.2422, + "step": 41930 + }, + { + "epoch": 0.5408925888429619, + "grad_norm": 0.9530272044134015, + "learning_rate": 9.804906577885714e-06, + "loss": 0.2563, + "step": 41940 + }, + { + "epoch": 0.5410215570329578, + "grad_norm": 0.9474241408837192, + "learning_rate": 9.804698980076355e-06, + "loss": 0.2405, + "step": 41950 + }, + { + "epoch": 0.5411505252229537, + "grad_norm": 0.9336102612615611, + "learning_rate": 9.804491274073972e-06, + "loss": 0.2563, + "step": 41960 + }, + { + "epoch": 0.5412794934129497, + "grad_norm": 0.9356987974960109, + "learning_rate": 9.804283459883244e-06, + "loss": 0.2298, + "step": 41970 + }, + { + "epoch": 0.5414084616029456, + "grad_norm": 0.9682771251119445, + "learning_rate": 9.804075537508851e-06, + "loss": 0.2493, + "step": 41980 + }, + { + "epoch": 0.5415374297929416, + "grad_norm": 0.8957391698995925, + "learning_rate": 9.803867506955474e-06, + "loss": 0.2647, + "step": 41990 + }, + { + "epoch": 0.5416663979829375, + "grad_norm": 0.9956673641058129, + "learning_rate": 9.803659368227797e-06, + "loss": 0.2524, + "step": 42000 + }, + { + "epoch": 0.5417953661729334, + "grad_norm": 0.9804421987633939, + "learning_rate": 9.80345112133051e-06, + "loss": 0.2464, + "step": 42010 + }, + { + "epoch": 0.5419243343629294, + "grad_norm": 0.939330311282869, + "learning_rate": 9.8032427662683e-06, + "loss": 0.2446, + "step": 42020 + }, + { + "epoch": 0.5420533025529253, + "grad_norm": 0.9800138050278612, + "learning_rate": 9.803034303045858e-06, + "loss": 0.2476, + "step": 42030 + }, + { + "epoch": 0.5421822707429212, + "grad_norm": 0.9220345043308802, + "learning_rate": 9.802825731667877e-06, + "loss": 0.2453, + "step": 42040 + }, + { + "epoch": 0.5423112389329172, + "grad_norm": 0.9388198011533216, + "learning_rate": 9.802617052139059e-06, + "loss": 0.2471, + "step": 42050 + }, + { + "epoch": 0.5424402071229132, + "grad_norm": 0.9410205222976957, + "learning_rate": 9.802408264464096e-06, + "loss": 0.243, + "step": 42060 + }, + { + "epoch": 0.542569175312909, + "grad_norm": 0.9030010524160093, + "learning_rate": 9.802199368647695e-06, + "loss": 0.2441, + "step": 42070 + }, + { + "epoch": 0.542698143502905, + "grad_norm": 0.9312044781300169, + "learning_rate": 9.80199036469456e-06, + "loss": 0.2545, + "step": 42080 + }, + { + "epoch": 0.542827111692901, + "grad_norm": 0.9276610518587637, + "learning_rate": 9.801781252609394e-06, + "loss": 0.2422, + "step": 42090 + }, + { + "epoch": 0.5429560798828968, + "grad_norm": 0.9062081806207342, + "learning_rate": 9.801572032396907e-06, + "loss": 0.2448, + "step": 42100 + }, + { + "epoch": 0.5430850480728928, + "grad_norm": 0.9291491280647061, + "learning_rate": 9.801362704061812e-06, + "loss": 0.2294, + "step": 42110 + }, + { + "epoch": 0.5432140162628888, + "grad_norm": 0.9529658760365608, + "learning_rate": 9.801153267608821e-06, + "loss": 0.2537, + "step": 42120 + }, + { + "epoch": 0.5433429844528846, + "grad_norm": 0.930424279850116, + "learning_rate": 9.800943723042649e-06, + "loss": 0.2356, + "step": 42130 + }, + { + "epoch": 0.5434719526428806, + "grad_norm": 0.9706218472781623, + "learning_rate": 9.800734070368018e-06, + "loss": 0.2541, + "step": 42140 + }, + { + "epoch": 0.5436009208328766, + "grad_norm": 1.008147240468404, + "learning_rate": 9.800524309589645e-06, + "loss": 0.2498, + "step": 42150 + }, + { + "epoch": 0.5437298890228726, + "grad_norm": 1.0124912177706824, + "learning_rate": 9.800314440712257e-06, + "loss": 0.2452, + "step": 42160 + }, + { + "epoch": 0.5438588572128684, + "grad_norm": 1.008682142281978, + "learning_rate": 9.800104463740578e-06, + "loss": 0.2588, + "step": 42170 + }, + { + "epoch": 0.5439878254028644, + "grad_norm": 0.9158494478936529, + "learning_rate": 9.799894378679338e-06, + "loss": 0.2343, + "step": 42180 + }, + { + "epoch": 0.5441167935928604, + "grad_norm": 1.0443606803173067, + "learning_rate": 9.799684185533265e-06, + "loss": 0.2522, + "step": 42190 + }, + { + "epoch": 0.5442457617828562, + "grad_norm": 0.9702205383428815, + "learning_rate": 9.799473884307094e-06, + "loss": 0.2386, + "step": 42200 + }, + { + "epoch": 0.5443747299728522, + "grad_norm": 0.9462651145140064, + "learning_rate": 9.79926347500556e-06, + "loss": 0.2543, + "step": 42210 + }, + { + "epoch": 0.5445036981628482, + "grad_norm": 0.9293477837000531, + "learning_rate": 9.799052957633401e-06, + "loss": 0.2473, + "step": 42220 + }, + { + "epoch": 0.544632666352844, + "grad_norm": 0.9590147846576262, + "learning_rate": 9.798842332195358e-06, + "loss": 0.2582, + "step": 42230 + }, + { + "epoch": 0.54476163454284, + "grad_norm": 0.9316312426172482, + "learning_rate": 9.798631598696173e-06, + "loss": 0.2488, + "step": 42240 + }, + { + "epoch": 0.544890602732836, + "grad_norm": 0.8955888460446552, + "learning_rate": 9.798420757140592e-06, + "loss": 0.2449, + "step": 42250 + }, + { + "epoch": 0.5450195709228319, + "grad_norm": 0.9022927176213557, + "learning_rate": 9.798209807533363e-06, + "loss": 0.2534, + "step": 42260 + }, + { + "epoch": 0.5451485391128278, + "grad_norm": 0.9585105977690208, + "learning_rate": 9.797998749879235e-06, + "loss": 0.2658, + "step": 42270 + }, + { + "epoch": 0.5452775073028238, + "grad_norm": 0.9190273582888546, + "learning_rate": 9.797787584182963e-06, + "loss": 0.2599, + "step": 42280 + }, + { + "epoch": 0.5454064754928197, + "grad_norm": 1.008623128007685, + "learning_rate": 9.7975763104493e-06, + "loss": 0.2427, + "step": 42290 + }, + { + "epoch": 0.5455354436828156, + "grad_norm": 0.9995190441269804, + "learning_rate": 9.797364928683002e-06, + "loss": 0.2505, + "step": 42300 + }, + { + "epoch": 0.5456644118728116, + "grad_norm": 0.9847934198069255, + "learning_rate": 9.797153438888834e-06, + "loss": 0.2573, + "step": 42310 + }, + { + "epoch": 0.5457933800628075, + "grad_norm": 0.9225950282424993, + "learning_rate": 9.796941841071553e-06, + "loss": 0.2364, + "step": 42320 + }, + { + "epoch": 0.5459223482528034, + "grad_norm": 0.9083128069603137, + "learning_rate": 9.796730135235926e-06, + "loss": 0.2559, + "step": 42330 + }, + { + "epoch": 0.5460513164427994, + "grad_norm": 1.0386817488586189, + "learning_rate": 9.796518321386722e-06, + "loss": 0.2506, + "step": 42340 + }, + { + "epoch": 0.5461802846327953, + "grad_norm": 0.9233756327124318, + "learning_rate": 9.796306399528708e-06, + "loss": 0.2486, + "step": 42350 + }, + { + "epoch": 0.5463092528227913, + "grad_norm": 0.9639830637509024, + "learning_rate": 9.796094369666658e-06, + "loss": 0.264, + "step": 42360 + }, + { + "epoch": 0.5464382210127872, + "grad_norm": 0.9485028598340038, + "learning_rate": 9.795882231805345e-06, + "loss": 0.2341, + "step": 42370 + }, + { + "epoch": 0.5465671892027831, + "grad_norm": 0.9184616355858448, + "learning_rate": 9.795669985949548e-06, + "loss": 0.2394, + "step": 42380 + }, + { + "epoch": 0.5466961573927791, + "grad_norm": 1.0099086473399606, + "learning_rate": 9.795457632104041e-06, + "loss": 0.2618, + "step": 42390 + }, + { + "epoch": 0.546825125582775, + "grad_norm": 0.9012192041866344, + "learning_rate": 9.795245170273615e-06, + "loss": 0.2352, + "step": 42400 + }, + { + "epoch": 0.5469540937727709, + "grad_norm": 1.0360374761573614, + "learning_rate": 9.795032600463047e-06, + "loss": 0.2353, + "step": 42410 + }, + { + "epoch": 0.5470830619627669, + "grad_norm": 0.9570938774427695, + "learning_rate": 9.794819922677123e-06, + "loss": 0.2494, + "step": 42420 + }, + { + "epoch": 0.5472120301527629, + "grad_norm": 0.8935361629952734, + "learning_rate": 9.794607136920638e-06, + "loss": 0.2271, + "step": 42430 + }, + { + "epoch": 0.5473409983427587, + "grad_norm": 1.0925439629521583, + "learning_rate": 9.79439424319838e-06, + "loss": 0.2388, + "step": 42440 + }, + { + "epoch": 0.5474699665327547, + "grad_norm": 0.8985230705307333, + "learning_rate": 9.794181241515141e-06, + "loss": 0.243, + "step": 42450 + }, + { + "epoch": 0.5475989347227507, + "grad_norm": 0.957603725945156, + "learning_rate": 9.793968131875722e-06, + "loss": 0.2503, + "step": 42460 + }, + { + "epoch": 0.5477279029127465, + "grad_norm": 0.8928799152429147, + "learning_rate": 9.79375491428492e-06, + "loss": 0.2425, + "step": 42470 + }, + { + "epoch": 0.5478568711027425, + "grad_norm": 0.9599383840914048, + "learning_rate": 9.793541588747535e-06, + "loss": 0.2458, + "step": 42480 + }, + { + "epoch": 0.5479858392927385, + "grad_norm": 1.0125033011618216, + "learning_rate": 9.793328155268371e-06, + "loss": 0.2532, + "step": 42490 + }, + { + "epoch": 0.5481148074827343, + "grad_norm": 0.9323843611169634, + "learning_rate": 9.793114613852235e-06, + "loss": 0.2437, + "step": 42500 + }, + { + "epoch": 0.5482437756727303, + "grad_norm": 0.9138468846202125, + "learning_rate": 9.792900964503936e-06, + "loss": 0.2519, + "step": 42510 + }, + { + "epoch": 0.5483727438627263, + "grad_norm": 0.9656926659630231, + "learning_rate": 9.792687207228284e-06, + "loss": 0.239, + "step": 42520 + }, + { + "epoch": 0.5485017120527222, + "grad_norm": 1.0540765285667606, + "learning_rate": 9.792473342030092e-06, + "loss": 0.2522, + "step": 42530 + }, + { + "epoch": 0.5486306802427181, + "grad_norm": 1.0084830691593039, + "learning_rate": 9.792259368914177e-06, + "loss": 0.2442, + "step": 42540 + }, + { + "epoch": 0.5487596484327141, + "grad_norm": 0.8733627516166533, + "learning_rate": 9.792045287885357e-06, + "loss": 0.2414, + "step": 42550 + }, + { + "epoch": 0.54888861662271, + "grad_norm": 0.9080844396329414, + "learning_rate": 9.791831098948452e-06, + "loss": 0.2548, + "step": 42560 + }, + { + "epoch": 0.5490175848127059, + "grad_norm": 0.8870350710789322, + "learning_rate": 9.791616802108287e-06, + "loss": 0.2296, + "step": 42570 + }, + { + "epoch": 0.5491465530027019, + "grad_norm": 0.9650428316558348, + "learning_rate": 9.791402397369684e-06, + "loss": 0.2395, + "step": 42580 + }, + { + "epoch": 0.5492755211926978, + "grad_norm": 0.9815249543616722, + "learning_rate": 9.791187884737477e-06, + "loss": 0.237, + "step": 42590 + }, + { + "epoch": 0.5494044893826937, + "grad_norm": 0.8625807266362946, + "learning_rate": 9.79097326421649e-06, + "loss": 0.2398, + "step": 42600 + }, + { + "epoch": 0.5495334575726897, + "grad_norm": 1.0052092217931201, + "learning_rate": 9.790758535811558e-06, + "loss": 0.2531, + "step": 42610 + }, + { + "epoch": 0.5496624257626856, + "grad_norm": 0.9236042697153388, + "learning_rate": 9.790543699527518e-06, + "loss": 0.2429, + "step": 42620 + }, + { + "epoch": 0.5497913939526816, + "grad_norm": 1.0685964719421985, + "learning_rate": 9.790328755369205e-06, + "loss": 0.2523, + "step": 42630 + }, + { + "epoch": 0.5499203621426775, + "grad_norm": 0.8900323928499669, + "learning_rate": 9.790113703341462e-06, + "loss": 0.2375, + "step": 42640 + }, + { + "epoch": 0.5500493303326734, + "grad_norm": 0.9264746520412206, + "learning_rate": 9.78989854344913e-06, + "loss": 0.2408, + "step": 42650 + }, + { + "epoch": 0.5501782985226694, + "grad_norm": 0.9003383274665908, + "learning_rate": 9.789683275697055e-06, + "loss": 0.2446, + "step": 42660 + }, + { + "epoch": 0.5503072667126653, + "grad_norm": 0.9429680670800052, + "learning_rate": 9.789467900090083e-06, + "loss": 0.2509, + "step": 42670 + }, + { + "epoch": 0.5504362349026612, + "grad_norm": 0.8803142287490232, + "learning_rate": 9.789252416633065e-06, + "loss": 0.2429, + "step": 42680 + }, + { + "epoch": 0.5505652030926572, + "grad_norm": 0.9166225408795196, + "learning_rate": 9.789036825330853e-06, + "loss": 0.2511, + "step": 42690 + }, + { + "epoch": 0.5506941712826532, + "grad_norm": 0.9638984956135918, + "learning_rate": 9.7888211261883e-06, + "loss": 0.2353, + "step": 42700 + }, + { + "epoch": 0.550823139472649, + "grad_norm": 1.105525042869136, + "learning_rate": 9.788605319210267e-06, + "loss": 0.2425, + "step": 42710 + }, + { + "epoch": 0.550952107662645, + "grad_norm": 0.9552421634492612, + "learning_rate": 9.78838940440161e-06, + "loss": 0.2433, + "step": 42720 + }, + { + "epoch": 0.551081075852641, + "grad_norm": 1.0118622127198649, + "learning_rate": 9.788173381767192e-06, + "loss": 0.2492, + "step": 42730 + }, + { + "epoch": 0.5512100440426368, + "grad_norm": 0.8973207759895794, + "learning_rate": 9.78795725131188e-06, + "loss": 0.2409, + "step": 42740 + }, + { + "epoch": 0.5513390122326328, + "grad_norm": 0.8893674681097574, + "learning_rate": 9.787741013040535e-06, + "loss": 0.2541, + "step": 42750 + }, + { + "epoch": 0.5514679804226288, + "grad_norm": 0.9690854004937386, + "learning_rate": 9.787524666958033e-06, + "loss": 0.2482, + "step": 42760 + }, + { + "epoch": 0.5515969486126246, + "grad_norm": 0.9817367089109417, + "learning_rate": 9.787308213069241e-06, + "loss": 0.2228, + "step": 42770 + }, + { + "epoch": 0.5517259168026206, + "grad_norm": 0.8995251622195685, + "learning_rate": 9.787091651379035e-06, + "loss": 0.2489, + "step": 42780 + }, + { + "epoch": 0.5518548849926166, + "grad_norm": 0.9417010450901638, + "learning_rate": 9.786874981892292e-06, + "loss": 0.2342, + "step": 42790 + }, + { + "epoch": 0.5519838531826126, + "grad_norm": 0.948866504706501, + "learning_rate": 9.78665820461389e-06, + "loss": 0.238, + "step": 42800 + }, + { + "epoch": 0.5521128213726084, + "grad_norm": 0.9116883807855493, + "learning_rate": 9.786441319548711e-06, + "loss": 0.2428, + "step": 42810 + }, + { + "epoch": 0.5522417895626044, + "grad_norm": 0.9454122888890288, + "learning_rate": 9.786224326701638e-06, + "loss": 0.2439, + "step": 42820 + }, + { + "epoch": 0.5523707577526004, + "grad_norm": 0.9687504269654681, + "learning_rate": 9.786007226077559e-06, + "loss": 0.2464, + "step": 42830 + }, + { + "epoch": 0.5524997259425962, + "grad_norm": 1.0878525568379467, + "learning_rate": 9.78579001768136e-06, + "loss": 0.2415, + "step": 42840 + }, + { + "epoch": 0.5526286941325922, + "grad_norm": 0.9077899815431194, + "learning_rate": 9.785572701517935e-06, + "loss": 0.2409, + "step": 42850 + }, + { + "epoch": 0.5527576623225882, + "grad_norm": 1.080489359668786, + "learning_rate": 9.785355277592177e-06, + "loss": 0.2425, + "step": 42860 + }, + { + "epoch": 0.552886630512584, + "grad_norm": 0.966179944181851, + "learning_rate": 9.78513774590898e-06, + "loss": 0.2503, + "step": 42870 + }, + { + "epoch": 0.55301559870258, + "grad_norm": 1.0402783800983026, + "learning_rate": 9.784920106473244e-06, + "loss": 0.249, + "step": 42880 + }, + { + "epoch": 0.553144566892576, + "grad_norm": 1.0026757978712402, + "learning_rate": 9.78470235928987e-06, + "loss": 0.2573, + "step": 42890 + }, + { + "epoch": 0.5532735350825719, + "grad_norm": 0.9219111445910878, + "learning_rate": 9.78448450436376e-06, + "loss": 0.2334, + "step": 42900 + }, + { + "epoch": 0.5534025032725678, + "grad_norm": 1.019958777017084, + "learning_rate": 9.784266541699822e-06, + "loss": 0.2452, + "step": 42910 + }, + { + "epoch": 0.5535314714625638, + "grad_norm": 0.9545676495021485, + "learning_rate": 9.78404847130296e-06, + "loss": 0.2484, + "step": 42920 + }, + { + "epoch": 0.5536604396525597, + "grad_norm": 0.8480651124961163, + "learning_rate": 9.78383029317809e-06, + "loss": 0.2529, + "step": 42930 + }, + { + "epoch": 0.5537894078425556, + "grad_norm": 0.9366964002006533, + "learning_rate": 9.783612007330122e-06, + "loss": 0.2568, + "step": 42940 + }, + { + "epoch": 0.5539183760325516, + "grad_norm": 0.9248554617604241, + "learning_rate": 9.78339361376397e-06, + "loss": 0.2377, + "step": 42950 + }, + { + "epoch": 0.5540473442225475, + "grad_norm": 0.8751157065849449, + "learning_rate": 9.783175112484554e-06, + "loss": 0.2476, + "step": 42960 + }, + { + "epoch": 0.5541763124125434, + "grad_norm": 0.9799258997551453, + "learning_rate": 9.782956503496793e-06, + "loss": 0.24, + "step": 42970 + }, + { + "epoch": 0.5543052806025394, + "grad_norm": 0.9789488141247286, + "learning_rate": 9.782737786805611e-06, + "loss": 0.254, + "step": 42980 + }, + { + "epoch": 0.5544342487925353, + "grad_norm": 0.9088853692487027, + "learning_rate": 9.782518962415932e-06, + "loss": 0.2569, + "step": 42990 + }, + { + "epoch": 0.5545632169825313, + "grad_norm": 0.90807656894012, + "learning_rate": 9.782300030332685e-06, + "loss": 0.2377, + "step": 43000 + }, + { + "epoch": 0.5546921851725272, + "grad_norm": 0.9875111199505618, + "learning_rate": 9.782080990560799e-06, + "loss": 0.2365, + "step": 43010 + }, + { + "epoch": 0.5548211533625231, + "grad_norm": 0.918126706224231, + "learning_rate": 9.781861843105205e-06, + "loss": 0.23, + "step": 43020 + }, + { + "epoch": 0.5549501215525191, + "grad_norm": 0.875478173146246, + "learning_rate": 9.781642587970839e-06, + "loss": 0.2355, + "step": 43030 + }, + { + "epoch": 0.555079089742515, + "grad_norm": 0.9075682192514078, + "learning_rate": 9.781423225162639e-06, + "loss": 0.2324, + "step": 43040 + }, + { + "epoch": 0.5552080579325109, + "grad_norm": 1.0479120077040485, + "learning_rate": 9.781203754685545e-06, + "loss": 0.2579, + "step": 43050 + }, + { + "epoch": 0.5553370261225069, + "grad_norm": 1.0065201988752157, + "learning_rate": 9.780984176544495e-06, + "loss": 0.2466, + "step": 43060 + }, + { + "epoch": 0.5554659943125029, + "grad_norm": 0.9176968758152073, + "learning_rate": 9.780764490744438e-06, + "loss": 0.2357, + "step": 43070 + }, + { + "epoch": 0.5555949625024987, + "grad_norm": 0.8845852653339091, + "learning_rate": 9.78054469729032e-06, + "loss": 0.2553, + "step": 43080 + }, + { + "epoch": 0.5557239306924947, + "grad_norm": 1.0123814364750257, + "learning_rate": 9.78032479618709e-06, + "loss": 0.2623, + "step": 43090 + }, + { + "epoch": 0.5558528988824907, + "grad_norm": 0.9581392090845984, + "learning_rate": 9.7801047874397e-06, + "loss": 0.2514, + "step": 43100 + }, + { + "epoch": 0.5559818670724865, + "grad_norm": 0.9477017481666483, + "learning_rate": 9.779884671053101e-06, + "loss": 0.2444, + "step": 43110 + }, + { + "epoch": 0.5561108352624825, + "grad_norm": 0.9231462367593847, + "learning_rate": 9.779664447032255e-06, + "loss": 0.2406, + "step": 43120 + }, + { + "epoch": 0.5562398034524785, + "grad_norm": 1.0201094263962314, + "learning_rate": 9.779444115382117e-06, + "loss": 0.2526, + "step": 43130 + }, + { + "epoch": 0.5563687716424743, + "grad_norm": 1.0464298245307444, + "learning_rate": 9.77922367610765e-06, + "loss": 0.2609, + "step": 43140 + }, + { + "epoch": 0.5564977398324703, + "grad_norm": 0.9895396073147387, + "learning_rate": 9.779003129213818e-06, + "loss": 0.264, + "step": 43150 + }, + { + "epoch": 0.5566267080224663, + "grad_norm": 0.9128591937198556, + "learning_rate": 9.778782474705588e-06, + "loss": 0.2344, + "step": 43160 + }, + { + "epoch": 0.5567556762124622, + "grad_norm": 0.9186219288199465, + "learning_rate": 9.778561712587925e-06, + "loss": 0.2344, + "step": 43170 + }, + { + "epoch": 0.5568846444024581, + "grad_norm": 1.0115650011058706, + "learning_rate": 9.778340842865806e-06, + "loss": 0.2425, + "step": 43180 + }, + { + "epoch": 0.5570136125924541, + "grad_norm": 0.9231666713981649, + "learning_rate": 9.7781198655442e-06, + "loss": 0.2379, + "step": 43190 + }, + { + "epoch": 0.55714258078245, + "grad_norm": 0.9245312155075958, + "learning_rate": 9.777898780628086e-06, + "loss": 0.2372, + "step": 43200 + }, + { + "epoch": 0.5572715489724459, + "grad_norm": 0.9570525906862215, + "learning_rate": 9.77767758812244e-06, + "loss": 0.2608, + "step": 43210 + }, + { + "epoch": 0.5574005171624419, + "grad_norm": 0.9071431681374407, + "learning_rate": 9.777456288032243e-06, + "loss": 0.2386, + "step": 43220 + }, + { + "epoch": 0.5575294853524378, + "grad_norm": 1.0060397725216408, + "learning_rate": 9.77723488036248e-06, + "loss": 0.2384, + "step": 43230 + }, + { + "epoch": 0.5576584535424337, + "grad_norm": 0.8897611291088776, + "learning_rate": 9.777013365118135e-06, + "loss": 0.2249, + "step": 43240 + }, + { + "epoch": 0.5577874217324297, + "grad_norm": 0.9140611968112566, + "learning_rate": 9.776791742304198e-06, + "loss": 0.2495, + "step": 43250 + }, + { + "epoch": 0.5579163899224256, + "grad_norm": 0.9304579169483028, + "learning_rate": 9.776570011925658e-06, + "loss": 0.2441, + "step": 43260 + }, + { + "epoch": 0.5580453581124216, + "grad_norm": 0.8865413750899482, + "learning_rate": 9.77634817398751e-06, + "loss": 0.2474, + "step": 43270 + }, + { + "epoch": 0.5581743263024175, + "grad_norm": 0.9594571797647372, + "learning_rate": 9.776126228494744e-06, + "loss": 0.2542, + "step": 43280 + }, + { + "epoch": 0.5583032944924134, + "grad_norm": 0.9019820091925909, + "learning_rate": 9.775904175452365e-06, + "loss": 0.2435, + "step": 43290 + }, + { + "epoch": 0.5584322626824094, + "grad_norm": 0.9430589073374603, + "learning_rate": 9.775682014865368e-06, + "loss": 0.2609, + "step": 43300 + }, + { + "epoch": 0.5585612308724053, + "grad_norm": 1.043200031481611, + "learning_rate": 9.775459746738759e-06, + "loss": 0.2456, + "step": 43310 + }, + { + "epoch": 0.5586901990624012, + "grad_norm": 0.8128994354547653, + "learning_rate": 9.77523737107754e-06, + "loss": 0.2333, + "step": 43320 + }, + { + "epoch": 0.5588191672523972, + "grad_norm": 0.9297613011894575, + "learning_rate": 9.77501488788672e-06, + "loss": 0.2575, + "step": 43330 + }, + { + "epoch": 0.5589481354423931, + "grad_norm": 0.959741322395769, + "learning_rate": 9.77479229717131e-06, + "loss": 0.2336, + "step": 43340 + }, + { + "epoch": 0.559077103632389, + "grad_norm": 0.8609272491805047, + "learning_rate": 9.77456959893632e-06, + "loss": 0.2494, + "step": 43350 + }, + { + "epoch": 0.559206071822385, + "grad_norm": 0.8780463826700061, + "learning_rate": 9.774346793186768e-06, + "loss": 0.2398, + "step": 43360 + }, + { + "epoch": 0.559335040012381, + "grad_norm": 0.9371467107611842, + "learning_rate": 9.774123879927668e-06, + "loss": 0.2543, + "step": 43370 + }, + { + "epoch": 0.5594640082023769, + "grad_norm": 0.8776182938437758, + "learning_rate": 9.773900859164041e-06, + "loss": 0.2559, + "step": 43380 + }, + { + "epoch": 0.5595929763923728, + "grad_norm": 0.817266842017591, + "learning_rate": 9.773677730900908e-06, + "loss": 0.2253, + "step": 43390 + }, + { + "epoch": 0.5597219445823688, + "grad_norm": 0.9008999776027375, + "learning_rate": 9.773454495143295e-06, + "loss": 0.2362, + "step": 43400 + }, + { + "epoch": 0.5598509127723647, + "grad_norm": 0.9265379891478855, + "learning_rate": 9.77323115189623e-06, + "loss": 0.2306, + "step": 43410 + }, + { + "epoch": 0.5599798809623606, + "grad_norm": 0.962326483872778, + "learning_rate": 9.773007701164738e-06, + "loss": 0.2477, + "step": 43420 + }, + { + "epoch": 0.5601088491523566, + "grad_norm": 0.9643567278623351, + "learning_rate": 9.772784142953852e-06, + "loss": 0.2483, + "step": 43430 + }, + { + "epoch": 0.5602378173423526, + "grad_norm": 1.018535098166489, + "learning_rate": 9.772560477268611e-06, + "loss": 0.2389, + "step": 43440 + }, + { + "epoch": 0.5603667855323484, + "grad_norm": 0.9253730904293181, + "learning_rate": 9.772336704114045e-06, + "loss": 0.2346, + "step": 43450 + }, + { + "epoch": 0.5604957537223444, + "grad_norm": 0.9397699441427155, + "learning_rate": 9.772112823495196e-06, + "loss": 0.2488, + "step": 43460 + }, + { + "epoch": 0.5606247219123404, + "grad_norm": 0.9161642677550352, + "learning_rate": 9.771888835417108e-06, + "loss": 0.2401, + "step": 43470 + }, + { + "epoch": 0.5607536901023362, + "grad_norm": 0.9531926347083687, + "learning_rate": 9.77166473988482e-06, + "loss": 0.2417, + "step": 43480 + }, + { + "epoch": 0.5608826582923322, + "grad_norm": 1.124204233064032, + "learning_rate": 9.771440536903377e-06, + "loss": 0.2325, + "step": 43490 + }, + { + "epoch": 0.5610116264823282, + "grad_norm": 0.9151667689360337, + "learning_rate": 9.771216226477834e-06, + "loss": 0.2553, + "step": 43500 + }, + { + "epoch": 0.561140594672324, + "grad_norm": 0.9397684876178521, + "learning_rate": 9.77099180861324e-06, + "loss": 0.2502, + "step": 43510 + }, + { + "epoch": 0.56126956286232, + "grad_norm": 1.040514935093759, + "learning_rate": 9.770767283314644e-06, + "loss": 0.2499, + "step": 43520 + }, + { + "epoch": 0.561398531052316, + "grad_norm": 1.0220022645346665, + "learning_rate": 9.770542650587107e-06, + "loss": 0.2603, + "step": 43530 + }, + { + "epoch": 0.5615274992423119, + "grad_norm": 0.9711242091909972, + "learning_rate": 9.770317910435684e-06, + "loss": 0.2295, + "step": 43540 + }, + { + "epoch": 0.5616564674323078, + "grad_norm": 0.8601943420001094, + "learning_rate": 9.77009306286544e-06, + "loss": 0.2414, + "step": 43550 + }, + { + "epoch": 0.5617854356223038, + "grad_norm": 0.9782779738537645, + "learning_rate": 9.769868107881435e-06, + "loss": 0.2401, + "step": 43560 + }, + { + "epoch": 0.5619144038122997, + "grad_norm": 0.9303796092359496, + "learning_rate": 9.769643045488734e-06, + "loss": 0.2488, + "step": 43570 + }, + { + "epoch": 0.5620433720022956, + "grad_norm": 0.9476966814169899, + "learning_rate": 9.769417875692407e-06, + "loss": 0.2459, + "step": 43580 + }, + { + "epoch": 0.5621723401922916, + "grad_norm": 0.9464231186450074, + "learning_rate": 9.769192598497522e-06, + "loss": 0.2433, + "step": 43590 + }, + { + "epoch": 0.5623013083822875, + "grad_norm": 0.9045596133671697, + "learning_rate": 9.768967213909152e-06, + "loss": 0.2271, + "step": 43600 + }, + { + "epoch": 0.5624302765722834, + "grad_norm": 0.8665463791350707, + "learning_rate": 9.768741721932379e-06, + "loss": 0.2307, + "step": 43610 + }, + { + "epoch": 0.5625592447622794, + "grad_norm": 0.9816681260050814, + "learning_rate": 9.76851612257227e-06, + "loss": 0.2548, + "step": 43620 + }, + { + "epoch": 0.5626882129522753, + "grad_norm": 0.8136699388048625, + "learning_rate": 9.768290415833912e-06, + "loss": 0.2465, + "step": 43630 + }, + { + "epoch": 0.5628171811422713, + "grad_norm": 0.9386595427384391, + "learning_rate": 9.768064601722384e-06, + "loss": 0.2448, + "step": 43640 + }, + { + "epoch": 0.5629461493322672, + "grad_norm": 0.9530025088613706, + "learning_rate": 9.767838680242775e-06, + "loss": 0.2483, + "step": 43650 + }, + { + "epoch": 0.5630751175222631, + "grad_norm": 0.9436739372069447, + "learning_rate": 9.767612651400169e-06, + "loss": 0.2393, + "step": 43660 + }, + { + "epoch": 0.5632040857122591, + "grad_norm": 0.9048718198481553, + "learning_rate": 9.767386515199658e-06, + "loss": 0.2518, + "step": 43670 + }, + { + "epoch": 0.563333053902255, + "grad_norm": 0.8494576143357905, + "learning_rate": 9.767160271646332e-06, + "loss": 0.2499, + "step": 43680 + }, + { + "epoch": 0.5634620220922509, + "grad_norm": 0.9544261361841359, + "learning_rate": 9.766933920745285e-06, + "loss": 0.2464, + "step": 43690 + }, + { + "epoch": 0.5635909902822469, + "grad_norm": 0.9436868158529861, + "learning_rate": 9.766707462501617e-06, + "loss": 0.2403, + "step": 43700 + }, + { + "epoch": 0.5637199584722428, + "grad_norm": 0.9426168021369067, + "learning_rate": 9.766480896920427e-06, + "loss": 0.2505, + "step": 43710 + }, + { + "epoch": 0.5638489266622387, + "grad_norm": 0.931627605646451, + "learning_rate": 9.766254224006815e-06, + "loss": 0.2384, + "step": 43720 + }, + { + "epoch": 0.5639778948522347, + "grad_norm": 0.9000158930067526, + "learning_rate": 9.766027443765884e-06, + "loss": 0.2475, + "step": 43730 + }, + { + "epoch": 0.5641068630422307, + "grad_norm": 0.9246355645855888, + "learning_rate": 9.765800556202745e-06, + "loss": 0.2437, + "step": 43740 + }, + { + "epoch": 0.5642358312322265, + "grad_norm": 0.9064839610934498, + "learning_rate": 9.765573561322506e-06, + "loss": 0.2515, + "step": 43750 + }, + { + "epoch": 0.5643647994222225, + "grad_norm": 1.0258711128740412, + "learning_rate": 9.765346459130274e-06, + "loss": 0.2492, + "step": 43760 + }, + { + "epoch": 0.5644937676122185, + "grad_norm": 1.0391739608116177, + "learning_rate": 9.765119249631168e-06, + "loss": 0.2531, + "step": 43770 + }, + { + "epoch": 0.5646227358022143, + "grad_norm": 0.9230758074057483, + "learning_rate": 9.764891932830303e-06, + "loss": 0.2512, + "step": 43780 + }, + { + "epoch": 0.5647517039922103, + "grad_norm": 0.9264207418335866, + "learning_rate": 9.764664508732797e-06, + "loss": 0.2453, + "step": 43790 + }, + { + "epoch": 0.5648806721822063, + "grad_norm": 0.9291405417994173, + "learning_rate": 9.764436977343772e-06, + "loss": 0.2501, + "step": 43800 + }, + { + "epoch": 0.5650096403722022, + "grad_norm": 0.9799661404605671, + "learning_rate": 9.76420933866835e-06, + "loss": 0.2476, + "step": 43810 + }, + { + "epoch": 0.5651386085621981, + "grad_norm": 0.942188959788967, + "learning_rate": 9.763981592711659e-06, + "loss": 0.2416, + "step": 43820 + }, + { + "epoch": 0.5652675767521941, + "grad_norm": 0.9153266487801562, + "learning_rate": 9.763753739478826e-06, + "loss": 0.2462, + "step": 43830 + }, + { + "epoch": 0.56539654494219, + "grad_norm": 0.9367645593324722, + "learning_rate": 9.763525778974982e-06, + "loss": 0.246, + "step": 43840 + }, + { + "epoch": 0.5655255131321859, + "grad_norm": 0.9511868194373497, + "learning_rate": 9.763297711205262e-06, + "loss": 0.2396, + "step": 43850 + }, + { + "epoch": 0.5656544813221819, + "grad_norm": 0.9395075072379834, + "learning_rate": 9.7630695361748e-06, + "loss": 0.2542, + "step": 43860 + }, + { + "epoch": 0.5657834495121778, + "grad_norm": 1.0294180648489013, + "learning_rate": 9.762841253888734e-06, + "loss": 0.245, + "step": 43870 + }, + { + "epoch": 0.5659124177021737, + "grad_norm": 0.943315710277113, + "learning_rate": 9.762612864352204e-06, + "loss": 0.2495, + "step": 43880 + }, + { + "epoch": 0.5660413858921697, + "grad_norm": 0.952201496949342, + "learning_rate": 9.762384367570355e-06, + "loss": 0.2474, + "step": 43890 + }, + { + "epoch": 0.5661703540821656, + "grad_norm": 0.8702688926331864, + "learning_rate": 9.762155763548331e-06, + "loss": 0.2355, + "step": 43900 + }, + { + "epoch": 0.5662993222721616, + "grad_norm": 0.8729024789717305, + "learning_rate": 9.76192705229128e-06, + "loss": 0.2485, + "step": 43910 + }, + { + "epoch": 0.5664282904621575, + "grad_norm": 0.9793265106178548, + "learning_rate": 9.761698233804351e-06, + "loss": 0.2484, + "step": 43920 + }, + { + "epoch": 0.5665572586521535, + "grad_norm": 0.9139874962697178, + "learning_rate": 9.7614693080927e-06, + "loss": 0.2377, + "step": 43930 + }, + { + "epoch": 0.5666862268421494, + "grad_norm": 1.0312878454696406, + "learning_rate": 9.761240275161477e-06, + "loss": 0.2384, + "step": 43940 + }, + { + "epoch": 0.5668151950321453, + "grad_norm": 0.9352327585300425, + "learning_rate": 9.761011135015843e-06, + "loss": 0.2567, + "step": 43950 + }, + { + "epoch": 0.5669441632221413, + "grad_norm": 0.9571593008563071, + "learning_rate": 9.760781887660957e-06, + "loss": 0.2528, + "step": 43960 + }, + { + "epoch": 0.5670731314121372, + "grad_norm": 0.8284966432262277, + "learning_rate": 9.760552533101981e-06, + "loss": 0.2536, + "step": 43970 + }, + { + "epoch": 0.5672020996021331, + "grad_norm": 0.932235800612905, + "learning_rate": 9.760323071344079e-06, + "loss": 0.2436, + "step": 43980 + }, + { + "epoch": 0.567331067792129, + "grad_norm": 0.9049316761007679, + "learning_rate": 9.76009350239242e-06, + "loss": 0.2434, + "step": 43990 + }, + { + "epoch": 0.567460035982125, + "grad_norm": 1.0294931131095244, + "learning_rate": 9.759863826252171e-06, + "loss": 0.2519, + "step": 44000 + }, + { + "epoch": 0.567589004172121, + "grad_norm": 0.9307114777296558, + "learning_rate": 9.759634042928506e-06, + "loss": 0.2437, + "step": 44010 + }, + { + "epoch": 0.5677179723621169, + "grad_norm": 0.8932431131746249, + "learning_rate": 9.759404152426597e-06, + "loss": 0.2447, + "step": 44020 + }, + { + "epoch": 0.5678469405521128, + "grad_norm": 0.8976459833779454, + "learning_rate": 9.759174154751624e-06, + "loss": 0.2414, + "step": 44030 + }, + { + "epoch": 0.5679759087421088, + "grad_norm": 0.9507578759465158, + "learning_rate": 9.758944049908763e-06, + "loss": 0.2501, + "step": 44040 + }, + { + "epoch": 0.5681048769321047, + "grad_norm": 0.9602319550559043, + "learning_rate": 9.758713837903198e-06, + "loss": 0.2496, + "step": 44050 + }, + { + "epoch": 0.5682338451221006, + "grad_norm": 0.9002328557033419, + "learning_rate": 9.75848351874011e-06, + "loss": 0.2397, + "step": 44060 + }, + { + "epoch": 0.5683628133120966, + "grad_norm": 1.0040070470101243, + "learning_rate": 9.758253092424689e-06, + "loss": 0.237, + "step": 44070 + }, + { + "epoch": 0.5684917815020926, + "grad_norm": 0.9901285361523082, + "learning_rate": 9.758022558962121e-06, + "loss": 0.2479, + "step": 44080 + }, + { + "epoch": 0.5686207496920884, + "grad_norm": 1.0310553216145408, + "learning_rate": 9.757791918357598e-06, + "loss": 0.2437, + "step": 44090 + }, + { + "epoch": 0.5687497178820844, + "grad_norm": 0.8876589137393595, + "learning_rate": 9.757561170616315e-06, + "loss": 0.2473, + "step": 44100 + }, + { + "epoch": 0.5688786860720804, + "grad_norm": 0.8827078385906195, + "learning_rate": 9.757330315743463e-06, + "loss": 0.248, + "step": 44110 + }, + { + "epoch": 0.5690076542620762, + "grad_norm": 0.9192544692160998, + "learning_rate": 9.757099353744248e-06, + "loss": 0.2392, + "step": 44120 + }, + { + "epoch": 0.5691366224520722, + "grad_norm": 0.9543659813557105, + "learning_rate": 9.756868284623865e-06, + "loss": 0.25, + "step": 44130 + }, + { + "epoch": 0.5692655906420682, + "grad_norm": 1.0634290633044237, + "learning_rate": 9.75663710838752e-06, + "loss": 0.2401, + "step": 44140 + }, + { + "epoch": 0.569394558832064, + "grad_norm": 0.9714701289859364, + "learning_rate": 9.756405825040418e-06, + "loss": 0.2417, + "step": 44150 + }, + { + "epoch": 0.56952352702206, + "grad_norm": 0.9199146677467979, + "learning_rate": 9.756174434587766e-06, + "loss": 0.2518, + "step": 44160 + }, + { + "epoch": 0.569652495212056, + "grad_norm": 1.1229746255029114, + "learning_rate": 9.755942937034778e-06, + "loss": 0.2409, + "step": 44170 + }, + { + "epoch": 0.5697814634020519, + "grad_norm": 0.9282465976395224, + "learning_rate": 9.755711332386661e-06, + "loss": 0.2358, + "step": 44180 + }, + { + "epoch": 0.5699104315920478, + "grad_norm": 0.9628968814363804, + "learning_rate": 9.755479620648636e-06, + "loss": 0.2493, + "step": 44190 + }, + { + "epoch": 0.5700393997820438, + "grad_norm": 1.1363495516664552, + "learning_rate": 9.755247801825916e-06, + "loss": 0.254, + "step": 44200 + }, + { + "epoch": 0.5701683679720397, + "grad_norm": 0.9720178807726955, + "learning_rate": 9.755015875923725e-06, + "loss": 0.2455, + "step": 44210 + }, + { + "epoch": 0.5702973361620356, + "grad_norm": 0.9467683091866046, + "learning_rate": 9.754783842947286e-06, + "loss": 0.2521, + "step": 44220 + }, + { + "epoch": 0.5704263043520316, + "grad_norm": 1.0191258653845336, + "learning_rate": 9.75455170290182e-06, + "loss": 0.2304, + "step": 44230 + }, + { + "epoch": 0.5705552725420275, + "grad_norm": 0.8954254935593683, + "learning_rate": 9.754319455792555e-06, + "loss": 0.2403, + "step": 44240 + }, + { + "epoch": 0.5706842407320234, + "grad_norm": 0.8872601210054069, + "learning_rate": 9.754087101624726e-06, + "loss": 0.2443, + "step": 44250 + }, + { + "epoch": 0.5708132089220194, + "grad_norm": 0.947714622976445, + "learning_rate": 9.753854640403557e-06, + "loss": 0.2546, + "step": 44260 + }, + { + "epoch": 0.5709421771120153, + "grad_norm": 1.0243624340378645, + "learning_rate": 9.75362207213429e-06, + "loss": 0.245, + "step": 44270 + }, + { + "epoch": 0.5710711453020113, + "grad_norm": 0.9518489562920707, + "learning_rate": 9.753389396822158e-06, + "loss": 0.2491, + "step": 44280 + }, + { + "epoch": 0.5712001134920072, + "grad_norm": 0.868441670377137, + "learning_rate": 9.753156614472401e-06, + "loss": 0.2414, + "step": 44290 + }, + { + "epoch": 0.5713290816820031, + "grad_norm": 0.8836608980463118, + "learning_rate": 9.752923725090261e-06, + "loss": 0.2386, + "step": 44300 + }, + { + "epoch": 0.5714580498719991, + "grad_norm": 0.9425002262795341, + "learning_rate": 9.752690728680984e-06, + "loss": 0.2465, + "step": 44310 + }, + { + "epoch": 0.571587018061995, + "grad_norm": 0.9209907257245369, + "learning_rate": 9.752457625249816e-06, + "loss": 0.2456, + "step": 44320 + }, + { + "epoch": 0.5717159862519909, + "grad_norm": 0.9120459168667273, + "learning_rate": 9.752224414802002e-06, + "loss": 0.2408, + "step": 44330 + }, + { + "epoch": 0.5718449544419869, + "grad_norm": 0.9561181116697215, + "learning_rate": 9.7519910973428e-06, + "loss": 0.2415, + "step": 44340 + }, + { + "epoch": 0.5719739226319828, + "grad_norm": 0.9752457340925158, + "learning_rate": 9.751757672877457e-06, + "loss": 0.2523, + "step": 44350 + }, + { + "epoch": 0.5721028908219787, + "grad_norm": 0.9044253992965616, + "learning_rate": 9.751524141411234e-06, + "loss": 0.2423, + "step": 44360 + }, + { + "epoch": 0.5722318590119747, + "grad_norm": 0.9267417479495523, + "learning_rate": 9.75129050294939e-06, + "loss": 0.258, + "step": 44370 + }, + { + "epoch": 0.5723608272019707, + "grad_norm": 0.9603866839988704, + "learning_rate": 9.751056757497184e-06, + "loss": 0.2433, + "step": 44380 + }, + { + "epoch": 0.5724897953919665, + "grad_norm": 0.9589326480521615, + "learning_rate": 9.75082290505988e-06, + "loss": 0.2449, + "step": 44390 + }, + { + "epoch": 0.5726187635819625, + "grad_norm": 0.9848442087143436, + "learning_rate": 9.750588945642744e-06, + "loss": 0.2444, + "step": 44400 + }, + { + "epoch": 0.5727477317719585, + "grad_norm": 0.8968413535422168, + "learning_rate": 9.750354879251045e-06, + "loss": 0.2463, + "step": 44410 + }, + { + "epoch": 0.5728766999619543, + "grad_norm": 0.8833620829551534, + "learning_rate": 9.750120705890053e-06, + "loss": 0.2396, + "step": 44420 + }, + { + "epoch": 0.5730056681519503, + "grad_norm": 0.935937149858866, + "learning_rate": 9.749886425565041e-06, + "loss": 0.242, + "step": 44430 + }, + { + "epoch": 0.5731346363419463, + "grad_norm": 0.9307121130699297, + "learning_rate": 9.749652038281286e-06, + "loss": 0.2544, + "step": 44440 + }, + { + "epoch": 0.5732636045319423, + "grad_norm": 0.8285102604245104, + "learning_rate": 9.749417544044063e-06, + "loss": 0.2342, + "step": 44450 + }, + { + "epoch": 0.5733925727219381, + "grad_norm": 0.9068726654307507, + "learning_rate": 9.749182942858654e-06, + "loss": 0.2365, + "step": 44460 + }, + { + "epoch": 0.5735215409119341, + "grad_norm": 0.9563966140091062, + "learning_rate": 9.748948234730345e-06, + "loss": 0.2437, + "step": 44470 + }, + { + "epoch": 0.57365050910193, + "grad_norm": 0.9244130356302979, + "learning_rate": 9.748713419664416e-06, + "loss": 0.2444, + "step": 44480 + }, + { + "epoch": 0.5737794772919259, + "grad_norm": 1.001602174663974, + "learning_rate": 9.748478497666158e-06, + "loss": 0.2544, + "step": 44490 + }, + { + "epoch": 0.5739084454819219, + "grad_norm": 0.9166091628712725, + "learning_rate": 9.748243468740859e-06, + "loss": 0.2507, + "step": 44500 + }, + { + "epoch": 0.5740374136719179, + "grad_norm": 0.9316448337102324, + "learning_rate": 9.748008332893812e-06, + "loss": 0.2493, + "step": 44510 + }, + { + "epoch": 0.5741663818619137, + "grad_norm": 1.0126404955887853, + "learning_rate": 9.747773090130312e-06, + "loss": 0.2472, + "step": 44520 + }, + { + "epoch": 0.5742953500519097, + "grad_norm": 0.9549582165069201, + "learning_rate": 9.747537740455658e-06, + "loss": 0.233, + "step": 44530 + }, + { + "epoch": 0.5744243182419057, + "grad_norm": 0.9649305983004351, + "learning_rate": 9.747302283875147e-06, + "loss": 0.2468, + "step": 44540 + }, + { + "epoch": 0.5745532864319016, + "grad_norm": 1.0156839523234182, + "learning_rate": 9.747066720394082e-06, + "loss": 0.2381, + "step": 44550 + }, + { + "epoch": 0.5746822546218975, + "grad_norm": 0.917286565087259, + "learning_rate": 9.746831050017766e-06, + "loss": 0.2406, + "step": 44560 + }, + { + "epoch": 0.5748112228118935, + "grad_norm": 0.9329195947471165, + "learning_rate": 9.74659527275151e-06, + "loss": 0.2376, + "step": 44570 + }, + { + "epoch": 0.5749401910018894, + "grad_norm": 0.9440862447847614, + "learning_rate": 9.746359388600618e-06, + "loss": 0.224, + "step": 44580 + }, + { + "epoch": 0.5750691591918853, + "grad_norm": 0.8667207230231528, + "learning_rate": 9.746123397570405e-06, + "loss": 0.2498, + "step": 44590 + }, + { + "epoch": 0.5751981273818813, + "grad_norm": 0.8753231148744136, + "learning_rate": 9.745887299666188e-06, + "loss": 0.2405, + "step": 44600 + }, + { + "epoch": 0.5753270955718772, + "grad_norm": 1.016992883477807, + "learning_rate": 9.745651094893275e-06, + "loss": 0.2453, + "step": 44610 + }, + { + "epoch": 0.5754560637618731, + "grad_norm": 0.8838209196273495, + "learning_rate": 9.745414783256994e-06, + "loss": 0.245, + "step": 44620 + }, + { + "epoch": 0.575585031951869, + "grad_norm": 1.0085053319003328, + "learning_rate": 9.74517836476266e-06, + "loss": 0.2474, + "step": 44630 + }, + { + "epoch": 0.575714000141865, + "grad_norm": 0.9007086581451992, + "learning_rate": 9.744941839415598e-06, + "loss": 0.2475, + "step": 44640 + }, + { + "epoch": 0.575842968331861, + "grad_norm": 0.915626478114635, + "learning_rate": 9.744705207221136e-06, + "loss": 0.2208, + "step": 44650 + }, + { + "epoch": 0.5759719365218569, + "grad_norm": 0.9517483479988401, + "learning_rate": 9.7444684681846e-06, + "loss": 0.2411, + "step": 44660 + }, + { + "epoch": 0.5761009047118528, + "grad_norm": 0.9810958120112503, + "learning_rate": 9.744231622311324e-06, + "loss": 0.253, + "step": 44670 + }, + { + "epoch": 0.5762298729018488, + "grad_norm": 0.8789174940129497, + "learning_rate": 9.74399466960664e-06, + "loss": 0.2348, + "step": 44680 + }, + { + "epoch": 0.5763588410918447, + "grad_norm": 0.9407392794832145, + "learning_rate": 9.743757610075882e-06, + "loss": 0.2505, + "step": 44690 + }, + { + "epoch": 0.5764878092818406, + "grad_norm": 0.9171944485030674, + "learning_rate": 9.743520443724392e-06, + "loss": 0.2364, + "step": 44700 + }, + { + "epoch": 0.5766167774718366, + "grad_norm": 0.9417005645814827, + "learning_rate": 9.743283170557504e-06, + "loss": 0.2545, + "step": 44710 + }, + { + "epoch": 0.5767457456618325, + "grad_norm": 0.8186068183221986, + "learning_rate": 9.743045790580569e-06, + "loss": 0.2502, + "step": 44720 + }, + { + "epoch": 0.5768747138518284, + "grad_norm": 0.9490381305301211, + "learning_rate": 9.742808303798927e-06, + "loss": 0.245, + "step": 44730 + }, + { + "epoch": 0.5770036820418244, + "grad_norm": 0.9571537706965514, + "learning_rate": 9.742570710217928e-06, + "loss": 0.2402, + "step": 44740 + }, + { + "epoch": 0.5771326502318204, + "grad_norm": 0.9247757940631982, + "learning_rate": 9.742333009842921e-06, + "loss": 0.2395, + "step": 44750 + }, + { + "epoch": 0.5772616184218162, + "grad_norm": 0.8809960708085665, + "learning_rate": 9.742095202679257e-06, + "loss": 0.2358, + "step": 44760 + }, + { + "epoch": 0.5773905866118122, + "grad_norm": 0.9752636421502735, + "learning_rate": 9.741857288732296e-06, + "loss": 0.2465, + "step": 44770 + }, + { + "epoch": 0.5775195548018082, + "grad_norm": 0.8951551743175322, + "learning_rate": 9.741619268007391e-06, + "loss": 0.2425, + "step": 44780 + }, + { + "epoch": 0.577648522991804, + "grad_norm": 0.8677562704698328, + "learning_rate": 9.741381140509904e-06, + "loss": 0.2479, + "step": 44790 + }, + { + "epoch": 0.5777774911818, + "grad_norm": 0.9437261039692811, + "learning_rate": 9.741142906245196e-06, + "loss": 0.2496, + "step": 44800 + }, + { + "epoch": 0.577906459371796, + "grad_norm": 0.9443596869082578, + "learning_rate": 9.74090456521863e-06, + "loss": 0.2398, + "step": 44810 + }, + { + "epoch": 0.5780354275617919, + "grad_norm": 0.9033265266999421, + "learning_rate": 9.740666117435577e-06, + "loss": 0.2431, + "step": 44820 + }, + { + "epoch": 0.5781643957517878, + "grad_norm": 1.0610183278558132, + "learning_rate": 9.740427562901405e-06, + "loss": 0.244, + "step": 44830 + }, + { + "epoch": 0.5782933639417838, + "grad_norm": 0.9247257243691319, + "learning_rate": 9.740188901621484e-06, + "loss": 0.2421, + "step": 44840 + }, + { + "epoch": 0.5784223321317797, + "grad_norm": 0.8930561872971108, + "learning_rate": 9.739950133601189e-06, + "loss": 0.2494, + "step": 44850 + }, + { + "epoch": 0.5785513003217756, + "grad_norm": 0.995286145520731, + "learning_rate": 9.739711258845897e-06, + "loss": 0.2482, + "step": 44860 + }, + { + "epoch": 0.5786802685117716, + "grad_norm": 0.979593546554048, + "learning_rate": 9.739472277360988e-06, + "loss": 0.2347, + "step": 44870 + }, + { + "epoch": 0.5788092367017675, + "grad_norm": 0.9648879945016551, + "learning_rate": 9.739233189151842e-06, + "loss": 0.2432, + "step": 44880 + }, + { + "epoch": 0.5789382048917634, + "grad_norm": 0.9674683873097413, + "learning_rate": 9.738993994223843e-06, + "loss": 0.2413, + "step": 44890 + }, + { + "epoch": 0.5790671730817594, + "grad_norm": 0.9773028818566462, + "learning_rate": 9.738754692582378e-06, + "loss": 0.2556, + "step": 44900 + }, + { + "epoch": 0.5791961412717553, + "grad_norm": 0.8914729848708858, + "learning_rate": 9.738515284232835e-06, + "loss": 0.2465, + "step": 44910 + }, + { + "epoch": 0.5793251094617513, + "grad_norm": 1.0081809431621394, + "learning_rate": 9.738275769180605e-06, + "loss": 0.2424, + "step": 44920 + }, + { + "epoch": 0.5794540776517472, + "grad_norm": 0.9434578184214001, + "learning_rate": 9.738036147431081e-06, + "loss": 0.2437, + "step": 44930 + }, + { + "epoch": 0.5795830458417431, + "grad_norm": 0.9470175209092242, + "learning_rate": 9.73779641898966e-06, + "loss": 0.2564, + "step": 44940 + }, + { + "epoch": 0.5797120140317391, + "grad_norm": 0.8903027450013178, + "learning_rate": 9.73755658386174e-06, + "loss": 0.2489, + "step": 44950 + }, + { + "epoch": 0.579840982221735, + "grad_norm": 0.9295879406473306, + "learning_rate": 9.737316642052722e-06, + "loss": 0.2427, + "step": 44960 + }, + { + "epoch": 0.5799699504117309, + "grad_norm": 0.9129072851047293, + "learning_rate": 9.737076593568007e-06, + "loss": 0.2513, + "step": 44970 + }, + { + "epoch": 0.5800989186017269, + "grad_norm": 0.9935575488253203, + "learning_rate": 9.736836438413003e-06, + "loss": 0.2493, + "step": 44980 + }, + { + "epoch": 0.5802278867917228, + "grad_norm": 0.9728980010247883, + "learning_rate": 9.736596176593117e-06, + "loss": 0.257, + "step": 44990 + }, + { + "epoch": 0.5803568549817187, + "grad_norm": 0.9071323205717284, + "learning_rate": 9.736355808113758e-06, + "loss": 0.2293, + "step": 45000 + }, + { + "epoch": 0.5804858231717147, + "grad_norm": 0.9157133635570973, + "learning_rate": 9.73611533298034e-06, + "loss": 0.2427, + "step": 45010 + }, + { + "epoch": 0.5806147913617107, + "grad_norm": 1.0196434873549476, + "learning_rate": 9.735874751198278e-06, + "loss": 0.2673, + "step": 45020 + }, + { + "epoch": 0.5807437595517065, + "grad_norm": 0.9305234525438288, + "learning_rate": 9.735634062772987e-06, + "loss": 0.2579, + "step": 45030 + }, + { + "epoch": 0.5808727277417025, + "grad_norm": 1.037526820787731, + "learning_rate": 9.735393267709892e-06, + "loss": 0.2513, + "step": 45040 + }, + { + "epoch": 0.5810016959316985, + "grad_norm": 0.9939536746891978, + "learning_rate": 9.73515236601441e-06, + "loss": 0.2474, + "step": 45050 + }, + { + "epoch": 0.5811306641216943, + "grad_norm": 0.9684922133969062, + "learning_rate": 9.73491135769197e-06, + "loss": 0.2534, + "step": 45060 + }, + { + "epoch": 0.5812596323116903, + "grad_norm": 0.9956365427395856, + "learning_rate": 9.734670242747996e-06, + "loss": 0.2263, + "step": 45070 + }, + { + "epoch": 0.5813886005016863, + "grad_norm": 0.8926646512803093, + "learning_rate": 9.734429021187918e-06, + "loss": 0.2402, + "step": 45080 + }, + { + "epoch": 0.5815175686916823, + "grad_norm": 0.9442050400790316, + "learning_rate": 9.73418769301717e-06, + "loss": 0.2527, + "step": 45090 + }, + { + "epoch": 0.5816465368816781, + "grad_norm": 1.0121898498044075, + "learning_rate": 9.733946258241183e-06, + "loss": 0.249, + "step": 45100 + }, + { + "epoch": 0.5817755050716741, + "grad_norm": 0.8974807182086595, + "learning_rate": 9.733704716865395e-06, + "loss": 0.2564, + "step": 45110 + }, + { + "epoch": 0.58190447326167, + "grad_norm": 0.9351451199770546, + "learning_rate": 9.733463068895246e-06, + "loss": 0.2603, + "step": 45120 + }, + { + "epoch": 0.5820334414516659, + "grad_norm": 0.8811156039880056, + "learning_rate": 9.733221314336176e-06, + "loss": 0.248, + "step": 45130 + }, + { + "epoch": 0.5821624096416619, + "grad_norm": 0.9996052269552023, + "learning_rate": 9.732979453193632e-06, + "loss": 0.2365, + "step": 45140 + }, + { + "epoch": 0.5822913778316579, + "grad_norm": 0.8782141670372801, + "learning_rate": 9.732737485473056e-06, + "loss": 0.2375, + "step": 45150 + }, + { + "epoch": 0.5824203460216537, + "grad_norm": 0.9109903836146399, + "learning_rate": 9.732495411179898e-06, + "loss": 0.2518, + "step": 45160 + }, + { + "epoch": 0.5825493142116497, + "grad_norm": 0.9654099446374973, + "learning_rate": 9.732253230319613e-06, + "loss": 0.2454, + "step": 45170 + }, + { + "epoch": 0.5826782824016457, + "grad_norm": 1.0376276382872551, + "learning_rate": 9.732010942897648e-06, + "loss": 0.2442, + "step": 45180 + }, + { + "epoch": 0.5828072505916416, + "grad_norm": 1.0739906206358198, + "learning_rate": 9.731768548919464e-06, + "loss": 0.2511, + "step": 45190 + }, + { + "epoch": 0.5829362187816375, + "grad_norm": 0.9342048580807993, + "learning_rate": 9.731526048390516e-06, + "loss": 0.2397, + "step": 45200 + }, + { + "epoch": 0.5830651869716335, + "grad_norm": 1.0993440172680868, + "learning_rate": 9.731283441316266e-06, + "loss": 0.246, + "step": 45210 + }, + { + "epoch": 0.5831941551616294, + "grad_norm": 0.9352354542866181, + "learning_rate": 9.731040727702178e-06, + "loss": 0.2442, + "step": 45220 + }, + { + "epoch": 0.5833231233516253, + "grad_norm": 0.8798466040955187, + "learning_rate": 9.730797907553716e-06, + "loss": 0.2406, + "step": 45230 + }, + { + "epoch": 0.5834520915416213, + "grad_norm": 0.9979296459158908, + "learning_rate": 9.730554980876347e-06, + "loss": 0.239, + "step": 45240 + }, + { + "epoch": 0.5835810597316172, + "grad_norm": 0.9065583063257909, + "learning_rate": 9.730311947675544e-06, + "loss": 0.2328, + "step": 45250 + }, + { + "epoch": 0.5837100279216131, + "grad_norm": 0.9388934218243074, + "learning_rate": 9.730068807956779e-06, + "loss": 0.2475, + "step": 45260 + }, + { + "epoch": 0.583838996111609, + "grad_norm": 0.9339488821082245, + "learning_rate": 9.729825561725525e-06, + "loss": 0.2381, + "step": 45270 + }, + { + "epoch": 0.583967964301605, + "grad_norm": 0.9547004670693244, + "learning_rate": 9.729582208987263e-06, + "loss": 0.2418, + "step": 45280 + }, + { + "epoch": 0.584096932491601, + "grad_norm": 0.938556497798176, + "learning_rate": 9.729338749747468e-06, + "loss": 0.2523, + "step": 45290 + }, + { + "epoch": 0.5842259006815969, + "grad_norm": 0.9539088215781948, + "learning_rate": 9.729095184011627e-06, + "loss": 0.2396, + "step": 45300 + }, + { + "epoch": 0.5843548688715928, + "grad_norm": 0.9205645252042554, + "learning_rate": 9.728851511785222e-06, + "loss": 0.2436, + "step": 45310 + }, + { + "epoch": 0.5844838370615888, + "grad_norm": 0.8683575516489255, + "learning_rate": 9.728607733073739e-06, + "loss": 0.2376, + "step": 45320 + }, + { + "epoch": 0.5846128052515847, + "grad_norm": 0.9251848190909363, + "learning_rate": 9.72836384788267e-06, + "loss": 0.2382, + "step": 45330 + }, + { + "epoch": 0.5847417734415806, + "grad_norm": 0.9913701302176241, + "learning_rate": 9.728119856217508e-06, + "loss": 0.2566, + "step": 45340 + }, + { + "epoch": 0.5848707416315766, + "grad_norm": 0.951262916135731, + "learning_rate": 9.727875758083743e-06, + "loss": 0.2456, + "step": 45350 + }, + { + "epoch": 0.5849997098215725, + "grad_norm": 0.8435770729631329, + "learning_rate": 9.727631553486874e-06, + "loss": 0.25, + "step": 45360 + }, + { + "epoch": 0.5851286780115684, + "grad_norm": 1.0439712916630486, + "learning_rate": 9.7273872424324e-06, + "loss": 0.255, + "step": 45370 + }, + { + "epoch": 0.5852576462015644, + "grad_norm": 0.9697417089390997, + "learning_rate": 9.727142824925823e-06, + "loss": 0.2404, + "step": 45380 + }, + { + "epoch": 0.5853866143915604, + "grad_norm": 0.9323049122995571, + "learning_rate": 9.726898300972646e-06, + "loss": 0.2468, + "step": 45390 + }, + { + "epoch": 0.5855155825815562, + "grad_norm": 0.9173070837222326, + "learning_rate": 9.726653670578374e-06, + "loss": 0.2479, + "step": 45400 + }, + { + "epoch": 0.5856445507715522, + "grad_norm": 0.9646469469938169, + "learning_rate": 9.726408933748519e-06, + "loss": 0.2347, + "step": 45410 + }, + { + "epoch": 0.5857735189615482, + "grad_norm": 0.974354758928525, + "learning_rate": 9.726164090488589e-06, + "loss": 0.237, + "step": 45420 + }, + { + "epoch": 0.585902487151544, + "grad_norm": 0.9497427532358388, + "learning_rate": 9.7259191408041e-06, + "loss": 0.2502, + "step": 45430 + }, + { + "epoch": 0.58603145534154, + "grad_norm": 0.8557556833650662, + "learning_rate": 9.725674084700562e-06, + "loss": 0.2397, + "step": 45440 + }, + { + "epoch": 0.586160423531536, + "grad_norm": 0.9939022691348433, + "learning_rate": 9.725428922183503e-06, + "loss": 0.2403, + "step": 45450 + }, + { + "epoch": 0.5862893917215319, + "grad_norm": 0.9014395718481596, + "learning_rate": 9.725183653258434e-06, + "loss": 0.2462, + "step": 45460 + }, + { + "epoch": 0.5864183599115278, + "grad_norm": 0.8880910314832019, + "learning_rate": 9.724938277930885e-06, + "loss": 0.2402, + "step": 45470 + }, + { + "epoch": 0.5865473281015238, + "grad_norm": 0.9268667074452624, + "learning_rate": 9.724692796206374e-06, + "loss": 0.2308, + "step": 45480 + }, + { + "epoch": 0.5866762962915197, + "grad_norm": 1.010081893555742, + "learning_rate": 9.724447208090437e-06, + "loss": 0.253, + "step": 45490 + }, + { + "epoch": 0.5868052644815156, + "grad_norm": 0.8842677131995672, + "learning_rate": 9.7242015135886e-06, + "loss": 0.2439, + "step": 45500 + }, + { + "epoch": 0.5869342326715116, + "grad_norm": 0.9260901296078795, + "learning_rate": 9.723955712706397e-06, + "loss": 0.2482, + "step": 45510 + }, + { + "epoch": 0.5870632008615075, + "grad_norm": 0.833088502842384, + "learning_rate": 9.72370980544936e-06, + "loss": 0.255, + "step": 45520 + }, + { + "epoch": 0.5871921690515034, + "grad_norm": 1.0369797882551022, + "learning_rate": 9.72346379182303e-06, + "loss": 0.2487, + "step": 45530 + }, + { + "epoch": 0.5873211372414994, + "grad_norm": 0.9121451027043639, + "learning_rate": 9.723217671832946e-06, + "loss": 0.2565, + "step": 45540 + }, + { + "epoch": 0.5874501054314953, + "grad_norm": 0.8857917411689983, + "learning_rate": 9.722971445484646e-06, + "loss": 0.2378, + "step": 45550 + }, + { + "epoch": 0.5875790736214913, + "grad_norm": 0.7905322563350154, + "learning_rate": 9.722725112783681e-06, + "loss": 0.2357, + "step": 45560 + }, + { + "epoch": 0.5877080418114872, + "grad_norm": 0.9395444346046724, + "learning_rate": 9.722478673735594e-06, + "loss": 0.2528, + "step": 45570 + }, + { + "epoch": 0.5878370100014831, + "grad_norm": 0.9900702758968783, + "learning_rate": 9.722232128345935e-06, + "loss": 0.2436, + "step": 45580 + }, + { + "epoch": 0.5879659781914791, + "grad_norm": 0.8630600233513747, + "learning_rate": 9.721985476620257e-06, + "loss": 0.2431, + "step": 45590 + }, + { + "epoch": 0.588094946381475, + "grad_norm": 0.9386349871692039, + "learning_rate": 9.721738718564112e-06, + "loss": 0.2459, + "step": 45600 + }, + { + "epoch": 0.5882239145714709, + "grad_norm": 0.8495608161849812, + "learning_rate": 9.721491854183058e-06, + "loss": 0.2302, + "step": 45610 + }, + { + "epoch": 0.5883528827614669, + "grad_norm": 0.8897216602431337, + "learning_rate": 9.721244883482655e-06, + "loss": 0.2535, + "step": 45620 + }, + { + "epoch": 0.5884818509514628, + "grad_norm": 0.9538002096944159, + "learning_rate": 9.72099780646846e-06, + "loss": 0.2478, + "step": 45630 + }, + { + "epoch": 0.5886108191414587, + "grad_norm": 0.9508368340979104, + "learning_rate": 9.720750623146042e-06, + "loss": 0.2419, + "step": 45640 + }, + { + "epoch": 0.5887397873314547, + "grad_norm": 0.9972922695295919, + "learning_rate": 9.720503333520966e-06, + "loss": 0.2457, + "step": 45650 + }, + { + "epoch": 0.5888687555214507, + "grad_norm": 0.9833330937753029, + "learning_rate": 9.720255937598797e-06, + "loss": 0.2491, + "step": 45660 + }, + { + "epoch": 0.5889977237114465, + "grad_norm": 0.9987398099840884, + "learning_rate": 9.720008435385108e-06, + "loss": 0.2563, + "step": 45670 + }, + { + "epoch": 0.5891266919014425, + "grad_norm": 0.8797482097834699, + "learning_rate": 9.719760826885474e-06, + "loss": 0.2483, + "step": 45680 + }, + { + "epoch": 0.5892556600914385, + "grad_norm": 0.9373506037694694, + "learning_rate": 9.719513112105469e-06, + "loss": 0.2421, + "step": 45690 + }, + { + "epoch": 0.5893846282814343, + "grad_norm": 0.9001277072476798, + "learning_rate": 9.71926529105067e-06, + "loss": 0.2463, + "step": 45700 + }, + { + "epoch": 0.5895135964714303, + "grad_norm": 0.9483280761789609, + "learning_rate": 9.71901736372666e-06, + "loss": 0.2446, + "step": 45710 + }, + { + "epoch": 0.5896425646614263, + "grad_norm": 0.991051087738522, + "learning_rate": 9.71876933013902e-06, + "loss": 0.2524, + "step": 45720 + }, + { + "epoch": 0.5897715328514221, + "grad_norm": 0.9332370287199685, + "learning_rate": 9.718521190293337e-06, + "loss": 0.2546, + "step": 45730 + }, + { + "epoch": 0.5899005010414181, + "grad_norm": 1.0484651083137797, + "learning_rate": 9.718272944195197e-06, + "loss": 0.2489, + "step": 45740 + }, + { + "epoch": 0.5900294692314141, + "grad_norm": 0.9419398419171165, + "learning_rate": 9.71802459185019e-06, + "loss": 0.2433, + "step": 45750 + }, + { + "epoch": 0.59015843742141, + "grad_norm": 0.9091826002554101, + "learning_rate": 9.71777613326391e-06, + "loss": 0.2592, + "step": 45760 + }, + { + "epoch": 0.5902874056114059, + "grad_norm": 0.9426097280124583, + "learning_rate": 9.717527568441951e-06, + "loss": 0.263, + "step": 45770 + }, + { + "epoch": 0.5904163738014019, + "grad_norm": 0.9822237216759517, + "learning_rate": 9.717278897389909e-06, + "loss": 0.2509, + "step": 45780 + }, + { + "epoch": 0.5905453419913979, + "grad_norm": 0.917286855352765, + "learning_rate": 9.717030120113386e-06, + "loss": 0.2383, + "step": 45790 + }, + { + "epoch": 0.5906743101813937, + "grad_norm": 0.9394043798404642, + "learning_rate": 9.71678123661798e-06, + "loss": 0.2596, + "step": 45800 + }, + { + "epoch": 0.5908032783713897, + "grad_norm": 0.9098531703086877, + "learning_rate": 9.716532246909301e-06, + "loss": 0.2358, + "step": 45810 + }, + { + "epoch": 0.5909322465613857, + "grad_norm": 0.916540860930046, + "learning_rate": 9.716283150992953e-06, + "loss": 0.255, + "step": 45820 + }, + { + "epoch": 0.5910612147513816, + "grad_norm": 0.9026405852945698, + "learning_rate": 9.716033948874544e-06, + "loss": 0.2366, + "step": 45830 + }, + { + "epoch": 0.5911901829413775, + "grad_norm": 0.8948913029552088, + "learning_rate": 9.715784640559687e-06, + "loss": 0.2596, + "step": 45840 + }, + { + "epoch": 0.5913191511313735, + "grad_norm": 0.9238457134540102, + "learning_rate": 9.715535226053996e-06, + "loss": 0.2427, + "step": 45850 + }, + { + "epoch": 0.5914481193213694, + "grad_norm": 1.0123573688421967, + "learning_rate": 9.715285705363086e-06, + "loss": 0.25, + "step": 45860 + }, + { + "epoch": 0.5915770875113653, + "grad_norm": 0.9770375149733139, + "learning_rate": 9.715036078492578e-06, + "loss": 0.2635, + "step": 45870 + }, + { + "epoch": 0.5917060557013613, + "grad_norm": 0.9279239689134257, + "learning_rate": 9.71478634544809e-06, + "loss": 0.2407, + "step": 45880 + }, + { + "epoch": 0.5918350238913572, + "grad_norm": 0.8885554977532166, + "learning_rate": 9.71453650623525e-06, + "loss": 0.2371, + "step": 45890 + }, + { + "epoch": 0.5919639920813531, + "grad_norm": 0.9364313353967484, + "learning_rate": 9.71428656085968e-06, + "loss": 0.2322, + "step": 45900 + }, + { + "epoch": 0.5920929602713491, + "grad_norm": 0.908906544788089, + "learning_rate": 9.714036509327009e-06, + "loss": 0.2397, + "step": 45910 + }, + { + "epoch": 0.592221928461345, + "grad_norm": 0.9903482189750622, + "learning_rate": 9.713786351642869e-06, + "loss": 0.2402, + "step": 45920 + }, + { + "epoch": 0.592350896651341, + "grad_norm": 0.978223171234057, + "learning_rate": 9.713536087812895e-06, + "loss": 0.2423, + "step": 45930 + }, + { + "epoch": 0.5924798648413369, + "grad_norm": 0.8794767193404879, + "learning_rate": 9.713285717842716e-06, + "loss": 0.2457, + "step": 45940 + }, + { + "epoch": 0.5926088330313328, + "grad_norm": 0.8873428855133141, + "learning_rate": 9.713035241737974e-06, + "loss": 0.2373, + "step": 45950 + }, + { + "epoch": 0.5927378012213288, + "grad_norm": 0.9202890268267847, + "learning_rate": 9.71278465950431e-06, + "loss": 0.2441, + "step": 45960 + }, + { + "epoch": 0.5928667694113247, + "grad_norm": 1.0541689709629163, + "learning_rate": 9.712533971147368e-06, + "loss": 0.2487, + "step": 45970 + }, + { + "epoch": 0.5929957376013206, + "grad_norm": 0.8427564331050669, + "learning_rate": 9.712283176672787e-06, + "loss": 0.232, + "step": 45980 + }, + { + "epoch": 0.5931247057913166, + "grad_norm": 0.9471929363939334, + "learning_rate": 9.71203227608622e-06, + "loss": 0.2407, + "step": 45990 + }, + { + "epoch": 0.5932536739813125, + "grad_norm": 0.9002216230310893, + "learning_rate": 9.711781269393317e-06, + "loss": 0.2514, + "step": 46000 + }, + { + "epoch": 0.5933826421713084, + "grad_norm": 0.9046725225934891, + "learning_rate": 9.711530156599726e-06, + "loss": 0.2509, + "step": 46010 + }, + { + "epoch": 0.5935116103613044, + "grad_norm": 0.8806063583658174, + "learning_rate": 9.711278937711103e-06, + "loss": 0.2463, + "step": 46020 + }, + { + "epoch": 0.5936405785513004, + "grad_norm": 0.8457929695266004, + "learning_rate": 9.711027612733108e-06, + "loss": 0.2384, + "step": 46030 + }, + { + "epoch": 0.5937695467412962, + "grad_norm": 0.9121649763521568, + "learning_rate": 9.710776181671397e-06, + "loss": 0.2391, + "step": 46040 + }, + { + "epoch": 0.5938985149312922, + "grad_norm": 0.8318005574354542, + "learning_rate": 9.710524644531634e-06, + "loss": 0.2481, + "step": 46050 + }, + { + "epoch": 0.5940274831212882, + "grad_norm": 0.8567104039673458, + "learning_rate": 9.710273001319481e-06, + "loss": 0.2336, + "step": 46060 + }, + { + "epoch": 0.594156451311284, + "grad_norm": 0.9476169547648734, + "learning_rate": 9.710021252040607e-06, + "loss": 0.2323, + "step": 46070 + }, + { + "epoch": 0.59428541950128, + "grad_norm": 0.9252819120013026, + "learning_rate": 9.709769396700678e-06, + "loss": 0.237, + "step": 46080 + }, + { + "epoch": 0.594414387691276, + "grad_norm": 0.9346327338630556, + "learning_rate": 9.709517435305367e-06, + "loss": 0.2525, + "step": 46090 + }, + { + "epoch": 0.5945433558812718, + "grad_norm": 1.0497026831066125, + "learning_rate": 9.709265367860349e-06, + "loss": 0.2351, + "step": 46100 + }, + { + "epoch": 0.5946723240712678, + "grad_norm": 1.0238047890184405, + "learning_rate": 9.709013194371297e-06, + "loss": 0.2453, + "step": 46110 + }, + { + "epoch": 0.5948012922612638, + "grad_norm": 0.9737022685790234, + "learning_rate": 9.708760914843893e-06, + "loss": 0.2531, + "step": 46120 + }, + { + "epoch": 0.5949302604512597, + "grad_norm": 1.0570002063255854, + "learning_rate": 9.708508529283815e-06, + "loss": 0.2465, + "step": 46130 + }, + { + "epoch": 0.5950592286412556, + "grad_norm": 0.9319303309045089, + "learning_rate": 9.708256037696749e-06, + "loss": 0.2434, + "step": 46140 + }, + { + "epoch": 0.5951881968312516, + "grad_norm": 0.9613024715456185, + "learning_rate": 9.708003440088378e-06, + "loss": 0.2392, + "step": 46150 + }, + { + "epoch": 0.5953171650212475, + "grad_norm": 0.9808758288867244, + "learning_rate": 9.70775073646439e-06, + "loss": 0.2515, + "step": 46160 + }, + { + "epoch": 0.5954461332112434, + "grad_norm": 0.8802156510893486, + "learning_rate": 9.707497926830478e-06, + "loss": 0.2431, + "step": 46170 + }, + { + "epoch": 0.5955751014012394, + "grad_norm": 0.8748260932326097, + "learning_rate": 9.707245011192331e-06, + "loss": 0.2604, + "step": 46180 + }, + { + "epoch": 0.5957040695912353, + "grad_norm": 0.9139492357024326, + "learning_rate": 9.706991989555649e-06, + "loss": 0.2531, + "step": 46190 + }, + { + "epoch": 0.5958330377812313, + "grad_norm": 0.923093886547282, + "learning_rate": 9.706738861926126e-06, + "loss": 0.2507, + "step": 46200 + }, + { + "epoch": 0.5959620059712272, + "grad_norm": 0.969143357249875, + "learning_rate": 9.706485628309464e-06, + "loss": 0.2448, + "step": 46210 + }, + { + "epoch": 0.5960909741612231, + "grad_norm": 0.9538920245911232, + "learning_rate": 9.706232288711361e-06, + "loss": 0.2402, + "step": 46220 + }, + { + "epoch": 0.5962199423512191, + "grad_norm": 1.0491375476497617, + "learning_rate": 9.705978843137529e-06, + "loss": 0.252, + "step": 46230 + }, + { + "epoch": 0.596348910541215, + "grad_norm": 1.0521245864338704, + "learning_rate": 9.70572529159367e-06, + "loss": 0.2508, + "step": 46240 + }, + { + "epoch": 0.5964778787312109, + "grad_norm": 0.9931907039784263, + "learning_rate": 9.705471634085494e-06, + "loss": 0.2327, + "step": 46250 + }, + { + "epoch": 0.5966068469212069, + "grad_norm": 1.2443884583750453, + "learning_rate": 9.705217870618713e-06, + "loss": 0.2494, + "step": 46260 + }, + { + "epoch": 0.5967358151112028, + "grad_norm": 0.9539021513588811, + "learning_rate": 9.704964001199044e-06, + "loss": 0.2397, + "step": 46270 + }, + { + "epoch": 0.5968647833011987, + "grad_norm": 1.0553934810316712, + "learning_rate": 9.704710025832202e-06, + "loss": 0.2484, + "step": 46280 + }, + { + "epoch": 0.5969937514911947, + "grad_norm": 0.9232626688926174, + "learning_rate": 9.704455944523902e-06, + "loss": 0.2375, + "step": 46290 + }, + { + "epoch": 0.5971227196811907, + "grad_norm": 0.8817431014415865, + "learning_rate": 9.704201757279875e-06, + "loss": 0.2409, + "step": 46300 + }, + { + "epoch": 0.5972516878711865, + "grad_norm": 0.9251349043920255, + "learning_rate": 9.703947464105834e-06, + "loss": 0.23, + "step": 46310 + }, + { + "epoch": 0.5973806560611825, + "grad_norm": 0.814356868857168, + "learning_rate": 9.703693065007511e-06, + "loss": 0.2277, + "step": 46320 + }, + { + "epoch": 0.5975096242511785, + "grad_norm": 0.8428464413158879, + "learning_rate": 9.703438559990635e-06, + "loss": 0.2412, + "step": 46330 + }, + { + "epoch": 0.5976385924411743, + "grad_norm": 0.8400848700906504, + "learning_rate": 9.703183949060936e-06, + "loss": 0.2435, + "step": 46340 + }, + { + "epoch": 0.5977675606311703, + "grad_norm": 0.8139318184082364, + "learning_rate": 9.702929232224146e-06, + "loss": 0.2333, + "step": 46350 + }, + { + "epoch": 0.5978965288211663, + "grad_norm": 0.8720820115443988, + "learning_rate": 9.702674409486003e-06, + "loss": 0.2499, + "step": 46360 + }, + { + "epoch": 0.5980254970111621, + "grad_norm": 0.9766334103368228, + "learning_rate": 9.702419480852243e-06, + "loss": 0.2417, + "step": 46370 + }, + { + "epoch": 0.5981544652011581, + "grad_norm": 1.0225881457224397, + "learning_rate": 9.702164446328609e-06, + "loss": 0.2437, + "step": 46380 + }, + { + "epoch": 0.5982834333911541, + "grad_norm": 1.1976001926442144, + "learning_rate": 9.701909305920842e-06, + "loss": 0.2284, + "step": 46390 + }, + { + "epoch": 0.59841240158115, + "grad_norm": 0.9274641677779348, + "learning_rate": 9.701654059634687e-06, + "loss": 0.2361, + "step": 46400 + }, + { + "epoch": 0.5985413697711459, + "grad_norm": 0.9231087423573937, + "learning_rate": 9.701398707475893e-06, + "loss": 0.2424, + "step": 46410 + }, + { + "epoch": 0.5986703379611419, + "grad_norm": 1.0059704892792776, + "learning_rate": 9.701143249450209e-06, + "loss": 0.2448, + "step": 46420 + }, + { + "epoch": 0.5987993061511379, + "grad_norm": 0.9530495623962429, + "learning_rate": 9.700887685563389e-06, + "loss": 0.2342, + "step": 46430 + }, + { + "epoch": 0.5989282743411337, + "grad_norm": 0.9029769049954405, + "learning_rate": 9.700632015821185e-06, + "loss": 0.2497, + "step": 46440 + }, + { + "epoch": 0.5990572425311297, + "grad_norm": 0.9306836352817486, + "learning_rate": 9.700376240229358e-06, + "loss": 0.2389, + "step": 46450 + }, + { + "epoch": 0.5991862107211257, + "grad_norm": 0.988509137165121, + "learning_rate": 9.700120358793665e-06, + "loss": 0.2519, + "step": 46460 + }, + { + "epoch": 0.5993151789111216, + "grad_norm": 0.9688088673716433, + "learning_rate": 9.699864371519869e-06, + "loss": 0.2411, + "step": 46470 + }, + { + "epoch": 0.5994441471011175, + "grad_norm": 0.9733588749440903, + "learning_rate": 9.699608278413732e-06, + "loss": 0.2353, + "step": 46480 + }, + { + "epoch": 0.5995731152911135, + "grad_norm": 0.9370588584978851, + "learning_rate": 9.699352079481024e-06, + "loss": 0.2529, + "step": 46490 + }, + { + "epoch": 0.5997020834811094, + "grad_norm": 1.0037064463166807, + "learning_rate": 9.699095774727514e-06, + "loss": 0.2436, + "step": 46500 + }, + { + "epoch": 0.5998310516711053, + "grad_norm": 0.9046693373616611, + "learning_rate": 9.69883936415897e-06, + "loss": 0.2349, + "step": 46510 + }, + { + "epoch": 0.5999600198611013, + "grad_norm": 0.9339185429080487, + "learning_rate": 9.69858284778117e-06, + "loss": 0.2468, + "step": 46520 + }, + { + "epoch": 0.6000889880510972, + "grad_norm": 0.9598415254378689, + "learning_rate": 9.698326225599888e-06, + "loss": 0.2388, + "step": 46530 + }, + { + "epoch": 0.6002179562410931, + "grad_norm": 0.956883707400646, + "learning_rate": 9.698069497620904e-06, + "loss": 0.2526, + "step": 46540 + }, + { + "epoch": 0.6003469244310891, + "grad_norm": 0.9377491627509373, + "learning_rate": 9.697812663849997e-06, + "loss": 0.239, + "step": 46550 + }, + { + "epoch": 0.600475892621085, + "grad_norm": 0.8989364611220793, + "learning_rate": 9.697555724292952e-06, + "loss": 0.2373, + "step": 46560 + }, + { + "epoch": 0.600604860811081, + "grad_norm": 0.8307738359848911, + "learning_rate": 9.697298678955556e-06, + "loss": 0.2385, + "step": 46570 + }, + { + "epoch": 0.6007338290010769, + "grad_norm": 0.9638664376700625, + "learning_rate": 9.697041527843593e-06, + "loss": 0.2387, + "step": 46580 + }, + { + "epoch": 0.6008627971910728, + "grad_norm": 0.832493868744372, + "learning_rate": 9.696784270962858e-06, + "loss": 0.2419, + "step": 46590 + }, + { + "epoch": 0.6009917653810688, + "grad_norm": 0.8683909204670017, + "learning_rate": 9.696526908319141e-06, + "loss": 0.2478, + "step": 46600 + }, + { + "epoch": 0.6011207335710647, + "grad_norm": 1.0067778936802825, + "learning_rate": 9.69626943991824e-06, + "loss": 0.2415, + "step": 46610 + }, + { + "epoch": 0.6012497017610606, + "grad_norm": 0.83048113654664, + "learning_rate": 9.69601186576595e-06, + "loss": 0.2412, + "step": 46620 + }, + { + "epoch": 0.6013786699510566, + "grad_norm": 0.8962312709009692, + "learning_rate": 9.695754185868072e-06, + "loss": 0.2465, + "step": 46630 + }, + { + "epoch": 0.6015076381410525, + "grad_norm": 0.943387141557253, + "learning_rate": 9.695496400230411e-06, + "loss": 0.2286, + "step": 46640 + }, + { + "epoch": 0.6016366063310484, + "grad_norm": 0.9277757942993136, + "learning_rate": 9.695238508858767e-06, + "loss": 0.2347, + "step": 46650 + }, + { + "epoch": 0.6017655745210444, + "grad_norm": 0.8256041437698537, + "learning_rate": 9.694980511758951e-06, + "loss": 0.2416, + "step": 46660 + }, + { + "epoch": 0.6018945427110404, + "grad_norm": 0.8692689976614809, + "learning_rate": 9.694722408936772e-06, + "loss": 0.2293, + "step": 46670 + }, + { + "epoch": 0.6020235109010362, + "grad_norm": 0.9666316785530296, + "learning_rate": 9.694464200398041e-06, + "loss": 0.229, + "step": 46680 + }, + { + "epoch": 0.6021524790910322, + "grad_norm": 0.9137469896658451, + "learning_rate": 9.694205886148573e-06, + "loss": 0.2516, + "step": 46690 + }, + { + "epoch": 0.6022814472810282, + "grad_norm": 1.0489449556020922, + "learning_rate": 9.693947466194185e-06, + "loss": 0.2507, + "step": 46700 + }, + { + "epoch": 0.602410415471024, + "grad_norm": 0.9705416652131514, + "learning_rate": 9.693688940540696e-06, + "loss": 0.2288, + "step": 46710 + }, + { + "epoch": 0.60253938366102, + "grad_norm": 0.9109618774297801, + "learning_rate": 9.693430309193927e-06, + "loss": 0.2475, + "step": 46720 + }, + { + "epoch": 0.602668351851016, + "grad_norm": 0.9351784117429849, + "learning_rate": 9.693171572159703e-06, + "loss": 0.2504, + "step": 46730 + }, + { + "epoch": 0.6027973200410118, + "grad_norm": 0.956018000426199, + "learning_rate": 9.692912729443849e-06, + "loss": 0.245, + "step": 46740 + }, + { + "epoch": 0.6029262882310078, + "grad_norm": 1.0684931853673767, + "learning_rate": 9.692653781052194e-06, + "loss": 0.243, + "step": 46750 + }, + { + "epoch": 0.6030552564210038, + "grad_norm": 0.9159899958619335, + "learning_rate": 9.69239472699057e-06, + "loss": 0.2381, + "step": 46760 + }, + { + "epoch": 0.6031842246109997, + "grad_norm": 0.869459449152426, + "learning_rate": 9.69213556726481e-06, + "loss": 0.2451, + "step": 46770 + }, + { + "epoch": 0.6033131928009956, + "grad_norm": 1.030056994655708, + "learning_rate": 9.691876301880747e-06, + "loss": 0.2441, + "step": 46780 + }, + { + "epoch": 0.6034421609909916, + "grad_norm": 0.8806925456952411, + "learning_rate": 9.691616930844222e-06, + "loss": 0.2389, + "step": 46790 + }, + { + "epoch": 0.6035711291809875, + "grad_norm": 0.8720916735097218, + "learning_rate": 9.691357454161076e-06, + "loss": 0.2473, + "step": 46800 + }, + { + "epoch": 0.6037000973709834, + "grad_norm": 0.8935236330132001, + "learning_rate": 9.691097871837153e-06, + "loss": 0.253, + "step": 46810 + }, + { + "epoch": 0.6038290655609794, + "grad_norm": 0.9000841754046082, + "learning_rate": 9.690838183878296e-06, + "loss": 0.235, + "step": 46820 + }, + { + "epoch": 0.6039580337509753, + "grad_norm": 0.8048964600688728, + "learning_rate": 9.69057839029035e-06, + "loss": 0.2398, + "step": 46830 + }, + { + "epoch": 0.6040870019409713, + "grad_norm": 0.8236333465161056, + "learning_rate": 9.690318491079171e-06, + "loss": 0.241, + "step": 46840 + }, + { + "epoch": 0.6042159701309672, + "grad_norm": 0.9209739704010036, + "learning_rate": 9.69005848625061e-06, + "loss": 0.2384, + "step": 46850 + }, + { + "epoch": 0.6043449383209631, + "grad_norm": 0.9185618486816345, + "learning_rate": 9.689798375810517e-06, + "loss": 0.2512, + "step": 46860 + }, + { + "epoch": 0.6044739065109591, + "grad_norm": 0.9445871142072764, + "learning_rate": 9.689538159764756e-06, + "loss": 0.2424, + "step": 46870 + }, + { + "epoch": 0.604602874700955, + "grad_norm": 0.899949167301755, + "learning_rate": 9.689277838119182e-06, + "loss": 0.2477, + "step": 46880 + }, + { + "epoch": 0.6047318428909509, + "grad_norm": 0.9475502163055673, + "learning_rate": 9.689017410879658e-06, + "loss": 0.2306, + "step": 46890 + }, + { + "epoch": 0.6048608110809469, + "grad_norm": 0.8513443690606325, + "learning_rate": 9.688756878052048e-06, + "loss": 0.235, + "step": 46900 + }, + { + "epoch": 0.6049897792709428, + "grad_norm": 0.8958931606867658, + "learning_rate": 9.68849623964222e-06, + "loss": 0.248, + "step": 46910 + }, + { + "epoch": 0.6051187474609387, + "grad_norm": 0.9921090605628679, + "learning_rate": 9.688235495656043e-06, + "loss": 0.2557, + "step": 46920 + }, + { + "epoch": 0.6052477156509347, + "grad_norm": 0.9153189121736057, + "learning_rate": 9.687974646099387e-06, + "loss": 0.2384, + "step": 46930 + }, + { + "epoch": 0.6053766838409307, + "grad_norm": 0.9595422670109821, + "learning_rate": 9.687713690978127e-06, + "loss": 0.2302, + "step": 46940 + }, + { + "epoch": 0.6055056520309265, + "grad_norm": 0.9403397817855502, + "learning_rate": 9.68745263029814e-06, + "loss": 0.243, + "step": 46950 + }, + { + "epoch": 0.6056346202209225, + "grad_norm": 0.9563713740548524, + "learning_rate": 9.687191464065303e-06, + "loss": 0.2359, + "step": 46960 + }, + { + "epoch": 0.6057635884109185, + "grad_norm": 0.943859164202391, + "learning_rate": 9.686930192285497e-06, + "loss": 0.2536, + "step": 46970 + }, + { + "epoch": 0.6058925566009143, + "grad_norm": 0.9347692167292072, + "learning_rate": 9.686668814964608e-06, + "loss": 0.2384, + "step": 46980 + }, + { + "epoch": 0.6060215247909103, + "grad_norm": 0.9600093267226504, + "learning_rate": 9.686407332108519e-06, + "loss": 0.2395, + "step": 46990 + }, + { + "epoch": 0.6061504929809063, + "grad_norm": 1.0024083746065249, + "learning_rate": 9.686145743723117e-06, + "loss": 0.2575, + "step": 47000 + }, + { + "epoch": 0.6062794611709021, + "grad_norm": 0.8830613600046356, + "learning_rate": 9.685884049814295e-06, + "loss": 0.2333, + "step": 47010 + }, + { + "epoch": 0.6064084293608981, + "grad_norm": 0.8772316804008117, + "learning_rate": 9.685622250387946e-06, + "loss": 0.2409, + "step": 47020 + }, + { + "epoch": 0.6065373975508941, + "grad_norm": 1.0182407938953646, + "learning_rate": 9.685360345449964e-06, + "loss": 0.2486, + "step": 47030 + }, + { + "epoch": 0.6066663657408901, + "grad_norm": 0.9146922348775202, + "learning_rate": 9.685098335006248e-06, + "loss": 0.2494, + "step": 47040 + }, + { + "epoch": 0.6067953339308859, + "grad_norm": 0.8707236898982035, + "learning_rate": 9.684836219062696e-06, + "loss": 0.2617, + "step": 47050 + }, + { + "epoch": 0.6069243021208819, + "grad_norm": 0.9179826753545232, + "learning_rate": 9.684573997625212e-06, + "loss": 0.2401, + "step": 47060 + }, + { + "epoch": 0.6070532703108779, + "grad_norm": 0.9901649348402106, + "learning_rate": 9.6843116706997e-06, + "loss": 0.246, + "step": 47070 + }, + { + "epoch": 0.6071822385008737, + "grad_norm": 1.1762726815470148, + "learning_rate": 9.684049238292067e-06, + "loss": 0.2497, + "step": 47080 + }, + { + "epoch": 0.6073112066908697, + "grad_norm": 0.9049704477552122, + "learning_rate": 9.683786700408222e-06, + "loss": 0.2493, + "step": 47090 + }, + { + "epoch": 0.6074401748808657, + "grad_norm": 0.8109334196581032, + "learning_rate": 9.68352405705408e-06, + "loss": 0.2483, + "step": 47100 + }, + { + "epoch": 0.6075691430708615, + "grad_norm": 0.9668018048315502, + "learning_rate": 9.683261308235551e-06, + "loss": 0.2426, + "step": 47110 + }, + { + "epoch": 0.6076981112608575, + "grad_norm": 0.9172841745621326, + "learning_rate": 9.682998453958555e-06, + "loss": 0.2502, + "step": 47120 + }, + { + "epoch": 0.6078270794508535, + "grad_norm": 0.9761069437257047, + "learning_rate": 9.682735494229009e-06, + "loss": 0.2409, + "step": 47130 + }, + { + "epoch": 0.6079560476408494, + "grad_norm": 0.8231726737436704, + "learning_rate": 9.682472429052834e-06, + "loss": 0.2386, + "step": 47140 + }, + { + "epoch": 0.6080850158308453, + "grad_norm": 0.9029891860967799, + "learning_rate": 9.682209258435956e-06, + "loss": 0.2371, + "step": 47150 + }, + { + "epoch": 0.6082139840208413, + "grad_norm": 0.8222601590661713, + "learning_rate": 9.681945982384299e-06, + "loss": 0.2448, + "step": 47160 + }, + { + "epoch": 0.6083429522108372, + "grad_norm": 0.9378210776533222, + "learning_rate": 9.681682600903792e-06, + "loss": 0.2558, + "step": 47170 + }, + { + "epoch": 0.6084719204008331, + "grad_norm": 0.8127893849082016, + "learning_rate": 9.681419114000366e-06, + "loss": 0.2387, + "step": 47180 + }, + { + "epoch": 0.6086008885908291, + "grad_norm": 0.9193579486435004, + "learning_rate": 9.681155521679955e-06, + "loss": 0.2487, + "step": 47190 + }, + { + "epoch": 0.608729856780825, + "grad_norm": 0.9801027281396898, + "learning_rate": 9.680891823948495e-06, + "loss": 0.2345, + "step": 47200 + }, + { + "epoch": 0.608858824970821, + "grad_norm": 0.9483091119144256, + "learning_rate": 9.680628020811921e-06, + "loss": 0.2528, + "step": 47210 + }, + { + "epoch": 0.6089877931608169, + "grad_norm": 0.9000874086359788, + "learning_rate": 9.680364112276177e-06, + "loss": 0.25, + "step": 47220 + }, + { + "epoch": 0.6091167613508128, + "grad_norm": 0.9020685092133015, + "learning_rate": 9.680100098347202e-06, + "loss": 0.2393, + "step": 47230 + }, + { + "epoch": 0.6092457295408088, + "grad_norm": 0.9896155649053582, + "learning_rate": 9.679835979030944e-06, + "loss": 0.2589, + "step": 47240 + }, + { + "epoch": 0.6093746977308047, + "grad_norm": 0.9252042246907902, + "learning_rate": 9.679571754333352e-06, + "loss": 0.2338, + "step": 47250 + }, + { + "epoch": 0.6095036659208006, + "grad_norm": 0.9637300097415333, + "learning_rate": 9.679307424260372e-06, + "loss": 0.2426, + "step": 47260 + }, + { + "epoch": 0.6096326341107966, + "grad_norm": 1.0509520336392213, + "learning_rate": 9.679042988817957e-06, + "loss": 0.2459, + "step": 47270 + }, + { + "epoch": 0.6097616023007925, + "grad_norm": 0.9548824834209904, + "learning_rate": 9.678778448012063e-06, + "loss": 0.2367, + "step": 47280 + }, + { + "epoch": 0.6098905704907884, + "grad_norm": 0.8115703439706732, + "learning_rate": 9.678513801848645e-06, + "loss": 0.238, + "step": 47290 + }, + { + "epoch": 0.6100195386807844, + "grad_norm": 0.8773037110837472, + "learning_rate": 9.678249050333666e-06, + "loss": 0.2422, + "step": 47300 + }, + { + "epoch": 0.6101485068707804, + "grad_norm": 0.9628619567378109, + "learning_rate": 9.677984193473085e-06, + "loss": 0.2486, + "step": 47310 + }, + { + "epoch": 0.6102774750607762, + "grad_norm": 0.9110392255579257, + "learning_rate": 9.677719231272865e-06, + "loss": 0.2416, + "step": 47320 + }, + { + "epoch": 0.6104064432507722, + "grad_norm": 0.9342774560326872, + "learning_rate": 9.677454163738974e-06, + "loss": 0.2464, + "step": 47330 + }, + { + "epoch": 0.6105354114407682, + "grad_norm": 0.8753209032264397, + "learning_rate": 9.677188990877382e-06, + "loss": 0.2376, + "step": 47340 + }, + { + "epoch": 0.610664379630764, + "grad_norm": 0.9544930549707448, + "learning_rate": 9.676923712694059e-06, + "loss": 0.2413, + "step": 47350 + }, + { + "epoch": 0.61079334782076, + "grad_norm": 0.9214189677263753, + "learning_rate": 9.67665832919498e-06, + "loss": 0.2325, + "step": 47360 + }, + { + "epoch": 0.610922316010756, + "grad_norm": 0.9225716813838306, + "learning_rate": 9.676392840386115e-06, + "loss": 0.2361, + "step": 47370 + }, + { + "epoch": 0.6110512842007518, + "grad_norm": 0.9280857065142507, + "learning_rate": 9.67612724627345e-06, + "loss": 0.2484, + "step": 47380 + }, + { + "epoch": 0.6111802523907478, + "grad_norm": 0.9708703256036711, + "learning_rate": 9.675861546862963e-06, + "loss": 0.2462, + "step": 47390 + }, + { + "epoch": 0.6113092205807438, + "grad_norm": 0.9669113646162717, + "learning_rate": 9.675595742160635e-06, + "loss": 0.23, + "step": 47400 + }, + { + "epoch": 0.6114381887707397, + "grad_norm": 0.9322426065445902, + "learning_rate": 9.675329832172454e-06, + "loss": 0.2478, + "step": 47410 + }, + { + "epoch": 0.6115671569607356, + "grad_norm": 0.8480695885073709, + "learning_rate": 9.675063816904406e-06, + "loss": 0.2335, + "step": 47420 + }, + { + "epoch": 0.6116961251507316, + "grad_norm": 0.8667015810387845, + "learning_rate": 9.674797696362484e-06, + "loss": 0.255, + "step": 47430 + }, + { + "epoch": 0.6118250933407275, + "grad_norm": 1.0142005498801143, + "learning_rate": 9.674531470552677e-06, + "loss": 0.2467, + "step": 47440 + }, + { + "epoch": 0.6119540615307234, + "grad_norm": 0.90519365162705, + "learning_rate": 9.674265139480982e-06, + "loss": 0.2422, + "step": 47450 + }, + { + "epoch": 0.6120830297207194, + "grad_norm": 0.9401095596874158, + "learning_rate": 9.673998703153395e-06, + "loss": 0.2562, + "step": 47460 + }, + { + "epoch": 0.6122119979107153, + "grad_norm": 0.997504010565957, + "learning_rate": 9.673732161575917e-06, + "loss": 0.2578, + "step": 47470 + }, + { + "epoch": 0.6123409661007113, + "grad_norm": 0.8793614501549643, + "learning_rate": 9.67346551475455e-06, + "loss": 0.2271, + "step": 47480 + }, + { + "epoch": 0.6124699342907072, + "grad_norm": 0.8849287303759836, + "learning_rate": 9.673198762695297e-06, + "loss": 0.24, + "step": 47490 + }, + { + "epoch": 0.6125989024807031, + "grad_norm": 0.9801884961754913, + "learning_rate": 9.672931905404166e-06, + "loss": 0.2364, + "step": 47500 + }, + { + "epoch": 0.6127278706706991, + "grad_norm": 0.9311994421735004, + "learning_rate": 9.672664942887166e-06, + "loss": 0.2237, + "step": 47510 + }, + { + "epoch": 0.612856838860695, + "grad_norm": 0.8956236676279736, + "learning_rate": 9.672397875150308e-06, + "loss": 0.242, + "step": 47520 + }, + { + "epoch": 0.612985807050691, + "grad_norm": 0.9891203015008257, + "learning_rate": 9.672130702199607e-06, + "loss": 0.2367, + "step": 47530 + }, + { + "epoch": 0.6131147752406869, + "grad_norm": 1.0443931658444072, + "learning_rate": 9.671863424041077e-06, + "loss": 0.2377, + "step": 47540 + }, + { + "epoch": 0.6132437434306828, + "grad_norm": 0.9588684948851518, + "learning_rate": 9.671596040680739e-06, + "loss": 0.2437, + "step": 47550 + }, + { + "epoch": 0.6133727116206787, + "grad_norm": 0.9398669680531122, + "learning_rate": 9.671328552124613e-06, + "loss": 0.2527, + "step": 47560 + }, + { + "epoch": 0.6135016798106747, + "grad_norm": 0.8937319943065953, + "learning_rate": 9.671060958378722e-06, + "loss": 0.2477, + "step": 47570 + }, + { + "epoch": 0.6136306480006707, + "grad_norm": 0.8927107956223522, + "learning_rate": 9.670793259449092e-06, + "loss": 0.2363, + "step": 47580 + }, + { + "epoch": 0.6137596161906665, + "grad_norm": 0.8808563476259758, + "learning_rate": 9.670525455341751e-06, + "loss": 0.245, + "step": 47590 + }, + { + "epoch": 0.6138885843806625, + "grad_norm": 1.040163202650026, + "learning_rate": 9.670257546062729e-06, + "loss": 0.2411, + "step": 47600 + }, + { + "epoch": 0.6140175525706585, + "grad_norm": 0.8727962685080299, + "learning_rate": 9.669989531618061e-06, + "loss": 0.2305, + "step": 47610 + }, + { + "epoch": 0.6141465207606543, + "grad_norm": 0.7848753753835094, + "learning_rate": 9.669721412013779e-06, + "loss": 0.2372, + "step": 47620 + }, + { + "epoch": 0.6142754889506503, + "grad_norm": 0.8329082177075611, + "learning_rate": 9.669453187255924e-06, + "loss": 0.2337, + "step": 47630 + }, + { + "epoch": 0.6144044571406463, + "grad_norm": 0.9251412241334299, + "learning_rate": 9.669184857350534e-06, + "loss": 0.2443, + "step": 47640 + }, + { + "epoch": 0.6145334253306421, + "grad_norm": 0.92102954329784, + "learning_rate": 9.668916422303652e-06, + "loss": 0.2418, + "step": 47650 + }, + { + "epoch": 0.6146623935206381, + "grad_norm": 0.9970902406843436, + "learning_rate": 9.66864788212132e-06, + "loss": 0.2464, + "step": 47660 + }, + { + "epoch": 0.6147913617106341, + "grad_norm": 0.9974360499797937, + "learning_rate": 9.668379236809588e-06, + "loss": 0.2319, + "step": 47670 + }, + { + "epoch": 0.6149203299006301, + "grad_norm": 0.9553333009967167, + "learning_rate": 9.668110486374505e-06, + "loss": 0.2314, + "step": 47680 + }, + { + "epoch": 0.6150492980906259, + "grad_norm": 0.9139341880804867, + "learning_rate": 9.66784163082212e-06, + "loss": 0.2378, + "step": 47690 + }, + { + "epoch": 0.6151782662806219, + "grad_norm": 0.9619936853010038, + "learning_rate": 9.667572670158492e-06, + "loss": 0.2284, + "step": 47700 + }, + { + "epoch": 0.6153072344706179, + "grad_norm": 0.8017526386822298, + "learning_rate": 9.667303604389675e-06, + "loss": 0.2411, + "step": 47710 + }, + { + "epoch": 0.6154362026606137, + "grad_norm": 0.9826978567459691, + "learning_rate": 9.667034433521727e-06, + "loss": 0.2499, + "step": 47720 + }, + { + "epoch": 0.6155651708506097, + "grad_norm": 0.8600494913446758, + "learning_rate": 9.66676515756071e-06, + "loss": 0.2366, + "step": 47730 + }, + { + "epoch": 0.6156941390406057, + "grad_norm": 0.9647804241071061, + "learning_rate": 9.666495776512688e-06, + "loss": 0.2303, + "step": 47740 + }, + { + "epoch": 0.6158231072306015, + "grad_norm": 0.9985080849540195, + "learning_rate": 9.666226290383726e-06, + "loss": 0.2387, + "step": 47750 + }, + { + "epoch": 0.6159520754205975, + "grad_norm": 1.000209232299648, + "learning_rate": 9.665956699179892e-06, + "loss": 0.2425, + "step": 47760 + }, + { + "epoch": 0.6160810436105935, + "grad_norm": 0.916412194340159, + "learning_rate": 9.66568700290726e-06, + "loss": 0.2484, + "step": 47770 + }, + { + "epoch": 0.6162100118005894, + "grad_norm": 0.9612841362076524, + "learning_rate": 9.6654172015719e-06, + "loss": 0.2552, + "step": 47780 + }, + { + "epoch": 0.6163389799905853, + "grad_norm": 0.8860910595808394, + "learning_rate": 9.665147295179888e-06, + "loss": 0.2551, + "step": 47790 + }, + { + "epoch": 0.6164679481805813, + "grad_norm": 0.9494970874555992, + "learning_rate": 9.664877283737301e-06, + "loss": 0.2502, + "step": 47800 + }, + { + "epoch": 0.6165969163705772, + "grad_norm": 0.9255287792503507, + "learning_rate": 9.664607167250221e-06, + "loss": 0.2448, + "step": 47810 + }, + { + "epoch": 0.6167258845605731, + "grad_norm": 0.9384927866491506, + "learning_rate": 9.66433694572473e-06, + "loss": 0.2301, + "step": 47820 + }, + { + "epoch": 0.6168548527505691, + "grad_norm": 0.9993428379487944, + "learning_rate": 9.664066619166912e-06, + "loss": 0.2482, + "step": 47830 + }, + { + "epoch": 0.616983820940565, + "grad_norm": 0.9700596743876473, + "learning_rate": 9.663796187582854e-06, + "loss": 0.2342, + "step": 47840 + }, + { + "epoch": 0.617112789130561, + "grad_norm": 0.9244726560893052, + "learning_rate": 9.663525650978649e-06, + "loss": 0.2497, + "step": 47850 + }, + { + "epoch": 0.6172417573205569, + "grad_norm": 0.8646191150002646, + "learning_rate": 9.663255009360385e-06, + "loss": 0.2412, + "step": 47860 + }, + { + "epoch": 0.6173707255105528, + "grad_norm": 0.9702100369554293, + "learning_rate": 9.662984262734157e-06, + "loss": 0.2321, + "step": 47870 + }, + { + "epoch": 0.6174996937005488, + "grad_norm": 0.9343400107459566, + "learning_rate": 9.662713411106062e-06, + "loss": 0.2348, + "step": 47880 + }, + { + "epoch": 0.6176286618905447, + "grad_norm": 0.9282084898383688, + "learning_rate": 9.662442454482202e-06, + "loss": 0.2326, + "step": 47890 + }, + { + "epoch": 0.6177576300805406, + "grad_norm": 0.9029442375113781, + "learning_rate": 9.662171392868673e-06, + "loss": 0.242, + "step": 47900 + }, + { + "epoch": 0.6178865982705366, + "grad_norm": 0.9308433499794586, + "learning_rate": 9.661900226271584e-06, + "loss": 0.2462, + "step": 47910 + }, + { + "epoch": 0.6180155664605325, + "grad_norm": 0.8923247578486652, + "learning_rate": 9.661628954697038e-06, + "loss": 0.2395, + "step": 47920 + }, + { + "epoch": 0.6181445346505284, + "grad_norm": 0.8548180302981618, + "learning_rate": 9.661357578151145e-06, + "loss": 0.244, + "step": 47930 + }, + { + "epoch": 0.6182735028405244, + "grad_norm": 0.9684643475220452, + "learning_rate": 9.661086096640015e-06, + "loss": 0.2448, + "step": 47940 + }, + { + "epoch": 0.6184024710305204, + "grad_norm": 0.8923305844045013, + "learning_rate": 9.660814510169763e-06, + "loss": 0.2449, + "step": 47950 + }, + { + "epoch": 0.6185314392205162, + "grad_norm": 0.9501482400504037, + "learning_rate": 9.6605428187465e-06, + "loss": 0.2302, + "step": 47960 + }, + { + "epoch": 0.6186604074105122, + "grad_norm": 0.9748373056070148, + "learning_rate": 9.660271022376351e-06, + "loss": 0.2529, + "step": 47970 + }, + { + "epoch": 0.6187893756005082, + "grad_norm": 0.8904802747873807, + "learning_rate": 9.659999121065431e-06, + "loss": 0.2385, + "step": 47980 + }, + { + "epoch": 0.618918343790504, + "grad_norm": 0.8318109363649712, + "learning_rate": 9.659727114819865e-06, + "loss": 0.2403, + "step": 47990 + }, + { + "epoch": 0.6190473119805, + "grad_norm": 0.884633156830653, + "learning_rate": 9.659455003645777e-06, + "loss": 0.2328, + "step": 48000 + }, + { + "epoch": 0.619176280170496, + "grad_norm": 0.9706444477815508, + "learning_rate": 9.659182787549294e-06, + "loss": 0.2388, + "step": 48010 + }, + { + "epoch": 0.6193052483604918, + "grad_norm": 0.9206898356379083, + "learning_rate": 9.65891046653655e-06, + "loss": 0.2527, + "step": 48020 + }, + { + "epoch": 0.6194342165504878, + "grad_norm": 0.8336081652812539, + "learning_rate": 9.658638040613671e-06, + "loss": 0.2361, + "step": 48030 + }, + { + "epoch": 0.6195631847404838, + "grad_norm": 0.9280140704406333, + "learning_rate": 9.658365509786794e-06, + "loss": 0.2447, + "step": 48040 + }, + { + "epoch": 0.6196921529304797, + "grad_norm": 0.9015920556220043, + "learning_rate": 9.658092874062058e-06, + "loss": 0.2452, + "step": 48050 + }, + { + "epoch": 0.6198211211204756, + "grad_norm": 0.9907309676923707, + "learning_rate": 9.6578201334456e-06, + "loss": 0.2427, + "step": 48060 + }, + { + "epoch": 0.6199500893104716, + "grad_norm": 0.974919898910003, + "learning_rate": 9.657547287943562e-06, + "loss": 0.2469, + "step": 48070 + }, + { + "epoch": 0.6200790575004675, + "grad_norm": 0.9679104517864502, + "learning_rate": 9.657274337562088e-06, + "loss": 0.2483, + "step": 48080 + }, + { + "epoch": 0.6202080256904634, + "grad_norm": 0.9670290755797529, + "learning_rate": 9.657001282307325e-06, + "loss": 0.2598, + "step": 48090 + }, + { + "epoch": 0.6203369938804594, + "grad_norm": 0.9174294446524367, + "learning_rate": 9.656728122185421e-06, + "loss": 0.2511, + "step": 48100 + }, + { + "epoch": 0.6204659620704553, + "grad_norm": 0.8833656520539249, + "learning_rate": 9.656454857202529e-06, + "loss": 0.2454, + "step": 48110 + }, + { + "epoch": 0.6205949302604512, + "grad_norm": 0.993031383565587, + "learning_rate": 9.6561814873648e-06, + "loss": 0.2463, + "step": 48120 + }, + { + "epoch": 0.6207238984504472, + "grad_norm": 0.9645693139925939, + "learning_rate": 9.65590801267839e-06, + "loss": 0.2483, + "step": 48130 + }, + { + "epoch": 0.6208528666404431, + "grad_norm": 0.9262614446853259, + "learning_rate": 9.655634433149457e-06, + "loss": 0.2562, + "step": 48140 + }, + { + "epoch": 0.6209818348304391, + "grad_norm": 0.9156255283105448, + "learning_rate": 9.655360748784162e-06, + "loss": 0.2396, + "step": 48150 + }, + { + "epoch": 0.621110803020435, + "grad_norm": 0.950086741550899, + "learning_rate": 9.65508695958867e-06, + "loss": 0.2437, + "step": 48160 + }, + { + "epoch": 0.621239771210431, + "grad_norm": 0.8971583521252183, + "learning_rate": 9.654813065569142e-06, + "loss": 0.2564, + "step": 48170 + }, + { + "epoch": 0.6213687394004269, + "grad_norm": 1.0053679888863174, + "learning_rate": 9.654539066731748e-06, + "loss": 0.2459, + "step": 48180 + }, + { + "epoch": 0.6214977075904228, + "grad_norm": 0.9007103232502943, + "learning_rate": 9.65426496308266e-06, + "loss": 0.2546, + "step": 48190 + }, + { + "epoch": 0.6216266757804187, + "grad_norm": 0.8484710708378479, + "learning_rate": 9.653990754628046e-06, + "loss": 0.2493, + "step": 48200 + }, + { + "epoch": 0.6217556439704147, + "grad_norm": 0.8235425964574302, + "learning_rate": 9.653716441374084e-06, + "loss": 0.2336, + "step": 48210 + }, + { + "epoch": 0.6218846121604107, + "grad_norm": 0.8996728755205978, + "learning_rate": 9.653442023326948e-06, + "loss": 0.2409, + "step": 48220 + }, + { + "epoch": 0.6220135803504065, + "grad_norm": 0.9125081077723608, + "learning_rate": 9.653167500492821e-06, + "loss": 0.247, + "step": 48230 + }, + { + "epoch": 0.6221425485404025, + "grad_norm": 0.9698197361778715, + "learning_rate": 9.652892872877884e-06, + "loss": 0.2514, + "step": 48240 + }, + { + "epoch": 0.6222715167303985, + "grad_norm": 1.0137873047684638, + "learning_rate": 9.652618140488318e-06, + "loss": 0.2448, + "step": 48250 + }, + { + "epoch": 0.6224004849203943, + "grad_norm": 0.8819059931596682, + "learning_rate": 9.652343303330311e-06, + "loss": 0.2431, + "step": 48260 + }, + { + "epoch": 0.6225294531103903, + "grad_norm": 0.9229853445548983, + "learning_rate": 9.652068361410054e-06, + "loss": 0.2315, + "step": 48270 + }, + { + "epoch": 0.6226584213003863, + "grad_norm": 0.998396401533269, + "learning_rate": 9.651793314733737e-06, + "loss": 0.2459, + "step": 48280 + }, + { + "epoch": 0.6227873894903821, + "grad_norm": 0.8563406463726295, + "learning_rate": 9.651518163307552e-06, + "loss": 0.2475, + "step": 48290 + }, + { + "epoch": 0.6229163576803781, + "grad_norm": 0.9188383515541498, + "learning_rate": 9.651242907137697e-06, + "loss": 0.2348, + "step": 48300 + }, + { + "epoch": 0.6230453258703741, + "grad_norm": 0.9720943697963241, + "learning_rate": 9.65096754623037e-06, + "loss": 0.2366, + "step": 48310 + }, + { + "epoch": 0.6231742940603701, + "grad_norm": 1.1589604877703024, + "learning_rate": 9.650692080591768e-06, + "loss": 0.246, + "step": 48320 + }, + { + "epoch": 0.6233032622503659, + "grad_norm": 0.8946713525588492, + "learning_rate": 9.650416510228099e-06, + "loss": 0.2543, + "step": 48330 + }, + { + "epoch": 0.6234322304403619, + "grad_norm": 0.9574237864492187, + "learning_rate": 9.650140835145565e-06, + "loss": 0.2499, + "step": 48340 + }, + { + "epoch": 0.6235611986303579, + "grad_norm": 1.0149614405856495, + "learning_rate": 9.649865055350377e-06, + "loss": 0.2488, + "step": 48350 + }, + { + "epoch": 0.6236901668203537, + "grad_norm": 0.9920597647154977, + "learning_rate": 9.64958917084874e-06, + "loss": 0.2504, + "step": 48360 + }, + { + "epoch": 0.6238191350103497, + "grad_norm": 0.9256785978758596, + "learning_rate": 9.649313181646873e-06, + "loss": 0.2478, + "step": 48370 + }, + { + "epoch": 0.6239481032003457, + "grad_norm": 0.9118156466496876, + "learning_rate": 9.649037087750985e-06, + "loss": 0.244, + "step": 48380 + }, + { + "epoch": 0.6240770713903415, + "grad_norm": 0.9552453976230871, + "learning_rate": 9.648760889167296e-06, + "loss": 0.2455, + "step": 48390 + }, + { + "epoch": 0.6242060395803375, + "grad_norm": 0.8227885237310326, + "learning_rate": 9.648484585902024e-06, + "loss": 0.2406, + "step": 48400 + }, + { + "epoch": 0.6243350077703335, + "grad_norm": 0.9036873482937389, + "learning_rate": 9.648208177961392e-06, + "loss": 0.2454, + "step": 48410 + }, + { + "epoch": 0.6244639759603294, + "grad_norm": 0.8547922791881778, + "learning_rate": 9.647931665351625e-06, + "loss": 0.246, + "step": 48420 + }, + { + "epoch": 0.6245929441503253, + "grad_norm": 0.936237326264674, + "learning_rate": 9.647655048078946e-06, + "loss": 0.2398, + "step": 48430 + }, + { + "epoch": 0.6247219123403213, + "grad_norm": 1.0576435506788395, + "learning_rate": 9.647378326149587e-06, + "loss": 0.2206, + "step": 48440 + }, + { + "epoch": 0.6248508805303172, + "grad_norm": 0.8709945448580062, + "learning_rate": 9.64710149956978e-06, + "loss": 0.2436, + "step": 48450 + }, + { + "epoch": 0.6249798487203131, + "grad_norm": 0.9432950004001693, + "learning_rate": 9.646824568345758e-06, + "loss": 0.2464, + "step": 48460 + }, + { + "epoch": 0.6251088169103091, + "grad_norm": 1.0954346426306725, + "learning_rate": 9.646547532483754e-06, + "loss": 0.2444, + "step": 48470 + }, + { + "epoch": 0.625237785100305, + "grad_norm": 1.008388013867771, + "learning_rate": 9.64627039199001e-06, + "loss": 0.2432, + "step": 48480 + }, + { + "epoch": 0.6253667532903009, + "grad_norm": 0.947543201197995, + "learning_rate": 9.645993146870764e-06, + "loss": 0.2507, + "step": 48490 + }, + { + "epoch": 0.6254957214802969, + "grad_norm": 1.0197084587394438, + "learning_rate": 9.64571579713226e-06, + "loss": 0.2426, + "step": 48500 + }, + { + "epoch": 0.6256246896702928, + "grad_norm": 0.8756807675623162, + "learning_rate": 9.645438342780745e-06, + "loss": 0.2449, + "step": 48510 + }, + { + "epoch": 0.6257536578602888, + "grad_norm": 0.9379273950660294, + "learning_rate": 9.645160783822464e-06, + "loss": 0.2443, + "step": 48520 + }, + { + "epoch": 0.6258826260502847, + "grad_norm": 0.9164735757314377, + "learning_rate": 9.64488312026367e-06, + "loss": 0.2367, + "step": 48530 + }, + { + "epoch": 0.6260115942402806, + "grad_norm": 0.9920763189280319, + "learning_rate": 9.644605352110615e-06, + "loss": 0.2393, + "step": 48540 + }, + { + "epoch": 0.6261405624302766, + "grad_norm": 0.8634460134416921, + "learning_rate": 9.64432747936955e-06, + "loss": 0.2351, + "step": 48550 + }, + { + "epoch": 0.6262695306202725, + "grad_norm": 0.9376834981659584, + "learning_rate": 9.644049502046738e-06, + "loss": 0.2329, + "step": 48560 + }, + { + "epoch": 0.6263984988102684, + "grad_norm": 0.8654391139102491, + "learning_rate": 9.643771420148433e-06, + "loss": 0.2347, + "step": 48570 + }, + { + "epoch": 0.6265274670002644, + "grad_norm": 0.8945999626897271, + "learning_rate": 9.6434932336809e-06, + "loss": 0.2565, + "step": 48580 + }, + { + "epoch": 0.6266564351902604, + "grad_norm": 0.9641197178375885, + "learning_rate": 9.643214942650403e-06, + "loss": 0.238, + "step": 48590 + }, + { + "epoch": 0.6267854033802562, + "grad_norm": 0.9427577585936728, + "learning_rate": 9.642936547063209e-06, + "loss": 0.2278, + "step": 48600 + }, + { + "epoch": 0.6269143715702522, + "grad_norm": 0.9382323220967252, + "learning_rate": 9.642658046925586e-06, + "loss": 0.2459, + "step": 48610 + }, + { + "epoch": 0.6270433397602482, + "grad_norm": 0.9475303645596888, + "learning_rate": 9.642379442243807e-06, + "loss": 0.2511, + "step": 48620 + }, + { + "epoch": 0.627172307950244, + "grad_norm": 0.9077957019841458, + "learning_rate": 9.642100733024143e-06, + "loss": 0.2367, + "step": 48630 + }, + { + "epoch": 0.62730127614024, + "grad_norm": 0.933701579856268, + "learning_rate": 9.641821919272871e-06, + "loss": 0.2316, + "step": 48640 + }, + { + "epoch": 0.627430244330236, + "grad_norm": 0.9866972906492377, + "learning_rate": 9.64154300099627e-06, + "loss": 0.2405, + "step": 48650 + }, + { + "epoch": 0.6275592125202318, + "grad_norm": 0.91820568947504, + "learning_rate": 9.641263978200621e-06, + "loss": 0.237, + "step": 48660 + }, + { + "epoch": 0.6276881807102278, + "grad_norm": 0.9785178602229162, + "learning_rate": 9.640984850892205e-06, + "loss": 0.2415, + "step": 48670 + }, + { + "epoch": 0.6278171489002238, + "grad_norm": 0.8537355200970609, + "learning_rate": 9.64070561907731e-06, + "loss": 0.2387, + "step": 48680 + }, + { + "epoch": 0.6279461170902197, + "grad_norm": 1.0211890318494201, + "learning_rate": 9.640426282762223e-06, + "loss": 0.2289, + "step": 48690 + }, + { + "epoch": 0.6280750852802156, + "grad_norm": 0.9147079214408892, + "learning_rate": 9.640146841953233e-06, + "loss": 0.2441, + "step": 48700 + }, + { + "epoch": 0.6282040534702116, + "grad_norm": 0.9097953561121069, + "learning_rate": 9.639867296656634e-06, + "loss": 0.2386, + "step": 48710 + }, + { + "epoch": 0.6283330216602075, + "grad_norm": 1.060909160337207, + "learning_rate": 9.63958764687872e-06, + "loss": 0.237, + "step": 48720 + }, + { + "epoch": 0.6284619898502034, + "grad_norm": 1.0085602340228401, + "learning_rate": 9.639307892625788e-06, + "loss": 0.2448, + "step": 48730 + }, + { + "epoch": 0.6285909580401994, + "grad_norm": 0.9763364304377462, + "learning_rate": 9.639028033904139e-06, + "loss": 0.2383, + "step": 48740 + }, + { + "epoch": 0.6287199262301953, + "grad_norm": 0.9990420693881426, + "learning_rate": 9.638748070720074e-06, + "loss": 0.2392, + "step": 48750 + }, + { + "epoch": 0.6288488944201912, + "grad_norm": 0.9414767729813439, + "learning_rate": 9.638468003079897e-06, + "loss": 0.2537, + "step": 48760 + }, + { + "epoch": 0.6289778626101872, + "grad_norm": 0.8233029337912559, + "learning_rate": 9.638187830989914e-06, + "loss": 0.227, + "step": 48770 + }, + { + "epoch": 0.6291068308001831, + "grad_norm": 0.9483791577409045, + "learning_rate": 9.637907554456434e-06, + "loss": 0.2428, + "step": 48780 + }, + { + "epoch": 0.6292357989901791, + "grad_norm": 0.9795209448783274, + "learning_rate": 9.637627173485771e-06, + "loss": 0.2449, + "step": 48790 + }, + { + "epoch": 0.629364767180175, + "grad_norm": 0.8661834594063248, + "learning_rate": 9.637346688084234e-06, + "loss": 0.2392, + "step": 48800 + }, + { + "epoch": 0.629493735370171, + "grad_norm": 0.9862771814077269, + "learning_rate": 9.637066098258146e-06, + "loss": 0.2489, + "step": 48810 + }, + { + "epoch": 0.6296227035601669, + "grad_norm": 0.8968899927534077, + "learning_rate": 9.636785404013817e-06, + "loss": 0.2348, + "step": 48820 + }, + { + "epoch": 0.6297516717501628, + "grad_norm": 0.9115345189027092, + "learning_rate": 9.636504605357572e-06, + "loss": 0.2393, + "step": 48830 + }, + { + "epoch": 0.6298806399401587, + "grad_norm": 0.979657961471723, + "learning_rate": 9.636223702295737e-06, + "loss": 0.2395, + "step": 48840 + }, + { + "epoch": 0.6300096081301547, + "grad_norm": 0.9049144984374041, + "learning_rate": 9.635942694834632e-06, + "loss": 0.2315, + "step": 48850 + }, + { + "epoch": 0.6301385763201507, + "grad_norm": 0.9803569525891883, + "learning_rate": 9.635661582980587e-06, + "loss": 0.2439, + "step": 48860 + }, + { + "epoch": 0.6302675445101465, + "grad_norm": 0.9339952103574355, + "learning_rate": 9.635380366739933e-06, + "loss": 0.2452, + "step": 48870 + }, + { + "epoch": 0.6303965127001425, + "grad_norm": 0.943864131077914, + "learning_rate": 9.635099046119e-06, + "loss": 0.2317, + "step": 48880 + }, + { + "epoch": 0.6305254808901385, + "grad_norm": 0.8301438965814739, + "learning_rate": 9.634817621124127e-06, + "loss": 0.2342, + "step": 48890 + }, + { + "epoch": 0.6306544490801343, + "grad_norm": 0.996341109040121, + "learning_rate": 9.634536091761648e-06, + "loss": 0.2344, + "step": 48900 + }, + { + "epoch": 0.6307834172701303, + "grad_norm": 0.8957138416348702, + "learning_rate": 9.634254458037902e-06, + "loss": 0.2453, + "step": 48910 + }, + { + "epoch": 0.6309123854601263, + "grad_norm": 0.8854200778279862, + "learning_rate": 9.633972719959233e-06, + "loss": 0.245, + "step": 48920 + }, + { + "epoch": 0.6310413536501221, + "grad_norm": 0.8796294689981929, + "learning_rate": 9.633690877531985e-06, + "loss": 0.2361, + "step": 48930 + }, + { + "epoch": 0.6311703218401181, + "grad_norm": 0.9874780276485193, + "learning_rate": 9.633408930762502e-06, + "loss": 0.2532, + "step": 48940 + }, + { + "epoch": 0.6312992900301141, + "grad_norm": 0.8973897705755536, + "learning_rate": 9.633126879657135e-06, + "loss": 0.2321, + "step": 48950 + }, + { + "epoch": 0.6314282582201101, + "grad_norm": 0.7858558428315167, + "learning_rate": 9.632844724222237e-06, + "loss": 0.2317, + "step": 48960 + }, + { + "epoch": 0.6315572264101059, + "grad_norm": 0.8728736450741459, + "learning_rate": 9.632562464464156e-06, + "loss": 0.2285, + "step": 48970 + }, + { + "epoch": 0.6316861946001019, + "grad_norm": 0.9518271071607086, + "learning_rate": 9.632280100389255e-06, + "loss": 0.2434, + "step": 48980 + }, + { + "epoch": 0.6318151627900979, + "grad_norm": 0.9368108185131202, + "learning_rate": 9.631997632003888e-06, + "loss": 0.2418, + "step": 48990 + }, + { + "epoch": 0.6319441309800937, + "grad_norm": 0.8780142473339967, + "learning_rate": 9.631715059314418e-06, + "loss": 0.2465, + "step": 49000 + }, + { + "epoch": 0.6320730991700897, + "grad_norm": 0.9377977937302794, + "learning_rate": 9.631432382327204e-06, + "loss": 0.2443, + "step": 49010 + }, + { + "epoch": 0.6322020673600857, + "grad_norm": 0.8900846341048797, + "learning_rate": 9.631149601048617e-06, + "loss": 0.2354, + "step": 49020 + }, + { + "epoch": 0.6323310355500815, + "grad_norm": 0.9861883189565486, + "learning_rate": 9.630866715485019e-06, + "loss": 0.2491, + "step": 49030 + }, + { + "epoch": 0.6324600037400775, + "grad_norm": 0.8419321990776908, + "learning_rate": 9.630583725642784e-06, + "loss": 0.2389, + "step": 49040 + }, + { + "epoch": 0.6325889719300735, + "grad_norm": 0.9155147604058749, + "learning_rate": 9.630300631528284e-06, + "loss": 0.2296, + "step": 49050 + }, + { + "epoch": 0.6327179401200694, + "grad_norm": 0.9831190973737035, + "learning_rate": 9.630017433147893e-06, + "loss": 0.2391, + "step": 49060 + }, + { + "epoch": 0.6328469083100653, + "grad_norm": 0.9024301524528201, + "learning_rate": 9.629734130507985e-06, + "loss": 0.2387, + "step": 49070 + }, + { + "epoch": 0.6329758765000613, + "grad_norm": 0.9941929016588095, + "learning_rate": 9.629450723614946e-06, + "loss": 0.2377, + "step": 49080 + }, + { + "epoch": 0.6331048446900572, + "grad_norm": 0.9018307552438393, + "learning_rate": 9.629167212475153e-06, + "loss": 0.246, + "step": 49090 + }, + { + "epoch": 0.6332338128800531, + "grad_norm": 0.853150816891397, + "learning_rate": 9.628883597094992e-06, + "loss": 0.2276, + "step": 49100 + }, + { + "epoch": 0.6333627810700491, + "grad_norm": 0.8371498381765545, + "learning_rate": 9.62859987748085e-06, + "loss": 0.2395, + "step": 49110 + }, + { + "epoch": 0.633491749260045, + "grad_norm": 0.9323225365125036, + "learning_rate": 9.628316053639113e-06, + "loss": 0.2456, + "step": 49120 + }, + { + "epoch": 0.6336207174500409, + "grad_norm": 0.8021639216314806, + "learning_rate": 9.628032125576174e-06, + "loss": 0.2423, + "step": 49130 + }, + { + "epoch": 0.6337496856400369, + "grad_norm": 0.9008236411754831, + "learning_rate": 9.627748093298428e-06, + "loss": 0.2313, + "step": 49140 + }, + { + "epoch": 0.6338786538300328, + "grad_norm": 0.8802025766128805, + "learning_rate": 9.627463956812269e-06, + "loss": 0.2327, + "step": 49150 + }, + { + "epoch": 0.6340076220200288, + "grad_norm": 0.942376309073064, + "learning_rate": 9.627179716124096e-06, + "loss": 0.2308, + "step": 49160 + }, + { + "epoch": 0.6341365902100247, + "grad_norm": 0.954068577986626, + "learning_rate": 9.626895371240308e-06, + "loss": 0.2384, + "step": 49170 + }, + { + "epoch": 0.6342655584000206, + "grad_norm": 0.8291156958105693, + "learning_rate": 9.62661092216731e-06, + "loss": 0.2518, + "step": 49180 + }, + { + "epoch": 0.6343945265900166, + "grad_norm": 0.8605207726012747, + "learning_rate": 9.626326368911508e-06, + "loss": 0.2412, + "step": 49190 + }, + { + "epoch": 0.6345234947800125, + "grad_norm": 0.9421770719334773, + "learning_rate": 9.626041711479306e-06, + "loss": 0.259, + "step": 49200 + }, + { + "epoch": 0.6346524629700084, + "grad_norm": 0.879313291228772, + "learning_rate": 9.625756949877116e-06, + "loss": 0.2436, + "step": 49210 + }, + { + "epoch": 0.6347814311600044, + "grad_norm": 0.955914986998319, + "learning_rate": 9.625472084111352e-06, + "loss": 0.2391, + "step": 49220 + }, + { + "epoch": 0.6349103993500004, + "grad_norm": 0.9549115669502891, + "learning_rate": 9.625187114188425e-06, + "loss": 0.2504, + "step": 49230 + }, + { + "epoch": 0.6350393675399962, + "grad_norm": 0.9733978280089334, + "learning_rate": 9.624902040114756e-06, + "loss": 0.2454, + "step": 49240 + }, + { + "epoch": 0.6351683357299922, + "grad_norm": 0.8239052974757543, + "learning_rate": 9.624616861896762e-06, + "loss": 0.2271, + "step": 49250 + }, + { + "epoch": 0.6352973039199882, + "grad_norm": 0.9610136395520198, + "learning_rate": 9.624331579540864e-06, + "loss": 0.248, + "step": 49260 + }, + { + "epoch": 0.635426272109984, + "grad_norm": 0.9059419791954595, + "learning_rate": 9.624046193053488e-06, + "loss": 0.2345, + "step": 49270 + }, + { + "epoch": 0.63555524029998, + "grad_norm": 1.0573177855331681, + "learning_rate": 9.62376070244106e-06, + "loss": 0.2352, + "step": 49280 + }, + { + "epoch": 0.635684208489976, + "grad_norm": 0.9606862828938397, + "learning_rate": 9.623475107710007e-06, + "loss": 0.2336, + "step": 49290 + }, + { + "epoch": 0.6358131766799718, + "grad_norm": 0.9813229959383147, + "learning_rate": 9.623189408866762e-06, + "loss": 0.2411, + "step": 49300 + }, + { + "epoch": 0.6359421448699678, + "grad_norm": 0.9211993429290539, + "learning_rate": 9.622903605917757e-06, + "loss": 0.2431, + "step": 49310 + }, + { + "epoch": 0.6360711130599638, + "grad_norm": 0.9690647510527709, + "learning_rate": 9.622617698869429e-06, + "loss": 0.2474, + "step": 49320 + }, + { + "epoch": 0.6362000812499597, + "grad_norm": 0.9623360264014189, + "learning_rate": 9.622331687728218e-06, + "loss": 0.2349, + "step": 49330 + }, + { + "epoch": 0.6363290494399556, + "grad_norm": 0.8856812093086555, + "learning_rate": 9.622045572500558e-06, + "loss": 0.2449, + "step": 49340 + }, + { + "epoch": 0.6364580176299516, + "grad_norm": 0.9523712917718059, + "learning_rate": 9.621759353192896e-06, + "loss": 0.2459, + "step": 49350 + }, + { + "epoch": 0.6365869858199475, + "grad_norm": 1.0456205010766673, + "learning_rate": 9.621473029811679e-06, + "loss": 0.252, + "step": 49360 + }, + { + "epoch": 0.6367159540099434, + "grad_norm": 0.986242893555414, + "learning_rate": 9.621186602363352e-06, + "loss": 0.2345, + "step": 49370 + }, + { + "epoch": 0.6368449221999394, + "grad_norm": 0.9730310803493815, + "learning_rate": 9.620900070854365e-06, + "loss": 0.2431, + "step": 49380 + }, + { + "epoch": 0.6369738903899353, + "grad_norm": 0.9385119084181571, + "learning_rate": 9.620613435291171e-06, + "loss": 0.2418, + "step": 49390 + }, + { + "epoch": 0.6371028585799312, + "grad_norm": 0.959086733841201, + "learning_rate": 9.620326695680223e-06, + "loss": 0.2458, + "step": 49400 + }, + { + "epoch": 0.6372318267699272, + "grad_norm": 0.7999791813921114, + "learning_rate": 9.62003985202798e-06, + "loss": 0.2327, + "step": 49410 + }, + { + "epoch": 0.6373607949599231, + "grad_norm": 0.8421745685372095, + "learning_rate": 9.619752904340897e-06, + "loss": 0.2282, + "step": 49420 + }, + { + "epoch": 0.6374897631499191, + "grad_norm": 0.8836735520741449, + "learning_rate": 9.619465852625443e-06, + "loss": 0.2373, + "step": 49430 + }, + { + "epoch": 0.637618731339915, + "grad_norm": 1.063767402061847, + "learning_rate": 9.619178696888074e-06, + "loss": 0.2511, + "step": 49440 + }, + { + "epoch": 0.637747699529911, + "grad_norm": 0.8419628410633377, + "learning_rate": 9.61889143713526e-06, + "loss": 0.2576, + "step": 49450 + }, + { + "epoch": 0.6378766677199069, + "grad_norm": 0.9472327556926515, + "learning_rate": 9.618604073373469e-06, + "loss": 0.2387, + "step": 49460 + }, + { + "epoch": 0.6380056359099028, + "grad_norm": 0.8644366203560938, + "learning_rate": 9.618316605609173e-06, + "loss": 0.2396, + "step": 49470 + }, + { + "epoch": 0.6381346040998988, + "grad_norm": 1.0270762730604721, + "learning_rate": 9.618029033848845e-06, + "loss": 0.2303, + "step": 49480 + }, + { + "epoch": 0.6382635722898947, + "grad_norm": 0.9067901233522856, + "learning_rate": 9.61774135809896e-06, + "loss": 0.2469, + "step": 49490 + }, + { + "epoch": 0.6383925404798906, + "grad_norm": 0.8653174973591411, + "learning_rate": 9.617453578365994e-06, + "loss": 0.2363, + "step": 49500 + }, + { + "epoch": 0.6385215086698866, + "grad_norm": 0.8605494555486183, + "learning_rate": 9.617165694656429e-06, + "loss": 0.2418, + "step": 49510 + }, + { + "epoch": 0.6386504768598825, + "grad_norm": 0.9258988546857115, + "learning_rate": 9.61687770697675e-06, + "loss": 0.244, + "step": 49520 + }, + { + "epoch": 0.6387794450498785, + "grad_norm": 0.9362700291423302, + "learning_rate": 9.616589615333438e-06, + "loss": 0.2417, + "step": 49530 + }, + { + "epoch": 0.6389084132398744, + "grad_norm": 0.9476877013674562, + "learning_rate": 9.616301419732983e-06, + "loss": 0.2371, + "step": 49540 + }, + { + "epoch": 0.6390373814298703, + "grad_norm": 0.9190028950955, + "learning_rate": 9.616013120181872e-06, + "loss": 0.2432, + "step": 49550 + }, + { + "epoch": 0.6391663496198663, + "grad_norm": 0.9496879161888796, + "learning_rate": 9.6157247166866e-06, + "loss": 0.2449, + "step": 49560 + }, + { + "epoch": 0.6392953178098622, + "grad_norm": 0.8902862810456399, + "learning_rate": 9.615436209253658e-06, + "loss": 0.2331, + "step": 49570 + }, + { + "epoch": 0.6394242859998581, + "grad_norm": 0.8673016283871093, + "learning_rate": 9.615147597889547e-06, + "loss": 0.232, + "step": 49580 + }, + { + "epoch": 0.6395532541898541, + "grad_norm": 0.9846886564073312, + "learning_rate": 9.614858882600762e-06, + "loss": 0.2298, + "step": 49590 + }, + { + "epoch": 0.6396822223798501, + "grad_norm": 0.9989076360745144, + "learning_rate": 9.614570063393807e-06, + "loss": 0.2501, + "step": 49600 + }, + { + "epoch": 0.6398111905698459, + "grad_norm": 0.9211215148444045, + "learning_rate": 9.614281140275185e-06, + "loss": 0.2421, + "step": 49610 + }, + { + "epoch": 0.6399401587598419, + "grad_norm": 0.9700871576730661, + "learning_rate": 9.6139921132514e-06, + "loss": 0.2447, + "step": 49620 + }, + { + "epoch": 0.6400691269498379, + "grad_norm": 0.8524617326245184, + "learning_rate": 9.613702982328962e-06, + "loss": 0.2586, + "step": 49630 + }, + { + "epoch": 0.6401980951398337, + "grad_norm": 0.8852403536889208, + "learning_rate": 9.613413747514383e-06, + "loss": 0.2497, + "step": 49640 + }, + { + "epoch": 0.6403270633298297, + "grad_norm": 0.9372823876899148, + "learning_rate": 9.613124408814174e-06, + "loss": 0.24, + "step": 49650 + }, + { + "epoch": 0.6404560315198257, + "grad_norm": 0.9021461722221569, + "learning_rate": 9.61283496623485e-06, + "loss": 0.2506, + "step": 49660 + }, + { + "epoch": 0.6405849997098215, + "grad_norm": 1.03540443993684, + "learning_rate": 9.612545419782933e-06, + "loss": 0.241, + "step": 49670 + }, + { + "epoch": 0.6407139678998175, + "grad_norm": 0.9243745158776396, + "learning_rate": 9.612255769464936e-06, + "loss": 0.2426, + "step": 49680 + }, + { + "epoch": 0.6408429360898135, + "grad_norm": 0.9772217451504203, + "learning_rate": 9.611966015287389e-06, + "loss": 0.2359, + "step": 49690 + }, + { + "epoch": 0.6409719042798094, + "grad_norm": 0.8561864284539311, + "learning_rate": 9.611676157256809e-06, + "loss": 0.2347, + "step": 49700 + }, + { + "epoch": 0.6411008724698053, + "grad_norm": 0.936231055818971, + "learning_rate": 9.611386195379729e-06, + "loss": 0.2455, + "step": 49710 + }, + { + "epoch": 0.6412298406598013, + "grad_norm": 0.9117389387559434, + "learning_rate": 9.611096129662676e-06, + "loss": 0.2424, + "step": 49720 + }, + { + "epoch": 0.6413588088497972, + "grad_norm": 0.8952454068917214, + "learning_rate": 9.610805960112184e-06, + "loss": 0.2443, + "step": 49730 + }, + { + "epoch": 0.6414877770397931, + "grad_norm": 0.8213453141174137, + "learning_rate": 9.610515686734784e-06, + "loss": 0.228, + "step": 49740 + }, + { + "epoch": 0.6416167452297891, + "grad_norm": 0.9491012052401234, + "learning_rate": 9.610225309537013e-06, + "loss": 0.2455, + "step": 49750 + }, + { + "epoch": 0.641745713419785, + "grad_norm": 0.8960079039830297, + "learning_rate": 9.609934828525412e-06, + "loss": 0.247, + "step": 49760 + }, + { + "epoch": 0.6418746816097809, + "grad_norm": 0.8054427334962214, + "learning_rate": 9.60964424370652e-06, + "loss": 0.2431, + "step": 49770 + }, + { + "epoch": 0.6420036497997769, + "grad_norm": 0.8712545597102141, + "learning_rate": 9.60935355508688e-06, + "loss": 0.2314, + "step": 49780 + }, + { + "epoch": 0.6421326179897728, + "grad_norm": 0.9649978552172713, + "learning_rate": 9.60906276267304e-06, + "loss": 0.2464, + "step": 49790 + }, + { + "epoch": 0.6422615861797688, + "grad_norm": 0.8881255195280051, + "learning_rate": 9.608771866471546e-06, + "loss": 0.2397, + "step": 49800 + }, + { + "epoch": 0.6423905543697647, + "grad_norm": 0.9673523814118463, + "learning_rate": 9.60848086648895e-06, + "loss": 0.2346, + "step": 49810 + }, + { + "epoch": 0.6425195225597606, + "grad_norm": 0.9536438599704279, + "learning_rate": 9.608189762731804e-06, + "loss": 0.2363, + "step": 49820 + }, + { + "epoch": 0.6426484907497566, + "grad_norm": 0.9415693811910101, + "learning_rate": 9.607898555206662e-06, + "loss": 0.2419, + "step": 49830 + }, + { + "epoch": 0.6427774589397525, + "grad_norm": 0.9193329448183848, + "learning_rate": 9.607607243920085e-06, + "loss": 0.2439, + "step": 49840 + }, + { + "epoch": 0.6429064271297484, + "grad_norm": 0.8322234297560357, + "learning_rate": 9.607315828878629e-06, + "loss": 0.2518, + "step": 49850 + }, + { + "epoch": 0.6430353953197444, + "grad_norm": 0.9818009765436407, + "learning_rate": 9.607024310088859e-06, + "loss": 0.2565, + "step": 49860 + }, + { + "epoch": 0.6431643635097404, + "grad_norm": 0.865279139434337, + "learning_rate": 9.606732687557338e-06, + "loss": 0.2466, + "step": 49870 + }, + { + "epoch": 0.6432933316997362, + "grad_norm": 0.9018167877945952, + "learning_rate": 9.606440961290632e-06, + "loss": 0.244, + "step": 49880 + }, + { + "epoch": 0.6434222998897322, + "grad_norm": 0.9046036770172687, + "learning_rate": 9.606149131295311e-06, + "loss": 0.2309, + "step": 49890 + }, + { + "epoch": 0.6435512680797282, + "grad_norm": 0.9148062830161671, + "learning_rate": 9.605857197577946e-06, + "loss": 0.2572, + "step": 49900 + }, + { + "epoch": 0.643680236269724, + "grad_norm": 0.8995674277005918, + "learning_rate": 9.605565160145113e-06, + "loss": 0.241, + "step": 49910 + }, + { + "epoch": 0.64380920445972, + "grad_norm": 0.9066090645297902, + "learning_rate": 9.605273019003386e-06, + "loss": 0.2406, + "step": 49920 + }, + { + "epoch": 0.643938172649716, + "grad_norm": 0.9225130807727793, + "learning_rate": 9.604980774159344e-06, + "loss": 0.2399, + "step": 49930 + }, + { + "epoch": 0.6440671408397118, + "grad_norm": 0.95602891366339, + "learning_rate": 9.604688425619569e-06, + "loss": 0.2361, + "step": 49940 + }, + { + "epoch": 0.6441961090297078, + "grad_norm": 0.8394812308763163, + "learning_rate": 9.60439597339064e-06, + "loss": 0.2275, + "step": 49950 + }, + { + "epoch": 0.6443250772197038, + "grad_norm": 0.9640265755419007, + "learning_rate": 9.604103417479146e-06, + "loss": 0.2525, + "step": 49960 + }, + { + "epoch": 0.6444540454096997, + "grad_norm": 0.9086445285073138, + "learning_rate": 9.603810757891676e-06, + "loss": 0.2445, + "step": 49970 + }, + { + "epoch": 0.6445830135996956, + "grad_norm": 0.9003006750003624, + "learning_rate": 9.603517994634818e-06, + "loss": 0.2438, + "step": 49980 + }, + { + "epoch": 0.6447119817896916, + "grad_norm": 0.8560847512653108, + "learning_rate": 9.603225127715165e-06, + "loss": 0.2378, + "step": 49990 + }, + { + "epoch": 0.6448409499796875, + "grad_norm": 0.921992383890199, + "learning_rate": 9.602932157139311e-06, + "loss": 0.2343, + "step": 50000 + }, + { + "epoch": 0.6449699181696834, + "grad_norm": 0.916137005452511, + "learning_rate": 9.602639082913855e-06, + "loss": 0.248, + "step": 50010 + }, + { + "epoch": 0.6450988863596794, + "grad_norm": 0.904415468980794, + "learning_rate": 9.602345905045395e-06, + "loss": 0.2269, + "step": 50020 + }, + { + "epoch": 0.6452278545496754, + "grad_norm": 1.0004274467098402, + "learning_rate": 9.602052623540532e-06, + "loss": 0.2377, + "step": 50030 + }, + { + "epoch": 0.6453568227396712, + "grad_norm": 0.9643679498898569, + "learning_rate": 9.601759238405873e-06, + "loss": 0.2487, + "step": 50040 + }, + { + "epoch": 0.6454857909296672, + "grad_norm": 0.8186649943936974, + "learning_rate": 9.601465749648023e-06, + "loss": 0.2269, + "step": 50050 + }, + { + "epoch": 0.6456147591196632, + "grad_norm": 0.8804441182231025, + "learning_rate": 9.60117215727359e-06, + "loss": 0.24, + "step": 50060 + }, + { + "epoch": 0.6457437273096591, + "grad_norm": 0.9029664172613735, + "learning_rate": 9.600878461289186e-06, + "loss": 0.236, + "step": 50070 + }, + { + "epoch": 0.645872695499655, + "grad_norm": 0.8368504575842294, + "learning_rate": 9.600584661701424e-06, + "loss": 0.2443, + "step": 50080 + }, + { + "epoch": 0.646001663689651, + "grad_norm": 0.8936064904304318, + "learning_rate": 9.600290758516921e-06, + "loss": 0.2437, + "step": 50090 + }, + { + "epoch": 0.6461306318796469, + "grad_norm": 0.987043077963443, + "learning_rate": 9.599996751742293e-06, + "loss": 0.2496, + "step": 50100 + }, + { + "epoch": 0.6462596000696428, + "grad_norm": 0.939502255770288, + "learning_rate": 9.599702641384164e-06, + "loss": 0.2434, + "step": 50110 + }, + { + "epoch": 0.6463885682596388, + "grad_norm": 0.8577360173733141, + "learning_rate": 9.599408427449153e-06, + "loss": 0.2545, + "step": 50120 + }, + { + "epoch": 0.6465175364496347, + "grad_norm": 0.9398123797758652, + "learning_rate": 9.599114109943888e-06, + "loss": 0.2481, + "step": 50130 + }, + { + "epoch": 0.6466465046396306, + "grad_norm": 0.8883544571679357, + "learning_rate": 9.598819688874993e-06, + "loss": 0.2284, + "step": 50140 + }, + { + "epoch": 0.6467754728296266, + "grad_norm": 0.9081606850011804, + "learning_rate": 9.598525164249104e-06, + "loss": 0.2423, + "step": 50150 + }, + { + "epoch": 0.6469044410196225, + "grad_norm": 0.8858405075248256, + "learning_rate": 9.598230536072847e-06, + "loss": 0.2403, + "step": 50160 + }, + { + "epoch": 0.6470334092096185, + "grad_norm": 0.967253579127777, + "learning_rate": 9.597935804352859e-06, + "loss": 0.2346, + "step": 50170 + }, + { + "epoch": 0.6471623773996144, + "grad_norm": 0.9260051700100657, + "learning_rate": 9.597640969095776e-06, + "loss": 0.238, + "step": 50180 + }, + { + "epoch": 0.6472913455896103, + "grad_norm": 0.9028267319453352, + "learning_rate": 9.59734603030824e-06, + "loss": 0.2374, + "step": 50190 + }, + { + "epoch": 0.6474203137796063, + "grad_norm": 1.00504238161439, + "learning_rate": 9.597050987996887e-06, + "loss": 0.2453, + "step": 50200 + }, + { + "epoch": 0.6475492819696022, + "grad_norm": 0.8553337659988075, + "learning_rate": 9.596755842168367e-06, + "loss": 0.2303, + "step": 50210 + }, + { + "epoch": 0.6476782501595981, + "grad_norm": 0.9927257631509577, + "learning_rate": 9.596460592829321e-06, + "loss": 0.2274, + "step": 50220 + }, + { + "epoch": 0.6478072183495941, + "grad_norm": 0.8863205699141975, + "learning_rate": 9.596165239986402e-06, + "loss": 0.2361, + "step": 50230 + }, + { + "epoch": 0.6479361865395901, + "grad_norm": 0.85238925507279, + "learning_rate": 9.595869783646257e-06, + "loss": 0.2491, + "step": 50240 + }, + { + "epoch": 0.6480651547295859, + "grad_norm": 0.9714393147432775, + "learning_rate": 9.595574223815541e-06, + "loss": 0.2318, + "step": 50250 + }, + { + "epoch": 0.6481941229195819, + "grad_norm": 0.8795167871799279, + "learning_rate": 9.59527856050091e-06, + "loss": 0.2362, + "step": 50260 + }, + { + "epoch": 0.6483230911095779, + "grad_norm": 0.896535174909221, + "learning_rate": 9.59498279370902e-06, + "loss": 0.2458, + "step": 50270 + }, + { + "epoch": 0.6484520592995737, + "grad_norm": 1.002318961954643, + "learning_rate": 9.594686923446532e-06, + "loss": 0.2413, + "step": 50280 + }, + { + "epoch": 0.6485810274895697, + "grad_norm": 0.8687906942798812, + "learning_rate": 9.594390949720107e-06, + "loss": 0.2416, + "step": 50290 + }, + { + "epoch": 0.6487099956795657, + "grad_norm": 0.9296794635685439, + "learning_rate": 9.594094872536413e-06, + "loss": 0.2402, + "step": 50300 + }, + { + "epoch": 0.6488389638695615, + "grad_norm": 0.9318098561732328, + "learning_rate": 9.593798691902116e-06, + "loss": 0.2567, + "step": 50310 + }, + { + "epoch": 0.6489679320595575, + "grad_norm": 0.8716163113594189, + "learning_rate": 9.593502407823883e-06, + "loss": 0.229, + "step": 50320 + }, + { + "epoch": 0.6490969002495535, + "grad_norm": 0.8652864822074594, + "learning_rate": 9.59320602030839e-06, + "loss": 0.2397, + "step": 50330 + }, + { + "epoch": 0.6492258684395494, + "grad_norm": 1.0265534131448346, + "learning_rate": 9.592909529362306e-06, + "loss": 0.2484, + "step": 50340 + }, + { + "epoch": 0.6493548366295453, + "grad_norm": 0.8916348519138944, + "learning_rate": 9.592612934992311e-06, + "loss": 0.244, + "step": 50350 + }, + { + "epoch": 0.6494838048195413, + "grad_norm": 0.9140951536549434, + "learning_rate": 9.592316237205084e-06, + "loss": 0.2462, + "step": 50360 + }, + { + "epoch": 0.6496127730095372, + "grad_norm": 0.8395539292663742, + "learning_rate": 9.592019436007305e-06, + "loss": 0.2413, + "step": 50370 + }, + { + "epoch": 0.6497417411995331, + "grad_norm": 1.0493718440534223, + "learning_rate": 9.591722531405656e-06, + "loss": 0.2292, + "step": 50380 + }, + { + "epoch": 0.6498707093895291, + "grad_norm": 0.9606242946383684, + "learning_rate": 9.591425523406826e-06, + "loss": 0.2518, + "step": 50390 + }, + { + "epoch": 0.649999677579525, + "grad_norm": 0.8079612567462456, + "learning_rate": 9.5911284120175e-06, + "loss": 0.2386, + "step": 50400 + }, + { + "epoch": 0.6501286457695209, + "grad_norm": 0.8725747044869661, + "learning_rate": 9.59083119724437e-06, + "loss": 0.2404, + "step": 50410 + }, + { + "epoch": 0.6502576139595169, + "grad_norm": 0.8690316995890022, + "learning_rate": 9.590533879094128e-06, + "loss": 0.2381, + "step": 50420 + }, + { + "epoch": 0.6503865821495128, + "grad_norm": 0.9756184591935242, + "learning_rate": 9.59023645757347e-06, + "loss": 0.2405, + "step": 50430 + }, + { + "epoch": 0.6505155503395088, + "grad_norm": 0.8331043151047123, + "learning_rate": 9.589938932689091e-06, + "loss": 0.2363, + "step": 50440 + }, + { + "epoch": 0.6506445185295047, + "grad_norm": 0.9938252515547897, + "learning_rate": 9.589641304447695e-06, + "loss": 0.2495, + "step": 50450 + }, + { + "epoch": 0.6507734867195006, + "grad_norm": 0.9817178101706358, + "learning_rate": 9.589343572855981e-06, + "loss": 0.2291, + "step": 50460 + }, + { + "epoch": 0.6509024549094966, + "grad_norm": 0.9154744189263473, + "learning_rate": 9.589045737920652e-06, + "loss": 0.2343, + "step": 50470 + }, + { + "epoch": 0.6510314230994925, + "grad_norm": 0.8722313799976092, + "learning_rate": 9.588747799648419e-06, + "loss": 0.2339, + "step": 50480 + }, + { + "epoch": 0.6511603912894884, + "grad_norm": 0.9469255709666552, + "learning_rate": 9.588449758045989e-06, + "loss": 0.2474, + "step": 50490 + }, + { + "epoch": 0.6512893594794844, + "grad_norm": 0.8779206319797012, + "learning_rate": 9.58815161312007e-06, + "loss": 0.2453, + "step": 50500 + }, + { + "epoch": 0.6514183276694803, + "grad_norm": 0.9218442583238203, + "learning_rate": 9.587853364877382e-06, + "loss": 0.238, + "step": 50510 + }, + { + "epoch": 0.6515472958594762, + "grad_norm": 0.9304214894109316, + "learning_rate": 9.587555013324636e-06, + "loss": 0.247, + "step": 50520 + }, + { + "epoch": 0.6516762640494722, + "grad_norm": 0.8564061440568654, + "learning_rate": 9.587256558468551e-06, + "loss": 0.2254, + "step": 50530 + }, + { + "epoch": 0.6518052322394682, + "grad_norm": 0.9107006710847111, + "learning_rate": 9.586958000315852e-06, + "loss": 0.2463, + "step": 50540 + }, + { + "epoch": 0.651934200429464, + "grad_norm": 0.9657137027791927, + "learning_rate": 9.586659338873256e-06, + "loss": 0.2406, + "step": 50550 + }, + { + "epoch": 0.65206316861946, + "grad_norm": 0.9380711060498482, + "learning_rate": 9.586360574147491e-06, + "loss": 0.2443, + "step": 50560 + }, + { + "epoch": 0.652192136809456, + "grad_norm": 0.8462078830227846, + "learning_rate": 9.586061706145286e-06, + "loss": 0.2289, + "step": 50570 + }, + { + "epoch": 0.6523211049994518, + "grad_norm": 0.8758688101950058, + "learning_rate": 9.58576273487337e-06, + "loss": 0.2523, + "step": 50580 + }, + { + "epoch": 0.6524500731894478, + "grad_norm": 0.9248589932722496, + "learning_rate": 9.585463660338473e-06, + "loss": 0.2387, + "step": 50590 + }, + { + "epoch": 0.6525790413794438, + "grad_norm": 0.8663710183728824, + "learning_rate": 9.585164482547334e-06, + "loss": 0.2355, + "step": 50600 + }, + { + "epoch": 0.6527080095694398, + "grad_norm": 0.899041692271557, + "learning_rate": 9.584865201506683e-06, + "loss": 0.2473, + "step": 50610 + }, + { + "epoch": 0.6528369777594356, + "grad_norm": 0.8542165213393054, + "learning_rate": 9.584565817223268e-06, + "loss": 0.2378, + "step": 50620 + }, + { + "epoch": 0.6529659459494316, + "grad_norm": 0.8194726991499623, + "learning_rate": 9.584266329703823e-06, + "loss": 0.2392, + "step": 50630 + }, + { + "epoch": 0.6530949141394276, + "grad_norm": 0.8334570964517479, + "learning_rate": 9.583966738955098e-06, + "loss": 0.2384, + "step": 50640 + }, + { + "epoch": 0.6532238823294234, + "grad_norm": 0.8774980092080852, + "learning_rate": 9.583667044983835e-06, + "loss": 0.2412, + "step": 50650 + }, + { + "epoch": 0.6533528505194194, + "grad_norm": 0.8684498925082429, + "learning_rate": 9.583367247796783e-06, + "loss": 0.2465, + "step": 50660 + }, + { + "epoch": 0.6534818187094154, + "grad_norm": 0.936697142757943, + "learning_rate": 9.583067347400697e-06, + "loss": 0.2563, + "step": 50670 + }, + { + "epoch": 0.6536107868994112, + "grad_norm": 1.0122772517909187, + "learning_rate": 9.582767343802325e-06, + "loss": 0.2426, + "step": 50680 + }, + { + "epoch": 0.6537397550894072, + "grad_norm": 0.9527683324827924, + "learning_rate": 9.582467237008424e-06, + "loss": 0.2377, + "step": 50690 + }, + { + "epoch": 0.6538687232794032, + "grad_norm": 0.9820116612142527, + "learning_rate": 9.582167027025754e-06, + "loss": 0.2399, + "step": 50700 + }, + { + "epoch": 0.6539976914693991, + "grad_norm": 0.9451420956952116, + "learning_rate": 9.581866713861073e-06, + "loss": 0.239, + "step": 50710 + }, + { + "epoch": 0.654126659659395, + "grad_norm": 0.8545778235726683, + "learning_rate": 9.581566297521143e-06, + "loss": 0.2286, + "step": 50720 + }, + { + "epoch": 0.654255627849391, + "grad_norm": 0.9496306150650596, + "learning_rate": 9.581265778012732e-06, + "loss": 0.2419, + "step": 50730 + }, + { + "epoch": 0.6543845960393869, + "grad_norm": 0.8819027546396555, + "learning_rate": 9.580965155342603e-06, + "loss": 0.242, + "step": 50740 + }, + { + "epoch": 0.6545135642293828, + "grad_norm": 0.7717430580328176, + "learning_rate": 9.58066442951753e-06, + "loss": 0.2416, + "step": 50750 + }, + { + "epoch": 0.6546425324193788, + "grad_norm": 0.8304788024947954, + "learning_rate": 9.58036360054428e-06, + "loss": 0.235, + "step": 50760 + }, + { + "epoch": 0.6547715006093747, + "grad_norm": 0.9165807046020927, + "learning_rate": 9.580062668429631e-06, + "loss": 0.2232, + "step": 50770 + }, + { + "epoch": 0.6549004687993706, + "grad_norm": 0.8804316132162071, + "learning_rate": 9.579761633180356e-06, + "loss": 0.2475, + "step": 50780 + }, + { + "epoch": 0.6550294369893666, + "grad_norm": 0.9037291989571709, + "learning_rate": 9.579460494803238e-06, + "loss": 0.2351, + "step": 50790 + }, + { + "epoch": 0.6551584051793625, + "grad_norm": 0.8100281643104926, + "learning_rate": 9.579159253305055e-06, + "loss": 0.2306, + "step": 50800 + }, + { + "epoch": 0.6552873733693585, + "grad_norm": 0.9421689972602444, + "learning_rate": 9.578857908692592e-06, + "loss": 0.2303, + "step": 50810 + }, + { + "epoch": 0.6554163415593544, + "grad_norm": 0.8658296156899525, + "learning_rate": 9.578556460972633e-06, + "loss": 0.2364, + "step": 50820 + }, + { + "epoch": 0.6555453097493503, + "grad_norm": 0.818159892868735, + "learning_rate": 9.578254910151966e-06, + "loss": 0.2488, + "step": 50830 + }, + { + "epoch": 0.6556742779393463, + "grad_norm": 0.911031854351226, + "learning_rate": 9.577953256237382e-06, + "loss": 0.2395, + "step": 50840 + }, + { + "epoch": 0.6558032461293422, + "grad_norm": 0.8889664095074978, + "learning_rate": 9.577651499235677e-06, + "loss": 0.2532, + "step": 50850 + }, + { + "epoch": 0.6559322143193381, + "grad_norm": 0.9862969759794448, + "learning_rate": 9.57734963915364e-06, + "loss": 0.2436, + "step": 50860 + }, + { + "epoch": 0.6560611825093341, + "grad_norm": 0.8802417203500754, + "learning_rate": 9.577047675998073e-06, + "loss": 0.2309, + "step": 50870 + }, + { + "epoch": 0.65619015069933, + "grad_norm": 1.0011144097550835, + "learning_rate": 9.576745609775774e-06, + "loss": 0.2503, + "step": 50880 + }, + { + "epoch": 0.6563191188893259, + "grad_norm": 0.8178993454331066, + "learning_rate": 9.576443440493543e-06, + "loss": 0.2487, + "step": 50890 + }, + { + "epoch": 0.6564480870793219, + "grad_norm": 0.8785103294899845, + "learning_rate": 9.57614116815819e-06, + "loss": 0.234, + "step": 50900 + }, + { + "epoch": 0.6565770552693179, + "grad_norm": 0.8556597349120094, + "learning_rate": 9.575838792776516e-06, + "loss": 0.2358, + "step": 50910 + }, + { + "epoch": 0.6567060234593137, + "grad_norm": 0.8903651110433686, + "learning_rate": 9.575536314355331e-06, + "loss": 0.2395, + "step": 50920 + }, + { + "epoch": 0.6568349916493097, + "grad_norm": 0.8397345319103863, + "learning_rate": 9.575233732901447e-06, + "loss": 0.2263, + "step": 50930 + }, + { + "epoch": 0.6569639598393057, + "grad_norm": 0.9810814508104859, + "learning_rate": 9.57493104842168e-06, + "loss": 0.2299, + "step": 50940 + }, + { + "epoch": 0.6570929280293015, + "grad_norm": 0.9472039443229717, + "learning_rate": 9.574628260922843e-06, + "loss": 0.2365, + "step": 50950 + }, + { + "epoch": 0.6572218962192975, + "grad_norm": 0.8913642468143137, + "learning_rate": 9.574325370411754e-06, + "loss": 0.2321, + "step": 50960 + }, + { + "epoch": 0.6573508644092935, + "grad_norm": 1.0234762413498473, + "learning_rate": 9.574022376895233e-06, + "loss": 0.2326, + "step": 50970 + }, + { + "epoch": 0.6574798325992894, + "grad_norm": 0.9071297043762826, + "learning_rate": 9.573719280380108e-06, + "loss": 0.2491, + "step": 50980 + }, + { + "epoch": 0.6576088007892853, + "grad_norm": 0.8954674364379501, + "learning_rate": 9.573416080873197e-06, + "loss": 0.2405, + "step": 50990 + }, + { + "epoch": 0.6577377689792813, + "grad_norm": 0.9240103217036091, + "learning_rate": 9.573112778381333e-06, + "loss": 0.2315, + "step": 51000 + }, + { + "epoch": 0.6578667371692772, + "grad_norm": 1.0164308703940532, + "learning_rate": 9.572809372911341e-06, + "loss": 0.236, + "step": 51010 + }, + { + "epoch": 0.6579957053592731, + "grad_norm": 0.9028800904208686, + "learning_rate": 9.572505864470058e-06, + "loss": 0.229, + "step": 51020 + }, + { + "epoch": 0.6581246735492691, + "grad_norm": 0.9307777543938469, + "learning_rate": 9.572202253064315e-06, + "loss": 0.2423, + "step": 51030 + }, + { + "epoch": 0.658253641739265, + "grad_norm": 0.8710785146545194, + "learning_rate": 9.571898538700952e-06, + "loss": 0.2393, + "step": 51040 + }, + { + "epoch": 0.6583826099292609, + "grad_norm": 0.8733039224078185, + "learning_rate": 9.571594721386806e-06, + "loss": 0.2407, + "step": 51050 + }, + { + "epoch": 0.6585115781192569, + "grad_norm": 0.8515081147202994, + "learning_rate": 9.571290801128717e-06, + "loss": 0.2377, + "step": 51060 + }, + { + "epoch": 0.6586405463092528, + "grad_norm": 0.8588757800058064, + "learning_rate": 9.570986777933531e-06, + "loss": 0.2394, + "step": 51070 + }, + { + "epoch": 0.6587695144992488, + "grad_norm": 0.9255933223257135, + "learning_rate": 9.570682651808094e-06, + "loss": 0.2347, + "step": 51080 + }, + { + "epoch": 0.6588984826892447, + "grad_norm": 0.8765494336160736, + "learning_rate": 9.570378422759254e-06, + "loss": 0.2341, + "step": 51090 + }, + { + "epoch": 0.6590274508792406, + "grad_norm": 0.9061644083796757, + "learning_rate": 9.570074090793859e-06, + "loss": 0.2369, + "step": 51100 + }, + { + "epoch": 0.6591564190692366, + "grad_norm": 0.8260418579069607, + "learning_rate": 9.569769655918766e-06, + "loss": 0.2316, + "step": 51110 + }, + { + "epoch": 0.6592853872592325, + "grad_norm": 0.9617177030525925, + "learning_rate": 9.56946511814083e-06, + "loss": 0.2288, + "step": 51120 + }, + { + "epoch": 0.6594143554492284, + "grad_norm": 0.9161599941224212, + "learning_rate": 9.569160477466906e-06, + "loss": 0.2411, + "step": 51130 + }, + { + "epoch": 0.6595433236392244, + "grad_norm": 0.8675071647531332, + "learning_rate": 9.568855733903855e-06, + "loss": 0.2422, + "step": 51140 + }, + { + "epoch": 0.6596722918292203, + "grad_norm": 0.9077534491042055, + "learning_rate": 9.568550887458542e-06, + "loss": 0.2304, + "step": 51150 + }, + { + "epoch": 0.6598012600192162, + "grad_norm": 0.9177381161172835, + "learning_rate": 9.568245938137826e-06, + "loss": 0.2509, + "step": 51160 + }, + { + "epoch": 0.6599302282092122, + "grad_norm": 0.8535331809899008, + "learning_rate": 9.567940885948576e-06, + "loss": 0.2387, + "step": 51170 + }, + { + "epoch": 0.6600591963992082, + "grad_norm": 0.828384808314268, + "learning_rate": 9.567635730897666e-06, + "loss": 0.2351, + "step": 51180 + }, + { + "epoch": 0.660188164589204, + "grad_norm": 0.8409883447884124, + "learning_rate": 9.567330472991962e-06, + "loss": 0.2309, + "step": 51190 + }, + { + "epoch": 0.6603171327792, + "grad_norm": 0.8477959674710595, + "learning_rate": 9.56702511223834e-06, + "loss": 0.232, + "step": 51200 + }, + { + "epoch": 0.660446100969196, + "grad_norm": 0.8603663195040347, + "learning_rate": 9.566719648643677e-06, + "loss": 0.2317, + "step": 51210 + }, + { + "epoch": 0.6605750691591918, + "grad_norm": 0.918435872648591, + "learning_rate": 9.566414082214848e-06, + "loss": 0.2405, + "step": 51220 + }, + { + "epoch": 0.6607040373491878, + "grad_norm": 0.8368329943821786, + "learning_rate": 9.566108412958736e-06, + "loss": 0.2419, + "step": 51230 + }, + { + "epoch": 0.6608330055391838, + "grad_norm": 0.8709091506294144, + "learning_rate": 9.565802640882227e-06, + "loss": 0.2332, + "step": 51240 + }, + { + "epoch": 0.6609619737291798, + "grad_norm": 0.9521721562550098, + "learning_rate": 9.565496765992202e-06, + "loss": 0.2347, + "step": 51250 + }, + { + "epoch": 0.6610909419191756, + "grad_norm": 0.8803452674266772, + "learning_rate": 9.56519078829555e-06, + "loss": 0.24, + "step": 51260 + }, + { + "epoch": 0.6612199101091716, + "grad_norm": 0.8532920537467072, + "learning_rate": 9.564884707799162e-06, + "loss": 0.2356, + "step": 51270 + }, + { + "epoch": 0.6613488782991676, + "grad_norm": 0.8479163961146668, + "learning_rate": 9.56457852450993e-06, + "loss": 0.2348, + "step": 51280 + }, + { + "epoch": 0.6614778464891634, + "grad_norm": 0.9806536037317476, + "learning_rate": 9.564272238434748e-06, + "loss": 0.2551, + "step": 51290 + }, + { + "epoch": 0.6616068146791594, + "grad_norm": 0.8739376496771349, + "learning_rate": 9.563965849580512e-06, + "loss": 0.2436, + "step": 51300 + }, + { + "epoch": 0.6617357828691554, + "grad_norm": 0.8647271748656344, + "learning_rate": 9.563659357954125e-06, + "loss": 0.2357, + "step": 51310 + }, + { + "epoch": 0.6618647510591512, + "grad_norm": 0.852796952680875, + "learning_rate": 9.563352763562486e-06, + "loss": 0.2411, + "step": 51320 + }, + { + "epoch": 0.6619937192491472, + "grad_norm": 0.8325857584924533, + "learning_rate": 9.5630460664125e-06, + "loss": 0.2369, + "step": 51330 + }, + { + "epoch": 0.6621226874391432, + "grad_norm": 0.958798102805126, + "learning_rate": 9.562739266511072e-06, + "loss": 0.2352, + "step": 51340 + }, + { + "epoch": 0.6622516556291391, + "grad_norm": 0.9239973872174659, + "learning_rate": 9.562432363865111e-06, + "loss": 0.2352, + "step": 51350 + }, + { + "epoch": 0.662380623819135, + "grad_norm": 0.9418666041344865, + "learning_rate": 9.562125358481527e-06, + "loss": 0.2436, + "step": 51360 + }, + { + "epoch": 0.662509592009131, + "grad_norm": 0.8721564799093203, + "learning_rate": 9.561818250367237e-06, + "loss": 0.2298, + "step": 51370 + }, + { + "epoch": 0.6626385601991269, + "grad_norm": 0.9868136828399964, + "learning_rate": 9.561511039529152e-06, + "loss": 0.2329, + "step": 51380 + }, + { + "epoch": 0.6627675283891228, + "grad_norm": 1.0216402991848834, + "learning_rate": 9.561203725974191e-06, + "loss": 0.2472, + "step": 51390 + }, + { + "epoch": 0.6628964965791188, + "grad_norm": 0.8223897126736998, + "learning_rate": 9.560896309709275e-06, + "loss": 0.2231, + "step": 51400 + }, + { + "epoch": 0.6630254647691147, + "grad_norm": 0.9588928738327752, + "learning_rate": 9.560588790741328e-06, + "loss": 0.2405, + "step": 51410 + }, + { + "epoch": 0.6631544329591106, + "grad_norm": 0.8526933626230068, + "learning_rate": 9.560281169077272e-06, + "loss": 0.2345, + "step": 51420 + }, + { + "epoch": 0.6632834011491066, + "grad_norm": 0.8930697735730267, + "learning_rate": 9.559973444724035e-06, + "loss": 0.252, + "step": 51430 + }, + { + "epoch": 0.6634123693391025, + "grad_norm": 0.8658644015385021, + "learning_rate": 9.559665617688547e-06, + "loss": 0.2418, + "step": 51440 + }, + { + "epoch": 0.6635413375290985, + "grad_norm": 0.8694300758314185, + "learning_rate": 9.559357687977738e-06, + "loss": 0.2481, + "step": 51450 + }, + { + "epoch": 0.6636703057190944, + "grad_norm": 0.8508249769322784, + "learning_rate": 9.559049655598545e-06, + "loss": 0.2374, + "step": 51460 + }, + { + "epoch": 0.6637992739090903, + "grad_norm": 0.9691322388796382, + "learning_rate": 9.5587415205579e-06, + "loss": 0.2416, + "step": 51470 + }, + { + "epoch": 0.6639282420990863, + "grad_norm": 0.9152743251919779, + "learning_rate": 9.558433282862746e-06, + "loss": 0.2554, + "step": 51480 + }, + { + "epoch": 0.6640572102890822, + "grad_norm": 0.827550766763542, + "learning_rate": 9.558124942520022e-06, + "loss": 0.2342, + "step": 51490 + }, + { + "epoch": 0.6641861784790781, + "grad_norm": 0.8518518015796087, + "learning_rate": 9.55781649953667e-06, + "loss": 0.2412, + "step": 51500 + }, + { + "epoch": 0.6643151466690741, + "grad_norm": 0.8920808699539425, + "learning_rate": 9.557507953919639e-06, + "loss": 0.2465, + "step": 51510 + }, + { + "epoch": 0.66444411485907, + "grad_norm": 0.8969687365762052, + "learning_rate": 9.557199305675872e-06, + "loss": 0.2256, + "step": 51520 + }, + { + "epoch": 0.6645730830490659, + "grad_norm": 0.871019132441206, + "learning_rate": 9.556890554812324e-06, + "loss": 0.2359, + "step": 51530 + }, + { + "epoch": 0.6647020512390619, + "grad_norm": 0.8132981727614668, + "learning_rate": 9.556581701335946e-06, + "loss": 0.2253, + "step": 51540 + }, + { + "epoch": 0.6648310194290579, + "grad_norm": 0.8949186885552264, + "learning_rate": 9.556272745253689e-06, + "loss": 0.2446, + "step": 51550 + }, + { + "epoch": 0.6649599876190537, + "grad_norm": 0.9150724662881671, + "learning_rate": 9.555963686572517e-06, + "loss": 0.2324, + "step": 51560 + }, + { + "epoch": 0.6650889558090497, + "grad_norm": 0.8874194425444131, + "learning_rate": 9.555654525299385e-06, + "loss": 0.2384, + "step": 51570 + }, + { + "epoch": 0.6652179239990457, + "grad_norm": 0.920801852341882, + "learning_rate": 9.555345261441255e-06, + "loss": 0.2411, + "step": 51580 + }, + { + "epoch": 0.6653468921890415, + "grad_norm": 0.8318479039535434, + "learning_rate": 9.555035895005092e-06, + "loss": 0.2329, + "step": 51590 + }, + { + "epoch": 0.6654758603790375, + "grad_norm": 0.947329107794439, + "learning_rate": 9.554726425997863e-06, + "loss": 0.2418, + "step": 51600 + }, + { + "epoch": 0.6656048285690335, + "grad_norm": 0.93779486372696, + "learning_rate": 9.554416854426533e-06, + "loss": 0.2493, + "step": 51610 + }, + { + "epoch": 0.6657337967590294, + "grad_norm": 0.9223299752605704, + "learning_rate": 9.554107180298078e-06, + "loss": 0.2444, + "step": 51620 + }, + { + "epoch": 0.6658627649490253, + "grad_norm": 0.7920640747264442, + "learning_rate": 9.55379740361947e-06, + "loss": 0.2425, + "step": 51630 + }, + { + "epoch": 0.6659917331390213, + "grad_norm": 0.8311069493291338, + "learning_rate": 9.55348752439768e-06, + "loss": 0.2415, + "step": 51640 + }, + { + "epoch": 0.6661207013290172, + "grad_norm": 0.9522891693935022, + "learning_rate": 9.553177542639691e-06, + "loss": 0.2373, + "step": 51650 + }, + { + "epoch": 0.6662496695190131, + "grad_norm": 0.963049356935384, + "learning_rate": 9.552867458352483e-06, + "loss": 0.2485, + "step": 51660 + }, + { + "epoch": 0.6663786377090091, + "grad_norm": 0.8854465978529098, + "learning_rate": 9.552557271543037e-06, + "loss": 0.2348, + "step": 51670 + }, + { + "epoch": 0.666507605899005, + "grad_norm": 0.9357216113275618, + "learning_rate": 9.552246982218337e-06, + "loss": 0.2395, + "step": 51680 + }, + { + "epoch": 0.6666365740890009, + "grad_norm": 0.9676780819493577, + "learning_rate": 9.551936590385372e-06, + "loss": 0.2448, + "step": 51690 + }, + { + "epoch": 0.6667655422789969, + "grad_norm": 0.9451619675932267, + "learning_rate": 9.55162609605113e-06, + "loss": 0.2378, + "step": 51700 + }, + { + "epoch": 0.6668945104689928, + "grad_norm": 0.9127692379040683, + "learning_rate": 9.551315499222604e-06, + "loss": 0.2302, + "step": 51710 + }, + { + "epoch": 0.6670234786589888, + "grad_norm": 0.8791514019995837, + "learning_rate": 9.551004799906788e-06, + "loss": 0.2477, + "step": 51720 + }, + { + "epoch": 0.6671524468489847, + "grad_norm": 0.9570500912385562, + "learning_rate": 9.550693998110678e-06, + "loss": 0.2512, + "step": 51730 + }, + { + "epoch": 0.6672814150389806, + "grad_norm": 0.8906052654831106, + "learning_rate": 9.550383093841272e-06, + "loss": 0.2403, + "step": 51740 + }, + { + "epoch": 0.6674103832289766, + "grad_norm": 0.8207716454652612, + "learning_rate": 9.55007208710557e-06, + "loss": 0.2517, + "step": 51750 + }, + { + "epoch": 0.6675393514189725, + "grad_norm": 0.9587638714116601, + "learning_rate": 9.54976097791058e-06, + "loss": 0.2466, + "step": 51760 + }, + { + "epoch": 0.6676683196089684, + "grad_norm": 0.9506099924276473, + "learning_rate": 9.549449766263303e-06, + "loss": 0.2525, + "step": 51770 + }, + { + "epoch": 0.6677972877989644, + "grad_norm": 0.8856234603415736, + "learning_rate": 9.549138452170748e-06, + "loss": 0.2209, + "step": 51780 + }, + { + "epoch": 0.6679262559889603, + "grad_norm": 0.8762272348190863, + "learning_rate": 9.548827035639926e-06, + "loss": 0.2409, + "step": 51790 + }, + { + "epoch": 0.6680552241789562, + "grad_norm": 0.9184971810826728, + "learning_rate": 9.54851551667785e-06, + "loss": 0.2326, + "step": 51800 + }, + { + "epoch": 0.6681841923689522, + "grad_norm": 0.9969890568751988, + "learning_rate": 9.548203895291532e-06, + "loss": 0.242, + "step": 51810 + }, + { + "epoch": 0.6683131605589482, + "grad_norm": 0.8668397376098648, + "learning_rate": 9.547892171487994e-06, + "loss": 0.2377, + "step": 51820 + }, + { + "epoch": 0.668442128748944, + "grad_norm": 0.8484844507514084, + "learning_rate": 9.54758034527425e-06, + "loss": 0.2326, + "step": 51830 + }, + { + "epoch": 0.66857109693894, + "grad_norm": 0.9807752240656508, + "learning_rate": 9.547268416657326e-06, + "loss": 0.2319, + "step": 51840 + }, + { + "epoch": 0.668700065128936, + "grad_norm": 0.8797467296414527, + "learning_rate": 9.546956385644243e-06, + "loss": 0.2367, + "step": 51850 + }, + { + "epoch": 0.6688290333189318, + "grad_norm": 0.8534892759636725, + "learning_rate": 9.54664425224203e-06, + "loss": 0.2485, + "step": 51860 + }, + { + "epoch": 0.6689580015089278, + "grad_norm": 0.9185988622163355, + "learning_rate": 9.546332016457714e-06, + "loss": 0.2363, + "step": 51870 + }, + { + "epoch": 0.6690869696989238, + "grad_norm": 0.8552601016767825, + "learning_rate": 9.546019678298328e-06, + "loss": 0.2359, + "step": 51880 + }, + { + "epoch": 0.6692159378889196, + "grad_norm": 0.9399885045984896, + "learning_rate": 9.5457072377709e-06, + "loss": 0.2475, + "step": 51890 + }, + { + "epoch": 0.6693449060789156, + "grad_norm": 0.9308770535361647, + "learning_rate": 9.545394694882473e-06, + "loss": 0.2418, + "step": 51900 + }, + { + "epoch": 0.6694738742689116, + "grad_norm": 0.9795137289522502, + "learning_rate": 9.54508204964008e-06, + "loss": 0.2455, + "step": 51910 + }, + { + "epoch": 0.6696028424589076, + "grad_norm": 0.8862375695495901, + "learning_rate": 9.544769302050762e-06, + "loss": 0.2489, + "step": 51920 + }, + { + "epoch": 0.6697318106489034, + "grad_norm": 0.8282787486624674, + "learning_rate": 9.544456452121562e-06, + "loss": 0.2403, + "step": 51930 + }, + { + "epoch": 0.6698607788388994, + "grad_norm": 0.8800734776586805, + "learning_rate": 9.544143499859524e-06, + "loss": 0.2395, + "step": 51940 + }, + { + "epoch": 0.6699897470288954, + "grad_norm": 0.9419071941071822, + "learning_rate": 9.543830445271697e-06, + "loss": 0.2393, + "step": 51950 + }, + { + "epoch": 0.6701187152188912, + "grad_norm": 1.0182087464184204, + "learning_rate": 9.543517288365129e-06, + "loss": 0.2459, + "step": 51960 + }, + { + "epoch": 0.6702476834088872, + "grad_norm": 0.887988774357976, + "learning_rate": 9.543204029146873e-06, + "loss": 0.2433, + "step": 51970 + }, + { + "epoch": 0.6703766515988832, + "grad_norm": 0.9701390994903625, + "learning_rate": 9.54289066762398e-06, + "loss": 0.2424, + "step": 51980 + }, + { + "epoch": 0.6705056197888791, + "grad_norm": 0.8496580018114701, + "learning_rate": 9.54257720380351e-06, + "loss": 0.2464, + "step": 51990 + }, + { + "epoch": 0.670634587978875, + "grad_norm": 0.918458809356814, + "learning_rate": 9.542263637692517e-06, + "loss": 0.2379, + "step": 52000 + }, + { + "epoch": 0.670763556168871, + "grad_norm": 0.9086568440836185, + "learning_rate": 9.541949969298067e-06, + "loss": 0.2369, + "step": 52010 + }, + { + "epoch": 0.6708925243588669, + "grad_norm": 1.0041182346469475, + "learning_rate": 9.541636198627222e-06, + "loss": 0.2297, + "step": 52020 + }, + { + "epoch": 0.6710214925488628, + "grad_norm": 0.8563538254091729, + "learning_rate": 9.541322325687046e-06, + "loss": 0.2369, + "step": 52030 + }, + { + "epoch": 0.6711504607388588, + "grad_norm": 0.9394953720232951, + "learning_rate": 9.541008350484608e-06, + "loss": 0.2385, + "step": 52040 + }, + { + "epoch": 0.6712794289288547, + "grad_norm": 0.9200921562584483, + "learning_rate": 9.540694273026976e-06, + "loss": 0.2279, + "step": 52050 + }, + { + "epoch": 0.6714083971188506, + "grad_norm": 0.8555728957142302, + "learning_rate": 9.540380093321226e-06, + "loss": 0.2458, + "step": 52060 + }, + { + "epoch": 0.6715373653088466, + "grad_norm": 0.8799223643790175, + "learning_rate": 9.540065811374428e-06, + "loss": 0.2417, + "step": 52070 + }, + { + "epoch": 0.6716663334988425, + "grad_norm": 0.8740153392905516, + "learning_rate": 9.539751427193665e-06, + "loss": 0.2395, + "step": 52080 + }, + { + "epoch": 0.6717953016888385, + "grad_norm": 0.954854323324564, + "learning_rate": 9.539436940786012e-06, + "loss": 0.2459, + "step": 52090 + }, + { + "epoch": 0.6719242698788344, + "grad_norm": 0.9393200097482829, + "learning_rate": 9.539122352158552e-06, + "loss": 0.2261, + "step": 52100 + }, + { + "epoch": 0.6720532380688303, + "grad_norm": 0.8631189917655839, + "learning_rate": 9.538807661318368e-06, + "loss": 0.2421, + "step": 52110 + }, + { + "epoch": 0.6721822062588263, + "grad_norm": 0.9144247925972765, + "learning_rate": 9.538492868272549e-06, + "loss": 0.2187, + "step": 52120 + }, + { + "epoch": 0.6723111744488222, + "grad_norm": 0.9561127116044796, + "learning_rate": 9.538177973028182e-06, + "loss": 0.2241, + "step": 52130 + }, + { + "epoch": 0.6724401426388181, + "grad_norm": 0.9073785257401902, + "learning_rate": 9.537862975592355e-06, + "loss": 0.2406, + "step": 52140 + }, + { + "epoch": 0.6725691108288141, + "grad_norm": 0.955141419482207, + "learning_rate": 9.537547875972163e-06, + "loss": 0.229, + "step": 52150 + }, + { + "epoch": 0.67269807901881, + "grad_norm": 0.9297083140799111, + "learning_rate": 9.537232674174705e-06, + "loss": 0.2406, + "step": 52160 + }, + { + "epoch": 0.6728270472088059, + "grad_norm": 0.9473567727269384, + "learning_rate": 9.536917370207078e-06, + "loss": 0.236, + "step": 52170 + }, + { + "epoch": 0.6729560153988019, + "grad_norm": 0.9221480485013867, + "learning_rate": 9.536601964076375e-06, + "loss": 0.2331, + "step": 52180 + }, + { + "epoch": 0.6730849835887979, + "grad_norm": 0.877715350150157, + "learning_rate": 9.536286455789707e-06, + "loss": 0.2339, + "step": 52190 + }, + { + "epoch": 0.6732139517787937, + "grad_norm": 0.9291167531794333, + "learning_rate": 9.535970845354171e-06, + "loss": 0.2429, + "step": 52200 + }, + { + "epoch": 0.6733429199687897, + "grad_norm": 0.9756874621641406, + "learning_rate": 9.535655132776883e-06, + "loss": 0.2449, + "step": 52210 + }, + { + "epoch": 0.6734718881587857, + "grad_norm": 0.8401912046104384, + "learning_rate": 9.535339318064944e-06, + "loss": 0.243, + "step": 52220 + }, + { + "epoch": 0.6736008563487815, + "grad_norm": 0.8640634720522679, + "learning_rate": 9.53502340122547e-06, + "loss": 0.2427, + "step": 52230 + }, + { + "epoch": 0.6737298245387775, + "grad_norm": 0.8990042349437001, + "learning_rate": 9.534707382265574e-06, + "loss": 0.2526, + "step": 52240 + }, + { + "epoch": 0.6738587927287735, + "grad_norm": 0.9472181798017889, + "learning_rate": 9.534391261192372e-06, + "loss": 0.2525, + "step": 52250 + }, + { + "epoch": 0.6739877609187694, + "grad_norm": 0.9508920496132547, + "learning_rate": 9.534075038012983e-06, + "loss": 0.2358, + "step": 52260 + }, + { + "epoch": 0.6741167291087653, + "grad_norm": 0.905980209488704, + "learning_rate": 9.533758712734525e-06, + "loss": 0.237, + "step": 52270 + }, + { + "epoch": 0.6742456972987613, + "grad_norm": 0.8754510015592872, + "learning_rate": 9.533442285364123e-06, + "loss": 0.2331, + "step": 52280 + }, + { + "epoch": 0.6743746654887572, + "grad_norm": 0.893561179386779, + "learning_rate": 9.533125755908906e-06, + "loss": 0.2404, + "step": 52290 + }, + { + "epoch": 0.6745036336787531, + "grad_norm": 0.9996425434846996, + "learning_rate": 9.532809124375995e-06, + "loss": 0.2492, + "step": 52300 + }, + { + "epoch": 0.6746326018687491, + "grad_norm": 0.8945581466588093, + "learning_rate": 9.532492390772525e-06, + "loss": 0.2328, + "step": 52310 + }, + { + "epoch": 0.674761570058745, + "grad_norm": 0.9516003639246754, + "learning_rate": 9.532175555105625e-06, + "loss": 0.2389, + "step": 52320 + }, + { + "epoch": 0.6748905382487409, + "grad_norm": 0.9479434025172959, + "learning_rate": 9.531858617382433e-06, + "loss": 0.2391, + "step": 52330 + }, + { + "epoch": 0.6750195064387369, + "grad_norm": 0.9042419441721858, + "learning_rate": 9.531541577610082e-06, + "loss": 0.235, + "step": 52340 + }, + { + "epoch": 0.6751484746287328, + "grad_norm": 0.8752299461150866, + "learning_rate": 9.531224435795714e-06, + "loss": 0.2316, + "step": 52350 + }, + { + "epoch": 0.6752774428187288, + "grad_norm": 0.8809062900442064, + "learning_rate": 9.530907191946468e-06, + "loss": 0.2399, + "step": 52360 + }, + { + "epoch": 0.6754064110087247, + "grad_norm": 0.8759309446298512, + "learning_rate": 9.530589846069491e-06, + "loss": 0.2413, + "step": 52370 + }, + { + "epoch": 0.6755353791987206, + "grad_norm": 0.9643146773863276, + "learning_rate": 9.530272398171927e-06, + "loss": 0.2303, + "step": 52380 + }, + { + "epoch": 0.6756643473887166, + "grad_norm": 0.8150983098813804, + "learning_rate": 9.529954848260924e-06, + "loss": 0.232, + "step": 52390 + }, + { + "epoch": 0.6757933155787125, + "grad_norm": 0.9530942178743874, + "learning_rate": 9.529637196343634e-06, + "loss": 0.2259, + "step": 52400 + }, + { + "epoch": 0.6759222837687084, + "grad_norm": 0.8314575496696534, + "learning_rate": 9.52931944242721e-06, + "loss": 0.2473, + "step": 52410 + }, + { + "epoch": 0.6760512519587044, + "grad_norm": 0.9386313136296304, + "learning_rate": 9.529001586518805e-06, + "loss": 0.2502, + "step": 52420 + }, + { + "epoch": 0.6761802201487003, + "grad_norm": 0.9701688526404249, + "learning_rate": 9.528683628625577e-06, + "loss": 0.2472, + "step": 52430 + }, + { + "epoch": 0.6763091883386962, + "grad_norm": 0.8872874663016495, + "learning_rate": 9.52836556875469e-06, + "loss": 0.2392, + "step": 52440 + }, + { + "epoch": 0.6764381565286922, + "grad_norm": 0.8380556172606853, + "learning_rate": 9.5280474069133e-06, + "loss": 0.248, + "step": 52450 + }, + { + "epoch": 0.6765671247186882, + "grad_norm": 0.9077657303684799, + "learning_rate": 9.527729143108574e-06, + "loss": 0.2386, + "step": 52460 + }, + { + "epoch": 0.676696092908684, + "grad_norm": 0.884388007758167, + "learning_rate": 9.527410777347683e-06, + "loss": 0.2406, + "step": 52470 + }, + { + "epoch": 0.67682506109868, + "grad_norm": 0.9300182504709268, + "learning_rate": 9.527092309637788e-06, + "loss": 0.2333, + "step": 52480 + }, + { + "epoch": 0.676954029288676, + "grad_norm": 0.8531994163879818, + "learning_rate": 9.526773739986066e-06, + "loss": 0.2211, + "step": 52490 + }, + { + "epoch": 0.6770829974786718, + "grad_norm": 0.8075339921901539, + "learning_rate": 9.52645506839969e-06, + "loss": 0.2509, + "step": 52500 + }, + { + "epoch": 0.6772119656686678, + "grad_norm": 0.9335836019224786, + "learning_rate": 9.526136294885834e-06, + "loss": 0.2288, + "step": 52510 + }, + { + "epoch": 0.6773409338586638, + "grad_norm": 0.9735342250742969, + "learning_rate": 9.525817419451677e-06, + "loss": 0.2401, + "step": 52520 + }, + { + "epoch": 0.6774699020486596, + "grad_norm": 0.8670977665073877, + "learning_rate": 9.5254984421044e-06, + "loss": 0.2314, + "step": 52530 + }, + { + "epoch": 0.6775988702386556, + "grad_norm": 0.9888815662199782, + "learning_rate": 9.525179362851185e-06, + "loss": 0.2427, + "step": 52540 + }, + { + "epoch": 0.6777278384286516, + "grad_norm": 1.0050319751468442, + "learning_rate": 9.524860181699218e-06, + "loss": 0.2323, + "step": 52550 + }, + { + "epoch": 0.6778568066186476, + "grad_norm": 0.8443967975032252, + "learning_rate": 9.524540898655685e-06, + "loss": 0.2292, + "step": 52560 + }, + { + "epoch": 0.6779857748086434, + "grad_norm": 0.8750759559495047, + "learning_rate": 9.524221513727776e-06, + "loss": 0.2314, + "step": 52570 + }, + { + "epoch": 0.6781147429986394, + "grad_norm": 0.8038852169144513, + "learning_rate": 9.523902026922684e-06, + "loss": 0.228, + "step": 52580 + }, + { + "epoch": 0.6782437111886354, + "grad_norm": 0.9304855915377364, + "learning_rate": 9.523582438247605e-06, + "loss": 0.2411, + "step": 52590 + }, + { + "epoch": 0.6783726793786312, + "grad_norm": 0.9494143448670872, + "learning_rate": 9.52326274770973e-06, + "loss": 0.2311, + "step": 52600 + }, + { + "epoch": 0.6785016475686272, + "grad_norm": 1.0035686857240413, + "learning_rate": 9.522942955316263e-06, + "loss": 0.2407, + "step": 52610 + }, + { + "epoch": 0.6786306157586232, + "grad_norm": 0.883075777661634, + "learning_rate": 9.522623061074403e-06, + "loss": 0.2316, + "step": 52620 + }, + { + "epoch": 0.6787595839486191, + "grad_norm": 0.8511464919783002, + "learning_rate": 9.522303064991354e-06, + "loss": 0.2554, + "step": 52630 + }, + { + "epoch": 0.678888552138615, + "grad_norm": 0.8265305920963024, + "learning_rate": 9.52198296707432e-06, + "loss": 0.2311, + "step": 52640 + }, + { + "epoch": 0.679017520328611, + "grad_norm": 0.8729637911506049, + "learning_rate": 9.521662767330511e-06, + "loss": 0.2356, + "step": 52650 + }, + { + "epoch": 0.6791464885186069, + "grad_norm": 0.8445378011579499, + "learning_rate": 9.521342465767139e-06, + "loss": 0.2358, + "step": 52660 + }, + { + "epoch": 0.6792754567086028, + "grad_norm": 0.9003667676198044, + "learning_rate": 9.521022062391411e-06, + "loss": 0.2472, + "step": 52670 + }, + { + "epoch": 0.6794044248985988, + "grad_norm": 0.9251413406028763, + "learning_rate": 9.520701557210548e-06, + "loss": 0.2205, + "step": 52680 + }, + { + "epoch": 0.6795333930885947, + "grad_norm": 0.8557559134049632, + "learning_rate": 9.520380950231762e-06, + "loss": 0.2302, + "step": 52690 + }, + { + "epoch": 0.6796623612785906, + "grad_norm": 0.9137876662757775, + "learning_rate": 9.520060241462277e-06, + "loss": 0.234, + "step": 52700 + }, + { + "epoch": 0.6797913294685866, + "grad_norm": 0.9662453814656075, + "learning_rate": 9.51973943090931e-06, + "loss": 0.2342, + "step": 52710 + }, + { + "epoch": 0.6799202976585825, + "grad_norm": 0.8701423163769121, + "learning_rate": 9.51941851858009e-06, + "loss": 0.2522, + "step": 52720 + }, + { + "epoch": 0.6800492658485785, + "grad_norm": 0.9004974510613387, + "learning_rate": 9.519097504481843e-06, + "loss": 0.2417, + "step": 52730 + }, + { + "epoch": 0.6801782340385744, + "grad_norm": 0.8960716515928835, + "learning_rate": 9.518776388621791e-06, + "loss": 0.2446, + "step": 52740 + }, + { + "epoch": 0.6803072022285703, + "grad_norm": 0.8723011593345732, + "learning_rate": 9.518455171007172e-06, + "loss": 0.2388, + "step": 52750 + }, + { + "epoch": 0.6804361704185663, + "grad_norm": 0.9119757891165217, + "learning_rate": 9.51813385164522e-06, + "loss": 0.2199, + "step": 52760 + }, + { + "epoch": 0.6805651386085622, + "grad_norm": 0.8619302924304567, + "learning_rate": 9.517812430543163e-06, + "loss": 0.2429, + "step": 52770 + }, + { + "epoch": 0.6806941067985581, + "grad_norm": 0.9372844817999921, + "learning_rate": 9.517490907708244e-06, + "loss": 0.2368, + "step": 52780 + }, + { + "epoch": 0.6808230749885541, + "grad_norm": 0.858058040322166, + "learning_rate": 9.517169283147705e-06, + "loss": 0.2418, + "step": 52790 + }, + { + "epoch": 0.68095204317855, + "grad_norm": 0.8836383069992723, + "learning_rate": 9.516847556868784e-06, + "loss": 0.2321, + "step": 52800 + }, + { + "epoch": 0.6810810113685459, + "grad_norm": 0.9299791823573831, + "learning_rate": 9.516525728878727e-06, + "loss": 0.2365, + "step": 52810 + }, + { + "epoch": 0.6812099795585419, + "grad_norm": 0.9370793739069542, + "learning_rate": 9.516203799184781e-06, + "loss": 0.2431, + "step": 52820 + }, + { + "epoch": 0.6813389477485379, + "grad_norm": 0.9899604221059272, + "learning_rate": 9.515881767794197e-06, + "loss": 0.2428, + "step": 52830 + }, + { + "epoch": 0.6814679159385337, + "grad_norm": 0.8698006001320953, + "learning_rate": 9.515559634714226e-06, + "loss": 0.2371, + "step": 52840 + }, + { + "epoch": 0.6815968841285297, + "grad_norm": 0.9075294145632578, + "learning_rate": 9.51523739995212e-06, + "loss": 0.2426, + "step": 52850 + }, + { + "epoch": 0.6817258523185257, + "grad_norm": 0.8207929340288487, + "learning_rate": 9.514915063515137e-06, + "loss": 0.2345, + "step": 52860 + }, + { + "epoch": 0.6818548205085215, + "grad_norm": 0.9009696095617594, + "learning_rate": 9.514592625410534e-06, + "loss": 0.2333, + "step": 52870 + }, + { + "epoch": 0.6819837886985175, + "grad_norm": 0.8956270055122696, + "learning_rate": 9.514270085645572e-06, + "loss": 0.2306, + "step": 52880 + }, + { + "epoch": 0.6821127568885135, + "grad_norm": 0.8779161077475721, + "learning_rate": 9.513947444227516e-06, + "loss": 0.2443, + "step": 52890 + }, + { + "epoch": 0.6822417250785093, + "grad_norm": 0.8677897102115498, + "learning_rate": 9.513624701163629e-06, + "loss": 0.242, + "step": 52900 + }, + { + "epoch": 0.6823706932685053, + "grad_norm": 0.8848882421825648, + "learning_rate": 9.513301856461178e-06, + "loss": 0.2498, + "step": 52910 + }, + { + "epoch": 0.6824996614585013, + "grad_norm": 0.8012327228231829, + "learning_rate": 9.512978910127435e-06, + "loss": 0.2356, + "step": 52920 + }, + { + "epoch": 0.6826286296484972, + "grad_norm": 0.9181802392330172, + "learning_rate": 9.512655862169671e-06, + "loss": 0.2436, + "step": 52930 + }, + { + "epoch": 0.6827575978384931, + "grad_norm": 0.7866163193892292, + "learning_rate": 9.512332712595162e-06, + "loss": 0.2378, + "step": 52940 + }, + { + "epoch": 0.6828865660284891, + "grad_norm": 0.9329199979258752, + "learning_rate": 9.512009461411182e-06, + "loss": 0.2351, + "step": 52950 + }, + { + "epoch": 0.683015534218485, + "grad_norm": 0.8598995214530636, + "learning_rate": 9.511686108625014e-06, + "loss": 0.2445, + "step": 52960 + }, + { + "epoch": 0.6831445024084809, + "grad_norm": 0.8917964208291426, + "learning_rate": 9.511362654243935e-06, + "loss": 0.2572, + "step": 52970 + }, + { + "epoch": 0.6832734705984769, + "grad_norm": 0.8603920614759589, + "learning_rate": 9.51103909827523e-06, + "loss": 0.2319, + "step": 52980 + }, + { + "epoch": 0.6834024387884728, + "grad_norm": 0.8802036637374135, + "learning_rate": 9.510715440726185e-06, + "loss": 0.246, + "step": 52990 + }, + { + "epoch": 0.6835314069784688, + "grad_norm": 0.9366639121333514, + "learning_rate": 9.51039168160409e-06, + "loss": 0.2515, + "step": 53000 + }, + { + "epoch": 0.6836603751684647, + "grad_norm": 0.8116965029726455, + "learning_rate": 9.510067820916233e-06, + "loss": 0.2455, + "step": 53010 + }, + { + "epoch": 0.6837893433584606, + "grad_norm": 0.9245392501773274, + "learning_rate": 9.509743858669908e-06, + "loss": 0.2419, + "step": 53020 + }, + { + "epoch": 0.6839183115484566, + "grad_norm": 0.8369485948203681, + "learning_rate": 9.50941979487241e-06, + "loss": 0.2344, + "step": 53030 + }, + { + "epoch": 0.6840472797384525, + "grad_norm": 0.8186464598555964, + "learning_rate": 9.509095629531036e-06, + "loss": 0.2226, + "step": 53040 + }, + { + "epoch": 0.6841762479284484, + "grad_norm": 0.8854313309048488, + "learning_rate": 9.508771362653084e-06, + "loss": 0.2427, + "step": 53050 + }, + { + "epoch": 0.6843052161184444, + "grad_norm": 0.935256318434807, + "learning_rate": 9.50844699424586e-06, + "loss": 0.25, + "step": 53060 + }, + { + "epoch": 0.6844341843084403, + "grad_norm": 0.8749121155193449, + "learning_rate": 9.508122524316665e-06, + "loss": 0.2299, + "step": 53070 + }, + { + "epoch": 0.6845631524984362, + "grad_norm": 0.9118477706902465, + "learning_rate": 9.507797952872806e-06, + "loss": 0.2377, + "step": 53080 + }, + { + "epoch": 0.6846921206884322, + "grad_norm": 0.9277788556087745, + "learning_rate": 9.507473279921593e-06, + "loss": 0.2367, + "step": 53090 + }, + { + "epoch": 0.6848210888784282, + "grad_norm": 0.9687406788311709, + "learning_rate": 9.507148505470334e-06, + "loss": 0.2425, + "step": 53100 + }, + { + "epoch": 0.684950057068424, + "grad_norm": 0.8285563364073857, + "learning_rate": 9.506823629526347e-06, + "loss": 0.2307, + "step": 53110 + }, + { + "epoch": 0.68507902525842, + "grad_norm": 0.9342301355860184, + "learning_rate": 9.506498652096943e-06, + "loss": 0.2325, + "step": 53120 + }, + { + "epoch": 0.685207993448416, + "grad_norm": 0.9423068154494705, + "learning_rate": 9.506173573189442e-06, + "loss": 0.249, + "step": 53130 + }, + { + "epoch": 0.6853369616384118, + "grad_norm": 0.9002681205268115, + "learning_rate": 9.505848392811163e-06, + "loss": 0.2358, + "step": 53140 + }, + { + "epoch": 0.6854659298284078, + "grad_norm": 1.0153717091178112, + "learning_rate": 9.505523110969433e-06, + "loss": 0.2565, + "step": 53150 + }, + { + "epoch": 0.6855948980184038, + "grad_norm": 0.952643526749477, + "learning_rate": 9.50519772767157e-06, + "loss": 0.2433, + "step": 53160 + }, + { + "epoch": 0.6857238662083996, + "grad_norm": 0.8830818287134754, + "learning_rate": 9.504872242924905e-06, + "loss": 0.2357, + "step": 53170 + }, + { + "epoch": 0.6858528343983956, + "grad_norm": 0.991518471078761, + "learning_rate": 9.504546656736768e-06, + "loss": 0.2518, + "step": 53180 + }, + { + "epoch": 0.6859818025883916, + "grad_norm": 0.9757270377474581, + "learning_rate": 9.504220969114489e-06, + "loss": 0.247, + "step": 53190 + }, + { + "epoch": 0.6861107707783876, + "grad_norm": 0.8430642236930143, + "learning_rate": 9.5038951800654e-06, + "loss": 0.2379, + "step": 53200 + }, + { + "epoch": 0.6862397389683834, + "grad_norm": 0.8877753957414459, + "learning_rate": 9.503569289596841e-06, + "loss": 0.2355, + "step": 53210 + }, + { + "epoch": 0.6863687071583794, + "grad_norm": 0.8669281046988402, + "learning_rate": 9.503243297716148e-06, + "loss": 0.2358, + "step": 53220 + }, + { + "epoch": 0.6864976753483754, + "grad_norm": 0.9229317500245809, + "learning_rate": 9.502917204430663e-06, + "loss": 0.2459, + "step": 53230 + }, + { + "epoch": 0.6866266435383712, + "grad_norm": 0.8512717125283873, + "learning_rate": 9.502591009747728e-06, + "loss": 0.2429, + "step": 53240 + }, + { + "epoch": 0.6867556117283672, + "grad_norm": 0.9021422138268167, + "learning_rate": 9.50226471367469e-06, + "loss": 0.2232, + "step": 53250 + }, + { + "epoch": 0.6868845799183632, + "grad_norm": 0.8622679884974129, + "learning_rate": 9.501938316218895e-06, + "loss": 0.2303, + "step": 53260 + }, + { + "epoch": 0.687013548108359, + "grad_norm": 1.0112374645384976, + "learning_rate": 9.501611817387693e-06, + "loss": 0.2476, + "step": 53270 + }, + { + "epoch": 0.687142516298355, + "grad_norm": 0.924516506104541, + "learning_rate": 9.501285217188437e-06, + "loss": 0.2409, + "step": 53280 + }, + { + "epoch": 0.687271484488351, + "grad_norm": 0.8784688610678706, + "learning_rate": 9.500958515628479e-06, + "loss": 0.2341, + "step": 53290 + }, + { + "epoch": 0.6874004526783469, + "grad_norm": 0.9283648619973242, + "learning_rate": 9.500631712715179e-06, + "loss": 0.2459, + "step": 53300 + }, + { + "epoch": 0.6875294208683428, + "grad_norm": 0.895956972931621, + "learning_rate": 9.500304808455894e-06, + "loss": 0.2492, + "step": 53310 + }, + { + "epoch": 0.6876583890583388, + "grad_norm": 0.8421378105280017, + "learning_rate": 9.499977802857986e-06, + "loss": 0.2245, + "step": 53320 + }, + { + "epoch": 0.6877873572483347, + "grad_norm": 0.9191315328109606, + "learning_rate": 9.499650695928818e-06, + "loss": 0.2451, + "step": 53330 + }, + { + "epoch": 0.6879163254383306, + "grad_norm": 0.8876625951995073, + "learning_rate": 9.499323487675759e-06, + "loss": 0.2551, + "step": 53340 + }, + { + "epoch": 0.6880452936283266, + "grad_norm": 0.9248764947258501, + "learning_rate": 9.49899617810617e-06, + "loss": 0.2309, + "step": 53350 + }, + { + "epoch": 0.6881742618183225, + "grad_norm": 0.9201078107538087, + "learning_rate": 9.498668767227431e-06, + "loss": 0.2386, + "step": 53360 + }, + { + "epoch": 0.6883032300083185, + "grad_norm": 0.8310923448815831, + "learning_rate": 9.498341255046905e-06, + "loss": 0.2393, + "step": 53370 + }, + { + "epoch": 0.6884321981983144, + "grad_norm": 0.9292447444567125, + "learning_rate": 9.498013641571974e-06, + "loss": 0.2402, + "step": 53380 + }, + { + "epoch": 0.6885611663883103, + "grad_norm": 0.8852690364396573, + "learning_rate": 9.497685926810011e-06, + "loss": 0.2358, + "step": 53390 + }, + { + "epoch": 0.6886901345783063, + "grad_norm": 0.9357707849091914, + "learning_rate": 9.4973581107684e-06, + "loss": 0.2373, + "step": 53400 + }, + { + "epoch": 0.6888191027683022, + "grad_norm": 0.9545727799377588, + "learning_rate": 9.497030193454517e-06, + "loss": 0.2357, + "step": 53410 + }, + { + "epoch": 0.6889480709582981, + "grad_norm": 1.0720388596844248, + "learning_rate": 9.496702174875751e-06, + "loss": 0.245, + "step": 53420 + }, + { + "epoch": 0.6890770391482941, + "grad_norm": 0.9269112459740508, + "learning_rate": 9.496374055039486e-06, + "loss": 0.2415, + "step": 53430 + }, + { + "epoch": 0.68920600733829, + "grad_norm": 0.9066562322138593, + "learning_rate": 9.496045833953112e-06, + "loss": 0.2294, + "step": 53440 + }, + { + "epoch": 0.6893349755282859, + "grad_norm": 0.8993278443153462, + "learning_rate": 9.495717511624019e-06, + "loss": 0.2322, + "step": 53450 + }, + { + "epoch": 0.6894639437182819, + "grad_norm": 0.8467760341724087, + "learning_rate": 9.4953890880596e-06, + "loss": 0.2422, + "step": 53460 + }, + { + "epoch": 0.6895929119082779, + "grad_norm": 0.7440419477090994, + "learning_rate": 9.495060563267251e-06, + "loss": 0.2478, + "step": 53470 + }, + { + "epoch": 0.6897218800982737, + "grad_norm": 0.8904597545895819, + "learning_rate": 9.49473193725437e-06, + "loss": 0.2253, + "step": 53480 + }, + { + "epoch": 0.6898508482882697, + "grad_norm": 0.9665031321181609, + "learning_rate": 9.494403210028355e-06, + "loss": 0.2496, + "step": 53490 + }, + { + "epoch": 0.6899798164782657, + "grad_norm": 0.8926849053736533, + "learning_rate": 9.494074381596613e-06, + "loss": 0.2399, + "step": 53500 + }, + { + "epoch": 0.6901087846682615, + "grad_norm": 0.9334260695755693, + "learning_rate": 9.493745451966543e-06, + "loss": 0.2507, + "step": 53510 + }, + { + "epoch": 0.6902377528582575, + "grad_norm": 0.8632798126160822, + "learning_rate": 9.493416421145556e-06, + "loss": 0.2301, + "step": 53520 + }, + { + "epoch": 0.6903667210482535, + "grad_norm": 0.958049388878638, + "learning_rate": 9.493087289141061e-06, + "loss": 0.2335, + "step": 53530 + }, + { + "epoch": 0.6904956892382493, + "grad_norm": 0.876574981526435, + "learning_rate": 9.492758055960467e-06, + "loss": 0.2248, + "step": 53540 + }, + { + "epoch": 0.6906246574282453, + "grad_norm": 0.8795204508655735, + "learning_rate": 9.492428721611187e-06, + "loss": 0.2425, + "step": 53550 + }, + { + "epoch": 0.6907536256182413, + "grad_norm": 0.8941028814740644, + "learning_rate": 9.49209928610064e-06, + "loss": 0.2301, + "step": 53560 + }, + { + "epoch": 0.6908825938082372, + "grad_norm": 0.9638689649658893, + "learning_rate": 9.491769749436245e-06, + "loss": 0.2344, + "step": 53570 + }, + { + "epoch": 0.6910115619982331, + "grad_norm": 0.8440746176907017, + "learning_rate": 9.491440111625418e-06, + "loss": 0.2345, + "step": 53580 + }, + { + "epoch": 0.6911405301882291, + "grad_norm": 0.985482272189501, + "learning_rate": 9.491110372675586e-06, + "loss": 0.2443, + "step": 53590 + }, + { + "epoch": 0.691269498378225, + "grad_norm": 0.848872499506119, + "learning_rate": 9.490780532594174e-06, + "loss": 0.2329, + "step": 53600 + }, + { + "epoch": 0.6913984665682209, + "grad_norm": 0.9179759301234729, + "learning_rate": 9.490450591388607e-06, + "loss": 0.2586, + "step": 53610 + }, + { + "epoch": 0.6915274347582169, + "grad_norm": 0.8396009138162421, + "learning_rate": 9.490120549066317e-06, + "loss": 0.2446, + "step": 53620 + }, + { + "epoch": 0.6916564029482128, + "grad_norm": 0.9159512513430202, + "learning_rate": 9.489790405634733e-06, + "loss": 0.2361, + "step": 53630 + }, + { + "epoch": 0.6917853711382088, + "grad_norm": 0.9440704183527486, + "learning_rate": 9.489460161101291e-06, + "loss": 0.2363, + "step": 53640 + }, + { + "epoch": 0.6919143393282047, + "grad_norm": 0.9526364218888405, + "learning_rate": 9.489129815473428e-06, + "loss": 0.2296, + "step": 53650 + }, + { + "epoch": 0.6920433075182006, + "grad_norm": 0.856436922602491, + "learning_rate": 9.488799368758584e-06, + "loss": 0.2378, + "step": 53660 + }, + { + "epoch": 0.6921722757081966, + "grad_norm": 0.873806959764176, + "learning_rate": 9.488468820964195e-06, + "loss": 0.2454, + "step": 53670 + }, + { + "epoch": 0.6923012438981925, + "grad_norm": 0.984733761277186, + "learning_rate": 9.48813817209771e-06, + "loss": 0.2283, + "step": 53680 + }, + { + "epoch": 0.6924302120881884, + "grad_norm": 0.9096022863464184, + "learning_rate": 9.48780742216657e-06, + "loss": 0.2434, + "step": 53690 + }, + { + "epoch": 0.6925591802781844, + "grad_norm": 0.8431293984777597, + "learning_rate": 9.487476571178228e-06, + "loss": 0.231, + "step": 53700 + }, + { + "epoch": 0.6926881484681803, + "grad_norm": 0.8823928012494732, + "learning_rate": 9.48714561914013e-06, + "loss": 0.2375, + "step": 53710 + }, + { + "epoch": 0.6928171166581762, + "grad_norm": 0.8291029448430021, + "learning_rate": 9.486814566059729e-06, + "loss": 0.2368, + "step": 53720 + }, + { + "epoch": 0.6929460848481722, + "grad_norm": 0.8941452903232883, + "learning_rate": 9.48648341194448e-06, + "loss": 0.2378, + "step": 53730 + }, + { + "epoch": 0.6930750530381682, + "grad_norm": 0.9546738353258973, + "learning_rate": 9.486152156801842e-06, + "loss": 0.2594, + "step": 53740 + }, + { + "epoch": 0.693204021228164, + "grad_norm": 1.057998288901822, + "learning_rate": 9.485820800639273e-06, + "loss": 0.2431, + "step": 53750 + }, + { + "epoch": 0.69333298941816, + "grad_norm": 0.9514715694183501, + "learning_rate": 9.485489343464234e-06, + "loss": 0.2449, + "step": 53760 + }, + { + "epoch": 0.693461957608156, + "grad_norm": 0.8622099353367069, + "learning_rate": 9.485157785284187e-06, + "loss": 0.2371, + "step": 53770 + }, + { + "epoch": 0.6935909257981518, + "grad_norm": 0.9178171797478271, + "learning_rate": 9.484826126106603e-06, + "loss": 0.2349, + "step": 53780 + }, + { + "epoch": 0.6937198939881478, + "grad_norm": 0.8914448926474192, + "learning_rate": 9.484494365938946e-06, + "loss": 0.2309, + "step": 53790 + }, + { + "epoch": 0.6938488621781438, + "grad_norm": 0.9138815150926108, + "learning_rate": 9.484162504788688e-06, + "loss": 0.2353, + "step": 53800 + }, + { + "epoch": 0.6939778303681396, + "grad_norm": 0.8558528013082893, + "learning_rate": 9.483830542663302e-06, + "loss": 0.238, + "step": 53810 + }, + { + "epoch": 0.6941067985581356, + "grad_norm": 0.8136400487783707, + "learning_rate": 9.483498479570262e-06, + "loss": 0.2223, + "step": 53820 + }, + { + "epoch": 0.6942357667481316, + "grad_norm": 0.8599635405199738, + "learning_rate": 9.483166315517048e-06, + "loss": 0.2221, + "step": 53830 + }, + { + "epoch": 0.6943647349381276, + "grad_norm": 0.8458129096731724, + "learning_rate": 9.482834050511138e-06, + "loss": 0.2448, + "step": 53840 + }, + { + "epoch": 0.6944937031281234, + "grad_norm": 0.8758721175857371, + "learning_rate": 9.482501684560014e-06, + "loss": 0.236, + "step": 53850 + }, + { + "epoch": 0.6946226713181194, + "grad_norm": 0.8703743471660373, + "learning_rate": 9.482169217671163e-06, + "loss": 0.228, + "step": 53860 + }, + { + "epoch": 0.6947516395081154, + "grad_norm": 0.8777797938567082, + "learning_rate": 9.481836649852066e-06, + "loss": 0.2409, + "step": 53870 + }, + { + "epoch": 0.6948806076981112, + "grad_norm": 0.8622582799157603, + "learning_rate": 9.481503981110216e-06, + "loss": 0.2366, + "step": 53880 + }, + { + "epoch": 0.6950095758881072, + "grad_norm": 0.9661546066925887, + "learning_rate": 9.481171211453104e-06, + "loss": 0.2353, + "step": 53890 + }, + { + "epoch": 0.6951385440781032, + "grad_norm": 0.8622170357349627, + "learning_rate": 9.480838340888221e-06, + "loss": 0.2296, + "step": 53900 + }, + { + "epoch": 0.695267512268099, + "grad_norm": 0.8830014829548897, + "learning_rate": 9.480505369423066e-06, + "loss": 0.2469, + "step": 53910 + }, + { + "epoch": 0.695396480458095, + "grad_norm": 0.8215437372969479, + "learning_rate": 9.480172297065135e-06, + "loss": 0.2379, + "step": 53920 + }, + { + "epoch": 0.695525448648091, + "grad_norm": 0.8400839861352654, + "learning_rate": 9.479839123821926e-06, + "loss": 0.2427, + "step": 53930 + }, + { + "epoch": 0.6956544168380869, + "grad_norm": 0.8806464715266805, + "learning_rate": 9.479505849700945e-06, + "loss": 0.2283, + "step": 53940 + }, + { + "epoch": 0.6957833850280828, + "grad_norm": 0.9019649154530045, + "learning_rate": 9.479172474709694e-06, + "loss": 0.241, + "step": 53950 + }, + { + "epoch": 0.6959123532180788, + "grad_norm": 0.8958089002107154, + "learning_rate": 9.478838998855684e-06, + "loss": 0.2463, + "step": 53960 + }, + { + "epoch": 0.6960413214080747, + "grad_norm": 0.8752401737323753, + "learning_rate": 9.47850542214642e-06, + "loss": 0.2354, + "step": 53970 + }, + { + "epoch": 0.6961702895980706, + "grad_norm": 0.8439658569141825, + "learning_rate": 9.478171744589416e-06, + "loss": 0.2351, + "step": 53980 + }, + { + "epoch": 0.6962992577880666, + "grad_norm": 0.851097240384946, + "learning_rate": 9.477837966192182e-06, + "loss": 0.2396, + "step": 53990 + }, + { + "epoch": 0.6964282259780625, + "grad_norm": 0.9557118630158318, + "learning_rate": 9.47750408696224e-06, + "loss": 0.233, + "step": 54000 + }, + { + "epoch": 0.6965571941680585, + "grad_norm": 0.9530627040555615, + "learning_rate": 9.477170106907104e-06, + "loss": 0.2311, + "step": 54010 + }, + { + "epoch": 0.6966861623580544, + "grad_norm": 0.8649612100790364, + "learning_rate": 9.476836026034297e-06, + "loss": 0.2362, + "step": 54020 + }, + { + "epoch": 0.6968151305480503, + "grad_norm": 0.8804777333124739, + "learning_rate": 9.47650184435134e-06, + "loss": 0.2521, + "step": 54030 + }, + { + "epoch": 0.6969440987380463, + "grad_norm": 0.8394567662095948, + "learning_rate": 9.476167561865759e-06, + "loss": 0.2361, + "step": 54040 + }, + { + "epoch": 0.6970730669280422, + "grad_norm": 0.9075229837277453, + "learning_rate": 9.475833178585083e-06, + "loss": 0.2427, + "step": 54050 + }, + { + "epoch": 0.6972020351180381, + "grad_norm": 0.9144635047547038, + "learning_rate": 9.475498694516837e-06, + "loss": 0.2502, + "step": 54060 + }, + { + "epoch": 0.6973310033080341, + "grad_norm": 0.8536592746697917, + "learning_rate": 9.47516410966856e-06, + "loss": 0.2315, + "step": 54070 + }, + { + "epoch": 0.69745997149803, + "grad_norm": 0.8372016040020125, + "learning_rate": 9.47482942404778e-06, + "loss": 0.2379, + "step": 54080 + }, + { + "epoch": 0.6975889396880259, + "grad_norm": 0.8648953879975232, + "learning_rate": 9.474494637662038e-06, + "loss": 0.2352, + "step": 54090 + }, + { + "epoch": 0.6977179078780219, + "grad_norm": 0.9253738526345504, + "learning_rate": 9.474159750518868e-06, + "loss": 0.2447, + "step": 54100 + }, + { + "epoch": 0.6978468760680179, + "grad_norm": 0.8429842290779659, + "learning_rate": 9.473824762625814e-06, + "loss": 0.2386, + "step": 54110 + }, + { + "epoch": 0.6979758442580137, + "grad_norm": 0.9270776749772068, + "learning_rate": 9.47348967399042e-06, + "loss": 0.2478, + "step": 54120 + }, + { + "epoch": 0.6981048124480097, + "grad_norm": 0.8557458217896448, + "learning_rate": 9.473154484620231e-06, + "loss": 0.2372, + "step": 54130 + }, + { + "epoch": 0.6982337806380057, + "grad_norm": 0.8862173563480443, + "learning_rate": 9.472819194522794e-06, + "loss": 0.2396, + "step": 54140 + }, + { + "epoch": 0.6983627488280015, + "grad_norm": 0.9059325457165815, + "learning_rate": 9.472483803705658e-06, + "loss": 0.2369, + "step": 54150 + }, + { + "epoch": 0.6984917170179975, + "grad_norm": 0.8488862406449571, + "learning_rate": 9.47214831217638e-06, + "loss": 0.245, + "step": 54160 + }, + { + "epoch": 0.6986206852079935, + "grad_norm": 0.8474843652474465, + "learning_rate": 9.47181271994251e-06, + "loss": 0.2399, + "step": 54170 + }, + { + "epoch": 0.6987496533979893, + "grad_norm": 0.8523315528232097, + "learning_rate": 9.471477027011606e-06, + "loss": 0.2351, + "step": 54180 + }, + { + "epoch": 0.6988786215879853, + "grad_norm": 0.8923929199932776, + "learning_rate": 9.471141233391228e-06, + "loss": 0.2326, + "step": 54190 + }, + { + "epoch": 0.6990075897779813, + "grad_norm": 0.9487142938580257, + "learning_rate": 9.470805339088937e-06, + "loss": 0.2368, + "step": 54200 + }, + { + "epoch": 0.6991365579679772, + "grad_norm": 0.9596830763500188, + "learning_rate": 9.470469344112299e-06, + "loss": 0.2432, + "step": 54210 + }, + { + "epoch": 0.6992655261579731, + "grad_norm": 0.919976535245435, + "learning_rate": 9.470133248468876e-06, + "loss": 0.2366, + "step": 54220 + }, + { + "epoch": 0.6993944943479691, + "grad_norm": 0.9126750822571975, + "learning_rate": 9.469797052166239e-06, + "loss": 0.237, + "step": 54230 + }, + { + "epoch": 0.699523462537965, + "grad_norm": 0.8666718366180448, + "learning_rate": 9.469460755211957e-06, + "loss": 0.234, + "step": 54240 + }, + { + "epoch": 0.6996524307279609, + "grad_norm": 1.0290733754078938, + "learning_rate": 9.469124357613603e-06, + "loss": 0.2374, + "step": 54250 + }, + { + "epoch": 0.6997813989179569, + "grad_norm": 0.9023962162988867, + "learning_rate": 9.468787859378753e-06, + "loss": 0.2298, + "step": 54260 + }, + { + "epoch": 0.6999103671079528, + "grad_norm": 0.787768946262892, + "learning_rate": 9.468451260514983e-06, + "loss": 0.2263, + "step": 54270 + }, + { + "epoch": 0.7000393352979487, + "grad_norm": 0.9261613258035087, + "learning_rate": 9.468114561029876e-06, + "loss": 0.2428, + "step": 54280 + }, + { + "epoch": 0.7001683034879447, + "grad_norm": 0.832327984344883, + "learning_rate": 9.46777776093101e-06, + "loss": 0.2324, + "step": 54290 + }, + { + "epoch": 0.7002972716779406, + "grad_norm": 0.8270242894836063, + "learning_rate": 9.46744086022597e-06, + "loss": 0.231, + "step": 54300 + }, + { + "epoch": 0.7004262398679366, + "grad_norm": 0.8503664069847656, + "learning_rate": 9.467103858922342e-06, + "loss": 0.2348, + "step": 54310 + }, + { + "epoch": 0.7005552080579325, + "grad_norm": 0.8931439408211121, + "learning_rate": 9.466766757027717e-06, + "loss": 0.2341, + "step": 54320 + }, + { + "epoch": 0.7006841762479284, + "grad_norm": 0.9251579966234721, + "learning_rate": 9.466429554549683e-06, + "loss": 0.2312, + "step": 54330 + }, + { + "epoch": 0.7008131444379244, + "grad_norm": 0.9360920887724532, + "learning_rate": 9.466092251495836e-06, + "loss": 0.235, + "step": 54340 + }, + { + "epoch": 0.7009421126279203, + "grad_norm": 0.8945455374199108, + "learning_rate": 9.465754847873769e-06, + "loss": 0.2407, + "step": 54350 + }, + { + "epoch": 0.7010710808179162, + "grad_norm": 0.843983989658587, + "learning_rate": 9.465417343691081e-06, + "loss": 0.2316, + "step": 54360 + }, + { + "epoch": 0.7012000490079122, + "grad_norm": 0.8895062754836082, + "learning_rate": 9.465079738955371e-06, + "loss": 0.2266, + "step": 54370 + }, + { + "epoch": 0.7013290171979082, + "grad_norm": 0.9321851510626427, + "learning_rate": 9.464742033674243e-06, + "loss": 0.2259, + "step": 54380 + }, + { + "epoch": 0.701457985387904, + "grad_norm": 0.9445353433714231, + "learning_rate": 9.464404227855301e-06, + "loss": 0.2461, + "step": 54390 + }, + { + "epoch": 0.7015869535779, + "grad_norm": 0.9430891942503058, + "learning_rate": 9.464066321506151e-06, + "loss": 0.2319, + "step": 54400 + }, + { + "epoch": 0.701715921767896, + "grad_norm": 0.8010875933057168, + "learning_rate": 9.463728314634402e-06, + "loss": 0.243, + "step": 54410 + }, + { + "epoch": 0.7018448899578918, + "grad_norm": 0.8160944970967062, + "learning_rate": 9.463390207247665e-06, + "loss": 0.2305, + "step": 54420 + }, + { + "epoch": 0.7019738581478878, + "grad_norm": 0.8900275878158141, + "learning_rate": 9.463051999353554e-06, + "loss": 0.2319, + "step": 54430 + }, + { + "epoch": 0.7021028263378838, + "grad_norm": 0.9048629171384548, + "learning_rate": 9.462713690959685e-06, + "loss": 0.2349, + "step": 54440 + }, + { + "epoch": 0.7022317945278796, + "grad_norm": 0.9646193748929186, + "learning_rate": 9.462375282073677e-06, + "loss": 0.2267, + "step": 54450 + }, + { + "epoch": 0.7023607627178756, + "grad_norm": 0.8487111933847671, + "learning_rate": 9.462036772703148e-06, + "loss": 0.2299, + "step": 54460 + }, + { + "epoch": 0.7024897309078716, + "grad_norm": 0.882937418407942, + "learning_rate": 9.461698162855722e-06, + "loss": 0.2507, + "step": 54470 + }, + { + "epoch": 0.7026186990978676, + "grad_norm": 0.8144225959173276, + "learning_rate": 9.461359452539024e-06, + "loss": 0.2408, + "step": 54480 + }, + { + "epoch": 0.7027476672878634, + "grad_norm": 0.848662959318541, + "learning_rate": 9.461020641760682e-06, + "loss": 0.2389, + "step": 54490 + }, + { + "epoch": 0.7028766354778594, + "grad_norm": 0.934919057725987, + "learning_rate": 9.460681730528324e-06, + "loss": 0.2343, + "step": 54500 + }, + { + "epoch": 0.7030056036678554, + "grad_norm": 0.8572965111326941, + "learning_rate": 9.460342718849583e-06, + "loss": 0.2296, + "step": 54510 + }, + { + "epoch": 0.7031345718578512, + "grad_norm": 0.8740657431287473, + "learning_rate": 9.46000360673209e-06, + "loss": 0.2533, + "step": 54520 + }, + { + "epoch": 0.7032635400478472, + "grad_norm": 0.9654675019875547, + "learning_rate": 9.459664394183484e-06, + "loss": 0.227, + "step": 54530 + }, + { + "epoch": 0.7033925082378432, + "grad_norm": 0.9758674573186766, + "learning_rate": 9.4593250812114e-06, + "loss": 0.247, + "step": 54540 + }, + { + "epoch": 0.703521476427839, + "grad_norm": 0.8398503181737967, + "learning_rate": 9.458985667823486e-06, + "loss": 0.2471, + "step": 54550 + }, + { + "epoch": 0.703650444617835, + "grad_norm": 0.9198967142070241, + "learning_rate": 9.458646154027379e-06, + "loss": 0.2371, + "step": 54560 + }, + { + "epoch": 0.703779412807831, + "grad_norm": 0.8816464170817072, + "learning_rate": 9.458306539830723e-06, + "loss": 0.2418, + "step": 54570 + }, + { + "epoch": 0.7039083809978269, + "grad_norm": 0.8815133046992973, + "learning_rate": 9.45796682524117e-06, + "loss": 0.2334, + "step": 54580 + }, + { + "epoch": 0.7040373491878228, + "grad_norm": 0.8395057706288005, + "learning_rate": 9.457627010266366e-06, + "loss": 0.2434, + "step": 54590 + }, + { + "epoch": 0.7041663173778188, + "grad_norm": 0.868464802147145, + "learning_rate": 9.457287094913968e-06, + "loss": 0.2374, + "step": 54600 + }, + { + "epoch": 0.7042952855678147, + "grad_norm": 0.8413582346015789, + "learning_rate": 9.456947079191624e-06, + "loss": 0.2362, + "step": 54610 + }, + { + "epoch": 0.7044242537578106, + "grad_norm": 0.8191684987902954, + "learning_rate": 9.456606963106993e-06, + "loss": 0.2417, + "step": 54620 + }, + { + "epoch": 0.7045532219478066, + "grad_norm": 0.8706272438262935, + "learning_rate": 9.456266746667737e-06, + "loss": 0.2452, + "step": 54630 + }, + { + "epoch": 0.7046821901378025, + "grad_norm": 0.9373177390884875, + "learning_rate": 9.455926429881513e-06, + "loss": 0.2247, + "step": 54640 + }, + { + "epoch": 0.7048111583277985, + "grad_norm": 0.812741207769479, + "learning_rate": 9.455586012755986e-06, + "loss": 0.2279, + "step": 54650 + }, + { + "epoch": 0.7049401265177944, + "grad_norm": 0.8024492561626514, + "learning_rate": 9.455245495298821e-06, + "loss": 0.2333, + "step": 54660 + }, + { + "epoch": 0.7050690947077903, + "grad_norm": 0.9781525985506861, + "learning_rate": 9.454904877517686e-06, + "loss": 0.2433, + "step": 54670 + }, + { + "epoch": 0.7051980628977863, + "grad_norm": 0.9056373293781252, + "learning_rate": 9.454564159420251e-06, + "loss": 0.2303, + "step": 54680 + }, + { + "epoch": 0.7053270310877822, + "grad_norm": 0.9385016036656852, + "learning_rate": 9.45422334101419e-06, + "loss": 0.2452, + "step": 54690 + }, + { + "epoch": 0.7054559992777781, + "grad_norm": 0.9591478870607084, + "learning_rate": 9.453882422307175e-06, + "loss": 0.2385, + "step": 54700 + }, + { + "epoch": 0.7055849674677741, + "grad_norm": 0.8780166481713158, + "learning_rate": 9.453541403306883e-06, + "loss": 0.24, + "step": 54710 + }, + { + "epoch": 0.70571393565777, + "grad_norm": 0.8834077024619584, + "learning_rate": 9.453200284020996e-06, + "loss": 0.229, + "step": 54720 + }, + { + "epoch": 0.7058429038477659, + "grad_norm": 0.931739807603835, + "learning_rate": 9.452859064457193e-06, + "loss": 0.2399, + "step": 54730 + }, + { + "epoch": 0.7059718720377619, + "grad_norm": 0.9027191814076818, + "learning_rate": 9.452517744623157e-06, + "loss": 0.2246, + "step": 54740 + }, + { + "epoch": 0.7061008402277579, + "grad_norm": 0.8540673416105578, + "learning_rate": 9.452176324526576e-06, + "loss": 0.2362, + "step": 54750 + }, + { + "epoch": 0.7062298084177537, + "grad_norm": 0.9147120638993591, + "learning_rate": 9.451834804175138e-06, + "loss": 0.2332, + "step": 54760 + }, + { + "epoch": 0.7063587766077497, + "grad_norm": 0.8541716768740397, + "learning_rate": 9.451493183576533e-06, + "loss": 0.2305, + "step": 54770 + }, + { + "epoch": 0.7064877447977457, + "grad_norm": 0.9751909083304092, + "learning_rate": 9.45115146273845e-06, + "loss": 0.2468, + "step": 54780 + }, + { + "epoch": 0.7066167129877415, + "grad_norm": 0.9093456405702923, + "learning_rate": 9.45080964166859e-06, + "loss": 0.2315, + "step": 54790 + }, + { + "epoch": 0.7067456811777375, + "grad_norm": 0.8218756980190077, + "learning_rate": 9.450467720374647e-06, + "loss": 0.2316, + "step": 54800 + }, + { + "epoch": 0.7068746493677335, + "grad_norm": 0.9099031834972741, + "learning_rate": 9.450125698864321e-06, + "loss": 0.2354, + "step": 54810 + }, + { + "epoch": 0.7070036175577293, + "grad_norm": 0.871926547893149, + "learning_rate": 9.449783577145313e-06, + "loss": 0.2336, + "step": 54820 + }, + { + "epoch": 0.7071325857477253, + "grad_norm": 0.8629206583825104, + "learning_rate": 9.449441355225327e-06, + "loss": 0.238, + "step": 54830 + }, + { + "epoch": 0.7072615539377213, + "grad_norm": 0.888663562930502, + "learning_rate": 9.449099033112071e-06, + "loss": 0.2319, + "step": 54840 + }, + { + "epoch": 0.7073905221277172, + "grad_norm": 0.8714233918067481, + "learning_rate": 9.44875661081325e-06, + "loss": 0.2321, + "step": 54850 + }, + { + "epoch": 0.7075194903177131, + "grad_norm": 0.9368955814010508, + "learning_rate": 9.448414088336579e-06, + "loss": 0.2269, + "step": 54860 + }, + { + "epoch": 0.7076484585077091, + "grad_norm": 0.9131764497036859, + "learning_rate": 9.448071465689767e-06, + "loss": 0.2453, + "step": 54870 + }, + { + "epoch": 0.707777426697705, + "grad_norm": 0.8556464092754466, + "learning_rate": 9.447728742880534e-06, + "loss": 0.2431, + "step": 54880 + }, + { + "epoch": 0.7079063948877009, + "grad_norm": 0.8908422088546448, + "learning_rate": 9.447385919916591e-06, + "loss": 0.2389, + "step": 54890 + }, + { + "epoch": 0.7080353630776969, + "grad_norm": 0.9071004089701594, + "learning_rate": 9.447042996805665e-06, + "loss": 0.2479, + "step": 54900 + }, + { + "epoch": 0.7081643312676928, + "grad_norm": 0.7804860763326333, + "learning_rate": 9.446699973555471e-06, + "loss": 0.224, + "step": 54910 + }, + { + "epoch": 0.7082932994576887, + "grad_norm": 0.836551080581491, + "learning_rate": 9.44635685017374e-06, + "loss": 0.243, + "step": 54920 + }, + { + "epoch": 0.7084222676476847, + "grad_norm": 0.8606304468871055, + "learning_rate": 9.446013626668193e-06, + "loss": 0.22, + "step": 54930 + }, + { + "epoch": 0.7085512358376806, + "grad_norm": 0.8732901227354718, + "learning_rate": 9.44567030304656e-06, + "loss": 0.2317, + "step": 54940 + }, + { + "epoch": 0.7086802040276766, + "grad_norm": 0.8319268959753415, + "learning_rate": 9.445326879316575e-06, + "loss": 0.2327, + "step": 54950 + }, + { + "epoch": 0.7088091722176725, + "grad_norm": 0.7890596000733071, + "learning_rate": 9.444983355485967e-06, + "loss": 0.2293, + "step": 54960 + }, + { + "epoch": 0.7089381404076684, + "grad_norm": 0.9022087165015088, + "learning_rate": 9.444639731562474e-06, + "loss": 0.2442, + "step": 54970 + }, + { + "epoch": 0.7090671085976644, + "grad_norm": 0.8657654022220619, + "learning_rate": 9.444296007553836e-06, + "loss": 0.2504, + "step": 54980 + }, + { + "epoch": 0.7091960767876603, + "grad_norm": 0.9276831258199102, + "learning_rate": 9.443952183467789e-06, + "loss": 0.2404, + "step": 54990 + }, + { + "epoch": 0.7093250449776562, + "grad_norm": 0.8872801740252896, + "learning_rate": 9.443608259312077e-06, + "loss": 0.2449, + "step": 55000 + }, + { + "epoch": 0.7094540131676522, + "grad_norm": 0.8812553822689387, + "learning_rate": 9.443264235094443e-06, + "loss": 0.2341, + "step": 55010 + }, + { + "epoch": 0.7095829813576482, + "grad_norm": 0.8759852613225186, + "learning_rate": 9.442920110822637e-06, + "loss": 0.2524, + "step": 55020 + }, + { + "epoch": 0.709711949547644, + "grad_norm": 0.7990934550829822, + "learning_rate": 9.442575886504405e-06, + "loss": 0.2487, + "step": 55030 + }, + { + "epoch": 0.70984091773764, + "grad_norm": 0.8869967411495393, + "learning_rate": 9.442231562147499e-06, + "loss": 0.2478, + "step": 55040 + }, + { + "epoch": 0.709969885927636, + "grad_norm": 0.8486863695550317, + "learning_rate": 9.441887137759675e-06, + "loss": 0.2337, + "step": 55050 + }, + { + "epoch": 0.7100988541176319, + "grad_norm": 0.910680430701159, + "learning_rate": 9.441542613348684e-06, + "loss": 0.2343, + "step": 55060 + }, + { + "epoch": 0.7102278223076278, + "grad_norm": 0.8648050535378821, + "learning_rate": 9.441197988922291e-06, + "loss": 0.2341, + "step": 55070 + }, + { + "epoch": 0.7103567904976238, + "grad_norm": 0.8527080969895429, + "learning_rate": 9.440853264488248e-06, + "loss": 0.222, + "step": 55080 + }, + { + "epoch": 0.7104857586876197, + "grad_norm": 0.8609689769420318, + "learning_rate": 9.440508440054323e-06, + "loss": 0.2308, + "step": 55090 + }, + { + "epoch": 0.7106147268776156, + "grad_norm": 0.8400571483465168, + "learning_rate": 9.440163515628281e-06, + "loss": 0.2229, + "step": 55100 + }, + { + "epoch": 0.7107436950676116, + "grad_norm": 0.963756199876831, + "learning_rate": 9.439818491217887e-06, + "loss": 0.258, + "step": 55110 + }, + { + "epoch": 0.7108726632576076, + "grad_norm": 0.9152587013655649, + "learning_rate": 9.43947336683091e-06, + "loss": 0.2451, + "step": 55120 + }, + { + "epoch": 0.7110016314476034, + "grad_norm": 0.8761153389752879, + "learning_rate": 9.439128142475125e-06, + "loss": 0.232, + "step": 55130 + }, + { + "epoch": 0.7111305996375994, + "grad_norm": 0.9627219516207556, + "learning_rate": 9.438782818158301e-06, + "loss": 0.2352, + "step": 55140 + }, + { + "epoch": 0.7112595678275954, + "grad_norm": 0.8117517339364618, + "learning_rate": 9.438437393888217e-06, + "loss": 0.2301, + "step": 55150 + }, + { + "epoch": 0.7113885360175912, + "grad_norm": 0.9178508087799063, + "learning_rate": 9.438091869672651e-06, + "loss": 0.236, + "step": 55160 + }, + { + "epoch": 0.7115175042075872, + "grad_norm": 0.9669722516360605, + "learning_rate": 9.437746245519383e-06, + "loss": 0.2364, + "step": 55170 + }, + { + "epoch": 0.7116464723975832, + "grad_norm": 0.9288408209405901, + "learning_rate": 9.437400521436197e-06, + "loss": 0.2421, + "step": 55180 + }, + { + "epoch": 0.711775440587579, + "grad_norm": 0.98371073390106, + "learning_rate": 9.437054697430876e-06, + "loss": 0.2499, + "step": 55190 + }, + { + "epoch": 0.711904408777575, + "grad_norm": 0.8433581729475887, + "learning_rate": 9.43670877351121e-06, + "loss": 0.2433, + "step": 55200 + }, + { + "epoch": 0.712033376967571, + "grad_norm": 0.8361294483076238, + "learning_rate": 9.436362749684986e-06, + "loss": 0.2341, + "step": 55210 + }, + { + "epoch": 0.7121623451575669, + "grad_norm": 0.9232788928817993, + "learning_rate": 9.436016625959997e-06, + "loss": 0.2212, + "step": 55220 + }, + { + "epoch": 0.7122913133475628, + "grad_norm": 0.8450833476851255, + "learning_rate": 9.435670402344038e-06, + "loss": 0.2225, + "step": 55230 + }, + { + "epoch": 0.7124202815375588, + "grad_norm": 0.735808836012162, + "learning_rate": 9.435324078844903e-06, + "loss": 0.2296, + "step": 55240 + }, + { + "epoch": 0.7125492497275547, + "grad_norm": 0.8235124317879193, + "learning_rate": 9.434977655470392e-06, + "loss": 0.2128, + "step": 55250 + }, + { + "epoch": 0.7126782179175506, + "grad_norm": 0.9613982657853185, + "learning_rate": 9.434631132228307e-06, + "loss": 0.2317, + "step": 55260 + }, + { + "epoch": 0.7128071861075466, + "grad_norm": 0.8890736013484428, + "learning_rate": 9.43428450912645e-06, + "loss": 0.2313, + "step": 55270 + }, + { + "epoch": 0.7129361542975425, + "grad_norm": 1.0497702453883573, + "learning_rate": 9.433937786172625e-06, + "loss": 0.2286, + "step": 55280 + }, + { + "epoch": 0.7130651224875384, + "grad_norm": 0.8510671391531783, + "learning_rate": 9.43359096337464e-06, + "loss": 0.2269, + "step": 55290 + }, + { + "epoch": 0.7131940906775344, + "grad_norm": 0.9092355299435735, + "learning_rate": 9.433244040740306e-06, + "loss": 0.2413, + "step": 55300 + }, + { + "epoch": 0.7133230588675303, + "grad_norm": 0.8329320696305358, + "learning_rate": 9.432897018277435e-06, + "loss": 0.2224, + "step": 55310 + }, + { + "epoch": 0.7134520270575263, + "grad_norm": 0.8651879286327215, + "learning_rate": 9.432549895993841e-06, + "loss": 0.2337, + "step": 55320 + }, + { + "epoch": 0.7135809952475222, + "grad_norm": 0.9370265844741068, + "learning_rate": 9.43220267389734e-06, + "loss": 0.2362, + "step": 55330 + }, + { + "epoch": 0.7137099634375181, + "grad_norm": 0.786811881899858, + "learning_rate": 9.431855351995753e-06, + "loss": 0.2245, + "step": 55340 + }, + { + "epoch": 0.7138389316275141, + "grad_norm": 0.9325146098059068, + "learning_rate": 9.431507930296898e-06, + "loss": 0.2221, + "step": 55350 + }, + { + "epoch": 0.71396789981751, + "grad_norm": 0.8731115576605534, + "learning_rate": 9.431160408808598e-06, + "loss": 0.2392, + "step": 55360 + }, + { + "epoch": 0.7140968680075059, + "grad_norm": 0.931144538177547, + "learning_rate": 9.430812787538682e-06, + "loss": 0.2406, + "step": 55370 + }, + { + "epoch": 0.7142258361975019, + "grad_norm": 0.8645531235965371, + "learning_rate": 9.430465066494976e-06, + "loss": 0.2255, + "step": 55380 + }, + { + "epoch": 0.7143548043874979, + "grad_norm": 0.8565591932109737, + "learning_rate": 9.43011724568531e-06, + "loss": 0.2417, + "step": 55390 + }, + { + "epoch": 0.7144837725774937, + "grad_norm": 0.9400634803050036, + "learning_rate": 9.429769325117514e-06, + "loss": 0.2469, + "step": 55400 + }, + { + "epoch": 0.7146127407674897, + "grad_norm": 0.9287559789758459, + "learning_rate": 9.429421304799426e-06, + "loss": 0.2344, + "step": 55410 + }, + { + "epoch": 0.7147417089574857, + "grad_norm": 0.8906562976173533, + "learning_rate": 9.429073184738882e-06, + "loss": 0.2393, + "step": 55420 + }, + { + "epoch": 0.7148706771474815, + "grad_norm": 0.8762939923089273, + "learning_rate": 9.428724964943722e-06, + "loss": 0.2251, + "step": 55430 + }, + { + "epoch": 0.7149996453374775, + "grad_norm": 0.8213012351071269, + "learning_rate": 9.428376645421784e-06, + "loss": 0.2299, + "step": 55440 + }, + { + "epoch": 0.7151286135274735, + "grad_norm": 0.779879136611146, + "learning_rate": 9.428028226180914e-06, + "loss": 0.2266, + "step": 55450 + }, + { + "epoch": 0.7152575817174693, + "grad_norm": 0.8894631941071834, + "learning_rate": 9.427679707228957e-06, + "loss": 0.2291, + "step": 55460 + }, + { + "epoch": 0.7153865499074653, + "grad_norm": 0.961526993104548, + "learning_rate": 9.427331088573761e-06, + "loss": 0.23, + "step": 55470 + }, + { + "epoch": 0.7155155180974613, + "grad_norm": 0.8957433653643849, + "learning_rate": 9.426982370223177e-06, + "loss": 0.2294, + "step": 55480 + }, + { + "epoch": 0.7156444862874572, + "grad_norm": 0.9030321698022739, + "learning_rate": 9.426633552185057e-06, + "loss": 0.2316, + "step": 55490 + }, + { + "epoch": 0.7157734544774531, + "grad_norm": 0.9849960400813285, + "learning_rate": 9.426284634467255e-06, + "loss": 0.239, + "step": 55500 + }, + { + "epoch": 0.7159024226674491, + "grad_norm": 0.872948063174924, + "learning_rate": 9.425935617077629e-06, + "loss": 0.2455, + "step": 55510 + }, + { + "epoch": 0.716031390857445, + "grad_norm": 0.8771611017112072, + "learning_rate": 9.425586500024037e-06, + "loss": 0.2289, + "step": 55520 + }, + { + "epoch": 0.7161603590474409, + "grad_norm": 0.8477662879113644, + "learning_rate": 9.425237283314344e-06, + "loss": 0.2434, + "step": 55530 + }, + { + "epoch": 0.7162893272374369, + "grad_norm": 0.9071900955744944, + "learning_rate": 9.424887966956409e-06, + "loss": 0.2313, + "step": 55540 + }, + { + "epoch": 0.7164182954274328, + "grad_norm": 1.0137604243326412, + "learning_rate": 9.4245385509581e-06, + "loss": 0.2297, + "step": 55550 + }, + { + "epoch": 0.7165472636174287, + "grad_norm": 0.8875809726119311, + "learning_rate": 9.424189035327285e-06, + "loss": 0.2438, + "step": 55560 + }, + { + "epoch": 0.7166762318074247, + "grad_norm": 0.9368566586913089, + "learning_rate": 9.423839420071834e-06, + "loss": 0.2404, + "step": 55570 + }, + { + "epoch": 0.7168051999974207, + "grad_norm": 0.8876331267984842, + "learning_rate": 9.423489705199622e-06, + "loss": 0.229, + "step": 55580 + }, + { + "epoch": 0.7169341681874166, + "grad_norm": 0.8832922163871403, + "learning_rate": 9.423139890718522e-06, + "loss": 0.2345, + "step": 55590 + }, + { + "epoch": 0.7170631363774125, + "grad_norm": 0.8462820976497603, + "learning_rate": 9.422789976636412e-06, + "loss": 0.2258, + "step": 55600 + }, + { + "epoch": 0.7171921045674085, + "grad_norm": 0.9174899634127621, + "learning_rate": 9.42243996296117e-06, + "loss": 0.23, + "step": 55610 + }, + { + "epoch": 0.7173210727574044, + "grad_norm": 0.8480974044888901, + "learning_rate": 9.42208984970068e-06, + "loss": 0.2248, + "step": 55620 + }, + { + "epoch": 0.7174500409474003, + "grad_norm": 0.8183993755248379, + "learning_rate": 9.421739636862823e-06, + "loss": 0.2316, + "step": 55630 + }, + { + "epoch": 0.7175790091373963, + "grad_norm": 0.8165780304824614, + "learning_rate": 9.421389324455489e-06, + "loss": 0.232, + "step": 55640 + }, + { + "epoch": 0.7177079773273922, + "grad_norm": 0.9815907380054013, + "learning_rate": 9.42103891248656e-06, + "loss": 0.2373, + "step": 55650 + }, + { + "epoch": 0.7178369455173881, + "grad_norm": 0.9269625068844696, + "learning_rate": 9.420688400963933e-06, + "loss": 0.2418, + "step": 55660 + }, + { + "epoch": 0.717965913707384, + "grad_norm": 0.9993218580357851, + "learning_rate": 9.4203377898955e-06, + "loss": 0.2423, + "step": 55670 + }, + { + "epoch": 0.71809488189738, + "grad_norm": 0.8840928595265463, + "learning_rate": 9.419987079289152e-06, + "loss": 0.2334, + "step": 55680 + }, + { + "epoch": 0.718223850087376, + "grad_norm": 0.9135764609481271, + "learning_rate": 9.41963626915279e-06, + "loss": 0.2383, + "step": 55690 + }, + { + "epoch": 0.7183528182773719, + "grad_norm": 0.8401541328869299, + "learning_rate": 9.419285359494314e-06, + "loss": 0.2367, + "step": 55700 + }, + { + "epoch": 0.7184817864673678, + "grad_norm": 0.8913751576604065, + "learning_rate": 9.418934350321623e-06, + "loss": 0.2422, + "step": 55710 + }, + { + "epoch": 0.7186107546573638, + "grad_norm": 0.8972901661974627, + "learning_rate": 9.418583241642623e-06, + "loss": 0.2332, + "step": 55720 + }, + { + "epoch": 0.7187397228473597, + "grad_norm": 0.9586675791299643, + "learning_rate": 9.41823203346522e-06, + "loss": 0.2426, + "step": 55730 + }, + { + "epoch": 0.7188686910373556, + "grad_norm": 0.9990289125686198, + "learning_rate": 9.417880725797322e-06, + "loss": 0.2313, + "step": 55740 + }, + { + "epoch": 0.7189976592273516, + "grad_norm": 0.8610038965498024, + "learning_rate": 9.41752931864684e-06, + "loss": 0.2341, + "step": 55750 + }, + { + "epoch": 0.7191266274173476, + "grad_norm": 0.8757551957316276, + "learning_rate": 9.41717781202169e-06, + "loss": 0.2488, + "step": 55760 + }, + { + "epoch": 0.7192555956073434, + "grad_norm": 0.9044605666315445, + "learning_rate": 9.416826205929782e-06, + "loss": 0.2442, + "step": 55770 + }, + { + "epoch": 0.7193845637973394, + "grad_norm": 0.8477445380083246, + "learning_rate": 9.416474500379036e-06, + "loss": 0.2236, + "step": 55780 + }, + { + "epoch": 0.7195135319873354, + "grad_norm": 0.9197703887156861, + "learning_rate": 9.416122695377373e-06, + "loss": 0.2315, + "step": 55790 + }, + { + "epoch": 0.7196425001773312, + "grad_norm": 0.933618947148178, + "learning_rate": 9.415770790932715e-06, + "loss": 0.2344, + "step": 55800 + }, + { + "epoch": 0.7197714683673272, + "grad_norm": 0.8053352537875594, + "learning_rate": 9.415418787052983e-06, + "loss": 0.2492, + "step": 55810 + }, + { + "epoch": 0.7199004365573232, + "grad_norm": 0.8821258489116516, + "learning_rate": 9.415066683746106e-06, + "loss": 0.228, + "step": 55820 + }, + { + "epoch": 0.720029404747319, + "grad_norm": 0.8869667704249793, + "learning_rate": 9.414714481020014e-06, + "loss": 0.2208, + "step": 55830 + }, + { + "epoch": 0.720158372937315, + "grad_norm": 0.8549744084180082, + "learning_rate": 9.414362178882635e-06, + "loss": 0.2307, + "step": 55840 + }, + { + "epoch": 0.720287341127311, + "grad_norm": 0.9097390881781057, + "learning_rate": 9.414009777341903e-06, + "loss": 0.2407, + "step": 55850 + }, + { + "epoch": 0.7204163093173069, + "grad_norm": 1.0341249865133733, + "learning_rate": 9.413657276405755e-06, + "loss": 0.2419, + "step": 55860 + }, + { + "epoch": 0.7205452775073028, + "grad_norm": 0.8641415957512257, + "learning_rate": 9.413304676082126e-06, + "loss": 0.2279, + "step": 55870 + }, + { + "epoch": 0.7206742456972988, + "grad_norm": 0.8897081861876345, + "learning_rate": 9.41295197637896e-06, + "loss": 0.2455, + "step": 55880 + }, + { + "epoch": 0.7208032138872947, + "grad_norm": 0.9349180201180787, + "learning_rate": 9.412599177304193e-06, + "loss": 0.2304, + "step": 55890 + }, + { + "epoch": 0.7209321820772906, + "grad_norm": 0.8295213301113645, + "learning_rate": 9.412246278865776e-06, + "loss": 0.2367, + "step": 55900 + }, + { + "epoch": 0.7210611502672866, + "grad_norm": 0.8801150218625003, + "learning_rate": 9.411893281071654e-06, + "loss": 0.2341, + "step": 55910 + }, + { + "epoch": 0.7211901184572825, + "grad_norm": 0.903396752456181, + "learning_rate": 9.411540183929771e-06, + "loss": 0.2384, + "step": 55920 + }, + { + "epoch": 0.7213190866472784, + "grad_norm": 0.7385191745583195, + "learning_rate": 9.411186987448085e-06, + "loss": 0.2319, + "step": 55930 + }, + { + "epoch": 0.7214480548372744, + "grad_norm": 0.9181617525289342, + "learning_rate": 9.410833691634543e-06, + "loss": 0.2366, + "step": 55940 + }, + { + "epoch": 0.7215770230272703, + "grad_norm": 0.9535332142120378, + "learning_rate": 9.410480296497105e-06, + "loss": 0.2463, + "step": 55950 + }, + { + "epoch": 0.7217059912172663, + "grad_norm": 1.0294433206195703, + "learning_rate": 9.410126802043727e-06, + "loss": 0.2442, + "step": 55960 + }, + { + "epoch": 0.7218349594072622, + "grad_norm": 0.8877028047275234, + "learning_rate": 9.409773208282371e-06, + "loss": 0.249, + "step": 55970 + }, + { + "epoch": 0.7219639275972581, + "grad_norm": 0.937161325179711, + "learning_rate": 9.409419515220996e-06, + "loss": 0.2261, + "step": 55980 + }, + { + "epoch": 0.7220928957872541, + "grad_norm": 0.9027176211988859, + "learning_rate": 9.40906572286757e-06, + "loss": 0.2386, + "step": 55990 + }, + { + "epoch": 0.72222186397725, + "grad_norm": 0.8253933608944716, + "learning_rate": 9.408711831230055e-06, + "loss": 0.2377, + "step": 56000 + }, + { + "epoch": 0.7223508321672459, + "grad_norm": 0.8767675153346859, + "learning_rate": 9.408357840316425e-06, + "loss": 0.232, + "step": 56010 + }, + { + "epoch": 0.7224798003572419, + "grad_norm": 0.8735722563907767, + "learning_rate": 9.40800375013465e-06, + "loss": 0.2287, + "step": 56020 + }, + { + "epoch": 0.7226087685472379, + "grad_norm": 0.8187255038290928, + "learning_rate": 9.407649560692702e-06, + "loss": 0.2386, + "step": 56030 + }, + { + "epoch": 0.7227377367372337, + "grad_norm": 0.8424512689166069, + "learning_rate": 9.407295271998556e-06, + "loss": 0.2191, + "step": 56040 + }, + { + "epoch": 0.7228667049272297, + "grad_norm": 1.0360852031632608, + "learning_rate": 9.406940884060194e-06, + "loss": 0.2358, + "step": 56050 + }, + { + "epoch": 0.7229956731172257, + "grad_norm": 0.9174179440682761, + "learning_rate": 9.40658639688559e-06, + "loss": 0.2263, + "step": 56060 + }, + { + "epoch": 0.7231246413072215, + "grad_norm": 0.8212589500783061, + "learning_rate": 9.406231810482733e-06, + "loss": 0.2249, + "step": 56070 + }, + { + "epoch": 0.7232536094972175, + "grad_norm": 0.8787673469908972, + "learning_rate": 9.405877124859603e-06, + "loss": 0.243, + "step": 56080 + }, + { + "epoch": 0.7233825776872135, + "grad_norm": 0.9707196357061583, + "learning_rate": 9.405522340024189e-06, + "loss": 0.2283, + "step": 56090 + }, + { + "epoch": 0.7235115458772093, + "grad_norm": 0.9025676801820296, + "learning_rate": 9.405167455984479e-06, + "loss": 0.227, + "step": 56100 + }, + { + "epoch": 0.7236405140672053, + "grad_norm": 0.8560437473997988, + "learning_rate": 9.404812472748466e-06, + "loss": 0.2388, + "step": 56110 + }, + { + "epoch": 0.7237694822572013, + "grad_norm": 0.8978892031478583, + "learning_rate": 9.404457390324141e-06, + "loss": 0.2489, + "step": 56120 + }, + { + "epoch": 0.7238984504471973, + "grad_norm": 0.8385168938950829, + "learning_rate": 9.404102208719502e-06, + "loss": 0.2376, + "step": 56130 + }, + { + "epoch": 0.7240274186371931, + "grad_norm": 0.9064364770888295, + "learning_rate": 9.403746927942546e-06, + "loss": 0.2385, + "step": 56140 + }, + { + "epoch": 0.7241563868271891, + "grad_norm": 0.8716607661892686, + "learning_rate": 9.403391548001273e-06, + "loss": 0.2454, + "step": 56150 + }, + { + "epoch": 0.724285355017185, + "grad_norm": 0.9377975876936073, + "learning_rate": 9.403036068903685e-06, + "loss": 0.235, + "step": 56160 + }, + { + "epoch": 0.7244143232071809, + "grad_norm": 0.8930556264009172, + "learning_rate": 9.402680490657791e-06, + "loss": 0.2284, + "step": 56170 + }, + { + "epoch": 0.7245432913971769, + "grad_norm": 0.9350535289188567, + "learning_rate": 9.402324813271593e-06, + "loss": 0.2362, + "step": 56180 + }, + { + "epoch": 0.7246722595871729, + "grad_norm": 0.9090861811854637, + "learning_rate": 9.401969036753103e-06, + "loss": 0.2348, + "step": 56190 + }, + { + "epoch": 0.7248012277771687, + "grad_norm": 0.9551162653165071, + "learning_rate": 9.401613161110329e-06, + "loss": 0.237, + "step": 56200 + }, + { + "epoch": 0.7249301959671647, + "grad_norm": 0.9328660417020683, + "learning_rate": 9.401257186351289e-06, + "loss": 0.2462, + "step": 56210 + }, + { + "epoch": 0.7250591641571607, + "grad_norm": 0.9359380121511514, + "learning_rate": 9.400901112483996e-06, + "loss": 0.2253, + "step": 56220 + }, + { + "epoch": 0.7251881323471566, + "grad_norm": 0.8367987569087348, + "learning_rate": 9.40054493951647e-06, + "loss": 0.2216, + "step": 56230 + }, + { + "epoch": 0.7253171005371525, + "grad_norm": 0.8316590516262887, + "learning_rate": 9.400188667456729e-06, + "loss": 0.2366, + "step": 56240 + }, + { + "epoch": 0.7254460687271485, + "grad_norm": 0.9084477027042355, + "learning_rate": 9.399832296312798e-06, + "loss": 0.2279, + "step": 56250 + }, + { + "epoch": 0.7255750369171444, + "grad_norm": 0.8841114159908039, + "learning_rate": 9.3994758260927e-06, + "loss": 0.2388, + "step": 56260 + }, + { + "epoch": 0.7257040051071403, + "grad_norm": 0.8366991023978001, + "learning_rate": 9.399119256804464e-06, + "loss": 0.2222, + "step": 56270 + }, + { + "epoch": 0.7258329732971363, + "grad_norm": 0.9092537133676876, + "learning_rate": 9.398762588456118e-06, + "loss": 0.2488, + "step": 56280 + }, + { + "epoch": 0.7259619414871322, + "grad_norm": 0.8653797081314881, + "learning_rate": 9.398405821055694e-06, + "loss": 0.2264, + "step": 56290 + }, + { + "epoch": 0.7260909096771281, + "grad_norm": 0.8343550592943124, + "learning_rate": 9.398048954611226e-06, + "loss": 0.2347, + "step": 56300 + }, + { + "epoch": 0.726219877867124, + "grad_norm": 0.8651640171398652, + "learning_rate": 9.39769198913075e-06, + "loss": 0.234, + "step": 56310 + }, + { + "epoch": 0.72634884605712, + "grad_norm": 0.8959868877515397, + "learning_rate": 9.397334924622299e-06, + "loss": 0.2282, + "step": 56320 + }, + { + "epoch": 0.726477814247116, + "grad_norm": 0.9042372010651648, + "learning_rate": 9.396977761093922e-06, + "loss": 0.2329, + "step": 56330 + }, + { + "epoch": 0.7266067824371119, + "grad_norm": 0.8314326214465148, + "learning_rate": 9.396620498553658e-06, + "loss": 0.2317, + "step": 56340 + }, + { + "epoch": 0.7267357506271078, + "grad_norm": 0.883116370780322, + "learning_rate": 9.396263137009548e-06, + "loss": 0.2283, + "step": 56350 + }, + { + "epoch": 0.7268647188171038, + "grad_norm": 0.8500214147804077, + "learning_rate": 9.395905676469648e-06, + "loss": 0.2478, + "step": 56360 + }, + { + "epoch": 0.7269936870070997, + "grad_norm": 0.8737962865007401, + "learning_rate": 9.395548116941999e-06, + "loss": 0.2366, + "step": 56370 + }, + { + "epoch": 0.7271226551970956, + "grad_norm": 0.8592951468801875, + "learning_rate": 9.395190458434657e-06, + "loss": 0.2306, + "step": 56380 + }, + { + "epoch": 0.7272516233870916, + "grad_norm": 0.9673406270932386, + "learning_rate": 9.394832700955674e-06, + "loss": 0.2429, + "step": 56390 + }, + { + "epoch": 0.7273805915770876, + "grad_norm": 0.945847452545865, + "learning_rate": 9.394474844513106e-06, + "loss": 0.2478, + "step": 56400 + }, + { + "epoch": 0.7275095597670834, + "grad_norm": 0.8631549813811156, + "learning_rate": 9.394116889115012e-06, + "loss": 0.2324, + "step": 56410 + }, + { + "epoch": 0.7276385279570794, + "grad_norm": 0.8888059424844944, + "learning_rate": 9.393758834769453e-06, + "loss": 0.2406, + "step": 56420 + }, + { + "epoch": 0.7277674961470754, + "grad_norm": 0.8710098052827515, + "learning_rate": 9.393400681484492e-06, + "loss": 0.2201, + "step": 56430 + }, + { + "epoch": 0.7278964643370712, + "grad_norm": 0.8442013582272374, + "learning_rate": 9.393042429268191e-06, + "loss": 0.2281, + "step": 56440 + }, + { + "epoch": 0.7280254325270672, + "grad_norm": 0.8228679497908173, + "learning_rate": 9.39268407812862e-06, + "loss": 0.2273, + "step": 56450 + }, + { + "epoch": 0.7281544007170632, + "grad_norm": 0.8886119132798043, + "learning_rate": 9.392325628073848e-06, + "loss": 0.2317, + "step": 56460 + }, + { + "epoch": 0.728283368907059, + "grad_norm": 0.7629972035169035, + "learning_rate": 9.391967079111946e-06, + "loss": 0.2242, + "step": 56470 + }, + { + "epoch": 0.728412337097055, + "grad_norm": 0.9463501155969536, + "learning_rate": 9.39160843125099e-06, + "loss": 0.237, + "step": 56480 + }, + { + "epoch": 0.728541305287051, + "grad_norm": 0.872661571822522, + "learning_rate": 9.39124968449905e-06, + "loss": 0.2308, + "step": 56490 + }, + { + "epoch": 0.7286702734770469, + "grad_norm": 0.8615567280039976, + "learning_rate": 9.390890838864212e-06, + "loss": 0.2225, + "step": 56500 + }, + { + "epoch": 0.7287992416670428, + "grad_norm": 0.9126565429834581, + "learning_rate": 9.390531894354553e-06, + "loss": 0.237, + "step": 56510 + }, + { + "epoch": 0.7289282098570388, + "grad_norm": 0.8872720587906148, + "learning_rate": 9.390172850978154e-06, + "loss": 0.2334, + "step": 56520 + }, + { + "epoch": 0.7290571780470347, + "grad_norm": 0.8914944106801809, + "learning_rate": 9.389813708743102e-06, + "loss": 0.2425, + "step": 56530 + }, + { + "epoch": 0.7291861462370306, + "grad_norm": 0.8897251856704513, + "learning_rate": 9.389454467657486e-06, + "loss": 0.2278, + "step": 56540 + }, + { + "epoch": 0.7293151144270266, + "grad_norm": 0.8608070369335691, + "learning_rate": 9.389095127729393e-06, + "loss": 0.2406, + "step": 56550 + }, + { + "epoch": 0.7294440826170225, + "grad_norm": 1.0731687204491414, + "learning_rate": 9.388735688966914e-06, + "loss": 0.2365, + "step": 56560 + }, + { + "epoch": 0.7295730508070184, + "grad_norm": 0.837339967647585, + "learning_rate": 9.388376151378146e-06, + "loss": 0.2362, + "step": 56570 + }, + { + "epoch": 0.7297020189970144, + "grad_norm": 0.8107052197875411, + "learning_rate": 9.388016514971181e-06, + "loss": 0.231, + "step": 56580 + }, + { + "epoch": 0.7298309871870103, + "grad_norm": 0.8956455585575145, + "learning_rate": 9.387656779754121e-06, + "loss": 0.2412, + "step": 56590 + }, + { + "epoch": 0.7299599553770063, + "grad_norm": 0.8066071443560288, + "learning_rate": 9.387296945735065e-06, + "loss": 0.2357, + "step": 56600 + }, + { + "epoch": 0.7300889235670022, + "grad_norm": 0.900001020219322, + "learning_rate": 9.386937012922117e-06, + "loss": 0.2339, + "step": 56610 + }, + { + "epoch": 0.7302178917569981, + "grad_norm": 0.8317555450048351, + "learning_rate": 9.38657698132338e-06, + "loss": 0.2209, + "step": 56620 + }, + { + "epoch": 0.7303468599469941, + "grad_norm": 0.8179076095966809, + "learning_rate": 9.386216850946962e-06, + "loss": 0.2416, + "step": 56630 + }, + { + "epoch": 0.73047582813699, + "grad_norm": 0.8580654574566123, + "learning_rate": 9.385856621800973e-06, + "loss": 0.2396, + "step": 56640 + }, + { + "epoch": 0.7306047963269859, + "grad_norm": 0.8372614504192265, + "learning_rate": 9.385496293893525e-06, + "loss": 0.2332, + "step": 56650 + }, + { + "epoch": 0.7307337645169819, + "grad_norm": 0.927388271990999, + "learning_rate": 9.385135867232732e-06, + "loss": 0.2469, + "step": 56660 + }, + { + "epoch": 0.7308627327069778, + "grad_norm": 0.9363654114619274, + "learning_rate": 9.384775341826707e-06, + "loss": 0.2341, + "step": 56670 + }, + { + "epoch": 0.7309917008969737, + "grad_norm": 0.8250450198112019, + "learning_rate": 9.384414717683574e-06, + "loss": 0.2387, + "step": 56680 + }, + { + "epoch": 0.7311206690869697, + "grad_norm": 0.9854147427313285, + "learning_rate": 9.384053994811449e-06, + "loss": 0.229, + "step": 56690 + }, + { + "epoch": 0.7312496372769657, + "grad_norm": 1.0145098393874898, + "learning_rate": 9.383693173218455e-06, + "loss": 0.2355, + "step": 56700 + }, + { + "epoch": 0.7313786054669615, + "grad_norm": 0.882951484025576, + "learning_rate": 9.38333225291272e-06, + "loss": 0.2449, + "step": 56710 + }, + { + "epoch": 0.7315075736569575, + "grad_norm": 0.8492146949324819, + "learning_rate": 9.382971233902369e-06, + "loss": 0.2465, + "step": 56720 + }, + { + "epoch": 0.7316365418469535, + "grad_norm": 0.831628750462582, + "learning_rate": 9.382610116195532e-06, + "loss": 0.241, + "step": 56730 + }, + { + "epoch": 0.7317655100369493, + "grad_norm": 0.8699403743083948, + "learning_rate": 9.382248899800341e-06, + "loss": 0.2371, + "step": 56740 + }, + { + "epoch": 0.7318944782269453, + "grad_norm": 0.9864847722078274, + "learning_rate": 9.38188758472493e-06, + "loss": 0.2289, + "step": 56750 + }, + { + "epoch": 0.7320234464169413, + "grad_norm": 0.9921443310007908, + "learning_rate": 9.381526170977435e-06, + "loss": 0.2397, + "step": 56760 + }, + { + "epoch": 0.7321524146069373, + "grad_norm": 0.9684448639576068, + "learning_rate": 9.381164658565995e-06, + "loss": 0.232, + "step": 56770 + }, + { + "epoch": 0.7322813827969331, + "grad_norm": 0.9081499931489202, + "learning_rate": 9.380803047498748e-06, + "loss": 0.2424, + "step": 56780 + }, + { + "epoch": 0.7324103509869291, + "grad_norm": 0.9229637117092787, + "learning_rate": 9.38044133778384e-06, + "loss": 0.2315, + "step": 56790 + }, + { + "epoch": 0.732539319176925, + "grad_norm": 0.9049407661855742, + "learning_rate": 9.380079529429415e-06, + "loss": 0.2309, + "step": 56800 + }, + { + "epoch": 0.7326682873669209, + "grad_norm": 0.8674983406343253, + "learning_rate": 9.379717622443617e-06, + "loss": 0.2381, + "step": 56810 + }, + { + "epoch": 0.7327972555569169, + "grad_norm": 0.8782638291826975, + "learning_rate": 9.379355616834602e-06, + "loss": 0.2502, + "step": 56820 + }, + { + "epoch": 0.7329262237469129, + "grad_norm": 0.8976640365910912, + "learning_rate": 9.378993512610516e-06, + "loss": 0.2525, + "step": 56830 + }, + { + "epoch": 0.7330551919369087, + "grad_norm": 0.9044062069718739, + "learning_rate": 9.378631309779517e-06, + "loss": 0.2288, + "step": 56840 + }, + { + "epoch": 0.7331841601269047, + "grad_norm": 0.9009164951625533, + "learning_rate": 9.37826900834976e-06, + "loss": 0.2408, + "step": 56850 + }, + { + "epoch": 0.7333131283169007, + "grad_norm": 0.8333529460376735, + "learning_rate": 9.3779066083294e-06, + "loss": 0.2429, + "step": 56860 + }, + { + "epoch": 0.7334420965068966, + "grad_norm": 0.8979061112348047, + "learning_rate": 9.377544109726601e-06, + "loss": 0.2445, + "step": 56870 + }, + { + "epoch": 0.7335710646968925, + "grad_norm": 0.8891882734528994, + "learning_rate": 9.377181512549527e-06, + "loss": 0.2374, + "step": 56880 + }, + { + "epoch": 0.7337000328868885, + "grad_norm": 0.9359358963146658, + "learning_rate": 9.376818816806338e-06, + "loss": 0.2507, + "step": 56890 + }, + { + "epoch": 0.7338290010768844, + "grad_norm": 0.8388832789884627, + "learning_rate": 9.376456022505207e-06, + "loss": 0.2427, + "step": 56900 + }, + { + "epoch": 0.7339579692668803, + "grad_norm": 0.8879035174010822, + "learning_rate": 9.376093129654297e-06, + "loss": 0.2391, + "step": 56910 + }, + { + "epoch": 0.7340869374568763, + "grad_norm": 0.813682674256455, + "learning_rate": 9.375730138261787e-06, + "loss": 0.2315, + "step": 56920 + }, + { + "epoch": 0.7342159056468722, + "grad_norm": 0.9191139523592807, + "learning_rate": 9.375367048335847e-06, + "loss": 0.234, + "step": 56930 + }, + { + "epoch": 0.7343448738368681, + "grad_norm": 0.9376529741437124, + "learning_rate": 9.37500385988465e-06, + "loss": 0.2328, + "step": 56940 + }, + { + "epoch": 0.734473842026864, + "grad_norm": 0.9427937054666252, + "learning_rate": 9.374640572916383e-06, + "loss": 0.2532, + "step": 56950 + }, + { + "epoch": 0.73460281021686, + "grad_norm": 0.9627822104206332, + "learning_rate": 9.374277187439218e-06, + "loss": 0.2329, + "step": 56960 + }, + { + "epoch": 0.734731778406856, + "grad_norm": 0.884726899552739, + "learning_rate": 9.373913703461341e-06, + "loss": 0.2336, + "step": 56970 + }, + { + "epoch": 0.7348607465968519, + "grad_norm": 0.8514756378623878, + "learning_rate": 9.373550120990938e-06, + "loss": 0.2486, + "step": 56980 + }, + { + "epoch": 0.7349897147868478, + "grad_norm": 0.8931270935066887, + "learning_rate": 9.373186440036195e-06, + "loss": 0.244, + "step": 56990 + }, + { + "epoch": 0.7351186829768438, + "grad_norm": 0.7883002528415947, + "learning_rate": 9.372822660605301e-06, + "loss": 0.2405, + "step": 57000 + }, + { + "epoch": 0.7352476511668397, + "grad_norm": 0.8372041454294532, + "learning_rate": 9.37245878270645e-06, + "loss": 0.219, + "step": 57010 + }, + { + "epoch": 0.7353766193568356, + "grad_norm": 0.8015003588734045, + "learning_rate": 9.372094806347834e-06, + "loss": 0.2328, + "step": 57020 + }, + { + "epoch": 0.7355055875468316, + "grad_norm": 0.8545963868398337, + "learning_rate": 9.37173073153765e-06, + "loss": 0.2319, + "step": 57030 + }, + { + "epoch": 0.7356345557368276, + "grad_norm": 0.8754095747360403, + "learning_rate": 9.371366558284095e-06, + "loss": 0.2435, + "step": 57040 + }, + { + "epoch": 0.7357635239268234, + "grad_norm": 0.8089986682377114, + "learning_rate": 9.371002286595367e-06, + "loss": 0.2371, + "step": 57050 + }, + { + "epoch": 0.7358924921168194, + "grad_norm": 0.914956108208278, + "learning_rate": 9.370637916479677e-06, + "loss": 0.2461, + "step": 57060 + }, + { + "epoch": 0.7360214603068154, + "grad_norm": 0.7900921559990786, + "learning_rate": 9.370273447945223e-06, + "loss": 0.2356, + "step": 57070 + }, + { + "epoch": 0.7361504284968112, + "grad_norm": 0.8519260275909243, + "learning_rate": 9.369908881000212e-06, + "loss": 0.2325, + "step": 57080 + }, + { + "epoch": 0.7362793966868072, + "grad_norm": 0.8501499620321247, + "learning_rate": 9.369544215652856e-06, + "loss": 0.225, + "step": 57090 + }, + { + "epoch": 0.7364083648768032, + "grad_norm": 0.8493337121327376, + "learning_rate": 9.369179451911367e-06, + "loss": 0.232, + "step": 57100 + }, + { + "epoch": 0.736537333066799, + "grad_norm": 0.9048309844886342, + "learning_rate": 9.368814589783958e-06, + "loss": 0.2371, + "step": 57110 + }, + { + "epoch": 0.736666301256795, + "grad_norm": 0.8477844302189108, + "learning_rate": 9.368449629278844e-06, + "loss": 0.2225, + "step": 57120 + }, + { + "epoch": 0.736795269446791, + "grad_norm": 0.9020097428320912, + "learning_rate": 9.368084570404245e-06, + "loss": 0.2301, + "step": 57130 + }, + { + "epoch": 0.7369242376367869, + "grad_norm": 0.8265858100682814, + "learning_rate": 9.36771941316838e-06, + "loss": 0.2388, + "step": 57140 + }, + { + "epoch": 0.7370532058267828, + "grad_norm": 0.8604745146795616, + "learning_rate": 9.367354157579472e-06, + "loss": 0.2373, + "step": 57150 + }, + { + "epoch": 0.7371821740167788, + "grad_norm": 0.9168194250089755, + "learning_rate": 9.366988803645747e-06, + "loss": 0.2297, + "step": 57160 + }, + { + "epoch": 0.7373111422067747, + "grad_norm": 0.8667656504516714, + "learning_rate": 9.36662335137543e-06, + "loss": 0.232, + "step": 57170 + }, + { + "epoch": 0.7374401103967706, + "grad_norm": 0.8914374756461257, + "learning_rate": 9.366257800776752e-06, + "loss": 0.2323, + "step": 57180 + }, + { + "epoch": 0.7375690785867666, + "grad_norm": 0.86961211344151, + "learning_rate": 9.365892151857943e-06, + "loss": 0.2339, + "step": 57190 + }, + { + "epoch": 0.7376980467767625, + "grad_norm": 0.995619611235371, + "learning_rate": 9.365526404627239e-06, + "loss": 0.2367, + "step": 57200 + }, + { + "epoch": 0.7378270149667584, + "grad_norm": 0.873980123709762, + "learning_rate": 9.365160559092874e-06, + "loss": 0.2253, + "step": 57210 + }, + { + "epoch": 0.7379559831567544, + "grad_norm": 0.927021075526882, + "learning_rate": 9.364794615263089e-06, + "loss": 0.2362, + "step": 57220 + }, + { + "epoch": 0.7380849513467503, + "grad_norm": 0.8880382956804366, + "learning_rate": 9.364428573146119e-06, + "loss": 0.2392, + "step": 57230 + }, + { + "epoch": 0.7382139195367463, + "grad_norm": 0.9138862582181496, + "learning_rate": 9.364062432750212e-06, + "loss": 0.2253, + "step": 57240 + }, + { + "epoch": 0.7383428877267422, + "grad_norm": 0.8425579616254492, + "learning_rate": 9.36369619408361e-06, + "loss": 0.2321, + "step": 57250 + }, + { + "epoch": 0.7384718559167381, + "grad_norm": 0.882502098662909, + "learning_rate": 9.363329857154562e-06, + "loss": 0.2351, + "step": 57260 + }, + { + "epoch": 0.7386008241067341, + "grad_norm": 0.8885955996357581, + "learning_rate": 9.362963421971313e-06, + "loss": 0.241, + "step": 57270 + }, + { + "epoch": 0.73872979229673, + "grad_norm": 0.9118627073493241, + "learning_rate": 9.362596888542119e-06, + "loss": 0.2353, + "step": 57280 + }, + { + "epoch": 0.7388587604867259, + "grad_norm": 0.8757273105855699, + "learning_rate": 9.362230256875234e-06, + "loss": 0.2308, + "step": 57290 + }, + { + "epoch": 0.7389877286767219, + "grad_norm": 0.8522308098456466, + "learning_rate": 9.361863526978908e-06, + "loss": 0.2254, + "step": 57300 + }, + { + "epoch": 0.7391166968667178, + "grad_norm": 0.8850486961283272, + "learning_rate": 9.361496698861407e-06, + "loss": 0.2396, + "step": 57310 + }, + { + "epoch": 0.7392456650567137, + "grad_norm": 0.8924799929728463, + "learning_rate": 9.361129772530984e-06, + "loss": 0.2321, + "step": 57320 + }, + { + "epoch": 0.7393746332467097, + "grad_norm": 0.8722384779476561, + "learning_rate": 9.360762747995908e-06, + "loss": 0.2369, + "step": 57330 + }, + { + "epoch": 0.7395036014367057, + "grad_norm": 0.9786025976978527, + "learning_rate": 9.360395625264438e-06, + "loss": 0.2355, + "step": 57340 + }, + { + "epoch": 0.7396325696267015, + "grad_norm": 0.8864812396288039, + "learning_rate": 9.360028404344843e-06, + "loss": 0.2363, + "step": 57350 + }, + { + "epoch": 0.7397615378166975, + "grad_norm": 0.934657658670678, + "learning_rate": 9.359661085245395e-06, + "loss": 0.2367, + "step": 57360 + }, + { + "epoch": 0.7398905060066935, + "grad_norm": 0.910426302688893, + "learning_rate": 9.359293667974363e-06, + "loss": 0.2399, + "step": 57370 + }, + { + "epoch": 0.7400194741966893, + "grad_norm": 0.8254776846188936, + "learning_rate": 9.35892615254002e-06, + "loss": 0.2288, + "step": 57380 + }, + { + "epoch": 0.7401484423866853, + "grad_norm": 0.9561602731395081, + "learning_rate": 9.358558538950641e-06, + "loss": 0.2247, + "step": 57390 + }, + { + "epoch": 0.7402774105766813, + "grad_norm": 0.831057167544984, + "learning_rate": 9.358190827214506e-06, + "loss": 0.238, + "step": 57400 + }, + { + "epoch": 0.7404063787666773, + "grad_norm": 0.9059455672468737, + "learning_rate": 9.357823017339894e-06, + "loss": 0.2323, + "step": 57410 + }, + { + "epoch": 0.7405353469566731, + "grad_norm": 0.8901095622122299, + "learning_rate": 9.357455109335089e-06, + "loss": 0.2332, + "step": 57420 + }, + { + "epoch": 0.7406643151466691, + "grad_norm": 0.8761682901138008, + "learning_rate": 9.357087103208373e-06, + "loss": 0.2305, + "step": 57430 + }, + { + "epoch": 0.740793283336665, + "grad_norm": 0.8778858319179584, + "learning_rate": 9.356718998968036e-06, + "loss": 0.2189, + "step": 57440 + }, + { + "epoch": 0.7409222515266609, + "grad_norm": 0.9973851490362243, + "learning_rate": 9.356350796622365e-06, + "loss": 0.2409, + "step": 57450 + }, + { + "epoch": 0.7410512197166569, + "grad_norm": 0.8917928973569794, + "learning_rate": 9.355982496179651e-06, + "loss": 0.2411, + "step": 57460 + }, + { + "epoch": 0.7411801879066529, + "grad_norm": 0.8247510350502204, + "learning_rate": 9.35561409764819e-06, + "loss": 0.2295, + "step": 57470 + }, + { + "epoch": 0.7413091560966487, + "grad_norm": 0.8957112124973072, + "learning_rate": 9.355245601036273e-06, + "loss": 0.2417, + "step": 57480 + }, + { + "epoch": 0.7414381242866447, + "grad_norm": 0.8859372205996996, + "learning_rate": 9.354877006352201e-06, + "loss": 0.2486, + "step": 57490 + }, + { + "epoch": 0.7415670924766407, + "grad_norm": 0.815328839520723, + "learning_rate": 9.354508313604274e-06, + "loss": 0.2381, + "step": 57500 + }, + { + "epoch": 0.7416960606666366, + "grad_norm": 0.9447162927734183, + "learning_rate": 9.354139522800794e-06, + "loss": 0.2484, + "step": 57510 + }, + { + "epoch": 0.7418250288566325, + "grad_norm": 0.8255214604451571, + "learning_rate": 9.353770633950068e-06, + "loss": 0.2368, + "step": 57520 + }, + { + "epoch": 0.7419539970466285, + "grad_norm": 0.8721118114410137, + "learning_rate": 9.353401647060397e-06, + "loss": 0.223, + "step": 57530 + }, + { + "epoch": 0.7420829652366244, + "grad_norm": 0.8632309236223891, + "learning_rate": 9.353032562140093e-06, + "loss": 0.2459, + "step": 57540 + }, + { + "epoch": 0.7422119334266203, + "grad_norm": 0.8763691527807892, + "learning_rate": 9.352663379197468e-06, + "loss": 0.235, + "step": 57550 + }, + { + "epoch": 0.7423409016166163, + "grad_norm": 0.8382700784411639, + "learning_rate": 9.352294098240835e-06, + "loss": 0.2383, + "step": 57560 + }, + { + "epoch": 0.7424698698066122, + "grad_norm": 0.8743673481916225, + "learning_rate": 9.351924719278508e-06, + "loss": 0.234, + "step": 57570 + }, + { + "epoch": 0.7425988379966081, + "grad_norm": 0.8388112405162328, + "learning_rate": 9.351555242318807e-06, + "loss": 0.2285, + "step": 57580 + }, + { + "epoch": 0.7427278061866041, + "grad_norm": 0.9232617757422785, + "learning_rate": 9.351185667370051e-06, + "loss": 0.2466, + "step": 57590 + }, + { + "epoch": 0.7428567743766, + "grad_norm": 0.830963037304669, + "learning_rate": 9.350815994440561e-06, + "loss": 0.2353, + "step": 57600 + }, + { + "epoch": 0.742985742566596, + "grad_norm": 0.8714164259235283, + "learning_rate": 9.350446223538661e-06, + "loss": 0.2408, + "step": 57610 + }, + { + "epoch": 0.7431147107565919, + "grad_norm": 0.916629457606497, + "learning_rate": 9.35007635467268e-06, + "loss": 0.2362, + "step": 57620 + }, + { + "epoch": 0.7432436789465878, + "grad_norm": 0.7884743245774054, + "learning_rate": 9.349706387850945e-06, + "loss": 0.2201, + "step": 57630 + }, + { + "epoch": 0.7433726471365838, + "grad_norm": 0.9648002945490627, + "learning_rate": 9.349336323081787e-06, + "loss": 0.2352, + "step": 57640 + }, + { + "epoch": 0.7435016153265797, + "grad_norm": 0.7805719321647641, + "learning_rate": 9.348966160373542e-06, + "loss": 0.2299, + "step": 57650 + }, + { + "epoch": 0.7436305835165756, + "grad_norm": 0.8897965033195979, + "learning_rate": 9.348595899734543e-06, + "loss": 0.2455, + "step": 57660 + }, + { + "epoch": 0.7437595517065716, + "grad_norm": 0.9276179081913705, + "learning_rate": 9.348225541173126e-06, + "loss": 0.2321, + "step": 57670 + }, + { + "epoch": 0.7438885198965675, + "grad_norm": 0.8628847321558036, + "learning_rate": 9.347855084697632e-06, + "loss": 0.2394, + "step": 57680 + }, + { + "epoch": 0.7440174880865634, + "grad_norm": 0.9341459889797094, + "learning_rate": 9.347484530316406e-06, + "loss": 0.2449, + "step": 57690 + }, + { + "epoch": 0.7441464562765594, + "grad_norm": 0.8338205811574236, + "learning_rate": 9.347113878037788e-06, + "loss": 0.2274, + "step": 57700 + }, + { + "epoch": 0.7442754244665554, + "grad_norm": 0.8404043825944275, + "learning_rate": 9.346743127870126e-06, + "loss": 0.236, + "step": 57710 + }, + { + "epoch": 0.7444043926565512, + "grad_norm": 0.87785928419902, + "learning_rate": 9.34637227982177e-06, + "loss": 0.2355, + "step": 57720 + }, + { + "epoch": 0.7445333608465472, + "grad_norm": 0.9994666038720992, + "learning_rate": 9.346001333901066e-06, + "loss": 0.2509, + "step": 57730 + }, + { + "epoch": 0.7446623290365432, + "grad_norm": 0.9200080885939008, + "learning_rate": 9.345630290116375e-06, + "loss": 0.2393, + "step": 57740 + }, + { + "epoch": 0.744791297226539, + "grad_norm": 0.934213115822601, + "learning_rate": 9.345259148476046e-06, + "loss": 0.2434, + "step": 57750 + }, + { + "epoch": 0.744920265416535, + "grad_norm": 0.8871052576966518, + "learning_rate": 9.344887908988437e-06, + "loss": 0.2252, + "step": 57760 + }, + { + "epoch": 0.745049233606531, + "grad_norm": 0.9649895004015322, + "learning_rate": 9.34451657166191e-06, + "loss": 0.2397, + "step": 57770 + }, + { + "epoch": 0.7451782017965269, + "grad_norm": 0.8220191003216791, + "learning_rate": 9.344145136504827e-06, + "loss": 0.2377, + "step": 57780 + }, + { + "epoch": 0.7453071699865228, + "grad_norm": 0.8609803507002797, + "learning_rate": 9.343773603525549e-06, + "loss": 0.2285, + "step": 57790 + }, + { + "epoch": 0.7454361381765188, + "grad_norm": 0.9347455522982847, + "learning_rate": 9.343401972732444e-06, + "loss": 0.2326, + "step": 57800 + }, + { + "epoch": 0.7455651063665147, + "grad_norm": 0.8771667165663062, + "learning_rate": 9.34303024413388e-06, + "loss": 0.2303, + "step": 57810 + }, + { + "epoch": 0.7456940745565106, + "grad_norm": 0.912430378200429, + "learning_rate": 9.34265841773823e-06, + "loss": 0.2346, + "step": 57820 + }, + { + "epoch": 0.7458230427465066, + "grad_norm": 0.892768449630694, + "learning_rate": 9.342286493553864e-06, + "loss": 0.2378, + "step": 57830 + }, + { + "epoch": 0.7459520109365025, + "grad_norm": 0.8941277663625998, + "learning_rate": 9.341914471589157e-06, + "loss": 0.2215, + "step": 57840 + }, + { + "epoch": 0.7460809791264984, + "grad_norm": 0.8561360819567801, + "learning_rate": 9.341542351852489e-06, + "loss": 0.2428, + "step": 57850 + }, + { + "epoch": 0.7462099473164944, + "grad_norm": 0.789101345850751, + "learning_rate": 9.341170134352235e-06, + "loss": 0.2251, + "step": 57860 + }, + { + "epoch": 0.7463389155064903, + "grad_norm": 0.874046804288229, + "learning_rate": 9.340797819096781e-06, + "loss": 0.2525, + "step": 57870 + }, + { + "epoch": 0.7464678836964863, + "grad_norm": 0.9299504489340252, + "learning_rate": 9.34042540609451e-06, + "loss": 0.228, + "step": 57880 + }, + { + "epoch": 0.7465968518864822, + "grad_norm": 0.9340304786241646, + "learning_rate": 9.340052895353806e-06, + "loss": 0.2273, + "step": 57890 + }, + { + "epoch": 0.7467258200764781, + "grad_norm": 0.7995944866354574, + "learning_rate": 9.33968028688306e-06, + "loss": 0.2296, + "step": 57900 + }, + { + "epoch": 0.7468547882664741, + "grad_norm": 0.8627595341062932, + "learning_rate": 9.339307580690658e-06, + "loss": 0.2335, + "step": 57910 + }, + { + "epoch": 0.74698375645647, + "grad_norm": 0.8641757248314489, + "learning_rate": 9.338934776784998e-06, + "loss": 0.2399, + "step": 57920 + }, + { + "epoch": 0.7471127246464659, + "grad_norm": 0.8741717755807826, + "learning_rate": 9.338561875174472e-06, + "loss": 0.241, + "step": 57930 + }, + { + "epoch": 0.7472416928364619, + "grad_norm": 0.8025549429511639, + "learning_rate": 9.338188875867477e-06, + "loss": 0.2365, + "step": 57940 + }, + { + "epoch": 0.7473706610264578, + "grad_norm": 0.9103083376759253, + "learning_rate": 9.337815778872413e-06, + "loss": 0.2358, + "step": 57950 + }, + { + "epoch": 0.7474996292164537, + "grad_norm": 0.9058021842082967, + "learning_rate": 9.337442584197683e-06, + "loss": 0.2365, + "step": 57960 + }, + { + "epoch": 0.7476285974064497, + "grad_norm": 0.901364552850649, + "learning_rate": 9.337069291851686e-06, + "loss": 0.2385, + "step": 57970 + }, + { + "epoch": 0.7477575655964457, + "grad_norm": 0.9050500909916424, + "learning_rate": 9.336695901842833e-06, + "loss": 0.241, + "step": 57980 + }, + { + "epoch": 0.7478865337864415, + "grad_norm": 0.911489816978807, + "learning_rate": 9.336322414179529e-06, + "loss": 0.2335, + "step": 57990 + }, + { + "epoch": 0.7480155019764375, + "grad_norm": 0.8481641196162797, + "learning_rate": 9.335948828870185e-06, + "loss": 0.2503, + "step": 58000 + }, + { + "epoch": 0.7481444701664335, + "grad_norm": 0.9408998930878866, + "learning_rate": 9.335575145923213e-06, + "loss": 0.2383, + "step": 58010 + }, + { + "epoch": 0.7482734383564293, + "grad_norm": 0.8735798022133561, + "learning_rate": 9.33520136534703e-06, + "loss": 0.2258, + "step": 58020 + }, + { + "epoch": 0.7484024065464253, + "grad_norm": 0.9191604409295882, + "learning_rate": 9.334827487150049e-06, + "loss": 0.2412, + "step": 58030 + }, + { + "epoch": 0.7485313747364213, + "grad_norm": 0.9166942135049081, + "learning_rate": 9.334453511340693e-06, + "loss": 0.2331, + "step": 58040 + }, + { + "epoch": 0.7486603429264171, + "grad_norm": 0.8900669085612125, + "learning_rate": 9.33407943792738e-06, + "loss": 0.2377, + "step": 58050 + }, + { + "epoch": 0.7487893111164131, + "grad_norm": 0.8137838028459543, + "learning_rate": 9.333705266918536e-06, + "loss": 0.2243, + "step": 58060 + }, + { + "epoch": 0.7489182793064091, + "grad_norm": 0.9565880093868666, + "learning_rate": 9.333330998322586e-06, + "loss": 0.2331, + "step": 58070 + }, + { + "epoch": 0.749047247496405, + "grad_norm": 0.8549644748068836, + "learning_rate": 9.332956632147954e-06, + "loss": 0.2285, + "step": 58080 + }, + { + "epoch": 0.7491762156864009, + "grad_norm": 0.7825572091947599, + "learning_rate": 9.332582168403076e-06, + "loss": 0.2226, + "step": 58090 + }, + { + "epoch": 0.7493051838763969, + "grad_norm": 0.9398359007997411, + "learning_rate": 9.33220760709638e-06, + "loss": 0.2311, + "step": 58100 + }, + { + "epoch": 0.7494341520663929, + "grad_norm": 0.8030803794295641, + "learning_rate": 9.331832948236303e-06, + "loss": 0.2216, + "step": 58110 + }, + { + "epoch": 0.7495631202563887, + "grad_norm": 0.8973010950122926, + "learning_rate": 9.331458191831282e-06, + "loss": 0.2197, + "step": 58120 + }, + { + "epoch": 0.7496920884463847, + "grad_norm": 0.9042722508611053, + "learning_rate": 9.331083337889752e-06, + "loss": 0.231, + "step": 58130 + }, + { + "epoch": 0.7498210566363807, + "grad_norm": 0.8635488224756116, + "learning_rate": 9.330708386420158e-06, + "loss": 0.232, + "step": 58140 + }, + { + "epoch": 0.7499500248263766, + "grad_norm": 0.8493718920344784, + "learning_rate": 9.330333337430943e-06, + "loss": 0.2339, + "step": 58150 + }, + { + "epoch": 0.7500789930163725, + "grad_norm": 0.785560117230875, + "learning_rate": 9.32995819093055e-06, + "loss": 0.2301, + "step": 58160 + }, + { + "epoch": 0.7502079612063685, + "grad_norm": 0.8856654322111487, + "learning_rate": 9.329582946927428e-06, + "loss": 0.2354, + "step": 58170 + }, + { + "epoch": 0.7503369293963644, + "grad_norm": 0.8840031787476069, + "learning_rate": 9.329207605430024e-06, + "loss": 0.2293, + "step": 58180 + }, + { + "epoch": 0.7504658975863603, + "grad_norm": 0.8750504571196347, + "learning_rate": 9.328832166446796e-06, + "loss": 0.2439, + "step": 58190 + }, + { + "epoch": 0.7505948657763563, + "grad_norm": 0.8291391049634292, + "learning_rate": 9.328456629986193e-06, + "loss": 0.232, + "step": 58200 + }, + { + "epoch": 0.7507238339663522, + "grad_norm": 0.930443458255772, + "learning_rate": 9.328080996056671e-06, + "loss": 0.2301, + "step": 58210 + }, + { + "epoch": 0.7508528021563481, + "grad_norm": 0.8433018572653763, + "learning_rate": 9.327705264666694e-06, + "loss": 0.2185, + "step": 58220 + }, + { + "epoch": 0.7509817703463441, + "grad_norm": 0.9466636811351178, + "learning_rate": 9.327329435824718e-06, + "loss": 0.234, + "step": 58230 + }, + { + "epoch": 0.75111073853634, + "grad_norm": 0.9646250925937234, + "learning_rate": 9.326953509539209e-06, + "loss": 0.2348, + "step": 58240 + }, + { + "epoch": 0.751239706726336, + "grad_norm": 0.8592605629745891, + "learning_rate": 9.32657748581863e-06, + "loss": 0.2422, + "step": 58250 + }, + { + "epoch": 0.7513686749163319, + "grad_norm": 0.8006493569391645, + "learning_rate": 9.32620136467145e-06, + "loss": 0.2205, + "step": 58260 + }, + { + "epoch": 0.7514976431063278, + "grad_norm": 0.8895609097119478, + "learning_rate": 9.325825146106136e-06, + "loss": 0.2304, + "step": 58270 + }, + { + "epoch": 0.7516266112963238, + "grad_norm": 0.8630489461650724, + "learning_rate": 9.325448830131162e-06, + "loss": 0.2234, + "step": 58280 + }, + { + "epoch": 0.7517555794863197, + "grad_norm": 0.8792559182680267, + "learning_rate": 9.325072416755001e-06, + "loss": 0.2374, + "step": 58290 + }, + { + "epoch": 0.7518845476763156, + "grad_norm": 0.901519785861879, + "learning_rate": 9.324695905986129e-06, + "loss": 0.2271, + "step": 58300 + }, + { + "epoch": 0.7520135158663116, + "grad_norm": 0.8480377316579069, + "learning_rate": 9.324319297833027e-06, + "loss": 0.2256, + "step": 58310 + }, + { + "epoch": 0.7521424840563075, + "grad_norm": 0.8980082486995065, + "learning_rate": 9.323942592304172e-06, + "loss": 0.2381, + "step": 58320 + }, + { + "epoch": 0.7522714522463034, + "grad_norm": 0.9399906279054708, + "learning_rate": 9.323565789408047e-06, + "loss": 0.231, + "step": 58330 + }, + { + "epoch": 0.7524004204362994, + "grad_norm": 0.8764943710776316, + "learning_rate": 9.323188889153139e-06, + "loss": 0.2451, + "step": 58340 + }, + { + "epoch": 0.7525293886262954, + "grad_norm": 0.9238980793924084, + "learning_rate": 9.322811891547934e-06, + "loss": 0.2321, + "step": 58350 + }, + { + "epoch": 0.7526583568162912, + "grad_norm": 0.9787351111852371, + "learning_rate": 9.322434796600921e-06, + "loss": 0.2468, + "step": 58360 + }, + { + "epoch": 0.7527873250062872, + "grad_norm": 0.8441991371520664, + "learning_rate": 9.322057604320592e-06, + "loss": 0.2326, + "step": 58370 + }, + { + "epoch": 0.7529162931962832, + "grad_norm": 0.858762683827703, + "learning_rate": 9.32168031471544e-06, + "loss": 0.2412, + "step": 58380 + }, + { + "epoch": 0.753045261386279, + "grad_norm": 0.8855599194538654, + "learning_rate": 9.321302927793963e-06, + "loss": 0.2347, + "step": 58390 + }, + { + "epoch": 0.753174229576275, + "grad_norm": 0.9241203817401722, + "learning_rate": 9.320925443564655e-06, + "loss": 0.217, + "step": 58400 + }, + { + "epoch": 0.753303197766271, + "grad_norm": 0.8662874338774255, + "learning_rate": 9.32054786203602e-06, + "loss": 0.2307, + "step": 58410 + }, + { + "epoch": 0.7534321659562669, + "grad_norm": 0.873809939872258, + "learning_rate": 9.32017018321656e-06, + "loss": 0.2351, + "step": 58420 + }, + { + "epoch": 0.7535611341462628, + "grad_norm": 0.791881193494722, + "learning_rate": 9.319792407114777e-06, + "loss": 0.2437, + "step": 58430 + }, + { + "epoch": 0.7536901023362588, + "grad_norm": 0.8190391469090536, + "learning_rate": 9.319414533739181e-06, + "loss": 0.2256, + "step": 58440 + }, + { + "epoch": 0.7538190705262547, + "grad_norm": 0.7942721385542966, + "learning_rate": 9.319036563098278e-06, + "loss": 0.2424, + "step": 58450 + }, + { + "epoch": 0.7539480387162506, + "grad_norm": 0.9623856367095069, + "learning_rate": 9.318658495200583e-06, + "loss": 0.2363, + "step": 58460 + }, + { + "epoch": 0.7540770069062466, + "grad_norm": 1.0218287479396782, + "learning_rate": 9.318280330054605e-06, + "loss": 0.228, + "step": 58470 + }, + { + "epoch": 0.7542059750962425, + "grad_norm": 0.8410581215369312, + "learning_rate": 9.317902067668862e-06, + "loss": 0.2308, + "step": 58480 + }, + { + "epoch": 0.7543349432862384, + "grad_norm": 0.9232907589608331, + "learning_rate": 9.317523708051874e-06, + "loss": 0.2495, + "step": 58490 + }, + { + "epoch": 0.7544639114762344, + "grad_norm": 0.9985133439404852, + "learning_rate": 9.317145251212156e-06, + "loss": 0.2266, + "step": 58500 + }, + { + "epoch": 0.7545928796662303, + "grad_norm": 0.8650669317055191, + "learning_rate": 9.316766697158235e-06, + "loss": 0.2383, + "step": 58510 + }, + { + "epoch": 0.7547218478562263, + "grad_norm": 0.8655900725378514, + "learning_rate": 9.316388045898632e-06, + "loss": 0.2197, + "step": 58520 + }, + { + "epoch": 0.7548508160462222, + "grad_norm": 0.8669213543895614, + "learning_rate": 9.316009297441874e-06, + "loss": 0.2331, + "step": 58530 + }, + { + "epoch": 0.7549797842362181, + "grad_norm": 0.9096798542984291, + "learning_rate": 9.31563045179649e-06, + "loss": 0.2251, + "step": 58540 + }, + { + "epoch": 0.7551087524262141, + "grad_norm": 0.8603898740248482, + "learning_rate": 9.315251508971013e-06, + "loss": 0.2409, + "step": 58550 + }, + { + "epoch": 0.75523772061621, + "grad_norm": 0.9280293779926035, + "learning_rate": 9.314872468973972e-06, + "loss": 0.2265, + "step": 58560 + }, + { + "epoch": 0.7553666888062059, + "grad_norm": 0.9389110572807431, + "learning_rate": 9.314493331813905e-06, + "loss": 0.2346, + "step": 58570 + }, + { + "epoch": 0.7554956569962019, + "grad_norm": 0.9094912827741978, + "learning_rate": 9.31411409749935e-06, + "loss": 0.2503, + "step": 58580 + }, + { + "epoch": 0.7556246251861978, + "grad_norm": 0.8657385733567443, + "learning_rate": 9.313734766038847e-06, + "loss": 0.2319, + "step": 58590 + }, + { + "epoch": 0.7557535933761937, + "grad_norm": 0.8156900437382216, + "learning_rate": 9.313355337440935e-06, + "loss": 0.2482, + "step": 58600 + }, + { + "epoch": 0.7558825615661897, + "grad_norm": 0.799023106126864, + "learning_rate": 9.312975811714159e-06, + "loss": 0.2204, + "step": 58610 + }, + { + "epoch": 0.7560115297561857, + "grad_norm": 0.8685399625826872, + "learning_rate": 9.312596188867067e-06, + "loss": 0.2494, + "step": 58620 + }, + { + "epoch": 0.7561404979461815, + "grad_norm": 0.8834660756825088, + "learning_rate": 9.312216468908204e-06, + "loss": 0.2301, + "step": 58630 + }, + { + "epoch": 0.7562694661361775, + "grad_norm": 0.8673221839265428, + "learning_rate": 9.311836651846125e-06, + "loss": 0.2284, + "step": 58640 + }, + { + "epoch": 0.7563984343261735, + "grad_norm": 0.8423960890778659, + "learning_rate": 9.311456737689381e-06, + "loss": 0.2199, + "step": 58650 + }, + { + "epoch": 0.7565274025161693, + "grad_norm": 0.9108312290384079, + "learning_rate": 9.311076726446525e-06, + "loss": 0.2361, + "step": 58660 + }, + { + "epoch": 0.7566563707061653, + "grad_norm": 0.8176147130953325, + "learning_rate": 9.310696618126118e-06, + "loss": 0.2429, + "step": 58670 + }, + { + "epoch": 0.7567853388961613, + "grad_norm": 0.8939300045844149, + "learning_rate": 9.310316412736716e-06, + "loss": 0.2257, + "step": 58680 + }, + { + "epoch": 0.7569143070861571, + "grad_norm": 0.8732492011418622, + "learning_rate": 9.309936110286882e-06, + "loss": 0.2273, + "step": 58690 + }, + { + "epoch": 0.7570432752761531, + "grad_norm": 0.9583808670643934, + "learning_rate": 9.309555710785177e-06, + "loss": 0.2173, + "step": 58700 + }, + { + "epoch": 0.7571722434661491, + "grad_norm": 0.8048891816642187, + "learning_rate": 9.309175214240171e-06, + "loss": 0.2265, + "step": 58710 + }, + { + "epoch": 0.7573012116561451, + "grad_norm": 0.7902168885061506, + "learning_rate": 9.30879462066043e-06, + "loss": 0.2281, + "step": 58720 + }, + { + "epoch": 0.7574301798461409, + "grad_norm": 0.8746115091241968, + "learning_rate": 9.308413930054525e-06, + "loss": 0.2351, + "step": 58730 + }, + { + "epoch": 0.7575591480361369, + "grad_norm": 0.8131820288094286, + "learning_rate": 9.30803314243103e-06, + "loss": 0.2312, + "step": 58740 + }, + { + "epoch": 0.7576881162261329, + "grad_norm": 0.8375772577947691, + "learning_rate": 9.307652257798514e-06, + "loss": 0.2482, + "step": 58750 + }, + { + "epoch": 0.7578170844161287, + "grad_norm": 0.9563505861095345, + "learning_rate": 9.307271276165559e-06, + "loss": 0.2299, + "step": 58760 + }, + { + "epoch": 0.7579460526061247, + "grad_norm": 0.8715415256676313, + "learning_rate": 9.306890197540743e-06, + "loss": 0.2372, + "step": 58770 + }, + { + "epoch": 0.7580750207961207, + "grad_norm": 0.861652350290659, + "learning_rate": 9.306509021932647e-06, + "loss": 0.2246, + "step": 58780 + }, + { + "epoch": 0.7582039889861166, + "grad_norm": 0.9707149815853333, + "learning_rate": 9.306127749349853e-06, + "loss": 0.2333, + "step": 58790 + }, + { + "epoch": 0.7583329571761125, + "grad_norm": 0.8098436382127501, + "learning_rate": 9.305746379800948e-06, + "loss": 0.2346, + "step": 58800 + }, + { + "epoch": 0.7584619253661085, + "grad_norm": 0.9278787262962912, + "learning_rate": 9.30536491329452e-06, + "loss": 0.2319, + "step": 58810 + }, + { + "epoch": 0.7585908935561044, + "grad_norm": 0.8936664350733031, + "learning_rate": 9.304983349839158e-06, + "loss": 0.2324, + "step": 58820 + }, + { + "epoch": 0.7587198617461003, + "grad_norm": 0.9340474753952784, + "learning_rate": 9.304601689443455e-06, + "loss": 0.2435, + "step": 58830 + }, + { + "epoch": 0.7588488299360963, + "grad_norm": 0.8427841413746793, + "learning_rate": 9.304219932116003e-06, + "loss": 0.2378, + "step": 58840 + }, + { + "epoch": 0.7589777981260922, + "grad_norm": 0.8973992752720081, + "learning_rate": 9.303838077865402e-06, + "loss": 0.2423, + "step": 58850 + }, + { + "epoch": 0.7591067663160881, + "grad_norm": 0.8276912432658917, + "learning_rate": 9.303456126700248e-06, + "loss": 0.2378, + "step": 58860 + }, + { + "epoch": 0.7592357345060841, + "grad_norm": 0.9779759283657237, + "learning_rate": 9.303074078629143e-06, + "loss": 0.2399, + "step": 58870 + }, + { + "epoch": 0.75936470269608, + "grad_norm": 0.8452703858762499, + "learning_rate": 9.302691933660691e-06, + "loss": 0.2214, + "step": 58880 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.8409065182910354, + "learning_rate": 9.302309691803492e-06, + "loss": 0.2455, + "step": 58890 + }, + { + "epoch": 0.7596226390760719, + "grad_norm": 0.9432755560036932, + "learning_rate": 9.301927353066162e-06, + "loss": 0.2332, + "step": 58900 + }, + { + "epoch": 0.7597516072660678, + "grad_norm": 0.885737113597953, + "learning_rate": 9.301544917457304e-06, + "loss": 0.2258, + "step": 58910 + }, + { + "epoch": 0.7598805754560638, + "grad_norm": 0.9237423454049307, + "learning_rate": 9.301162384985532e-06, + "loss": 0.2312, + "step": 58920 + }, + { + "epoch": 0.7600095436460597, + "grad_norm": 0.9451440547991212, + "learning_rate": 9.300779755659458e-06, + "loss": 0.2318, + "step": 58930 + }, + { + "epoch": 0.7601385118360556, + "grad_norm": 0.8380403358496459, + "learning_rate": 9.3003970294877e-06, + "loss": 0.2333, + "step": 58940 + }, + { + "epoch": 0.7602674800260516, + "grad_norm": 0.929870828662699, + "learning_rate": 9.300014206478878e-06, + "loss": 0.2338, + "step": 58950 + }, + { + "epoch": 0.7603964482160475, + "grad_norm": 0.869094903819028, + "learning_rate": 9.299631286641609e-06, + "loss": 0.2528, + "step": 58960 + }, + { + "epoch": 0.7605254164060434, + "grad_norm": 0.8363571238090416, + "learning_rate": 9.299248269984517e-06, + "loss": 0.233, + "step": 58970 + }, + { + "epoch": 0.7606543845960394, + "grad_norm": 0.840321974678568, + "learning_rate": 9.298865156516228e-06, + "loss": 0.2325, + "step": 58980 + }, + { + "epoch": 0.7607833527860354, + "grad_norm": 0.7962823090541534, + "learning_rate": 9.298481946245366e-06, + "loss": 0.2346, + "step": 58990 + }, + { + "epoch": 0.7609123209760312, + "grad_norm": 0.8588315601543937, + "learning_rate": 9.298098639180564e-06, + "loss": 0.2331, + "step": 59000 + }, + { + "epoch": 0.7610412891660272, + "grad_norm": 0.8438075690747291, + "learning_rate": 9.29771523533045e-06, + "loss": 0.2283, + "step": 59010 + }, + { + "epoch": 0.7611702573560232, + "grad_norm": 0.9155321644210489, + "learning_rate": 9.29733173470366e-06, + "loss": 0.2278, + "step": 59020 + }, + { + "epoch": 0.761299225546019, + "grad_norm": 0.9466228277007676, + "learning_rate": 9.29694813730883e-06, + "loss": 0.2225, + "step": 59030 + }, + { + "epoch": 0.761428193736015, + "grad_norm": 0.9782457341181773, + "learning_rate": 9.296564443154594e-06, + "loss": 0.2335, + "step": 59040 + }, + { + "epoch": 0.761557161926011, + "grad_norm": 0.8765652405652427, + "learning_rate": 9.296180652249597e-06, + "loss": 0.2299, + "step": 59050 + }, + { + "epoch": 0.7616861301160068, + "grad_norm": 0.9543858224759474, + "learning_rate": 9.295796764602478e-06, + "loss": 0.2404, + "step": 59060 + }, + { + "epoch": 0.7618150983060028, + "grad_norm": 0.9402379961514943, + "learning_rate": 9.295412780221884e-06, + "loss": 0.2306, + "step": 59070 + }, + { + "epoch": 0.7619440664959988, + "grad_norm": 0.7673679640141647, + "learning_rate": 9.295028699116458e-06, + "loss": 0.2301, + "step": 59080 + }, + { + "epoch": 0.7620730346859947, + "grad_norm": 0.9140506012044541, + "learning_rate": 9.294644521294853e-06, + "loss": 0.2364, + "step": 59090 + }, + { + "epoch": 0.7622020028759906, + "grad_norm": 0.8573913178913698, + "learning_rate": 9.294260246765716e-06, + "loss": 0.2341, + "step": 59100 + }, + { + "epoch": 0.7623309710659866, + "grad_norm": 0.8375691068408361, + "learning_rate": 9.293875875537704e-06, + "loss": 0.2257, + "step": 59110 + }, + { + "epoch": 0.7624599392559825, + "grad_norm": 0.870316248888292, + "learning_rate": 9.293491407619468e-06, + "loss": 0.2302, + "step": 59120 + }, + { + "epoch": 0.7625889074459784, + "grad_norm": 0.9149873295758768, + "learning_rate": 9.29310684301967e-06, + "loss": 0.2273, + "step": 59130 + }, + { + "epoch": 0.7627178756359744, + "grad_norm": 0.8254397383165337, + "learning_rate": 9.292722181746967e-06, + "loss": 0.2344, + "step": 59140 + }, + { + "epoch": 0.7628468438259703, + "grad_norm": 0.8396674701140977, + "learning_rate": 9.292337423810022e-06, + "loss": 0.2299, + "step": 59150 + }, + { + "epoch": 0.7629758120159663, + "grad_norm": 0.8317067309840087, + "learning_rate": 9.291952569217496e-06, + "loss": 0.2362, + "step": 59160 + }, + { + "epoch": 0.7631047802059622, + "grad_norm": 0.8710048118711228, + "learning_rate": 9.291567617978061e-06, + "loss": 0.2372, + "step": 59170 + }, + { + "epoch": 0.7632337483959581, + "grad_norm": 0.9511827034359529, + "learning_rate": 9.291182570100381e-06, + "loss": 0.235, + "step": 59180 + }, + { + "epoch": 0.7633627165859541, + "grad_norm": 0.8459390053365529, + "learning_rate": 9.290797425593128e-06, + "loss": 0.232, + "step": 59190 + }, + { + "epoch": 0.76349168477595, + "grad_norm": 0.9340355465429578, + "learning_rate": 9.290412184464975e-06, + "loss": 0.2362, + "step": 59200 + }, + { + "epoch": 0.763620652965946, + "grad_norm": 0.9938036365458475, + "learning_rate": 9.290026846724595e-06, + "loss": 0.2346, + "step": 59210 + }, + { + "epoch": 0.7637496211559419, + "grad_norm": 0.8639766755533602, + "learning_rate": 9.289641412380667e-06, + "loss": 0.2458, + "step": 59220 + }, + { + "epoch": 0.7638785893459378, + "grad_norm": 0.8833809287911858, + "learning_rate": 9.28925588144187e-06, + "loss": 0.2471, + "step": 59230 + }, + { + "epoch": 0.7640075575359337, + "grad_norm": 0.8484593618307106, + "learning_rate": 9.288870253916886e-06, + "loss": 0.2363, + "step": 59240 + }, + { + "epoch": 0.7641365257259297, + "grad_norm": 0.8513882734930219, + "learning_rate": 9.288484529814396e-06, + "loss": 0.2212, + "step": 59250 + }, + { + "epoch": 0.7642654939159257, + "grad_norm": 0.8686680292270418, + "learning_rate": 9.288098709143088e-06, + "loss": 0.2266, + "step": 59260 + }, + { + "epoch": 0.7643944621059215, + "grad_norm": 0.9653468163311089, + "learning_rate": 9.287712791911652e-06, + "loss": 0.2319, + "step": 59270 + }, + { + "epoch": 0.7645234302959175, + "grad_norm": 0.9604757197290922, + "learning_rate": 9.287326778128773e-06, + "loss": 0.2305, + "step": 59280 + }, + { + "epoch": 0.7646523984859135, + "grad_norm": 0.881487269097774, + "learning_rate": 9.286940667803148e-06, + "loss": 0.218, + "step": 59290 + }, + { + "epoch": 0.7647813666759093, + "grad_norm": 0.9215773541281002, + "learning_rate": 9.286554460943467e-06, + "loss": 0.2431, + "step": 59300 + }, + { + "epoch": 0.7649103348659053, + "grad_norm": 0.8837781623875435, + "learning_rate": 9.286168157558432e-06, + "loss": 0.2307, + "step": 59310 + }, + { + "epoch": 0.7650393030559013, + "grad_norm": 0.7825312140143018, + "learning_rate": 9.285781757656738e-06, + "loss": 0.2309, + "step": 59320 + }, + { + "epoch": 0.7651682712458971, + "grad_norm": 0.8726112207895167, + "learning_rate": 9.285395261247088e-06, + "loss": 0.2277, + "step": 59330 + }, + { + "epoch": 0.7652972394358931, + "grad_norm": 0.9656652678671171, + "learning_rate": 9.285008668338183e-06, + "loss": 0.2414, + "step": 59340 + }, + { + "epoch": 0.7654262076258891, + "grad_norm": 0.995706943628638, + "learning_rate": 9.28462197893873e-06, + "loss": 0.2427, + "step": 59350 + }, + { + "epoch": 0.7655551758158851, + "grad_norm": 0.8376390200258668, + "learning_rate": 9.284235193057436e-06, + "loss": 0.2333, + "step": 59360 + }, + { + "epoch": 0.7656841440058809, + "grad_norm": 0.8614749642021747, + "learning_rate": 9.283848310703012e-06, + "loss": 0.2424, + "step": 59370 + }, + { + "epoch": 0.7658131121958769, + "grad_norm": 0.8496245888507707, + "learning_rate": 9.283461331884168e-06, + "loss": 0.219, + "step": 59380 + }, + { + "epoch": 0.7659420803858729, + "grad_norm": 0.8849718365814747, + "learning_rate": 9.283074256609619e-06, + "loss": 0.247, + "step": 59390 + }, + { + "epoch": 0.7660710485758687, + "grad_norm": 0.8897562257445862, + "learning_rate": 9.282687084888083e-06, + "loss": 0.2368, + "step": 59400 + }, + { + "epoch": 0.7662000167658647, + "grad_norm": 0.9410717949105573, + "learning_rate": 9.282299816728273e-06, + "loss": 0.2256, + "step": 59410 + }, + { + "epoch": 0.7663289849558607, + "grad_norm": 0.856440374040463, + "learning_rate": 9.281912452138914e-06, + "loss": 0.2376, + "step": 59420 + }, + { + "epoch": 0.7664579531458566, + "grad_norm": 0.8518212138552026, + "learning_rate": 9.28152499112873e-06, + "loss": 0.2296, + "step": 59430 + }, + { + "epoch": 0.7665869213358525, + "grad_norm": 0.925527962145525, + "learning_rate": 9.281137433706442e-06, + "loss": 0.2495, + "step": 59440 + }, + { + "epoch": 0.7667158895258485, + "grad_norm": 0.8890800348395319, + "learning_rate": 9.28074977988078e-06, + "loss": 0.2287, + "step": 59450 + }, + { + "epoch": 0.7668448577158444, + "grad_norm": 0.8873221509558163, + "learning_rate": 9.280362029660471e-06, + "loss": 0.2398, + "step": 59460 + }, + { + "epoch": 0.7669738259058403, + "grad_norm": 0.9556787911232619, + "learning_rate": 9.279974183054248e-06, + "loss": 0.2435, + "step": 59470 + }, + { + "epoch": 0.7671027940958363, + "grad_norm": 0.9484601490984638, + "learning_rate": 9.279586240070843e-06, + "loss": 0.2329, + "step": 59480 + }, + { + "epoch": 0.7672317622858322, + "grad_norm": 0.8695298402451508, + "learning_rate": 9.279198200718994e-06, + "loss": 0.2336, + "step": 59490 + }, + { + "epoch": 0.7673607304758281, + "grad_norm": 0.9246820819564965, + "learning_rate": 9.278810065007438e-06, + "loss": 0.2348, + "step": 59500 + }, + { + "epoch": 0.7674896986658241, + "grad_norm": 0.79250265656932, + "learning_rate": 9.278421832944914e-06, + "loss": 0.2256, + "step": 59510 + }, + { + "epoch": 0.76761866685582, + "grad_norm": 0.8613574716643172, + "learning_rate": 9.278033504540164e-06, + "loss": 0.2263, + "step": 59520 + }, + { + "epoch": 0.767747635045816, + "grad_norm": 0.9422294458328611, + "learning_rate": 9.277645079801937e-06, + "loss": 0.2342, + "step": 59530 + }, + { + "epoch": 0.7678766032358119, + "grad_norm": 0.9190136490522717, + "learning_rate": 9.277256558738975e-06, + "loss": 0.2251, + "step": 59540 + }, + { + "epoch": 0.7680055714258078, + "grad_norm": 0.780512231161097, + "learning_rate": 9.276867941360026e-06, + "loss": 0.2233, + "step": 59550 + }, + { + "epoch": 0.7681345396158038, + "grad_norm": 1.0075874198271009, + "learning_rate": 9.276479227673845e-06, + "loss": 0.2314, + "step": 59560 + }, + { + "epoch": 0.7682635078057997, + "grad_norm": 0.8552102948381093, + "learning_rate": 9.276090417689182e-06, + "loss": 0.2389, + "step": 59570 + }, + { + "epoch": 0.7683924759957956, + "grad_norm": 0.869617400161683, + "learning_rate": 9.275701511414795e-06, + "loss": 0.2303, + "step": 59580 + }, + { + "epoch": 0.7685214441857916, + "grad_norm": 0.8662759033134411, + "learning_rate": 9.275312508859438e-06, + "loss": 0.2545, + "step": 59590 + }, + { + "epoch": 0.7686504123757875, + "grad_norm": 0.8369855100504411, + "learning_rate": 9.274923410031873e-06, + "loss": 0.2317, + "step": 59600 + }, + { + "epoch": 0.7687793805657834, + "grad_norm": 0.7987648455747546, + "learning_rate": 9.27453421494086e-06, + "loss": 0.2402, + "step": 59610 + }, + { + "epoch": 0.7689083487557794, + "grad_norm": 0.8633928221921574, + "learning_rate": 9.274144923595166e-06, + "loss": 0.2282, + "step": 59620 + }, + { + "epoch": 0.7690373169457754, + "grad_norm": 0.8095165675286364, + "learning_rate": 9.273755536003555e-06, + "loss": 0.217, + "step": 59630 + }, + { + "epoch": 0.7691662851357712, + "grad_norm": 0.859985937867782, + "learning_rate": 9.273366052174796e-06, + "loss": 0.2165, + "step": 59640 + }, + { + "epoch": 0.7692952533257672, + "grad_norm": 0.8232161589419629, + "learning_rate": 9.272976472117659e-06, + "loss": 0.2242, + "step": 59650 + }, + { + "epoch": 0.7694242215157632, + "grad_norm": 0.8199578767289999, + "learning_rate": 9.272586795840915e-06, + "loss": 0.2396, + "step": 59660 + }, + { + "epoch": 0.769553189705759, + "grad_norm": 0.8262152205021812, + "learning_rate": 9.272197023353341e-06, + "loss": 0.232, + "step": 59670 + }, + { + "epoch": 0.769682157895755, + "grad_norm": 0.8382646929018728, + "learning_rate": 9.271807154663714e-06, + "loss": 0.2311, + "step": 59680 + }, + { + "epoch": 0.769811126085751, + "grad_norm": 0.8584809555584593, + "learning_rate": 9.271417189780814e-06, + "loss": 0.2314, + "step": 59690 + }, + { + "epoch": 0.7699400942757468, + "grad_norm": 0.8553171602844224, + "learning_rate": 9.27102712871342e-06, + "loss": 0.2297, + "step": 59700 + }, + { + "epoch": 0.7700690624657428, + "grad_norm": 0.7467305436734086, + "learning_rate": 9.270636971470316e-06, + "loss": 0.2441, + "step": 59710 + }, + { + "epoch": 0.7701980306557388, + "grad_norm": 0.9075193594087669, + "learning_rate": 9.270246718060288e-06, + "loss": 0.2342, + "step": 59720 + }, + { + "epoch": 0.7703269988457347, + "grad_norm": 0.7759768493667106, + "learning_rate": 9.269856368492125e-06, + "loss": 0.2298, + "step": 59730 + }, + { + "epoch": 0.7704559670357306, + "grad_norm": 0.9429230429723714, + "learning_rate": 9.269465922774614e-06, + "loss": 0.2236, + "step": 59740 + }, + { + "epoch": 0.7705849352257266, + "grad_norm": 0.9092375498745063, + "learning_rate": 9.26907538091655e-06, + "loss": 0.229, + "step": 59750 + }, + { + "epoch": 0.7707139034157225, + "grad_norm": 0.8958488773041208, + "learning_rate": 9.268684742926724e-06, + "loss": 0.2317, + "step": 59760 + }, + { + "epoch": 0.7708428716057184, + "grad_norm": 0.8695969210407831, + "learning_rate": 9.268294008813936e-06, + "loss": 0.2341, + "step": 59770 + }, + { + "epoch": 0.7709718397957144, + "grad_norm": 0.7986574248197773, + "learning_rate": 9.267903178586985e-06, + "loss": 0.2234, + "step": 59780 + }, + { + "epoch": 0.7711008079857103, + "grad_norm": 0.8318187410885642, + "learning_rate": 9.267512252254667e-06, + "loss": 0.2283, + "step": 59790 + }, + { + "epoch": 0.7712297761757063, + "grad_norm": 0.9287155084382096, + "learning_rate": 9.26712122982579e-06, + "loss": 0.2401, + "step": 59800 + }, + { + "epoch": 0.7713587443657022, + "grad_norm": 0.8086187604932595, + "learning_rate": 9.266730111309156e-06, + "loss": 0.2195, + "step": 59810 + }, + { + "epoch": 0.7714877125556981, + "grad_norm": 0.9077660753601409, + "learning_rate": 9.266338896713573e-06, + "loss": 0.2382, + "step": 59820 + }, + { + "epoch": 0.7716166807456941, + "grad_norm": 0.8479662825400412, + "learning_rate": 9.265947586047853e-06, + "loss": 0.2332, + "step": 59830 + }, + { + "epoch": 0.77174564893569, + "grad_norm": 0.886423330456959, + "learning_rate": 9.265556179320803e-06, + "loss": 0.2356, + "step": 59840 + }, + { + "epoch": 0.771874617125686, + "grad_norm": 0.8447948914615651, + "learning_rate": 9.265164676541241e-06, + "loss": 0.228, + "step": 59850 + }, + { + "epoch": 0.7720035853156819, + "grad_norm": 0.943681414191613, + "learning_rate": 9.26477307771798e-06, + "loss": 0.2348, + "step": 59860 + }, + { + "epoch": 0.7721325535056778, + "grad_norm": 0.9485765969837395, + "learning_rate": 9.26438138285984e-06, + "loss": 0.2346, + "step": 59870 + }, + { + "epoch": 0.7722615216956737, + "grad_norm": 0.9285106896485861, + "learning_rate": 9.263989591975638e-06, + "loss": 0.2483, + "step": 59880 + }, + { + "epoch": 0.7723904898856697, + "grad_norm": 0.8422242346108155, + "learning_rate": 9.2635977050742e-06, + "loss": 0.2286, + "step": 59890 + }, + { + "epoch": 0.7725194580756657, + "grad_norm": 0.9260848251070728, + "learning_rate": 9.26320572216435e-06, + "loss": 0.2442, + "step": 59900 + }, + { + "epoch": 0.7726484262656615, + "grad_norm": 0.8538304101902582, + "learning_rate": 9.262813643254916e-06, + "loss": 0.2381, + "step": 59910 + }, + { + "epoch": 0.7727773944556575, + "grad_norm": 0.8819578058533897, + "learning_rate": 9.26242146835472e-06, + "loss": 0.219, + "step": 59920 + }, + { + "epoch": 0.7729063626456535, + "grad_norm": 0.890118491574806, + "learning_rate": 9.262029197472602e-06, + "loss": 0.229, + "step": 59930 + }, + { + "epoch": 0.7730353308356493, + "grad_norm": 0.9264647299252495, + "learning_rate": 9.26163683061739e-06, + "loss": 0.239, + "step": 59940 + }, + { + "epoch": 0.7731642990256453, + "grad_norm": 0.8616690380246184, + "learning_rate": 9.261244367797922e-06, + "loss": 0.228, + "step": 59950 + }, + { + "epoch": 0.7732932672156413, + "grad_norm": 0.8757175474783284, + "learning_rate": 9.260851809023032e-06, + "loss": 0.2384, + "step": 59960 + }, + { + "epoch": 0.7734222354056371, + "grad_norm": 1.1905102404821075, + "learning_rate": 9.260459154301564e-06, + "loss": 0.2334, + "step": 59970 + }, + { + "epoch": 0.7735512035956331, + "grad_norm": 0.8294469281322165, + "learning_rate": 9.260066403642357e-06, + "loss": 0.233, + "step": 59980 + }, + { + "epoch": 0.7736801717856291, + "grad_norm": 0.823352985065623, + "learning_rate": 9.259673557054257e-06, + "loss": 0.2217, + "step": 59990 + }, + { + "epoch": 0.7738091399756251, + "grad_norm": 0.8746722013131234, + "learning_rate": 9.259280614546106e-06, + "loss": 0.2357, + "step": 60000 + }, + { + "epoch": 0.7739381081656209, + "grad_norm": 0.8622579949379808, + "learning_rate": 9.258887576126759e-06, + "loss": 0.2283, + "step": 60010 + }, + { + "epoch": 0.7740670763556169, + "grad_norm": 0.921821320711689, + "learning_rate": 9.25849444180506e-06, + "loss": 0.2346, + "step": 60020 + }, + { + "epoch": 0.7741960445456129, + "grad_norm": 0.8532168654967938, + "learning_rate": 9.258101211589865e-06, + "loss": 0.2281, + "step": 60030 + }, + { + "epoch": 0.7743250127356087, + "grad_norm": 0.9087365086890486, + "learning_rate": 9.257707885490029e-06, + "loss": 0.2234, + "step": 60040 + }, + { + "epoch": 0.7744539809256047, + "grad_norm": 0.9023326235160128, + "learning_rate": 9.257314463514408e-06, + "loss": 0.2317, + "step": 60050 + }, + { + "epoch": 0.7745829491156007, + "grad_norm": 0.9258329044817526, + "learning_rate": 9.256920945671861e-06, + "loss": 0.2326, + "step": 60060 + }, + { + "epoch": 0.7747119173055965, + "grad_norm": 0.8637574714353919, + "learning_rate": 9.25652733197125e-06, + "loss": 0.2255, + "step": 60070 + }, + { + "epoch": 0.7748408854955925, + "grad_norm": 0.8786636547592845, + "learning_rate": 9.256133622421439e-06, + "loss": 0.2301, + "step": 60080 + }, + { + "epoch": 0.7749698536855885, + "grad_norm": 0.8216449110959965, + "learning_rate": 9.255739817031293e-06, + "loss": 0.2319, + "step": 60090 + }, + { + "epoch": 0.7750988218755844, + "grad_norm": 0.979653104845507, + "learning_rate": 9.255345915809679e-06, + "loss": 0.2381, + "step": 60100 + }, + { + "epoch": 0.7752277900655803, + "grad_norm": 0.8590285760849539, + "learning_rate": 9.254951918765469e-06, + "loss": 0.2299, + "step": 60110 + }, + { + "epoch": 0.7753567582555763, + "grad_norm": 0.8508711455361848, + "learning_rate": 9.25455782590753e-06, + "loss": 0.2292, + "step": 60120 + }, + { + "epoch": 0.7754857264455722, + "grad_norm": 0.8303145053806525, + "learning_rate": 9.254163637244744e-06, + "loss": 0.2289, + "step": 60130 + }, + { + "epoch": 0.7756146946355681, + "grad_norm": 0.8736759227852352, + "learning_rate": 9.253769352785981e-06, + "loss": 0.2339, + "step": 60140 + }, + { + "epoch": 0.7757436628255641, + "grad_norm": 0.9228436890659475, + "learning_rate": 9.25337497254012e-06, + "loss": 0.2312, + "step": 60150 + }, + { + "epoch": 0.77587263101556, + "grad_norm": 0.8829304862024998, + "learning_rate": 9.252980496516048e-06, + "loss": 0.2302, + "step": 60160 + }, + { + "epoch": 0.776001599205556, + "grad_norm": 0.8454297664561559, + "learning_rate": 9.25258592472264e-06, + "loss": 0.2273, + "step": 60170 + }, + { + "epoch": 0.7761305673955519, + "grad_norm": 0.8573054754875974, + "learning_rate": 9.252191257168787e-06, + "loss": 0.2336, + "step": 60180 + }, + { + "epoch": 0.7762595355855478, + "grad_norm": 0.9129940012063903, + "learning_rate": 9.251796493863374e-06, + "loss": 0.2341, + "step": 60190 + }, + { + "epoch": 0.7763885037755438, + "grad_norm": 0.8768110136093978, + "learning_rate": 9.251401634815289e-06, + "loss": 0.2282, + "step": 60200 + }, + { + "epoch": 0.7765174719655397, + "grad_norm": 0.9535020855751142, + "learning_rate": 9.251006680033424e-06, + "loss": 0.2317, + "step": 60210 + }, + { + "epoch": 0.7766464401555356, + "grad_norm": 0.8358238814805449, + "learning_rate": 9.250611629526673e-06, + "loss": 0.2312, + "step": 60220 + }, + { + "epoch": 0.7767754083455316, + "grad_norm": 0.8940615927031959, + "learning_rate": 9.250216483303932e-06, + "loss": 0.2338, + "step": 60230 + }, + { + "epoch": 0.7769043765355275, + "grad_norm": 0.8511730547070068, + "learning_rate": 9.2498212413741e-06, + "loss": 0.2349, + "step": 60240 + }, + { + "epoch": 0.7770333447255234, + "grad_norm": 0.8656128584193931, + "learning_rate": 9.249425903746075e-06, + "loss": 0.2358, + "step": 60250 + }, + { + "epoch": 0.7771623129155194, + "grad_norm": 0.8324446969559075, + "learning_rate": 9.249030470428761e-06, + "loss": 0.2239, + "step": 60260 + }, + { + "epoch": 0.7772912811055154, + "grad_norm": 0.9173615685802424, + "learning_rate": 9.248634941431062e-06, + "loss": 0.2195, + "step": 60270 + }, + { + "epoch": 0.7774202492955112, + "grad_norm": 0.9591890810267113, + "learning_rate": 9.248239316761885e-06, + "loss": 0.2381, + "step": 60280 + }, + { + "epoch": 0.7775492174855072, + "grad_norm": 0.8837190512346569, + "learning_rate": 9.247843596430136e-06, + "loss": 0.2417, + "step": 60290 + }, + { + "epoch": 0.7776781856755032, + "grad_norm": 0.8512991662058151, + "learning_rate": 9.247447780444729e-06, + "loss": 0.2308, + "step": 60300 + }, + { + "epoch": 0.777807153865499, + "grad_norm": 0.8970958811264158, + "learning_rate": 9.247051868814577e-06, + "loss": 0.2374, + "step": 60310 + }, + { + "epoch": 0.777936122055495, + "grad_norm": 0.7884402115463258, + "learning_rate": 9.246655861548594e-06, + "loss": 0.2307, + "step": 60320 + }, + { + "epoch": 0.778065090245491, + "grad_norm": 0.8940984486128861, + "learning_rate": 9.246259758655697e-06, + "loss": 0.2302, + "step": 60330 + }, + { + "epoch": 0.7781940584354868, + "grad_norm": 0.806047311305527, + "learning_rate": 9.245863560144807e-06, + "loss": 0.2251, + "step": 60340 + }, + { + "epoch": 0.7783230266254828, + "grad_norm": 0.9271921309701742, + "learning_rate": 9.245467266024845e-06, + "loss": 0.2317, + "step": 60350 + }, + { + "epoch": 0.7784519948154788, + "grad_norm": 0.8923692120111629, + "learning_rate": 9.245070876304734e-06, + "loss": 0.2263, + "step": 60360 + }, + { + "epoch": 0.7785809630054747, + "grad_norm": 0.9414766955927731, + "learning_rate": 9.2446743909934e-06, + "loss": 0.2489, + "step": 60370 + }, + { + "epoch": 0.7787099311954706, + "grad_norm": 0.9828808509782593, + "learning_rate": 9.244277810099774e-06, + "loss": 0.2272, + "step": 60380 + }, + { + "epoch": 0.7788388993854666, + "grad_norm": 0.8753997509686607, + "learning_rate": 9.24388113363278e-06, + "loss": 0.2325, + "step": 60390 + }, + { + "epoch": 0.7789678675754625, + "grad_norm": 0.8866091493382694, + "learning_rate": 9.243484361601359e-06, + "loss": 0.2286, + "step": 60400 + }, + { + "epoch": 0.7790968357654584, + "grad_norm": 0.9504297815105481, + "learning_rate": 9.24308749401444e-06, + "loss": 0.2391, + "step": 60410 + }, + { + "epoch": 0.7792258039554544, + "grad_norm": 0.9572828242185063, + "learning_rate": 9.242690530880959e-06, + "loss": 0.2273, + "step": 60420 + }, + { + "epoch": 0.7793547721454503, + "grad_norm": 0.8019715917629457, + "learning_rate": 9.242293472209857e-06, + "loss": 0.2364, + "step": 60430 + }, + { + "epoch": 0.7794837403354462, + "grad_norm": 0.8086540768685653, + "learning_rate": 9.241896318010075e-06, + "loss": 0.2379, + "step": 60440 + }, + { + "epoch": 0.7796127085254422, + "grad_norm": 0.886981152191663, + "learning_rate": 9.241499068290558e-06, + "loss": 0.2333, + "step": 60450 + }, + { + "epoch": 0.7797416767154381, + "grad_norm": 0.8526722447540329, + "learning_rate": 9.241101723060246e-06, + "loss": 0.2328, + "step": 60460 + }, + { + "epoch": 0.7798706449054341, + "grad_norm": 0.852916385182379, + "learning_rate": 9.240704282328091e-06, + "loss": 0.2364, + "step": 60470 + }, + { + "epoch": 0.77999961309543, + "grad_norm": 0.8836953505541859, + "learning_rate": 9.240306746103042e-06, + "loss": 0.234, + "step": 60480 + }, + { + "epoch": 0.780128581285426, + "grad_norm": 0.9376494890923472, + "learning_rate": 9.23990911439405e-06, + "loss": 0.2338, + "step": 60490 + }, + { + "epoch": 0.7802575494754219, + "grad_norm": 0.9728624219509175, + "learning_rate": 9.239511387210067e-06, + "loss": 0.238, + "step": 60500 + }, + { + "epoch": 0.7803865176654178, + "grad_norm": 0.9764327489236386, + "learning_rate": 9.239113564560055e-06, + "loss": 0.2361, + "step": 60510 + }, + { + "epoch": 0.7805154858554137, + "grad_norm": 1.0117232138735988, + "learning_rate": 9.238715646452965e-06, + "loss": 0.2253, + "step": 60520 + }, + { + "epoch": 0.7806444540454097, + "grad_norm": 0.9014583363358677, + "learning_rate": 9.238317632897762e-06, + "loss": 0.2313, + "step": 60530 + }, + { + "epoch": 0.7807734222354057, + "grad_norm": 0.8731569630648437, + "learning_rate": 9.237919523903407e-06, + "loss": 0.2367, + "step": 60540 + }, + { + "epoch": 0.7809023904254015, + "grad_norm": 0.9113870359957161, + "learning_rate": 9.237521319478864e-06, + "loss": 0.2316, + "step": 60550 + }, + { + "epoch": 0.7810313586153975, + "grad_norm": 0.8074124450580606, + "learning_rate": 9.237123019633104e-06, + "loss": 0.2217, + "step": 60560 + }, + { + "epoch": 0.7811603268053935, + "grad_norm": 0.8662023228476414, + "learning_rate": 9.23672462437509e-06, + "loss": 0.2278, + "step": 60570 + }, + { + "epoch": 0.7812892949953893, + "grad_norm": 0.8016582783299865, + "learning_rate": 9.236326133713796e-06, + "loss": 0.2291, + "step": 60580 + }, + { + "epoch": 0.7814182631853853, + "grad_norm": 0.9181864458070528, + "learning_rate": 9.235927547658196e-06, + "loss": 0.2405, + "step": 60590 + }, + { + "epoch": 0.7815472313753813, + "grad_norm": 0.8306421283125052, + "learning_rate": 9.235528866217264e-06, + "loss": 0.2262, + "step": 60600 + }, + { + "epoch": 0.7816761995653771, + "grad_norm": 0.9172031700229311, + "learning_rate": 9.23513008939998e-06, + "loss": 0.2321, + "step": 60610 + }, + { + "epoch": 0.7818051677553731, + "grad_norm": 0.8492466368014336, + "learning_rate": 9.23473121721532e-06, + "loss": 0.2362, + "step": 60620 + }, + { + "epoch": 0.7819341359453691, + "grad_norm": 0.8868414535901424, + "learning_rate": 9.234332249672268e-06, + "loss": 0.2435, + "step": 60630 + }, + { + "epoch": 0.7820631041353651, + "grad_norm": 0.8317588941956856, + "learning_rate": 9.23393318677981e-06, + "loss": 0.2348, + "step": 60640 + }, + { + "epoch": 0.7821920723253609, + "grad_norm": 0.8488689283372727, + "learning_rate": 9.233534028546928e-06, + "loss": 0.2206, + "step": 60650 + }, + { + "epoch": 0.7823210405153569, + "grad_norm": 0.8030698041264798, + "learning_rate": 9.233134774982613e-06, + "loss": 0.2301, + "step": 60660 + }, + { + "epoch": 0.7824500087053529, + "grad_norm": 0.9135592067423365, + "learning_rate": 9.232735426095855e-06, + "loss": 0.2343, + "step": 60670 + }, + { + "epoch": 0.7825789768953487, + "grad_norm": 0.8319460790201116, + "learning_rate": 9.232335981895646e-06, + "loss": 0.2316, + "step": 60680 + }, + { + "epoch": 0.7827079450853447, + "grad_norm": 0.8812059136235276, + "learning_rate": 9.23193644239098e-06, + "loss": 0.2126, + "step": 60690 + }, + { + "epoch": 0.7828369132753407, + "grad_norm": 0.8842743270959862, + "learning_rate": 9.231536807590858e-06, + "loss": 0.228, + "step": 60700 + }, + { + "epoch": 0.7829658814653365, + "grad_norm": 1.0402535464973657, + "learning_rate": 9.231137077504274e-06, + "loss": 0.2306, + "step": 60710 + }, + { + "epoch": 0.7830948496553325, + "grad_norm": 0.9081859952729501, + "learning_rate": 9.23073725214023e-06, + "loss": 0.2274, + "step": 60720 + }, + { + "epoch": 0.7832238178453285, + "grad_norm": 0.8441241469455252, + "learning_rate": 9.230337331507733e-06, + "loss": 0.2272, + "step": 60730 + }, + { + "epoch": 0.7833527860353244, + "grad_norm": 0.9108892659042688, + "learning_rate": 9.229937315615785e-06, + "loss": 0.2333, + "step": 60740 + }, + { + "epoch": 0.7834817542253203, + "grad_norm": 0.8363695715857893, + "learning_rate": 9.229537204473392e-06, + "loss": 0.2308, + "step": 60750 + }, + { + "epoch": 0.7836107224153163, + "grad_norm": 0.8782579435051003, + "learning_rate": 9.229136998089572e-06, + "loss": 0.2453, + "step": 60760 + }, + { + "epoch": 0.7837396906053122, + "grad_norm": 0.7944024003963526, + "learning_rate": 9.228736696473327e-06, + "loss": 0.2332, + "step": 60770 + }, + { + "epoch": 0.7838686587953081, + "grad_norm": 0.9214379716118221, + "learning_rate": 9.228336299633678e-06, + "loss": 0.2356, + "step": 60780 + }, + { + "epoch": 0.7839976269853041, + "grad_norm": 0.8713712055502331, + "learning_rate": 9.227935807579638e-06, + "loss": 0.2428, + "step": 60790 + }, + { + "epoch": 0.7841265951753, + "grad_norm": 0.9040090362304017, + "learning_rate": 9.227535220320225e-06, + "loss": 0.2327, + "step": 60800 + }, + { + "epoch": 0.784255563365296, + "grad_norm": 0.8484236480321995, + "learning_rate": 9.227134537864463e-06, + "loss": 0.2311, + "step": 60810 + }, + { + "epoch": 0.7843845315552919, + "grad_norm": 0.8235374246358973, + "learning_rate": 9.226733760221368e-06, + "loss": 0.2344, + "step": 60820 + }, + { + "epoch": 0.7845134997452878, + "grad_norm": 0.8809122498637435, + "learning_rate": 9.226332887399974e-06, + "loss": 0.224, + "step": 60830 + }, + { + "epoch": 0.7846424679352838, + "grad_norm": 0.7902277466188684, + "learning_rate": 9.2259319194093e-06, + "loss": 0.236, + "step": 60840 + }, + { + "epoch": 0.7847714361252797, + "grad_norm": 0.8931901005875349, + "learning_rate": 9.225530856258378e-06, + "loss": 0.2205, + "step": 60850 + }, + { + "epoch": 0.7849004043152756, + "grad_norm": 0.8764435037181976, + "learning_rate": 9.225129697956238e-06, + "loss": 0.2262, + "step": 60860 + }, + { + "epoch": 0.7850293725052716, + "grad_norm": 0.892457694430262, + "learning_rate": 9.224728444511916e-06, + "loss": 0.2411, + "step": 60870 + }, + { + "epoch": 0.7851583406952675, + "grad_norm": 0.895417130214843, + "learning_rate": 9.224327095934446e-06, + "loss": 0.2275, + "step": 60880 + }, + { + "epoch": 0.7852873088852634, + "grad_norm": 0.8798658657291222, + "learning_rate": 9.223925652232866e-06, + "loss": 0.2275, + "step": 60890 + }, + { + "epoch": 0.7854162770752594, + "grad_norm": 0.8943089236442465, + "learning_rate": 9.223524113416214e-06, + "loss": 0.2336, + "step": 60900 + }, + { + "epoch": 0.7855452452652554, + "grad_norm": 0.7962937256328246, + "learning_rate": 9.223122479493534e-06, + "loss": 0.2209, + "step": 60910 + }, + { + "epoch": 0.7856742134552512, + "grad_norm": 0.9032844549381197, + "learning_rate": 9.22272075047387e-06, + "loss": 0.2317, + "step": 60920 + }, + { + "epoch": 0.7858031816452472, + "grad_norm": 0.8246853483866918, + "learning_rate": 9.222318926366266e-06, + "loss": 0.2377, + "step": 60930 + }, + { + "epoch": 0.7859321498352432, + "grad_norm": 0.9130184493700839, + "learning_rate": 9.221917007179772e-06, + "loss": 0.2353, + "step": 60940 + }, + { + "epoch": 0.786061118025239, + "grad_norm": 0.8964784556245897, + "learning_rate": 9.221514992923439e-06, + "loss": 0.2298, + "step": 60950 + }, + { + "epoch": 0.786190086215235, + "grad_norm": 0.8851755793664794, + "learning_rate": 9.221112883606319e-06, + "loss": 0.2356, + "step": 60960 + }, + { + "epoch": 0.786319054405231, + "grad_norm": 0.8237808466414189, + "learning_rate": 9.220710679237467e-06, + "loss": 0.2337, + "step": 60970 + }, + { + "epoch": 0.7864480225952268, + "grad_norm": 0.8076572379361714, + "learning_rate": 9.220308379825939e-06, + "loss": 0.2319, + "step": 60980 + }, + { + "epoch": 0.7865769907852228, + "grad_norm": 0.9076767746644654, + "learning_rate": 9.219905985380795e-06, + "loss": 0.2397, + "step": 60990 + }, + { + "epoch": 0.7867059589752188, + "grad_norm": 0.8787825729729152, + "learning_rate": 9.219503495911096e-06, + "loss": 0.2357, + "step": 61000 + }, + { + "epoch": 0.7868349271652147, + "grad_norm": 0.8770680680808695, + "learning_rate": 9.219100911425904e-06, + "loss": 0.2358, + "step": 61010 + }, + { + "epoch": 0.7869638953552106, + "grad_norm": 0.9904774287570741, + "learning_rate": 9.218698231934289e-06, + "loss": 0.2448, + "step": 61020 + }, + { + "epoch": 0.7870928635452066, + "grad_norm": 0.8687476380978384, + "learning_rate": 9.218295457445313e-06, + "loss": 0.2175, + "step": 61030 + }, + { + "epoch": 0.7872218317352025, + "grad_norm": 0.9531718245797836, + "learning_rate": 9.217892587968047e-06, + "loss": 0.2383, + "step": 61040 + }, + { + "epoch": 0.7873507999251984, + "grad_norm": 0.8396061576308012, + "learning_rate": 9.217489623511567e-06, + "loss": 0.2427, + "step": 61050 + }, + { + "epoch": 0.7874797681151944, + "grad_norm": 0.9894166587231978, + "learning_rate": 9.21708656408494e-06, + "loss": 0.2338, + "step": 61060 + }, + { + "epoch": 0.7876087363051903, + "grad_norm": 0.8678845015532686, + "learning_rate": 9.21668340969725e-06, + "loss": 0.2402, + "step": 61070 + }, + { + "epoch": 0.7877377044951862, + "grad_norm": 0.8428619794359039, + "learning_rate": 9.21628016035757e-06, + "loss": 0.2193, + "step": 61080 + }, + { + "epoch": 0.7878666726851822, + "grad_norm": 0.9582605657882226, + "learning_rate": 9.21587681607498e-06, + "loss": 0.2346, + "step": 61090 + }, + { + "epoch": 0.7879956408751781, + "grad_norm": 1.5113108468968524, + "learning_rate": 9.215473376858566e-06, + "loss": 0.2273, + "step": 61100 + }, + { + "epoch": 0.7881246090651741, + "grad_norm": 0.8201507106888815, + "learning_rate": 9.21506984271741e-06, + "loss": 0.2406, + "step": 61110 + }, + { + "epoch": 0.78825357725517, + "grad_norm": 0.8810361732750152, + "learning_rate": 9.214666213660602e-06, + "loss": 0.2293, + "step": 61120 + }, + { + "epoch": 0.788382545445166, + "grad_norm": 0.9447480709456844, + "learning_rate": 9.214262489697228e-06, + "loss": 0.2365, + "step": 61130 + }, + { + "epoch": 0.7885115136351619, + "grad_norm": 0.9331970432381064, + "learning_rate": 9.21385867083638e-06, + "loss": 0.239, + "step": 61140 + }, + { + "epoch": 0.7886404818251578, + "grad_norm": 0.9126920754687669, + "learning_rate": 9.21345475708715e-06, + "loss": 0.2385, + "step": 61150 + }, + { + "epoch": 0.7887694500151538, + "grad_norm": 0.9464717908103839, + "learning_rate": 9.213050748458636e-06, + "loss": 0.2572, + "step": 61160 + }, + { + "epoch": 0.7888984182051497, + "grad_norm": 0.8845303619493629, + "learning_rate": 9.212646644959936e-06, + "loss": 0.229, + "step": 61170 + }, + { + "epoch": 0.7890273863951457, + "grad_norm": 0.8734508742608451, + "learning_rate": 9.212242446600145e-06, + "loss": 0.2275, + "step": 61180 + }, + { + "epoch": 0.7891563545851416, + "grad_norm": 0.8765031572389504, + "learning_rate": 9.211838153388369e-06, + "loss": 0.2194, + "step": 61190 + }, + { + "epoch": 0.7892853227751375, + "grad_norm": 0.8678133649185243, + "learning_rate": 9.21143376533371e-06, + "loss": 0.2274, + "step": 61200 + }, + { + "epoch": 0.7894142909651335, + "grad_norm": 0.9432622532829819, + "learning_rate": 9.211029282445276e-06, + "loss": 0.2269, + "step": 61210 + }, + { + "epoch": 0.7895432591551294, + "grad_norm": 0.9432212206402387, + "learning_rate": 9.210624704732175e-06, + "loss": 0.2326, + "step": 61220 + }, + { + "epoch": 0.7896722273451253, + "grad_norm": 0.9586697352478452, + "learning_rate": 9.210220032203513e-06, + "loss": 0.2232, + "step": 61230 + }, + { + "epoch": 0.7898011955351213, + "grad_norm": 0.8364951497404342, + "learning_rate": 9.209815264868408e-06, + "loss": 0.2232, + "step": 61240 + }, + { + "epoch": 0.7899301637251172, + "grad_norm": 0.7966391763762734, + "learning_rate": 9.209410402735971e-06, + "loss": 0.2306, + "step": 61250 + }, + { + "epoch": 0.7900591319151131, + "grad_norm": 0.8553057545492169, + "learning_rate": 9.209005445815322e-06, + "loss": 0.2353, + "step": 61260 + }, + { + "epoch": 0.7901881001051091, + "grad_norm": 0.9623580611336594, + "learning_rate": 9.208600394115577e-06, + "loss": 0.2316, + "step": 61270 + }, + { + "epoch": 0.7903170682951051, + "grad_norm": 0.8808485660644493, + "learning_rate": 9.20819524764586e-06, + "loss": 0.2344, + "step": 61280 + }, + { + "epoch": 0.7904460364851009, + "grad_norm": 0.9083646823791101, + "learning_rate": 9.20779000641529e-06, + "loss": 0.2338, + "step": 61290 + }, + { + "epoch": 0.7905750046750969, + "grad_norm": 0.8721684989572573, + "learning_rate": 9.207384670432995e-06, + "loss": 0.2221, + "step": 61300 + }, + { + "epoch": 0.7907039728650929, + "grad_norm": 0.9095951305183956, + "learning_rate": 9.206979239708102e-06, + "loss": 0.2363, + "step": 61310 + }, + { + "epoch": 0.7908329410550887, + "grad_norm": 0.8379333421591809, + "learning_rate": 9.206573714249742e-06, + "loss": 0.2397, + "step": 61320 + }, + { + "epoch": 0.7909619092450847, + "grad_norm": 0.8161983482694285, + "learning_rate": 9.206168094067043e-06, + "loss": 0.2339, + "step": 61330 + }, + { + "epoch": 0.7910908774350807, + "grad_norm": 0.9623903118189387, + "learning_rate": 9.205762379169142e-06, + "loss": 0.2317, + "step": 61340 + }, + { + "epoch": 0.7912198456250765, + "grad_norm": 0.8599730887134948, + "learning_rate": 9.205356569565174e-06, + "loss": 0.241, + "step": 61350 + }, + { + "epoch": 0.7913488138150725, + "grad_norm": 0.9276908873074953, + "learning_rate": 9.204950665264277e-06, + "loss": 0.2356, + "step": 61360 + }, + { + "epoch": 0.7914777820050685, + "grad_norm": 0.9425785669693326, + "learning_rate": 9.204544666275593e-06, + "loss": 0.2461, + "step": 61370 + }, + { + "epoch": 0.7916067501950644, + "grad_norm": 0.9768657328360008, + "learning_rate": 9.20413857260826e-06, + "loss": 0.2351, + "step": 61380 + }, + { + "epoch": 0.7917357183850603, + "grad_norm": 0.9187857558767544, + "learning_rate": 9.203732384271427e-06, + "loss": 0.228, + "step": 61390 + }, + { + "epoch": 0.7918646865750563, + "grad_norm": 0.9430920693457353, + "learning_rate": 9.203326101274237e-06, + "loss": 0.2249, + "step": 61400 + }, + { + "epoch": 0.7919936547650522, + "grad_norm": 0.8261026364402968, + "learning_rate": 9.202919723625842e-06, + "loss": 0.2344, + "step": 61410 + }, + { + "epoch": 0.7921226229550481, + "grad_norm": 0.8965316647888388, + "learning_rate": 9.202513251335391e-06, + "loss": 0.242, + "step": 61420 + }, + { + "epoch": 0.7922515911450441, + "grad_norm": 0.8722692600097157, + "learning_rate": 9.202106684412038e-06, + "loss": 0.2395, + "step": 61430 + }, + { + "epoch": 0.79238055933504, + "grad_norm": 0.9778653159400214, + "learning_rate": 9.201700022864936e-06, + "loss": 0.2336, + "step": 61440 + }, + { + "epoch": 0.7925095275250359, + "grad_norm": 0.83023834539787, + "learning_rate": 9.201293266703247e-06, + "loss": 0.226, + "step": 61450 + }, + { + "epoch": 0.7926384957150319, + "grad_norm": 0.8947560619336984, + "learning_rate": 9.200886415936124e-06, + "loss": 0.2242, + "step": 61460 + }, + { + "epoch": 0.7927674639050278, + "grad_norm": 0.8579508984278088, + "learning_rate": 9.200479470572732e-06, + "loss": 0.2365, + "step": 61470 + }, + { + "epoch": 0.7928964320950238, + "grad_norm": 0.8441362437403233, + "learning_rate": 9.200072430622235e-06, + "loss": 0.237, + "step": 61480 + }, + { + "epoch": 0.7930254002850197, + "grad_norm": 0.9286582676887688, + "learning_rate": 9.1996652960938e-06, + "loss": 0.2361, + "step": 61490 + }, + { + "epoch": 0.7931543684750156, + "grad_norm": 0.8826915689321829, + "learning_rate": 9.19925806699659e-06, + "loss": 0.2339, + "step": 61500 + }, + { + "epoch": 0.7932833366650116, + "grad_norm": 0.8045656761738759, + "learning_rate": 9.19885074333978e-06, + "loss": 0.2293, + "step": 61510 + }, + { + "epoch": 0.7934123048550075, + "grad_norm": 0.8505921123962651, + "learning_rate": 9.19844332513254e-06, + "loss": 0.2275, + "step": 61520 + }, + { + "epoch": 0.7935412730450034, + "grad_norm": 0.9336472966843873, + "learning_rate": 9.198035812384044e-06, + "loss": 0.234, + "step": 61530 + }, + { + "epoch": 0.7936702412349994, + "grad_norm": 0.8233426676766585, + "learning_rate": 9.197628205103473e-06, + "loss": 0.237, + "step": 61540 + }, + { + "epoch": 0.7937992094249954, + "grad_norm": 0.961199465613278, + "learning_rate": 9.197220503299998e-06, + "loss": 0.2257, + "step": 61550 + }, + { + "epoch": 0.7939281776149912, + "grad_norm": 0.9457833961819043, + "learning_rate": 9.196812706982805e-06, + "loss": 0.2329, + "step": 61560 + }, + { + "epoch": 0.7940571458049872, + "grad_norm": 0.7916235956406397, + "learning_rate": 9.196404816161073e-06, + "loss": 0.2277, + "step": 61570 + }, + { + "epoch": 0.7941861139949832, + "grad_norm": 0.8673526166025723, + "learning_rate": 9.195996830843992e-06, + "loss": 0.2368, + "step": 61580 + }, + { + "epoch": 0.794315082184979, + "grad_norm": 0.9423491129779077, + "learning_rate": 9.195588751040744e-06, + "loss": 0.2265, + "step": 61590 + }, + { + "epoch": 0.794444050374975, + "grad_norm": 0.9178527948092287, + "learning_rate": 9.19518057676052e-06, + "loss": 0.223, + "step": 61600 + }, + { + "epoch": 0.794573018564971, + "grad_norm": 0.919561500985301, + "learning_rate": 9.194772308012514e-06, + "loss": 0.2255, + "step": 61610 + }, + { + "epoch": 0.7947019867549668, + "grad_norm": 0.9031502519347167, + "learning_rate": 9.194363944805918e-06, + "loss": 0.2288, + "step": 61620 + }, + { + "epoch": 0.7948309549449628, + "grad_norm": 1.7321584756943724, + "learning_rate": 9.193955487149924e-06, + "loss": 0.2213, + "step": 61630 + }, + { + "epoch": 0.7949599231349588, + "grad_norm": 0.9538401943074899, + "learning_rate": 9.193546935053735e-06, + "loss": 0.2339, + "step": 61640 + }, + { + "epoch": 0.7950888913249547, + "grad_norm": 0.8685073972536418, + "learning_rate": 9.193138288526546e-06, + "loss": 0.2373, + "step": 61650 + }, + { + "epoch": 0.7952178595149506, + "grad_norm": 0.8046721014663702, + "learning_rate": 9.192729547577563e-06, + "loss": 0.2349, + "step": 61660 + }, + { + "epoch": 0.7953468277049466, + "grad_norm": 0.8979496040658776, + "learning_rate": 9.192320712215988e-06, + "loss": 0.2244, + "step": 61670 + }, + { + "epoch": 0.7954757958949426, + "grad_norm": 0.9103411089507358, + "learning_rate": 9.191911782451028e-06, + "loss": 0.2362, + "step": 61680 + }, + { + "epoch": 0.7956047640849384, + "grad_norm": 0.8366711560695285, + "learning_rate": 9.191502758291893e-06, + "loss": 0.2127, + "step": 61690 + }, + { + "epoch": 0.7957337322749344, + "grad_norm": 0.8478122479606868, + "learning_rate": 9.191093639747788e-06, + "loss": 0.2324, + "step": 61700 + }, + { + "epoch": 0.7958627004649304, + "grad_norm": 0.8051776340380399, + "learning_rate": 9.190684426827931e-06, + "loss": 0.2352, + "step": 61710 + }, + { + "epoch": 0.7959916686549262, + "grad_norm": 0.8239510826547773, + "learning_rate": 9.190275119541534e-06, + "loss": 0.228, + "step": 61720 + }, + { + "epoch": 0.7961206368449222, + "grad_norm": 0.7859930742758596, + "learning_rate": 9.189865717897817e-06, + "loss": 0.2279, + "step": 61730 + }, + { + "epoch": 0.7962496050349182, + "grad_norm": 0.9720756068258068, + "learning_rate": 9.189456221905995e-06, + "loss": 0.2377, + "step": 61740 + }, + { + "epoch": 0.7963785732249141, + "grad_norm": 0.8839026160839958, + "learning_rate": 9.18904663157529e-06, + "loss": 0.2254, + "step": 61750 + }, + { + "epoch": 0.79650754141491, + "grad_norm": 0.7925867230361755, + "learning_rate": 9.188636946914929e-06, + "loss": 0.2284, + "step": 61760 + }, + { + "epoch": 0.796636509604906, + "grad_norm": 0.8455623432069149, + "learning_rate": 9.188227167934131e-06, + "loss": 0.2282, + "step": 61770 + }, + { + "epoch": 0.7967654777949019, + "grad_norm": 0.9315021149648915, + "learning_rate": 9.187817294642127e-06, + "loss": 0.2339, + "step": 61780 + }, + { + "epoch": 0.7968944459848978, + "grad_norm": 0.7870447908131691, + "learning_rate": 9.187407327048149e-06, + "loss": 0.2281, + "step": 61790 + }, + { + "epoch": 0.7970234141748938, + "grad_norm": 0.8578431707948155, + "learning_rate": 9.186997265161425e-06, + "loss": 0.2381, + "step": 61800 + }, + { + "epoch": 0.7971523823648897, + "grad_norm": 0.8814458736584778, + "learning_rate": 9.18658710899119e-06, + "loss": 0.2295, + "step": 61810 + }, + { + "epoch": 0.7972813505548857, + "grad_norm": 0.8252265407181018, + "learning_rate": 9.18617685854668e-06, + "loss": 0.2368, + "step": 61820 + }, + { + "epoch": 0.7974103187448816, + "grad_norm": 0.8073387091106896, + "learning_rate": 9.185766513837131e-06, + "loss": 0.2104, + "step": 61830 + }, + { + "epoch": 0.7975392869348775, + "grad_norm": 0.934153586495996, + "learning_rate": 9.185356074871788e-06, + "loss": 0.2325, + "step": 61840 + }, + { + "epoch": 0.7976682551248735, + "grad_norm": 0.841335818508361, + "learning_rate": 9.18494554165989e-06, + "loss": 0.2354, + "step": 61850 + }, + { + "epoch": 0.7977972233148694, + "grad_norm": 0.8502003658226808, + "learning_rate": 9.184534914210683e-06, + "loss": 0.2277, + "step": 61860 + }, + { + "epoch": 0.7979261915048653, + "grad_norm": 0.8131793983441998, + "learning_rate": 9.184124192533411e-06, + "loss": 0.2452, + "step": 61870 + }, + { + "epoch": 0.7980551596948613, + "grad_norm": 0.9584697350483238, + "learning_rate": 9.183713376637324e-06, + "loss": 0.2241, + "step": 61880 + }, + { + "epoch": 0.7981841278848572, + "grad_norm": 0.807808561926972, + "learning_rate": 9.183302466531674e-06, + "loss": 0.2211, + "step": 61890 + }, + { + "epoch": 0.7983130960748531, + "grad_norm": 0.8599619789230778, + "learning_rate": 9.182891462225713e-06, + "loss": 0.2305, + "step": 61900 + }, + { + "epoch": 0.7984420642648491, + "grad_norm": 0.9205064830442872, + "learning_rate": 9.182480363728696e-06, + "loss": 0.2358, + "step": 61910 + }, + { + "epoch": 0.7985710324548451, + "grad_norm": 1.0644100992468573, + "learning_rate": 9.18206917104988e-06, + "loss": 0.2253, + "step": 61920 + }, + { + "epoch": 0.7987000006448409, + "grad_norm": 0.7986716379890276, + "learning_rate": 9.181657884198527e-06, + "loss": 0.2253, + "step": 61930 + }, + { + "epoch": 0.7988289688348369, + "grad_norm": 0.8125761291311289, + "learning_rate": 9.181246503183893e-06, + "loss": 0.2217, + "step": 61940 + }, + { + "epoch": 0.7989579370248329, + "grad_norm": 0.8794374310483705, + "learning_rate": 9.180835028015246e-06, + "loss": 0.2296, + "step": 61950 + }, + { + "epoch": 0.7990869052148287, + "grad_norm": 0.9114330413349183, + "learning_rate": 9.180423458701852e-06, + "loss": 0.2362, + "step": 61960 + }, + { + "epoch": 0.7992158734048247, + "grad_norm": 0.8868298849474764, + "learning_rate": 9.180011795252975e-06, + "loss": 0.2337, + "step": 61970 + }, + { + "epoch": 0.7993448415948207, + "grad_norm": 0.8402909489876792, + "learning_rate": 9.179600037677887e-06, + "loss": 0.2285, + "step": 61980 + }, + { + "epoch": 0.7994738097848165, + "grad_norm": 0.8440391624614768, + "learning_rate": 9.17918818598586e-06, + "loss": 0.2378, + "step": 61990 + }, + { + "epoch": 0.7996027779748125, + "grad_norm": 0.82663104441072, + "learning_rate": 9.178776240186169e-06, + "loss": 0.2308, + "step": 62000 + }, + { + "epoch": 0.7997317461648085, + "grad_norm": 0.8299963604915626, + "learning_rate": 9.178364200288091e-06, + "loss": 0.235, + "step": 62010 + }, + { + "epoch": 0.7998607143548044, + "grad_norm": 0.851773056838486, + "learning_rate": 9.177952066300899e-06, + "loss": 0.2196, + "step": 62020 + }, + { + "epoch": 0.7999896825448003, + "grad_norm": 0.8347912042399329, + "learning_rate": 9.177539838233879e-06, + "loss": 0.2322, + "step": 62030 + }, + { + "epoch": 0.8001186507347963, + "grad_norm": 0.8329949694943091, + "learning_rate": 9.177127516096314e-06, + "loss": 0.2312, + "step": 62040 + }, + { + "epoch": 0.8002476189247922, + "grad_norm": 0.8974409150504582, + "learning_rate": 9.176715099897484e-06, + "loss": 0.2355, + "step": 62050 + }, + { + "epoch": 0.8003765871147881, + "grad_norm": 0.9178484059966991, + "learning_rate": 9.17630258964668e-06, + "loss": 0.2158, + "step": 62060 + }, + { + "epoch": 0.8005055553047841, + "grad_norm": 0.8877230380785378, + "learning_rate": 9.175889985353188e-06, + "loss": 0.2328, + "step": 62070 + }, + { + "epoch": 0.80063452349478, + "grad_norm": 0.9208499234359852, + "learning_rate": 9.175477287026302e-06, + "loss": 0.2417, + "step": 62080 + }, + { + "epoch": 0.8007634916847759, + "grad_norm": 0.8826678828698362, + "learning_rate": 9.175064494675313e-06, + "loss": 0.2313, + "step": 62090 + }, + { + "epoch": 0.8008924598747719, + "grad_norm": 0.9472116822049377, + "learning_rate": 9.174651608309518e-06, + "loss": 0.2383, + "step": 62100 + }, + { + "epoch": 0.8010214280647678, + "grad_norm": 0.8048565331279603, + "learning_rate": 9.174238627938212e-06, + "loss": 0.225, + "step": 62110 + }, + { + "epoch": 0.8011503962547638, + "grad_norm": 0.9124610243491362, + "learning_rate": 9.173825553570697e-06, + "loss": 0.2439, + "step": 62120 + }, + { + "epoch": 0.8012793644447597, + "grad_norm": 0.8550666987070409, + "learning_rate": 9.173412385216275e-06, + "loss": 0.2347, + "step": 62130 + }, + { + "epoch": 0.8014083326347556, + "grad_norm": 0.8486109617115798, + "learning_rate": 9.172999122884247e-06, + "loss": 0.2348, + "step": 62140 + }, + { + "epoch": 0.8015373008247516, + "grad_norm": 0.8918049547177164, + "learning_rate": 9.172585766583921e-06, + "loss": 0.2296, + "step": 62150 + }, + { + "epoch": 0.8016662690147475, + "grad_norm": 0.8841432472733839, + "learning_rate": 9.172172316324605e-06, + "loss": 0.2301, + "step": 62160 + }, + { + "epoch": 0.8017952372047434, + "grad_norm": 0.9021100391670459, + "learning_rate": 9.171758772115608e-06, + "loss": 0.2284, + "step": 62170 + }, + { + "epoch": 0.8019242053947394, + "grad_norm": 0.9475664606713678, + "learning_rate": 9.171345133966246e-06, + "loss": 0.2399, + "step": 62180 + }, + { + "epoch": 0.8020531735847354, + "grad_norm": 0.8267901853715763, + "learning_rate": 9.170931401885826e-06, + "loss": 0.2374, + "step": 62190 + }, + { + "epoch": 0.8021821417747312, + "grad_norm": 0.9215840337242862, + "learning_rate": 9.17051757588367e-06, + "loss": 0.232, + "step": 62200 + }, + { + "epoch": 0.8023111099647272, + "grad_norm": 0.8474186506889136, + "learning_rate": 9.170103655969096e-06, + "loss": 0.2253, + "step": 62210 + }, + { + "epoch": 0.8024400781547232, + "grad_norm": 0.9269789094739974, + "learning_rate": 9.169689642151425e-06, + "loss": 0.2317, + "step": 62220 + }, + { + "epoch": 0.802569046344719, + "grad_norm": 0.9171173562863607, + "learning_rate": 9.169275534439979e-06, + "loss": 0.229, + "step": 62230 + }, + { + "epoch": 0.802698014534715, + "grad_norm": 0.9195562045625594, + "learning_rate": 9.168861332844081e-06, + "loss": 0.2187, + "step": 62240 + }, + { + "epoch": 0.802826982724711, + "grad_norm": 0.9173959861555352, + "learning_rate": 9.168447037373063e-06, + "loss": 0.2324, + "step": 62250 + }, + { + "epoch": 0.8029559509147068, + "grad_norm": 0.8320480252205463, + "learning_rate": 9.16803264803625e-06, + "loss": 0.2124, + "step": 62260 + }, + { + "epoch": 0.8030849191047028, + "grad_norm": 0.7842469405473232, + "learning_rate": 9.167618164842973e-06, + "loss": 0.2319, + "step": 62270 + }, + { + "epoch": 0.8032138872946988, + "grad_norm": 0.8460828548374215, + "learning_rate": 9.167203587802567e-06, + "loss": 0.2398, + "step": 62280 + }, + { + "epoch": 0.8033428554846948, + "grad_norm": 0.8858366196289452, + "learning_rate": 9.166788916924367e-06, + "loss": 0.2234, + "step": 62290 + }, + { + "epoch": 0.8034718236746906, + "grad_norm": 0.8253513692211246, + "learning_rate": 9.166374152217712e-06, + "loss": 0.2278, + "step": 62300 + }, + { + "epoch": 0.8036007918646866, + "grad_norm": 0.9128249501571011, + "learning_rate": 9.165959293691942e-06, + "loss": 0.2467, + "step": 62310 + }, + { + "epoch": 0.8037297600546826, + "grad_norm": 0.913529130494967, + "learning_rate": 9.165544341356396e-06, + "loss": 0.2496, + "step": 62320 + }, + { + "epoch": 0.8038587282446784, + "grad_norm": 0.9262279903730001, + "learning_rate": 9.16512929522042e-06, + "loss": 0.2404, + "step": 62330 + }, + { + "epoch": 0.8039876964346744, + "grad_norm": 0.8405126480379059, + "learning_rate": 9.16471415529336e-06, + "loss": 0.2235, + "step": 62340 + }, + { + "epoch": 0.8041166646246704, + "grad_norm": 0.8423822351966894, + "learning_rate": 9.164298921584563e-06, + "loss": 0.2348, + "step": 62350 + }, + { + "epoch": 0.8042456328146662, + "grad_norm": 0.8915709251498264, + "learning_rate": 9.163883594103382e-06, + "loss": 0.2382, + "step": 62360 + }, + { + "epoch": 0.8043746010046622, + "grad_norm": 0.8068645542761566, + "learning_rate": 9.163468172859164e-06, + "loss": 0.2208, + "step": 62370 + }, + { + "epoch": 0.8045035691946582, + "grad_norm": 0.9570824041446924, + "learning_rate": 9.163052657861272e-06, + "loss": 0.2355, + "step": 62380 + }, + { + "epoch": 0.8046325373846541, + "grad_norm": 0.8824020277458642, + "learning_rate": 9.162637049119055e-06, + "loss": 0.2256, + "step": 62390 + }, + { + "epoch": 0.80476150557465, + "grad_norm": 0.851957053159962, + "learning_rate": 9.162221346641873e-06, + "loss": 0.2411, + "step": 62400 + }, + { + "epoch": 0.804890473764646, + "grad_norm": 0.8818559438321454, + "learning_rate": 9.161805550439088e-06, + "loss": 0.2419, + "step": 62410 + }, + { + "epoch": 0.8050194419546419, + "grad_norm": 0.8916414647832839, + "learning_rate": 9.161389660520067e-06, + "loss": 0.215, + "step": 62420 + }, + { + "epoch": 0.8051484101446378, + "grad_norm": 0.8608880005639113, + "learning_rate": 9.16097367689417e-06, + "loss": 0.2315, + "step": 62430 + }, + { + "epoch": 0.8052773783346338, + "grad_norm": 0.7964237848232263, + "learning_rate": 9.160557599570765e-06, + "loss": 0.2338, + "step": 62440 + }, + { + "epoch": 0.8054063465246297, + "grad_norm": 0.832360759420813, + "learning_rate": 9.160141428559221e-06, + "loss": 0.2385, + "step": 62450 + }, + { + "epoch": 0.8055353147146256, + "grad_norm": 0.8885226150135788, + "learning_rate": 9.159725163868911e-06, + "loss": 0.2246, + "step": 62460 + }, + { + "epoch": 0.8056642829046216, + "grad_norm": 0.9010370898174268, + "learning_rate": 9.159308805509208e-06, + "loss": 0.2411, + "step": 62470 + }, + { + "epoch": 0.8057932510946175, + "grad_norm": 0.8806048267094183, + "learning_rate": 9.158892353489488e-06, + "loss": 0.2363, + "step": 62480 + }, + { + "epoch": 0.8059222192846135, + "grad_norm": 0.9249460014698815, + "learning_rate": 9.158475807819127e-06, + "loss": 0.2259, + "step": 62490 + }, + { + "epoch": 0.8060511874746094, + "grad_norm": 0.7889687371445699, + "learning_rate": 9.158059168507507e-06, + "loss": 0.2384, + "step": 62500 + }, + { + "epoch": 0.8061801556646053, + "grad_norm": 0.8514546396127116, + "learning_rate": 9.157642435564006e-06, + "loss": 0.2258, + "step": 62510 + }, + { + "epoch": 0.8063091238546013, + "grad_norm": 0.9436259380749712, + "learning_rate": 9.157225608998015e-06, + "loss": 0.2308, + "step": 62520 + }, + { + "epoch": 0.8064380920445972, + "grad_norm": 0.8717404931769007, + "learning_rate": 9.156808688818914e-06, + "loss": 0.2347, + "step": 62530 + }, + { + "epoch": 0.8065670602345931, + "grad_norm": 0.9244853262176927, + "learning_rate": 9.156391675036095e-06, + "loss": 0.225, + "step": 62540 + }, + { + "epoch": 0.8066960284245891, + "grad_norm": 0.8725139714836765, + "learning_rate": 9.155974567658946e-06, + "loss": 0.2244, + "step": 62550 + }, + { + "epoch": 0.8068249966145851, + "grad_norm": 0.8633106427342815, + "learning_rate": 9.155557366696861e-06, + "loss": 0.2373, + "step": 62560 + }, + { + "epoch": 0.8069539648045809, + "grad_norm": 0.8489119480692374, + "learning_rate": 9.155140072159232e-06, + "loss": 0.2201, + "step": 62570 + }, + { + "epoch": 0.8070829329945769, + "grad_norm": 0.9062457009227448, + "learning_rate": 9.154722684055459e-06, + "loss": 0.2413, + "step": 62580 + }, + { + "epoch": 0.8072119011845729, + "grad_norm": 0.8753662230293588, + "learning_rate": 9.15430520239494e-06, + "loss": 0.2388, + "step": 62590 + }, + { + "epoch": 0.8073408693745687, + "grad_norm": 0.891626925037195, + "learning_rate": 9.153887627187076e-06, + "loss": 0.235, + "step": 62600 + }, + { + "epoch": 0.8074698375645647, + "grad_norm": 0.8402337533006305, + "learning_rate": 9.153469958441266e-06, + "loss": 0.2301, + "step": 62610 + }, + { + "epoch": 0.8075988057545607, + "grad_norm": 0.7922924716231624, + "learning_rate": 9.153052196166921e-06, + "loss": 0.2217, + "step": 62620 + }, + { + "epoch": 0.8077277739445565, + "grad_norm": 0.8726545329790915, + "learning_rate": 9.152634340373447e-06, + "loss": 0.2403, + "step": 62630 + }, + { + "epoch": 0.8078567421345525, + "grad_norm": 0.877859236354177, + "learning_rate": 9.15221639107025e-06, + "loss": 0.2435, + "step": 62640 + }, + { + "epoch": 0.8079857103245485, + "grad_norm": 0.9111239667191237, + "learning_rate": 9.151798348266743e-06, + "loss": 0.2395, + "step": 62650 + }, + { + "epoch": 0.8081146785145444, + "grad_norm": 0.8775418264220339, + "learning_rate": 9.151380211972343e-06, + "loss": 0.2363, + "step": 62660 + }, + { + "epoch": 0.8082436467045403, + "grad_norm": 0.8203537268681829, + "learning_rate": 9.15096198219646e-06, + "loss": 0.2267, + "step": 62670 + }, + { + "epoch": 0.8083726148945363, + "grad_norm": 0.8870995893534734, + "learning_rate": 9.150543658948516e-06, + "loss": 0.2358, + "step": 62680 + }, + { + "epoch": 0.8085015830845322, + "grad_norm": 0.7414773531212822, + "learning_rate": 9.15012524223793e-06, + "loss": 0.2329, + "step": 62690 + }, + { + "epoch": 0.8086305512745281, + "grad_norm": 0.8535773842462192, + "learning_rate": 9.149706732074123e-06, + "loss": 0.2365, + "step": 62700 + }, + { + "epoch": 0.8087595194645241, + "grad_norm": 0.8717599110830836, + "learning_rate": 9.149288128466519e-06, + "loss": 0.2331, + "step": 62710 + }, + { + "epoch": 0.80888848765452, + "grad_norm": 0.8195681137322396, + "learning_rate": 9.148869431424546e-06, + "loss": 0.2278, + "step": 62720 + }, + { + "epoch": 0.8090174558445159, + "grad_norm": 0.8388132304842989, + "learning_rate": 9.14845064095763e-06, + "loss": 0.2258, + "step": 62730 + }, + { + "epoch": 0.8091464240345119, + "grad_norm": 0.8264822082766343, + "learning_rate": 9.148031757075202e-06, + "loss": 0.2327, + "step": 62740 + }, + { + "epoch": 0.8092753922245078, + "grad_norm": 0.7745537476619383, + "learning_rate": 9.147612779786695e-06, + "loss": 0.2219, + "step": 62750 + }, + { + "epoch": 0.8094043604145038, + "grad_norm": 0.8890058489127789, + "learning_rate": 9.147193709101545e-06, + "loss": 0.2401, + "step": 62760 + }, + { + "epoch": 0.8095333286044997, + "grad_norm": 0.9141517804766763, + "learning_rate": 9.146774545029185e-06, + "loss": 0.2203, + "step": 62770 + }, + { + "epoch": 0.8096622967944956, + "grad_norm": 0.8944587192301563, + "learning_rate": 9.146355287579057e-06, + "loss": 0.2325, + "step": 62780 + }, + { + "epoch": 0.8097912649844916, + "grad_norm": 0.9139534644486628, + "learning_rate": 9.145935936760602e-06, + "loss": 0.2306, + "step": 62790 + }, + { + "epoch": 0.8099202331744875, + "grad_norm": 0.9640909413105119, + "learning_rate": 9.14551649258326e-06, + "loss": 0.2376, + "step": 62800 + }, + { + "epoch": 0.8100492013644834, + "grad_norm": 0.8104336454132471, + "learning_rate": 9.14509695505648e-06, + "loss": 0.2343, + "step": 62810 + }, + { + "epoch": 0.8101781695544794, + "grad_norm": 0.886361926609879, + "learning_rate": 9.144677324189705e-06, + "loss": 0.2314, + "step": 62820 + }, + { + "epoch": 0.8103071377444753, + "grad_norm": 0.8834493558565893, + "learning_rate": 9.144257599992387e-06, + "loss": 0.2365, + "step": 62830 + }, + { + "epoch": 0.8104361059344712, + "grad_norm": 0.9161957879054006, + "learning_rate": 9.143837782473978e-06, + "loss": 0.2355, + "step": 62840 + }, + { + "epoch": 0.8105650741244672, + "grad_norm": 0.9092510006733905, + "learning_rate": 9.143417871643931e-06, + "loss": 0.2377, + "step": 62850 + }, + { + "epoch": 0.8106940423144632, + "grad_norm": 0.9045995490168578, + "learning_rate": 9.1429978675117e-06, + "loss": 0.2354, + "step": 62860 + }, + { + "epoch": 0.810823010504459, + "grad_norm": 0.9658564471292035, + "learning_rate": 9.142577770086746e-06, + "loss": 0.233, + "step": 62870 + }, + { + "epoch": 0.810951978694455, + "grad_norm": 0.9314666893682554, + "learning_rate": 9.142157579378524e-06, + "loss": 0.227, + "step": 62880 + }, + { + "epoch": 0.811080946884451, + "grad_norm": 0.9227039412323529, + "learning_rate": 9.1417372953965e-06, + "loss": 0.2269, + "step": 62890 + }, + { + "epoch": 0.8112099150744468, + "grad_norm": 0.9107380158212008, + "learning_rate": 9.141316918150136e-06, + "loss": 0.238, + "step": 62900 + }, + { + "epoch": 0.8113388832644428, + "grad_norm": 0.851447825429585, + "learning_rate": 9.1408964476489e-06, + "loss": 0.2186, + "step": 62910 + }, + { + "epoch": 0.8114678514544388, + "grad_norm": 0.8538633264598732, + "learning_rate": 9.140475883902258e-06, + "loss": 0.2242, + "step": 62920 + }, + { + "epoch": 0.8115968196444348, + "grad_norm": 0.905798052016153, + "learning_rate": 9.140055226919682e-06, + "loss": 0.2379, + "step": 62930 + }, + { + "epoch": 0.8117257878344306, + "grad_norm": 0.9305212625988639, + "learning_rate": 9.139634476710642e-06, + "loss": 0.23, + "step": 62940 + }, + { + "epoch": 0.8118547560244266, + "grad_norm": 0.8665767765912015, + "learning_rate": 9.139213633284616e-06, + "loss": 0.2386, + "step": 62950 + }, + { + "epoch": 0.8119837242144226, + "grad_norm": 0.8392479406984177, + "learning_rate": 9.138792696651078e-06, + "loss": 0.2388, + "step": 62960 + }, + { + "epoch": 0.8121126924044184, + "grad_norm": 0.8538079602152576, + "learning_rate": 9.138371666819508e-06, + "loss": 0.2396, + "step": 62970 + }, + { + "epoch": 0.8122416605944144, + "grad_norm": 0.9294379086052589, + "learning_rate": 9.137950543799387e-06, + "loss": 0.2345, + "step": 62980 + }, + { + "epoch": 0.8123706287844104, + "grad_norm": 0.8802984322046948, + "learning_rate": 9.137529327600197e-06, + "loss": 0.2288, + "step": 62990 + }, + { + "epoch": 0.8124995969744062, + "grad_norm": 0.8839437271441257, + "learning_rate": 9.137108018231421e-06, + "loss": 0.2282, + "step": 63000 + }, + { + "epoch": 0.8126285651644022, + "grad_norm": 0.8831043107844556, + "learning_rate": 9.136686615702552e-06, + "loss": 0.233, + "step": 63010 + }, + { + "epoch": 0.8127575333543982, + "grad_norm": 0.8934650457851774, + "learning_rate": 9.136265120023075e-06, + "loss": 0.2334, + "step": 63020 + }, + { + "epoch": 0.8128865015443941, + "grad_norm": 0.7941466391864896, + "learning_rate": 9.135843531202482e-06, + "loss": 0.2293, + "step": 63030 + }, + { + "epoch": 0.81301546973439, + "grad_norm": 0.8513315236991659, + "learning_rate": 9.135421849250266e-06, + "loss": 0.2391, + "step": 63040 + }, + { + "epoch": 0.813144437924386, + "grad_norm": 0.8702146476437572, + "learning_rate": 9.135000074175922e-06, + "loss": 0.2301, + "step": 63050 + }, + { + "epoch": 0.8132734061143819, + "grad_norm": 0.8965298054630677, + "learning_rate": 9.134578205988948e-06, + "loss": 0.238, + "step": 63060 + }, + { + "epoch": 0.8134023743043778, + "grad_norm": 0.8392504598961305, + "learning_rate": 9.134156244698846e-06, + "loss": 0.2281, + "step": 63070 + }, + { + "epoch": 0.8135313424943738, + "grad_norm": 0.8171347000331814, + "learning_rate": 9.133734190315115e-06, + "loss": 0.219, + "step": 63080 + }, + { + "epoch": 0.8136603106843697, + "grad_norm": 0.7895345712457488, + "learning_rate": 9.13331204284726e-06, + "loss": 0.217, + "step": 63090 + }, + { + "epoch": 0.8137892788743656, + "grad_norm": 0.9255613702681907, + "learning_rate": 9.132889802304788e-06, + "loss": 0.2342, + "step": 63100 + }, + { + "epoch": 0.8139182470643616, + "grad_norm": 0.8424545613056895, + "learning_rate": 9.132467468697207e-06, + "loss": 0.2342, + "step": 63110 + }, + { + "epoch": 0.8140472152543575, + "grad_norm": 0.8455653566825551, + "learning_rate": 9.132045042034023e-06, + "loss": 0.2259, + "step": 63120 + }, + { + "epoch": 0.8141761834443535, + "grad_norm": 1.0317966642604834, + "learning_rate": 9.131622522324753e-06, + "loss": 0.2226, + "step": 63130 + }, + { + "epoch": 0.8143051516343494, + "grad_norm": 0.8590332417488764, + "learning_rate": 9.13119990957891e-06, + "loss": 0.2222, + "step": 63140 + }, + { + "epoch": 0.8144341198243453, + "grad_norm": 0.9452710372974397, + "learning_rate": 9.13077720380601e-06, + "loss": 0.2305, + "step": 63150 + }, + { + "epoch": 0.8145630880143413, + "grad_norm": 0.9781052672687868, + "learning_rate": 9.13035440501557e-06, + "loss": 0.2293, + "step": 63160 + }, + { + "epoch": 0.8146920562043372, + "grad_norm": 0.8906067024511669, + "learning_rate": 9.129931513217116e-06, + "loss": 0.2287, + "step": 63170 + }, + { + "epoch": 0.8148210243943331, + "grad_norm": 0.861776779945215, + "learning_rate": 9.129508528420166e-06, + "loss": 0.2199, + "step": 63180 + }, + { + "epoch": 0.8149499925843291, + "grad_norm": 0.8349248732481963, + "learning_rate": 9.129085450634245e-06, + "loss": 0.2311, + "step": 63190 + }, + { + "epoch": 0.8150789607743251, + "grad_norm": 0.9087803277324282, + "learning_rate": 9.128662279868883e-06, + "loss": 0.2399, + "step": 63200 + }, + { + "epoch": 0.8152079289643209, + "grad_norm": 0.8979429915140853, + "learning_rate": 9.128239016133606e-06, + "loss": 0.2428, + "step": 63210 + }, + { + "epoch": 0.8153368971543169, + "grad_norm": 0.9039822949173263, + "learning_rate": 9.127815659437943e-06, + "loss": 0.2272, + "step": 63220 + }, + { + "epoch": 0.8154658653443129, + "grad_norm": 0.8620720036411883, + "learning_rate": 9.127392209791435e-06, + "loss": 0.2411, + "step": 63230 + }, + { + "epoch": 0.8155948335343087, + "grad_norm": 0.9003853399958157, + "learning_rate": 9.12696866720361e-06, + "loss": 0.2251, + "step": 63240 + }, + { + "epoch": 0.8157238017243047, + "grad_norm": 0.9079281693021791, + "learning_rate": 9.12654503168401e-06, + "loss": 0.2277, + "step": 63250 + }, + { + "epoch": 0.8158527699143007, + "grad_norm": 0.9095631935035965, + "learning_rate": 9.126121303242172e-06, + "loss": 0.234, + "step": 63260 + }, + { + "epoch": 0.8159817381042965, + "grad_norm": 0.848213778856376, + "learning_rate": 9.125697481887638e-06, + "loss": 0.2183, + "step": 63270 + }, + { + "epoch": 0.8161107062942925, + "grad_norm": 0.9099222966190282, + "learning_rate": 9.125273567629951e-06, + "loss": 0.2268, + "step": 63280 + }, + { + "epoch": 0.8162396744842885, + "grad_norm": 0.9468009201339136, + "learning_rate": 9.124849560478659e-06, + "loss": 0.2397, + "step": 63290 + }, + { + "epoch": 0.8163686426742844, + "grad_norm": 0.8902178807921921, + "learning_rate": 9.124425460443309e-06, + "loss": 0.2412, + "step": 63300 + }, + { + "epoch": 0.8164976108642803, + "grad_norm": 0.8925822336345829, + "learning_rate": 9.12400126753345e-06, + "loss": 0.2263, + "step": 63310 + }, + { + "epoch": 0.8166265790542763, + "grad_norm": 0.8214822412672099, + "learning_rate": 9.123576981758633e-06, + "loss": 0.2326, + "step": 63320 + }, + { + "epoch": 0.8167555472442722, + "grad_norm": 0.9346886682698673, + "learning_rate": 9.123152603128414e-06, + "loss": 0.2452, + "step": 63330 + }, + { + "epoch": 0.8168845154342681, + "grad_norm": 0.8790148588094232, + "learning_rate": 9.12272813165235e-06, + "loss": 0.2331, + "step": 63340 + }, + { + "epoch": 0.8170134836242641, + "grad_norm": 0.7961898526026233, + "learning_rate": 9.122303567339999e-06, + "loss": 0.2307, + "step": 63350 + }, + { + "epoch": 0.81714245181426, + "grad_norm": 0.9008204177921892, + "learning_rate": 9.121878910200919e-06, + "loss": 0.2375, + "step": 63360 + }, + { + "epoch": 0.8172714200042559, + "grad_norm": 0.870449988766568, + "learning_rate": 9.121454160244674e-06, + "loss": 0.2182, + "step": 63370 + }, + { + "epoch": 0.8174003881942519, + "grad_norm": 0.8707343566869515, + "learning_rate": 9.12102931748083e-06, + "loss": 0.2344, + "step": 63380 + }, + { + "epoch": 0.8175293563842478, + "grad_norm": 0.7762660193019996, + "learning_rate": 9.120604381918951e-06, + "loss": 0.2387, + "step": 63390 + }, + { + "epoch": 0.8176583245742438, + "grad_norm": 0.9217018990399637, + "learning_rate": 9.120179353568608e-06, + "loss": 0.2315, + "step": 63400 + }, + { + "epoch": 0.8177872927642397, + "grad_norm": 0.8913252663566258, + "learning_rate": 9.11975423243937e-06, + "loss": 0.2244, + "step": 63410 + }, + { + "epoch": 0.8179162609542356, + "grad_norm": 0.7719519035657904, + "learning_rate": 9.119329018540813e-06, + "loss": 0.2279, + "step": 63420 + }, + { + "epoch": 0.8180452291442316, + "grad_norm": 0.8582187220515161, + "learning_rate": 9.118903711882508e-06, + "loss": 0.2183, + "step": 63430 + }, + { + "epoch": 0.8181741973342275, + "grad_norm": 0.8633090054919571, + "learning_rate": 9.118478312474034e-06, + "loss": 0.2298, + "step": 63440 + }, + { + "epoch": 0.8183031655242234, + "grad_norm": 0.8810425160584221, + "learning_rate": 9.118052820324972e-06, + "loss": 0.2268, + "step": 63450 + }, + { + "epoch": 0.8184321337142194, + "grad_norm": 0.9726383197077896, + "learning_rate": 9.117627235444901e-06, + "loss": 0.2271, + "step": 63460 + }, + { + "epoch": 0.8185611019042153, + "grad_norm": 0.9201645232725404, + "learning_rate": 9.117201557843404e-06, + "loss": 0.223, + "step": 63470 + }, + { + "epoch": 0.8186900700942112, + "grad_norm": 0.875245526139093, + "learning_rate": 9.116775787530067e-06, + "loss": 0.2249, + "step": 63480 + }, + { + "epoch": 0.8188190382842072, + "grad_norm": 0.8829111520671783, + "learning_rate": 9.11634992451448e-06, + "loss": 0.2335, + "step": 63490 + }, + { + "epoch": 0.8189480064742032, + "grad_norm": 0.8511780253678063, + "learning_rate": 9.11592396880623e-06, + "loss": 0.2403, + "step": 63500 + }, + { + "epoch": 0.819076974664199, + "grad_norm": 0.8847089949058777, + "learning_rate": 9.115497920414911e-06, + "loss": 0.221, + "step": 63510 + }, + { + "epoch": 0.819205942854195, + "grad_norm": 0.8213795138762857, + "learning_rate": 9.115071779350115e-06, + "loss": 0.2205, + "step": 63520 + }, + { + "epoch": 0.819334911044191, + "grad_norm": 0.9042173957485179, + "learning_rate": 9.114645545621437e-06, + "loss": 0.2347, + "step": 63530 + }, + { + "epoch": 0.8194638792341868, + "grad_norm": 0.8994268974620452, + "learning_rate": 9.114219219238476e-06, + "loss": 0.2258, + "step": 63540 + }, + { + "epoch": 0.8195928474241828, + "grad_norm": 0.8195044035746842, + "learning_rate": 9.113792800210832e-06, + "loss": 0.2225, + "step": 63550 + }, + { + "epoch": 0.8197218156141788, + "grad_norm": 0.8447908975087122, + "learning_rate": 9.11336628854811e-06, + "loss": 0.2407, + "step": 63560 + }, + { + "epoch": 0.8198507838041748, + "grad_norm": 0.8428395929361843, + "learning_rate": 9.112939684259909e-06, + "loss": 0.2332, + "step": 63570 + }, + { + "epoch": 0.8199797519941706, + "grad_norm": 0.9271734264460496, + "learning_rate": 9.11251298735584e-06, + "loss": 0.2266, + "step": 63580 + }, + { + "epoch": 0.8201087201841666, + "grad_norm": 0.9048557320054155, + "learning_rate": 9.11208619784551e-06, + "loss": 0.2188, + "step": 63590 + }, + { + "epoch": 0.8202376883741626, + "grad_norm": 0.8598411855274131, + "learning_rate": 9.111659315738527e-06, + "loss": 0.2278, + "step": 63600 + }, + { + "epoch": 0.8203666565641584, + "grad_norm": 1.068704479012997, + "learning_rate": 9.111232341044505e-06, + "loss": 0.2221, + "step": 63610 + }, + { + "epoch": 0.8204956247541544, + "grad_norm": 0.8834295593214279, + "learning_rate": 9.110805273773062e-06, + "loss": 0.2227, + "step": 63620 + }, + { + "epoch": 0.8206245929441504, + "grad_norm": 0.7909033787705562, + "learning_rate": 9.11037811393381e-06, + "loss": 0.2116, + "step": 63630 + }, + { + "epoch": 0.8207535611341462, + "grad_norm": 0.8454656283136501, + "learning_rate": 9.109950861536371e-06, + "loss": 0.2416, + "step": 63640 + }, + { + "epoch": 0.8208825293241422, + "grad_norm": 0.9125452341747694, + "learning_rate": 9.109523516590366e-06, + "loss": 0.2286, + "step": 63650 + }, + { + "epoch": 0.8210114975141382, + "grad_norm": 0.9099388854944429, + "learning_rate": 9.109096079105415e-06, + "loss": 0.2434, + "step": 63660 + }, + { + "epoch": 0.8211404657041341, + "grad_norm": 0.928409664397466, + "learning_rate": 9.108668549091147e-06, + "loss": 0.228, + "step": 63670 + }, + { + "epoch": 0.82126943389413, + "grad_norm": 0.8545800502730969, + "learning_rate": 9.108240926557186e-06, + "loss": 0.2244, + "step": 63680 + }, + { + "epoch": 0.821398402084126, + "grad_norm": 0.8949080713067448, + "learning_rate": 9.107813211513163e-06, + "loss": 0.2259, + "step": 63690 + }, + { + "epoch": 0.8215273702741219, + "grad_norm": 0.7704111853109971, + "learning_rate": 9.107385403968709e-06, + "loss": 0.219, + "step": 63700 + }, + { + "epoch": 0.8216563384641178, + "grad_norm": 0.8212076336921201, + "learning_rate": 9.106957503933456e-06, + "loss": 0.2261, + "step": 63710 + }, + { + "epoch": 0.8217853066541138, + "grad_norm": 0.8513658606269296, + "learning_rate": 9.106529511417041e-06, + "loss": 0.2396, + "step": 63720 + }, + { + "epoch": 0.8219142748441097, + "grad_norm": 0.988247583093594, + "learning_rate": 9.106101426429103e-06, + "loss": 0.2281, + "step": 63730 + }, + { + "epoch": 0.8220432430341056, + "grad_norm": 0.7762233382074243, + "learning_rate": 9.10567324897928e-06, + "loss": 0.2224, + "step": 63740 + }, + { + "epoch": 0.8221722112241016, + "grad_norm": 0.9097391879497483, + "learning_rate": 9.105244979077214e-06, + "loss": 0.2326, + "step": 63750 + }, + { + "epoch": 0.8223011794140975, + "grad_norm": 0.8221470129191276, + "learning_rate": 9.104816616732548e-06, + "loss": 0.2278, + "step": 63760 + }, + { + "epoch": 0.8224301476040935, + "grad_norm": 0.8745642435361157, + "learning_rate": 9.10438816195493e-06, + "loss": 0.2417, + "step": 63770 + }, + { + "epoch": 0.8225591157940894, + "grad_norm": 0.8656548502971664, + "learning_rate": 9.103959614754003e-06, + "loss": 0.2325, + "step": 63780 + }, + { + "epoch": 0.8226880839840853, + "grad_norm": 0.9310494199219601, + "learning_rate": 9.103530975139425e-06, + "loss": 0.226, + "step": 63790 + }, + { + "epoch": 0.8228170521740813, + "grad_norm": 0.9532283103146078, + "learning_rate": 9.103102243120843e-06, + "loss": 0.2297, + "step": 63800 + }, + { + "epoch": 0.8229460203640772, + "grad_norm": 0.8068710032967964, + "learning_rate": 9.10267341870791e-06, + "loss": 0.2277, + "step": 63810 + }, + { + "epoch": 0.8230749885540731, + "grad_norm": 0.9492785061462926, + "learning_rate": 9.102244501910287e-06, + "loss": 0.2484, + "step": 63820 + }, + { + "epoch": 0.8232039567440691, + "grad_norm": 0.9300173724850594, + "learning_rate": 9.101815492737628e-06, + "loss": 0.2406, + "step": 63830 + }, + { + "epoch": 0.823332924934065, + "grad_norm": 0.8436636468468176, + "learning_rate": 9.101386391199597e-06, + "loss": 0.2324, + "step": 63840 + }, + { + "epoch": 0.8234618931240609, + "grad_norm": 0.8619173790365094, + "learning_rate": 9.100957197305858e-06, + "loss": 0.2281, + "step": 63850 + }, + { + "epoch": 0.8235908613140569, + "grad_norm": 0.9456427919905788, + "learning_rate": 9.100527911066069e-06, + "loss": 0.234, + "step": 63860 + }, + { + "epoch": 0.8237198295040529, + "grad_norm": 0.9879366557132943, + "learning_rate": 9.1000985324899e-06, + "loss": 0.2195, + "step": 63870 + }, + { + "epoch": 0.8238487976940487, + "grad_norm": 0.840277040419075, + "learning_rate": 9.099669061587022e-06, + "loss": 0.2272, + "step": 63880 + }, + { + "epoch": 0.8239777658840447, + "grad_norm": 0.8179597642633726, + "learning_rate": 9.099239498367106e-06, + "loss": 0.2288, + "step": 63890 + }, + { + "epoch": 0.8241067340740407, + "grad_norm": 0.9040142009004407, + "learning_rate": 9.09880984283982e-06, + "loss": 0.2238, + "step": 63900 + }, + { + "epoch": 0.8242357022640365, + "grad_norm": 0.9112814829106294, + "learning_rate": 9.098380095014843e-06, + "loss": 0.2359, + "step": 63910 + }, + { + "epoch": 0.8243646704540325, + "grad_norm": 0.9715539350532779, + "learning_rate": 9.097950254901853e-06, + "loss": 0.2347, + "step": 63920 + }, + { + "epoch": 0.8244936386440285, + "grad_norm": 0.9713506780686553, + "learning_rate": 9.097520322510526e-06, + "loss": 0.2351, + "step": 63930 + }, + { + "epoch": 0.8246226068340244, + "grad_norm": 0.8257228974209652, + "learning_rate": 9.097090297850544e-06, + "loss": 0.2331, + "step": 63940 + }, + { + "epoch": 0.8247515750240203, + "grad_norm": 0.8218151207635653, + "learning_rate": 9.096660180931594e-06, + "loss": 0.2162, + "step": 63950 + }, + { + "epoch": 0.8248805432140163, + "grad_norm": 0.8074277220625921, + "learning_rate": 9.096229971763357e-06, + "loss": 0.2207, + "step": 63960 + }, + { + "epoch": 0.8250095114040122, + "grad_norm": 0.7790434709861154, + "learning_rate": 9.095799670355523e-06, + "loss": 0.2189, + "step": 63970 + }, + { + "epoch": 0.8251384795940081, + "grad_norm": 0.8858939718928535, + "learning_rate": 9.095369276717781e-06, + "loss": 0.2417, + "step": 63980 + }, + { + "epoch": 0.8252674477840041, + "grad_norm": 0.8897944836235889, + "learning_rate": 9.09493879085982e-06, + "loss": 0.2265, + "step": 63990 + }, + { + "epoch": 0.825396415974, + "grad_norm": 0.8259531836200519, + "learning_rate": 9.09450821279134e-06, + "loss": 0.2353, + "step": 64000 + }, + { + "epoch": 0.8255253841639959, + "grad_norm": 0.813423860854996, + "learning_rate": 9.094077542522031e-06, + "loss": 0.2341, + "step": 64010 + }, + { + "epoch": 0.8256543523539919, + "grad_norm": 0.8689012499607969, + "learning_rate": 9.093646780061594e-06, + "loss": 0.232, + "step": 64020 + }, + { + "epoch": 0.8257833205439878, + "grad_norm": 0.8975859004726505, + "learning_rate": 9.093215925419727e-06, + "loss": 0.2287, + "step": 64030 + }, + { + "epoch": 0.8259122887339838, + "grad_norm": 0.8308452062426382, + "learning_rate": 9.092784978606132e-06, + "loss": 0.2216, + "step": 64040 + }, + { + "epoch": 0.8260412569239797, + "grad_norm": 0.8995720232257584, + "learning_rate": 9.092353939630516e-06, + "loss": 0.2322, + "step": 64050 + }, + { + "epoch": 0.8261702251139756, + "grad_norm": 0.8372041254794743, + "learning_rate": 9.091922808502583e-06, + "loss": 0.2181, + "step": 64060 + }, + { + "epoch": 0.8262991933039716, + "grad_norm": 0.9684633224106483, + "learning_rate": 9.091491585232043e-06, + "loss": 0.2325, + "step": 64070 + }, + { + "epoch": 0.8264281614939675, + "grad_norm": 0.8651962876008688, + "learning_rate": 9.091060269828602e-06, + "loss": 0.2379, + "step": 64080 + }, + { + "epoch": 0.8265571296839634, + "grad_norm": 0.88381480207658, + "learning_rate": 9.090628862301977e-06, + "loss": 0.2252, + "step": 64090 + }, + { + "epoch": 0.8266860978739594, + "grad_norm": 0.8973621332714015, + "learning_rate": 9.090197362661881e-06, + "loss": 0.223, + "step": 64100 + }, + { + "epoch": 0.8268150660639553, + "grad_norm": 0.8713527243900491, + "learning_rate": 9.089765770918031e-06, + "loss": 0.2173, + "step": 64110 + }, + { + "epoch": 0.8269440342539512, + "grad_norm": 0.8031263975009157, + "learning_rate": 9.089334087080145e-06, + "loss": 0.2284, + "step": 64120 + }, + { + "epoch": 0.8270730024439472, + "grad_norm": 0.8749848423647538, + "learning_rate": 9.088902311157944e-06, + "loss": 0.2216, + "step": 64130 + }, + { + "epoch": 0.8272019706339432, + "grad_norm": 0.8756838582825281, + "learning_rate": 9.08847044316115e-06, + "loss": 0.2204, + "step": 64140 + }, + { + "epoch": 0.827330938823939, + "grad_norm": 0.8293953685930006, + "learning_rate": 9.088038483099489e-06, + "loss": 0.2265, + "step": 64150 + }, + { + "epoch": 0.827459907013935, + "grad_norm": 0.8600785455356706, + "learning_rate": 9.087606430982687e-06, + "loss": 0.2355, + "step": 64160 + }, + { + "epoch": 0.827588875203931, + "grad_norm": 0.918452774578914, + "learning_rate": 9.087174286820474e-06, + "loss": 0.2274, + "step": 64170 + }, + { + "epoch": 0.8277178433939268, + "grad_norm": 0.8617232146134708, + "learning_rate": 9.08674205062258e-06, + "loss": 0.23, + "step": 64180 + }, + { + "epoch": 0.8278468115839228, + "grad_norm": 0.8648016412981421, + "learning_rate": 9.08630972239874e-06, + "loss": 0.2145, + "step": 64190 + }, + { + "epoch": 0.8279757797739188, + "grad_norm": 0.8020472087182295, + "learning_rate": 9.08587730215869e-06, + "loss": 0.235, + "step": 64200 + }, + { + "epoch": 0.8281047479639148, + "grad_norm": 0.926841726959342, + "learning_rate": 9.085444789912164e-06, + "loss": 0.2285, + "step": 64210 + }, + { + "epoch": 0.8282337161539106, + "grad_norm": 0.855564516200177, + "learning_rate": 9.0850121856689e-06, + "loss": 0.2347, + "step": 64220 + }, + { + "epoch": 0.8283626843439066, + "grad_norm": 0.8706400061667869, + "learning_rate": 9.084579489438646e-06, + "loss": 0.2175, + "step": 64230 + }, + { + "epoch": 0.8284916525339026, + "grad_norm": 0.8521755184993282, + "learning_rate": 9.08414670123114e-06, + "loss": 0.2231, + "step": 64240 + }, + { + "epoch": 0.8286206207238984, + "grad_norm": 0.8104389315460465, + "learning_rate": 9.083713821056129e-06, + "loss": 0.2369, + "step": 64250 + }, + { + "epoch": 0.8287495889138944, + "grad_norm": 0.8693260607027871, + "learning_rate": 9.08328084892336e-06, + "loss": 0.2269, + "step": 64260 + }, + { + "epoch": 0.8288785571038904, + "grad_norm": 0.9081650176699527, + "learning_rate": 9.082847784842586e-06, + "loss": 0.2389, + "step": 64270 + }, + { + "epoch": 0.8290075252938862, + "grad_norm": 0.8269194483114094, + "learning_rate": 9.082414628823556e-06, + "loss": 0.2301, + "step": 64280 + }, + { + "epoch": 0.8291364934838822, + "grad_norm": 0.8822006855534276, + "learning_rate": 9.081981380876024e-06, + "loss": 0.2318, + "step": 64290 + }, + { + "epoch": 0.8292654616738782, + "grad_norm": 0.8778344150918269, + "learning_rate": 9.081548041009746e-06, + "loss": 0.2249, + "step": 64300 + }, + { + "epoch": 0.8293944298638741, + "grad_norm": 0.8642085908578947, + "learning_rate": 9.081114609234483e-06, + "loss": 0.2365, + "step": 64310 + }, + { + "epoch": 0.82952339805387, + "grad_norm": 0.9076953537449217, + "learning_rate": 9.08068108555999e-06, + "loss": 0.2374, + "step": 64320 + }, + { + "epoch": 0.829652366243866, + "grad_norm": 0.8832044326343619, + "learning_rate": 9.080247469996033e-06, + "loss": 0.22, + "step": 64330 + }, + { + "epoch": 0.8297813344338619, + "grad_norm": 0.8520450846034884, + "learning_rate": 9.079813762552374e-06, + "loss": 0.2224, + "step": 64340 + }, + { + "epoch": 0.8299103026238578, + "grad_norm": 0.8694775032385967, + "learning_rate": 9.079379963238779e-06, + "loss": 0.2443, + "step": 64350 + }, + { + "epoch": 0.8300392708138538, + "grad_norm": 0.8670248338926708, + "learning_rate": 9.07894607206502e-06, + "loss": 0.2342, + "step": 64360 + }, + { + "epoch": 0.8301682390038497, + "grad_norm": 0.8282698448512852, + "learning_rate": 9.078512089040865e-06, + "loss": 0.2373, + "step": 64370 + }, + { + "epoch": 0.8302972071938456, + "grad_norm": 0.8649174160770057, + "learning_rate": 9.078078014176084e-06, + "loss": 0.2309, + "step": 64380 + }, + { + "epoch": 0.8304261753838416, + "grad_norm": 0.7770291379924061, + "learning_rate": 9.077643847480457e-06, + "loss": 0.2454, + "step": 64390 + }, + { + "epoch": 0.8305551435738375, + "grad_norm": 0.8285505785391454, + "learning_rate": 9.077209588963756e-06, + "loss": 0.239, + "step": 64400 + }, + { + "epoch": 0.8306841117638335, + "grad_norm": 1.0452202739384933, + "learning_rate": 9.076775238635762e-06, + "loss": 0.2352, + "step": 64410 + }, + { + "epoch": 0.8308130799538294, + "grad_norm": 0.8885455962403669, + "learning_rate": 9.076340796506254e-06, + "loss": 0.2446, + "step": 64420 + }, + { + "epoch": 0.8309420481438253, + "grad_norm": 0.8403732679826887, + "learning_rate": 9.075906262585017e-06, + "loss": 0.2308, + "step": 64430 + }, + { + "epoch": 0.8310710163338213, + "grad_norm": 0.8564666955225583, + "learning_rate": 9.075471636881836e-06, + "loss": 0.2235, + "step": 64440 + }, + { + "epoch": 0.8311999845238172, + "grad_norm": 0.8655958746917201, + "learning_rate": 9.075036919406495e-06, + "loss": 0.2318, + "step": 64450 + }, + { + "epoch": 0.8313289527138131, + "grad_norm": 0.901457958121277, + "learning_rate": 9.074602110168786e-06, + "loss": 0.2365, + "step": 64460 + }, + { + "epoch": 0.8314579209038091, + "grad_norm": 0.9131132175595081, + "learning_rate": 9.0741672091785e-06, + "loss": 0.2302, + "step": 64470 + }, + { + "epoch": 0.831586889093805, + "grad_norm": 0.7874746310533793, + "learning_rate": 9.073732216445428e-06, + "loss": 0.2435, + "step": 64480 + }, + { + "epoch": 0.8317158572838009, + "grad_norm": 0.9102303001138656, + "learning_rate": 9.073297131979366e-06, + "loss": 0.2303, + "step": 64490 + }, + { + "epoch": 0.8318448254737969, + "grad_norm": 0.9319148211952257, + "learning_rate": 9.072861955790113e-06, + "loss": 0.2455, + "step": 64500 + }, + { + "epoch": 0.8319737936637929, + "grad_norm": 0.781955460892185, + "learning_rate": 9.072426687887467e-06, + "loss": 0.2203, + "step": 64510 + }, + { + "epoch": 0.8321027618537887, + "grad_norm": 0.9407987287038404, + "learning_rate": 9.071991328281228e-06, + "loss": 0.2192, + "step": 64520 + }, + { + "epoch": 0.8322317300437847, + "grad_norm": 0.8408040070407578, + "learning_rate": 9.071555876981203e-06, + "loss": 0.2331, + "step": 64530 + }, + { + "epoch": 0.8323606982337807, + "grad_norm": 0.8559713448632057, + "learning_rate": 9.071120333997195e-06, + "loss": 0.2262, + "step": 64540 + }, + { + "epoch": 0.8324896664237765, + "grad_norm": 0.8081106197060937, + "learning_rate": 9.070684699339011e-06, + "loss": 0.2223, + "step": 64550 + }, + { + "epoch": 0.8326186346137725, + "grad_norm": 0.9296637435066896, + "learning_rate": 9.070248973016465e-06, + "loss": 0.2328, + "step": 64560 + }, + { + "epoch": 0.8327476028037685, + "grad_norm": 0.9034664403981318, + "learning_rate": 9.069813155039364e-06, + "loss": 0.2307, + "step": 64570 + }, + { + "epoch": 0.8328765709937644, + "grad_norm": 0.7742938145096457, + "learning_rate": 9.069377245417524e-06, + "loss": 0.2365, + "step": 64580 + }, + { + "epoch": 0.8330055391837603, + "grad_norm": 0.8394737920579425, + "learning_rate": 9.06894124416076e-06, + "loss": 0.2307, + "step": 64590 + }, + { + "epoch": 0.8331345073737563, + "grad_norm": 0.8088461367203956, + "learning_rate": 9.06850515127889e-06, + "loss": 0.229, + "step": 64600 + }, + { + "epoch": 0.8332634755637522, + "grad_norm": 0.8085580994348286, + "learning_rate": 9.068068966781735e-06, + "loss": 0.2145, + "step": 64610 + }, + { + "epoch": 0.8333924437537481, + "grad_norm": 0.8426180970025715, + "learning_rate": 9.067632690679117e-06, + "loss": 0.2357, + "step": 64620 + }, + { + "epoch": 0.8335214119437441, + "grad_norm": 0.9575908543235343, + "learning_rate": 9.067196322980858e-06, + "loss": 0.2263, + "step": 64630 + }, + { + "epoch": 0.83365038013374, + "grad_norm": 0.8626003162644187, + "learning_rate": 9.066759863696788e-06, + "loss": 0.2296, + "step": 64640 + }, + { + "epoch": 0.8337793483237359, + "grad_norm": 0.7932696956956714, + "learning_rate": 9.066323312836733e-06, + "loss": 0.2176, + "step": 64650 + }, + { + "epoch": 0.8339083165137319, + "grad_norm": 0.8529023726577807, + "learning_rate": 9.065886670410522e-06, + "loss": 0.2142, + "step": 64660 + }, + { + "epoch": 0.8340372847037278, + "grad_norm": 0.9191331304386738, + "learning_rate": 9.06544993642799e-06, + "loss": 0.2381, + "step": 64670 + }, + { + "epoch": 0.8341662528937238, + "grad_norm": 0.8183733993368338, + "learning_rate": 9.065013110898969e-06, + "loss": 0.2279, + "step": 64680 + }, + { + "epoch": 0.8342952210837197, + "grad_norm": 0.8799320563940232, + "learning_rate": 9.064576193833297e-06, + "loss": 0.2242, + "step": 64690 + }, + { + "epoch": 0.8344241892737156, + "grad_norm": 0.8422119151639961, + "learning_rate": 9.064139185240813e-06, + "loss": 0.2386, + "step": 64700 + }, + { + "epoch": 0.8345531574637116, + "grad_norm": 0.8718830312082284, + "learning_rate": 9.063702085131356e-06, + "loss": 0.2423, + "step": 64710 + }, + { + "epoch": 0.8346821256537075, + "grad_norm": 0.9079402101105113, + "learning_rate": 9.063264893514771e-06, + "loss": 0.2283, + "step": 64720 + }, + { + "epoch": 0.8348110938437034, + "grad_norm": 0.9031278445615668, + "learning_rate": 9.062827610400901e-06, + "loss": 0.2305, + "step": 64730 + }, + { + "epoch": 0.8349400620336994, + "grad_norm": 0.893500492803066, + "learning_rate": 9.062390235799594e-06, + "loss": 0.2269, + "step": 64740 + }, + { + "epoch": 0.8350690302236953, + "grad_norm": 0.7935415985438528, + "learning_rate": 9.061952769720697e-06, + "loss": 0.2302, + "step": 64750 + }, + { + "epoch": 0.8351979984136912, + "grad_norm": 0.9536452156119577, + "learning_rate": 9.061515212174061e-06, + "loss": 0.2341, + "step": 64760 + }, + { + "epoch": 0.8353269666036872, + "grad_norm": 0.8645159256813553, + "learning_rate": 9.061077563169543e-06, + "loss": 0.234, + "step": 64770 + }, + { + "epoch": 0.8354559347936832, + "grad_norm": 0.9093261605475919, + "learning_rate": 9.060639822716992e-06, + "loss": 0.2274, + "step": 64780 + }, + { + "epoch": 0.835584902983679, + "grad_norm": 0.686009902723465, + "learning_rate": 9.060201990826271e-06, + "loss": 0.2213, + "step": 64790 + }, + { + "epoch": 0.835713871173675, + "grad_norm": 0.9507692170037352, + "learning_rate": 9.059764067507236e-06, + "loss": 0.2436, + "step": 64800 + }, + { + "epoch": 0.835842839363671, + "grad_norm": 0.8630763846849271, + "learning_rate": 9.059326052769745e-06, + "loss": 0.217, + "step": 64810 + }, + { + "epoch": 0.8359718075536668, + "grad_norm": 0.9363630066767143, + "learning_rate": 9.05888794662367e-06, + "loss": 0.229, + "step": 64820 + }, + { + "epoch": 0.8361007757436628, + "grad_norm": 0.8570402348378606, + "learning_rate": 9.058449749078868e-06, + "loss": 0.2291, + "step": 64830 + }, + { + "epoch": 0.8362297439336588, + "grad_norm": 0.8787089469640633, + "learning_rate": 9.058011460145211e-06, + "loss": 0.2177, + "step": 64840 + }, + { + "epoch": 0.8363587121236546, + "grad_norm": 0.9130720817290239, + "learning_rate": 9.057573079832566e-06, + "loss": 0.233, + "step": 64850 + }, + { + "epoch": 0.8364876803136506, + "grad_norm": 0.8306054973601329, + "learning_rate": 9.057134608150806e-06, + "loss": 0.224, + "step": 64860 + }, + { + "epoch": 0.8366166485036466, + "grad_norm": 0.8228209140605566, + "learning_rate": 9.056696045109805e-06, + "loss": 0.2174, + "step": 64870 + }, + { + "epoch": 0.8367456166936426, + "grad_norm": 0.8164600643365234, + "learning_rate": 9.056257390719437e-06, + "loss": 0.2189, + "step": 64880 + }, + { + "epoch": 0.8368745848836384, + "grad_norm": 0.8278661641882825, + "learning_rate": 9.055818644989582e-06, + "loss": 0.246, + "step": 64890 + }, + { + "epoch": 0.8370035530736344, + "grad_norm": 0.8976260907358365, + "learning_rate": 9.055379807930117e-06, + "loss": 0.2256, + "step": 64900 + }, + { + "epoch": 0.8371325212636304, + "grad_norm": 0.7437968653507289, + "learning_rate": 9.054940879550925e-06, + "loss": 0.2176, + "step": 64910 + }, + { + "epoch": 0.8372614894536262, + "grad_norm": 0.8417743981496573, + "learning_rate": 9.05450185986189e-06, + "loss": 0.2237, + "step": 64920 + }, + { + "epoch": 0.8373904576436222, + "grad_norm": 0.8299286266415856, + "learning_rate": 9.054062748872899e-06, + "loss": 0.2307, + "step": 64930 + }, + { + "epoch": 0.8375194258336182, + "grad_norm": 0.8739061319277803, + "learning_rate": 9.053623546593838e-06, + "loss": 0.2218, + "step": 64940 + }, + { + "epoch": 0.8376483940236141, + "grad_norm": 0.8347090201968766, + "learning_rate": 9.053184253034599e-06, + "loss": 0.2151, + "step": 64950 + }, + { + "epoch": 0.83777736221361, + "grad_norm": 0.8467883696513651, + "learning_rate": 9.052744868205072e-06, + "loss": 0.2192, + "step": 64960 + }, + { + "epoch": 0.837906330403606, + "grad_norm": 0.8176356289207612, + "learning_rate": 9.052305392115153e-06, + "loss": 0.2217, + "step": 64970 + }, + { + "epoch": 0.8380352985936019, + "grad_norm": 0.8482533990220804, + "learning_rate": 9.051865824774736e-06, + "loss": 0.2386, + "step": 64980 + }, + { + "epoch": 0.8381642667835978, + "grad_norm": 0.8181875078382674, + "learning_rate": 9.051426166193721e-06, + "loss": 0.2195, + "step": 64990 + }, + { + "epoch": 0.8382932349735938, + "grad_norm": 0.7975355729671088, + "learning_rate": 9.050986416382009e-06, + "loss": 0.2289, + "step": 65000 + }, + { + "epoch": 0.8384222031635897, + "grad_norm": 0.8056960166908121, + "learning_rate": 9.0505465753495e-06, + "loss": 0.2175, + "step": 65010 + }, + { + "epoch": 0.8385511713535856, + "grad_norm": 0.8155361193902007, + "learning_rate": 9.050106643106099e-06, + "loss": 0.2354, + "step": 65020 + }, + { + "epoch": 0.8386801395435816, + "grad_norm": 0.8706089196119781, + "learning_rate": 9.049666619661714e-06, + "loss": 0.2226, + "step": 65030 + }, + { + "epoch": 0.8388091077335775, + "grad_norm": 0.8734030766265494, + "learning_rate": 9.049226505026253e-06, + "loss": 0.2195, + "step": 65040 + }, + { + "epoch": 0.8389380759235735, + "grad_norm": 1.10698705063715, + "learning_rate": 9.048786299209627e-06, + "loss": 0.2318, + "step": 65050 + }, + { + "epoch": 0.8390670441135694, + "grad_norm": 0.8571644050650815, + "learning_rate": 9.048346002221746e-06, + "loss": 0.2408, + "step": 65060 + }, + { + "epoch": 0.8391960123035653, + "grad_norm": 0.8689098743576165, + "learning_rate": 9.047905614072528e-06, + "loss": 0.2304, + "step": 65070 + }, + { + "epoch": 0.8393249804935613, + "grad_norm": 0.748123458057846, + "learning_rate": 9.047465134771889e-06, + "loss": 0.2307, + "step": 65080 + }, + { + "epoch": 0.8394539486835572, + "grad_norm": 0.9053509552458463, + "learning_rate": 9.047024564329745e-06, + "loss": 0.2478, + "step": 65090 + }, + { + "epoch": 0.8395829168735531, + "grad_norm": 0.8235018512681945, + "learning_rate": 9.046583902756019e-06, + "loss": 0.2198, + "step": 65100 + }, + { + "epoch": 0.8397118850635491, + "grad_norm": 0.8846073360578945, + "learning_rate": 9.046143150060635e-06, + "loss": 0.2265, + "step": 65110 + }, + { + "epoch": 0.839840853253545, + "grad_norm": 0.8834140865198608, + "learning_rate": 9.045702306253516e-06, + "loss": 0.2291, + "step": 65120 + }, + { + "epoch": 0.8399698214435409, + "grad_norm": 0.8093644207052707, + "learning_rate": 9.04526137134459e-06, + "loss": 0.2272, + "step": 65130 + }, + { + "epoch": 0.8400987896335369, + "grad_norm": 0.9329147492603046, + "learning_rate": 9.044820345343786e-06, + "loss": 0.2375, + "step": 65140 + }, + { + "epoch": 0.8402277578235329, + "grad_norm": 0.8155170705040177, + "learning_rate": 9.044379228261036e-06, + "loss": 0.2319, + "step": 65150 + }, + { + "epoch": 0.8403567260135287, + "grad_norm": 0.8741582551059842, + "learning_rate": 9.04393802010627e-06, + "loss": 0.2347, + "step": 65160 + }, + { + "epoch": 0.8404856942035247, + "grad_norm": 0.9440584141989921, + "learning_rate": 9.043496720889426e-06, + "loss": 0.2367, + "step": 65170 + }, + { + "epoch": 0.8406146623935207, + "grad_norm": 0.8746525879306345, + "learning_rate": 9.043055330620441e-06, + "loss": 0.2323, + "step": 65180 + }, + { + "epoch": 0.8407436305835165, + "grad_norm": 0.8462751720652922, + "learning_rate": 9.042613849309251e-06, + "loss": 0.2316, + "step": 65190 + }, + { + "epoch": 0.8408725987735125, + "grad_norm": 0.8026484494006175, + "learning_rate": 9.042172276965804e-06, + "loss": 0.2291, + "step": 65200 + }, + { + "epoch": 0.8410015669635085, + "grad_norm": 0.8447753232482681, + "learning_rate": 9.041730613600037e-06, + "loss": 0.2283, + "step": 65210 + }, + { + "epoch": 0.8411305351535043, + "grad_norm": 0.8393136004187383, + "learning_rate": 9.0412888592219e-06, + "loss": 0.2293, + "step": 65220 + }, + { + "epoch": 0.8412595033435003, + "grad_norm": 1.0056596436870298, + "learning_rate": 9.040847013841337e-06, + "loss": 0.2319, + "step": 65230 + }, + { + "epoch": 0.8413884715334963, + "grad_norm": 0.8392768902255808, + "learning_rate": 9.0404050774683e-06, + "loss": 0.2282, + "step": 65240 + }, + { + "epoch": 0.8415174397234922, + "grad_norm": 0.9093876757060448, + "learning_rate": 9.03996305011274e-06, + "loss": 0.2434, + "step": 65250 + }, + { + "epoch": 0.8416464079134881, + "grad_norm": 0.8439448216388767, + "learning_rate": 9.039520931784608e-06, + "loss": 0.2208, + "step": 65260 + }, + { + "epoch": 0.8417753761034841, + "grad_norm": 0.818298382823253, + "learning_rate": 9.039078722493864e-06, + "loss": 0.2243, + "step": 65270 + }, + { + "epoch": 0.84190434429348, + "grad_norm": 0.9329623045500749, + "learning_rate": 9.038636422250461e-06, + "loss": 0.2344, + "step": 65280 + }, + { + "epoch": 0.8420333124834759, + "grad_norm": 0.812569388600852, + "learning_rate": 9.038194031064365e-06, + "loss": 0.2328, + "step": 65290 + }, + { + "epoch": 0.8421622806734719, + "grad_norm": 0.8575436344221639, + "learning_rate": 9.037751548945532e-06, + "loss": 0.2359, + "step": 65300 + }, + { + "epoch": 0.8422912488634678, + "grad_norm": 1.0132972651463694, + "learning_rate": 9.037308975903929e-06, + "loss": 0.2227, + "step": 65310 + }, + { + "epoch": 0.8424202170534638, + "grad_norm": 0.850900235572703, + "learning_rate": 9.03686631194952e-06, + "loss": 0.2423, + "step": 65320 + }, + { + "epoch": 0.8425491852434597, + "grad_norm": 0.8749401440043194, + "learning_rate": 9.036423557092277e-06, + "loss": 0.2343, + "step": 65330 + }, + { + "epoch": 0.8426781534334556, + "grad_norm": 0.8285682656362117, + "learning_rate": 9.035980711342165e-06, + "loss": 0.2313, + "step": 65340 + }, + { + "epoch": 0.8428071216234516, + "grad_norm": 0.8724021561721107, + "learning_rate": 9.03553777470916e-06, + "loss": 0.2355, + "step": 65350 + }, + { + "epoch": 0.8429360898134475, + "grad_norm": 0.8561696850112023, + "learning_rate": 9.035094747203233e-06, + "loss": 0.2139, + "step": 65360 + }, + { + "epoch": 0.8430650580034434, + "grad_norm": 0.868150244887284, + "learning_rate": 9.034651628834361e-06, + "loss": 0.2171, + "step": 65370 + }, + { + "epoch": 0.8431940261934394, + "grad_norm": 0.8558556187265467, + "learning_rate": 9.034208419612525e-06, + "loss": 0.2242, + "step": 65380 + }, + { + "epoch": 0.8433229943834353, + "grad_norm": 0.869564114836693, + "learning_rate": 9.0337651195477e-06, + "loss": 0.2285, + "step": 65390 + }, + { + "epoch": 0.8434519625734312, + "grad_norm": 0.8571551914889437, + "learning_rate": 9.033321728649874e-06, + "loss": 0.2372, + "step": 65400 + }, + { + "epoch": 0.8435809307634272, + "grad_norm": 0.8892243914838742, + "learning_rate": 9.032878246929026e-06, + "loss": 0.2245, + "step": 65410 + }, + { + "epoch": 0.8437098989534232, + "grad_norm": 0.9615257989409014, + "learning_rate": 9.032434674395147e-06, + "loss": 0.251, + "step": 65420 + }, + { + "epoch": 0.843838867143419, + "grad_norm": 0.8989588140161194, + "learning_rate": 9.031991011058223e-06, + "loss": 0.2329, + "step": 65430 + }, + { + "epoch": 0.843967835333415, + "grad_norm": 0.8940364857908322, + "learning_rate": 9.031547256928246e-06, + "loss": 0.2292, + "step": 65440 + }, + { + "epoch": 0.844096803523411, + "grad_norm": 0.8608172388543922, + "learning_rate": 9.031103412015207e-06, + "loss": 0.2309, + "step": 65450 + }, + { + "epoch": 0.8442257717134068, + "grad_norm": 0.9031131836086804, + "learning_rate": 9.0306594763291e-06, + "loss": 0.2415, + "step": 65460 + }, + { + "epoch": 0.8443547399034028, + "grad_norm": 0.8757769427149762, + "learning_rate": 9.030215449879925e-06, + "loss": 0.2219, + "step": 65470 + }, + { + "epoch": 0.8444837080933988, + "grad_norm": 0.9167486284286441, + "learning_rate": 9.029771332677676e-06, + "loss": 0.2345, + "step": 65480 + }, + { + "epoch": 0.8446126762833946, + "grad_norm": 0.9176873187771709, + "learning_rate": 9.029327124732356e-06, + "loss": 0.2359, + "step": 65490 + }, + { + "epoch": 0.8447416444733906, + "grad_norm": 0.8793358489999131, + "learning_rate": 9.028882826053969e-06, + "loss": 0.2338, + "step": 65500 + }, + { + "epoch": 0.8448706126633866, + "grad_norm": 0.8844430914597898, + "learning_rate": 9.028438436652519e-06, + "loss": 0.2359, + "step": 65510 + }, + { + "epoch": 0.8449995808533826, + "grad_norm": 0.7609088793398416, + "learning_rate": 9.027993956538012e-06, + "loss": 0.2413, + "step": 65520 + }, + { + "epoch": 0.8451285490433784, + "grad_norm": 0.8628276047234184, + "learning_rate": 9.027549385720458e-06, + "loss": 0.2175, + "step": 65530 + }, + { + "epoch": 0.8452575172333744, + "grad_norm": 0.9468708649306977, + "learning_rate": 9.027104724209866e-06, + "loss": 0.2269, + "step": 65540 + }, + { + "epoch": 0.8453864854233704, + "grad_norm": 0.8825637731394841, + "learning_rate": 9.02665997201625e-06, + "loss": 0.222, + "step": 65550 + }, + { + "epoch": 0.8455154536133662, + "grad_norm": 0.8294131405334949, + "learning_rate": 9.026215129149627e-06, + "loss": 0.2363, + "step": 65560 + }, + { + "epoch": 0.8456444218033622, + "grad_norm": 0.988699225910513, + "learning_rate": 9.02577019562001e-06, + "loss": 0.2326, + "step": 65570 + }, + { + "epoch": 0.8457733899933582, + "grad_norm": 0.9079051987597421, + "learning_rate": 9.025325171437422e-06, + "loss": 0.2267, + "step": 65580 + }, + { + "epoch": 0.8459023581833541, + "grad_norm": 0.9414066098426452, + "learning_rate": 9.024880056611882e-06, + "loss": 0.2187, + "step": 65590 + }, + { + "epoch": 0.84603132637335, + "grad_norm": 0.8924618665887485, + "learning_rate": 9.024434851153414e-06, + "loss": 0.2238, + "step": 65600 + }, + { + "epoch": 0.846160294563346, + "grad_norm": 0.8907819914072412, + "learning_rate": 9.023989555072044e-06, + "loss": 0.2311, + "step": 65610 + }, + { + "epoch": 0.8462892627533419, + "grad_norm": 0.8707451650525295, + "learning_rate": 9.023544168377796e-06, + "loss": 0.2268, + "step": 65620 + }, + { + "epoch": 0.8464182309433378, + "grad_norm": 0.8024030105034442, + "learning_rate": 9.023098691080703e-06, + "loss": 0.2284, + "step": 65630 + }, + { + "epoch": 0.8465471991333338, + "grad_norm": 0.8773925562272344, + "learning_rate": 9.022653123190792e-06, + "loss": 0.2357, + "step": 65640 + }, + { + "epoch": 0.8466761673233297, + "grad_norm": 0.9074589750744898, + "learning_rate": 9.022207464718102e-06, + "loss": 0.2267, + "step": 65650 + }, + { + "epoch": 0.8468051355133256, + "grad_norm": 0.8373134993651135, + "learning_rate": 9.021761715672664e-06, + "loss": 0.223, + "step": 65660 + }, + { + "epoch": 0.8469341037033216, + "grad_norm": 0.7742705275933371, + "learning_rate": 9.021315876064518e-06, + "loss": 0.2277, + "step": 65670 + }, + { + "epoch": 0.8470630718933175, + "grad_norm": 0.8015865543988436, + "learning_rate": 9.020869945903704e-06, + "loss": 0.2337, + "step": 65680 + }, + { + "epoch": 0.8471920400833135, + "grad_norm": 0.8417111752498531, + "learning_rate": 9.020423925200258e-06, + "loss": 0.2274, + "step": 65690 + }, + { + "epoch": 0.8473210082733094, + "grad_norm": 0.8428112679164, + "learning_rate": 9.019977813964231e-06, + "loss": 0.2199, + "step": 65700 + }, + { + "epoch": 0.8474499764633053, + "grad_norm": 0.8515908127403334, + "learning_rate": 9.019531612205664e-06, + "loss": 0.2178, + "step": 65710 + }, + { + "epoch": 0.8475789446533013, + "grad_norm": 0.7786515903027388, + "learning_rate": 9.019085319934606e-06, + "loss": 0.2454, + "step": 65720 + }, + { + "epoch": 0.8477079128432972, + "grad_norm": 0.8946845668235611, + "learning_rate": 9.018638937161106e-06, + "loss": 0.2123, + "step": 65730 + }, + { + "epoch": 0.8478368810332931, + "grad_norm": 0.9020828321515206, + "learning_rate": 9.018192463895216e-06, + "loss": 0.245, + "step": 65740 + }, + { + "epoch": 0.8479658492232891, + "grad_norm": 1.1181615608052615, + "learning_rate": 9.01774590014699e-06, + "loss": 0.2295, + "step": 65750 + }, + { + "epoch": 0.848094817413285, + "grad_norm": 0.9341614232858609, + "learning_rate": 9.017299245926485e-06, + "loss": 0.2292, + "step": 65760 + }, + { + "epoch": 0.8482237856032809, + "grad_norm": 0.7998395021813642, + "learning_rate": 9.016852501243758e-06, + "loss": 0.2247, + "step": 65770 + }, + { + "epoch": 0.8483527537932769, + "grad_norm": 0.8731516655186039, + "learning_rate": 9.016405666108867e-06, + "loss": 0.2372, + "step": 65780 + }, + { + "epoch": 0.8484817219832729, + "grad_norm": 0.8449151654329594, + "learning_rate": 9.015958740531876e-06, + "loss": 0.2397, + "step": 65790 + }, + { + "epoch": 0.8486106901732687, + "grad_norm": 0.8519539044356517, + "learning_rate": 9.015511724522849e-06, + "loss": 0.2203, + "step": 65800 + }, + { + "epoch": 0.8487396583632647, + "grad_norm": 0.877557851353293, + "learning_rate": 9.015064618091852e-06, + "loss": 0.2207, + "step": 65810 + }, + { + "epoch": 0.8488686265532607, + "grad_norm": 0.8606077653789884, + "learning_rate": 9.01461742124895e-06, + "loss": 0.2238, + "step": 65820 + }, + { + "epoch": 0.8489975947432565, + "grad_norm": 0.8953659684546147, + "learning_rate": 9.014170134004216e-06, + "loss": 0.2212, + "step": 65830 + }, + { + "epoch": 0.8491265629332525, + "grad_norm": 0.88411778008728, + "learning_rate": 9.013722756367721e-06, + "loss": 0.2213, + "step": 65840 + }, + { + "epoch": 0.8492555311232485, + "grad_norm": 0.9105633351083676, + "learning_rate": 9.013275288349542e-06, + "loss": 0.2323, + "step": 65850 + }, + { + "epoch": 0.8493844993132443, + "grad_norm": 0.8431978576043578, + "learning_rate": 9.012827729959751e-06, + "loss": 0.2286, + "step": 65860 + }, + { + "epoch": 0.8495134675032403, + "grad_norm": 0.8839360929499056, + "learning_rate": 9.012380081208429e-06, + "loss": 0.2352, + "step": 65870 + }, + { + "epoch": 0.8496424356932363, + "grad_norm": 0.8619986000475006, + "learning_rate": 9.011932342105654e-06, + "loss": 0.2317, + "step": 65880 + }, + { + "epoch": 0.8497714038832322, + "grad_norm": 0.8484807884891473, + "learning_rate": 9.01148451266151e-06, + "loss": 0.2227, + "step": 65890 + }, + { + "epoch": 0.8499003720732281, + "grad_norm": 0.8718633709592223, + "learning_rate": 9.011036592886082e-06, + "loss": 0.2251, + "step": 65900 + }, + { + "epoch": 0.8500293402632241, + "grad_norm": 0.8437985972103236, + "learning_rate": 9.010588582789454e-06, + "loss": 0.2268, + "step": 65910 + }, + { + "epoch": 0.85015830845322, + "grad_norm": 0.9135149445290782, + "learning_rate": 9.010140482381716e-06, + "loss": 0.2219, + "step": 65920 + }, + { + "epoch": 0.8502872766432159, + "grad_norm": 0.9827158496919997, + "learning_rate": 9.009692291672957e-06, + "loss": 0.2305, + "step": 65930 + }, + { + "epoch": 0.8504162448332119, + "grad_norm": 0.8236792662634435, + "learning_rate": 9.009244010673271e-06, + "loss": 0.2256, + "step": 65940 + }, + { + "epoch": 0.8505452130232078, + "grad_norm": 0.9634371263794467, + "learning_rate": 9.008795639392751e-06, + "loss": 0.2135, + "step": 65950 + }, + { + "epoch": 0.8506741812132038, + "grad_norm": 0.9367377970677762, + "learning_rate": 9.008347177841496e-06, + "loss": 0.2214, + "step": 65960 + }, + { + "epoch": 0.8508031494031997, + "grad_norm": 0.9725577383875492, + "learning_rate": 9.0078986260296e-06, + "loss": 0.237, + "step": 65970 + }, + { + "epoch": 0.8509321175931956, + "grad_norm": 0.8706573264882507, + "learning_rate": 9.007449983967169e-06, + "loss": 0.2322, + "step": 65980 + }, + { + "epoch": 0.8510610857831916, + "grad_norm": 0.8311716518887827, + "learning_rate": 9.007001251664302e-06, + "loss": 0.2297, + "step": 65990 + }, + { + "epoch": 0.8511900539731875, + "grad_norm": 0.8193122351814635, + "learning_rate": 9.006552429131104e-06, + "loss": 0.23, + "step": 66000 + }, + { + "epoch": 0.8513190221631834, + "grad_norm": 0.9566214880187868, + "learning_rate": 9.006103516377685e-06, + "loss": 0.2386, + "step": 66010 + }, + { + "epoch": 0.8514479903531794, + "grad_norm": 0.8683413338391435, + "learning_rate": 9.005654513414149e-06, + "loss": 0.2364, + "step": 66020 + }, + { + "epoch": 0.8515769585431753, + "grad_norm": 0.8438011454368682, + "learning_rate": 9.005205420250608e-06, + "loss": 0.2285, + "step": 66030 + }, + { + "epoch": 0.8517059267331712, + "grad_norm": 0.938079938379517, + "learning_rate": 9.004756236897177e-06, + "loss": 0.2236, + "step": 66040 + }, + { + "epoch": 0.8518348949231672, + "grad_norm": 0.8748274406655758, + "learning_rate": 9.004306963363967e-06, + "loss": 0.2129, + "step": 66050 + }, + { + "epoch": 0.8519638631131632, + "grad_norm": 0.8951401648632226, + "learning_rate": 9.0038575996611e-06, + "loss": 0.2335, + "step": 66060 + }, + { + "epoch": 0.852092831303159, + "grad_norm": 0.9561668924655758, + "learning_rate": 9.00340814579869e-06, + "loss": 0.2242, + "step": 66070 + }, + { + "epoch": 0.852221799493155, + "grad_norm": 0.8475082159733489, + "learning_rate": 9.002958601786858e-06, + "loss": 0.2442, + "step": 66080 + }, + { + "epoch": 0.852350767683151, + "grad_norm": 0.8893277781366835, + "learning_rate": 9.002508967635732e-06, + "loss": 0.2157, + "step": 66090 + }, + { + "epoch": 0.8524797358731468, + "grad_norm": 0.7946796025066727, + "learning_rate": 9.002059243355431e-06, + "loss": 0.2358, + "step": 66100 + }, + { + "epoch": 0.8526087040631428, + "grad_norm": 0.9540945632484582, + "learning_rate": 9.001609428956087e-06, + "loss": 0.2217, + "step": 66110 + }, + { + "epoch": 0.8527376722531388, + "grad_norm": 0.8203050495670801, + "learning_rate": 9.001159524447826e-06, + "loss": 0.2239, + "step": 66120 + }, + { + "epoch": 0.8528666404431346, + "grad_norm": 0.796860967561965, + "learning_rate": 9.000709529840779e-06, + "loss": 0.2348, + "step": 66130 + }, + { + "epoch": 0.8529956086331306, + "grad_norm": 0.8988818216193527, + "learning_rate": 9.000259445145081e-06, + "loss": 0.2379, + "step": 66140 + }, + { + "epoch": 0.8531245768231266, + "grad_norm": 0.8882547198524458, + "learning_rate": 8.999809270370863e-06, + "loss": 0.2334, + "step": 66150 + }, + { + "epoch": 0.8532535450131226, + "grad_norm": 0.89815655285106, + "learning_rate": 8.999359005528267e-06, + "loss": 0.2068, + "step": 66160 + }, + { + "epoch": 0.8533825132031184, + "grad_norm": 0.7857824803874155, + "learning_rate": 8.99890865062743e-06, + "loss": 0.2261, + "step": 66170 + }, + { + "epoch": 0.8535114813931144, + "grad_norm": 0.8161795202299892, + "learning_rate": 8.998458205678492e-06, + "loss": 0.2391, + "step": 66180 + }, + { + "epoch": 0.8536404495831104, + "grad_norm": 0.8339552813064769, + "learning_rate": 8.9980076706916e-06, + "loss": 0.232, + "step": 66190 + }, + { + "epoch": 0.8537694177731062, + "grad_norm": 0.7931649472971514, + "learning_rate": 8.997557045676893e-06, + "loss": 0.2317, + "step": 66200 + }, + { + "epoch": 0.8538983859631022, + "grad_norm": 0.728203347700758, + "learning_rate": 8.997106330644524e-06, + "loss": 0.2263, + "step": 66210 + }, + { + "epoch": 0.8540273541530982, + "grad_norm": 0.8793735145713963, + "learning_rate": 8.99665552560464e-06, + "loss": 0.2429, + "step": 66220 + }, + { + "epoch": 0.854156322343094, + "grad_norm": 0.8810872634354854, + "learning_rate": 8.99620463056739e-06, + "loss": 0.2271, + "step": 66230 + }, + { + "epoch": 0.85428529053309, + "grad_norm": 0.8352875729230855, + "learning_rate": 8.995753645542933e-06, + "loss": 0.2252, + "step": 66240 + }, + { + "epoch": 0.854414258723086, + "grad_norm": 0.8945981708947269, + "learning_rate": 8.995302570541419e-06, + "loss": 0.2389, + "step": 66250 + }, + { + "epoch": 0.8545432269130819, + "grad_norm": 0.8625314097685026, + "learning_rate": 8.99485140557301e-06, + "loss": 0.2269, + "step": 66260 + }, + { + "epoch": 0.8546721951030778, + "grad_norm": 0.7991948380621154, + "learning_rate": 8.99440015064786e-06, + "loss": 0.2209, + "step": 66270 + }, + { + "epoch": 0.8548011632930738, + "grad_norm": 0.8804289889986386, + "learning_rate": 8.993948805776136e-06, + "loss": 0.2333, + "step": 66280 + }, + { + "epoch": 0.8549301314830697, + "grad_norm": 0.7555707970288991, + "learning_rate": 8.993497370967996e-06, + "loss": 0.2357, + "step": 66290 + }, + { + "epoch": 0.8550590996730656, + "grad_norm": 0.8886947349579386, + "learning_rate": 8.99304584623361e-06, + "loss": 0.2339, + "step": 66300 + }, + { + "epoch": 0.8551880678630616, + "grad_norm": 0.9055052570793879, + "learning_rate": 8.992594231583144e-06, + "loss": 0.2295, + "step": 66310 + }, + { + "epoch": 0.8553170360530575, + "grad_norm": 0.879286737284916, + "learning_rate": 8.992142527026768e-06, + "loss": 0.245, + "step": 66320 + }, + { + "epoch": 0.8554460042430535, + "grad_norm": 0.9137532386307876, + "learning_rate": 8.99169073257465e-06, + "loss": 0.2342, + "step": 66330 + }, + { + "epoch": 0.8555749724330494, + "grad_norm": 0.8353927487456254, + "learning_rate": 8.99123884823697e-06, + "loss": 0.2286, + "step": 66340 + }, + { + "epoch": 0.8557039406230453, + "grad_norm": 0.8890561516151438, + "learning_rate": 8.990786874023898e-06, + "loss": 0.2267, + "step": 66350 + }, + { + "epoch": 0.8558329088130413, + "grad_norm": 0.7771441679042262, + "learning_rate": 8.990334809945614e-06, + "loss": 0.2174, + "step": 66360 + }, + { + "epoch": 0.8559618770030372, + "grad_norm": 0.9396843951405931, + "learning_rate": 8.989882656012297e-06, + "loss": 0.2218, + "step": 66370 + }, + { + "epoch": 0.8560908451930331, + "grad_norm": 0.8105220635226026, + "learning_rate": 8.989430412234128e-06, + "loss": 0.2306, + "step": 66380 + }, + { + "epoch": 0.8562198133830291, + "grad_norm": 0.8577797930608907, + "learning_rate": 8.988978078621295e-06, + "loss": 0.2336, + "step": 66390 + }, + { + "epoch": 0.856348781573025, + "grad_norm": 0.8392479185252351, + "learning_rate": 8.988525655183976e-06, + "loss": 0.2387, + "step": 66400 + }, + { + "epoch": 0.8564777497630209, + "grad_norm": 0.8326727072501162, + "learning_rate": 8.988073141932367e-06, + "loss": 0.2238, + "step": 66410 + }, + { + "epoch": 0.8566067179530169, + "grad_norm": 0.8527182001088377, + "learning_rate": 8.987620538876654e-06, + "loss": 0.2249, + "step": 66420 + }, + { + "epoch": 0.8567356861430129, + "grad_norm": 0.8387998734280415, + "learning_rate": 8.987167846027027e-06, + "loss": 0.2434, + "step": 66430 + }, + { + "epoch": 0.8568646543330087, + "grad_norm": 0.9819788133261144, + "learning_rate": 8.986715063393683e-06, + "loss": 0.2296, + "step": 66440 + }, + { + "epoch": 0.8569936225230047, + "grad_norm": 0.8249111144598823, + "learning_rate": 8.986262190986814e-06, + "loss": 0.229, + "step": 66450 + }, + { + "epoch": 0.8571225907130007, + "grad_norm": 0.8905276619178446, + "learning_rate": 8.985809228816622e-06, + "loss": 0.243, + "step": 66460 + }, + { + "epoch": 0.8572515589029965, + "grad_norm": 0.8354472555091588, + "learning_rate": 8.985356176893305e-06, + "loss": 0.2178, + "step": 66470 + }, + { + "epoch": 0.8573805270929925, + "grad_norm": 0.7920710722725273, + "learning_rate": 8.984903035227066e-06, + "loss": 0.2301, + "step": 66480 + }, + { + "epoch": 0.8575094952829885, + "grad_norm": 0.92214371949071, + "learning_rate": 8.984449803828106e-06, + "loss": 0.2366, + "step": 66490 + }, + { + "epoch": 0.8576384634729843, + "grad_norm": 0.8314780145661164, + "learning_rate": 8.983996482706634e-06, + "loss": 0.233, + "step": 66500 + }, + { + "epoch": 0.8577674316629803, + "grad_norm": 0.9057160021961826, + "learning_rate": 8.983543071872857e-06, + "loss": 0.2277, + "step": 66510 + }, + { + "epoch": 0.8578963998529763, + "grad_norm": 0.8717562801020727, + "learning_rate": 8.983089571336986e-06, + "loss": 0.2244, + "step": 66520 + }, + { + "epoch": 0.8580253680429722, + "grad_norm": 0.8677947574574136, + "learning_rate": 8.982635981109232e-06, + "loss": 0.2252, + "step": 66530 + }, + { + "epoch": 0.8581543362329681, + "grad_norm": 0.8535870472201552, + "learning_rate": 8.982182301199806e-06, + "loss": 0.2456, + "step": 66540 + }, + { + "epoch": 0.8582833044229641, + "grad_norm": 1.013414076971652, + "learning_rate": 8.981728531618929e-06, + "loss": 0.2244, + "step": 66550 + }, + { + "epoch": 0.85841227261296, + "grad_norm": 0.8338511023211077, + "learning_rate": 8.981274672376816e-06, + "loss": 0.2328, + "step": 66560 + }, + { + "epoch": 0.8585412408029559, + "grad_norm": 0.9621457768961477, + "learning_rate": 8.980820723483687e-06, + "loss": 0.2296, + "step": 66570 + }, + { + "epoch": 0.8586702089929519, + "grad_norm": 0.9012517717819463, + "learning_rate": 8.980366684949769e-06, + "loss": 0.2296, + "step": 66580 + }, + { + "epoch": 0.8587991771829478, + "grad_norm": 0.8576870509560233, + "learning_rate": 8.97991255678528e-06, + "loss": 0.2359, + "step": 66590 + }, + { + "epoch": 0.8589281453729438, + "grad_norm": 0.8936062643769123, + "learning_rate": 8.979458339000447e-06, + "loss": 0.2317, + "step": 66600 + }, + { + "epoch": 0.8590571135629397, + "grad_norm": 0.7719234616716444, + "learning_rate": 8.979004031605502e-06, + "loss": 0.2167, + "step": 66610 + }, + { + "epoch": 0.8591860817529356, + "grad_norm": 0.8562873464808293, + "learning_rate": 8.978549634610672e-06, + "loss": 0.2293, + "step": 66620 + }, + { + "epoch": 0.8593150499429316, + "grad_norm": 0.8929868062181031, + "learning_rate": 8.97809514802619e-06, + "loss": 0.2367, + "step": 66630 + }, + { + "epoch": 0.8594440181329275, + "grad_norm": 0.9201673350836108, + "learning_rate": 8.97764057186229e-06, + "loss": 0.2281, + "step": 66640 + }, + { + "epoch": 0.8595729863229234, + "grad_norm": 0.7980340637593485, + "learning_rate": 8.977185906129206e-06, + "loss": 0.2421, + "step": 66650 + }, + { + "epoch": 0.8597019545129194, + "grad_norm": 0.8090942211769064, + "learning_rate": 8.97673115083718e-06, + "loss": 0.2374, + "step": 66660 + }, + { + "epoch": 0.8598309227029153, + "grad_norm": 0.8230939642696399, + "learning_rate": 8.976276305996453e-06, + "loss": 0.2313, + "step": 66670 + }, + { + "epoch": 0.8599598908929112, + "grad_norm": 0.8366535503143444, + "learning_rate": 8.975821371617264e-06, + "loss": 0.2255, + "step": 66680 + }, + { + "epoch": 0.8600888590829072, + "grad_norm": 0.8562497465815583, + "learning_rate": 8.975366347709858e-06, + "loss": 0.2216, + "step": 66690 + }, + { + "epoch": 0.8602178272729032, + "grad_norm": 0.935109905792708, + "learning_rate": 8.974911234284483e-06, + "loss": 0.2252, + "step": 66700 + }, + { + "epoch": 0.860346795462899, + "grad_norm": 0.9057623065515363, + "learning_rate": 8.974456031351384e-06, + "loss": 0.218, + "step": 66710 + }, + { + "epoch": 0.860475763652895, + "grad_norm": 0.8661329459609809, + "learning_rate": 8.974000738920814e-06, + "loss": 0.2271, + "step": 66720 + }, + { + "epoch": 0.860604731842891, + "grad_norm": 0.7993592784043613, + "learning_rate": 8.973545357003026e-06, + "loss": 0.2238, + "step": 66730 + }, + { + "epoch": 0.8607337000328869, + "grad_norm": 0.8769647067737295, + "learning_rate": 8.973089885608271e-06, + "loss": 0.2225, + "step": 66740 + }, + { + "epoch": 0.8608626682228828, + "grad_norm": 0.836430807006964, + "learning_rate": 8.97263432474681e-06, + "loss": 0.2348, + "step": 66750 + }, + { + "epoch": 0.8609916364128788, + "grad_norm": 0.9360894293066336, + "learning_rate": 8.972178674428898e-06, + "loss": 0.2246, + "step": 66760 + }, + { + "epoch": 0.8611206046028747, + "grad_norm": 0.8508323231468646, + "learning_rate": 8.971722934664795e-06, + "loss": 0.232, + "step": 66770 + }, + { + "epoch": 0.8612495727928706, + "grad_norm": 0.7996084392403877, + "learning_rate": 8.971267105464765e-06, + "loss": 0.2239, + "step": 66780 + }, + { + "epoch": 0.8613785409828666, + "grad_norm": 0.8271818069806385, + "learning_rate": 8.970811186839072e-06, + "loss": 0.2155, + "step": 66790 + }, + { + "epoch": 0.8615075091728626, + "grad_norm": 0.8733311610084791, + "learning_rate": 8.970355178797984e-06, + "loss": 0.2193, + "step": 66800 + }, + { + "epoch": 0.8616364773628584, + "grad_norm": 0.8631429992485736, + "learning_rate": 8.969899081351768e-06, + "loss": 0.2259, + "step": 66810 + }, + { + "epoch": 0.8617654455528544, + "grad_norm": 0.9189063629695973, + "learning_rate": 8.969442894510693e-06, + "loss": 0.2372, + "step": 66820 + }, + { + "epoch": 0.8618944137428504, + "grad_norm": 0.779186723539233, + "learning_rate": 8.968986618285034e-06, + "loss": 0.2315, + "step": 66830 + }, + { + "epoch": 0.8620233819328462, + "grad_norm": 0.8354123339659284, + "learning_rate": 8.968530252685066e-06, + "loss": 0.2233, + "step": 66840 + }, + { + "epoch": 0.8621523501228422, + "grad_norm": 0.8203690047277605, + "learning_rate": 8.968073797721062e-06, + "loss": 0.2384, + "step": 66850 + }, + { + "epoch": 0.8622813183128382, + "grad_norm": 0.8791379984565928, + "learning_rate": 8.967617253403303e-06, + "loss": 0.2151, + "step": 66860 + }, + { + "epoch": 0.862410286502834, + "grad_norm": 0.9259869220749762, + "learning_rate": 8.967160619742068e-06, + "loss": 0.2254, + "step": 66870 + }, + { + "epoch": 0.86253925469283, + "grad_norm": 0.8539997062499968, + "learning_rate": 8.966703896747644e-06, + "loss": 0.2166, + "step": 66880 + }, + { + "epoch": 0.862668222882826, + "grad_norm": 0.8731552587343402, + "learning_rate": 8.966247084430309e-06, + "loss": 0.2248, + "step": 66890 + }, + { + "epoch": 0.8627971910728219, + "grad_norm": 0.8258055102645087, + "learning_rate": 8.965790182800353e-06, + "loss": 0.2302, + "step": 66900 + }, + { + "epoch": 0.8629261592628178, + "grad_norm": 0.7310859661459035, + "learning_rate": 8.965333191868066e-06, + "loss": 0.2239, + "step": 66910 + }, + { + "epoch": 0.8630551274528138, + "grad_norm": 0.860107129362315, + "learning_rate": 8.964876111643737e-06, + "loss": 0.2324, + "step": 66920 + }, + { + "epoch": 0.8631840956428097, + "grad_norm": 0.8492964365799943, + "learning_rate": 8.964418942137659e-06, + "loss": 0.2302, + "step": 66930 + }, + { + "epoch": 0.8633130638328056, + "grad_norm": 0.8856324852261416, + "learning_rate": 8.963961683360125e-06, + "loss": 0.2337, + "step": 66940 + }, + { + "epoch": 0.8634420320228016, + "grad_norm": 0.8779649947035042, + "learning_rate": 8.963504335321432e-06, + "loss": 0.2335, + "step": 66950 + }, + { + "epoch": 0.8635710002127975, + "grad_norm": 0.9122133744856896, + "learning_rate": 8.963046898031882e-06, + "loss": 0.2241, + "step": 66960 + }, + { + "epoch": 0.8636999684027935, + "grad_norm": 0.8677870764871038, + "learning_rate": 8.96258937150177e-06, + "loss": 0.2291, + "step": 66970 + }, + { + "epoch": 0.8638289365927894, + "grad_norm": 0.8248119220107719, + "learning_rate": 8.962131755741405e-06, + "loss": 0.2299, + "step": 66980 + }, + { + "epoch": 0.8639579047827853, + "grad_norm": 0.8590763321912805, + "learning_rate": 8.961674050761087e-06, + "loss": 0.2469, + "step": 66990 + }, + { + "epoch": 0.8640868729727813, + "grad_norm": 0.8439838506221499, + "learning_rate": 8.961216256571125e-06, + "loss": 0.2269, + "step": 67000 + }, + { + "epoch": 0.8642158411627772, + "grad_norm": 0.7779073852443683, + "learning_rate": 8.960758373181826e-06, + "loss": 0.2434, + "step": 67010 + }, + { + "epoch": 0.8643448093527731, + "grad_norm": 0.8340943864391602, + "learning_rate": 8.960300400603502e-06, + "loss": 0.2305, + "step": 67020 + }, + { + "epoch": 0.8644737775427691, + "grad_norm": 0.9297842352368084, + "learning_rate": 8.959842338846466e-06, + "loss": 0.2233, + "step": 67030 + }, + { + "epoch": 0.864602745732765, + "grad_norm": 0.8749503214662185, + "learning_rate": 8.95938418792103e-06, + "loss": 0.2318, + "step": 67040 + }, + { + "epoch": 0.8647317139227609, + "grad_norm": 0.8483536993705233, + "learning_rate": 8.958925947837513e-06, + "loss": 0.2311, + "step": 67050 + }, + { + "epoch": 0.8648606821127569, + "grad_norm": 0.8086228549333317, + "learning_rate": 8.958467618606234e-06, + "loss": 0.2321, + "step": 67060 + }, + { + "epoch": 0.8649896503027529, + "grad_norm": 0.8072407662015537, + "learning_rate": 8.958009200237514e-06, + "loss": 0.2284, + "step": 67070 + }, + { + "epoch": 0.8651186184927487, + "grad_norm": 0.9203424096800955, + "learning_rate": 8.957550692741675e-06, + "loss": 0.2303, + "step": 67080 + }, + { + "epoch": 0.8652475866827447, + "grad_norm": 0.8719199787795923, + "learning_rate": 8.957092096129042e-06, + "loss": 0.2136, + "step": 67090 + }, + { + "epoch": 0.8653765548727407, + "grad_norm": 0.7742742122652727, + "learning_rate": 8.956633410409938e-06, + "loss": 0.2245, + "step": 67100 + }, + { + "epoch": 0.8655055230627365, + "grad_norm": 0.8168386846742681, + "learning_rate": 8.956174635594699e-06, + "loss": 0.2259, + "step": 67110 + }, + { + "epoch": 0.8656344912527325, + "grad_norm": 0.8853699671264801, + "learning_rate": 8.95571577169365e-06, + "loss": 0.2375, + "step": 67120 + }, + { + "epoch": 0.8657634594427285, + "grad_norm": 0.9533644741720018, + "learning_rate": 8.955256818717126e-06, + "loss": 0.2218, + "step": 67130 + }, + { + "epoch": 0.8658924276327243, + "grad_norm": 0.8391510725806478, + "learning_rate": 8.954797776675461e-06, + "loss": 0.2204, + "step": 67140 + }, + { + "epoch": 0.8660213958227203, + "grad_norm": 0.889347503457279, + "learning_rate": 8.954338645578994e-06, + "loss": 0.2204, + "step": 67150 + }, + { + "epoch": 0.8661503640127163, + "grad_norm": 0.8294398071281678, + "learning_rate": 8.953879425438061e-06, + "loss": 0.2278, + "step": 67160 + }, + { + "epoch": 0.8662793322027122, + "grad_norm": 0.8092848516545812, + "learning_rate": 8.953420116263003e-06, + "loss": 0.2215, + "step": 67170 + }, + { + "epoch": 0.8664083003927081, + "grad_norm": 0.8575447516329556, + "learning_rate": 8.952960718064166e-06, + "loss": 0.2272, + "step": 67180 + }, + { + "epoch": 0.8665372685827041, + "grad_norm": 0.9105152128812452, + "learning_rate": 8.95250123085189e-06, + "loss": 0.2219, + "step": 67190 + }, + { + "epoch": 0.8666662367727, + "grad_norm": 0.9088112679366431, + "learning_rate": 8.952041654636525e-06, + "loss": 0.2293, + "step": 67200 + }, + { + "epoch": 0.8667952049626959, + "grad_norm": 0.8807931343427299, + "learning_rate": 8.951581989428419e-06, + "loss": 0.2381, + "step": 67210 + }, + { + "epoch": 0.8669241731526919, + "grad_norm": 0.8143010704596553, + "learning_rate": 8.951122235237924e-06, + "loss": 0.2205, + "step": 67220 + }, + { + "epoch": 0.8670531413426878, + "grad_norm": 0.8725633321951285, + "learning_rate": 8.950662392075392e-06, + "loss": 0.2229, + "step": 67230 + }, + { + "epoch": 0.8671821095326837, + "grad_norm": 0.9188772971145831, + "learning_rate": 8.950202459951176e-06, + "loss": 0.2254, + "step": 67240 + }, + { + "epoch": 0.8673110777226797, + "grad_norm": 0.9112723300976469, + "learning_rate": 8.949742438875636e-06, + "loss": 0.2435, + "step": 67250 + }, + { + "epoch": 0.8674400459126757, + "grad_norm": 0.742598955998047, + "learning_rate": 8.949282328859127e-06, + "loss": 0.217, + "step": 67260 + }, + { + "epoch": 0.8675690141026716, + "grad_norm": 0.7974914952823724, + "learning_rate": 8.948822129912013e-06, + "loss": 0.2255, + "step": 67270 + }, + { + "epoch": 0.8676979822926675, + "grad_norm": 0.7575752104688547, + "learning_rate": 8.948361842044656e-06, + "loss": 0.2211, + "step": 67280 + }, + { + "epoch": 0.8678269504826635, + "grad_norm": 0.8646523915263045, + "learning_rate": 8.947901465267421e-06, + "loss": 0.2341, + "step": 67290 + }, + { + "epoch": 0.8679559186726594, + "grad_norm": 0.8854003427487044, + "learning_rate": 8.947440999590675e-06, + "loss": 0.2285, + "step": 67300 + }, + { + "epoch": 0.8680848868626553, + "grad_norm": 0.919067323057518, + "learning_rate": 8.946980445024786e-06, + "loss": 0.2289, + "step": 67310 + }, + { + "epoch": 0.8682138550526513, + "grad_norm": 0.9472756963661014, + "learning_rate": 8.946519801580124e-06, + "loss": 0.2418, + "step": 67320 + }, + { + "epoch": 0.8683428232426472, + "grad_norm": 0.8496349362068953, + "learning_rate": 8.946059069267065e-06, + "loss": 0.238, + "step": 67330 + }, + { + "epoch": 0.8684717914326432, + "grad_norm": 0.9125828291291297, + "learning_rate": 8.945598248095981e-06, + "loss": 0.2346, + "step": 67340 + }, + { + "epoch": 0.868600759622639, + "grad_norm": 0.9278228493485454, + "learning_rate": 8.94513733807725e-06, + "loss": 0.2293, + "step": 67350 + }, + { + "epoch": 0.868729727812635, + "grad_norm": 0.822211813772678, + "learning_rate": 8.944676339221251e-06, + "loss": 0.2295, + "step": 67360 + }, + { + "epoch": 0.868858696002631, + "grad_norm": 0.8280899617377754, + "learning_rate": 8.944215251538364e-06, + "loss": 0.2166, + "step": 67370 + }, + { + "epoch": 0.8689876641926269, + "grad_norm": 0.8109073087869, + "learning_rate": 8.943754075038972e-06, + "loss": 0.2157, + "step": 67380 + }, + { + "epoch": 0.8691166323826228, + "grad_norm": 0.8582887302132891, + "learning_rate": 8.943292809733461e-06, + "loss": 0.2227, + "step": 67390 + }, + { + "epoch": 0.8692456005726188, + "grad_norm": 0.8397768516718995, + "learning_rate": 8.942831455632217e-06, + "loss": 0.2286, + "step": 67400 + }, + { + "epoch": 0.8693745687626147, + "grad_norm": 0.8430518926182274, + "learning_rate": 8.942370012745629e-06, + "loss": 0.2333, + "step": 67410 + }, + { + "epoch": 0.8695035369526106, + "grad_norm": 0.9060173454701969, + "learning_rate": 8.941908481084089e-06, + "loss": 0.2208, + "step": 67420 + }, + { + "epoch": 0.8696325051426066, + "grad_norm": 0.844987622189943, + "learning_rate": 8.941446860657987e-06, + "loss": 0.2313, + "step": 67430 + }, + { + "epoch": 0.8697614733326026, + "grad_norm": 0.8404835694286307, + "learning_rate": 8.94098515147772e-06, + "loss": 0.2191, + "step": 67440 + }, + { + "epoch": 0.8698904415225984, + "grad_norm": 0.8539642107429025, + "learning_rate": 8.940523353553683e-06, + "loss": 0.2237, + "step": 67450 + }, + { + "epoch": 0.8700194097125944, + "grad_norm": 0.8161776040421023, + "learning_rate": 8.940061466896278e-06, + "loss": 0.237, + "step": 67460 + }, + { + "epoch": 0.8701483779025904, + "grad_norm": 0.7716902700345373, + "learning_rate": 8.939599491515903e-06, + "loss": 0.2294, + "step": 67470 + }, + { + "epoch": 0.8702773460925862, + "grad_norm": 0.854281160168218, + "learning_rate": 8.93913742742296e-06, + "loss": 0.2354, + "step": 67480 + }, + { + "epoch": 0.8704063142825822, + "grad_norm": 0.864347411036399, + "learning_rate": 8.938675274627858e-06, + "loss": 0.2315, + "step": 67490 + }, + { + "epoch": 0.8705352824725782, + "grad_norm": 0.7666700536866755, + "learning_rate": 8.938213033141002e-06, + "loss": 0.2253, + "step": 67500 + }, + { + "epoch": 0.870664250662574, + "grad_norm": 0.792702400535911, + "learning_rate": 8.937750702972798e-06, + "loss": 0.2309, + "step": 67510 + }, + { + "epoch": 0.87079321885257, + "grad_norm": 0.9024737544972671, + "learning_rate": 8.93728828413366e-06, + "loss": 0.2254, + "step": 67520 + }, + { + "epoch": 0.870922187042566, + "grad_norm": 0.8962352298567156, + "learning_rate": 8.936825776634002e-06, + "loss": 0.2359, + "step": 67530 + }, + { + "epoch": 0.8710511552325619, + "grad_norm": 0.9961132387004988, + "learning_rate": 8.936363180484236e-06, + "loss": 0.2287, + "step": 67540 + }, + { + "epoch": 0.8711801234225578, + "grad_norm": 0.7864135088308752, + "learning_rate": 8.935900495694779e-06, + "loss": 0.2197, + "step": 67550 + }, + { + "epoch": 0.8713090916125538, + "grad_norm": 0.8645425441408797, + "learning_rate": 8.93543772227605e-06, + "loss": 0.2326, + "step": 67560 + }, + { + "epoch": 0.8714380598025497, + "grad_norm": 0.8848321846984449, + "learning_rate": 8.93497486023847e-06, + "loss": 0.2242, + "step": 67570 + }, + { + "epoch": 0.8715670279925456, + "grad_norm": 0.9257144771630896, + "learning_rate": 8.934511909592462e-06, + "loss": 0.2387, + "step": 67580 + }, + { + "epoch": 0.8716959961825416, + "grad_norm": 0.8647096053935286, + "learning_rate": 8.934048870348453e-06, + "loss": 0.2219, + "step": 67590 + }, + { + "epoch": 0.8718249643725375, + "grad_norm": 0.8097819343565891, + "learning_rate": 8.933585742516867e-06, + "loss": 0.229, + "step": 67600 + }, + { + "epoch": 0.8719539325625334, + "grad_norm": 0.8344698412842219, + "learning_rate": 8.933122526108131e-06, + "loss": 0.2371, + "step": 67610 + }, + { + "epoch": 0.8720829007525294, + "grad_norm": 1.0659996268972007, + "learning_rate": 8.93265922113268e-06, + "loss": 0.2196, + "step": 67620 + }, + { + "epoch": 0.8722118689425253, + "grad_norm": 0.8775144385089644, + "learning_rate": 8.932195827600944e-06, + "loss": 0.2184, + "step": 67630 + }, + { + "epoch": 0.8723408371325213, + "grad_norm": 0.8400055587530985, + "learning_rate": 8.93173234552336e-06, + "loss": 0.2132, + "step": 67640 + }, + { + "epoch": 0.8724698053225172, + "grad_norm": 0.780693961045412, + "learning_rate": 8.931268774910365e-06, + "loss": 0.2211, + "step": 67650 + }, + { + "epoch": 0.8725987735125131, + "grad_norm": 1.036172730209895, + "learning_rate": 8.930805115772393e-06, + "loss": 0.2541, + "step": 67660 + }, + { + "epoch": 0.8727277417025091, + "grad_norm": 0.7925357852991068, + "learning_rate": 8.93034136811989e-06, + "loss": 0.2248, + "step": 67670 + }, + { + "epoch": 0.872856709892505, + "grad_norm": 0.9186080185136859, + "learning_rate": 8.929877531963297e-06, + "loss": 0.2382, + "step": 67680 + }, + { + "epoch": 0.8729856780825009, + "grad_norm": 0.9053353125828705, + "learning_rate": 8.929413607313057e-06, + "loss": 0.2364, + "step": 67690 + }, + { + "epoch": 0.8731146462724969, + "grad_norm": 0.7779975573742219, + "learning_rate": 8.92894959417962e-06, + "loss": 0.233, + "step": 67700 + }, + { + "epoch": 0.8732436144624929, + "grad_norm": 0.861037433738289, + "learning_rate": 8.928485492573432e-06, + "loss": 0.2287, + "step": 67710 + }, + { + "epoch": 0.8733725826524887, + "grad_norm": 0.905430974661957, + "learning_rate": 8.928021302504946e-06, + "loss": 0.2347, + "step": 67720 + }, + { + "epoch": 0.8735015508424847, + "grad_norm": 0.8118796881341944, + "learning_rate": 8.927557023984612e-06, + "loss": 0.2284, + "step": 67730 + }, + { + "epoch": 0.8736305190324807, + "grad_norm": 0.8000674050164605, + "learning_rate": 8.927092657022887e-06, + "loss": 0.224, + "step": 67740 + }, + { + "epoch": 0.8737594872224765, + "grad_norm": 0.8953309557075912, + "learning_rate": 8.926628201630225e-06, + "loss": 0.2317, + "step": 67750 + }, + { + "epoch": 0.8738884554124725, + "grad_norm": 0.8947521860164961, + "learning_rate": 8.926163657817088e-06, + "loss": 0.2395, + "step": 67760 + }, + { + "epoch": 0.8740174236024685, + "grad_norm": 0.7832891502944207, + "learning_rate": 8.925699025593936e-06, + "loss": 0.2242, + "step": 67770 + }, + { + "epoch": 0.8741463917924643, + "grad_norm": 0.8172365601646874, + "learning_rate": 8.92523430497123e-06, + "loss": 0.2403, + "step": 67780 + }, + { + "epoch": 0.8742753599824603, + "grad_norm": 0.8566523283142696, + "learning_rate": 8.924769495959436e-06, + "loss": 0.2278, + "step": 67790 + }, + { + "epoch": 0.8744043281724563, + "grad_norm": 0.8684146613171371, + "learning_rate": 8.924304598569018e-06, + "loss": 0.2271, + "step": 67800 + }, + { + "epoch": 0.8745332963624523, + "grad_norm": 0.8590588275337941, + "learning_rate": 8.92383961281045e-06, + "loss": 0.2218, + "step": 67810 + }, + { + "epoch": 0.8746622645524481, + "grad_norm": 0.8501263149240819, + "learning_rate": 8.923374538694197e-06, + "loss": 0.2206, + "step": 67820 + }, + { + "epoch": 0.8747912327424441, + "grad_norm": 0.8306811102499184, + "learning_rate": 8.922909376230736e-06, + "loss": 0.2252, + "step": 67830 + }, + { + "epoch": 0.87492020093244, + "grad_norm": 0.8127091673122033, + "learning_rate": 8.922444125430538e-06, + "loss": 0.2038, + "step": 67840 + }, + { + "epoch": 0.8750491691224359, + "grad_norm": 0.8749537375672334, + "learning_rate": 8.921978786304082e-06, + "loss": 0.2338, + "step": 67850 + }, + { + "epoch": 0.8751781373124319, + "grad_norm": 0.9624087142161295, + "learning_rate": 8.921513358861847e-06, + "loss": 0.2352, + "step": 67860 + }, + { + "epoch": 0.8753071055024279, + "grad_norm": 0.8621929866230802, + "learning_rate": 8.92104784311431e-06, + "loss": 0.2394, + "step": 67870 + }, + { + "epoch": 0.8754360736924237, + "grad_norm": 0.7691316647293251, + "learning_rate": 8.920582239071957e-06, + "loss": 0.2274, + "step": 67880 + }, + { + "epoch": 0.8755650418824197, + "grad_norm": 0.8583951739556592, + "learning_rate": 8.920116546745273e-06, + "loss": 0.2265, + "step": 67890 + }, + { + "epoch": 0.8756940100724157, + "grad_norm": 0.9179600236827323, + "learning_rate": 8.91965076614474e-06, + "loss": 0.233, + "step": 67900 + }, + { + "epoch": 0.8758229782624116, + "grad_norm": 0.8916865590363553, + "learning_rate": 8.919184897280853e-06, + "loss": 0.2251, + "step": 67910 + }, + { + "epoch": 0.8759519464524075, + "grad_norm": 0.9556834074827597, + "learning_rate": 8.918718940164097e-06, + "loss": 0.2297, + "step": 67920 + }, + { + "epoch": 0.8760809146424035, + "grad_norm": 0.7644344528327832, + "learning_rate": 8.918252894804966e-06, + "loss": 0.2317, + "step": 67930 + }, + { + "epoch": 0.8762098828323994, + "grad_norm": 0.8614443739294041, + "learning_rate": 8.917786761213956e-06, + "loss": 0.2368, + "step": 67940 + }, + { + "epoch": 0.8763388510223953, + "grad_norm": 0.8853327631551255, + "learning_rate": 8.917320539401563e-06, + "loss": 0.2318, + "step": 67950 + }, + { + "epoch": 0.8764678192123913, + "grad_norm": 0.8412183441244506, + "learning_rate": 8.916854229378284e-06, + "loss": 0.2217, + "step": 67960 + }, + { + "epoch": 0.8765967874023872, + "grad_norm": 0.87348542211233, + "learning_rate": 8.916387831154623e-06, + "loss": 0.2421, + "step": 67970 + }, + { + "epoch": 0.8767257555923832, + "grad_norm": 0.8421737335187858, + "learning_rate": 8.915921344741076e-06, + "loss": 0.2075, + "step": 67980 + }, + { + "epoch": 0.876854723782379, + "grad_norm": 0.8571358649469034, + "learning_rate": 8.915454770148155e-06, + "loss": 0.2218, + "step": 67990 + }, + { + "epoch": 0.876983691972375, + "grad_norm": 0.7988012786377048, + "learning_rate": 8.91498810738636e-06, + "loss": 0.23, + "step": 68000 + }, + { + "epoch": 0.877112660162371, + "grad_norm": 0.8985686237895995, + "learning_rate": 8.914521356466204e-06, + "loss": 0.2278, + "step": 68010 + }, + { + "epoch": 0.8772416283523669, + "grad_norm": 0.8459222482429307, + "learning_rate": 8.914054517398194e-06, + "loss": 0.2246, + "step": 68020 + }, + { + "epoch": 0.8773705965423628, + "grad_norm": 0.8850319473866233, + "learning_rate": 8.913587590192845e-06, + "loss": 0.2315, + "step": 68030 + }, + { + "epoch": 0.8774995647323588, + "grad_norm": 0.746477535463042, + "learning_rate": 8.913120574860669e-06, + "loss": 0.2202, + "step": 68040 + }, + { + "epoch": 0.8776285329223547, + "grad_norm": 0.8336144902833856, + "learning_rate": 8.912653471412185e-06, + "loss": 0.2382, + "step": 68050 + }, + { + "epoch": 0.8777575011123506, + "grad_norm": 0.8942215966723798, + "learning_rate": 8.91218627985791e-06, + "loss": 0.2298, + "step": 68060 + }, + { + "epoch": 0.8778864693023466, + "grad_norm": 0.8251631682938613, + "learning_rate": 8.911719000208363e-06, + "loss": 0.2348, + "step": 68070 + }, + { + "epoch": 0.8780154374923426, + "grad_norm": 0.7889817807620377, + "learning_rate": 8.911251632474069e-06, + "loss": 0.2212, + "step": 68080 + }, + { + "epoch": 0.8781444056823384, + "grad_norm": 0.905537956389108, + "learning_rate": 8.910784176665547e-06, + "loss": 0.2205, + "step": 68090 + }, + { + "epoch": 0.8782733738723344, + "grad_norm": 0.8850354568450108, + "learning_rate": 8.910316632793331e-06, + "loss": 0.2316, + "step": 68100 + }, + { + "epoch": 0.8784023420623304, + "grad_norm": 0.8230452658363969, + "learning_rate": 8.909849000867944e-06, + "loss": 0.2307, + "step": 68110 + }, + { + "epoch": 0.8785313102523262, + "grad_norm": 0.8358972704729907, + "learning_rate": 8.909381280899916e-06, + "loss": 0.2201, + "step": 68120 + }, + { + "epoch": 0.8786602784423222, + "grad_norm": 0.8674085835032558, + "learning_rate": 8.908913472899783e-06, + "loss": 0.2537, + "step": 68130 + }, + { + "epoch": 0.8787892466323182, + "grad_norm": 0.8273883748454524, + "learning_rate": 8.908445576878076e-06, + "loss": 0.2394, + "step": 68140 + }, + { + "epoch": 0.878918214822314, + "grad_norm": 0.9024080824067522, + "learning_rate": 8.90797759284533e-06, + "loss": 0.2318, + "step": 68150 + }, + { + "epoch": 0.87904718301231, + "grad_norm": 0.8433386360402445, + "learning_rate": 8.907509520812087e-06, + "loss": 0.2206, + "step": 68160 + }, + { + "epoch": 0.879176151202306, + "grad_norm": 0.8324356388637383, + "learning_rate": 8.907041360788885e-06, + "loss": 0.2157, + "step": 68170 + }, + { + "epoch": 0.8793051193923019, + "grad_norm": 0.854276854021911, + "learning_rate": 8.906573112786265e-06, + "loss": 0.2218, + "step": 68180 + }, + { + "epoch": 0.8794340875822978, + "grad_norm": 0.7538619186955583, + "learning_rate": 8.906104776814775e-06, + "loss": 0.2234, + "step": 68190 + }, + { + "epoch": 0.8795630557722938, + "grad_norm": 0.8056109568699755, + "learning_rate": 8.905636352884956e-06, + "loss": 0.2399, + "step": 68200 + }, + { + "epoch": 0.8796920239622897, + "grad_norm": 0.8965317943464713, + "learning_rate": 8.905167841007359e-06, + "loss": 0.2343, + "step": 68210 + }, + { + "epoch": 0.8798209921522856, + "grad_norm": 0.8927582048913055, + "learning_rate": 8.904699241192534e-06, + "loss": 0.2219, + "step": 68220 + }, + { + "epoch": 0.8799499603422816, + "grad_norm": 0.8066295616004066, + "learning_rate": 8.904230553451032e-06, + "loss": 0.2318, + "step": 68230 + }, + { + "epoch": 0.8800789285322775, + "grad_norm": 0.8829204408603063, + "learning_rate": 8.903761777793409e-06, + "loss": 0.2272, + "step": 68240 + }, + { + "epoch": 0.8802078967222734, + "grad_norm": 0.7818733803981025, + "learning_rate": 8.903292914230219e-06, + "loss": 0.2227, + "step": 68250 + }, + { + "epoch": 0.8803368649122694, + "grad_norm": 0.8222435211335298, + "learning_rate": 8.902823962772019e-06, + "loss": 0.2264, + "step": 68260 + }, + { + "epoch": 0.8804658331022653, + "grad_norm": 0.8008121180217669, + "learning_rate": 8.902354923429372e-06, + "loss": 0.2358, + "step": 68270 + }, + { + "epoch": 0.8805948012922613, + "grad_norm": 0.8708813055575368, + "learning_rate": 8.901885796212836e-06, + "loss": 0.2277, + "step": 68280 + }, + { + "epoch": 0.8807237694822572, + "grad_norm": 0.8415752394170322, + "learning_rate": 8.90141658113298e-06, + "loss": 0.2168, + "step": 68290 + }, + { + "epoch": 0.8808527376722531, + "grad_norm": 0.874238512615207, + "learning_rate": 8.900947278200365e-06, + "loss": 0.2315, + "step": 68300 + }, + { + "epoch": 0.8809817058622491, + "grad_norm": 0.8737170000037634, + "learning_rate": 8.900477887425563e-06, + "loss": 0.2244, + "step": 68310 + }, + { + "epoch": 0.881110674052245, + "grad_norm": 0.8733707761205188, + "learning_rate": 8.90000840881914e-06, + "loss": 0.2321, + "step": 68320 + }, + { + "epoch": 0.8812396422422409, + "grad_norm": 0.8560441613325813, + "learning_rate": 8.89953884239167e-06, + "loss": 0.2312, + "step": 68330 + }, + { + "epoch": 0.8813686104322369, + "grad_norm": 0.897658228829426, + "learning_rate": 8.899069188153727e-06, + "loss": 0.2262, + "step": 68340 + }, + { + "epoch": 0.8814975786222329, + "grad_norm": 0.8411931265214115, + "learning_rate": 8.898599446115886e-06, + "loss": 0.2236, + "step": 68350 + }, + { + "epoch": 0.8816265468122287, + "grad_norm": 0.86526377135128, + "learning_rate": 8.898129616288722e-06, + "loss": 0.2352, + "step": 68360 + }, + { + "epoch": 0.8817555150022247, + "grad_norm": 0.9571267374157132, + "learning_rate": 8.897659698682822e-06, + "loss": 0.2245, + "step": 68370 + }, + { + "epoch": 0.8818844831922207, + "grad_norm": 0.9223514373984527, + "learning_rate": 8.89718969330876e-06, + "loss": 0.2229, + "step": 68380 + }, + { + "epoch": 0.8820134513822165, + "grad_norm": 0.7782390144748902, + "learning_rate": 8.896719600177122e-06, + "loss": 0.2183, + "step": 68390 + }, + { + "epoch": 0.8821424195722125, + "grad_norm": 0.8810702054321282, + "learning_rate": 8.896249419298497e-06, + "loss": 0.2318, + "step": 68400 + }, + { + "epoch": 0.8822713877622085, + "grad_norm": 0.9157164796269126, + "learning_rate": 8.89577915068347e-06, + "loss": 0.2199, + "step": 68410 + }, + { + "epoch": 0.8824003559522043, + "grad_norm": 0.8862577191617934, + "learning_rate": 8.895308794342627e-06, + "loss": 0.2261, + "step": 68420 + }, + { + "epoch": 0.8825293241422003, + "grad_norm": 0.8381408576143289, + "learning_rate": 8.894838350286568e-06, + "loss": 0.2098, + "step": 68430 + }, + { + "epoch": 0.8826582923321963, + "grad_norm": 0.812539964246255, + "learning_rate": 8.89436781852588e-06, + "loss": 0.2362, + "step": 68440 + }, + { + "epoch": 0.8827872605221923, + "grad_norm": 0.847658758284067, + "learning_rate": 8.89389719907116e-06, + "loss": 0.2287, + "step": 68450 + }, + { + "epoch": 0.8829162287121881, + "grad_norm": 0.73969071352747, + "learning_rate": 8.893426491933005e-06, + "loss": 0.2108, + "step": 68460 + }, + { + "epoch": 0.8830451969021841, + "grad_norm": 0.9084606061015877, + "learning_rate": 8.892955697122015e-06, + "loss": 0.2204, + "step": 68470 + }, + { + "epoch": 0.88317416509218, + "grad_norm": 0.8431891459683006, + "learning_rate": 8.892484814648794e-06, + "loss": 0.2268, + "step": 68480 + }, + { + "epoch": 0.8833031332821759, + "grad_norm": 0.9514927834539468, + "learning_rate": 8.892013844523942e-06, + "loss": 0.2467, + "step": 68490 + }, + { + "epoch": 0.8834321014721719, + "grad_norm": 1.0021001332280892, + "learning_rate": 8.891542786758063e-06, + "loss": 0.2236, + "step": 68500 + }, + { + "epoch": 0.8835610696621679, + "grad_norm": 0.8301396820683417, + "learning_rate": 8.89107164136177e-06, + "loss": 0.2256, + "step": 68510 + }, + { + "epoch": 0.8836900378521637, + "grad_norm": 0.8341573750954465, + "learning_rate": 8.890600408345669e-06, + "loss": 0.2269, + "step": 68520 + }, + { + "epoch": 0.8838190060421597, + "grad_norm": 0.8463090917052206, + "learning_rate": 8.89012908772037e-06, + "loss": 0.23, + "step": 68530 + }, + { + "epoch": 0.8839479742321557, + "grad_norm": 0.8068042018863735, + "learning_rate": 8.889657679496488e-06, + "loss": 0.2238, + "step": 68540 + }, + { + "epoch": 0.8840769424221516, + "grad_norm": 0.8833260762426027, + "learning_rate": 8.889186183684639e-06, + "loss": 0.2283, + "step": 68550 + }, + { + "epoch": 0.8842059106121475, + "grad_norm": 0.8201257466302758, + "learning_rate": 8.888714600295438e-06, + "loss": 0.2185, + "step": 68560 + }, + { + "epoch": 0.8843348788021435, + "grad_norm": 0.8158962298844654, + "learning_rate": 8.888242929339505e-06, + "loss": 0.2241, + "step": 68570 + }, + { + "epoch": 0.8844638469921394, + "grad_norm": 0.875088500271753, + "learning_rate": 8.887771170827462e-06, + "loss": 0.2288, + "step": 68580 + }, + { + "epoch": 0.8845928151821353, + "grad_norm": 0.8092080173694114, + "learning_rate": 8.887299324769933e-06, + "loss": 0.2413, + "step": 68590 + }, + { + "epoch": 0.8847217833721313, + "grad_norm": 0.9557281014413209, + "learning_rate": 8.886827391177542e-06, + "loss": 0.2286, + "step": 68600 + }, + { + "epoch": 0.8848507515621272, + "grad_norm": 0.8871353060761077, + "learning_rate": 8.886355370060913e-06, + "loss": 0.2365, + "step": 68610 + }, + { + "epoch": 0.8849797197521231, + "grad_norm": 0.8029105319836755, + "learning_rate": 8.88588326143068e-06, + "loss": 0.2227, + "step": 68620 + }, + { + "epoch": 0.885108687942119, + "grad_norm": 0.8507661507411282, + "learning_rate": 8.885411065297472e-06, + "loss": 0.225, + "step": 68630 + }, + { + "epoch": 0.885237656132115, + "grad_norm": 0.8793887424310539, + "learning_rate": 8.884938781671922e-06, + "loss": 0.2233, + "step": 68640 + }, + { + "epoch": 0.885366624322111, + "grad_norm": 0.8008931567120715, + "learning_rate": 8.884466410564665e-06, + "loss": 0.2289, + "step": 68650 + }, + { + "epoch": 0.8854955925121069, + "grad_norm": 0.8909806947320599, + "learning_rate": 8.883993951986336e-06, + "loss": 0.2363, + "step": 68660 + }, + { + "epoch": 0.8856245607021028, + "grad_norm": 0.8487278479090028, + "learning_rate": 8.883521405947577e-06, + "loss": 0.2284, + "step": 68670 + }, + { + "epoch": 0.8857535288920988, + "grad_norm": 0.8617450444687668, + "learning_rate": 8.883048772459029e-06, + "loss": 0.2219, + "step": 68680 + }, + { + "epoch": 0.8858824970820947, + "grad_norm": 0.8793095121328729, + "learning_rate": 8.882576051531333e-06, + "loss": 0.2237, + "step": 68690 + }, + { + "epoch": 0.8860114652720906, + "grad_norm": 0.7943848919441061, + "learning_rate": 8.882103243175133e-06, + "loss": 0.2292, + "step": 68700 + }, + { + "epoch": 0.8861404334620866, + "grad_norm": 0.8637172146971843, + "learning_rate": 8.881630347401078e-06, + "loss": 0.2365, + "step": 68710 + }, + { + "epoch": 0.8862694016520826, + "grad_norm": 0.9291679925277134, + "learning_rate": 8.881157364219815e-06, + "loss": 0.2291, + "step": 68720 + }, + { + "epoch": 0.8863983698420784, + "grad_norm": 0.8360029176676792, + "learning_rate": 8.880684293641995e-06, + "loss": 0.2263, + "step": 68730 + }, + { + "epoch": 0.8865273380320744, + "grad_norm": 0.87079846081665, + "learning_rate": 8.880211135678271e-06, + "loss": 0.2271, + "step": 68740 + }, + { + "epoch": 0.8866563062220704, + "grad_norm": 0.8694079112955039, + "learning_rate": 8.879737890339301e-06, + "loss": 0.2298, + "step": 68750 + }, + { + "epoch": 0.8867852744120662, + "grad_norm": 0.8401821587584988, + "learning_rate": 8.879264557635736e-06, + "loss": 0.2159, + "step": 68760 + }, + { + "epoch": 0.8869142426020622, + "grad_norm": 0.843971330281986, + "learning_rate": 8.878791137578238e-06, + "loss": 0.2283, + "step": 68770 + }, + { + "epoch": 0.8870432107920582, + "grad_norm": 0.90201881976367, + "learning_rate": 8.878317630177465e-06, + "loss": 0.2282, + "step": 68780 + }, + { + "epoch": 0.887172178982054, + "grad_norm": 0.9075741193633772, + "learning_rate": 8.877844035444084e-06, + "loss": 0.223, + "step": 68790 + }, + { + "epoch": 0.88730114717205, + "grad_norm": 0.7844658216813148, + "learning_rate": 8.877370353388753e-06, + "loss": 0.2216, + "step": 68800 + }, + { + "epoch": 0.887430115362046, + "grad_norm": 1.0547518137854712, + "learning_rate": 8.876896584022143e-06, + "loss": 0.2323, + "step": 68810 + }, + { + "epoch": 0.8875590835520419, + "grad_norm": 0.8999187890764809, + "learning_rate": 8.876422727354922e-06, + "loss": 0.2237, + "step": 68820 + }, + { + "epoch": 0.8876880517420378, + "grad_norm": 0.9288873901889679, + "learning_rate": 8.875948783397761e-06, + "loss": 0.2172, + "step": 68830 + }, + { + "epoch": 0.8878170199320338, + "grad_norm": 0.8315305105539588, + "learning_rate": 8.875474752161331e-06, + "loss": 0.2221, + "step": 68840 + }, + { + "epoch": 0.8879459881220297, + "grad_norm": 0.949637897507053, + "learning_rate": 8.875000633656305e-06, + "loss": 0.2253, + "step": 68850 + }, + { + "epoch": 0.8880749563120256, + "grad_norm": 0.8459124608741421, + "learning_rate": 8.874526427893361e-06, + "loss": 0.2233, + "step": 68860 + }, + { + "epoch": 0.8882039245020216, + "grad_norm": 0.8691303227778101, + "learning_rate": 8.87405213488318e-06, + "loss": 0.2216, + "step": 68870 + }, + { + "epoch": 0.8883328926920175, + "grad_norm": 0.8494186997893309, + "learning_rate": 8.873577754636435e-06, + "loss": 0.2168, + "step": 68880 + }, + { + "epoch": 0.8884618608820134, + "grad_norm": 0.7798500402823407, + "learning_rate": 8.873103287163815e-06, + "loss": 0.2302, + "step": 68890 + }, + { + "epoch": 0.8885908290720094, + "grad_norm": 0.9699336602132236, + "learning_rate": 8.872628732476002e-06, + "loss": 0.2274, + "step": 68900 + }, + { + "epoch": 0.8887197972620053, + "grad_norm": 1.0490633318949307, + "learning_rate": 8.87215409058368e-06, + "loss": 0.2399, + "step": 68910 + }, + { + "epoch": 0.8888487654520013, + "grad_norm": 0.8091638508083009, + "learning_rate": 8.87167936149754e-06, + "loss": 0.2279, + "step": 68920 + }, + { + "epoch": 0.8889777336419972, + "grad_norm": 0.926696751622804, + "learning_rate": 8.871204545228271e-06, + "loss": 0.2273, + "step": 68930 + }, + { + "epoch": 0.8891067018319931, + "grad_norm": 0.789592413335299, + "learning_rate": 8.870729641786565e-06, + "loss": 0.2143, + "step": 68940 + }, + { + "epoch": 0.8892356700219891, + "grad_norm": 0.8745440013210439, + "learning_rate": 8.870254651183114e-06, + "loss": 0.2275, + "step": 68950 + }, + { + "epoch": 0.889364638211985, + "grad_norm": 1.008727065104507, + "learning_rate": 8.869779573428615e-06, + "loss": 0.2403, + "step": 68960 + }, + { + "epoch": 0.8894936064019809, + "grad_norm": 0.8527616636269023, + "learning_rate": 8.86930440853377e-06, + "loss": 0.2264, + "step": 68970 + }, + { + "epoch": 0.8896225745919769, + "grad_norm": 0.9011341138394992, + "learning_rate": 8.868829156509271e-06, + "loss": 0.22, + "step": 68980 + }, + { + "epoch": 0.8897515427819729, + "grad_norm": 0.8144450226630864, + "learning_rate": 8.868353817365826e-06, + "loss": 0.2257, + "step": 68990 + }, + { + "epoch": 0.8898805109719687, + "grad_norm": 0.9052550104722601, + "learning_rate": 8.867878391114138e-06, + "loss": 0.2191, + "step": 69000 + }, + { + "epoch": 0.8900094791619647, + "grad_norm": 1.0433921167238573, + "learning_rate": 8.867402877764912e-06, + "loss": 0.2313, + "step": 69010 + }, + { + "epoch": 0.8901384473519607, + "grad_norm": 0.8272670004633051, + "learning_rate": 8.866927277328853e-06, + "loss": 0.2213, + "step": 69020 + }, + { + "epoch": 0.8902674155419565, + "grad_norm": 0.7855419234413664, + "learning_rate": 8.866451589816676e-06, + "loss": 0.229, + "step": 69030 + }, + { + "epoch": 0.8903963837319525, + "grad_norm": 0.8780853689926346, + "learning_rate": 8.865975815239087e-06, + "loss": 0.2216, + "step": 69040 + }, + { + "epoch": 0.8905253519219485, + "grad_norm": 0.8488102238700085, + "learning_rate": 8.865499953606804e-06, + "loss": 0.2193, + "step": 69050 + }, + { + "epoch": 0.8906543201119443, + "grad_norm": 0.885685712726132, + "learning_rate": 8.86502400493054e-06, + "loss": 0.2255, + "step": 69060 + }, + { + "epoch": 0.8907832883019403, + "grad_norm": 0.815243016849173, + "learning_rate": 8.864547969221014e-06, + "loss": 0.2277, + "step": 69070 + }, + { + "epoch": 0.8909122564919363, + "grad_norm": 0.9178371251926114, + "learning_rate": 8.864071846488943e-06, + "loss": 0.236, + "step": 69080 + }, + { + "epoch": 0.8910412246819323, + "grad_norm": 0.7846848729655226, + "learning_rate": 8.86359563674505e-06, + "loss": 0.2237, + "step": 69090 + }, + { + "epoch": 0.8911701928719281, + "grad_norm": 0.8647631821217311, + "learning_rate": 8.86311934000006e-06, + "loss": 0.216, + "step": 69100 + }, + { + "epoch": 0.8912991610619241, + "grad_norm": 0.8381367821195052, + "learning_rate": 8.862642956264697e-06, + "loss": 0.2173, + "step": 69110 + }, + { + "epoch": 0.89142812925192, + "grad_norm": 0.8147864493921875, + "learning_rate": 8.862166485549687e-06, + "loss": 0.2186, + "step": 69120 + }, + { + "epoch": 0.8915570974419159, + "grad_norm": 0.8496680149950756, + "learning_rate": 8.861689927865759e-06, + "loss": 0.2197, + "step": 69130 + }, + { + "epoch": 0.8916860656319119, + "grad_norm": 0.8942887347093158, + "learning_rate": 8.861213283223648e-06, + "loss": 0.2284, + "step": 69140 + }, + { + "epoch": 0.8918150338219079, + "grad_norm": 0.9027813415655567, + "learning_rate": 8.860736551634084e-06, + "loss": 0.2326, + "step": 69150 + }, + { + "epoch": 0.8919440020119037, + "grad_norm": 0.7973020101976113, + "learning_rate": 8.860259733107804e-06, + "loss": 0.2213, + "step": 69160 + }, + { + "epoch": 0.8920729702018997, + "grad_norm": 0.8208536502645863, + "learning_rate": 8.85978282765554e-06, + "loss": 0.2285, + "step": 69170 + }, + { + "epoch": 0.8922019383918957, + "grad_norm": 0.8603095402917326, + "learning_rate": 8.85930583528804e-06, + "loss": 0.2278, + "step": 69180 + }, + { + "epoch": 0.8923309065818916, + "grad_norm": 0.8336146422019374, + "learning_rate": 8.858828756016036e-06, + "loss": 0.2208, + "step": 69190 + }, + { + "epoch": 0.8924598747718875, + "grad_norm": 0.768563832128045, + "learning_rate": 8.858351589850276e-06, + "loss": 0.2348, + "step": 69200 + }, + { + "epoch": 0.8925888429618835, + "grad_norm": 0.8683728725088414, + "learning_rate": 8.857874336801504e-06, + "loss": 0.2171, + "step": 69210 + }, + { + "epoch": 0.8927178111518794, + "grad_norm": 0.855197168080272, + "learning_rate": 8.857396996880467e-06, + "loss": 0.2188, + "step": 69220 + }, + { + "epoch": 0.8928467793418753, + "grad_norm": 0.7675974264026494, + "learning_rate": 8.856919570097914e-06, + "loss": 0.2274, + "step": 69230 + }, + { + "epoch": 0.8929757475318713, + "grad_norm": 0.9267010393105582, + "learning_rate": 8.856442056464594e-06, + "loss": 0.2292, + "step": 69240 + }, + { + "epoch": 0.8931047157218672, + "grad_norm": 0.8015139620924185, + "learning_rate": 8.855964455991262e-06, + "loss": 0.2215, + "step": 69250 + }, + { + "epoch": 0.8932336839118631, + "grad_norm": 0.8415616297767455, + "learning_rate": 8.85548676868867e-06, + "loss": 0.2144, + "step": 69260 + }, + { + "epoch": 0.8933626521018591, + "grad_norm": 0.9876379679494703, + "learning_rate": 8.855008994567578e-06, + "loss": 0.2362, + "step": 69270 + }, + { + "epoch": 0.893491620291855, + "grad_norm": 0.8542135533490545, + "learning_rate": 8.854531133638742e-06, + "loss": 0.2379, + "step": 69280 + }, + { + "epoch": 0.893620588481851, + "grad_norm": 0.8197095421477576, + "learning_rate": 8.854053185912922e-06, + "loss": 0.2238, + "step": 69290 + }, + { + "epoch": 0.8937495566718469, + "grad_norm": 0.9232650694487604, + "learning_rate": 8.853575151400886e-06, + "loss": 0.2246, + "step": 69300 + }, + { + "epoch": 0.8938785248618428, + "grad_norm": 0.8929441109133037, + "learning_rate": 8.853097030113392e-06, + "loss": 0.2259, + "step": 69310 + }, + { + "epoch": 0.8940074930518388, + "grad_norm": 0.8792953260094525, + "learning_rate": 8.85261882206121e-06, + "loss": 0.2312, + "step": 69320 + }, + { + "epoch": 0.8941364612418347, + "grad_norm": 1.0051489413163313, + "learning_rate": 8.852140527255105e-06, + "loss": 0.2288, + "step": 69330 + }, + { + "epoch": 0.8942654294318306, + "grad_norm": 0.8742993506722282, + "learning_rate": 8.85166214570585e-06, + "loss": 0.2231, + "step": 69340 + }, + { + "epoch": 0.8943943976218266, + "grad_norm": 0.8387272638792888, + "learning_rate": 8.85118367742422e-06, + "loss": 0.2418, + "step": 69350 + }, + { + "epoch": 0.8945233658118226, + "grad_norm": 0.9413755980991919, + "learning_rate": 8.850705122420981e-06, + "loss": 0.2248, + "step": 69360 + }, + { + "epoch": 0.8946523340018184, + "grad_norm": 0.8643998055806567, + "learning_rate": 8.850226480706918e-06, + "loss": 0.2192, + "step": 69370 + }, + { + "epoch": 0.8947813021918144, + "grad_norm": 0.8351288901172705, + "learning_rate": 8.849747752292805e-06, + "loss": 0.2295, + "step": 69380 + }, + { + "epoch": 0.8949102703818104, + "grad_norm": 0.8873064341225575, + "learning_rate": 8.84926893718942e-06, + "loss": 0.227, + "step": 69390 + }, + { + "epoch": 0.8950392385718062, + "grad_norm": 0.8472679730908516, + "learning_rate": 8.848790035407551e-06, + "loss": 0.2356, + "step": 69400 + }, + { + "epoch": 0.8951682067618022, + "grad_norm": 0.8285857137429331, + "learning_rate": 8.848311046957977e-06, + "loss": 0.2369, + "step": 69410 + }, + { + "epoch": 0.8952971749517982, + "grad_norm": 0.8209107742937279, + "learning_rate": 8.847831971851484e-06, + "loss": 0.2104, + "step": 69420 + }, + { + "epoch": 0.895426143141794, + "grad_norm": 0.8147884999628133, + "learning_rate": 8.847352810098863e-06, + "loss": 0.2308, + "step": 69430 + }, + { + "epoch": 0.89555511133179, + "grad_norm": 0.8418151350938871, + "learning_rate": 8.846873561710901e-06, + "loss": 0.2195, + "step": 69440 + }, + { + "epoch": 0.895684079521786, + "grad_norm": 0.833466755721098, + "learning_rate": 8.846394226698391e-06, + "loss": 0.2201, + "step": 69450 + }, + { + "epoch": 0.8958130477117819, + "grad_norm": 0.8431049399189852, + "learning_rate": 8.845914805072129e-06, + "loss": 0.2172, + "step": 69460 + }, + { + "epoch": 0.8959420159017778, + "grad_norm": 0.8902391463460171, + "learning_rate": 8.845435296842906e-06, + "loss": 0.2229, + "step": 69470 + }, + { + "epoch": 0.8960709840917738, + "grad_norm": 0.8327037397370574, + "learning_rate": 8.844955702021523e-06, + "loss": 0.227, + "step": 69480 + }, + { + "epoch": 0.8961999522817697, + "grad_norm": 0.799647543936809, + "learning_rate": 8.844476020618777e-06, + "loss": 0.2208, + "step": 69490 + }, + { + "epoch": 0.8963289204717656, + "grad_norm": 0.9460429067221934, + "learning_rate": 8.843996252645474e-06, + "loss": 0.2344, + "step": 69500 + }, + { + "epoch": 0.8964578886617616, + "grad_norm": 0.8891944448105505, + "learning_rate": 8.843516398112412e-06, + "loss": 0.2362, + "step": 69510 + }, + { + "epoch": 0.8965868568517575, + "grad_norm": 0.8435193889194597, + "learning_rate": 8.843036457030401e-06, + "loss": 0.2306, + "step": 69520 + }, + { + "epoch": 0.8967158250417534, + "grad_norm": 0.9029755843893349, + "learning_rate": 8.842556429410246e-06, + "loss": 0.2285, + "step": 69530 + }, + { + "epoch": 0.8968447932317494, + "grad_norm": 0.9288442875671203, + "learning_rate": 8.842076315262758e-06, + "loss": 0.2188, + "step": 69540 + }, + { + "epoch": 0.8969737614217453, + "grad_norm": 0.9935786749849009, + "learning_rate": 8.841596114598746e-06, + "loss": 0.2292, + "step": 69550 + }, + { + "epoch": 0.8971027296117413, + "grad_norm": 0.8859347346065107, + "learning_rate": 8.841115827429024e-06, + "loss": 0.2315, + "step": 69560 + }, + { + "epoch": 0.8972316978017372, + "grad_norm": 0.7627302979911124, + "learning_rate": 8.840635453764409e-06, + "loss": 0.2218, + "step": 69570 + }, + { + "epoch": 0.8973606659917331, + "grad_norm": 0.8106712687609302, + "learning_rate": 8.840154993615715e-06, + "loss": 0.2349, + "step": 69580 + }, + { + "epoch": 0.8974896341817291, + "grad_norm": 0.8772884046616994, + "learning_rate": 8.839674446993762e-06, + "loss": 0.2342, + "step": 69590 + }, + { + "epoch": 0.897618602371725, + "grad_norm": 0.8458673010363049, + "learning_rate": 8.839193813909374e-06, + "loss": 0.2267, + "step": 69600 + }, + { + "epoch": 0.8977475705617209, + "grad_norm": 0.888171204458306, + "learning_rate": 8.838713094373371e-06, + "loss": 0.2296, + "step": 69610 + }, + { + "epoch": 0.8978765387517169, + "grad_norm": 0.8786442396051912, + "learning_rate": 8.83823228839658e-06, + "loss": 0.2206, + "step": 69620 + }, + { + "epoch": 0.8980055069417128, + "grad_norm": 0.8123350050388246, + "learning_rate": 8.837751395989824e-06, + "loss": 0.2215, + "step": 69630 + }, + { + "epoch": 0.8981344751317087, + "grad_norm": 0.8733231882429937, + "learning_rate": 8.837270417163937e-06, + "loss": 0.2311, + "step": 69640 + }, + { + "epoch": 0.8982634433217047, + "grad_norm": 0.9165738379771249, + "learning_rate": 8.836789351929746e-06, + "loss": 0.22, + "step": 69650 + }, + { + "epoch": 0.8983924115117007, + "grad_norm": 0.7596371920936885, + "learning_rate": 8.836308200298085e-06, + "loss": 0.2253, + "step": 69660 + }, + { + "epoch": 0.8985213797016965, + "grad_norm": 0.7945655013103936, + "learning_rate": 8.835826962279788e-06, + "loss": 0.2229, + "step": 69670 + }, + { + "epoch": 0.8986503478916925, + "grad_norm": 0.8616589199940724, + "learning_rate": 8.835345637885693e-06, + "loss": 0.2325, + "step": 69680 + }, + { + "epoch": 0.8987793160816885, + "grad_norm": 0.7809967541142088, + "learning_rate": 8.834864227126639e-06, + "loss": 0.2281, + "step": 69690 + }, + { + "epoch": 0.8989082842716843, + "grad_norm": 0.8614067424011094, + "learning_rate": 8.834382730013462e-06, + "loss": 0.2305, + "step": 69700 + }, + { + "epoch": 0.8990372524616803, + "grad_norm": 0.7625129761990289, + "learning_rate": 8.83390114655701e-06, + "loss": 0.221, + "step": 69710 + }, + { + "epoch": 0.8991662206516763, + "grad_norm": 0.8339001971465021, + "learning_rate": 8.833419476768123e-06, + "loss": 0.2275, + "step": 69720 + }, + { + "epoch": 0.8992951888416723, + "grad_norm": 0.9361837698268036, + "learning_rate": 8.832937720657652e-06, + "loss": 0.2315, + "step": 69730 + }, + { + "epoch": 0.8994241570316681, + "grad_norm": 0.8890048487898228, + "learning_rate": 8.83245587823644e-06, + "loss": 0.2152, + "step": 69740 + }, + { + "epoch": 0.8995531252216641, + "grad_norm": 0.9311850529372036, + "learning_rate": 8.831973949515342e-06, + "loss": 0.2188, + "step": 69750 + }, + { + "epoch": 0.89968209341166, + "grad_norm": 0.7757785891807512, + "learning_rate": 8.831491934505207e-06, + "loss": 0.217, + "step": 69760 + }, + { + "epoch": 0.8998110616016559, + "grad_norm": 0.9167880966712986, + "learning_rate": 8.83100983321689e-06, + "loss": 0.2249, + "step": 69770 + }, + { + "epoch": 0.8999400297916519, + "grad_norm": 0.8007358123217669, + "learning_rate": 8.830527645661247e-06, + "loss": 0.2102, + "step": 69780 + }, + { + "epoch": 0.9000689979816479, + "grad_norm": 0.8333826294665766, + "learning_rate": 8.830045371849136e-06, + "loss": 0.223, + "step": 69790 + }, + { + "epoch": 0.9001979661716437, + "grad_norm": 0.7934674272581813, + "learning_rate": 8.829563011791417e-06, + "loss": 0.2366, + "step": 69800 + }, + { + "epoch": 0.9003269343616397, + "grad_norm": 0.8616560613519906, + "learning_rate": 8.829080565498952e-06, + "loss": 0.2238, + "step": 69810 + }, + { + "epoch": 0.9004559025516357, + "grad_norm": 0.836601836170582, + "learning_rate": 8.828598032982604e-06, + "loss": 0.2206, + "step": 69820 + }, + { + "epoch": 0.9005848707416316, + "grad_norm": 0.839991261106724, + "learning_rate": 8.82811541425324e-06, + "loss": 0.2256, + "step": 69830 + }, + { + "epoch": 0.9007138389316275, + "grad_norm": 0.9530810854979181, + "learning_rate": 8.827632709321727e-06, + "loss": 0.2401, + "step": 69840 + }, + { + "epoch": 0.9008428071216235, + "grad_norm": 0.8829193764069079, + "learning_rate": 8.827149918198936e-06, + "loss": 0.2182, + "step": 69850 + }, + { + "epoch": 0.9009717753116194, + "grad_norm": 0.8131659984197603, + "learning_rate": 8.826667040895736e-06, + "loss": 0.2323, + "step": 69860 + }, + { + "epoch": 0.9011007435016153, + "grad_norm": 0.7902383363896005, + "learning_rate": 8.826184077423e-06, + "loss": 0.2168, + "step": 69870 + }, + { + "epoch": 0.9012297116916113, + "grad_norm": 0.864386554604124, + "learning_rate": 8.825701027791609e-06, + "loss": 0.2324, + "step": 69880 + }, + { + "epoch": 0.9013586798816072, + "grad_norm": 0.8713279718654353, + "learning_rate": 8.825217892012434e-06, + "loss": 0.2416, + "step": 69890 + }, + { + "epoch": 0.9014876480716031, + "grad_norm": 0.9484461939912464, + "learning_rate": 8.824734670096357e-06, + "loss": 0.2226, + "step": 69900 + }, + { + "epoch": 0.9016166162615991, + "grad_norm": 0.9283416490572278, + "learning_rate": 8.82425136205426e-06, + "loss": 0.2202, + "step": 69910 + }, + { + "epoch": 0.901745584451595, + "grad_norm": 0.8293041769759076, + "learning_rate": 8.823767967897027e-06, + "loss": 0.2286, + "step": 69920 + }, + { + "epoch": 0.901874552641591, + "grad_norm": 0.9109542996034786, + "learning_rate": 8.82328448763554e-06, + "loss": 0.2251, + "step": 69930 + }, + { + "epoch": 0.9020035208315869, + "grad_norm": 0.9046005634668662, + "learning_rate": 8.822800921280687e-06, + "loss": 0.2365, + "step": 69940 + }, + { + "epoch": 0.9021324890215828, + "grad_norm": 0.9621725778176288, + "learning_rate": 8.822317268843358e-06, + "loss": 0.221, + "step": 69950 + }, + { + "epoch": 0.9022614572115788, + "grad_norm": 0.8852395321831749, + "learning_rate": 8.821833530334444e-06, + "loss": 0.2437, + "step": 69960 + }, + { + "epoch": 0.9023904254015747, + "grad_norm": 0.7970704065030032, + "learning_rate": 8.821349705764838e-06, + "loss": 0.2252, + "step": 69970 + }, + { + "epoch": 0.9025193935915706, + "grad_norm": 0.8689309267438797, + "learning_rate": 8.820865795145435e-06, + "loss": 0.2282, + "step": 69980 + }, + { + "epoch": 0.9026483617815666, + "grad_norm": 0.8112214281571338, + "learning_rate": 8.820381798487132e-06, + "loss": 0.2209, + "step": 69990 + }, + { + "epoch": 0.9027773299715625, + "grad_norm": 0.9306778563261716, + "learning_rate": 8.819897715800825e-06, + "loss": 0.2318, + "step": 70000 + }, + { + "epoch": 0.9029062981615584, + "grad_norm": 0.7939167766445846, + "learning_rate": 8.819413547097418e-06, + "loss": 0.2336, + "step": 70010 + }, + { + "epoch": 0.9030352663515544, + "grad_norm": 0.9459490160875704, + "learning_rate": 8.81892929238781e-06, + "loss": 0.2176, + "step": 70020 + }, + { + "epoch": 0.9031642345415504, + "grad_norm": 0.8691615040620472, + "learning_rate": 8.81844495168291e-06, + "loss": 0.2306, + "step": 70030 + }, + { + "epoch": 0.9032932027315462, + "grad_norm": 0.7752751357111953, + "learning_rate": 8.817960524993621e-06, + "loss": 0.2199, + "step": 70040 + }, + { + "epoch": 0.9034221709215422, + "grad_norm": 0.926579208735426, + "learning_rate": 8.817476012330852e-06, + "loss": 0.2226, + "step": 70050 + }, + { + "epoch": 0.9035511391115382, + "grad_norm": 0.8930825289611841, + "learning_rate": 8.816991413705515e-06, + "loss": 0.2334, + "step": 70060 + }, + { + "epoch": 0.903680107301534, + "grad_norm": 0.8735472170628993, + "learning_rate": 8.816506729128522e-06, + "loss": 0.2298, + "step": 70070 + }, + { + "epoch": 0.90380907549153, + "grad_norm": 0.8478473957450465, + "learning_rate": 8.816021958610785e-06, + "loss": 0.2321, + "step": 70080 + }, + { + "epoch": 0.903938043681526, + "grad_norm": 0.8506328421904835, + "learning_rate": 8.815537102163222e-06, + "loss": 0.225, + "step": 70090 + }, + { + "epoch": 0.9040670118715219, + "grad_norm": 0.870334169120711, + "learning_rate": 8.815052159796752e-06, + "loss": 0.2313, + "step": 70100 + }, + { + "epoch": 0.9041959800615178, + "grad_norm": 0.8649506227239768, + "learning_rate": 8.814567131522292e-06, + "loss": 0.2329, + "step": 70110 + }, + { + "epoch": 0.9043249482515138, + "grad_norm": 0.8983281862227381, + "learning_rate": 8.814082017350765e-06, + "loss": 0.2345, + "step": 70120 + }, + { + "epoch": 0.9044539164415097, + "grad_norm": 0.8171918604642717, + "learning_rate": 8.813596817293098e-06, + "loss": 0.2288, + "step": 70130 + }, + { + "epoch": 0.9045828846315056, + "grad_norm": 0.8073456137964661, + "learning_rate": 8.813111531360213e-06, + "loss": 0.2298, + "step": 70140 + }, + { + "epoch": 0.9047118528215016, + "grad_norm": 0.8534375941399318, + "learning_rate": 8.812626159563039e-06, + "loss": 0.2309, + "step": 70150 + }, + { + "epoch": 0.9048408210114975, + "grad_norm": 0.8030788797283629, + "learning_rate": 8.812140701912504e-06, + "loss": 0.236, + "step": 70160 + }, + { + "epoch": 0.9049697892014934, + "grad_norm": 0.9378093744008948, + "learning_rate": 8.811655158419543e-06, + "loss": 0.2297, + "step": 70170 + }, + { + "epoch": 0.9050987573914894, + "grad_norm": 0.7900789316413968, + "learning_rate": 8.811169529095087e-06, + "loss": 0.2267, + "step": 70180 + }, + { + "epoch": 0.9052277255814853, + "grad_norm": 0.8822230678829723, + "learning_rate": 8.810683813950073e-06, + "loss": 0.2221, + "step": 70190 + }, + { + "epoch": 0.9053566937714813, + "grad_norm": 0.8738303136412, + "learning_rate": 8.810198012995439e-06, + "loss": 0.2305, + "step": 70200 + }, + { + "epoch": 0.9054856619614772, + "grad_norm": 0.851975460423176, + "learning_rate": 8.809712126242123e-06, + "loss": 0.242, + "step": 70210 + }, + { + "epoch": 0.9056146301514731, + "grad_norm": 0.8935326085558775, + "learning_rate": 8.809226153701065e-06, + "loss": 0.2291, + "step": 70220 + }, + { + "epoch": 0.9057435983414691, + "grad_norm": 0.7996336459096441, + "learning_rate": 8.80874009538321e-06, + "loss": 0.2346, + "step": 70230 + }, + { + "epoch": 0.905872566531465, + "grad_norm": 0.8296892897513363, + "learning_rate": 8.808253951299503e-06, + "loss": 0.218, + "step": 70240 + }, + { + "epoch": 0.9060015347214609, + "grad_norm": 0.915529487999846, + "learning_rate": 8.80776772146089e-06, + "loss": 0.2211, + "step": 70250 + }, + { + "epoch": 0.9061305029114569, + "grad_norm": 0.8761876550029329, + "learning_rate": 8.807281405878323e-06, + "loss": 0.2242, + "step": 70260 + }, + { + "epoch": 0.9062594711014528, + "grad_norm": 0.8180546032088264, + "learning_rate": 8.80679500456275e-06, + "loss": 0.2158, + "step": 70270 + }, + { + "epoch": 0.9063884392914487, + "grad_norm": 0.7942719888638845, + "learning_rate": 8.806308517525124e-06, + "loss": 0.237, + "step": 70280 + }, + { + "epoch": 0.9065174074814447, + "grad_norm": 0.8692310488126987, + "learning_rate": 8.805821944776401e-06, + "loss": 0.2171, + "step": 70290 + }, + { + "epoch": 0.9066463756714407, + "grad_norm": 0.8327099005137946, + "learning_rate": 8.805335286327537e-06, + "loss": 0.2323, + "step": 70300 + }, + { + "epoch": 0.9067753438614365, + "grad_norm": 0.8270740977960506, + "learning_rate": 8.80484854218949e-06, + "loss": 0.2238, + "step": 70310 + }, + { + "epoch": 0.9069043120514325, + "grad_norm": 0.8098060446830526, + "learning_rate": 8.804361712373223e-06, + "loss": 0.2402, + "step": 70320 + }, + { + "epoch": 0.9070332802414285, + "grad_norm": 0.856150797239283, + "learning_rate": 8.803874796889697e-06, + "loss": 0.2322, + "step": 70330 + }, + { + "epoch": 0.9071622484314243, + "grad_norm": 0.8945395650830268, + "learning_rate": 8.803387795749877e-06, + "loss": 0.2236, + "step": 70340 + }, + { + "epoch": 0.9072912166214203, + "grad_norm": 0.8546023134574727, + "learning_rate": 8.802900708964725e-06, + "loss": 0.2236, + "step": 70350 + }, + { + "epoch": 0.9074201848114163, + "grad_norm": 0.8662610387600063, + "learning_rate": 8.802413536545215e-06, + "loss": 0.236, + "step": 70360 + }, + { + "epoch": 0.9075491530014123, + "grad_norm": 0.9655618015568006, + "learning_rate": 8.801926278502318e-06, + "loss": 0.2251, + "step": 70370 + }, + { + "epoch": 0.9076781211914081, + "grad_norm": 0.8909027804171658, + "learning_rate": 8.801438934847e-06, + "loss": 0.2387, + "step": 70380 + }, + { + "epoch": 0.9078070893814041, + "grad_norm": 0.7863678394799549, + "learning_rate": 8.800951505590241e-06, + "loss": 0.2364, + "step": 70390 + }, + { + "epoch": 0.9079360575714001, + "grad_norm": 0.8036636096994174, + "learning_rate": 8.800463990743012e-06, + "loss": 0.233, + "step": 70400 + }, + { + "epoch": 0.9080650257613959, + "grad_norm": 0.872671157907737, + "learning_rate": 8.799976390316295e-06, + "loss": 0.2236, + "step": 70410 + }, + { + "epoch": 0.9081939939513919, + "grad_norm": 0.8543158349917864, + "learning_rate": 8.799488704321069e-06, + "loss": 0.224, + "step": 70420 + }, + { + "epoch": 0.9083229621413879, + "grad_norm": 0.8017080315457837, + "learning_rate": 8.799000932768314e-06, + "loss": 0.2178, + "step": 70430 + }, + { + "epoch": 0.9084519303313837, + "grad_norm": 0.9166513560383855, + "learning_rate": 8.798513075669015e-06, + "loss": 0.2268, + "step": 70440 + }, + { + "epoch": 0.9085808985213797, + "grad_norm": 0.801421466612927, + "learning_rate": 8.798025133034157e-06, + "loss": 0.2346, + "step": 70450 + }, + { + "epoch": 0.9087098667113757, + "grad_norm": 0.863941216926396, + "learning_rate": 8.79753710487473e-06, + "loss": 0.2304, + "step": 70460 + }, + { + "epoch": 0.9088388349013716, + "grad_norm": 0.88179796876461, + "learning_rate": 8.797048991201721e-06, + "loss": 0.2202, + "step": 70470 + }, + { + "epoch": 0.9089678030913675, + "grad_norm": 0.835120851771644, + "learning_rate": 8.796560792026121e-06, + "loss": 0.2261, + "step": 70480 + }, + { + "epoch": 0.9090967712813635, + "grad_norm": 0.8545886019737812, + "learning_rate": 8.796072507358926e-06, + "loss": 0.2351, + "step": 70490 + }, + { + "epoch": 0.9092257394713594, + "grad_norm": 0.9573506773793048, + "learning_rate": 8.795584137211129e-06, + "loss": 0.2268, + "step": 70500 + }, + { + "epoch": 0.9093547076613553, + "grad_norm": 0.8392319481982916, + "learning_rate": 8.795095681593728e-06, + "loss": 0.2249, + "step": 70510 + }, + { + "epoch": 0.9094836758513513, + "grad_norm": 1.2914334124497873, + "learning_rate": 8.79460714051772e-06, + "loss": 0.2285, + "step": 70520 + }, + { + "epoch": 0.9096126440413472, + "grad_norm": 0.763498661606778, + "learning_rate": 8.79411851399411e-06, + "loss": 0.2315, + "step": 70530 + }, + { + "epoch": 0.9097416122313431, + "grad_norm": 0.8701243908053462, + "learning_rate": 8.793629802033901e-06, + "loss": 0.2282, + "step": 70540 + }, + { + "epoch": 0.9098705804213391, + "grad_norm": 0.8999819526430545, + "learning_rate": 8.793141004648093e-06, + "loss": 0.2309, + "step": 70550 + }, + { + "epoch": 0.909999548611335, + "grad_norm": 0.8506486381595135, + "learning_rate": 8.792652121847695e-06, + "loss": 0.2195, + "step": 70560 + }, + { + "epoch": 0.910128516801331, + "grad_norm": 0.8206742766813259, + "learning_rate": 8.792163153643719e-06, + "loss": 0.2305, + "step": 70570 + }, + { + "epoch": 0.9102574849913269, + "grad_norm": 0.8408745576831124, + "learning_rate": 8.791674100047171e-06, + "loss": 0.2158, + "step": 70580 + }, + { + "epoch": 0.9103864531813228, + "grad_norm": 0.868021629600967, + "learning_rate": 8.791184961069069e-06, + "loss": 0.2234, + "step": 70590 + }, + { + "epoch": 0.9105154213713188, + "grad_norm": 0.7763924695883931, + "learning_rate": 8.790695736720421e-06, + "loss": 0.2129, + "step": 70600 + }, + { + "epoch": 0.9106443895613147, + "grad_norm": 0.9073732876569056, + "learning_rate": 8.790206427012247e-06, + "loss": 0.2279, + "step": 70610 + }, + { + "epoch": 0.9107733577513106, + "grad_norm": 1.1481137804152117, + "learning_rate": 8.789717031955566e-06, + "loss": 0.2233, + "step": 70620 + }, + { + "epoch": 0.9109023259413066, + "grad_norm": 0.8698184467840833, + "learning_rate": 8.789227551561398e-06, + "loss": 0.2231, + "step": 70630 + }, + { + "epoch": 0.9110312941313025, + "grad_norm": 0.8592081391237665, + "learning_rate": 8.788737985840763e-06, + "loss": 0.2226, + "step": 70640 + }, + { + "epoch": 0.9111602623212984, + "grad_norm": 0.8930684964329784, + "learning_rate": 8.788248334804688e-06, + "loss": 0.2266, + "step": 70650 + }, + { + "epoch": 0.9112892305112944, + "grad_norm": 0.8078203189523852, + "learning_rate": 8.787758598464198e-06, + "loss": 0.2263, + "step": 70660 + }, + { + "epoch": 0.9114181987012904, + "grad_norm": 0.9599386656792149, + "learning_rate": 8.78726877683032e-06, + "loss": 0.2325, + "step": 70670 + }, + { + "epoch": 0.9115471668912862, + "grad_norm": 0.9084298190028319, + "learning_rate": 8.786778869914084e-06, + "loss": 0.2326, + "step": 70680 + }, + { + "epoch": 0.9116761350812822, + "grad_norm": 0.9322328166964962, + "learning_rate": 8.786288877726522e-06, + "loss": 0.2304, + "step": 70690 + }, + { + "epoch": 0.9118051032712782, + "grad_norm": 0.7703690867390052, + "learning_rate": 8.78579880027867e-06, + "loss": 0.2093, + "step": 70700 + }, + { + "epoch": 0.911934071461274, + "grad_norm": 0.8897321732403204, + "learning_rate": 8.78530863758156e-06, + "loss": 0.2148, + "step": 70710 + }, + { + "epoch": 0.91206303965127, + "grad_norm": 0.8561250903741915, + "learning_rate": 8.784818389646232e-06, + "loss": 0.2209, + "step": 70720 + }, + { + "epoch": 0.912192007841266, + "grad_norm": 0.8478246746747133, + "learning_rate": 8.784328056483724e-06, + "loss": 0.225, + "step": 70730 + }, + { + "epoch": 0.9123209760312619, + "grad_norm": 0.8341569527379953, + "learning_rate": 8.78383763810508e-06, + "loss": 0.2384, + "step": 70740 + }, + { + "epoch": 0.9124499442212578, + "grad_norm": 0.8397777665814035, + "learning_rate": 8.78334713452134e-06, + "loss": 0.2221, + "step": 70750 + }, + { + "epoch": 0.9125789124112538, + "grad_norm": 0.8605096192134991, + "learning_rate": 8.782856545743551e-06, + "loss": 0.2311, + "step": 70760 + }, + { + "epoch": 0.9127078806012497, + "grad_norm": 0.8066414496457559, + "learning_rate": 8.78236587178276e-06, + "loss": 0.2323, + "step": 70770 + }, + { + "epoch": 0.9128368487912456, + "grad_norm": 0.873687728441172, + "learning_rate": 8.781875112650017e-06, + "loss": 0.2223, + "step": 70780 + }, + { + "epoch": 0.9129658169812416, + "grad_norm": 0.770640202247663, + "learning_rate": 8.781384268356372e-06, + "loss": 0.2202, + "step": 70790 + }, + { + "epoch": 0.9130947851712375, + "grad_norm": 0.8193087450331187, + "learning_rate": 8.780893338912876e-06, + "loss": 0.2167, + "step": 70800 + }, + { + "epoch": 0.9132237533612334, + "grad_norm": 0.9316570943039902, + "learning_rate": 8.780402324330588e-06, + "loss": 0.2246, + "step": 70810 + }, + { + "epoch": 0.9133527215512294, + "grad_norm": 0.839733093381722, + "learning_rate": 8.779911224620561e-06, + "loss": 0.2365, + "step": 70820 + }, + { + "epoch": 0.9134816897412253, + "grad_norm": 0.8516882958580617, + "learning_rate": 8.779420039793856e-06, + "loss": 0.2256, + "step": 70830 + }, + { + "epoch": 0.9136106579312213, + "grad_norm": 0.832589163803362, + "learning_rate": 8.778928769861532e-06, + "loss": 0.2178, + "step": 70840 + }, + { + "epoch": 0.9137396261212172, + "grad_norm": 0.8743275442086957, + "learning_rate": 8.778437414834654e-06, + "loss": 0.2259, + "step": 70850 + }, + { + "epoch": 0.9138685943112131, + "grad_norm": 0.8193595143409509, + "learning_rate": 8.777945974724281e-06, + "loss": 0.2253, + "step": 70860 + }, + { + "epoch": 0.9139975625012091, + "grad_norm": 0.9095162305185166, + "learning_rate": 8.777454449541484e-06, + "loss": 0.2336, + "step": 70870 + }, + { + "epoch": 0.914126530691205, + "grad_norm": 0.8850840196396714, + "learning_rate": 8.776962839297334e-06, + "loss": 0.2349, + "step": 70880 + }, + { + "epoch": 0.914255498881201, + "grad_norm": 0.9272597750721716, + "learning_rate": 8.776471144002894e-06, + "loss": 0.231, + "step": 70890 + }, + { + "epoch": 0.9143844670711969, + "grad_norm": 0.8829612246094878, + "learning_rate": 8.77597936366924e-06, + "loss": 0.2423, + "step": 70900 + }, + { + "epoch": 0.9145134352611928, + "grad_norm": 0.7995421486964428, + "learning_rate": 8.775487498307447e-06, + "loss": 0.2356, + "step": 70910 + }, + { + "epoch": 0.9146424034511887, + "grad_norm": 0.8484608810397811, + "learning_rate": 8.774995547928588e-06, + "loss": 0.2359, + "step": 70920 + }, + { + "epoch": 0.9147713716411847, + "grad_norm": 0.9276819770112267, + "learning_rate": 8.774503512543741e-06, + "loss": 0.2217, + "step": 70930 + }, + { + "epoch": 0.9149003398311807, + "grad_norm": 0.8202369223246677, + "learning_rate": 8.774011392163989e-06, + "loss": 0.2364, + "step": 70940 + }, + { + "epoch": 0.9150293080211765, + "grad_norm": 0.8816971711616176, + "learning_rate": 8.773519186800411e-06, + "loss": 0.2218, + "step": 70950 + }, + { + "epoch": 0.9151582762111725, + "grad_norm": 0.8487817129688897, + "learning_rate": 8.773026896464093e-06, + "loss": 0.229, + "step": 70960 + }, + { + "epoch": 0.9152872444011685, + "grad_norm": 0.8650303814182841, + "learning_rate": 8.772534521166115e-06, + "loss": 0.2291, + "step": 70970 + }, + { + "epoch": 0.9154162125911643, + "grad_norm": 0.842028194620896, + "learning_rate": 8.77204206091757e-06, + "loss": 0.2189, + "step": 70980 + }, + { + "epoch": 0.9155451807811603, + "grad_norm": 0.8607507927166832, + "learning_rate": 8.771549515729545e-06, + "loss": 0.2262, + "step": 70990 + }, + { + "epoch": 0.9156741489711563, + "grad_norm": 0.8957421899308173, + "learning_rate": 8.771056885613132e-06, + "loss": 0.2317, + "step": 71000 + }, + { + "epoch": 0.9158031171611521, + "grad_norm": 0.8056445461004076, + "learning_rate": 8.770564170579424e-06, + "loss": 0.23, + "step": 71010 + }, + { + "epoch": 0.9159320853511481, + "grad_norm": 0.8444008450351477, + "learning_rate": 8.770071370639515e-06, + "loss": 0.2195, + "step": 71020 + }, + { + "epoch": 0.9160610535411441, + "grad_norm": 0.8717772913081615, + "learning_rate": 8.769578485804501e-06, + "loss": 0.2332, + "step": 71030 + }, + { + "epoch": 0.9161900217311401, + "grad_norm": 0.8061368597419292, + "learning_rate": 8.769085516085485e-06, + "loss": 0.2294, + "step": 71040 + }, + { + "epoch": 0.9163189899211359, + "grad_norm": 0.7858175378430673, + "learning_rate": 8.768592461493564e-06, + "loss": 0.2113, + "step": 71050 + }, + { + "epoch": 0.9164479581111319, + "grad_norm": 0.8116287299808378, + "learning_rate": 8.768099322039842e-06, + "loss": 0.2165, + "step": 71060 + }, + { + "epoch": 0.9165769263011279, + "grad_norm": 0.8659797579313812, + "learning_rate": 8.767606097735425e-06, + "loss": 0.229, + "step": 71070 + }, + { + "epoch": 0.9167058944911237, + "grad_norm": 0.9550633962393192, + "learning_rate": 8.767112788591417e-06, + "loss": 0.2326, + "step": 71080 + }, + { + "epoch": 0.9168348626811197, + "grad_norm": 0.9338667757552777, + "learning_rate": 8.766619394618929e-06, + "loss": 0.2317, + "step": 71090 + }, + { + "epoch": 0.9169638308711157, + "grad_norm": 0.8305991497792545, + "learning_rate": 8.766125915829067e-06, + "loss": 0.2234, + "step": 71100 + }, + { + "epoch": 0.9170927990611116, + "grad_norm": 0.8586631760151215, + "learning_rate": 8.765632352232948e-06, + "loss": 0.2247, + "step": 71110 + }, + { + "epoch": 0.9172217672511075, + "grad_norm": 0.7913877299950981, + "learning_rate": 8.765138703841684e-06, + "loss": 0.2369, + "step": 71120 + }, + { + "epoch": 0.9173507354411035, + "grad_norm": 0.7960098557922695, + "learning_rate": 8.764644970666392e-06, + "loss": 0.2235, + "step": 71130 + }, + { + "epoch": 0.9174797036310994, + "grad_norm": 0.8133956886580731, + "learning_rate": 8.764151152718188e-06, + "loss": 0.2344, + "step": 71140 + }, + { + "epoch": 0.9176086718210953, + "grad_norm": 0.8747505260501417, + "learning_rate": 8.763657250008193e-06, + "loss": 0.2186, + "step": 71150 + }, + { + "epoch": 0.9177376400110913, + "grad_norm": 0.94797211370429, + "learning_rate": 8.76316326254753e-06, + "loss": 0.233, + "step": 71160 + }, + { + "epoch": 0.9178666082010872, + "grad_norm": 0.8861872553712605, + "learning_rate": 8.762669190347321e-06, + "loss": 0.221, + "step": 71170 + }, + { + "epoch": 0.9179955763910831, + "grad_norm": 0.9183676977776579, + "learning_rate": 8.762175033418691e-06, + "loss": 0.2288, + "step": 71180 + }, + { + "epoch": 0.9181245445810791, + "grad_norm": 0.8897209823580058, + "learning_rate": 8.761680791772772e-06, + "loss": 0.224, + "step": 71190 + }, + { + "epoch": 0.918253512771075, + "grad_norm": 0.8409349693647084, + "learning_rate": 8.761186465420686e-06, + "loss": 0.2353, + "step": 71200 + }, + { + "epoch": 0.918382480961071, + "grad_norm": 0.9209918999483367, + "learning_rate": 8.760692054373571e-06, + "loss": 0.235, + "step": 71210 + }, + { + "epoch": 0.9185114491510669, + "grad_norm": 0.891748140295418, + "learning_rate": 8.760197558642557e-06, + "loss": 0.2208, + "step": 71220 + }, + { + "epoch": 0.9186404173410628, + "grad_norm": 0.9332254505567088, + "learning_rate": 8.75970297823878e-06, + "loss": 0.2311, + "step": 71230 + }, + { + "epoch": 0.9187693855310588, + "grad_norm": 0.8542157860317454, + "learning_rate": 8.759208313173377e-06, + "loss": 0.2349, + "step": 71240 + }, + { + "epoch": 0.9188983537210547, + "grad_norm": 0.7581447554124552, + "learning_rate": 8.758713563457487e-06, + "loss": 0.2187, + "step": 71250 + }, + { + "epoch": 0.9190273219110506, + "grad_norm": 0.8843732213529196, + "learning_rate": 8.75821872910225e-06, + "loss": 0.2283, + "step": 71260 + }, + { + "epoch": 0.9191562901010466, + "grad_norm": 0.8103181236047505, + "learning_rate": 8.75772381011881e-06, + "loss": 0.2224, + "step": 71270 + }, + { + "epoch": 0.9192852582910425, + "grad_norm": 0.8446995383664851, + "learning_rate": 8.757228806518312e-06, + "loss": 0.2295, + "step": 71280 + }, + { + "epoch": 0.9194142264810384, + "grad_norm": 0.7869965232496805, + "learning_rate": 8.7567337183119e-06, + "loss": 0.2338, + "step": 71290 + }, + { + "epoch": 0.9195431946710344, + "grad_norm": 0.7730795767152355, + "learning_rate": 8.756238545510724e-06, + "loss": 0.214, + "step": 71300 + }, + { + "epoch": 0.9196721628610304, + "grad_norm": 0.7745083548370237, + "learning_rate": 8.755743288125937e-06, + "loss": 0.2188, + "step": 71310 + }, + { + "epoch": 0.9198011310510262, + "grad_norm": 0.8381936584723086, + "learning_rate": 8.755247946168686e-06, + "loss": 0.2194, + "step": 71320 + }, + { + "epoch": 0.9199300992410222, + "grad_norm": 0.9320459183891252, + "learning_rate": 8.754752519650129e-06, + "loss": 0.2356, + "step": 71330 + }, + { + "epoch": 0.9200590674310182, + "grad_norm": 0.8368308566995377, + "learning_rate": 8.754257008581422e-06, + "loss": 0.2331, + "step": 71340 + }, + { + "epoch": 0.920188035621014, + "grad_norm": 0.8905435242973991, + "learning_rate": 8.753761412973721e-06, + "loss": 0.2393, + "step": 71350 + }, + { + "epoch": 0.92031700381101, + "grad_norm": 0.7562957791741252, + "learning_rate": 8.753265732838187e-06, + "loss": 0.2234, + "step": 71360 + }, + { + "epoch": 0.920445972001006, + "grad_norm": 0.8189172886150602, + "learning_rate": 8.752769968185983e-06, + "loss": 0.2328, + "step": 71370 + }, + { + "epoch": 0.920574940191002, + "grad_norm": 0.9600118005473938, + "learning_rate": 8.752274119028271e-06, + "loss": 0.2329, + "step": 71380 + }, + { + "epoch": 0.9207039083809978, + "grad_norm": 0.8136413606730669, + "learning_rate": 8.751778185376218e-06, + "loss": 0.2185, + "step": 71390 + }, + { + "epoch": 0.9208328765709938, + "grad_norm": 0.8681825161788161, + "learning_rate": 8.751282167240988e-06, + "loss": 0.2298, + "step": 71400 + }, + { + "epoch": 0.9209618447609897, + "grad_norm": 0.7881302273459663, + "learning_rate": 8.750786064633756e-06, + "loss": 0.2315, + "step": 71410 + }, + { + "epoch": 0.9210908129509856, + "grad_norm": 0.9227366088570814, + "learning_rate": 8.75028987756569e-06, + "loss": 0.2245, + "step": 71420 + }, + { + "epoch": 0.9212197811409816, + "grad_norm": 0.8800672929990011, + "learning_rate": 8.749793606047962e-06, + "loss": 0.2301, + "step": 71430 + }, + { + "epoch": 0.9213487493309775, + "grad_norm": 0.8060509649794101, + "learning_rate": 8.74929725009175e-06, + "loss": 0.227, + "step": 71440 + }, + { + "epoch": 0.9214777175209734, + "grad_norm": 0.7822101009867446, + "learning_rate": 8.74880080970823e-06, + "loss": 0.2169, + "step": 71450 + }, + { + "epoch": 0.9216066857109694, + "grad_norm": 0.8628217112452512, + "learning_rate": 8.74830428490858e-06, + "loss": 0.2243, + "step": 71460 + }, + { + "epoch": 0.9217356539009653, + "grad_norm": 0.9297812065342433, + "learning_rate": 8.747807675703983e-06, + "loss": 0.228, + "step": 71470 + }, + { + "epoch": 0.9218646220909613, + "grad_norm": 0.9774518904799965, + "learning_rate": 8.74731098210562e-06, + "loss": 0.2209, + "step": 71480 + }, + { + "epoch": 0.9219935902809572, + "grad_norm": 0.9342344479059091, + "learning_rate": 8.746814204124673e-06, + "loss": 0.2393, + "step": 71490 + }, + { + "epoch": 0.9221225584709531, + "grad_norm": 0.8503345206965147, + "learning_rate": 8.746317341772335e-06, + "loss": 0.2229, + "step": 71500 + }, + { + "epoch": 0.9222515266609491, + "grad_norm": 0.7712001036312619, + "learning_rate": 8.74582039505979e-06, + "loss": 0.2352, + "step": 71510 + }, + { + "epoch": 0.922380494850945, + "grad_norm": 0.9327055557226721, + "learning_rate": 8.745323363998228e-06, + "loss": 0.2388, + "step": 71520 + }, + { + "epoch": 0.922509463040941, + "grad_norm": 0.7630936796166617, + "learning_rate": 8.744826248598843e-06, + "loss": 0.2226, + "step": 71530 + }, + { + "epoch": 0.9226384312309369, + "grad_norm": 0.8884937359423569, + "learning_rate": 8.744329048872828e-06, + "loss": 0.2179, + "step": 71540 + }, + { + "epoch": 0.9227673994209328, + "grad_norm": 0.9272396693879706, + "learning_rate": 8.74383176483138e-06, + "loss": 0.2253, + "step": 71550 + }, + { + "epoch": 0.9228963676109287, + "grad_norm": 0.8100993491068347, + "learning_rate": 8.743334396485698e-06, + "loss": 0.2295, + "step": 71560 + }, + { + "epoch": 0.9230253358009247, + "grad_norm": 0.8443391737878053, + "learning_rate": 8.742836943846979e-06, + "loss": 0.214, + "step": 71570 + }, + { + "epoch": 0.9231543039909207, + "grad_norm": 0.7623410646259497, + "learning_rate": 8.742339406926428e-06, + "loss": 0.2305, + "step": 71580 + }, + { + "epoch": 0.9232832721809165, + "grad_norm": 0.9482225109098912, + "learning_rate": 8.741841785735243e-06, + "loss": 0.2285, + "step": 71590 + }, + { + "epoch": 0.9234122403709125, + "grad_norm": 0.8666527579003926, + "learning_rate": 8.741344080284636e-06, + "loss": 0.2398, + "step": 71600 + }, + { + "epoch": 0.9235412085609085, + "grad_norm": 0.840762832388236, + "learning_rate": 8.740846290585812e-06, + "loss": 0.235, + "step": 71610 + }, + { + "epoch": 0.9236701767509043, + "grad_norm": 0.7936245819809925, + "learning_rate": 8.740348416649977e-06, + "loss": 0.2237, + "step": 71620 + }, + { + "epoch": 0.9237991449409003, + "grad_norm": 0.8269856426311171, + "learning_rate": 8.739850458488349e-06, + "loss": 0.2402, + "step": 71630 + }, + { + "epoch": 0.9239281131308963, + "grad_norm": 0.8202464866927984, + "learning_rate": 8.739352416112135e-06, + "loss": 0.2384, + "step": 71640 + }, + { + "epoch": 0.9240570813208921, + "grad_norm": 0.7760199141480746, + "learning_rate": 8.738854289532552e-06, + "loss": 0.2136, + "step": 71650 + }, + { + "epoch": 0.9241860495108881, + "grad_norm": 0.9712620588430055, + "learning_rate": 8.738356078760819e-06, + "loss": 0.2211, + "step": 71660 + }, + { + "epoch": 0.9243150177008841, + "grad_norm": 0.9191703333560151, + "learning_rate": 8.73785778380815e-06, + "loss": 0.2068, + "step": 71670 + }, + { + "epoch": 0.9244439858908801, + "grad_norm": 0.8719687052824212, + "learning_rate": 8.73735940468577e-06, + "loss": 0.2177, + "step": 71680 + }, + { + "epoch": 0.9245729540808759, + "grad_norm": 0.8522036728211949, + "learning_rate": 8.736860941404901e-06, + "loss": 0.2163, + "step": 71690 + }, + { + "epoch": 0.9247019222708719, + "grad_norm": 0.837739104811926, + "learning_rate": 8.736362393976766e-06, + "loss": 0.2272, + "step": 71700 + }, + { + "epoch": 0.9248308904608679, + "grad_norm": 0.8291224776210147, + "learning_rate": 8.735863762412593e-06, + "loss": 0.2297, + "step": 71710 + }, + { + "epoch": 0.9249598586508637, + "grad_norm": 0.790702254529916, + "learning_rate": 8.73536504672361e-06, + "loss": 0.2291, + "step": 71720 + }, + { + "epoch": 0.9250888268408597, + "grad_norm": 0.9207528540578772, + "learning_rate": 8.734866246921043e-06, + "loss": 0.217, + "step": 71730 + }, + { + "epoch": 0.9252177950308557, + "grad_norm": 0.8574624569897317, + "learning_rate": 8.734367363016131e-06, + "loss": 0.2309, + "step": 71740 + }, + { + "epoch": 0.9253467632208516, + "grad_norm": 0.9272735461277039, + "learning_rate": 8.733868395020104e-06, + "loss": 0.2386, + "step": 71750 + }, + { + "epoch": 0.9254757314108475, + "grad_norm": 0.9224238777337428, + "learning_rate": 8.733369342944196e-06, + "loss": 0.2202, + "step": 71760 + }, + { + "epoch": 0.9256046996008435, + "grad_norm": 0.8577525251367669, + "learning_rate": 8.73287020679965e-06, + "loss": 0.23, + "step": 71770 + }, + { + "epoch": 0.9257336677908394, + "grad_norm": 0.7456791239776568, + "learning_rate": 8.732370986597701e-06, + "loss": 0.2265, + "step": 71780 + }, + { + "epoch": 0.9258626359808353, + "grad_norm": 0.8673580738305947, + "learning_rate": 8.731871682349592e-06, + "loss": 0.2103, + "step": 71790 + }, + { + "epoch": 0.9259916041708313, + "grad_norm": 0.8260289776724793, + "learning_rate": 8.731372294066569e-06, + "loss": 0.2332, + "step": 71800 + }, + { + "epoch": 0.9261205723608272, + "grad_norm": 0.8812662777061115, + "learning_rate": 8.730872821759873e-06, + "loss": 0.2294, + "step": 71810 + }, + { + "epoch": 0.9262495405508231, + "grad_norm": 0.8928624320839389, + "learning_rate": 8.730373265440754e-06, + "loss": 0.22, + "step": 71820 + }, + { + "epoch": 0.9263785087408191, + "grad_norm": 0.899127565312102, + "learning_rate": 8.72987362512046e-06, + "loss": 0.2293, + "step": 71830 + }, + { + "epoch": 0.926507476930815, + "grad_norm": 0.8471007864646943, + "learning_rate": 8.729373900810242e-06, + "loss": 0.2279, + "step": 71840 + }, + { + "epoch": 0.926636445120811, + "grad_norm": 0.8227381318205219, + "learning_rate": 8.728874092521354e-06, + "loss": 0.2434, + "step": 71850 + }, + { + "epoch": 0.9267654133108069, + "grad_norm": 0.8202736796712427, + "learning_rate": 8.728374200265048e-06, + "loss": 0.2312, + "step": 71860 + }, + { + "epoch": 0.9268943815008028, + "grad_norm": 0.8157239777258944, + "learning_rate": 8.727874224052583e-06, + "loss": 0.231, + "step": 71870 + }, + { + "epoch": 0.9270233496907988, + "grad_norm": 0.844958900182338, + "learning_rate": 8.72737416389522e-06, + "loss": 0.2434, + "step": 71880 + }, + { + "epoch": 0.9271523178807947, + "grad_norm": 0.8768669965867704, + "learning_rate": 8.726874019804215e-06, + "loss": 0.2147, + "step": 71890 + }, + { + "epoch": 0.9272812860707906, + "grad_norm": 0.798214999525013, + "learning_rate": 8.72637379179083e-06, + "loss": 0.2386, + "step": 71900 + }, + { + "epoch": 0.9274102542607866, + "grad_norm": 0.7919439264783271, + "learning_rate": 8.725873479866333e-06, + "loss": 0.2266, + "step": 71910 + }, + { + "epoch": 0.9275392224507825, + "grad_norm": 0.8503519530145394, + "learning_rate": 8.725373084041988e-06, + "loss": 0.2214, + "step": 71920 + }, + { + "epoch": 0.9276681906407784, + "grad_norm": 0.8657399381623151, + "learning_rate": 8.724872604329065e-06, + "loss": 0.2367, + "step": 71930 + }, + { + "epoch": 0.9277971588307744, + "grad_norm": 0.8414269073652444, + "learning_rate": 8.72437204073883e-06, + "loss": 0.2341, + "step": 71940 + }, + { + "epoch": 0.9279261270207704, + "grad_norm": 0.8587396061372913, + "learning_rate": 8.723871393282556e-06, + "loss": 0.2274, + "step": 71950 + }, + { + "epoch": 0.9280550952107662, + "grad_norm": 0.7870862839558628, + "learning_rate": 8.723370661971518e-06, + "loss": 0.2211, + "step": 71960 + }, + { + "epoch": 0.9281840634007622, + "grad_norm": 0.8752485444631775, + "learning_rate": 8.722869846816992e-06, + "loss": 0.2185, + "step": 71970 + }, + { + "epoch": 0.9283130315907582, + "grad_norm": 0.8545842810086199, + "learning_rate": 8.722368947830255e-06, + "loss": 0.2369, + "step": 71980 + }, + { + "epoch": 0.928441999780754, + "grad_norm": 0.8109665119944854, + "learning_rate": 8.721867965022585e-06, + "loss": 0.2298, + "step": 71990 + }, + { + "epoch": 0.92857096797075, + "grad_norm": 0.8022838617348522, + "learning_rate": 8.721366898405263e-06, + "loss": 0.2306, + "step": 72000 + }, + { + "epoch": 0.928699936160746, + "grad_norm": 0.876750647342623, + "learning_rate": 8.720865747989575e-06, + "loss": 0.2338, + "step": 72010 + }, + { + "epoch": 0.9288289043507418, + "grad_norm": 0.9493734287956122, + "learning_rate": 8.720364513786805e-06, + "loss": 0.2196, + "step": 72020 + }, + { + "epoch": 0.9289578725407378, + "grad_norm": 0.7891390699833308, + "learning_rate": 8.719863195808237e-06, + "loss": 0.2247, + "step": 72030 + }, + { + "epoch": 0.9290868407307338, + "grad_norm": 0.8624205092741749, + "learning_rate": 8.719361794065161e-06, + "loss": 0.2251, + "step": 72040 + }, + { + "epoch": 0.9292158089207297, + "grad_norm": 0.8812889794953541, + "learning_rate": 8.71886030856887e-06, + "loss": 0.227, + "step": 72050 + }, + { + "epoch": 0.9293447771107256, + "grad_norm": 0.8454399272499458, + "learning_rate": 8.718358739330656e-06, + "loss": 0.2249, + "step": 72060 + }, + { + "epoch": 0.9294737453007216, + "grad_norm": 0.7607734435533529, + "learning_rate": 8.717857086361811e-06, + "loss": 0.2338, + "step": 72070 + }, + { + "epoch": 0.9296027134907175, + "grad_norm": 0.7986168941433983, + "learning_rate": 8.717355349673633e-06, + "loss": 0.2242, + "step": 72080 + }, + { + "epoch": 0.9297316816807134, + "grad_norm": 0.8746599743813003, + "learning_rate": 8.71685352927742e-06, + "loss": 0.2352, + "step": 72090 + }, + { + "epoch": 0.9298606498707094, + "grad_norm": 0.8576629623311488, + "learning_rate": 8.716351625184472e-06, + "loss": 0.2319, + "step": 72100 + }, + { + "epoch": 0.9299896180607053, + "grad_norm": 0.8235410816141847, + "learning_rate": 8.71584963740609e-06, + "loss": 0.2189, + "step": 72110 + }, + { + "epoch": 0.9301185862507013, + "grad_norm": 0.8900972417118582, + "learning_rate": 8.715347565953581e-06, + "loss": 0.2323, + "step": 72120 + }, + { + "epoch": 0.9302475544406972, + "grad_norm": 0.8232538695746195, + "learning_rate": 8.714845410838247e-06, + "loss": 0.2408, + "step": 72130 + }, + { + "epoch": 0.9303765226306931, + "grad_norm": 0.7713324860568149, + "learning_rate": 8.714343172071397e-06, + "loss": 0.2221, + "step": 72140 + }, + { + "epoch": 0.9305054908206891, + "grad_norm": 0.799974719529586, + "learning_rate": 8.713840849664342e-06, + "loss": 0.2177, + "step": 72150 + }, + { + "epoch": 0.930634459010685, + "grad_norm": 0.8517662185599539, + "learning_rate": 8.713338443628392e-06, + "loss": 0.2367, + "step": 72160 + }, + { + "epoch": 0.930763427200681, + "grad_norm": 0.8374811551775122, + "learning_rate": 8.712835953974858e-06, + "loss": 0.2254, + "step": 72170 + }, + { + "epoch": 0.9308923953906769, + "grad_norm": 0.7965651357066942, + "learning_rate": 8.712333380715058e-06, + "loss": 0.2248, + "step": 72180 + }, + { + "epoch": 0.9310213635806728, + "grad_norm": 0.9911833189550304, + "learning_rate": 8.71183072386031e-06, + "loss": 0.2479, + "step": 72190 + }, + { + "epoch": 0.9311503317706687, + "grad_norm": 0.8641631302249879, + "learning_rate": 8.711327983421931e-06, + "loss": 0.2222, + "step": 72200 + }, + { + "epoch": 0.9312792999606647, + "grad_norm": 0.8191548394537562, + "learning_rate": 8.710825159411243e-06, + "loss": 0.2344, + "step": 72210 + }, + { + "epoch": 0.9314082681506607, + "grad_norm": 0.8254760187036724, + "learning_rate": 8.710322251839565e-06, + "loss": 0.2216, + "step": 72220 + }, + { + "epoch": 0.9315372363406565, + "grad_norm": 0.8691977805758238, + "learning_rate": 8.709819260718226e-06, + "loss": 0.2179, + "step": 72230 + }, + { + "epoch": 0.9316662045306525, + "grad_norm": 0.7818983226688829, + "learning_rate": 8.709316186058551e-06, + "loss": 0.2338, + "step": 72240 + }, + { + "epoch": 0.9317951727206485, + "grad_norm": 0.7726304968374611, + "learning_rate": 8.708813027871869e-06, + "loss": 0.226, + "step": 72250 + }, + { + "epoch": 0.9319241409106443, + "grad_norm": 0.7923412122848942, + "learning_rate": 8.708309786169509e-06, + "loss": 0.2233, + "step": 72260 + }, + { + "epoch": 0.9320531091006403, + "grad_norm": 0.8007869771735633, + "learning_rate": 8.707806460962802e-06, + "loss": 0.2291, + "step": 72270 + }, + { + "epoch": 0.9321820772906363, + "grad_norm": 1.0009408169602594, + "learning_rate": 8.707303052263083e-06, + "loss": 0.2255, + "step": 72280 + }, + { + "epoch": 0.9323110454806322, + "grad_norm": 0.8674839549268277, + "learning_rate": 8.706799560081692e-06, + "loss": 0.219, + "step": 72290 + }, + { + "epoch": 0.9324400136706281, + "grad_norm": 0.7910640694096908, + "learning_rate": 8.70629598442996e-06, + "loss": 0.2287, + "step": 72300 + }, + { + "epoch": 0.9325689818606241, + "grad_norm": 0.8648275610971576, + "learning_rate": 8.70579232531923e-06, + "loss": 0.2237, + "step": 72310 + }, + { + "epoch": 0.9326979500506201, + "grad_norm": 0.9018925763643224, + "learning_rate": 8.705288582760843e-06, + "loss": 0.248, + "step": 72320 + }, + { + "epoch": 0.9328269182406159, + "grad_norm": 0.8629130797295449, + "learning_rate": 8.704784756766143e-06, + "loss": 0.2223, + "step": 72330 + }, + { + "epoch": 0.9329558864306119, + "grad_norm": 0.9187088305264686, + "learning_rate": 8.704280847346474e-06, + "loss": 0.2254, + "step": 72340 + }, + { + "epoch": 0.9330848546206079, + "grad_norm": 0.9910793816377099, + "learning_rate": 8.703776854513182e-06, + "loss": 0.2369, + "step": 72350 + }, + { + "epoch": 0.9332138228106037, + "grad_norm": 0.8881470505869072, + "learning_rate": 8.703272778277621e-06, + "loss": 0.2201, + "step": 72360 + }, + { + "epoch": 0.9333427910005997, + "grad_norm": 0.8840687695440753, + "learning_rate": 8.702768618651136e-06, + "loss": 0.2284, + "step": 72370 + }, + { + "epoch": 0.9334717591905957, + "grad_norm": 0.8211155870921, + "learning_rate": 8.702264375645085e-06, + "loss": 0.221, + "step": 72380 + }, + { + "epoch": 0.9336007273805915, + "grad_norm": 0.8620729477273941, + "learning_rate": 8.701760049270818e-06, + "loss": 0.2278, + "step": 72390 + }, + { + "epoch": 0.9337296955705875, + "grad_norm": 0.929746165349796, + "learning_rate": 8.701255639539694e-06, + "loss": 0.2202, + "step": 72400 + }, + { + "epoch": 0.9338586637605835, + "grad_norm": 0.7955796707667793, + "learning_rate": 8.700751146463071e-06, + "loss": 0.22, + "step": 72410 + }, + { + "epoch": 0.9339876319505794, + "grad_norm": 0.9067699878484395, + "learning_rate": 8.70024657005231e-06, + "loss": 0.2162, + "step": 72420 + }, + { + "epoch": 0.9341166001405753, + "grad_norm": 0.7811487882030755, + "learning_rate": 8.699741910318771e-06, + "loss": 0.2215, + "step": 72430 + }, + { + "epoch": 0.9342455683305713, + "grad_norm": 0.8033733987716846, + "learning_rate": 8.69923716727382e-06, + "loss": 0.2229, + "step": 72440 + }, + { + "epoch": 0.9343745365205672, + "grad_norm": 0.7940456293438422, + "learning_rate": 8.698732340928824e-06, + "loss": 0.2321, + "step": 72450 + }, + { + "epoch": 0.9345035047105631, + "grad_norm": 0.8599640367791473, + "learning_rate": 8.698227431295146e-06, + "loss": 0.2359, + "step": 72460 + }, + { + "epoch": 0.9346324729005591, + "grad_norm": 0.832245161446501, + "learning_rate": 8.69772243838416e-06, + "loss": 0.2184, + "step": 72470 + }, + { + "epoch": 0.934761441090555, + "grad_norm": 0.816456004666729, + "learning_rate": 8.697217362207235e-06, + "loss": 0.2213, + "step": 72480 + }, + { + "epoch": 0.934890409280551, + "grad_norm": 0.8928615963619726, + "learning_rate": 8.696712202775748e-06, + "loss": 0.2333, + "step": 72490 + }, + { + "epoch": 0.9350193774705469, + "grad_norm": 0.7723792249580417, + "learning_rate": 8.69620696010107e-06, + "loss": 0.2194, + "step": 72500 + }, + { + "epoch": 0.9351483456605428, + "grad_norm": 0.7892043895215747, + "learning_rate": 8.69570163419458e-06, + "loss": 0.222, + "step": 72510 + }, + { + "epoch": 0.9352773138505388, + "grad_norm": 0.8198859135857786, + "learning_rate": 8.695196225067657e-06, + "loss": 0.2336, + "step": 72520 + }, + { + "epoch": 0.9354062820405347, + "grad_norm": 0.8300573514448835, + "learning_rate": 8.694690732731681e-06, + "loss": 0.2215, + "step": 72530 + }, + { + "epoch": 0.9355352502305306, + "grad_norm": 0.7864769444015786, + "learning_rate": 8.694185157198035e-06, + "loss": 0.2374, + "step": 72540 + }, + { + "epoch": 0.9356642184205266, + "grad_norm": 0.8093990517594329, + "learning_rate": 8.693679498478107e-06, + "loss": 0.2163, + "step": 72550 + }, + { + "epoch": 0.9357931866105225, + "grad_norm": 0.8994251253387503, + "learning_rate": 8.693173756583279e-06, + "loss": 0.235, + "step": 72560 + }, + { + "epoch": 0.9359221548005184, + "grad_norm": 0.9153813307508701, + "learning_rate": 8.692667931524943e-06, + "loss": 0.2233, + "step": 72570 + }, + { + "epoch": 0.9360511229905144, + "grad_norm": 0.9113332017188119, + "learning_rate": 8.692162023314486e-06, + "loss": 0.2278, + "step": 72580 + }, + { + "epoch": 0.9361800911805104, + "grad_norm": 0.8890642703381283, + "learning_rate": 8.6916560319633e-06, + "loss": 0.2282, + "step": 72590 + }, + { + "epoch": 0.9363090593705062, + "grad_norm": 0.9307838349389904, + "learning_rate": 8.691149957482782e-06, + "loss": 0.2241, + "step": 72600 + }, + { + "epoch": 0.9364380275605022, + "grad_norm": 0.8334388065977172, + "learning_rate": 8.690643799884326e-06, + "loss": 0.2316, + "step": 72610 + }, + { + "epoch": 0.9365669957504982, + "grad_norm": 0.8840317428399896, + "learning_rate": 8.69013755917933e-06, + "loss": 0.2258, + "step": 72620 + }, + { + "epoch": 0.936695963940494, + "grad_norm": 0.8775033139648603, + "learning_rate": 8.689631235379194e-06, + "loss": 0.2271, + "step": 72630 + }, + { + "epoch": 0.93682493213049, + "grad_norm": 0.7925790701960661, + "learning_rate": 8.68912482849532e-06, + "loss": 0.2248, + "step": 72640 + }, + { + "epoch": 0.936953900320486, + "grad_norm": 0.8436219613337487, + "learning_rate": 8.68861833853911e-06, + "loss": 0.2361, + "step": 72650 + }, + { + "epoch": 0.9370828685104818, + "grad_norm": 0.8948179495662457, + "learning_rate": 8.68811176552197e-06, + "loss": 0.2328, + "step": 72660 + }, + { + "epoch": 0.9372118367004778, + "grad_norm": 0.9054417712109647, + "learning_rate": 8.687605109455307e-06, + "loss": 0.2258, + "step": 72670 + }, + { + "epoch": 0.9373408048904738, + "grad_norm": 0.8539388832718429, + "learning_rate": 8.687098370350528e-06, + "loss": 0.2178, + "step": 72680 + }, + { + "epoch": 0.9374697730804697, + "grad_norm": 0.8164782703456106, + "learning_rate": 8.686591548219047e-06, + "loss": 0.2355, + "step": 72690 + }, + { + "epoch": 0.9375987412704656, + "grad_norm": 0.8723525486196718, + "learning_rate": 8.686084643072275e-06, + "loss": 0.2335, + "step": 72700 + }, + { + "epoch": 0.9377277094604616, + "grad_norm": 0.8497994121438605, + "learning_rate": 8.685577654921627e-06, + "loss": 0.2276, + "step": 72710 + }, + { + "epoch": 0.9378566776504575, + "grad_norm": 0.9057458038194649, + "learning_rate": 8.685070583778521e-06, + "loss": 0.2256, + "step": 72720 + }, + { + "epoch": 0.9379856458404534, + "grad_norm": 0.736999019047749, + "learning_rate": 8.684563429654373e-06, + "loss": 0.2261, + "step": 72730 + }, + { + "epoch": 0.9381146140304494, + "grad_norm": 0.9271590040504708, + "learning_rate": 8.684056192560601e-06, + "loss": 0.2302, + "step": 72740 + }, + { + "epoch": 0.9382435822204453, + "grad_norm": 0.9591670182547819, + "learning_rate": 8.683548872508634e-06, + "loss": 0.2089, + "step": 72750 + }, + { + "epoch": 0.9383725504104413, + "grad_norm": 0.855485480437972, + "learning_rate": 8.68304146950989e-06, + "loss": 0.2399, + "step": 72760 + }, + { + "epoch": 0.9385015186004372, + "grad_norm": 0.8493383953277375, + "learning_rate": 8.682533983575796e-06, + "loss": 0.2314, + "step": 72770 + }, + { + "epoch": 0.9386304867904331, + "grad_norm": 0.8786074656799551, + "learning_rate": 8.682026414717782e-06, + "loss": 0.2433, + "step": 72780 + }, + { + "epoch": 0.9387594549804291, + "grad_norm": 0.8409246901850314, + "learning_rate": 8.681518762947276e-06, + "loss": 0.2213, + "step": 72790 + }, + { + "epoch": 0.938888423170425, + "grad_norm": 0.8737354660243248, + "learning_rate": 8.681011028275707e-06, + "loss": 0.2285, + "step": 72800 + }, + { + "epoch": 0.939017391360421, + "grad_norm": 0.8852064848886984, + "learning_rate": 8.680503210714512e-06, + "loss": 0.2248, + "step": 72810 + }, + { + "epoch": 0.9391463595504169, + "grad_norm": 0.7829720152524958, + "learning_rate": 8.679995310275124e-06, + "loss": 0.2202, + "step": 72820 + }, + { + "epoch": 0.9392753277404128, + "grad_norm": 0.8202244666904551, + "learning_rate": 8.679487326968982e-06, + "loss": 0.2245, + "step": 72830 + }, + { + "epoch": 0.9394042959304088, + "grad_norm": 0.8180175304935934, + "learning_rate": 8.678979260807521e-06, + "loss": 0.2291, + "step": 72840 + }, + { + "epoch": 0.9395332641204047, + "grad_norm": 0.8043838926985465, + "learning_rate": 8.678471111802187e-06, + "loss": 0.2219, + "step": 72850 + }, + { + "epoch": 0.9396622323104007, + "grad_norm": 0.9259520818496888, + "learning_rate": 8.677962879964418e-06, + "loss": 0.2446, + "step": 72860 + }, + { + "epoch": 0.9397912005003966, + "grad_norm": 0.8079798850912966, + "learning_rate": 8.677454565305661e-06, + "loss": 0.228, + "step": 72870 + }, + { + "epoch": 0.9399201686903925, + "grad_norm": 0.8783466274059388, + "learning_rate": 8.67694616783736e-06, + "loss": 0.2414, + "step": 72880 + }, + { + "epoch": 0.9400491368803885, + "grad_norm": 0.8548359112984214, + "learning_rate": 8.676437687570965e-06, + "loss": 0.2217, + "step": 72890 + }, + { + "epoch": 0.9401781050703844, + "grad_norm": 0.7896292590186931, + "learning_rate": 8.675929124517928e-06, + "loss": 0.2412, + "step": 72900 + }, + { + "epoch": 0.9403070732603803, + "grad_norm": 0.9128393774523434, + "learning_rate": 8.675420478689696e-06, + "loss": 0.2193, + "step": 72910 + }, + { + "epoch": 0.9404360414503763, + "grad_norm": 0.8469673793274507, + "learning_rate": 8.674911750097727e-06, + "loss": 0.2251, + "step": 72920 + }, + { + "epoch": 0.9405650096403722, + "grad_norm": 0.8402535210064432, + "learning_rate": 8.674402938753475e-06, + "loss": 0.2202, + "step": 72930 + }, + { + "epoch": 0.9406939778303681, + "grad_norm": 0.8569875317968305, + "learning_rate": 8.673894044668396e-06, + "loss": 0.2258, + "step": 72940 + }, + { + "epoch": 0.9408229460203641, + "grad_norm": 0.8316290720251015, + "learning_rate": 8.673385067853952e-06, + "loss": 0.2334, + "step": 72950 + }, + { + "epoch": 0.9409519142103601, + "grad_norm": 0.7789229399473944, + "learning_rate": 8.672876008321603e-06, + "loss": 0.2166, + "step": 72960 + }, + { + "epoch": 0.9410808824003559, + "grad_norm": 0.9878081660457835, + "learning_rate": 8.672366866082814e-06, + "loss": 0.229, + "step": 72970 + }, + { + "epoch": 0.9412098505903519, + "grad_norm": 0.8501722099898423, + "learning_rate": 8.671857641149046e-06, + "loss": 0.2379, + "step": 72980 + }, + { + "epoch": 0.9413388187803479, + "grad_norm": 0.8591869786223428, + "learning_rate": 8.671348333531768e-06, + "loss": 0.249, + "step": 72990 + }, + { + "epoch": 0.9414677869703437, + "grad_norm": 0.8394817449437058, + "learning_rate": 8.67083894324245e-06, + "loss": 0.2251, + "step": 73000 + }, + { + "epoch": 0.9415967551603397, + "grad_norm": 0.825627284677042, + "learning_rate": 8.67032947029256e-06, + "loss": 0.224, + "step": 73010 + }, + { + "epoch": 0.9417257233503357, + "grad_norm": 0.9030081619641194, + "learning_rate": 8.669819914693571e-06, + "loss": 0.2384, + "step": 73020 + }, + { + "epoch": 0.9418546915403315, + "grad_norm": 0.8724433863009203, + "learning_rate": 8.669310276456959e-06, + "loss": 0.2277, + "step": 73030 + }, + { + "epoch": 0.9419836597303275, + "grad_norm": 0.7903101534531195, + "learning_rate": 8.668800555594198e-06, + "loss": 0.2224, + "step": 73040 + }, + { + "epoch": 0.9421126279203235, + "grad_norm": 0.896104644226877, + "learning_rate": 8.66829075211677e-06, + "loss": 0.2174, + "step": 73050 + }, + { + "epoch": 0.9422415961103194, + "grad_norm": 0.8898067909779254, + "learning_rate": 8.667780866036149e-06, + "loss": 0.2365, + "step": 73060 + }, + { + "epoch": 0.9423705643003153, + "grad_norm": 0.8125646471060335, + "learning_rate": 8.66727089736382e-06, + "loss": 0.217, + "step": 73070 + }, + { + "epoch": 0.9424995324903113, + "grad_norm": 0.9086559230478728, + "learning_rate": 8.666760846111266e-06, + "loss": 0.2316, + "step": 73080 + }, + { + "epoch": 0.9426285006803072, + "grad_norm": 0.8903686966536879, + "learning_rate": 8.666250712289974e-06, + "loss": 0.2144, + "step": 73090 + }, + { + "epoch": 0.9427574688703031, + "grad_norm": 0.7755319601199236, + "learning_rate": 8.665740495911428e-06, + "loss": 0.2115, + "step": 73100 + }, + { + "epoch": 0.9428864370602991, + "grad_norm": 0.8371132284416896, + "learning_rate": 8.66523019698712e-06, + "loss": 0.205, + "step": 73110 + }, + { + "epoch": 0.943015405250295, + "grad_norm": 0.8059248417783393, + "learning_rate": 8.664719815528538e-06, + "loss": 0.2182, + "step": 73120 + }, + { + "epoch": 0.943144373440291, + "grad_norm": 0.8192484537583044, + "learning_rate": 8.66420935154718e-06, + "loss": 0.2356, + "step": 73130 + }, + { + "epoch": 0.9432733416302869, + "grad_norm": 0.8602222885882309, + "learning_rate": 8.663698805054534e-06, + "loss": 0.2238, + "step": 73140 + }, + { + "epoch": 0.9434023098202828, + "grad_norm": 0.8584245008414892, + "learning_rate": 8.663188176062101e-06, + "loss": 0.2389, + "step": 73150 + }, + { + "epoch": 0.9435312780102788, + "grad_norm": 0.8838405897841097, + "learning_rate": 8.66267746458138e-06, + "loss": 0.2157, + "step": 73160 + }, + { + "epoch": 0.9436602462002747, + "grad_norm": 0.8522689375921192, + "learning_rate": 8.662166670623869e-06, + "loss": 0.229, + "step": 73170 + }, + { + "epoch": 0.9437892143902706, + "grad_norm": 0.8105995475380026, + "learning_rate": 8.661655794201069e-06, + "loss": 0.2254, + "step": 73180 + }, + { + "epoch": 0.9439181825802666, + "grad_norm": 0.8261777947816727, + "learning_rate": 8.661144835324488e-06, + "loss": 0.2038, + "step": 73190 + }, + { + "epoch": 0.9440471507702625, + "grad_norm": 0.8603288786910287, + "learning_rate": 8.66063379400563e-06, + "loss": 0.2428, + "step": 73200 + }, + { + "epoch": 0.9441761189602584, + "grad_norm": 0.8392204125703066, + "learning_rate": 8.660122670256001e-06, + "loss": 0.2332, + "step": 73210 + }, + { + "epoch": 0.9443050871502544, + "grad_norm": 0.7633999928412959, + "learning_rate": 8.659611464087115e-06, + "loss": 0.227, + "step": 73220 + }, + { + "epoch": 0.9444340553402504, + "grad_norm": 0.7974889874507978, + "learning_rate": 8.659100175510478e-06, + "loss": 0.2129, + "step": 73230 + }, + { + "epoch": 0.9445630235302462, + "grad_norm": 0.8326699665688571, + "learning_rate": 8.658588804537607e-06, + "loss": 0.2305, + "step": 73240 + }, + { + "epoch": 0.9446919917202422, + "grad_norm": 0.9688223869786297, + "learning_rate": 8.658077351180014e-06, + "loss": 0.2218, + "step": 73250 + }, + { + "epoch": 0.9448209599102382, + "grad_norm": 0.8453226079728865, + "learning_rate": 8.657565815449219e-06, + "loss": 0.2295, + "step": 73260 + }, + { + "epoch": 0.944949928100234, + "grad_norm": 0.9080313780351765, + "learning_rate": 8.65705419735674e-06, + "loss": 0.2384, + "step": 73270 + }, + { + "epoch": 0.94507889629023, + "grad_norm": 0.8919969326827413, + "learning_rate": 8.656542496914097e-06, + "loss": 0.2255, + "step": 73280 + }, + { + "epoch": 0.945207864480226, + "grad_norm": 0.852201323430993, + "learning_rate": 8.656030714132812e-06, + "loss": 0.2435, + "step": 73290 + }, + { + "epoch": 0.9453368326702218, + "grad_norm": 0.7926512404374374, + "learning_rate": 8.655518849024411e-06, + "loss": 0.2321, + "step": 73300 + }, + { + "epoch": 0.9454658008602178, + "grad_norm": 0.9049426159117472, + "learning_rate": 8.655006901600422e-06, + "loss": 0.233, + "step": 73310 + }, + { + "epoch": 0.9455947690502138, + "grad_norm": 0.8658861568108447, + "learning_rate": 8.654494871872368e-06, + "loss": 0.2198, + "step": 73320 + }, + { + "epoch": 0.9457237372402097, + "grad_norm": 0.8268479497633816, + "learning_rate": 8.653982759851783e-06, + "loss": 0.2304, + "step": 73330 + }, + { + "epoch": 0.9458527054302056, + "grad_norm": 0.8758004883146937, + "learning_rate": 8.653470565550198e-06, + "loss": 0.2264, + "step": 73340 + }, + { + "epoch": 0.9459816736202016, + "grad_norm": 0.8433908038791443, + "learning_rate": 8.652958288979146e-06, + "loss": 0.2191, + "step": 73350 + }, + { + "epoch": 0.9461106418101976, + "grad_norm": 0.8926692333502243, + "learning_rate": 8.652445930150161e-06, + "loss": 0.223, + "step": 73360 + }, + { + "epoch": 0.9462396100001934, + "grad_norm": 0.8507807452785748, + "learning_rate": 8.651933489074784e-06, + "loss": 0.2167, + "step": 73370 + }, + { + "epoch": 0.9463685781901894, + "grad_norm": 0.7594193041946308, + "learning_rate": 8.651420965764552e-06, + "loss": 0.225, + "step": 73380 + }, + { + "epoch": 0.9464975463801854, + "grad_norm": 0.8049856367448733, + "learning_rate": 8.650908360231007e-06, + "loss": 0.2331, + "step": 73390 + }, + { + "epoch": 0.9466265145701812, + "grad_norm": 0.884832478982678, + "learning_rate": 8.65039567248569e-06, + "loss": 0.2248, + "step": 73400 + }, + { + "epoch": 0.9467554827601772, + "grad_norm": 0.9160607178719744, + "learning_rate": 8.649882902540149e-06, + "loss": 0.2145, + "step": 73410 + }, + { + "epoch": 0.9468844509501732, + "grad_norm": 0.8496509190480818, + "learning_rate": 8.649370050405928e-06, + "loss": 0.2263, + "step": 73420 + }, + { + "epoch": 0.9470134191401691, + "grad_norm": 0.9959071540164081, + "learning_rate": 8.648857116094575e-06, + "loss": 0.2372, + "step": 73430 + }, + { + "epoch": 0.947142387330165, + "grad_norm": 0.9369215755287925, + "learning_rate": 8.648344099617644e-06, + "loss": 0.2354, + "step": 73440 + }, + { + "epoch": 0.947271355520161, + "grad_norm": 0.7842360060201252, + "learning_rate": 8.647831000986684e-06, + "loss": 0.2181, + "step": 73450 + }, + { + "epoch": 0.9474003237101569, + "grad_norm": 0.8316594224191304, + "learning_rate": 8.64731782021325e-06, + "loss": 0.2221, + "step": 73460 + }, + { + "epoch": 0.9475292919001528, + "grad_norm": 0.8271625238594208, + "learning_rate": 8.646804557308898e-06, + "loss": 0.2207, + "step": 73470 + }, + { + "epoch": 0.9476582600901488, + "grad_norm": 0.8796176220597972, + "learning_rate": 8.646291212285185e-06, + "loss": 0.2195, + "step": 73480 + }, + { + "epoch": 0.9477872282801447, + "grad_norm": 0.8265199022746107, + "learning_rate": 8.645777785153672e-06, + "loss": 0.2261, + "step": 73490 + }, + { + "epoch": 0.9479161964701407, + "grad_norm": 0.800078042969657, + "learning_rate": 8.645264275925919e-06, + "loss": 0.2203, + "step": 73500 + }, + { + "epoch": 0.9480451646601366, + "grad_norm": 0.7584198698739788, + "learning_rate": 8.644750684613489e-06, + "loss": 0.2256, + "step": 73510 + }, + { + "epoch": 0.9481741328501325, + "grad_norm": 0.926095199498545, + "learning_rate": 8.644237011227949e-06, + "loss": 0.2227, + "step": 73520 + }, + { + "epoch": 0.9483031010401285, + "grad_norm": 0.8257925019179835, + "learning_rate": 8.643723255780866e-06, + "loss": 0.2309, + "step": 73530 + }, + { + "epoch": 0.9484320692301244, + "grad_norm": 0.8205196481066898, + "learning_rate": 8.643209418283807e-06, + "loss": 0.2338, + "step": 73540 + }, + { + "epoch": 0.9485610374201203, + "grad_norm": 0.9223895327527488, + "learning_rate": 8.642695498748343e-06, + "loss": 0.228, + "step": 73550 + }, + { + "epoch": 0.9486900056101163, + "grad_norm": 0.7988773548211565, + "learning_rate": 8.642181497186048e-06, + "loss": 0.2226, + "step": 73560 + }, + { + "epoch": 0.9488189738001122, + "grad_norm": 0.7652620352779228, + "learning_rate": 8.641667413608494e-06, + "loss": 0.2311, + "step": 73570 + }, + { + "epoch": 0.9489479419901081, + "grad_norm": 0.9037384360295, + "learning_rate": 8.64115324802726e-06, + "loss": 0.2223, + "step": 73580 + }, + { + "epoch": 0.9490769101801041, + "grad_norm": 0.8663359727148362, + "learning_rate": 8.640639000453921e-06, + "loss": 0.229, + "step": 73590 + }, + { + "epoch": 0.9492058783701001, + "grad_norm": 0.827138428989421, + "learning_rate": 8.64012467090006e-06, + "loss": 0.2227, + "step": 73600 + }, + { + "epoch": 0.9493348465600959, + "grad_norm": 0.8256683980348897, + "learning_rate": 8.639610259377259e-06, + "loss": 0.2277, + "step": 73610 + }, + { + "epoch": 0.9494638147500919, + "grad_norm": 0.8201802468974722, + "learning_rate": 8.639095765897096e-06, + "loss": 0.2199, + "step": 73620 + }, + { + "epoch": 0.9495927829400879, + "grad_norm": 0.8650280024033162, + "learning_rate": 8.638581190471162e-06, + "loss": 0.2299, + "step": 73630 + }, + { + "epoch": 0.9497217511300837, + "grad_norm": 0.7508487045034485, + "learning_rate": 8.638066533111044e-06, + "loss": 0.2178, + "step": 73640 + }, + { + "epoch": 0.9498507193200797, + "grad_norm": 0.8991395811378028, + "learning_rate": 8.637551793828329e-06, + "loss": 0.2313, + "step": 73650 + }, + { + "epoch": 0.9499796875100757, + "grad_norm": 0.9073619631060612, + "learning_rate": 8.637036972634608e-06, + "loss": 0.2192, + "step": 73660 + }, + { + "epoch": 0.9501086557000715, + "grad_norm": 0.8095921983234329, + "learning_rate": 8.636522069541477e-06, + "loss": 0.2238, + "step": 73670 + }, + { + "epoch": 0.9502376238900675, + "grad_norm": 0.8103454572679338, + "learning_rate": 8.636007084560526e-06, + "loss": 0.2286, + "step": 73680 + }, + { + "epoch": 0.9503665920800635, + "grad_norm": 0.8199641534941043, + "learning_rate": 8.635492017703355e-06, + "loss": 0.2234, + "step": 73690 + }, + { + "epoch": 0.9504955602700594, + "grad_norm": 0.9048994339775346, + "learning_rate": 8.634976868981562e-06, + "loss": 0.223, + "step": 73700 + }, + { + "epoch": 0.9506245284600553, + "grad_norm": 0.8466336903682442, + "learning_rate": 8.634461638406745e-06, + "loss": 0.2229, + "step": 73710 + }, + { + "epoch": 0.9507534966500513, + "grad_norm": 0.9356877803210636, + "learning_rate": 8.63394632599051e-06, + "loss": 0.2273, + "step": 73720 + }, + { + "epoch": 0.9508824648400472, + "grad_norm": 0.8801819676839898, + "learning_rate": 8.633430931744455e-06, + "loss": 0.2195, + "step": 73730 + }, + { + "epoch": 0.9510114330300431, + "grad_norm": 0.8552216994294914, + "learning_rate": 8.632915455680191e-06, + "loss": 0.2508, + "step": 73740 + }, + { + "epoch": 0.9511404012200391, + "grad_norm": 0.852900099400785, + "learning_rate": 8.632399897809323e-06, + "loss": 0.2328, + "step": 73750 + }, + { + "epoch": 0.951269369410035, + "grad_norm": 0.8239752482304972, + "learning_rate": 8.631884258143461e-06, + "loss": 0.2098, + "step": 73760 + }, + { + "epoch": 0.951398337600031, + "grad_norm": 0.8203466365628089, + "learning_rate": 8.631368536694218e-06, + "loss": 0.2185, + "step": 73770 + }, + { + "epoch": 0.9515273057900269, + "grad_norm": 0.8596069116776407, + "learning_rate": 8.630852733473203e-06, + "loss": 0.2143, + "step": 73780 + }, + { + "epoch": 0.9516562739800228, + "grad_norm": 0.8687910146945904, + "learning_rate": 8.630336848492036e-06, + "loss": 0.2427, + "step": 73790 + }, + { + "epoch": 0.9517852421700188, + "grad_norm": 0.8065991243608668, + "learning_rate": 8.62982088176233e-06, + "loss": 0.22, + "step": 73800 + }, + { + "epoch": 0.9519142103600147, + "grad_norm": 0.8797876783942442, + "learning_rate": 8.629304833295704e-06, + "loss": 0.2222, + "step": 73810 + }, + { + "epoch": 0.9520431785500106, + "grad_norm": 0.8108082593753102, + "learning_rate": 8.628788703103779e-06, + "loss": 0.219, + "step": 73820 + }, + { + "epoch": 0.9521721467400066, + "grad_norm": 0.9394864164119205, + "learning_rate": 8.628272491198179e-06, + "loss": 0.2372, + "step": 73830 + }, + { + "epoch": 0.9523011149300025, + "grad_norm": 0.8173255091949917, + "learning_rate": 8.627756197590527e-06, + "loss": 0.229, + "step": 73840 + }, + { + "epoch": 0.9524300831199984, + "grad_norm": 0.8357575399056676, + "learning_rate": 8.627239822292447e-06, + "loss": 0.2187, + "step": 73850 + }, + { + "epoch": 0.9525590513099944, + "grad_norm": 0.8732719831092846, + "learning_rate": 8.62672336531557e-06, + "loss": 0.2253, + "step": 73860 + }, + { + "epoch": 0.9526880194999904, + "grad_norm": 0.8782193397976621, + "learning_rate": 8.626206826671526e-06, + "loss": 0.2251, + "step": 73870 + }, + { + "epoch": 0.9528169876899862, + "grad_norm": 0.8724271967772816, + "learning_rate": 8.625690206371941e-06, + "loss": 0.2235, + "step": 73880 + }, + { + "epoch": 0.9529459558799822, + "grad_norm": 0.9110384725930755, + "learning_rate": 8.625173504428455e-06, + "loss": 0.2223, + "step": 73890 + }, + { + "epoch": 0.9530749240699782, + "grad_norm": 0.8642315316951511, + "learning_rate": 8.624656720852699e-06, + "loss": 0.2249, + "step": 73900 + }, + { + "epoch": 0.953203892259974, + "grad_norm": 0.9156353185289887, + "learning_rate": 8.624139855656311e-06, + "loss": 0.2328, + "step": 73910 + }, + { + "epoch": 0.95333286044997, + "grad_norm": 0.8417935325016472, + "learning_rate": 8.62362290885093e-06, + "loss": 0.2333, + "step": 73920 + }, + { + "epoch": 0.953461828639966, + "grad_norm": 0.7893495041463581, + "learning_rate": 8.623105880448196e-06, + "loss": 0.2209, + "step": 73930 + }, + { + "epoch": 0.9535907968299618, + "grad_norm": 0.7449318797695048, + "learning_rate": 8.622588770459756e-06, + "loss": 0.2182, + "step": 73940 + }, + { + "epoch": 0.9537197650199578, + "grad_norm": 0.8464922426174113, + "learning_rate": 8.622071578897248e-06, + "loss": 0.2301, + "step": 73950 + }, + { + "epoch": 0.9538487332099538, + "grad_norm": 0.7984957457430755, + "learning_rate": 8.621554305772322e-06, + "loss": 0.2334, + "step": 73960 + }, + { + "epoch": 0.9539777013999498, + "grad_norm": 0.8644761516471824, + "learning_rate": 8.621036951096624e-06, + "loss": 0.2181, + "step": 73970 + }, + { + "epoch": 0.9541066695899456, + "grad_norm": 0.7881439795837512, + "learning_rate": 8.620519514881805e-06, + "loss": 0.2266, + "step": 73980 + }, + { + "epoch": 0.9542356377799416, + "grad_norm": 0.8396730990378548, + "learning_rate": 8.620001997139516e-06, + "loss": 0.2208, + "step": 73990 + }, + { + "epoch": 0.9543646059699376, + "grad_norm": 0.8787660353242398, + "learning_rate": 8.619484397881412e-06, + "loss": 0.2329, + "step": 74000 + }, + { + "epoch": 0.9544935741599334, + "grad_norm": 0.9042103286234251, + "learning_rate": 8.618966717119149e-06, + "loss": 0.221, + "step": 74010 + }, + { + "epoch": 0.9546225423499294, + "grad_norm": 0.7861139249408361, + "learning_rate": 8.61844895486438e-06, + "loss": 0.2109, + "step": 74020 + }, + { + "epoch": 0.9547515105399254, + "grad_norm": 0.8474014181428567, + "learning_rate": 8.617931111128768e-06, + "loss": 0.2264, + "step": 74030 + }, + { + "epoch": 0.9548804787299212, + "grad_norm": 0.8814908717531312, + "learning_rate": 8.617413185923973e-06, + "loss": 0.2338, + "step": 74040 + }, + { + "epoch": 0.9550094469199172, + "grad_norm": 0.8634131998574922, + "learning_rate": 8.616895179261657e-06, + "loss": 0.233, + "step": 74050 + }, + { + "epoch": 0.9551384151099132, + "grad_norm": 0.7956291218044311, + "learning_rate": 8.616377091153484e-06, + "loss": 0.225, + "step": 74060 + }, + { + "epoch": 0.9552673832999091, + "grad_norm": 0.9028893303521968, + "learning_rate": 8.615858921611124e-06, + "loss": 0.2348, + "step": 74070 + }, + { + "epoch": 0.955396351489905, + "grad_norm": 0.7783113615615693, + "learning_rate": 8.615340670646242e-06, + "loss": 0.2206, + "step": 74080 + }, + { + "epoch": 0.955525319679901, + "grad_norm": 0.8468784730972031, + "learning_rate": 8.614822338270508e-06, + "loss": 0.2183, + "step": 74090 + }, + { + "epoch": 0.9556542878698969, + "grad_norm": 0.8904391694289827, + "learning_rate": 8.614303924495592e-06, + "loss": 0.2241, + "step": 74100 + }, + { + "epoch": 0.9557832560598928, + "grad_norm": 0.9350391909605222, + "learning_rate": 8.613785429333175e-06, + "loss": 0.2265, + "step": 74110 + }, + { + "epoch": 0.9559122242498888, + "grad_norm": 0.8217986845536673, + "learning_rate": 8.613266852794924e-06, + "loss": 0.2159, + "step": 74120 + }, + { + "epoch": 0.9560411924398847, + "grad_norm": 0.8689450630403446, + "learning_rate": 8.612748194892521e-06, + "loss": 0.2265, + "step": 74130 + }, + { + "epoch": 0.9561701606298807, + "grad_norm": 0.8781723145381097, + "learning_rate": 8.612229455637647e-06, + "loss": 0.2327, + "step": 74140 + }, + { + "epoch": 0.9562991288198766, + "grad_norm": 0.798428450513118, + "learning_rate": 8.61171063504198e-06, + "loss": 0.2194, + "step": 74150 + }, + { + "epoch": 0.9564280970098725, + "grad_norm": 0.7559471350555484, + "learning_rate": 8.6111917331172e-06, + "loss": 0.2182, + "step": 74160 + }, + { + "epoch": 0.9565570651998685, + "grad_norm": 0.8633230726061208, + "learning_rate": 8.610672749874999e-06, + "loss": 0.2269, + "step": 74170 + }, + { + "epoch": 0.9566860333898644, + "grad_norm": 0.8437234643567384, + "learning_rate": 8.610153685327057e-06, + "loss": 0.2293, + "step": 74180 + }, + { + "epoch": 0.9568150015798603, + "grad_norm": 0.8479670559779696, + "learning_rate": 8.609634539485068e-06, + "loss": 0.231, + "step": 74190 + }, + { + "epoch": 0.9569439697698563, + "grad_norm": 0.8385138323764144, + "learning_rate": 8.609115312360716e-06, + "loss": 0.2234, + "step": 74200 + }, + { + "epoch": 0.9570729379598522, + "grad_norm": 0.8134220588345085, + "learning_rate": 8.608596003965698e-06, + "loss": 0.2228, + "step": 74210 + }, + { + "epoch": 0.9572019061498481, + "grad_norm": 0.8873722967103538, + "learning_rate": 8.608076614311706e-06, + "loss": 0.2274, + "step": 74220 + }, + { + "epoch": 0.9573308743398441, + "grad_norm": 0.9378974769509018, + "learning_rate": 8.607557143410436e-06, + "loss": 0.2256, + "step": 74230 + }, + { + "epoch": 0.9574598425298401, + "grad_norm": 0.796825454571825, + "learning_rate": 8.607037591273585e-06, + "loss": 0.2247, + "step": 74240 + }, + { + "epoch": 0.9575888107198359, + "grad_norm": 0.8071128290514296, + "learning_rate": 8.606517957912855e-06, + "loss": 0.229, + "step": 74250 + }, + { + "epoch": 0.9577177789098319, + "grad_norm": 0.837296504813519, + "learning_rate": 8.605998243339941e-06, + "loss": 0.2186, + "step": 74260 + }, + { + "epoch": 0.9578467470998279, + "grad_norm": 0.8744613059307149, + "learning_rate": 8.605478447566554e-06, + "loss": 0.2269, + "step": 74270 + }, + { + "epoch": 0.9579757152898237, + "grad_norm": 0.8539850535883533, + "learning_rate": 8.604958570604392e-06, + "loss": 0.2236, + "step": 74280 + }, + { + "epoch": 0.9581046834798197, + "grad_norm": 0.72769002455212, + "learning_rate": 8.604438612465168e-06, + "loss": 0.2361, + "step": 74290 + }, + { + "epoch": 0.9582336516698157, + "grad_norm": 0.9163130879351489, + "learning_rate": 8.603918573160583e-06, + "loss": 0.2256, + "step": 74300 + }, + { + "epoch": 0.9583626198598115, + "grad_norm": 0.8052938268202708, + "learning_rate": 8.603398452702353e-06, + "loss": 0.2113, + "step": 74310 + }, + { + "epoch": 0.9584915880498075, + "grad_norm": 0.8051014420712741, + "learning_rate": 8.602878251102189e-06, + "loss": 0.2267, + "step": 74320 + }, + { + "epoch": 0.9586205562398035, + "grad_norm": 0.8331935671536713, + "learning_rate": 8.602357968371804e-06, + "loss": 0.2035, + "step": 74330 + }, + { + "epoch": 0.9587495244297994, + "grad_norm": 0.8457506375074859, + "learning_rate": 8.601837604522916e-06, + "loss": 0.2173, + "step": 74340 + }, + { + "epoch": 0.9588784926197953, + "grad_norm": 0.8631504933359246, + "learning_rate": 8.60131715956724e-06, + "loss": 0.2187, + "step": 74350 + }, + { + "epoch": 0.9590074608097913, + "grad_norm": 0.828128697862823, + "learning_rate": 8.600796633516495e-06, + "loss": 0.2304, + "step": 74360 + }, + { + "epoch": 0.9591364289997872, + "grad_norm": 0.8393990572811661, + "learning_rate": 8.600276026382405e-06, + "loss": 0.2231, + "step": 74370 + }, + { + "epoch": 0.9592653971897831, + "grad_norm": 0.8164045586303132, + "learning_rate": 8.599755338176692e-06, + "loss": 0.2265, + "step": 74380 + }, + { + "epoch": 0.9593943653797791, + "grad_norm": 0.7770816570342097, + "learning_rate": 8.59923456891108e-06, + "loss": 0.2333, + "step": 74390 + }, + { + "epoch": 0.959523333569775, + "grad_norm": 0.8827984761073392, + "learning_rate": 8.598713718597298e-06, + "loss": 0.2206, + "step": 74400 + }, + { + "epoch": 0.9596523017597709, + "grad_norm": 0.8833213285008628, + "learning_rate": 8.598192787247071e-06, + "loss": 0.2441, + "step": 74410 + }, + { + "epoch": 0.9597812699497669, + "grad_norm": 0.817723105359526, + "learning_rate": 8.597671774872133e-06, + "loss": 0.2296, + "step": 74420 + }, + { + "epoch": 0.9599102381397628, + "grad_norm": 0.767997365558034, + "learning_rate": 8.597150681484215e-06, + "loss": 0.22, + "step": 74430 + }, + { + "epoch": 0.9600392063297588, + "grad_norm": 0.9189613368413871, + "learning_rate": 8.596629507095052e-06, + "loss": 0.2269, + "step": 74440 + }, + { + "epoch": 0.9601681745197547, + "grad_norm": 0.921485431817907, + "learning_rate": 8.596108251716378e-06, + "loss": 0.2216, + "step": 74450 + }, + { + "epoch": 0.9602971427097506, + "grad_norm": 0.8246171666450773, + "learning_rate": 8.59558691535993e-06, + "loss": 0.2198, + "step": 74460 + }, + { + "epoch": 0.9604261108997466, + "grad_norm": 0.8951513372154514, + "learning_rate": 8.595065498037449e-06, + "loss": 0.2179, + "step": 74470 + }, + { + "epoch": 0.9605550790897425, + "grad_norm": 0.8393953318623253, + "learning_rate": 8.594543999760678e-06, + "loss": 0.2167, + "step": 74480 + }, + { + "epoch": 0.9606840472797384, + "grad_norm": 0.8387657904051329, + "learning_rate": 8.59402242054136e-06, + "loss": 0.2261, + "step": 74490 + }, + { + "epoch": 0.9608130154697344, + "grad_norm": 0.8140291024368227, + "learning_rate": 8.593500760391236e-06, + "loss": 0.2308, + "step": 74500 + }, + { + "epoch": 0.9609419836597304, + "grad_norm": 0.763432234425687, + "learning_rate": 8.592979019322055e-06, + "loss": 0.2196, + "step": 74510 + }, + { + "epoch": 0.9610709518497262, + "grad_norm": 0.8492892334668104, + "learning_rate": 8.592457197345568e-06, + "loss": 0.2252, + "step": 74520 + }, + { + "epoch": 0.9611999200397222, + "grad_norm": 0.8867727891419535, + "learning_rate": 8.591935294473524e-06, + "loss": 0.2301, + "step": 74530 + }, + { + "epoch": 0.9613288882297182, + "grad_norm": 0.87916340596226, + "learning_rate": 8.591413310717673e-06, + "loss": 0.2261, + "step": 74540 + }, + { + "epoch": 0.961457856419714, + "grad_norm": 0.8194180991661032, + "learning_rate": 8.590891246089773e-06, + "loss": 0.2231, + "step": 74550 + }, + { + "epoch": 0.96158682460971, + "grad_norm": 0.7881896466195882, + "learning_rate": 8.590369100601576e-06, + "loss": 0.229, + "step": 74560 + }, + { + "epoch": 0.961715792799706, + "grad_norm": 0.7685501003060703, + "learning_rate": 8.589846874264843e-06, + "loss": 0.2224, + "step": 74570 + }, + { + "epoch": 0.9618447609897018, + "grad_norm": 0.8040891684150989, + "learning_rate": 8.589324567091331e-06, + "loss": 0.2293, + "step": 74580 + }, + { + "epoch": 0.9619737291796978, + "grad_norm": 0.8662725496649152, + "learning_rate": 8.588802179092803e-06, + "loss": 0.2333, + "step": 74590 + }, + { + "epoch": 0.9621026973696938, + "grad_norm": 0.7764476306771857, + "learning_rate": 8.588279710281023e-06, + "loss": 0.2168, + "step": 74600 + }, + { + "epoch": 0.9622316655596898, + "grad_norm": 0.9246913297207846, + "learning_rate": 8.587757160667754e-06, + "loss": 0.2308, + "step": 74610 + }, + { + "epoch": 0.9623606337496856, + "grad_norm": 0.8669404039675437, + "learning_rate": 8.587234530264764e-06, + "loss": 0.2264, + "step": 74620 + }, + { + "epoch": 0.9624896019396816, + "grad_norm": 0.8757604620766336, + "learning_rate": 8.586711819083822e-06, + "loss": 0.2216, + "step": 74630 + }, + { + "epoch": 0.9626185701296776, + "grad_norm": 0.8713502323293578, + "learning_rate": 8.586189027136697e-06, + "loss": 0.2272, + "step": 74640 + }, + { + "epoch": 0.9627475383196734, + "grad_norm": 0.9608974651613799, + "learning_rate": 8.585666154435164e-06, + "loss": 0.2212, + "step": 74650 + }, + { + "epoch": 0.9628765065096694, + "grad_norm": 0.8380799133660789, + "learning_rate": 8.585143200990994e-06, + "loss": 0.2113, + "step": 74660 + }, + { + "epoch": 0.9630054746996654, + "grad_norm": 0.7215160072795637, + "learning_rate": 8.584620166815965e-06, + "loss": 0.2173, + "step": 74670 + }, + { + "epoch": 0.9631344428896612, + "grad_norm": 0.8861237876645721, + "learning_rate": 8.584097051921854e-06, + "loss": 0.2295, + "step": 74680 + }, + { + "epoch": 0.9632634110796572, + "grad_norm": 0.8034552764030799, + "learning_rate": 8.58357385632044e-06, + "loss": 0.2261, + "step": 74690 + }, + { + "epoch": 0.9633923792696532, + "grad_norm": 0.8358622045204533, + "learning_rate": 8.583050580023506e-06, + "loss": 0.2282, + "step": 74700 + }, + { + "epoch": 0.9635213474596491, + "grad_norm": 0.7855743964345747, + "learning_rate": 8.582527223042835e-06, + "loss": 0.2415, + "step": 74710 + }, + { + "epoch": 0.963650315649645, + "grad_norm": 0.8629564214971842, + "learning_rate": 8.58200378539021e-06, + "loss": 0.2281, + "step": 74720 + }, + { + "epoch": 0.963779283839641, + "grad_norm": 0.8503501027368775, + "learning_rate": 8.58148026707742e-06, + "loss": 0.2289, + "step": 74730 + }, + { + "epoch": 0.9639082520296369, + "grad_norm": 0.8019278086061958, + "learning_rate": 8.580956668116254e-06, + "loss": 0.2218, + "step": 74740 + }, + { + "epoch": 0.9640372202196328, + "grad_norm": 0.7738226393571378, + "learning_rate": 8.5804329885185e-06, + "loss": 0.2198, + "step": 74750 + }, + { + "epoch": 0.9641661884096288, + "grad_norm": 0.8287567963391033, + "learning_rate": 8.57990922829595e-06, + "loss": 0.2167, + "step": 74760 + }, + { + "epoch": 0.9642951565996247, + "grad_norm": 0.8028610188224583, + "learning_rate": 8.579385387460404e-06, + "loss": 0.2271, + "step": 74770 + }, + { + "epoch": 0.9644241247896206, + "grad_norm": 0.8254825699699088, + "learning_rate": 8.578861466023652e-06, + "loss": 0.2314, + "step": 74780 + }, + { + "epoch": 0.9645530929796166, + "grad_norm": 0.810034926184237, + "learning_rate": 8.578337463997493e-06, + "loss": 0.2243, + "step": 74790 + }, + { + "epoch": 0.9646820611696125, + "grad_norm": 0.8325922551015524, + "learning_rate": 8.577813381393727e-06, + "loss": 0.2256, + "step": 74800 + }, + { + "epoch": 0.9648110293596085, + "grad_norm": 0.8950815000422392, + "learning_rate": 8.577289218224156e-06, + "loss": 0.2289, + "step": 74810 + }, + { + "epoch": 0.9649399975496044, + "grad_norm": 0.8424064325203511, + "learning_rate": 8.576764974500583e-06, + "loss": 0.2319, + "step": 74820 + }, + { + "epoch": 0.9650689657396003, + "grad_norm": 0.8747924356926013, + "learning_rate": 8.576240650234812e-06, + "loss": 0.2327, + "step": 74830 + }, + { + "epoch": 0.9651979339295963, + "grad_norm": 0.8094872316421166, + "learning_rate": 8.57571624543865e-06, + "loss": 0.2199, + "step": 74840 + }, + { + "epoch": 0.9653269021195922, + "grad_norm": 0.9085460771961679, + "learning_rate": 8.575191760123908e-06, + "loss": 0.2187, + "step": 74850 + }, + { + "epoch": 0.9654558703095881, + "grad_norm": 0.8719142690045072, + "learning_rate": 8.574667194302393e-06, + "loss": 0.2233, + "step": 74860 + }, + { + "epoch": 0.9655848384995841, + "grad_norm": 0.7679144976225473, + "learning_rate": 8.57414254798592e-06, + "loss": 0.2209, + "step": 74870 + }, + { + "epoch": 0.9657138066895801, + "grad_norm": 0.8372893531867596, + "learning_rate": 8.573617821186301e-06, + "loss": 0.2225, + "step": 74880 + }, + { + "epoch": 0.9658427748795759, + "grad_norm": 0.8277738373688811, + "learning_rate": 8.573093013915354e-06, + "loss": 0.2208, + "step": 74890 + }, + { + "epoch": 0.9659717430695719, + "grad_norm": 0.8204612107846704, + "learning_rate": 8.572568126184894e-06, + "loss": 0.2228, + "step": 74900 + }, + { + "epoch": 0.9661007112595679, + "grad_norm": 0.8532997402814966, + "learning_rate": 8.572043158006744e-06, + "loss": 0.2144, + "step": 74910 + }, + { + "epoch": 0.9662296794495637, + "grad_norm": 0.7771450881400493, + "learning_rate": 8.571518109392723e-06, + "loss": 0.2297, + "step": 74920 + }, + { + "epoch": 0.9663586476395597, + "grad_norm": 0.8503693153048134, + "learning_rate": 8.570992980354654e-06, + "loss": 0.2326, + "step": 74930 + }, + { + "epoch": 0.9664876158295557, + "grad_norm": 0.8277342227446982, + "learning_rate": 8.570467770904362e-06, + "loss": 0.2208, + "step": 74940 + }, + { + "epoch": 0.9666165840195515, + "grad_norm": 0.8809986147438345, + "learning_rate": 8.569942481053674e-06, + "loss": 0.2216, + "step": 74950 + }, + { + "epoch": 0.9667455522095475, + "grad_norm": 0.8908528477114918, + "learning_rate": 8.56941711081442e-06, + "loss": 0.2281, + "step": 74960 + }, + { + "epoch": 0.9668745203995435, + "grad_norm": 0.7260469203596149, + "learning_rate": 8.568891660198429e-06, + "loss": 0.2234, + "step": 74970 + }, + { + "epoch": 0.9670034885895394, + "grad_norm": 0.8739000976046247, + "learning_rate": 8.568366129217532e-06, + "loss": 0.2196, + "step": 74980 + }, + { + "epoch": 0.9671324567795353, + "grad_norm": 0.8269612320871016, + "learning_rate": 8.567840517883564e-06, + "loss": 0.2193, + "step": 74990 + }, + { + "epoch": 0.9672614249695313, + "grad_norm": 0.7507034926224541, + "learning_rate": 8.567314826208361e-06, + "loss": 0.2097, + "step": 75000 + }, + { + "epoch": 0.9673903931595272, + "grad_norm": 0.7987348954424753, + "learning_rate": 8.566789054203763e-06, + "loss": 0.2088, + "step": 75010 + }, + { + "epoch": 0.9675193613495231, + "grad_norm": 0.8367530409300681, + "learning_rate": 8.566263201881606e-06, + "loss": 0.2222, + "step": 75020 + }, + { + "epoch": 0.9676483295395191, + "grad_norm": 0.7718079909116197, + "learning_rate": 8.565737269253733e-06, + "loss": 0.2238, + "step": 75030 + }, + { + "epoch": 0.967777297729515, + "grad_norm": 0.9043155272404119, + "learning_rate": 8.565211256331986e-06, + "loss": 0.2366, + "step": 75040 + }, + { + "epoch": 0.9679062659195109, + "grad_norm": 0.8822696321372167, + "learning_rate": 8.56468516312821e-06, + "loss": 0.2301, + "step": 75050 + }, + { + "epoch": 0.9680352341095069, + "grad_norm": 0.9475199813475159, + "learning_rate": 8.564158989654253e-06, + "loss": 0.2322, + "step": 75060 + }, + { + "epoch": 0.9681642022995028, + "grad_norm": 0.8071661259086569, + "learning_rate": 8.56363273592196e-06, + "loss": 0.2307, + "step": 75070 + }, + { + "epoch": 0.9682931704894988, + "grad_norm": 0.8115355617431529, + "learning_rate": 8.563106401943186e-06, + "loss": 0.2177, + "step": 75080 + }, + { + "epoch": 0.9684221386794947, + "grad_norm": 0.8565535761851146, + "learning_rate": 8.56257998772978e-06, + "loss": 0.2286, + "step": 75090 + }, + { + "epoch": 0.9685511068694906, + "grad_norm": 0.852337289943447, + "learning_rate": 8.562053493293598e-06, + "loss": 0.2404, + "step": 75100 + }, + { + "epoch": 0.9686800750594866, + "grad_norm": 0.829336180160406, + "learning_rate": 8.561526918646492e-06, + "loss": 0.2404, + "step": 75110 + }, + { + "epoch": 0.9688090432494825, + "grad_norm": 0.8687438035105519, + "learning_rate": 8.561000263800324e-06, + "loss": 0.2169, + "step": 75120 + }, + { + "epoch": 0.9689380114394784, + "grad_norm": 0.8368148029144397, + "learning_rate": 8.560473528766948e-06, + "loss": 0.2354, + "step": 75130 + }, + { + "epoch": 0.9690669796294744, + "grad_norm": 0.8773469275380548, + "learning_rate": 8.55994671355823e-06, + "loss": 0.2367, + "step": 75140 + }, + { + "epoch": 0.9691959478194704, + "grad_norm": 0.9027696802745345, + "learning_rate": 8.559419818186032e-06, + "loss": 0.2276, + "step": 75150 + }, + { + "epoch": 0.9693249160094662, + "grad_norm": 0.8377164544108928, + "learning_rate": 8.558892842662217e-06, + "loss": 0.2347, + "step": 75160 + }, + { + "epoch": 0.9694538841994622, + "grad_norm": 0.8366579287232835, + "learning_rate": 8.558365786998652e-06, + "loss": 0.2244, + "step": 75170 + }, + { + "epoch": 0.9695828523894582, + "grad_norm": 0.7444674511343413, + "learning_rate": 8.557838651207207e-06, + "loss": 0.2153, + "step": 75180 + }, + { + "epoch": 0.969711820579454, + "grad_norm": 0.906941462415723, + "learning_rate": 8.55731143529975e-06, + "loss": 0.2266, + "step": 75190 + }, + { + "epoch": 0.96984078876945, + "grad_norm": 0.8561273527595301, + "learning_rate": 8.556784139288155e-06, + "loss": 0.2085, + "step": 75200 + }, + { + "epoch": 0.969969756959446, + "grad_norm": 0.8411253553892879, + "learning_rate": 8.556256763184291e-06, + "loss": 0.2154, + "step": 75210 + }, + { + "epoch": 0.9700987251494418, + "grad_norm": 0.8105537134054486, + "learning_rate": 8.555729307000042e-06, + "loss": 0.2392, + "step": 75220 + }, + { + "epoch": 0.9702276933394378, + "grad_norm": 0.9542141999693226, + "learning_rate": 8.555201770747276e-06, + "loss": 0.2568, + "step": 75230 + }, + { + "epoch": 0.9703566615294338, + "grad_norm": 0.7621513135803785, + "learning_rate": 8.55467415443788e-06, + "loss": 0.2268, + "step": 75240 + }, + { + "epoch": 0.9704856297194298, + "grad_norm": 0.7811076065279556, + "learning_rate": 8.55414645808373e-06, + "loss": 0.2089, + "step": 75250 + }, + { + "epoch": 0.9706145979094256, + "grad_norm": 0.829226466567831, + "learning_rate": 8.55361868169671e-06, + "loss": 0.2153, + "step": 75260 + }, + { + "epoch": 0.9707435660994216, + "grad_norm": 0.9284813407221579, + "learning_rate": 8.553090825288706e-06, + "loss": 0.2268, + "step": 75270 + }, + { + "epoch": 0.9708725342894176, + "grad_norm": 0.8392259978323494, + "learning_rate": 8.552562888871602e-06, + "loss": 0.2315, + "step": 75280 + }, + { + "epoch": 0.9710015024794134, + "grad_norm": 0.7737906633477433, + "learning_rate": 8.552034872457287e-06, + "loss": 0.2248, + "step": 75290 + }, + { + "epoch": 0.9711304706694094, + "grad_norm": 0.8925665445048183, + "learning_rate": 8.551506776057652e-06, + "loss": 0.2242, + "step": 75300 + }, + { + "epoch": 0.9712594388594054, + "grad_norm": 0.884154829266925, + "learning_rate": 8.550978599684588e-06, + "loss": 0.2283, + "step": 75310 + }, + { + "epoch": 0.9713884070494012, + "grad_norm": 0.967928936281223, + "learning_rate": 8.55045034334999e-06, + "loss": 0.226, + "step": 75320 + }, + { + "epoch": 0.9715173752393972, + "grad_norm": 0.9154833336729808, + "learning_rate": 8.54992200706575e-06, + "loss": 0.23, + "step": 75330 + }, + { + "epoch": 0.9716463434293932, + "grad_norm": 0.9720483229793684, + "learning_rate": 8.549393590843769e-06, + "loss": 0.2256, + "step": 75340 + }, + { + "epoch": 0.9717753116193891, + "grad_norm": 0.820332245714158, + "learning_rate": 8.548865094695942e-06, + "loss": 0.2229, + "step": 75350 + }, + { + "epoch": 0.971904279809385, + "grad_norm": 0.8294661428376999, + "learning_rate": 8.548336518634173e-06, + "loss": 0.2365, + "step": 75360 + }, + { + "epoch": 0.972033247999381, + "grad_norm": 0.7455470759055562, + "learning_rate": 8.547807862670364e-06, + "loss": 0.2248, + "step": 75370 + }, + { + "epoch": 0.9721622161893769, + "grad_norm": 0.821405086890687, + "learning_rate": 8.547279126816417e-06, + "loss": 0.2121, + "step": 75380 + }, + { + "epoch": 0.9722911843793728, + "grad_norm": 0.774063852525074, + "learning_rate": 8.546750311084242e-06, + "loss": 0.2221, + "step": 75390 + }, + { + "epoch": 0.9724201525693688, + "grad_norm": 0.864301633999558, + "learning_rate": 8.546221415485744e-06, + "loss": 0.2224, + "step": 75400 + }, + { + "epoch": 0.9725491207593647, + "grad_norm": 0.9507677246800774, + "learning_rate": 8.545692440032835e-06, + "loss": 0.2331, + "step": 75410 + }, + { + "epoch": 0.9726780889493606, + "grad_norm": 0.9374632516400525, + "learning_rate": 8.545163384737424e-06, + "loss": 0.2493, + "step": 75420 + }, + { + "epoch": 0.9728070571393566, + "grad_norm": 0.8299387753749412, + "learning_rate": 8.544634249611427e-06, + "loss": 0.232, + "step": 75430 + }, + { + "epoch": 0.9729360253293525, + "grad_norm": 0.8184133220297575, + "learning_rate": 8.544105034666757e-06, + "loss": 0.2325, + "step": 75440 + }, + { + "epoch": 0.9730649935193485, + "grad_norm": 0.8129416541817578, + "learning_rate": 8.54357573991533e-06, + "loss": 0.2242, + "step": 75450 + }, + { + "epoch": 0.9731939617093444, + "grad_norm": 0.8281048781914097, + "learning_rate": 8.543046365369068e-06, + "loss": 0.2232, + "step": 75460 + }, + { + "epoch": 0.9733229298993403, + "grad_norm": 0.8173310909478635, + "learning_rate": 8.54251691103989e-06, + "loss": 0.232, + "step": 75470 + }, + { + "epoch": 0.9734518980893363, + "grad_norm": 0.7858604706428757, + "learning_rate": 8.54198737693972e-06, + "loss": 0.2264, + "step": 75480 + }, + { + "epoch": 0.9735808662793322, + "grad_norm": 0.8972839119043459, + "learning_rate": 8.541457763080478e-06, + "loss": 0.2256, + "step": 75490 + }, + { + "epoch": 0.9737098344693281, + "grad_norm": 0.8143449723410452, + "learning_rate": 8.540928069474094e-06, + "loss": 0.2068, + "step": 75500 + }, + { + "epoch": 0.9738388026593241, + "grad_norm": 0.8158211843700841, + "learning_rate": 8.540398296132493e-06, + "loss": 0.2069, + "step": 75510 + }, + { + "epoch": 0.9739677708493201, + "grad_norm": 0.7672490735790238, + "learning_rate": 8.539868443067607e-06, + "loss": 0.2262, + "step": 75520 + }, + { + "epoch": 0.9740967390393159, + "grad_norm": 0.8237800244648366, + "learning_rate": 8.539338510291366e-06, + "loss": 0.2128, + "step": 75530 + }, + { + "epoch": 0.9742257072293119, + "grad_norm": 0.8491280643709246, + "learning_rate": 8.5388084978157e-06, + "loss": 0.2277, + "step": 75540 + }, + { + "epoch": 0.9743546754193079, + "grad_norm": 0.8078567453832758, + "learning_rate": 8.538278405652551e-06, + "loss": 0.223, + "step": 75550 + }, + { + "epoch": 0.9744836436093037, + "grad_norm": 0.8236674670839836, + "learning_rate": 8.537748233813849e-06, + "loss": 0.2302, + "step": 75560 + }, + { + "epoch": 0.9746126117992997, + "grad_norm": 0.8393810492626166, + "learning_rate": 8.537217982311537e-06, + "loss": 0.2277, + "step": 75570 + }, + { + "epoch": 0.9747415799892957, + "grad_norm": 0.7895881415443015, + "learning_rate": 8.536687651157554e-06, + "loss": 0.2253, + "step": 75580 + }, + { + "epoch": 0.9748705481792915, + "grad_norm": 0.9045992846701406, + "learning_rate": 8.536157240363841e-06, + "loss": 0.234, + "step": 75590 + }, + { + "epoch": 0.9749995163692875, + "grad_norm": 0.7756179794461058, + "learning_rate": 8.535626749942342e-06, + "loss": 0.2198, + "step": 75600 + }, + { + "epoch": 0.9751284845592835, + "grad_norm": 0.9777387219145017, + "learning_rate": 8.535096179905003e-06, + "loss": 0.2265, + "step": 75610 + }, + { + "epoch": 0.9752574527492794, + "grad_norm": 0.8447923652985847, + "learning_rate": 8.534565530263773e-06, + "loss": 0.2175, + "step": 75620 + }, + { + "epoch": 0.9753864209392753, + "grad_norm": 0.7891257040743769, + "learning_rate": 8.534034801030598e-06, + "loss": 0.2343, + "step": 75630 + }, + { + "epoch": 0.9755153891292713, + "grad_norm": 0.8487045311403858, + "learning_rate": 8.533503992217432e-06, + "loss": 0.224, + "step": 75640 + }, + { + "epoch": 0.9756443573192672, + "grad_norm": 0.7346842554110329, + "learning_rate": 8.532973103836228e-06, + "loss": 0.2219, + "step": 75650 + }, + { + "epoch": 0.9757733255092631, + "grad_norm": 0.8704740840815914, + "learning_rate": 8.532442135898938e-06, + "loss": 0.2337, + "step": 75660 + }, + { + "epoch": 0.9759022936992591, + "grad_norm": 0.8783546781759954, + "learning_rate": 8.531911088417521e-06, + "loss": 0.2319, + "step": 75670 + }, + { + "epoch": 0.976031261889255, + "grad_norm": 0.7932265846192786, + "learning_rate": 8.531379961403932e-06, + "loss": 0.2289, + "step": 75680 + }, + { + "epoch": 0.9761602300792509, + "grad_norm": 0.7782025380526892, + "learning_rate": 8.530848754870134e-06, + "loss": 0.2364, + "step": 75690 + }, + { + "epoch": 0.9762891982692469, + "grad_norm": 0.7901942050400528, + "learning_rate": 8.530317468828088e-06, + "loss": 0.2067, + "step": 75700 + }, + { + "epoch": 0.9764181664592428, + "grad_norm": 0.82904537815786, + "learning_rate": 8.529786103289758e-06, + "loss": 0.2214, + "step": 75710 + }, + { + "epoch": 0.9765471346492388, + "grad_norm": 0.8041290370049725, + "learning_rate": 8.52925465826711e-06, + "loss": 0.212, + "step": 75720 + }, + { + "epoch": 0.9766761028392347, + "grad_norm": 0.8644324473413171, + "learning_rate": 8.528723133772108e-06, + "loss": 0.2212, + "step": 75730 + }, + { + "epoch": 0.9768050710292306, + "grad_norm": 0.8284312616546958, + "learning_rate": 8.528191529816723e-06, + "loss": 0.2291, + "step": 75740 + }, + { + "epoch": 0.9769340392192266, + "grad_norm": 0.9180432070737597, + "learning_rate": 8.527659846412927e-06, + "loss": 0.2219, + "step": 75750 + }, + { + "epoch": 0.9770630074092225, + "grad_norm": 0.8668329813402317, + "learning_rate": 8.52712808357269e-06, + "loss": 0.232, + "step": 75760 + }, + { + "epoch": 0.9771919755992184, + "grad_norm": 0.8469117652177607, + "learning_rate": 8.526596241307989e-06, + "loss": 0.2214, + "step": 75770 + }, + { + "epoch": 0.9773209437892144, + "grad_norm": 0.8230194443798377, + "learning_rate": 8.526064319630798e-06, + "loss": 0.227, + "step": 75780 + }, + { + "epoch": 0.9774499119792103, + "grad_norm": 0.8413334308967062, + "learning_rate": 8.525532318553095e-06, + "loss": 0.2148, + "step": 75790 + }, + { + "epoch": 0.9775788801692062, + "grad_norm": 0.7556568441760423, + "learning_rate": 8.52500023808686e-06, + "loss": 0.212, + "step": 75800 + }, + { + "epoch": 0.9777078483592022, + "grad_norm": 0.9219807459001041, + "learning_rate": 8.524468078244078e-06, + "loss": 0.2267, + "step": 75810 + }, + { + "epoch": 0.9778368165491982, + "grad_norm": 0.8065196539584657, + "learning_rate": 8.523935839036725e-06, + "loss": 0.2134, + "step": 75820 + }, + { + "epoch": 0.977965784739194, + "grad_norm": 0.838537994349739, + "learning_rate": 8.523403520476791e-06, + "loss": 0.2247, + "step": 75830 + }, + { + "epoch": 0.97809475292919, + "grad_norm": 0.8593741094140421, + "learning_rate": 8.522871122576264e-06, + "loss": 0.2162, + "step": 75840 + }, + { + "epoch": 0.978223721119186, + "grad_norm": 0.8643579999036165, + "learning_rate": 8.522338645347129e-06, + "loss": 0.2249, + "step": 75850 + }, + { + "epoch": 0.9783526893091818, + "grad_norm": 0.8397219626756035, + "learning_rate": 8.521806088801379e-06, + "loss": 0.1985, + "step": 75860 + }, + { + "epoch": 0.9784816574991778, + "grad_norm": 0.7667748835902288, + "learning_rate": 8.521273452951003e-06, + "loss": 0.234, + "step": 75870 + }, + { + "epoch": 0.9786106256891738, + "grad_norm": 0.8747883818697948, + "learning_rate": 8.520740737808e-06, + "loss": 0.2313, + "step": 75880 + }, + { + "epoch": 0.9787395938791698, + "grad_norm": 0.8804810750738102, + "learning_rate": 8.52020794338436e-06, + "loss": 0.2199, + "step": 75890 + }, + { + "epoch": 0.9788685620691656, + "grad_norm": 0.923067458989886, + "learning_rate": 8.519675069692086e-06, + "loss": 0.2254, + "step": 75900 + }, + { + "epoch": 0.9789975302591616, + "grad_norm": 0.8350482502308529, + "learning_rate": 8.519142116743173e-06, + "loss": 0.2166, + "step": 75910 + }, + { + "epoch": 0.9791264984491576, + "grad_norm": 0.8894153992169792, + "learning_rate": 8.518609084549626e-06, + "loss": 0.221, + "step": 75920 + }, + { + "epoch": 0.9792554666391534, + "grad_norm": 0.8080224485370203, + "learning_rate": 8.518075973123443e-06, + "loss": 0.2265, + "step": 75930 + }, + { + "epoch": 0.9793844348291494, + "grad_norm": 0.8463157653744634, + "learning_rate": 8.517542782476634e-06, + "loss": 0.2282, + "step": 75940 + }, + { + "epoch": 0.9795134030191454, + "grad_norm": 0.8336555780988876, + "learning_rate": 8.517009512621202e-06, + "loss": 0.2208, + "step": 75950 + }, + { + "epoch": 0.9796423712091412, + "grad_norm": 0.8027061350746272, + "learning_rate": 8.516476163569157e-06, + "loss": 0.2278, + "step": 75960 + }, + { + "epoch": 0.9797713393991372, + "grad_norm": 0.9203330195921896, + "learning_rate": 8.515942735332508e-06, + "loss": 0.2349, + "step": 75970 + }, + { + "epoch": 0.9799003075891332, + "grad_norm": 0.8630028686185831, + "learning_rate": 8.515409227923265e-06, + "loss": 0.2139, + "step": 75980 + }, + { + "epoch": 0.9800292757791291, + "grad_norm": 0.8462975953662158, + "learning_rate": 8.514875641353446e-06, + "loss": 0.221, + "step": 75990 + }, + { + "epoch": 0.980158243969125, + "grad_norm": 0.78178297766179, + "learning_rate": 8.514341975635064e-06, + "loss": 0.218, + "step": 76000 + }, + { + "epoch": 0.980287212159121, + "grad_norm": 0.8552551695985962, + "learning_rate": 8.513808230780137e-06, + "loss": 0.2208, + "step": 76010 + }, + { + "epoch": 0.9804161803491169, + "grad_norm": 0.8518050156211286, + "learning_rate": 8.513274406800682e-06, + "loss": 0.2205, + "step": 76020 + }, + { + "epoch": 0.9805451485391128, + "grad_norm": 0.9242994144422947, + "learning_rate": 8.512740503708722e-06, + "loss": 0.2426, + "step": 76030 + }, + { + "epoch": 0.9806741167291088, + "grad_norm": 0.774839892096698, + "learning_rate": 8.512206521516277e-06, + "loss": 0.22, + "step": 76040 + }, + { + "epoch": 0.9808030849191047, + "grad_norm": 0.7964048921396126, + "learning_rate": 8.511672460235374e-06, + "loss": 0.2287, + "step": 76050 + }, + { + "epoch": 0.9809320531091006, + "grad_norm": 0.8436215738966519, + "learning_rate": 8.511138319878038e-06, + "loss": 0.2216, + "step": 76060 + }, + { + "epoch": 0.9810610212990966, + "grad_norm": 0.844476250901673, + "learning_rate": 8.510604100456297e-06, + "loss": 0.2297, + "step": 76070 + }, + { + "epoch": 0.9811899894890925, + "grad_norm": 0.8537458244443138, + "learning_rate": 8.51006980198218e-06, + "loss": 0.2311, + "step": 76080 + }, + { + "epoch": 0.9813189576790885, + "grad_norm": 0.9135253580093757, + "learning_rate": 8.50953542446772e-06, + "loss": 0.225, + "step": 76090 + }, + { + "epoch": 0.9814479258690844, + "grad_norm": 0.7908506201982316, + "learning_rate": 8.509000967924948e-06, + "loss": 0.2114, + "step": 76100 + }, + { + "epoch": 0.9815768940590803, + "grad_norm": 0.9115749882962473, + "learning_rate": 8.5084664323659e-06, + "loss": 0.2281, + "step": 76110 + }, + { + "epoch": 0.9817058622490763, + "grad_norm": 0.9007563486705006, + "learning_rate": 8.507931817802613e-06, + "loss": 0.2295, + "step": 76120 + }, + { + "epoch": 0.9818348304390722, + "grad_norm": 0.7550141996835662, + "learning_rate": 8.507397124247125e-06, + "loss": 0.2195, + "step": 76130 + }, + { + "epoch": 0.9819637986290681, + "grad_norm": 0.7877997747425647, + "learning_rate": 8.506862351711477e-06, + "loss": 0.201, + "step": 76140 + }, + { + "epoch": 0.9820927668190641, + "grad_norm": 0.7431940194417032, + "learning_rate": 8.50632750020771e-06, + "loss": 0.2265, + "step": 76150 + }, + { + "epoch": 0.9822217350090601, + "grad_norm": 0.863593115410519, + "learning_rate": 8.50579256974787e-06, + "loss": 0.229, + "step": 76160 + }, + { + "epoch": 0.9823507031990559, + "grad_norm": 0.9023522786005566, + "learning_rate": 8.505257560344e-06, + "loss": 0.2175, + "step": 76170 + }, + { + "epoch": 0.9824796713890519, + "grad_norm": 0.9548468530322481, + "learning_rate": 8.50472247200815e-06, + "loss": 0.2198, + "step": 76180 + }, + { + "epoch": 0.9826086395790479, + "grad_norm": 0.8347048389882765, + "learning_rate": 8.504187304752366e-06, + "loss": 0.2375, + "step": 76190 + }, + { + "epoch": 0.9827376077690437, + "grad_norm": 0.787124564776593, + "learning_rate": 8.503652058588703e-06, + "loss": 0.2221, + "step": 76200 + }, + { + "epoch": 0.9828665759590397, + "grad_norm": 0.8228839685630216, + "learning_rate": 8.503116733529211e-06, + "loss": 0.2257, + "step": 76210 + }, + { + "epoch": 0.9829955441490357, + "grad_norm": 0.8685066785575107, + "learning_rate": 8.502581329585944e-06, + "loss": 0.2309, + "step": 76220 + }, + { + "epoch": 0.9831245123390315, + "grad_norm": 0.780264647621223, + "learning_rate": 8.50204584677096e-06, + "loss": 0.2138, + "step": 76230 + }, + { + "epoch": 0.9832534805290275, + "grad_norm": 0.9183923648570574, + "learning_rate": 8.50151028509632e-06, + "loss": 0.2311, + "step": 76240 + }, + { + "epoch": 0.9833824487190235, + "grad_norm": 0.8021522214383874, + "learning_rate": 8.500974644574076e-06, + "loss": 0.2339, + "step": 76250 + }, + { + "epoch": 0.9835114169090194, + "grad_norm": 0.8882430190100382, + "learning_rate": 8.500438925216297e-06, + "loss": 0.2171, + "step": 76260 + }, + { + "epoch": 0.9836403850990153, + "grad_norm": 0.8028373469530307, + "learning_rate": 8.499903127035041e-06, + "loss": 0.2249, + "step": 76270 + }, + { + "epoch": 0.9837693532890113, + "grad_norm": 0.8487144127580882, + "learning_rate": 8.499367250042378e-06, + "loss": 0.2273, + "step": 76280 + }, + { + "epoch": 0.9838983214790072, + "grad_norm": 0.8777852196270691, + "learning_rate": 8.498831294250373e-06, + "loss": 0.2238, + "step": 76290 + }, + { + "epoch": 0.9840272896690031, + "grad_norm": 0.795468486085854, + "learning_rate": 8.498295259671092e-06, + "loss": 0.2151, + "step": 76300 + }, + { + "epoch": 0.9841562578589991, + "grad_norm": 0.8018329286358034, + "learning_rate": 8.49775914631661e-06, + "loss": 0.2254, + "step": 76310 + }, + { + "epoch": 0.984285226048995, + "grad_norm": 0.8667762100212394, + "learning_rate": 8.497222954198996e-06, + "loss": 0.2235, + "step": 76320 + }, + { + "epoch": 0.9844141942389909, + "grad_norm": 0.8269980637161594, + "learning_rate": 8.496686683330326e-06, + "loss": 0.2277, + "step": 76330 + }, + { + "epoch": 0.9845431624289869, + "grad_norm": 0.7678873274731047, + "learning_rate": 8.496150333722677e-06, + "loss": 0.2283, + "step": 76340 + }, + { + "epoch": 0.9846721306189828, + "grad_norm": 0.8222017054922073, + "learning_rate": 8.495613905388121e-06, + "loss": 0.2231, + "step": 76350 + }, + { + "epoch": 0.9848010988089788, + "grad_norm": 0.8812396015112891, + "learning_rate": 8.495077398338744e-06, + "loss": 0.2242, + "step": 76360 + }, + { + "epoch": 0.9849300669989747, + "grad_norm": 0.8304633841395411, + "learning_rate": 8.494540812586625e-06, + "loss": 0.2267, + "step": 76370 + }, + { + "epoch": 0.9850590351889706, + "grad_norm": 0.7901609363003974, + "learning_rate": 8.494004148143845e-06, + "loss": 0.2191, + "step": 76380 + }, + { + "epoch": 0.9851880033789666, + "grad_norm": 0.8122752988410282, + "learning_rate": 8.493467405022489e-06, + "loss": 0.2265, + "step": 76390 + }, + { + "epoch": 0.9853169715689625, + "grad_norm": 0.7994170421090186, + "learning_rate": 8.492930583234646e-06, + "loss": 0.2068, + "step": 76400 + }, + { + "epoch": 0.9854459397589584, + "grad_norm": 0.7798553235170034, + "learning_rate": 8.492393682792401e-06, + "loss": 0.2258, + "step": 76410 + }, + { + "epoch": 0.9855749079489544, + "grad_norm": 0.8750172129166276, + "learning_rate": 8.491856703707846e-06, + "loss": 0.2327, + "step": 76420 + }, + { + "epoch": 0.9857038761389503, + "grad_norm": 0.7777378491317872, + "learning_rate": 8.491319645993074e-06, + "loss": 0.2326, + "step": 76430 + }, + { + "epoch": 0.9858328443289462, + "grad_norm": 0.8599742765891584, + "learning_rate": 8.490782509660176e-06, + "loss": 0.2211, + "step": 76440 + }, + { + "epoch": 0.9859618125189422, + "grad_norm": 0.8318340795524408, + "learning_rate": 8.490245294721248e-06, + "loss": 0.2323, + "step": 76450 + }, + { + "epoch": 0.9860907807089382, + "grad_norm": 0.8628914852311669, + "learning_rate": 8.489708001188389e-06, + "loss": 0.2359, + "step": 76460 + }, + { + "epoch": 0.986219748898934, + "grad_norm": 0.7565605603123711, + "learning_rate": 8.489170629073694e-06, + "loss": 0.2189, + "step": 76470 + }, + { + "epoch": 0.98634871708893, + "grad_norm": 0.8408740047374351, + "learning_rate": 8.488633178389268e-06, + "loss": 0.2151, + "step": 76480 + }, + { + "epoch": 0.986477685278926, + "grad_norm": 0.8913505350623023, + "learning_rate": 8.48809564914721e-06, + "loss": 0.2222, + "step": 76490 + }, + { + "epoch": 0.9866066534689218, + "grad_norm": 0.8407730681878207, + "learning_rate": 8.487558041359628e-06, + "loss": 0.2117, + "step": 76500 + }, + { + "epoch": 0.9867356216589178, + "grad_norm": 0.7460894987034026, + "learning_rate": 8.487020355038622e-06, + "loss": 0.2272, + "step": 76510 + }, + { + "epoch": 0.9868645898489138, + "grad_norm": 0.8385511289099864, + "learning_rate": 8.486482590196305e-06, + "loss": 0.2328, + "step": 76520 + }, + { + "epoch": 0.9869935580389098, + "grad_norm": 0.8377974848416074, + "learning_rate": 8.485944746844786e-06, + "loss": 0.2203, + "step": 76530 + }, + { + "epoch": 0.9871225262289056, + "grad_norm": 0.844428080241598, + "learning_rate": 8.485406824996172e-06, + "loss": 0.2236, + "step": 76540 + }, + { + "epoch": 0.9872514944189016, + "grad_norm": 0.8031266352682905, + "learning_rate": 8.48486882466258e-06, + "loss": 0.2185, + "step": 76550 + }, + { + "epoch": 0.9873804626088976, + "grad_norm": 0.9749150063493491, + "learning_rate": 8.484330745856124e-06, + "loss": 0.2324, + "step": 76560 + }, + { + "epoch": 0.9875094307988934, + "grad_norm": 0.8006878777397667, + "learning_rate": 8.483792588588922e-06, + "loss": 0.2255, + "step": 76570 + }, + { + "epoch": 0.9876383989888894, + "grad_norm": 0.7651409498403758, + "learning_rate": 8.483254352873087e-06, + "loss": 0.226, + "step": 76580 + }, + { + "epoch": 0.9877673671788854, + "grad_norm": 0.7965031209722081, + "learning_rate": 8.482716038720747e-06, + "loss": 0.2262, + "step": 76590 + }, + { + "epoch": 0.9878963353688812, + "grad_norm": 0.8359939706308979, + "learning_rate": 8.482177646144017e-06, + "loss": 0.2078, + "step": 76600 + }, + { + "epoch": 0.9880253035588772, + "grad_norm": 0.797763228282135, + "learning_rate": 8.481639175155021e-06, + "loss": 0.2243, + "step": 76610 + }, + { + "epoch": 0.9881542717488732, + "grad_norm": 0.8444665890446597, + "learning_rate": 8.48110062576589e-06, + "loss": 0.2297, + "step": 76620 + }, + { + "epoch": 0.9882832399388691, + "grad_norm": 0.8401481047835094, + "learning_rate": 8.480561997988745e-06, + "loss": 0.23, + "step": 76630 + }, + { + "epoch": 0.988412208128865, + "grad_norm": 0.906088760583839, + "learning_rate": 8.48002329183572e-06, + "loss": 0.2307, + "step": 76640 + }, + { + "epoch": 0.988541176318861, + "grad_norm": 0.9253084902672501, + "learning_rate": 8.479484507318942e-06, + "loss": 0.2264, + "step": 76650 + }, + { + "epoch": 0.9886701445088569, + "grad_norm": 0.8570410681032854, + "learning_rate": 8.478945644450546e-06, + "loss": 0.2178, + "step": 76660 + }, + { + "epoch": 0.9887991126988528, + "grad_norm": 0.8992891808691281, + "learning_rate": 8.478406703242663e-06, + "loss": 0.2188, + "step": 76670 + }, + { + "epoch": 0.9889280808888488, + "grad_norm": 0.8046181714433469, + "learning_rate": 8.477867683707433e-06, + "loss": 0.2212, + "step": 76680 + }, + { + "epoch": 0.9890570490788447, + "grad_norm": 0.9290991171607952, + "learning_rate": 8.47732858585699e-06, + "loss": 0.2224, + "step": 76690 + }, + { + "epoch": 0.9891860172688406, + "grad_norm": 0.7567655101766373, + "learning_rate": 8.476789409703475e-06, + "loss": 0.2186, + "step": 76700 + }, + { + "epoch": 0.9893149854588366, + "grad_norm": 0.8356894870485984, + "learning_rate": 8.476250155259029e-06, + "loss": 0.2319, + "step": 76710 + }, + { + "epoch": 0.9894439536488325, + "grad_norm": 0.9295151523881412, + "learning_rate": 8.475710822535797e-06, + "loss": 0.2255, + "step": 76720 + }, + { + "epoch": 0.9895729218388285, + "grad_norm": 0.8663196017678729, + "learning_rate": 8.475171411545922e-06, + "loss": 0.2268, + "step": 76730 + }, + { + "epoch": 0.9897018900288244, + "grad_norm": 0.7964506577903725, + "learning_rate": 8.474631922301548e-06, + "loss": 0.2326, + "step": 76740 + }, + { + "epoch": 0.9898308582188203, + "grad_norm": 0.8186635177240223, + "learning_rate": 8.47409235481483e-06, + "loss": 0.2229, + "step": 76750 + }, + { + "epoch": 0.9899598264088163, + "grad_norm": 0.9057538623649581, + "learning_rate": 8.473552709097913e-06, + "loss": 0.2275, + "step": 76760 + }, + { + "epoch": 0.9900887945988122, + "grad_norm": 0.8672769126793032, + "learning_rate": 8.473012985162948e-06, + "loss": 0.2271, + "step": 76770 + }, + { + "epoch": 0.9902177627888081, + "grad_norm": 0.8053128039251699, + "learning_rate": 8.47247318302209e-06, + "loss": 0.2202, + "step": 76780 + }, + { + "epoch": 0.9903467309788041, + "grad_norm": 0.7555422789305979, + "learning_rate": 8.471933302687498e-06, + "loss": 0.2219, + "step": 76790 + }, + { + "epoch": 0.9904756991688, + "grad_norm": 0.8431030785543228, + "learning_rate": 8.471393344171324e-06, + "loss": 0.2196, + "step": 76800 + }, + { + "epoch": 0.9906046673587959, + "grad_norm": 0.7709456611129366, + "learning_rate": 8.470853307485728e-06, + "loss": 0.2228, + "step": 76810 + }, + { + "epoch": 0.9907336355487919, + "grad_norm": 0.7672650276592521, + "learning_rate": 8.470313192642873e-06, + "loss": 0.2336, + "step": 76820 + }, + { + "epoch": 0.9908626037387879, + "grad_norm": 0.8467408115989051, + "learning_rate": 8.46977299965492e-06, + "loss": 0.213, + "step": 76830 + }, + { + "epoch": 0.9909915719287837, + "grad_norm": 0.9209986269246057, + "learning_rate": 8.469232728534033e-06, + "loss": 0.2243, + "step": 76840 + }, + { + "epoch": 0.9911205401187797, + "grad_norm": 0.8055966934279188, + "learning_rate": 8.468692379292377e-06, + "loss": 0.2158, + "step": 76850 + }, + { + "epoch": 0.9912495083087757, + "grad_norm": 0.9003083678922761, + "learning_rate": 8.468151951942122e-06, + "loss": 0.2378, + "step": 76860 + }, + { + "epoch": 0.9913784764987715, + "grad_norm": 0.9761867748927607, + "learning_rate": 8.467611446495436e-06, + "loss": 0.2243, + "step": 76870 + }, + { + "epoch": 0.9915074446887675, + "grad_norm": 0.8022965884030742, + "learning_rate": 8.467070862964489e-06, + "loss": 0.2306, + "step": 76880 + }, + { + "epoch": 0.9916364128787635, + "grad_norm": 0.7607041570372007, + "learning_rate": 8.466530201361456e-06, + "loss": 0.2351, + "step": 76890 + }, + { + "epoch": 0.9917653810687594, + "grad_norm": 0.7680511119444667, + "learning_rate": 8.465989461698511e-06, + "loss": 0.212, + "step": 76900 + }, + { + "epoch": 0.9918943492587553, + "grad_norm": 0.8981933003903321, + "learning_rate": 8.46544864398783e-06, + "loss": 0.2278, + "step": 76910 + }, + { + "epoch": 0.9920233174487513, + "grad_norm": 0.8226073562494203, + "learning_rate": 8.464907748241592e-06, + "loss": 0.2276, + "step": 76920 + }, + { + "epoch": 0.9921522856387472, + "grad_norm": 0.8162557203853418, + "learning_rate": 8.464366774471977e-06, + "loss": 0.2159, + "step": 76930 + }, + { + "epoch": 0.9922812538287431, + "grad_norm": 0.8111200921383565, + "learning_rate": 8.463825722691165e-06, + "loss": 0.2208, + "step": 76940 + }, + { + "epoch": 0.9924102220187391, + "grad_norm": 0.8096260134375873, + "learning_rate": 8.463284592911342e-06, + "loss": 0.2116, + "step": 76950 + }, + { + "epoch": 0.992539190208735, + "grad_norm": 0.865918445162513, + "learning_rate": 8.462743385144692e-06, + "loss": 0.2238, + "step": 76960 + }, + { + "epoch": 0.9926681583987309, + "grad_norm": 0.8580457448501082, + "learning_rate": 8.462202099403401e-06, + "loss": 0.2253, + "step": 76970 + }, + { + "epoch": 0.9927971265887269, + "grad_norm": 0.8491979818211063, + "learning_rate": 8.46166073569966e-06, + "loss": 0.2229, + "step": 76980 + }, + { + "epoch": 0.9929260947787228, + "grad_norm": 0.8534026389795417, + "learning_rate": 8.46111929404566e-06, + "loss": 0.2264, + "step": 76990 + }, + { + "epoch": 0.9930550629687188, + "grad_norm": 0.7549460284860364, + "learning_rate": 8.46057777445359e-06, + "loss": 0.2156, + "step": 77000 + }, + { + "epoch": 0.9931840311587147, + "grad_norm": 0.785921378195106, + "learning_rate": 8.460036176935644e-06, + "loss": 0.2265, + "step": 77010 + }, + { + "epoch": 0.9933129993487106, + "grad_norm": 0.8847246179931441, + "learning_rate": 8.45949450150402e-06, + "loss": 0.2253, + "step": 77020 + }, + { + "epoch": 0.9934419675387066, + "grad_norm": 0.8520244113188369, + "learning_rate": 8.458952748170916e-06, + "loss": 0.2281, + "step": 77030 + }, + { + "epoch": 0.9935709357287025, + "grad_norm": 0.8208862713139952, + "learning_rate": 8.45841091694853e-06, + "loss": 0.2197, + "step": 77040 + }, + { + "epoch": 0.9936999039186984, + "grad_norm": 0.7525094532327559, + "learning_rate": 8.457869007849064e-06, + "loss": 0.2295, + "step": 77050 + }, + { + "epoch": 0.9938288721086944, + "grad_norm": 0.8101534537719639, + "learning_rate": 8.45732702088472e-06, + "loss": 0.2244, + "step": 77060 + }, + { + "epoch": 0.9939578402986903, + "grad_norm": 0.9808186059444828, + "learning_rate": 8.456784956067703e-06, + "loss": 0.238, + "step": 77070 + }, + { + "epoch": 0.9940868084886862, + "grad_norm": 0.8337264995812255, + "learning_rate": 8.456242813410219e-06, + "loss": 0.231, + "step": 77080 + }, + { + "epoch": 0.9942157766786822, + "grad_norm": 0.8809596450930105, + "learning_rate": 8.455700592924474e-06, + "loss": 0.2104, + "step": 77090 + }, + { + "epoch": 0.9943447448686782, + "grad_norm": 0.7480023201584302, + "learning_rate": 8.455158294622682e-06, + "loss": 0.213, + "step": 77100 + }, + { + "epoch": 0.994473713058674, + "grad_norm": 0.9018044040143595, + "learning_rate": 8.454615918517052e-06, + "loss": 0.2234, + "step": 77110 + }, + { + "epoch": 0.99460268124867, + "grad_norm": 0.9083742022949427, + "learning_rate": 8.454073464619798e-06, + "loss": 0.2226, + "step": 77120 + }, + { + "epoch": 0.994731649438666, + "grad_norm": 0.7929436962988586, + "learning_rate": 8.453530932943135e-06, + "loss": 0.2281, + "step": 77130 + }, + { + "epoch": 0.9948606176286618, + "grad_norm": 0.7835667264257956, + "learning_rate": 8.452988323499278e-06, + "loss": 0.2244, + "step": 77140 + }, + { + "epoch": 0.9949895858186578, + "grad_norm": 0.9097381773761566, + "learning_rate": 8.452445636300447e-06, + "loss": 0.2127, + "step": 77150 + }, + { + "epoch": 0.9951185540086538, + "grad_norm": 0.7654070342162774, + "learning_rate": 8.451902871358866e-06, + "loss": 0.2097, + "step": 77160 + }, + { + "epoch": 0.9952475221986496, + "grad_norm": 0.7928922550587604, + "learning_rate": 8.451360028686752e-06, + "loss": 0.2212, + "step": 77170 + }, + { + "epoch": 0.9953764903886456, + "grad_norm": 0.8261781115182006, + "learning_rate": 8.45081710829633e-06, + "loss": 0.2207, + "step": 77180 + }, + { + "epoch": 0.9955054585786416, + "grad_norm": 0.8431080988637196, + "learning_rate": 8.450274110199825e-06, + "loss": 0.2276, + "step": 77190 + }, + { + "epoch": 0.9956344267686376, + "grad_norm": 0.8228442363742381, + "learning_rate": 8.449731034409466e-06, + "loss": 0.2155, + "step": 77200 + }, + { + "epoch": 0.9957633949586334, + "grad_norm": 0.8051663028536974, + "learning_rate": 8.449187880937482e-06, + "loss": 0.2252, + "step": 77210 + }, + { + "epoch": 0.9958923631486294, + "grad_norm": 0.8357135525049907, + "learning_rate": 8.448644649796104e-06, + "loss": 0.2233, + "step": 77220 + }, + { + "epoch": 0.9960213313386254, + "grad_norm": 0.8636355585363087, + "learning_rate": 8.448101340997562e-06, + "loss": 0.213, + "step": 77230 + }, + { + "epoch": 0.9961502995286212, + "grad_norm": 0.8404342430348134, + "learning_rate": 8.447557954554092e-06, + "loss": 0.2224, + "step": 77240 + }, + { + "epoch": 0.9962792677186172, + "grad_norm": 0.8782411596110286, + "learning_rate": 8.447014490477932e-06, + "loss": 0.2285, + "step": 77250 + }, + { + "epoch": 0.9964082359086132, + "grad_norm": 0.8189377229016717, + "learning_rate": 8.446470948781317e-06, + "loss": 0.2137, + "step": 77260 + }, + { + "epoch": 0.9965372040986091, + "grad_norm": 0.8044216962198141, + "learning_rate": 8.44592732947649e-06, + "loss": 0.228, + "step": 77270 + }, + { + "epoch": 0.996666172288605, + "grad_norm": 0.8283275198702732, + "learning_rate": 8.445383632575685e-06, + "loss": 0.2328, + "step": 77280 + }, + { + "epoch": 0.996795140478601, + "grad_norm": 0.8621813862410797, + "learning_rate": 8.444839858091153e-06, + "loss": 0.2363, + "step": 77290 + }, + { + "epoch": 0.9969241086685969, + "grad_norm": 0.822822938338931, + "learning_rate": 8.444296006035136e-06, + "loss": 0.2208, + "step": 77300 + }, + { + "epoch": 0.9970530768585928, + "grad_norm": 0.8704048288742825, + "learning_rate": 8.443752076419879e-06, + "loss": 0.2217, + "step": 77310 + }, + { + "epoch": 0.9971820450485888, + "grad_norm": 0.7916012637626118, + "learning_rate": 8.443208069257633e-06, + "loss": 0.2318, + "step": 77320 + }, + { + "epoch": 0.9973110132385847, + "grad_norm": 0.8786310160071711, + "learning_rate": 8.442663984560644e-06, + "loss": 0.2239, + "step": 77330 + }, + { + "epoch": 0.9974399814285806, + "grad_norm": 0.7835210077377807, + "learning_rate": 8.442119822341168e-06, + "loss": 0.225, + "step": 77340 + }, + { + "epoch": 0.9975689496185766, + "grad_norm": 0.8436219838304767, + "learning_rate": 8.441575582611459e-06, + "loss": 0.2356, + "step": 77350 + }, + { + "epoch": 0.9976979178085725, + "grad_norm": 0.9023816641968622, + "learning_rate": 8.441031265383768e-06, + "loss": 0.2239, + "step": 77360 + }, + { + "epoch": 0.9978268859985685, + "grad_norm": 0.8142104579211993, + "learning_rate": 8.440486870670355e-06, + "loss": 0.2232, + "step": 77370 + }, + { + "epoch": 0.9979558541885644, + "grad_norm": 0.7837812414843206, + "learning_rate": 8.439942398483476e-06, + "loss": 0.2131, + "step": 77380 + }, + { + "epoch": 0.9980848223785603, + "grad_norm": 0.7847340833992649, + "learning_rate": 8.439397848835395e-06, + "loss": 0.2289, + "step": 77390 + }, + { + "epoch": 0.9982137905685563, + "grad_norm": 0.7909775888867407, + "learning_rate": 8.438853221738375e-06, + "loss": 0.2199, + "step": 77400 + }, + { + "epoch": 0.9983427587585522, + "grad_norm": 0.8376733074769032, + "learning_rate": 8.438308517204676e-06, + "loss": 0.2271, + "step": 77410 + }, + { + "epoch": 0.9984717269485481, + "grad_norm": 0.7945063147600687, + "learning_rate": 8.437763735246565e-06, + "loss": 0.2229, + "step": 77420 + }, + { + "epoch": 0.9986006951385441, + "grad_norm": 0.8622267510220284, + "learning_rate": 8.437218875876312e-06, + "loss": 0.233, + "step": 77430 + }, + { + "epoch": 0.99872966332854, + "grad_norm": 0.8058777503635036, + "learning_rate": 8.436673939106182e-06, + "loss": 0.2218, + "step": 77440 + }, + { + "epoch": 0.9988586315185359, + "grad_norm": 0.8473249302430779, + "learning_rate": 8.436128924948449e-06, + "loss": 0.21, + "step": 77450 + }, + { + "epoch": 0.9989875997085319, + "grad_norm": 0.7434129211474422, + "learning_rate": 8.435583833415384e-06, + "loss": 0.2226, + "step": 77460 + }, + { + "epoch": 0.9991165678985279, + "grad_norm": 0.815936176120668, + "learning_rate": 8.435038664519265e-06, + "loss": 0.2236, + "step": 77470 + }, + { + "epoch": 0.9992455360885237, + "grad_norm": 0.859735490162778, + "learning_rate": 8.434493418272365e-06, + "loss": 0.223, + "step": 77480 + }, + { + "epoch": 0.9993745042785197, + "grad_norm": 0.7866169885569095, + "learning_rate": 8.43394809468696e-06, + "loss": 0.2304, + "step": 77490 + }, + { + "epoch": 0.9995034724685157, + "grad_norm": 0.8798529401900572, + "learning_rate": 8.433402693775334e-06, + "loss": 0.2139, + "step": 77500 + }, + { + "epoch": 0.9996324406585115, + "grad_norm": 0.8281028077843561, + "learning_rate": 8.432857215549767e-06, + "loss": 0.2332, + "step": 77510 + }, + { + "epoch": 0.9997614088485075, + "grad_norm": 0.9231166422110489, + "learning_rate": 8.432311660022542e-06, + "loss": 0.2211, + "step": 77520 + }, + { + "epoch": 0.9998903770385035, + "grad_norm": 0.8333836192267694, + "learning_rate": 8.431766027205946e-06, + "loss": 0.2341, + "step": 77530 + }, + { + "epoch": 1.0000128968189996, + "grad_norm": 1.1624303723551417, + "learning_rate": 8.43122031711226e-06, + "loss": 0.2125, + "step": 77540 + }, + { + "epoch": 1.0001418650089955, + "grad_norm": 0.9545198113251713, + "learning_rate": 8.430674529753778e-06, + "loss": 0.205, + "step": 77550 + }, + { + "epoch": 1.0002708331989916, + "grad_norm": 0.8049415539539935, + "learning_rate": 8.430128665142787e-06, + "loss": 0.2045, + "step": 77560 + }, + { + "epoch": 1.0003998013889874, + "grad_norm": 0.8121087082883315, + "learning_rate": 8.42958272329158e-06, + "loss": 0.2144, + "step": 77570 + }, + { + "epoch": 1.0005287695789833, + "grad_norm": 0.8097882053263947, + "learning_rate": 8.42903670421245e-06, + "loss": 0.2031, + "step": 77580 + }, + { + "epoch": 1.0006577377689794, + "grad_norm": 0.78420342136761, + "learning_rate": 8.428490607917694e-06, + "loss": 0.2046, + "step": 77590 + }, + { + "epoch": 1.0007867059589752, + "grad_norm": 0.8864109943645925, + "learning_rate": 8.427944434419605e-06, + "loss": 0.2166, + "step": 77600 + }, + { + "epoch": 1.000915674148971, + "grad_norm": 0.856803969091765, + "learning_rate": 8.427398183730489e-06, + "loss": 0.2127, + "step": 77610 + }, + { + "epoch": 1.0010446423389672, + "grad_norm": 0.8595880397355135, + "learning_rate": 8.42685185586264e-06, + "loss": 0.2169, + "step": 77620 + }, + { + "epoch": 1.001173610528963, + "grad_norm": 0.771152701596063, + "learning_rate": 8.426305450828363e-06, + "loss": 0.2003, + "step": 77630 + }, + { + "epoch": 1.001302578718959, + "grad_norm": 0.7984659151101083, + "learning_rate": 8.425758968639961e-06, + "loss": 0.2047, + "step": 77640 + }, + { + "epoch": 1.001431546908955, + "grad_norm": 0.799770745410384, + "learning_rate": 8.425212409309742e-06, + "loss": 0.1976, + "step": 77650 + }, + { + "epoch": 1.0015605150989508, + "grad_norm": 0.9423024940948367, + "learning_rate": 8.42466577285001e-06, + "loss": 0.2176, + "step": 77660 + }, + { + "epoch": 1.0016894832889467, + "grad_norm": 0.7867953708904694, + "learning_rate": 8.424119059273077e-06, + "loss": 0.2145, + "step": 77670 + }, + { + "epoch": 1.0018184514789428, + "grad_norm": 0.7912363574630005, + "learning_rate": 8.423572268591254e-06, + "loss": 0.2173, + "step": 77680 + }, + { + "epoch": 1.0019474196689386, + "grad_norm": 0.7729525220078881, + "learning_rate": 8.423025400816852e-06, + "loss": 0.2062, + "step": 77690 + }, + { + "epoch": 1.0020763878589345, + "grad_norm": 0.7875225845321765, + "learning_rate": 8.422478455962187e-06, + "loss": 0.2031, + "step": 77700 + }, + { + "epoch": 1.0022053560489306, + "grad_norm": 0.8134510037056557, + "learning_rate": 8.421931434039573e-06, + "loss": 0.2165, + "step": 77710 + }, + { + "epoch": 1.0023343242389264, + "grad_norm": 0.7665933913579662, + "learning_rate": 8.421384335061331e-06, + "loss": 0.2049, + "step": 77720 + }, + { + "epoch": 1.0024632924289225, + "grad_norm": 0.9235790902071661, + "learning_rate": 8.42083715903978e-06, + "loss": 0.2023, + "step": 77730 + }, + { + "epoch": 1.0025922606189184, + "grad_norm": 0.8598139377210307, + "learning_rate": 8.42028990598724e-06, + "loss": 0.2087, + "step": 77740 + }, + { + "epoch": 1.0027212288089142, + "grad_norm": 0.8171637162589352, + "learning_rate": 8.419742575916034e-06, + "loss": 0.1956, + "step": 77750 + }, + { + "epoch": 1.0028501969989103, + "grad_norm": 0.8983048262370467, + "learning_rate": 8.419195168838488e-06, + "loss": 0.2097, + "step": 77760 + }, + { + "epoch": 1.0029791651889062, + "grad_norm": 0.9154441247152317, + "learning_rate": 8.418647684766928e-06, + "loss": 0.2127, + "step": 77770 + }, + { + "epoch": 1.003108133378902, + "grad_norm": 0.8336025394293477, + "learning_rate": 8.418100123713682e-06, + "loss": 0.2044, + "step": 77780 + }, + { + "epoch": 1.0032371015688981, + "grad_norm": 0.7942010606179073, + "learning_rate": 8.417552485691081e-06, + "loss": 0.2081, + "step": 77790 + }, + { + "epoch": 1.003366069758894, + "grad_norm": 0.9191018104322092, + "learning_rate": 8.417004770711455e-06, + "loss": 0.2109, + "step": 77800 + }, + { + "epoch": 1.0034950379488898, + "grad_norm": 0.8142615242470649, + "learning_rate": 8.416456978787141e-06, + "loss": 0.2078, + "step": 77810 + }, + { + "epoch": 1.003624006138886, + "grad_norm": 0.7826444150908195, + "learning_rate": 8.415909109930472e-06, + "loss": 0.1986, + "step": 77820 + }, + { + "epoch": 1.0037529743288818, + "grad_norm": 0.8644494623188864, + "learning_rate": 8.415361164153785e-06, + "loss": 0.2021, + "step": 77830 + }, + { + "epoch": 1.0038819425188776, + "grad_norm": 0.8788685368626539, + "learning_rate": 8.414813141469418e-06, + "loss": 0.2101, + "step": 77840 + }, + { + "epoch": 1.0040109107088737, + "grad_norm": 0.7557537627250893, + "learning_rate": 8.414265041889714e-06, + "loss": 0.2108, + "step": 77850 + }, + { + "epoch": 1.0041398788988696, + "grad_norm": 0.8634105117220916, + "learning_rate": 8.413716865427012e-06, + "loss": 0.2205, + "step": 77860 + }, + { + "epoch": 1.0042688470888654, + "grad_norm": 0.8482846670193962, + "learning_rate": 8.413168612093658e-06, + "loss": 0.2132, + "step": 77870 + }, + { + "epoch": 1.0043978152788615, + "grad_norm": 0.7970949049453973, + "learning_rate": 8.412620281901999e-06, + "loss": 0.2137, + "step": 77880 + }, + { + "epoch": 1.0045267834688574, + "grad_norm": 0.8258006294777823, + "learning_rate": 8.412071874864377e-06, + "loss": 0.2054, + "step": 77890 + }, + { + "epoch": 1.0046557516588532, + "grad_norm": 0.840993160393756, + "learning_rate": 8.411523390993148e-06, + "loss": 0.2261, + "step": 77900 + }, + { + "epoch": 1.0047847198488493, + "grad_norm": 0.8782201857533898, + "learning_rate": 8.410974830300659e-06, + "loss": 0.2184, + "step": 77910 + }, + { + "epoch": 1.0049136880388452, + "grad_norm": 0.8639927976610009, + "learning_rate": 8.410426192799262e-06, + "loss": 0.2125, + "step": 77920 + }, + { + "epoch": 1.0050426562288413, + "grad_norm": 0.8866621605734962, + "learning_rate": 8.409877478501313e-06, + "loss": 0.217, + "step": 77930 + }, + { + "epoch": 1.0051716244188371, + "grad_norm": 0.9826784056460259, + "learning_rate": 8.409328687419167e-06, + "loss": 0.2128, + "step": 77940 + }, + { + "epoch": 1.005300592608833, + "grad_norm": 0.8286315107625104, + "learning_rate": 8.408779819565184e-06, + "loss": 0.2046, + "step": 77950 + }, + { + "epoch": 1.005429560798829, + "grad_norm": 0.8101681848748237, + "learning_rate": 8.40823087495172e-06, + "loss": 0.2179, + "step": 77960 + }, + { + "epoch": 1.005558528988825, + "grad_norm": 0.8286605722088284, + "learning_rate": 8.40768185359114e-06, + "loss": 0.209, + "step": 77970 + }, + { + "epoch": 1.0056874971788208, + "grad_norm": 0.845925537571453, + "learning_rate": 8.407132755495803e-06, + "loss": 0.2071, + "step": 77980 + }, + { + "epoch": 1.0058164653688169, + "grad_norm": 0.8667702729414185, + "learning_rate": 8.406583580678076e-06, + "loss": 0.2179, + "step": 77990 + }, + { + "epoch": 1.0059454335588127, + "grad_norm": 0.9166475278087391, + "learning_rate": 8.406034329150325e-06, + "loss": 0.2192, + "step": 78000 + }, + { + "epoch": 1.0060744017488086, + "grad_norm": 0.8182219580258124, + "learning_rate": 8.405485000924918e-06, + "loss": 0.2162, + "step": 78010 + }, + { + "epoch": 1.0062033699388047, + "grad_norm": 0.8964817596237569, + "learning_rate": 8.404935596014226e-06, + "loss": 0.2073, + "step": 78020 + }, + { + "epoch": 1.0063323381288005, + "grad_norm": 0.877391960846784, + "learning_rate": 8.404386114430621e-06, + "loss": 0.2114, + "step": 78030 + }, + { + "epoch": 1.0064613063187964, + "grad_norm": 0.7799006530612334, + "learning_rate": 8.403836556186473e-06, + "loss": 0.197, + "step": 78040 + }, + { + "epoch": 1.0065902745087925, + "grad_norm": 0.8503969449214422, + "learning_rate": 8.403286921294158e-06, + "loss": 0.2094, + "step": 78050 + }, + { + "epoch": 1.0067192426987883, + "grad_norm": 0.835702490064334, + "learning_rate": 8.402737209766054e-06, + "loss": 0.2107, + "step": 78060 + }, + { + "epoch": 1.0068482108887842, + "grad_norm": 0.8837295559743175, + "learning_rate": 8.40218742161454e-06, + "loss": 0.2117, + "step": 78070 + }, + { + "epoch": 1.0069771790787803, + "grad_norm": 0.9002339938154708, + "learning_rate": 8.401637556851996e-06, + "loss": 0.2073, + "step": 78080 + }, + { + "epoch": 1.0071061472687761, + "grad_norm": 0.7318507079316272, + "learning_rate": 8.401087615490803e-06, + "loss": 0.2117, + "step": 78090 + }, + { + "epoch": 1.0072351154587722, + "grad_norm": 0.874471923443689, + "learning_rate": 8.400537597543346e-06, + "loss": 0.2099, + "step": 78100 + }, + { + "epoch": 1.007364083648768, + "grad_norm": 0.747754178119908, + "learning_rate": 8.399987503022009e-06, + "loss": 0.2159, + "step": 78110 + }, + { + "epoch": 1.007493051838764, + "grad_norm": 0.8815042287759356, + "learning_rate": 8.39943733193918e-06, + "loss": 0.2127, + "step": 78120 + }, + { + "epoch": 1.00762202002876, + "grad_norm": 0.8814180660113247, + "learning_rate": 8.398887084307247e-06, + "loss": 0.2103, + "step": 78130 + }, + { + "epoch": 1.0077509882187559, + "grad_norm": 0.8896005645694149, + "learning_rate": 8.398336760138601e-06, + "loss": 0.2017, + "step": 78140 + }, + { + "epoch": 1.0078799564087517, + "grad_norm": 0.7552584444016994, + "learning_rate": 8.397786359445634e-06, + "loss": 0.2011, + "step": 78150 + }, + { + "epoch": 1.0080089245987478, + "grad_norm": 0.8381384373431897, + "learning_rate": 8.397235882240741e-06, + "loss": 0.2116, + "step": 78160 + }, + { + "epoch": 1.0081378927887437, + "grad_norm": 0.8791669422242189, + "learning_rate": 8.396685328536317e-06, + "loss": 0.2182, + "step": 78170 + }, + { + "epoch": 1.0082668609787395, + "grad_norm": 0.8193129388649596, + "learning_rate": 8.39613469834476e-06, + "loss": 0.208, + "step": 78180 + }, + { + "epoch": 1.0083958291687356, + "grad_norm": 0.8594967788089352, + "learning_rate": 8.395583991678468e-06, + "loss": 0.2026, + "step": 78190 + }, + { + "epoch": 1.0085247973587315, + "grad_norm": 0.8851249745127721, + "learning_rate": 8.395033208549842e-06, + "loss": 0.2019, + "step": 78200 + }, + { + "epoch": 1.0086537655487273, + "grad_norm": 0.9494418341430092, + "learning_rate": 8.394482348971287e-06, + "loss": 0.2205, + "step": 78210 + }, + { + "epoch": 1.0087827337387234, + "grad_norm": 0.814098672125541, + "learning_rate": 8.393931412955205e-06, + "loss": 0.2059, + "step": 78220 + }, + { + "epoch": 1.0089117019287193, + "grad_norm": 0.8517614112531342, + "learning_rate": 8.393380400514002e-06, + "loss": 0.2163, + "step": 78230 + }, + { + "epoch": 1.0090406701187151, + "grad_norm": 0.9313154437940317, + "learning_rate": 8.392829311660087e-06, + "loss": 0.2178, + "step": 78240 + }, + { + "epoch": 1.0091696383087112, + "grad_norm": 0.8612958507212982, + "learning_rate": 8.39227814640587e-06, + "loss": 0.2187, + "step": 78250 + }, + { + "epoch": 1.009298606498707, + "grad_norm": 0.8962492009244326, + "learning_rate": 8.39172690476376e-06, + "loss": 0.2142, + "step": 78260 + }, + { + "epoch": 1.009427574688703, + "grad_norm": 0.9453754834454434, + "learning_rate": 8.39117558674617e-06, + "loss": 0.2084, + "step": 78270 + }, + { + "epoch": 1.009556542878699, + "grad_norm": 0.8286500668070778, + "learning_rate": 8.390624192365517e-06, + "loss": 0.2119, + "step": 78280 + }, + { + "epoch": 1.0096855110686949, + "grad_norm": 0.786042171465537, + "learning_rate": 8.390072721634217e-06, + "loss": 0.2127, + "step": 78290 + }, + { + "epoch": 1.009814479258691, + "grad_norm": 0.8713717184990105, + "learning_rate": 8.389521174564687e-06, + "loss": 0.2248, + "step": 78300 + }, + { + "epoch": 1.0099434474486868, + "grad_norm": 0.9200774244038645, + "learning_rate": 8.388969551169347e-06, + "loss": 0.2116, + "step": 78310 + }, + { + "epoch": 1.0100724156386827, + "grad_norm": 0.7865280691339406, + "learning_rate": 8.38841785146062e-06, + "loss": 0.206, + "step": 78320 + }, + { + "epoch": 1.0102013838286787, + "grad_norm": 0.8280637640746819, + "learning_rate": 8.387866075450925e-06, + "loss": 0.2096, + "step": 78330 + }, + { + "epoch": 1.0103303520186746, + "grad_norm": 0.8043827751183957, + "learning_rate": 8.387314223152694e-06, + "loss": 0.2141, + "step": 78340 + }, + { + "epoch": 1.0104593202086705, + "grad_norm": 0.8957303368781666, + "learning_rate": 8.386762294578347e-06, + "loss": 0.2069, + "step": 78350 + }, + { + "epoch": 1.0105882883986665, + "grad_norm": 0.8345685926159331, + "learning_rate": 8.386210289740315e-06, + "loss": 0.1949, + "step": 78360 + }, + { + "epoch": 1.0107172565886624, + "grad_norm": 0.984491410086898, + "learning_rate": 8.385658208651029e-06, + "loss": 0.1992, + "step": 78370 + }, + { + "epoch": 1.0108462247786583, + "grad_norm": 0.8894656748678783, + "learning_rate": 8.385106051322919e-06, + "loss": 0.211, + "step": 78380 + }, + { + "epoch": 1.0109751929686543, + "grad_norm": 0.9289723035463429, + "learning_rate": 8.384553817768422e-06, + "loss": 0.2027, + "step": 78390 + }, + { + "epoch": 1.0111041611586502, + "grad_norm": 0.9325663259612345, + "learning_rate": 8.384001507999971e-06, + "loss": 0.2182, + "step": 78400 + }, + { + "epoch": 1.011233129348646, + "grad_norm": 0.7749996255386328, + "learning_rate": 8.383449122030001e-06, + "loss": 0.2171, + "step": 78410 + }, + { + "epoch": 1.0113620975386421, + "grad_norm": 0.8697741773873358, + "learning_rate": 8.382896659870953e-06, + "loss": 0.2036, + "step": 78420 + }, + { + "epoch": 1.011491065728638, + "grad_norm": 0.8020348758229708, + "learning_rate": 8.382344121535267e-06, + "loss": 0.1995, + "step": 78430 + }, + { + "epoch": 1.0116200339186339, + "grad_norm": 0.8179655098875981, + "learning_rate": 8.381791507035386e-06, + "loss": 0.2178, + "step": 78440 + }, + { + "epoch": 1.01174900210863, + "grad_norm": 0.9009052050246033, + "learning_rate": 8.381238816383754e-06, + "loss": 0.2164, + "step": 78450 + }, + { + "epoch": 1.0118779702986258, + "grad_norm": 0.832504065282521, + "learning_rate": 8.380686049592814e-06, + "loss": 0.2085, + "step": 78460 + }, + { + "epoch": 1.0120069384886219, + "grad_norm": 0.8241139551907952, + "learning_rate": 8.380133206675016e-06, + "loss": 0.2048, + "step": 78470 + }, + { + "epoch": 1.0121359066786177, + "grad_norm": 0.8977186055675422, + "learning_rate": 8.379580287642808e-06, + "loss": 0.2131, + "step": 78480 + }, + { + "epoch": 1.0122648748686136, + "grad_norm": 0.9948455378860795, + "learning_rate": 8.37902729250864e-06, + "loss": 0.2219, + "step": 78490 + }, + { + "epoch": 1.0123938430586097, + "grad_norm": 0.8015543804929698, + "learning_rate": 8.378474221284965e-06, + "loss": 0.2093, + "step": 78500 + }, + { + "epoch": 1.0125228112486055, + "grad_norm": 0.781555823396211, + "learning_rate": 8.377921073984239e-06, + "loss": 0.2113, + "step": 78510 + }, + { + "epoch": 1.0126517794386014, + "grad_norm": 0.8252612967815142, + "learning_rate": 8.377367850618916e-06, + "loss": 0.1927, + "step": 78520 + }, + { + "epoch": 1.0127807476285975, + "grad_norm": 0.8236568335751605, + "learning_rate": 8.376814551201451e-06, + "loss": 0.1959, + "step": 78530 + }, + { + "epoch": 1.0129097158185933, + "grad_norm": 0.8430694187571817, + "learning_rate": 8.37626117574431e-06, + "loss": 0.212, + "step": 78540 + }, + { + "epoch": 1.0130386840085892, + "grad_norm": 0.8929769577807334, + "learning_rate": 8.375707724259949e-06, + "loss": 0.2194, + "step": 78550 + }, + { + "epoch": 1.0131676521985853, + "grad_norm": 0.9140610243436919, + "learning_rate": 8.37515419676083e-06, + "loss": 0.2149, + "step": 78560 + }, + { + "epoch": 1.0132966203885811, + "grad_norm": 0.7681217944596906, + "learning_rate": 8.37460059325942e-06, + "loss": 0.1964, + "step": 78570 + }, + { + "epoch": 1.013425588578577, + "grad_norm": 0.8506333820081499, + "learning_rate": 8.374046913768185e-06, + "loss": 0.2067, + "step": 78580 + }, + { + "epoch": 1.013554556768573, + "grad_norm": 0.8955731711537351, + "learning_rate": 8.373493158299592e-06, + "loss": 0.2159, + "step": 78590 + }, + { + "epoch": 1.013683524958569, + "grad_norm": 0.7975671535043481, + "learning_rate": 8.37293932686611e-06, + "loss": 0.2101, + "step": 78600 + }, + { + "epoch": 1.0138124931485648, + "grad_norm": 0.8851206588060546, + "learning_rate": 8.372385419480213e-06, + "loss": 0.2099, + "step": 78610 + }, + { + "epoch": 1.013941461338561, + "grad_norm": 0.8053362108989319, + "learning_rate": 8.37183143615437e-06, + "loss": 0.2082, + "step": 78620 + }, + { + "epoch": 1.0140704295285567, + "grad_norm": 0.8085883800321735, + "learning_rate": 8.371277376901058e-06, + "loss": 0.2095, + "step": 78630 + }, + { + "epoch": 1.0141993977185528, + "grad_norm": 0.8223553641439655, + "learning_rate": 8.370723241732755e-06, + "loss": 0.21, + "step": 78640 + }, + { + "epoch": 1.0143283659085487, + "grad_norm": 0.8924665532163961, + "learning_rate": 8.370169030661934e-06, + "loss": 0.2109, + "step": 78650 + }, + { + "epoch": 1.0144573340985445, + "grad_norm": 0.87874657162577, + "learning_rate": 8.36961474370108e-06, + "loss": 0.205, + "step": 78660 + }, + { + "epoch": 1.0145863022885406, + "grad_norm": 0.9091945107601165, + "learning_rate": 8.369060380862672e-06, + "loss": 0.2053, + "step": 78670 + }, + { + "epoch": 1.0147152704785365, + "grad_norm": 0.899959285604139, + "learning_rate": 8.368505942159196e-06, + "loss": 0.2075, + "step": 78680 + }, + { + "epoch": 1.0148442386685324, + "grad_norm": 0.9130807431374655, + "learning_rate": 8.367951427603131e-06, + "loss": 0.2206, + "step": 78690 + }, + { + "epoch": 1.0149732068585284, + "grad_norm": 0.897271197797929, + "learning_rate": 8.36739683720697e-06, + "loss": 0.2079, + "step": 78700 + }, + { + "epoch": 1.0151021750485243, + "grad_norm": 0.8293374725425751, + "learning_rate": 8.366842170983198e-06, + "loss": 0.2, + "step": 78710 + }, + { + "epoch": 1.0152311432385202, + "grad_norm": 0.8350298759600312, + "learning_rate": 8.366287428944306e-06, + "loss": 0.2075, + "step": 78720 + }, + { + "epoch": 1.0153601114285162, + "grad_norm": 0.8705611183024861, + "learning_rate": 8.365732611102787e-06, + "loss": 0.2107, + "step": 78730 + }, + { + "epoch": 1.015489079618512, + "grad_norm": 0.9172587301972623, + "learning_rate": 8.36517771747113e-06, + "loss": 0.2204, + "step": 78740 + }, + { + "epoch": 1.015618047808508, + "grad_norm": 0.8420863111894611, + "learning_rate": 8.364622748061835e-06, + "loss": 0.206, + "step": 78750 + }, + { + "epoch": 1.015747015998504, + "grad_norm": 0.8147050433857222, + "learning_rate": 8.364067702887397e-06, + "loss": 0.1983, + "step": 78760 + }, + { + "epoch": 1.0158759841885, + "grad_norm": 0.8809031514464628, + "learning_rate": 8.363512581960315e-06, + "loss": 0.2093, + "step": 78770 + }, + { + "epoch": 1.0160049523784958, + "grad_norm": 0.8277789754066172, + "learning_rate": 8.362957385293089e-06, + "loss": 0.1992, + "step": 78780 + }, + { + "epoch": 1.0161339205684918, + "grad_norm": 0.8557296635272524, + "learning_rate": 8.36240211289822e-06, + "loss": 0.2042, + "step": 78790 + }, + { + "epoch": 1.0162628887584877, + "grad_norm": 0.8371352174474793, + "learning_rate": 8.361846764788216e-06, + "loss": 0.2025, + "step": 78800 + }, + { + "epoch": 1.0163918569484836, + "grad_norm": 0.7779215426939113, + "learning_rate": 8.361291340975576e-06, + "loss": 0.2114, + "step": 78810 + }, + { + "epoch": 1.0165208251384796, + "grad_norm": 0.8663693344231922, + "learning_rate": 8.36073584147281e-06, + "loss": 0.2074, + "step": 78820 + }, + { + "epoch": 1.0166497933284755, + "grad_norm": 0.8480424669704363, + "learning_rate": 8.360180266292427e-06, + "loss": 0.2027, + "step": 78830 + }, + { + "epoch": 1.0167787615184716, + "grad_norm": 0.8556686405172512, + "learning_rate": 8.359624615446942e-06, + "loss": 0.2115, + "step": 78840 + }, + { + "epoch": 1.0169077297084674, + "grad_norm": 0.8172560517510601, + "learning_rate": 8.35906888894886e-06, + "loss": 0.2045, + "step": 78850 + }, + { + "epoch": 1.0170366978984633, + "grad_norm": 0.8446406956522253, + "learning_rate": 8.358513086810698e-06, + "loss": 0.2097, + "step": 78860 + }, + { + "epoch": 1.0171656660884594, + "grad_norm": 0.8056347628318177, + "learning_rate": 8.357957209044971e-06, + "loss": 0.2088, + "step": 78870 + }, + { + "epoch": 1.0172946342784552, + "grad_norm": 0.7856973043221995, + "learning_rate": 8.357401255664196e-06, + "loss": 0.2064, + "step": 78880 + }, + { + "epoch": 1.017423602468451, + "grad_norm": 0.8037885483962115, + "learning_rate": 8.356845226680896e-06, + "loss": 0.2045, + "step": 78890 + }, + { + "epoch": 1.0175525706584472, + "grad_norm": 0.9136425822863098, + "learning_rate": 8.356289122107589e-06, + "loss": 0.2075, + "step": 78900 + }, + { + "epoch": 1.017681538848443, + "grad_norm": 0.806741262543745, + "learning_rate": 8.355732941956793e-06, + "loss": 0.2073, + "step": 78910 + }, + { + "epoch": 1.017810507038439, + "grad_norm": 0.7947968641279122, + "learning_rate": 8.35517668624104e-06, + "loss": 0.2091, + "step": 78920 + }, + { + "epoch": 1.017939475228435, + "grad_norm": 0.8114839449745321, + "learning_rate": 8.35462035497285e-06, + "loss": 0.2088, + "step": 78930 + }, + { + "epoch": 1.0180684434184308, + "grad_norm": 0.8877447765524735, + "learning_rate": 8.354063948164755e-06, + "loss": 0.2031, + "step": 78940 + }, + { + "epoch": 1.0181974116084267, + "grad_norm": 0.9142775628384804, + "learning_rate": 8.353507465829281e-06, + "loss": 0.2173, + "step": 78950 + }, + { + "epoch": 1.0183263797984228, + "grad_norm": 0.9091501064592638, + "learning_rate": 8.352950907978958e-06, + "loss": 0.2132, + "step": 78960 + }, + { + "epoch": 1.0184553479884186, + "grad_norm": 0.9697947035577993, + "learning_rate": 8.352394274626323e-06, + "loss": 0.2307, + "step": 78970 + }, + { + "epoch": 1.0185843161784145, + "grad_norm": 0.8475623213766911, + "learning_rate": 8.351837565783907e-06, + "loss": 0.2128, + "step": 78980 + }, + { + "epoch": 1.0187132843684106, + "grad_norm": 0.7343115324306559, + "learning_rate": 8.351280781464247e-06, + "loss": 0.2002, + "step": 78990 + }, + { + "epoch": 1.0188422525584064, + "grad_norm": 0.850748008454671, + "learning_rate": 8.350723921679881e-06, + "loss": 0.2026, + "step": 79000 + }, + { + "epoch": 1.0189712207484023, + "grad_norm": 0.9142249896193554, + "learning_rate": 8.35016698644335e-06, + "loss": 0.2072, + "step": 79010 + }, + { + "epoch": 1.0191001889383984, + "grad_norm": 0.8737052074282645, + "learning_rate": 8.34960997576719e-06, + "loss": 0.2049, + "step": 79020 + }, + { + "epoch": 1.0192291571283942, + "grad_norm": 0.8478674244869253, + "learning_rate": 8.34905288966395e-06, + "loss": 0.215, + "step": 79030 + }, + { + "epoch": 1.0193581253183903, + "grad_norm": 0.836052982606256, + "learning_rate": 8.34849572814617e-06, + "loss": 0.2087, + "step": 79040 + }, + { + "epoch": 1.0194870935083862, + "grad_norm": 0.9314848424690886, + "learning_rate": 8.3479384912264e-06, + "loss": 0.211, + "step": 79050 + }, + { + "epoch": 1.019616061698382, + "grad_norm": 0.8559297504541363, + "learning_rate": 8.347381178917184e-06, + "loss": 0.1994, + "step": 79060 + }, + { + "epoch": 1.0197450298883781, + "grad_norm": 0.806476291932922, + "learning_rate": 8.346823791231076e-06, + "loss": 0.2117, + "step": 79070 + }, + { + "epoch": 1.019873998078374, + "grad_norm": 0.8464596479605632, + "learning_rate": 8.346266328180624e-06, + "loss": 0.2248, + "step": 79080 + }, + { + "epoch": 1.0200029662683698, + "grad_norm": 0.9269731897061728, + "learning_rate": 8.345708789778381e-06, + "loss": 0.2039, + "step": 79090 + }, + { + "epoch": 1.020131934458366, + "grad_norm": 0.8205912795702954, + "learning_rate": 8.345151176036904e-06, + "loss": 0.2111, + "step": 79100 + }, + { + "epoch": 1.0202609026483618, + "grad_norm": 0.7909011798652444, + "learning_rate": 8.344593486968749e-06, + "loss": 0.2069, + "step": 79110 + }, + { + "epoch": 1.0203898708383576, + "grad_norm": 0.766024802307852, + "learning_rate": 8.344035722586473e-06, + "loss": 0.2068, + "step": 79120 + }, + { + "epoch": 1.0205188390283537, + "grad_norm": 0.8450214748643007, + "learning_rate": 8.343477882902635e-06, + "loss": 0.2024, + "step": 79130 + }, + { + "epoch": 1.0206478072183496, + "grad_norm": 0.9162571463610144, + "learning_rate": 8.342919967929799e-06, + "loss": 0.208, + "step": 79140 + }, + { + "epoch": 1.0207767754083454, + "grad_norm": 0.864723980488135, + "learning_rate": 8.342361977680527e-06, + "loss": 0.2035, + "step": 79150 + }, + { + "epoch": 1.0209057435983415, + "grad_norm": 0.7927096961617794, + "learning_rate": 8.341803912167384e-06, + "loss": 0.2009, + "step": 79160 + }, + { + "epoch": 1.0210347117883374, + "grad_norm": 0.7481508022195057, + "learning_rate": 8.341245771402936e-06, + "loss": 0.1961, + "step": 79170 + }, + { + "epoch": 1.0211636799783332, + "grad_norm": 0.8398255330536518, + "learning_rate": 8.340687555399752e-06, + "loss": 0.2059, + "step": 79180 + }, + { + "epoch": 1.0212926481683293, + "grad_norm": 0.8273499152370544, + "learning_rate": 8.340129264170402e-06, + "loss": 0.2092, + "step": 79190 + }, + { + "epoch": 1.0214216163583252, + "grad_norm": 0.9113053729136844, + "learning_rate": 8.339570897727458e-06, + "loss": 0.212, + "step": 79200 + }, + { + "epoch": 1.0215505845483213, + "grad_norm": 0.8330526844390774, + "learning_rate": 8.339012456083493e-06, + "loss": 0.2078, + "step": 79210 + }, + { + "epoch": 1.0216795527383171, + "grad_norm": 0.8192237292151384, + "learning_rate": 8.33845393925108e-06, + "loss": 0.2078, + "step": 79220 + }, + { + "epoch": 1.021808520928313, + "grad_norm": 0.8944613666180957, + "learning_rate": 8.337895347242802e-06, + "loss": 0.2183, + "step": 79230 + }, + { + "epoch": 1.021937489118309, + "grad_norm": 0.7911402861794427, + "learning_rate": 8.337336680071228e-06, + "loss": 0.2236, + "step": 79240 + }, + { + "epoch": 1.022066457308305, + "grad_norm": 0.9702754510750102, + "learning_rate": 8.336777937748948e-06, + "loss": 0.2142, + "step": 79250 + }, + { + "epoch": 1.0221954254983008, + "grad_norm": 0.8953055431720605, + "learning_rate": 8.336219120288538e-06, + "loss": 0.2016, + "step": 79260 + }, + { + "epoch": 1.0223243936882969, + "grad_norm": 0.8258893021973694, + "learning_rate": 8.335660227702583e-06, + "loss": 0.2127, + "step": 79270 + }, + { + "epoch": 1.0224533618782927, + "grad_norm": 0.8284911956638883, + "learning_rate": 8.335101260003667e-06, + "loss": 0.2042, + "step": 79280 + }, + { + "epoch": 1.0225823300682886, + "grad_norm": 0.897974434570619, + "learning_rate": 8.334542217204377e-06, + "loss": 0.2096, + "step": 79290 + }, + { + "epoch": 1.0227112982582847, + "grad_norm": 0.8838072714144455, + "learning_rate": 8.333983099317304e-06, + "loss": 0.2043, + "step": 79300 + }, + { + "epoch": 1.0228402664482805, + "grad_norm": 0.8710333428067595, + "learning_rate": 8.333423906355039e-06, + "loss": 0.2094, + "step": 79310 + }, + { + "epoch": 1.0229692346382764, + "grad_norm": 0.9168283556711312, + "learning_rate": 8.33286463833017e-06, + "loss": 0.2043, + "step": 79320 + }, + { + "epoch": 1.0230982028282725, + "grad_norm": 0.8839100405847192, + "learning_rate": 8.332305295255292e-06, + "loss": 0.2109, + "step": 79330 + }, + { + "epoch": 1.0232271710182683, + "grad_norm": 0.827621621579239, + "learning_rate": 8.331745877143e-06, + "loss": 0.2072, + "step": 79340 + }, + { + "epoch": 1.0233561392082642, + "grad_norm": 0.8949591187162641, + "learning_rate": 8.331186384005894e-06, + "loss": 0.2048, + "step": 79350 + }, + { + "epoch": 1.0234851073982603, + "grad_norm": 0.8698499940919153, + "learning_rate": 8.330626815856572e-06, + "loss": 0.2074, + "step": 79360 + }, + { + "epoch": 1.0236140755882561, + "grad_norm": 0.9502900134771587, + "learning_rate": 8.330067172707633e-06, + "loss": 0.2229, + "step": 79370 + }, + { + "epoch": 1.0237430437782522, + "grad_norm": 0.7940406909920603, + "learning_rate": 8.329507454571678e-06, + "loss": 0.2059, + "step": 79380 + }, + { + "epoch": 1.023872011968248, + "grad_norm": 0.845620221302736, + "learning_rate": 8.328947661461311e-06, + "loss": 0.2092, + "step": 79390 + }, + { + "epoch": 1.024000980158244, + "grad_norm": 0.8868355974087966, + "learning_rate": 8.32838779338914e-06, + "loss": 0.2099, + "step": 79400 + }, + { + "epoch": 1.02412994834824, + "grad_norm": 0.7688904555606785, + "learning_rate": 8.327827850367773e-06, + "loss": 0.2023, + "step": 79410 + }, + { + "epoch": 1.0242589165382359, + "grad_norm": 0.9249202773472888, + "learning_rate": 8.327267832409815e-06, + "loss": 0.2106, + "step": 79420 + }, + { + "epoch": 1.0243878847282317, + "grad_norm": 0.8497927404857061, + "learning_rate": 8.326707739527878e-06, + "loss": 0.2165, + "step": 79430 + }, + { + "epoch": 1.0245168529182278, + "grad_norm": 0.8394539565524369, + "learning_rate": 8.326147571734575e-06, + "loss": 0.2125, + "step": 79440 + }, + { + "epoch": 1.0246458211082237, + "grad_norm": 0.8240954413905767, + "learning_rate": 8.32558732904252e-06, + "loss": 0.1915, + "step": 79450 + }, + { + "epoch": 1.0247747892982195, + "grad_norm": 0.8057309298085302, + "learning_rate": 8.325027011464328e-06, + "loss": 0.2135, + "step": 79460 + }, + { + "epoch": 1.0249037574882156, + "grad_norm": 0.8005942676390762, + "learning_rate": 8.324466619012618e-06, + "loss": 0.2074, + "step": 79470 + }, + { + "epoch": 1.0250327256782115, + "grad_norm": 0.7810866705945656, + "learning_rate": 8.323906151700008e-06, + "loss": 0.2012, + "step": 79480 + }, + { + "epoch": 1.0251616938682073, + "grad_norm": 0.8159192105598372, + "learning_rate": 8.323345609539115e-06, + "loss": 0.1942, + "step": 79490 + }, + { + "epoch": 1.0252906620582034, + "grad_norm": 0.7594969605578352, + "learning_rate": 8.322784992542568e-06, + "loss": 0.214, + "step": 79500 + }, + { + "epoch": 1.0254196302481993, + "grad_norm": 0.8379372877864105, + "learning_rate": 8.322224300722987e-06, + "loss": 0.2156, + "step": 79510 + }, + { + "epoch": 1.0255485984381951, + "grad_norm": 0.9077093950742787, + "learning_rate": 8.321663534092999e-06, + "loss": 0.2142, + "step": 79520 + }, + { + "epoch": 1.0256775666281912, + "grad_norm": 0.840070778738762, + "learning_rate": 8.321102692665229e-06, + "loss": 0.2022, + "step": 79530 + }, + { + "epoch": 1.025806534818187, + "grad_norm": 0.8983171331880088, + "learning_rate": 8.32054177645231e-06, + "loss": 0.2123, + "step": 79540 + }, + { + "epoch": 1.025935503008183, + "grad_norm": 0.8236806481707505, + "learning_rate": 8.31998078546687e-06, + "loss": 0.2235, + "step": 79550 + }, + { + "epoch": 1.026064471198179, + "grad_norm": 0.9456690586264237, + "learning_rate": 8.319419719721543e-06, + "loss": 0.1945, + "step": 79560 + }, + { + "epoch": 1.0261934393881749, + "grad_norm": 0.8181248855556338, + "learning_rate": 8.31885857922896e-06, + "loss": 0.209, + "step": 79570 + }, + { + "epoch": 1.026322407578171, + "grad_norm": 1.0092393091120155, + "learning_rate": 8.318297364001763e-06, + "loss": 0.2123, + "step": 79580 + }, + { + "epoch": 1.0264513757681668, + "grad_norm": 0.8093906985623455, + "learning_rate": 8.317736074052584e-06, + "loss": 0.1999, + "step": 79590 + }, + { + "epoch": 1.0265803439581627, + "grad_norm": 0.9202435038086667, + "learning_rate": 8.317174709394065e-06, + "loss": 0.2208, + "step": 79600 + }, + { + "epoch": 1.0267093121481587, + "grad_norm": 0.8392257392822589, + "learning_rate": 8.316613270038847e-06, + "loss": 0.1984, + "step": 79610 + }, + { + "epoch": 1.0268382803381546, + "grad_norm": 0.816871113045695, + "learning_rate": 8.31605175599957e-06, + "loss": 0.2052, + "step": 79620 + }, + { + "epoch": 1.0269672485281505, + "grad_norm": 0.918337266638791, + "learning_rate": 8.315490167288883e-06, + "loss": 0.2143, + "step": 79630 + }, + { + "epoch": 1.0270962167181465, + "grad_norm": 0.8130975110183877, + "learning_rate": 8.314928503919427e-06, + "loss": 0.1942, + "step": 79640 + }, + { + "epoch": 1.0272251849081424, + "grad_norm": 0.8386868208043579, + "learning_rate": 8.31436676590385e-06, + "loss": 0.2077, + "step": 79650 + }, + { + "epoch": 1.0273541530981383, + "grad_norm": 0.7706937119154748, + "learning_rate": 8.313804953254803e-06, + "loss": 0.2097, + "step": 79660 + }, + { + "epoch": 1.0274831212881343, + "grad_norm": 0.9041594338546375, + "learning_rate": 8.313243065984937e-06, + "loss": 0.2244, + "step": 79670 + }, + { + "epoch": 1.0276120894781302, + "grad_norm": 0.9534124219645284, + "learning_rate": 8.312681104106906e-06, + "loss": 0.2097, + "step": 79680 + }, + { + "epoch": 1.027741057668126, + "grad_norm": 0.7912296689792307, + "learning_rate": 8.312119067633363e-06, + "loss": 0.2131, + "step": 79690 + }, + { + "epoch": 1.0278700258581221, + "grad_norm": 0.9512618376173781, + "learning_rate": 8.311556956576962e-06, + "loss": 0.2084, + "step": 79700 + }, + { + "epoch": 1.027998994048118, + "grad_norm": 0.8642477118050698, + "learning_rate": 8.310994770950365e-06, + "loss": 0.2112, + "step": 79710 + }, + { + "epoch": 1.0281279622381139, + "grad_norm": 0.864861012500419, + "learning_rate": 8.310432510766226e-06, + "loss": 0.2019, + "step": 79720 + }, + { + "epoch": 1.02825693042811, + "grad_norm": 0.9204756272598519, + "learning_rate": 8.309870176037212e-06, + "loss": 0.1961, + "step": 79730 + }, + { + "epoch": 1.0283858986181058, + "grad_norm": 0.9674716218859367, + "learning_rate": 8.309307766775982e-06, + "loss": 0.2097, + "step": 79740 + }, + { + "epoch": 1.028514866808102, + "grad_norm": 0.7721959876414731, + "learning_rate": 8.3087452829952e-06, + "loss": 0.2091, + "step": 79750 + }, + { + "epoch": 1.0286438349980978, + "grad_norm": 0.7580977203487014, + "learning_rate": 8.308182724707534e-06, + "loss": 0.2011, + "step": 79760 + }, + { + "epoch": 1.0287728031880936, + "grad_norm": 0.8035521473832046, + "learning_rate": 8.307620091925653e-06, + "loss": 0.1954, + "step": 79770 + }, + { + "epoch": 1.0289017713780897, + "grad_norm": 0.886332638500028, + "learning_rate": 8.307057384662221e-06, + "loss": 0.2032, + "step": 79780 + }, + { + "epoch": 1.0290307395680856, + "grad_norm": 0.8265822390221228, + "learning_rate": 8.306494602929915e-06, + "loss": 0.2052, + "step": 79790 + }, + { + "epoch": 1.0291597077580814, + "grad_norm": 0.7811926004109325, + "learning_rate": 8.305931746741405e-06, + "loss": 0.2111, + "step": 79800 + }, + { + "epoch": 1.0292886759480775, + "grad_norm": 0.8966027260645419, + "learning_rate": 8.305368816109368e-06, + "loss": 0.2045, + "step": 79810 + }, + { + "epoch": 1.0294176441380734, + "grad_norm": 0.6882933399611958, + "learning_rate": 8.304805811046474e-06, + "loss": 0.2013, + "step": 79820 + }, + { + "epoch": 1.0295466123280692, + "grad_norm": 0.8679332693673164, + "learning_rate": 8.304242731565408e-06, + "loss": 0.2151, + "step": 79830 + }, + { + "epoch": 1.0296755805180653, + "grad_norm": 0.7763860738318485, + "learning_rate": 8.303679577678848e-06, + "loss": 0.2147, + "step": 79840 + }, + { + "epoch": 1.0298045487080612, + "grad_norm": 0.8180009136143983, + "learning_rate": 8.30311634939947e-06, + "loss": 0.2026, + "step": 79850 + }, + { + "epoch": 1.029933516898057, + "grad_norm": 0.8728721444499243, + "learning_rate": 8.302553046739963e-06, + "loss": 0.2097, + "step": 79860 + }, + { + "epoch": 1.030062485088053, + "grad_norm": 0.883815698390985, + "learning_rate": 8.30198966971301e-06, + "loss": 0.21, + "step": 79870 + }, + { + "epoch": 1.030191453278049, + "grad_norm": 0.8691808106748022, + "learning_rate": 8.301426218331295e-06, + "loss": 0.2238, + "step": 79880 + }, + { + "epoch": 1.0303204214680448, + "grad_norm": 0.8434602106448502, + "learning_rate": 8.300862692607506e-06, + "loss": 0.2078, + "step": 79890 + }, + { + "epoch": 1.030449389658041, + "grad_norm": 0.8977804407345313, + "learning_rate": 8.300299092554336e-06, + "loss": 0.2017, + "step": 79900 + }, + { + "epoch": 1.0305783578480368, + "grad_norm": 0.8683882153764086, + "learning_rate": 8.299735418184471e-06, + "loss": 0.202, + "step": 79910 + }, + { + "epoch": 1.0307073260380326, + "grad_norm": 0.8558227879303404, + "learning_rate": 8.29917166951061e-06, + "loss": 0.2123, + "step": 79920 + }, + { + "epoch": 1.0308362942280287, + "grad_norm": 1.152169453590942, + "learning_rate": 8.298607846545446e-06, + "loss": 0.2087, + "step": 79930 + }, + { + "epoch": 1.0309652624180246, + "grad_norm": 0.8945884526464231, + "learning_rate": 8.298043949301671e-06, + "loss": 0.2135, + "step": 79940 + }, + { + "epoch": 1.0310942306080206, + "grad_norm": 0.832673908804163, + "learning_rate": 8.297479977791987e-06, + "loss": 0.2026, + "step": 79950 + }, + { + "epoch": 1.0312231987980165, + "grad_norm": 0.8883991412478686, + "learning_rate": 8.29691593202909e-06, + "loss": 0.2146, + "step": 79960 + }, + { + "epoch": 1.0313521669880124, + "grad_norm": 0.8344522553138161, + "learning_rate": 8.296351812025687e-06, + "loss": 0.2024, + "step": 79970 + }, + { + "epoch": 1.0314811351780084, + "grad_norm": 0.8634082026681414, + "learning_rate": 8.295787617794475e-06, + "loss": 0.214, + "step": 79980 + }, + { + "epoch": 1.0316101033680043, + "grad_norm": 0.920351973283136, + "learning_rate": 8.295223349348162e-06, + "loss": 0.204, + "step": 79990 + }, + { + "epoch": 1.0317390715580002, + "grad_norm": 0.8502295867681604, + "learning_rate": 8.294659006699455e-06, + "loss": 0.201, + "step": 80000 + }, + { + "epoch": 1.0318680397479962, + "grad_norm": 0.7797872819709044, + "learning_rate": 8.294094589861059e-06, + "loss": 0.2121, + "step": 80010 + }, + { + "epoch": 1.031997007937992, + "grad_norm": 0.9514430604626031, + "learning_rate": 8.293530098845685e-06, + "loss": 0.2022, + "step": 80020 + }, + { + "epoch": 1.032125976127988, + "grad_norm": 0.964894424288574, + "learning_rate": 8.292965533666047e-06, + "loss": 0.2088, + "step": 80030 + }, + { + "epoch": 1.032254944317984, + "grad_norm": 0.7731618657049746, + "learning_rate": 8.292400894334854e-06, + "loss": 0.2046, + "step": 80040 + }, + { + "epoch": 1.03238391250798, + "grad_norm": 0.8561549404540466, + "learning_rate": 8.29183618086482e-06, + "loss": 0.2218, + "step": 80050 + }, + { + "epoch": 1.0325128806979758, + "grad_norm": 0.8493145221628478, + "learning_rate": 8.291271393268665e-06, + "loss": 0.2055, + "step": 80060 + }, + { + "epoch": 1.0326418488879718, + "grad_norm": 0.9006209283802449, + "learning_rate": 8.290706531559105e-06, + "loss": 0.2039, + "step": 80070 + }, + { + "epoch": 1.0327708170779677, + "grad_norm": 0.949115409765986, + "learning_rate": 8.29014159574886e-06, + "loss": 0.2041, + "step": 80080 + }, + { + "epoch": 1.0328997852679636, + "grad_norm": 0.8368792012932774, + "learning_rate": 8.289576585850652e-06, + "loss": 0.2042, + "step": 80090 + }, + { + "epoch": 1.0330287534579596, + "grad_norm": 0.84797779866903, + "learning_rate": 8.289011501877204e-06, + "loss": 0.2176, + "step": 80100 + }, + { + "epoch": 1.0331577216479555, + "grad_norm": 0.9006260890669825, + "learning_rate": 8.288446343841237e-06, + "loss": 0.2217, + "step": 80110 + }, + { + "epoch": 1.0332866898379516, + "grad_norm": 0.6992985816080868, + "learning_rate": 8.287881111755483e-06, + "loss": 0.2014, + "step": 80120 + }, + { + "epoch": 1.0334156580279474, + "grad_norm": 0.804275534072025, + "learning_rate": 8.287315805632666e-06, + "loss": 0.2089, + "step": 80130 + }, + { + "epoch": 1.0335446262179433, + "grad_norm": 0.8713410162207833, + "learning_rate": 8.286750425485519e-06, + "loss": 0.2126, + "step": 80140 + }, + { + "epoch": 1.0336735944079394, + "grad_norm": 0.899644457256077, + "learning_rate": 8.28618497132677e-06, + "loss": 0.2121, + "step": 80150 + }, + { + "epoch": 1.0338025625979352, + "grad_norm": 0.8033982769068351, + "learning_rate": 8.28561944316915e-06, + "loss": 0.2002, + "step": 80160 + }, + { + "epoch": 1.033931530787931, + "grad_norm": 0.9323795590218782, + "learning_rate": 8.285053841025401e-06, + "loss": 0.2091, + "step": 80170 + }, + { + "epoch": 1.0340604989779272, + "grad_norm": 0.9231820133228757, + "learning_rate": 8.284488164908254e-06, + "loss": 0.2112, + "step": 80180 + }, + { + "epoch": 1.034189467167923, + "grad_norm": 0.7952118261624996, + "learning_rate": 8.28392241483045e-06, + "loss": 0.2186, + "step": 80190 + }, + { + "epoch": 1.034318435357919, + "grad_norm": 0.8568141404353463, + "learning_rate": 8.283356590804724e-06, + "loss": 0.2072, + "step": 80200 + }, + { + "epoch": 1.034447403547915, + "grad_norm": 0.8311383828276503, + "learning_rate": 8.28279069284382e-06, + "loss": 0.2155, + "step": 80210 + }, + { + "epoch": 1.0345763717379108, + "grad_norm": 0.8768589573344393, + "learning_rate": 8.282224720960483e-06, + "loss": 0.2157, + "step": 80220 + }, + { + "epoch": 1.0347053399279067, + "grad_norm": 0.9139552830251108, + "learning_rate": 8.281658675167456e-06, + "loss": 0.2166, + "step": 80230 + }, + { + "epoch": 1.0348343081179028, + "grad_norm": 0.8254124827372779, + "learning_rate": 8.281092555477486e-06, + "loss": 0.2022, + "step": 80240 + }, + { + "epoch": 1.0349632763078986, + "grad_norm": 0.8867501768546975, + "learning_rate": 8.280526361903318e-06, + "loss": 0.1994, + "step": 80250 + }, + { + "epoch": 1.0350922444978945, + "grad_norm": 0.7718626116785656, + "learning_rate": 8.279960094457703e-06, + "loss": 0.2119, + "step": 80260 + }, + { + "epoch": 1.0352212126878906, + "grad_norm": 0.8566290031482503, + "learning_rate": 8.279393753153396e-06, + "loss": 0.2168, + "step": 80270 + }, + { + "epoch": 1.0353501808778864, + "grad_norm": 0.8203661601161055, + "learning_rate": 8.278827338003146e-06, + "loss": 0.1997, + "step": 80280 + }, + { + "epoch": 1.0354791490678825, + "grad_norm": 0.8660687208594179, + "learning_rate": 8.278260849019708e-06, + "loss": 0.1976, + "step": 80290 + }, + { + "epoch": 1.0356081172578784, + "grad_norm": 0.7842096729081086, + "learning_rate": 8.277694286215839e-06, + "loss": 0.2099, + "step": 80300 + }, + { + "epoch": 1.0357370854478742, + "grad_norm": 0.8519771731166272, + "learning_rate": 8.277127649604297e-06, + "loss": 0.2075, + "step": 80310 + }, + { + "epoch": 1.0358660536378703, + "grad_norm": 0.8579094477728005, + "learning_rate": 8.276560939197842e-06, + "loss": 0.2101, + "step": 80320 + }, + { + "epoch": 1.0359950218278662, + "grad_norm": 0.7941962451703111, + "learning_rate": 8.275994155009235e-06, + "loss": 0.2205, + "step": 80330 + }, + { + "epoch": 1.036123990017862, + "grad_norm": 0.8894214466592913, + "learning_rate": 8.275427297051239e-06, + "loss": 0.2069, + "step": 80340 + }, + { + "epoch": 1.0362529582078581, + "grad_norm": 0.8066545594473481, + "learning_rate": 8.274860365336616e-06, + "loss": 0.2041, + "step": 80350 + }, + { + "epoch": 1.036381926397854, + "grad_norm": 0.7704770704366891, + "learning_rate": 8.274293359878138e-06, + "loss": 0.2028, + "step": 80360 + }, + { + "epoch": 1.0365108945878498, + "grad_norm": 0.8927846585724387, + "learning_rate": 8.273726280688568e-06, + "loss": 0.2162, + "step": 80370 + }, + { + "epoch": 1.036639862777846, + "grad_norm": 0.8942003887388346, + "learning_rate": 8.273159127780677e-06, + "loss": 0.2117, + "step": 80380 + }, + { + "epoch": 1.0367688309678418, + "grad_norm": 0.8236758310192587, + "learning_rate": 8.272591901167238e-06, + "loss": 0.1984, + "step": 80390 + }, + { + "epoch": 1.0368977991578376, + "grad_norm": 0.868067004573043, + "learning_rate": 8.27202460086102e-06, + "loss": 0.2042, + "step": 80400 + }, + { + "epoch": 1.0370267673478337, + "grad_norm": 0.8993510294234577, + "learning_rate": 8.271457226874801e-06, + "loss": 0.2139, + "step": 80410 + }, + { + "epoch": 1.0371557355378296, + "grad_norm": 0.7936314463839204, + "learning_rate": 8.270889779221355e-06, + "loss": 0.2051, + "step": 80420 + }, + { + "epoch": 1.0372847037278254, + "grad_norm": 0.8121446887731278, + "learning_rate": 8.270322257913461e-06, + "loss": 0.2103, + "step": 80430 + }, + { + "epoch": 1.0374136719178215, + "grad_norm": 0.7695994424866155, + "learning_rate": 8.2697546629639e-06, + "loss": 0.1974, + "step": 80440 + }, + { + "epoch": 1.0375426401078174, + "grad_norm": 0.8184424762255461, + "learning_rate": 8.269186994385452e-06, + "loss": 0.2136, + "step": 80450 + }, + { + "epoch": 1.0376716082978132, + "grad_norm": 0.9171768887194509, + "learning_rate": 8.268619252190898e-06, + "loss": 0.2078, + "step": 80460 + }, + { + "epoch": 1.0378005764878093, + "grad_norm": 0.834716940947191, + "learning_rate": 8.268051436393025e-06, + "loss": 0.2116, + "step": 80470 + }, + { + "epoch": 1.0379295446778052, + "grad_norm": 0.7920279008441236, + "learning_rate": 8.267483547004618e-06, + "loss": 0.1999, + "step": 80480 + }, + { + "epoch": 1.0380585128678013, + "grad_norm": 0.8588347750274795, + "learning_rate": 8.266915584038465e-06, + "loss": 0.2037, + "step": 80490 + }, + { + "epoch": 1.0381874810577971, + "grad_norm": 0.8120820471119009, + "learning_rate": 8.266347547507356e-06, + "loss": 0.1934, + "step": 80500 + }, + { + "epoch": 1.038316449247793, + "grad_norm": 0.8026593739461859, + "learning_rate": 8.265779437424083e-06, + "loss": 0.2051, + "step": 80510 + }, + { + "epoch": 1.038445417437789, + "grad_norm": 0.891755708447309, + "learning_rate": 8.265211253801432e-06, + "loss": 0.203, + "step": 80520 + }, + { + "epoch": 1.038574385627785, + "grad_norm": 0.8979437284916959, + "learning_rate": 8.264642996652209e-06, + "loss": 0.2102, + "step": 80530 + }, + { + "epoch": 1.0387033538177808, + "grad_norm": 0.8218235473824943, + "learning_rate": 8.2640746659892e-06, + "loss": 0.21, + "step": 80540 + }, + { + "epoch": 1.0388323220077769, + "grad_norm": 0.8135558974367577, + "learning_rate": 8.263506261825209e-06, + "loss": 0.199, + "step": 80550 + }, + { + "epoch": 1.0389612901977727, + "grad_norm": 0.7669948940556552, + "learning_rate": 8.262937784173034e-06, + "loss": 0.2068, + "step": 80560 + }, + { + "epoch": 1.0390902583877686, + "grad_norm": 0.8921254422211438, + "learning_rate": 8.262369233045473e-06, + "loss": 0.2093, + "step": 80570 + }, + { + "epoch": 1.0392192265777647, + "grad_norm": 0.8386715727832458, + "learning_rate": 8.261800608455334e-06, + "loss": 0.2009, + "step": 80580 + }, + { + "epoch": 1.0393481947677605, + "grad_norm": 0.9192909447702161, + "learning_rate": 8.261231910415418e-06, + "loss": 0.2095, + "step": 80590 + }, + { + "epoch": 1.0394771629577564, + "grad_norm": 0.8181895705345423, + "learning_rate": 8.260663138938529e-06, + "loss": 0.2126, + "step": 80600 + }, + { + "epoch": 1.0396061311477525, + "grad_norm": 0.8068977104424159, + "learning_rate": 8.26009429403748e-06, + "loss": 0.2135, + "step": 80610 + }, + { + "epoch": 1.0397350993377483, + "grad_norm": 0.8702234945839037, + "learning_rate": 8.259525375725075e-06, + "loss": 0.2098, + "step": 80620 + }, + { + "epoch": 1.0398640675277442, + "grad_norm": 0.8714794183794218, + "learning_rate": 8.25895638401413e-06, + "loss": 0.2178, + "step": 80630 + }, + { + "epoch": 1.0399930357177403, + "grad_norm": 0.8061126036522213, + "learning_rate": 8.258387318917454e-06, + "loss": 0.1959, + "step": 80640 + }, + { + "epoch": 1.0401220039077361, + "grad_norm": 0.7622761744223959, + "learning_rate": 8.25781818044786e-06, + "loss": 0.2053, + "step": 80650 + }, + { + "epoch": 1.040250972097732, + "grad_norm": 0.8636026964601579, + "learning_rate": 8.25724896861817e-06, + "loss": 0.2186, + "step": 80660 + }, + { + "epoch": 1.040379940287728, + "grad_norm": 0.8391301340264166, + "learning_rate": 8.256679683441198e-06, + "loss": 0.1979, + "step": 80670 + }, + { + "epoch": 1.040508908477724, + "grad_norm": 0.97763203867014, + "learning_rate": 8.25611032492976e-06, + "loss": 0.2111, + "step": 80680 + }, + { + "epoch": 1.04063787666772, + "grad_norm": 0.8807106279331913, + "learning_rate": 8.255540893096683e-06, + "loss": 0.218, + "step": 80690 + }, + { + "epoch": 1.0407668448577159, + "grad_norm": 0.8318086824261551, + "learning_rate": 8.254971387954787e-06, + "loss": 0.2093, + "step": 80700 + }, + { + "epoch": 1.0408958130477117, + "grad_norm": 0.8169467720604736, + "learning_rate": 8.254401809516894e-06, + "loss": 0.2137, + "step": 80710 + }, + { + "epoch": 1.0410247812377078, + "grad_norm": 0.8363159081642785, + "learning_rate": 8.253832157795834e-06, + "loss": 0.201, + "step": 80720 + }, + { + "epoch": 1.0411537494277037, + "grad_norm": 0.8118299342898061, + "learning_rate": 8.25326243280443e-06, + "loss": 0.2089, + "step": 80730 + }, + { + "epoch": 1.0412827176176995, + "grad_norm": 0.8349353931533136, + "learning_rate": 8.252692634555516e-06, + "loss": 0.2133, + "step": 80740 + }, + { + "epoch": 1.0414116858076956, + "grad_norm": 0.854204886234999, + "learning_rate": 8.25212276306192e-06, + "loss": 0.2103, + "step": 80750 + }, + { + "epoch": 1.0415406539976915, + "grad_norm": 0.8115917428552879, + "learning_rate": 8.251552818336473e-06, + "loss": 0.2168, + "step": 80760 + }, + { + "epoch": 1.0416696221876873, + "grad_norm": 0.8358466408781916, + "learning_rate": 8.250982800392012e-06, + "loss": 0.1995, + "step": 80770 + }, + { + "epoch": 1.0417985903776834, + "grad_norm": 0.8696626612447712, + "learning_rate": 8.25041270924137e-06, + "loss": 0.2059, + "step": 80780 + }, + { + "epoch": 1.0419275585676793, + "grad_norm": 0.8369402241808032, + "learning_rate": 8.249842544897388e-06, + "loss": 0.2204, + "step": 80790 + }, + { + "epoch": 1.0420565267576751, + "grad_norm": 0.8957720407407596, + "learning_rate": 8.249272307372904e-06, + "loss": 0.2078, + "step": 80800 + }, + { + "epoch": 1.0421854949476712, + "grad_norm": 0.9135580231907008, + "learning_rate": 8.248701996680757e-06, + "loss": 0.2224, + "step": 80810 + }, + { + "epoch": 1.042314463137667, + "grad_norm": 0.8752619039595004, + "learning_rate": 8.24813161283379e-06, + "loss": 0.2085, + "step": 80820 + }, + { + "epoch": 1.042443431327663, + "grad_norm": 0.8040495297751886, + "learning_rate": 8.247561155844847e-06, + "loss": 0.2147, + "step": 80830 + }, + { + "epoch": 1.042572399517659, + "grad_norm": 0.8458271445365418, + "learning_rate": 8.246990625726774e-06, + "loss": 0.2114, + "step": 80840 + }, + { + "epoch": 1.0427013677076549, + "grad_norm": 0.8319021355203478, + "learning_rate": 8.246420022492419e-06, + "loss": 0.2125, + "step": 80850 + }, + { + "epoch": 1.042830335897651, + "grad_norm": 0.7825689258618738, + "learning_rate": 8.24584934615463e-06, + "loss": 0.2167, + "step": 80860 + }, + { + "epoch": 1.0429593040876468, + "grad_norm": 0.870197365879383, + "learning_rate": 8.245278596726257e-06, + "loss": 0.2284, + "step": 80870 + }, + { + "epoch": 1.0430882722776427, + "grad_norm": 0.8939622693569879, + "learning_rate": 8.24470777422015e-06, + "loss": 0.2122, + "step": 80880 + }, + { + "epoch": 1.0432172404676388, + "grad_norm": 0.8967005836510604, + "learning_rate": 8.24413687864917e-06, + "loss": 0.195, + "step": 80890 + }, + { + "epoch": 1.0433462086576346, + "grad_norm": 0.7287101708250865, + "learning_rate": 8.243565910026169e-06, + "loss": 0.21, + "step": 80900 + }, + { + "epoch": 1.0434751768476305, + "grad_norm": 0.7907894207498893, + "learning_rate": 8.242994868364e-06, + "loss": 0.2095, + "step": 80910 + }, + { + "epoch": 1.0436041450376266, + "grad_norm": 0.8185659389797582, + "learning_rate": 8.242423753675528e-06, + "loss": 0.2143, + "step": 80920 + }, + { + "epoch": 1.0437331132276224, + "grad_norm": 0.8891933383138344, + "learning_rate": 8.241852565973609e-06, + "loss": 0.2103, + "step": 80930 + }, + { + "epoch": 1.0438620814176183, + "grad_norm": 0.8739999026203619, + "learning_rate": 8.241281305271106e-06, + "loss": 0.2067, + "step": 80940 + }, + { + "epoch": 1.0439910496076144, + "grad_norm": 0.8129308147350003, + "learning_rate": 8.240709971580885e-06, + "loss": 0.2177, + "step": 80950 + }, + { + "epoch": 1.0441200177976102, + "grad_norm": 0.806461373731522, + "learning_rate": 8.24013856491581e-06, + "loss": 0.2021, + "step": 80960 + }, + { + "epoch": 1.044248985987606, + "grad_norm": 0.8474849058343334, + "learning_rate": 8.239567085288748e-06, + "loss": 0.2056, + "step": 80970 + }, + { + "epoch": 1.0443779541776022, + "grad_norm": 0.8939511731913831, + "learning_rate": 8.238995532712568e-06, + "loss": 0.2117, + "step": 80980 + }, + { + "epoch": 1.044506922367598, + "grad_norm": 0.9627892035578889, + "learning_rate": 8.238423907200139e-06, + "loss": 0.2109, + "step": 80990 + }, + { + "epoch": 1.0446358905575939, + "grad_norm": 0.9931637079994972, + "learning_rate": 8.237852208764334e-06, + "loss": 0.2036, + "step": 81000 + }, + { + "epoch": 1.04476485874759, + "grad_norm": 0.8495453058215238, + "learning_rate": 8.237280437418028e-06, + "loss": 0.2138, + "step": 81010 + }, + { + "epoch": 1.0448938269375858, + "grad_norm": 0.8767089270542877, + "learning_rate": 8.236708593174094e-06, + "loss": 0.21, + "step": 81020 + }, + { + "epoch": 1.045022795127582, + "grad_norm": 0.8696321071908963, + "learning_rate": 8.23613667604541e-06, + "loss": 0.2085, + "step": 81030 + }, + { + "epoch": 1.0451517633175778, + "grad_norm": 0.8897612632927334, + "learning_rate": 8.235564686044852e-06, + "loss": 0.2023, + "step": 81040 + }, + { + "epoch": 1.0452807315075736, + "grad_norm": 0.8912308834565235, + "learning_rate": 8.234992623185305e-06, + "loss": 0.2103, + "step": 81050 + }, + { + "epoch": 1.0454096996975697, + "grad_norm": 0.795805137942354, + "learning_rate": 8.234420487479647e-06, + "loss": 0.1989, + "step": 81060 + }, + { + "epoch": 1.0455386678875656, + "grad_norm": 0.8994932316566762, + "learning_rate": 8.233848278940764e-06, + "loss": 0.2056, + "step": 81070 + }, + { + "epoch": 1.0456676360775614, + "grad_norm": 0.8904491443325285, + "learning_rate": 8.233275997581539e-06, + "loss": 0.201, + "step": 81080 + }, + { + "epoch": 1.0457966042675575, + "grad_norm": 0.8985066984612897, + "learning_rate": 8.23270364341486e-06, + "loss": 0.215, + "step": 81090 + }, + { + "epoch": 1.0459255724575534, + "grad_norm": 0.8369288048432788, + "learning_rate": 8.232131216453613e-06, + "loss": 0.205, + "step": 81100 + }, + { + "epoch": 1.0460545406475492, + "grad_norm": 0.8274692480257692, + "learning_rate": 8.23155871671069e-06, + "loss": 0.2026, + "step": 81110 + }, + { + "epoch": 1.0461835088375453, + "grad_norm": 0.8929617466407173, + "learning_rate": 8.230986144198983e-06, + "loss": 0.2218, + "step": 81120 + }, + { + "epoch": 1.0463124770275412, + "grad_norm": 0.8748925771234705, + "learning_rate": 8.230413498931387e-06, + "loss": 0.2118, + "step": 81130 + }, + { + "epoch": 1.046441445217537, + "grad_norm": 0.845250654199071, + "learning_rate": 8.229840780920792e-06, + "loss": 0.2038, + "step": 81140 + }, + { + "epoch": 1.046570413407533, + "grad_norm": 0.8069235478213151, + "learning_rate": 8.229267990180098e-06, + "loss": 0.2126, + "step": 81150 + }, + { + "epoch": 1.046699381597529, + "grad_norm": 0.8269995984652725, + "learning_rate": 8.228695126722201e-06, + "loss": 0.2068, + "step": 81160 + }, + { + "epoch": 1.0468283497875248, + "grad_norm": 0.9409928673680615, + "learning_rate": 8.228122190560004e-06, + "loss": 0.2104, + "step": 81170 + }, + { + "epoch": 1.046957317977521, + "grad_norm": 0.8870998799162613, + "learning_rate": 8.227549181706407e-06, + "loss": 0.2108, + "step": 81180 + }, + { + "epoch": 1.0470862861675168, + "grad_norm": 0.8561307590664273, + "learning_rate": 8.226976100174313e-06, + "loss": 0.1998, + "step": 81190 + }, + { + "epoch": 1.0472152543575126, + "grad_norm": 0.9590146190925122, + "learning_rate": 8.226402945976626e-06, + "loss": 0.222, + "step": 81200 + }, + { + "epoch": 1.0473442225475087, + "grad_norm": 0.9847344845116541, + "learning_rate": 8.225829719126252e-06, + "loss": 0.2106, + "step": 81210 + }, + { + "epoch": 1.0474731907375046, + "grad_norm": 0.8598476041598753, + "learning_rate": 8.225256419636103e-06, + "loss": 0.2076, + "step": 81220 + }, + { + "epoch": 1.0476021589275006, + "grad_norm": 0.9075608850014015, + "learning_rate": 8.224683047519082e-06, + "loss": 0.2047, + "step": 81230 + }, + { + "epoch": 1.0477311271174965, + "grad_norm": 0.8081718954623635, + "learning_rate": 8.224109602788106e-06, + "loss": 0.2066, + "step": 81240 + }, + { + "epoch": 1.0478600953074924, + "grad_norm": 0.8714210170738124, + "learning_rate": 8.223536085456085e-06, + "loss": 0.2016, + "step": 81250 + }, + { + "epoch": 1.0479890634974884, + "grad_norm": 0.8417012984446037, + "learning_rate": 8.222962495535936e-06, + "loss": 0.2002, + "step": 81260 + }, + { + "epoch": 1.0481180316874843, + "grad_norm": 0.8897944778337084, + "learning_rate": 8.222388833040573e-06, + "loss": 0.2041, + "step": 81270 + }, + { + "epoch": 1.0482469998774802, + "grad_norm": 0.784070045449101, + "learning_rate": 8.221815097982912e-06, + "loss": 0.2089, + "step": 81280 + }, + { + "epoch": 1.0483759680674762, + "grad_norm": 0.9427218679469388, + "learning_rate": 8.221241290375878e-06, + "loss": 0.2076, + "step": 81290 + }, + { + "epoch": 1.048504936257472, + "grad_norm": 0.8535786345912775, + "learning_rate": 8.220667410232389e-06, + "loss": 0.2026, + "step": 81300 + }, + { + "epoch": 1.048633904447468, + "grad_norm": 0.8592063067054274, + "learning_rate": 8.220093457565365e-06, + "loss": 0.2068, + "step": 81310 + }, + { + "epoch": 1.048762872637464, + "grad_norm": 0.940747838962526, + "learning_rate": 8.219519432387736e-06, + "loss": 0.2206, + "step": 81320 + }, + { + "epoch": 1.04889184082746, + "grad_norm": 0.7987539426765999, + "learning_rate": 8.218945334712425e-06, + "loss": 0.2104, + "step": 81330 + }, + { + "epoch": 1.0490208090174558, + "grad_norm": 0.9126592197698075, + "learning_rate": 8.218371164552359e-06, + "loss": 0.1957, + "step": 81340 + }, + { + "epoch": 1.0491497772074518, + "grad_norm": 0.9524005398992452, + "learning_rate": 8.217796921920467e-06, + "loss": 0.2115, + "step": 81350 + }, + { + "epoch": 1.0492787453974477, + "grad_norm": 0.8569671253963922, + "learning_rate": 8.21722260682968e-06, + "loss": 0.213, + "step": 81360 + }, + { + "epoch": 1.0494077135874436, + "grad_norm": 0.7868867286221619, + "learning_rate": 8.216648219292934e-06, + "loss": 0.1902, + "step": 81370 + }, + { + "epoch": 1.0495366817774396, + "grad_norm": 0.8791375972510002, + "learning_rate": 8.21607375932316e-06, + "loss": 0.2157, + "step": 81380 + }, + { + "epoch": 1.0496656499674355, + "grad_norm": 0.8818548691247585, + "learning_rate": 8.215499226933295e-06, + "loss": 0.217, + "step": 81390 + }, + { + "epoch": 1.0497946181574314, + "grad_norm": 0.8407856195217914, + "learning_rate": 8.214924622136273e-06, + "loss": 0.2013, + "step": 81400 + }, + { + "epoch": 1.0499235863474274, + "grad_norm": 0.8605900767718415, + "learning_rate": 8.214349944945037e-06, + "loss": 0.2149, + "step": 81410 + }, + { + "epoch": 1.0500525545374233, + "grad_norm": 0.8676053018448224, + "learning_rate": 8.21377519537253e-06, + "loss": 0.2089, + "step": 81420 + }, + { + "epoch": 1.0501815227274194, + "grad_norm": 0.8859112876946942, + "learning_rate": 8.213200373431686e-06, + "loss": 0.2213, + "step": 81430 + }, + { + "epoch": 1.0503104909174152, + "grad_norm": 0.8711144818259717, + "learning_rate": 8.212625479135457e-06, + "loss": 0.2137, + "step": 81440 + }, + { + "epoch": 1.050439459107411, + "grad_norm": 0.9229440322501336, + "learning_rate": 8.212050512496784e-06, + "loss": 0.2055, + "step": 81450 + }, + { + "epoch": 1.0505684272974072, + "grad_norm": 0.8213989036564906, + "learning_rate": 8.211475473528613e-06, + "loss": 0.2061, + "step": 81460 + }, + { + "epoch": 1.050697395487403, + "grad_norm": 0.8942827055363874, + "learning_rate": 8.210900362243899e-06, + "loss": 0.2119, + "step": 81470 + }, + { + "epoch": 1.050826363677399, + "grad_norm": 0.7823961949082114, + "learning_rate": 8.210325178655588e-06, + "loss": 0.1892, + "step": 81480 + }, + { + "epoch": 1.050955331867395, + "grad_norm": 0.8971206008840976, + "learning_rate": 8.209749922776633e-06, + "loss": 0.2041, + "step": 81490 + }, + { + "epoch": 1.0510843000573908, + "grad_norm": 0.787664629687661, + "learning_rate": 8.209174594619987e-06, + "loss": 0.2159, + "step": 81500 + }, + { + "epoch": 1.0512132682473867, + "grad_norm": 0.8024651206421094, + "learning_rate": 8.208599194198605e-06, + "loss": 0.2053, + "step": 81510 + }, + { + "epoch": 1.0513422364373828, + "grad_norm": 0.925740644364912, + "learning_rate": 8.208023721525446e-06, + "loss": 0.2088, + "step": 81520 + }, + { + "epoch": 1.0514712046273786, + "grad_norm": 0.8002026407389862, + "learning_rate": 8.207448176613468e-06, + "loss": 0.2088, + "step": 81530 + }, + { + "epoch": 1.0516001728173745, + "grad_norm": 0.8390107692055657, + "learning_rate": 8.206872559475629e-06, + "loss": 0.2117, + "step": 81540 + }, + { + "epoch": 1.0517291410073706, + "grad_norm": 0.8089429252249725, + "learning_rate": 8.206296870124895e-06, + "loss": 0.2046, + "step": 81550 + }, + { + "epoch": 1.0518581091973664, + "grad_norm": 0.8925029482772905, + "learning_rate": 8.205721108574226e-06, + "loss": 0.2208, + "step": 81560 + }, + { + "epoch": 1.0519870773873623, + "grad_norm": 0.8435435289694747, + "learning_rate": 8.205145274836587e-06, + "loss": 0.1985, + "step": 81570 + }, + { + "epoch": 1.0521160455773584, + "grad_norm": 0.7940234559122286, + "learning_rate": 8.204569368924946e-06, + "loss": 0.1995, + "step": 81580 + }, + { + "epoch": 1.0522450137673542, + "grad_norm": 0.9086211086015108, + "learning_rate": 8.203993390852272e-06, + "loss": 0.2047, + "step": 81590 + }, + { + "epoch": 1.0523739819573503, + "grad_norm": 0.8564302126711607, + "learning_rate": 8.203417340631533e-06, + "loss": 0.2174, + "step": 81600 + }, + { + "epoch": 1.0525029501473462, + "grad_norm": 0.8167291033153373, + "learning_rate": 8.202841218275704e-06, + "loss": 0.2075, + "step": 81610 + }, + { + "epoch": 1.052631918337342, + "grad_norm": 0.8671531230806976, + "learning_rate": 8.202265023797754e-06, + "loss": 0.204, + "step": 81620 + }, + { + "epoch": 1.0527608865273381, + "grad_norm": 0.7825083696114854, + "learning_rate": 8.20168875721066e-06, + "loss": 0.1916, + "step": 81630 + }, + { + "epoch": 1.052889854717334, + "grad_norm": 0.8485154481819519, + "learning_rate": 8.201112418527398e-06, + "loss": 0.1985, + "step": 81640 + }, + { + "epoch": 1.0530188229073298, + "grad_norm": 0.9777380574636826, + "learning_rate": 8.200536007760949e-06, + "loss": 0.2136, + "step": 81650 + }, + { + "epoch": 1.053147791097326, + "grad_norm": 0.8856562331398451, + "learning_rate": 8.199959524924285e-06, + "loss": 0.2089, + "step": 81660 + }, + { + "epoch": 1.0532767592873218, + "grad_norm": 0.8236790471611474, + "learning_rate": 8.199382970030395e-06, + "loss": 0.2092, + "step": 81670 + }, + { + "epoch": 1.0534057274773176, + "grad_norm": 0.8842877817626443, + "learning_rate": 8.198806343092262e-06, + "loss": 0.1901, + "step": 81680 + }, + { + "epoch": 1.0535346956673137, + "grad_norm": 0.8038440863681061, + "learning_rate": 8.198229644122866e-06, + "loss": 0.2018, + "step": 81690 + }, + { + "epoch": 1.0536636638573096, + "grad_norm": 0.862376737829431, + "learning_rate": 8.197652873135194e-06, + "loss": 0.2269, + "step": 81700 + }, + { + "epoch": 1.0537926320473054, + "grad_norm": 0.8075204741697053, + "learning_rate": 8.197076030142236e-06, + "loss": 0.2097, + "step": 81710 + }, + { + "epoch": 1.0539216002373015, + "grad_norm": 0.8502890535366496, + "learning_rate": 8.196499115156981e-06, + "loss": 0.2069, + "step": 81720 + }, + { + "epoch": 1.0540505684272974, + "grad_norm": 0.8602198653521386, + "learning_rate": 8.195922128192418e-06, + "loss": 0.1995, + "step": 81730 + }, + { + "epoch": 1.0541795366172932, + "grad_norm": 0.9067183416595673, + "learning_rate": 8.195345069261543e-06, + "loss": 0.2067, + "step": 81740 + }, + { + "epoch": 1.0543085048072893, + "grad_norm": 0.9526021391253721, + "learning_rate": 8.194767938377347e-06, + "loss": 0.2207, + "step": 81750 + }, + { + "epoch": 1.0544374729972852, + "grad_norm": 0.8507046772163999, + "learning_rate": 8.194190735552826e-06, + "loss": 0.2048, + "step": 81760 + }, + { + "epoch": 1.0545664411872813, + "grad_norm": 0.9089271703507145, + "learning_rate": 8.193613460800981e-06, + "loss": 0.2065, + "step": 81770 + }, + { + "epoch": 1.0546954093772771, + "grad_norm": 0.9630079453812589, + "learning_rate": 8.193036114134811e-06, + "loss": 0.2058, + "step": 81780 + }, + { + "epoch": 1.054824377567273, + "grad_norm": 0.8847985144242365, + "learning_rate": 8.192458695567314e-06, + "loss": 0.2094, + "step": 81790 + }, + { + "epoch": 1.054953345757269, + "grad_norm": 0.8211968686173569, + "learning_rate": 8.191881205111491e-06, + "loss": 0.2137, + "step": 81800 + }, + { + "epoch": 1.055082313947265, + "grad_norm": 0.7760062173931854, + "learning_rate": 8.191303642780351e-06, + "loss": 0.2047, + "step": 81810 + }, + { + "epoch": 1.0552112821372608, + "grad_norm": 0.9104668430943557, + "learning_rate": 8.190726008586895e-06, + "loss": 0.2219, + "step": 81820 + }, + { + "epoch": 1.0553402503272569, + "grad_norm": 0.8647949877750715, + "learning_rate": 8.190148302544134e-06, + "loss": 0.2174, + "step": 81830 + }, + { + "epoch": 1.0554692185172527, + "grad_norm": 0.8635096711655035, + "learning_rate": 8.189570524665074e-06, + "loss": 0.2113, + "step": 81840 + }, + { + "epoch": 1.0555981867072486, + "grad_norm": 0.7734090543108634, + "learning_rate": 8.188992674962726e-06, + "loss": 0.2148, + "step": 81850 + }, + { + "epoch": 1.0557271548972447, + "grad_norm": 0.8804298038649853, + "learning_rate": 8.188414753450104e-06, + "loss": 0.2013, + "step": 81860 + }, + { + "epoch": 1.0558561230872405, + "grad_norm": 0.8227564487950915, + "learning_rate": 8.18783676014022e-06, + "loss": 0.2093, + "step": 81870 + }, + { + "epoch": 1.0559850912772364, + "grad_norm": 0.7530606545125025, + "learning_rate": 8.187258695046089e-06, + "loss": 0.1914, + "step": 81880 + }, + { + "epoch": 1.0561140594672325, + "grad_norm": 0.8055568987204658, + "learning_rate": 8.18668055818073e-06, + "loss": 0.218, + "step": 81890 + }, + { + "epoch": 1.0562430276572283, + "grad_norm": 0.7715929915766634, + "learning_rate": 8.186102349557162e-06, + "loss": 0.2091, + "step": 81900 + }, + { + "epoch": 1.0563719958472242, + "grad_norm": 0.8279404203715036, + "learning_rate": 8.185524069188403e-06, + "loss": 0.2144, + "step": 81910 + }, + { + "epoch": 1.0565009640372203, + "grad_norm": 0.8785214241572369, + "learning_rate": 8.184945717087473e-06, + "loss": 0.2092, + "step": 81920 + }, + { + "epoch": 1.0566299322272161, + "grad_norm": 0.8215472723548854, + "learning_rate": 8.1843672932674e-06, + "loss": 0.2082, + "step": 81930 + }, + { + "epoch": 1.056758900417212, + "grad_norm": 0.9158822885632065, + "learning_rate": 8.183788797741205e-06, + "loss": 0.2025, + "step": 81940 + }, + { + "epoch": 1.056887868607208, + "grad_norm": 0.9333424630442547, + "learning_rate": 8.183210230521918e-06, + "loss": 0.1943, + "step": 81950 + }, + { + "epoch": 1.057016836797204, + "grad_norm": 0.8138384959002722, + "learning_rate": 8.182631591622566e-06, + "loss": 0.2087, + "step": 81960 + }, + { + "epoch": 1.0571458049872, + "grad_norm": 0.899958654116412, + "learning_rate": 8.182052881056178e-06, + "loss": 0.2061, + "step": 81970 + }, + { + "epoch": 1.0572747731771959, + "grad_norm": 0.8614835291098145, + "learning_rate": 8.181474098835787e-06, + "loss": 0.2046, + "step": 81980 + }, + { + "epoch": 1.0574037413671917, + "grad_norm": 0.862458570889865, + "learning_rate": 8.180895244974425e-06, + "loss": 0.2061, + "step": 81990 + }, + { + "epoch": 1.0575327095571878, + "grad_norm": 0.89119771376379, + "learning_rate": 8.180316319485128e-06, + "loss": 0.1991, + "step": 82000 + }, + { + "epoch": 1.0576616777471837, + "grad_norm": 0.8128281696402163, + "learning_rate": 8.17973732238093e-06, + "loss": 0.209, + "step": 82010 + }, + { + "epoch": 1.0577906459371795, + "grad_norm": 0.8576524164911756, + "learning_rate": 8.17915825367487e-06, + "loss": 0.2032, + "step": 82020 + }, + { + "epoch": 1.0579196141271756, + "grad_norm": 0.7930138084622356, + "learning_rate": 8.178579113379989e-06, + "loss": 0.1992, + "step": 82030 + }, + { + "epoch": 1.0580485823171715, + "grad_norm": 0.8771852696872378, + "learning_rate": 8.177999901509328e-06, + "loss": 0.2118, + "step": 82040 + }, + { + "epoch": 1.0581775505071673, + "grad_norm": 0.8304498259283368, + "learning_rate": 8.177420618075927e-06, + "loss": 0.2186, + "step": 82050 + }, + { + "epoch": 1.0583065186971634, + "grad_norm": 0.7678787974192268, + "learning_rate": 8.176841263092834e-06, + "loss": 0.2032, + "step": 82060 + }, + { + "epoch": 1.0584354868871593, + "grad_norm": 0.8140932965379662, + "learning_rate": 8.176261836573091e-06, + "loss": 0.2118, + "step": 82070 + }, + { + "epoch": 1.0585644550771551, + "grad_norm": 0.845271399349386, + "learning_rate": 8.175682338529751e-06, + "loss": 0.2091, + "step": 82080 + }, + { + "epoch": 1.0586934232671512, + "grad_norm": 0.8158244639690353, + "learning_rate": 8.175102768975857e-06, + "loss": 0.2071, + "step": 82090 + }, + { + "epoch": 1.058822391457147, + "grad_norm": 0.8413801687973217, + "learning_rate": 8.174523127924464e-06, + "loss": 0.2118, + "step": 82100 + }, + { + "epoch": 1.058951359647143, + "grad_norm": 0.8462587430292575, + "learning_rate": 8.173943415388626e-06, + "loss": 0.1985, + "step": 82110 + }, + { + "epoch": 1.059080327837139, + "grad_norm": 0.8205960307027932, + "learning_rate": 8.173363631381392e-06, + "loss": 0.2045, + "step": 82120 + }, + { + "epoch": 1.0592092960271349, + "grad_norm": 0.8046590727084586, + "learning_rate": 8.172783775915823e-06, + "loss": 0.1983, + "step": 82130 + }, + { + "epoch": 1.0593382642171307, + "grad_norm": 0.7916755332470545, + "learning_rate": 8.17220384900497e-06, + "loss": 0.2063, + "step": 82140 + }, + { + "epoch": 1.0594672324071268, + "grad_norm": 0.967322486244353, + "learning_rate": 8.171623850661897e-06, + "loss": 0.209, + "step": 82150 + }, + { + "epoch": 1.0595962005971227, + "grad_norm": 0.7873125314141999, + "learning_rate": 8.171043780899664e-06, + "loss": 0.2154, + "step": 82160 + }, + { + "epoch": 1.0597251687871188, + "grad_norm": 0.878639883275458, + "learning_rate": 8.17046363973133e-06, + "loss": 0.2074, + "step": 82170 + }, + { + "epoch": 1.0598541369771146, + "grad_norm": 0.942222780197939, + "learning_rate": 8.169883427169962e-06, + "loss": 0.213, + "step": 82180 + }, + { + "epoch": 1.0599831051671105, + "grad_norm": 0.7750418666607236, + "learning_rate": 8.169303143228624e-06, + "loss": 0.2025, + "step": 82190 + }, + { + "epoch": 1.0601120733571066, + "grad_norm": 0.8040329581064035, + "learning_rate": 8.168722787920382e-06, + "loss": 0.1963, + "step": 82200 + }, + { + "epoch": 1.0602410415471024, + "grad_norm": 0.7651510119732691, + "learning_rate": 8.168142361258307e-06, + "loss": 0.2069, + "step": 82210 + }, + { + "epoch": 1.0603700097370983, + "grad_norm": 0.8278118440836729, + "learning_rate": 8.167561863255466e-06, + "loss": 0.2002, + "step": 82220 + }, + { + "epoch": 1.0604989779270944, + "grad_norm": 0.8822292523888355, + "learning_rate": 8.166981293924933e-06, + "loss": 0.209, + "step": 82230 + }, + { + "epoch": 1.0606279461170902, + "grad_norm": 0.8168455205382659, + "learning_rate": 8.166400653279782e-06, + "loss": 0.2096, + "step": 82240 + }, + { + "epoch": 1.060756914307086, + "grad_norm": 0.7975517715879229, + "learning_rate": 8.165819941333086e-06, + "loss": 0.2107, + "step": 82250 + }, + { + "epoch": 1.0608858824970822, + "grad_norm": 0.786983859931239, + "learning_rate": 8.165239158097921e-06, + "loss": 0.213, + "step": 82260 + }, + { + "epoch": 1.061014850687078, + "grad_norm": 0.8816977229260917, + "learning_rate": 8.164658303587367e-06, + "loss": 0.2031, + "step": 82270 + }, + { + "epoch": 1.0611438188770739, + "grad_norm": 0.8550177259545709, + "learning_rate": 8.164077377814503e-06, + "loss": 0.2107, + "step": 82280 + }, + { + "epoch": 1.06127278706707, + "grad_norm": 0.8740924623190208, + "learning_rate": 8.163496380792413e-06, + "loss": 0.2112, + "step": 82290 + }, + { + "epoch": 1.0614017552570658, + "grad_norm": 0.8540346157584905, + "learning_rate": 8.162915312534177e-06, + "loss": 0.203, + "step": 82300 + }, + { + "epoch": 1.0615307234470617, + "grad_norm": 0.8399092175536089, + "learning_rate": 8.16233417305288e-06, + "loss": 0.2118, + "step": 82310 + }, + { + "epoch": 1.0616596916370578, + "grad_norm": 0.7655595974778977, + "learning_rate": 8.161752962361607e-06, + "loss": 0.2109, + "step": 82320 + }, + { + "epoch": 1.0617886598270536, + "grad_norm": 0.8294698639140038, + "learning_rate": 8.161171680473447e-06, + "loss": 0.212, + "step": 82330 + }, + { + "epoch": 1.0619176280170497, + "grad_norm": 0.9151127689184172, + "learning_rate": 8.16059032740149e-06, + "loss": 0.2159, + "step": 82340 + }, + { + "epoch": 1.0620465962070456, + "grad_norm": 0.9072039452806677, + "learning_rate": 8.16000890315883e-06, + "loss": 0.2011, + "step": 82350 + }, + { + "epoch": 1.0621755643970414, + "grad_norm": 0.74404918118902, + "learning_rate": 8.159427407758553e-06, + "loss": 0.199, + "step": 82360 + }, + { + "epoch": 1.0623045325870375, + "grad_norm": 0.9730159604222065, + "learning_rate": 8.158845841213757e-06, + "loss": 0.2092, + "step": 82370 + }, + { + "epoch": 1.0624335007770334, + "grad_norm": 0.7721351876996181, + "learning_rate": 8.158264203537536e-06, + "loss": 0.2041, + "step": 82380 + }, + { + "epoch": 1.0625624689670292, + "grad_norm": 0.8915158283555208, + "learning_rate": 8.15768249474299e-06, + "loss": 0.2054, + "step": 82390 + }, + { + "epoch": 1.0626914371570253, + "grad_norm": 0.8368740154358353, + "learning_rate": 8.157100714843217e-06, + "loss": 0.199, + "step": 82400 + }, + { + "epoch": 1.0628204053470212, + "grad_norm": 0.8431332590915743, + "learning_rate": 8.156518863851316e-06, + "loss": 0.2115, + "step": 82410 + }, + { + "epoch": 1.062949373537017, + "grad_norm": 0.8489792101048786, + "learning_rate": 8.155936941780394e-06, + "loss": 0.2056, + "step": 82420 + }, + { + "epoch": 1.063078341727013, + "grad_norm": 0.8900657785149988, + "learning_rate": 8.155354948643547e-06, + "loss": 0.2074, + "step": 82430 + }, + { + "epoch": 1.063207309917009, + "grad_norm": 0.7497912061617974, + "learning_rate": 8.154772884453887e-06, + "loss": 0.1912, + "step": 82440 + }, + { + "epoch": 1.0633362781070048, + "grad_norm": 0.8940764502822697, + "learning_rate": 8.15419074922452e-06, + "loss": 0.215, + "step": 82450 + }, + { + "epoch": 1.063465246297001, + "grad_norm": 0.9078213892599956, + "learning_rate": 8.153608542968553e-06, + "loss": 0.2007, + "step": 82460 + }, + { + "epoch": 1.0635942144869968, + "grad_norm": 0.9051158134762369, + "learning_rate": 8.153026265699095e-06, + "loss": 0.2073, + "step": 82470 + }, + { + "epoch": 1.0637231826769926, + "grad_norm": 0.8532438919370651, + "learning_rate": 8.152443917429259e-06, + "loss": 0.2014, + "step": 82480 + }, + { + "epoch": 1.0638521508669887, + "grad_norm": 0.9129098582820641, + "learning_rate": 8.15186149817216e-06, + "loss": 0.227, + "step": 82490 + }, + { + "epoch": 1.0639811190569846, + "grad_norm": 0.7869608164148602, + "learning_rate": 8.151279007940912e-06, + "loss": 0.2126, + "step": 82500 + }, + { + "epoch": 1.0641100872469806, + "grad_norm": 0.8146201116205969, + "learning_rate": 8.150696446748631e-06, + "loss": 0.2001, + "step": 82510 + }, + { + "epoch": 1.0642390554369765, + "grad_norm": 0.8193526862788001, + "learning_rate": 8.150113814608437e-06, + "loss": 0.2058, + "step": 82520 + }, + { + "epoch": 1.0643680236269724, + "grad_norm": 0.8626355165657938, + "learning_rate": 8.149531111533446e-06, + "loss": 0.2045, + "step": 82530 + }, + { + "epoch": 1.0644969918169684, + "grad_norm": 0.7590862573131679, + "learning_rate": 8.148948337536784e-06, + "loss": 0.2192, + "step": 82540 + }, + { + "epoch": 1.0646259600069643, + "grad_norm": 0.8796725602287325, + "learning_rate": 8.148365492631573e-06, + "loss": 0.198, + "step": 82550 + }, + { + "epoch": 1.0647549281969602, + "grad_norm": 0.8508276257471892, + "learning_rate": 8.147782576830934e-06, + "loss": 0.2146, + "step": 82560 + }, + { + "epoch": 1.0648838963869562, + "grad_norm": 0.8319537137053443, + "learning_rate": 8.147199590147998e-06, + "loss": 0.2065, + "step": 82570 + }, + { + "epoch": 1.065012864576952, + "grad_norm": 0.8752267000785268, + "learning_rate": 8.146616532595887e-06, + "loss": 0.2025, + "step": 82580 + }, + { + "epoch": 1.065141832766948, + "grad_norm": 0.8926838020610783, + "learning_rate": 8.146033404187737e-06, + "loss": 0.2056, + "step": 82590 + }, + { + "epoch": 1.065270800956944, + "grad_norm": 0.9138558971526267, + "learning_rate": 8.145450204936676e-06, + "loss": 0.2054, + "step": 82600 + }, + { + "epoch": 1.06539976914694, + "grad_norm": 0.7552891497322207, + "learning_rate": 8.144866934855835e-06, + "loss": 0.2089, + "step": 82610 + }, + { + "epoch": 1.0655287373369358, + "grad_norm": 0.8826479304394859, + "learning_rate": 8.14428359395835e-06, + "loss": 0.2104, + "step": 82620 + }, + { + "epoch": 1.0656577055269318, + "grad_norm": 0.7959910322155314, + "learning_rate": 8.143700182257356e-06, + "loss": 0.2101, + "step": 82630 + }, + { + "epoch": 1.0657866737169277, + "grad_norm": 0.8319102606531164, + "learning_rate": 8.143116699765992e-06, + "loss": 0.1978, + "step": 82640 + }, + { + "epoch": 1.0659156419069236, + "grad_norm": 1.2496361367898272, + "learning_rate": 8.142533146497395e-06, + "loss": 0.1969, + "step": 82650 + }, + { + "epoch": 1.0660446100969196, + "grad_norm": 0.9175744061134423, + "learning_rate": 8.141949522464707e-06, + "loss": 0.2128, + "step": 82660 + }, + { + "epoch": 1.0661735782869155, + "grad_norm": 0.7825601877181034, + "learning_rate": 8.141365827681069e-06, + "loss": 0.2032, + "step": 82670 + }, + { + "epoch": 1.0663025464769116, + "grad_norm": 0.8487112988078637, + "learning_rate": 8.140782062159627e-06, + "loss": 0.2128, + "step": 82680 + }, + { + "epoch": 1.0664315146669074, + "grad_norm": 0.9022198371409573, + "learning_rate": 8.140198225913521e-06, + "loss": 0.2092, + "step": 82690 + }, + { + "epoch": 1.0665604828569033, + "grad_norm": 0.784516362178107, + "learning_rate": 8.139614318955903e-06, + "loss": 0.2194, + "step": 82700 + }, + { + "epoch": 1.0666894510468994, + "grad_norm": 0.9028436549636446, + "learning_rate": 8.13903034129992e-06, + "loss": 0.2032, + "step": 82710 + }, + { + "epoch": 1.0668184192368952, + "grad_norm": 0.7873575523865991, + "learning_rate": 8.138446292958722e-06, + "loss": 0.2067, + "step": 82720 + }, + { + "epoch": 1.066947387426891, + "grad_norm": 0.8412032884344695, + "learning_rate": 8.137862173945461e-06, + "loss": 0.2226, + "step": 82730 + }, + { + "epoch": 1.0670763556168872, + "grad_norm": 0.9462674751896827, + "learning_rate": 8.137277984273289e-06, + "loss": 0.2132, + "step": 82740 + }, + { + "epoch": 1.067205323806883, + "grad_norm": 0.7849037635274931, + "learning_rate": 8.136693723955364e-06, + "loss": 0.1968, + "step": 82750 + }, + { + "epoch": 1.067334291996879, + "grad_norm": 0.8943129638495292, + "learning_rate": 8.136109393004839e-06, + "loss": 0.2064, + "step": 82760 + }, + { + "epoch": 1.067463260186875, + "grad_norm": 0.9141530395421321, + "learning_rate": 8.135524991434873e-06, + "loss": 0.2125, + "step": 82770 + }, + { + "epoch": 1.0675922283768708, + "grad_norm": 1.0795180302357372, + "learning_rate": 8.134940519258627e-06, + "loss": 0.2132, + "step": 82780 + }, + { + "epoch": 1.0677211965668667, + "grad_norm": 0.833132869600869, + "learning_rate": 8.134355976489261e-06, + "loss": 0.2107, + "step": 82790 + }, + { + "epoch": 1.0678501647568628, + "grad_norm": 0.9327008730218744, + "learning_rate": 8.133771363139939e-06, + "loss": 0.2164, + "step": 82800 + }, + { + "epoch": 1.0679791329468586, + "grad_norm": 0.7969814634060873, + "learning_rate": 8.133186679223823e-06, + "loss": 0.2061, + "step": 82810 + }, + { + "epoch": 1.0681081011368545, + "grad_norm": 0.8503953884477571, + "learning_rate": 8.132601924754082e-06, + "loss": 0.2086, + "step": 82820 + }, + { + "epoch": 1.0682370693268506, + "grad_norm": 0.8166205294126883, + "learning_rate": 8.132017099743882e-06, + "loss": 0.2053, + "step": 82830 + }, + { + "epoch": 1.0683660375168464, + "grad_norm": 0.8246114266769621, + "learning_rate": 8.131432204206392e-06, + "loss": 0.2062, + "step": 82840 + }, + { + "epoch": 1.0684950057068423, + "grad_norm": 0.8022037011685684, + "learning_rate": 8.130847238154782e-06, + "loss": 0.2151, + "step": 82850 + }, + { + "epoch": 1.0686239738968384, + "grad_norm": 0.9804823564679207, + "learning_rate": 8.130262201602229e-06, + "loss": 0.2061, + "step": 82860 + }, + { + "epoch": 1.0687529420868342, + "grad_norm": 0.9175835995412358, + "learning_rate": 8.129677094561901e-06, + "loss": 0.2035, + "step": 82870 + }, + { + "epoch": 1.06888191027683, + "grad_norm": 0.8182327909281751, + "learning_rate": 8.129091917046976e-06, + "loss": 0.2132, + "step": 82880 + }, + { + "epoch": 1.0690108784668262, + "grad_norm": 0.819690851489953, + "learning_rate": 8.12850666907063e-06, + "loss": 0.2203, + "step": 82890 + }, + { + "epoch": 1.069139846656822, + "grad_norm": 0.8423993813015378, + "learning_rate": 8.127921350646046e-06, + "loss": 0.212, + "step": 82900 + }, + { + "epoch": 1.0692688148468181, + "grad_norm": 0.8672583197198194, + "learning_rate": 8.1273359617864e-06, + "loss": 0.2099, + "step": 82910 + }, + { + "epoch": 1.069397783036814, + "grad_norm": 0.8283519279889922, + "learning_rate": 8.126750502504874e-06, + "loss": 0.1866, + "step": 82920 + }, + { + "epoch": 1.0695267512268098, + "grad_norm": 0.7903712379515992, + "learning_rate": 8.126164972814655e-06, + "loss": 0.2106, + "step": 82930 + }, + { + "epoch": 1.069655719416806, + "grad_norm": 0.8065480328840516, + "learning_rate": 8.125579372728922e-06, + "loss": 0.2158, + "step": 82940 + }, + { + "epoch": 1.0697846876068018, + "grad_norm": 0.7795825227303755, + "learning_rate": 8.124993702260868e-06, + "loss": 0.2087, + "step": 82950 + }, + { + "epoch": 1.0699136557967976, + "grad_norm": 0.76121866639079, + "learning_rate": 8.124407961423677e-06, + "loss": 0.2112, + "step": 82960 + }, + { + "epoch": 1.0700426239867937, + "grad_norm": 0.9009727503197944, + "learning_rate": 8.12382215023054e-06, + "loss": 0.2078, + "step": 82970 + }, + { + "epoch": 1.0701715921767896, + "grad_norm": 0.8324960298287456, + "learning_rate": 8.12323626869465e-06, + "loss": 0.2069, + "step": 82980 + }, + { + "epoch": 1.0703005603667854, + "grad_norm": 0.837656582187998, + "learning_rate": 8.122650316829197e-06, + "loss": 0.2015, + "step": 82990 + }, + { + "epoch": 1.0704295285567815, + "grad_norm": 0.8184852665877415, + "learning_rate": 8.122064294647378e-06, + "loss": 0.2108, + "step": 83000 + }, + { + "epoch": 1.0705584967467774, + "grad_norm": 0.7213133446369794, + "learning_rate": 8.12147820216239e-06, + "loss": 0.1983, + "step": 83010 + }, + { + "epoch": 1.0706874649367732, + "grad_norm": 0.9062397738782961, + "learning_rate": 8.120892039387427e-06, + "loss": 0.2086, + "step": 83020 + }, + { + "epoch": 1.0708164331267693, + "grad_norm": 0.8001443633001499, + "learning_rate": 8.120305806335692e-06, + "loss": 0.1954, + "step": 83030 + }, + { + "epoch": 1.0709454013167652, + "grad_norm": 0.9377616500981545, + "learning_rate": 8.119719503020383e-06, + "loss": 0.2196, + "step": 83040 + }, + { + "epoch": 1.071074369506761, + "grad_norm": 0.8451963631858799, + "learning_rate": 8.119133129454703e-06, + "loss": 0.2113, + "step": 83050 + }, + { + "epoch": 1.0712033376967571, + "grad_norm": 0.7386056661036832, + "learning_rate": 8.118546685651859e-06, + "loss": 0.2018, + "step": 83060 + }, + { + "epoch": 1.071332305886753, + "grad_norm": 0.8757248001173626, + "learning_rate": 8.117960171625053e-06, + "loss": 0.2146, + "step": 83070 + }, + { + "epoch": 1.071461274076749, + "grad_norm": 0.8842796828112319, + "learning_rate": 8.117373587387495e-06, + "loss": 0.2104, + "step": 83080 + }, + { + "epoch": 1.071590242266745, + "grad_norm": 0.8368446694286042, + "learning_rate": 8.116786932952392e-06, + "loss": 0.2064, + "step": 83090 + }, + { + "epoch": 1.0717192104567408, + "grad_norm": 0.9909563495829469, + "learning_rate": 8.116200208332954e-06, + "loss": 0.2138, + "step": 83100 + }, + { + "epoch": 1.0718481786467369, + "grad_norm": 0.8339118821615853, + "learning_rate": 8.115613413542394e-06, + "loss": 0.2077, + "step": 83110 + }, + { + "epoch": 1.0719771468367327, + "grad_norm": 0.8424700828963014, + "learning_rate": 8.115026548593925e-06, + "loss": 0.2074, + "step": 83120 + }, + { + "epoch": 1.0721061150267286, + "grad_norm": 0.7542713092164104, + "learning_rate": 8.114439613500764e-06, + "loss": 0.205, + "step": 83130 + }, + { + "epoch": 1.0722350832167247, + "grad_norm": 0.8413651843375396, + "learning_rate": 8.113852608276126e-06, + "loss": 0.2072, + "step": 83140 + }, + { + "epoch": 1.0723640514067205, + "grad_norm": 0.7633785817694458, + "learning_rate": 8.113265532933229e-06, + "loss": 0.1909, + "step": 83150 + }, + { + "epoch": 1.0724930195967164, + "grad_norm": 0.8262238620690173, + "learning_rate": 8.112678387485292e-06, + "loss": 0.2123, + "step": 83160 + }, + { + "epoch": 1.0726219877867125, + "grad_norm": 0.8817211756002169, + "learning_rate": 8.11209117194554e-06, + "loss": 0.2045, + "step": 83170 + }, + { + "epoch": 1.0727509559767083, + "grad_norm": 0.7847074641338537, + "learning_rate": 8.111503886327192e-06, + "loss": 0.2016, + "step": 83180 + }, + { + "epoch": 1.0728799241667042, + "grad_norm": 0.896023987266038, + "learning_rate": 8.110916530643476e-06, + "loss": 0.2121, + "step": 83190 + }, + { + "epoch": 1.0730088923567003, + "grad_norm": 0.9039264598488359, + "learning_rate": 8.110329104907614e-06, + "loss": 0.2129, + "step": 83200 + }, + { + "epoch": 1.0731378605466961, + "grad_norm": 0.8301980062095804, + "learning_rate": 8.10974160913284e-06, + "loss": 0.2061, + "step": 83210 + }, + { + "epoch": 1.073266828736692, + "grad_norm": 0.7129266664936598, + "learning_rate": 8.109154043332377e-06, + "loss": 0.2016, + "step": 83220 + }, + { + "epoch": 1.073395796926688, + "grad_norm": 0.8605587999606258, + "learning_rate": 8.108566407519461e-06, + "loss": 0.2056, + "step": 83230 + }, + { + "epoch": 1.073524765116684, + "grad_norm": 0.7851754278276416, + "learning_rate": 8.107978701707319e-06, + "loss": 0.2145, + "step": 83240 + }, + { + "epoch": 1.07365373330668, + "grad_norm": 0.862913837372378, + "learning_rate": 8.10739092590919e-06, + "loss": 0.2199, + "step": 83250 + }, + { + "epoch": 1.0737827014966759, + "grad_norm": 0.8709992199495542, + "learning_rate": 8.106803080138307e-06, + "loss": 0.2115, + "step": 83260 + }, + { + "epoch": 1.0739116696866717, + "grad_norm": 0.883367271147221, + "learning_rate": 8.106215164407909e-06, + "loss": 0.2182, + "step": 83270 + }, + { + "epoch": 1.0740406378766678, + "grad_norm": 0.8909522072696638, + "learning_rate": 8.105627178731233e-06, + "loss": 0.2161, + "step": 83280 + }, + { + "epoch": 1.0741696060666637, + "grad_norm": 0.8778783106947299, + "learning_rate": 8.10503912312152e-06, + "loss": 0.2069, + "step": 83290 + }, + { + "epoch": 1.0742985742566595, + "grad_norm": 0.7956131689367413, + "learning_rate": 8.104450997592013e-06, + "loss": 0.215, + "step": 83300 + }, + { + "epoch": 1.0744275424466556, + "grad_norm": 0.8301070299678758, + "learning_rate": 8.103862802155952e-06, + "loss": 0.2094, + "step": 83310 + }, + { + "epoch": 1.0745565106366515, + "grad_norm": 0.766011407217363, + "learning_rate": 8.103274536826586e-06, + "loss": 0.2061, + "step": 83320 + }, + { + "epoch": 1.0746854788266473, + "grad_norm": 0.8135569478528965, + "learning_rate": 8.102686201617163e-06, + "loss": 0.2103, + "step": 83330 + }, + { + "epoch": 1.0748144470166434, + "grad_norm": 0.869035237131142, + "learning_rate": 8.102097796540925e-06, + "loss": 0.2189, + "step": 83340 + }, + { + "epoch": 1.0749434152066393, + "grad_norm": 0.8063329646577346, + "learning_rate": 8.101509321611125e-06, + "loss": 0.1965, + "step": 83350 + }, + { + "epoch": 1.0750723833966351, + "grad_norm": 0.861838666333424, + "learning_rate": 8.100920776841016e-06, + "loss": 0.2055, + "step": 83360 + }, + { + "epoch": 1.0752013515866312, + "grad_norm": 0.9195002610224968, + "learning_rate": 8.100332162243852e-06, + "loss": 0.2057, + "step": 83370 + }, + { + "epoch": 1.075330319776627, + "grad_norm": 0.8242752239645809, + "learning_rate": 8.099743477832882e-06, + "loss": 0.2114, + "step": 83380 + }, + { + "epoch": 1.075459287966623, + "grad_norm": 0.85412732521453, + "learning_rate": 8.099154723621368e-06, + "loss": 0.2079, + "step": 83390 + }, + { + "epoch": 1.075588256156619, + "grad_norm": 0.8436814219905208, + "learning_rate": 8.098565899622563e-06, + "loss": 0.2155, + "step": 83400 + }, + { + "epoch": 1.0757172243466149, + "grad_norm": 0.8061321760545567, + "learning_rate": 8.097977005849728e-06, + "loss": 0.2075, + "step": 83410 + }, + { + "epoch": 1.075846192536611, + "grad_norm": 0.763380847588702, + "learning_rate": 8.097388042316123e-06, + "loss": 0.2052, + "step": 83420 + }, + { + "epoch": 1.0759751607266068, + "grad_norm": 0.8171111572958438, + "learning_rate": 8.096799009035013e-06, + "loss": 0.2081, + "step": 83430 + }, + { + "epoch": 1.0761041289166027, + "grad_norm": 0.9253148270858763, + "learning_rate": 8.096209906019661e-06, + "loss": 0.2109, + "step": 83440 + }, + { + "epoch": 1.0762330971065988, + "grad_norm": 0.7750449013479638, + "learning_rate": 8.095620733283331e-06, + "loss": 0.2026, + "step": 83450 + }, + { + "epoch": 1.0763620652965946, + "grad_norm": 0.8210128984548959, + "learning_rate": 8.095031490839292e-06, + "loss": 0.1997, + "step": 83460 + }, + { + "epoch": 1.0764910334865905, + "grad_norm": 0.8359090234662979, + "learning_rate": 8.094442178700809e-06, + "loss": 0.2141, + "step": 83470 + }, + { + "epoch": 1.0766200016765866, + "grad_norm": 0.8424993724646567, + "learning_rate": 8.093852796881156e-06, + "loss": 0.2155, + "step": 83480 + }, + { + "epoch": 1.0767489698665824, + "grad_norm": 0.873401527009903, + "learning_rate": 8.093263345393605e-06, + "loss": 0.2074, + "step": 83490 + }, + { + "epoch": 1.0768779380565783, + "grad_norm": 0.816923500261002, + "learning_rate": 8.092673824251428e-06, + "loss": 0.2026, + "step": 83500 + }, + { + "epoch": 1.0770069062465744, + "grad_norm": 0.8134563907988422, + "learning_rate": 8.092084233467896e-06, + "loss": 0.2023, + "step": 83510 + }, + { + "epoch": 1.0771358744365702, + "grad_norm": 0.8339614176286589, + "learning_rate": 8.091494573056294e-06, + "loss": 0.2031, + "step": 83520 + }, + { + "epoch": 1.077264842626566, + "grad_norm": 0.8722567511747732, + "learning_rate": 8.090904843029893e-06, + "loss": 0.2016, + "step": 83530 + }, + { + "epoch": 1.0773938108165622, + "grad_norm": 0.7638925042680818, + "learning_rate": 8.090315043401975e-06, + "loss": 0.2122, + "step": 83540 + }, + { + "epoch": 1.077522779006558, + "grad_norm": 0.8099266880625103, + "learning_rate": 8.089725174185824e-06, + "loss": 0.2058, + "step": 83550 + }, + { + "epoch": 1.0776517471965539, + "grad_norm": 0.7965470231113834, + "learning_rate": 8.089135235394718e-06, + "loss": 0.2074, + "step": 83560 + }, + { + "epoch": 1.07778071538655, + "grad_norm": 0.8190290609787426, + "learning_rate": 8.088545227041944e-06, + "loss": 0.2045, + "step": 83570 + }, + { + "epoch": 1.0779096835765458, + "grad_norm": 0.8398459169014157, + "learning_rate": 8.087955149140787e-06, + "loss": 0.2096, + "step": 83580 + }, + { + "epoch": 1.078038651766542, + "grad_norm": 0.9011621876150254, + "learning_rate": 8.087365001704534e-06, + "loss": 0.2083, + "step": 83590 + }, + { + "epoch": 1.0781676199565378, + "grad_norm": 0.8104267932598935, + "learning_rate": 8.086774784746477e-06, + "loss": 0.2125, + "step": 83600 + }, + { + "epoch": 1.0782965881465336, + "grad_norm": 0.7905578210990948, + "learning_rate": 8.086184498279903e-06, + "loss": 0.2074, + "step": 83610 + }, + { + "epoch": 1.0784255563365295, + "grad_norm": 0.8383059793735628, + "learning_rate": 8.085594142318104e-06, + "loss": 0.2171, + "step": 83620 + }, + { + "epoch": 1.0785545245265256, + "grad_norm": 0.9632348695175629, + "learning_rate": 8.08500371687438e-06, + "loss": 0.2046, + "step": 83630 + }, + { + "epoch": 1.0786834927165214, + "grad_norm": 0.8354689240100582, + "learning_rate": 8.084413221962018e-06, + "loss": 0.2, + "step": 83640 + }, + { + "epoch": 1.0788124609065175, + "grad_norm": 0.7997132556170845, + "learning_rate": 8.08382265759432e-06, + "loss": 0.2022, + "step": 83650 + }, + { + "epoch": 1.0789414290965134, + "grad_norm": 0.8912337123761245, + "learning_rate": 8.083232023784582e-06, + "loss": 0.2021, + "step": 83660 + }, + { + "epoch": 1.0790703972865092, + "grad_norm": 0.8732849221568765, + "learning_rate": 8.082641320546107e-06, + "loss": 0.2023, + "step": 83670 + }, + { + "epoch": 1.0791993654765053, + "grad_norm": 0.8119568101345488, + "learning_rate": 8.082050547892193e-06, + "loss": 0.2004, + "step": 83680 + }, + { + "epoch": 1.0793283336665012, + "grad_norm": 0.8120144633758629, + "learning_rate": 8.081459705836145e-06, + "loss": 0.2166, + "step": 83690 + }, + { + "epoch": 1.079457301856497, + "grad_norm": 0.7947718838354252, + "learning_rate": 8.080868794391267e-06, + "loss": 0.1958, + "step": 83700 + }, + { + "epoch": 1.079586270046493, + "grad_norm": 0.812010314853461, + "learning_rate": 8.080277813570865e-06, + "loss": 0.2043, + "step": 83710 + }, + { + "epoch": 1.079715238236489, + "grad_norm": 0.8633775804794038, + "learning_rate": 8.079686763388249e-06, + "loss": 0.2118, + "step": 83720 + }, + { + "epoch": 1.0798442064264848, + "grad_norm": 0.8679275445649819, + "learning_rate": 8.079095643856726e-06, + "loss": 0.2073, + "step": 83730 + }, + { + "epoch": 1.079973174616481, + "grad_norm": 0.8563630264091834, + "learning_rate": 8.078504454989607e-06, + "loss": 0.2258, + "step": 83740 + }, + { + "epoch": 1.0801021428064768, + "grad_norm": 0.8803426321947099, + "learning_rate": 8.077913196800207e-06, + "loss": 0.2041, + "step": 83750 + }, + { + "epoch": 1.0802311109964726, + "grad_norm": 0.856656339290748, + "learning_rate": 8.077321869301837e-06, + "loss": 0.2199, + "step": 83760 + }, + { + "epoch": 1.0803600791864687, + "grad_norm": 0.9021578285874435, + "learning_rate": 8.076730472507813e-06, + "loss": 0.2153, + "step": 83770 + }, + { + "epoch": 1.0804890473764646, + "grad_norm": 0.8444579885780752, + "learning_rate": 8.076139006431454e-06, + "loss": 0.2068, + "step": 83780 + }, + { + "epoch": 1.0806180155664604, + "grad_norm": 0.8674206421061427, + "learning_rate": 8.075547471086079e-06, + "loss": 0.2091, + "step": 83790 + }, + { + "epoch": 1.0807469837564565, + "grad_norm": 0.8578211946535175, + "learning_rate": 8.074955866485005e-06, + "loss": 0.2059, + "step": 83800 + }, + { + "epoch": 1.0808759519464524, + "grad_norm": 0.7921313976963352, + "learning_rate": 8.074364192641555e-06, + "loss": 0.2076, + "step": 83810 + }, + { + "epoch": 1.0810049201364484, + "grad_norm": 0.8566932629924952, + "learning_rate": 8.073772449569055e-06, + "loss": 0.2127, + "step": 83820 + }, + { + "epoch": 1.0811338883264443, + "grad_norm": 0.9058947986973389, + "learning_rate": 8.073180637280828e-06, + "loss": 0.2082, + "step": 83830 + }, + { + "epoch": 1.0812628565164402, + "grad_norm": 0.9503096284085584, + "learning_rate": 8.0725887557902e-06, + "loss": 0.2138, + "step": 83840 + }, + { + "epoch": 1.0813918247064362, + "grad_norm": 0.8518040141264053, + "learning_rate": 8.071996805110502e-06, + "loss": 0.2031, + "step": 83850 + }, + { + "epoch": 1.081520792896432, + "grad_norm": 0.8760784184823516, + "learning_rate": 8.071404785255061e-06, + "loss": 0.215, + "step": 83860 + }, + { + "epoch": 1.081649761086428, + "grad_norm": 0.8145800627874702, + "learning_rate": 8.070812696237207e-06, + "loss": 0.2038, + "step": 83870 + }, + { + "epoch": 1.081778729276424, + "grad_norm": 0.8395162937838004, + "learning_rate": 8.070220538070277e-06, + "loss": 0.2079, + "step": 83880 + }, + { + "epoch": 1.08190769746642, + "grad_norm": 0.8929239464462064, + "learning_rate": 8.069628310767601e-06, + "loss": 0.1999, + "step": 83890 + }, + { + "epoch": 1.0820366656564158, + "grad_norm": 0.8972567729292341, + "learning_rate": 8.069036014342518e-06, + "loss": 0.206, + "step": 83900 + }, + { + "epoch": 1.0821656338464118, + "grad_norm": 0.8317731478069279, + "learning_rate": 8.068443648808362e-06, + "loss": 0.2011, + "step": 83910 + }, + { + "epoch": 1.0822946020364077, + "grad_norm": 0.8160142114049882, + "learning_rate": 8.067851214178475e-06, + "loss": 0.2062, + "step": 83920 + }, + { + "epoch": 1.0824235702264036, + "grad_norm": 0.8904465414497403, + "learning_rate": 8.067258710466198e-06, + "loss": 0.2156, + "step": 83930 + }, + { + "epoch": 1.0825525384163996, + "grad_norm": 0.9176530934840625, + "learning_rate": 8.066666137684869e-06, + "loss": 0.2177, + "step": 83940 + }, + { + "epoch": 1.0826815066063955, + "grad_norm": 0.8792613627709419, + "learning_rate": 8.066073495847837e-06, + "loss": 0.2077, + "step": 83950 + }, + { + "epoch": 1.0828104747963914, + "grad_norm": 0.8653715942116693, + "learning_rate": 8.065480784968444e-06, + "loss": 0.2061, + "step": 83960 + }, + { + "epoch": 1.0829394429863874, + "grad_norm": 0.7379709100076436, + "learning_rate": 8.064888005060036e-06, + "loss": 0.1906, + "step": 83970 + }, + { + "epoch": 1.0830684111763833, + "grad_norm": 0.8943785041299814, + "learning_rate": 8.064295156135965e-06, + "loss": 0.2022, + "step": 83980 + }, + { + "epoch": 1.0831973793663794, + "grad_norm": 0.8235881921880212, + "learning_rate": 8.063702238209576e-06, + "loss": 0.2035, + "step": 83990 + }, + { + "epoch": 1.0833263475563752, + "grad_norm": 0.9157851412303386, + "learning_rate": 8.063109251294225e-06, + "loss": 0.2166, + "step": 84000 + }, + { + "epoch": 1.083455315746371, + "grad_norm": 0.8788246062432159, + "learning_rate": 8.062516195403262e-06, + "loss": 0.192, + "step": 84010 + }, + { + "epoch": 1.0835842839363672, + "grad_norm": 0.915562412488042, + "learning_rate": 8.061923070550041e-06, + "loss": 0.2058, + "step": 84020 + }, + { + "epoch": 1.083713252126363, + "grad_norm": 0.8768476434946655, + "learning_rate": 8.061329876747923e-06, + "loss": 0.2132, + "step": 84030 + }, + { + "epoch": 1.083842220316359, + "grad_norm": 0.9520857827563748, + "learning_rate": 8.06073661401026e-06, + "loss": 0.2035, + "step": 84040 + }, + { + "epoch": 1.083971188506355, + "grad_norm": 0.8264758733720537, + "learning_rate": 8.060143282350415e-06, + "loss": 0.2027, + "step": 84050 + }, + { + "epoch": 1.0841001566963508, + "grad_norm": 0.8491914858574401, + "learning_rate": 8.059549881781746e-06, + "loss": 0.2194, + "step": 84060 + }, + { + "epoch": 1.0842291248863467, + "grad_norm": 0.903056753420531, + "learning_rate": 8.058956412317615e-06, + "loss": 0.2045, + "step": 84070 + }, + { + "epoch": 1.0843580930763428, + "grad_norm": 0.8618329403678142, + "learning_rate": 8.05836287397139e-06, + "loss": 0.2062, + "step": 84080 + }, + { + "epoch": 1.0844870612663386, + "grad_norm": 0.8552739236298554, + "learning_rate": 8.057769266756433e-06, + "loss": 0.1924, + "step": 84090 + }, + { + "epoch": 1.0846160294563345, + "grad_norm": 0.9003226048106487, + "learning_rate": 8.05717559068611e-06, + "loss": 0.2022, + "step": 84100 + }, + { + "epoch": 1.0847449976463306, + "grad_norm": 0.8589854763843945, + "learning_rate": 8.056581845773794e-06, + "loss": 0.2054, + "step": 84110 + }, + { + "epoch": 1.0848739658363264, + "grad_norm": 0.7774101743630648, + "learning_rate": 8.05598803203285e-06, + "loss": 0.2125, + "step": 84120 + }, + { + "epoch": 1.0850029340263223, + "grad_norm": 0.9883088633151077, + "learning_rate": 8.055394149476652e-06, + "loss": 0.2016, + "step": 84130 + }, + { + "epoch": 1.0851319022163184, + "grad_norm": 0.7994154633016695, + "learning_rate": 8.054800198118573e-06, + "loss": 0.1915, + "step": 84140 + }, + { + "epoch": 1.0852608704063142, + "grad_norm": 0.8215112716016453, + "learning_rate": 8.054206177971988e-06, + "loss": 0.2019, + "step": 84150 + }, + { + "epoch": 1.0853898385963103, + "grad_norm": 0.8496277964706754, + "learning_rate": 8.053612089050273e-06, + "loss": 0.2036, + "step": 84160 + }, + { + "epoch": 1.0855188067863062, + "grad_norm": 0.8457746203821003, + "learning_rate": 8.053017931366806e-06, + "loss": 0.217, + "step": 84170 + }, + { + "epoch": 1.085647774976302, + "grad_norm": 0.8128754273965572, + "learning_rate": 8.052423704934963e-06, + "loss": 0.1916, + "step": 84180 + }, + { + "epoch": 1.0857767431662981, + "grad_norm": 0.8432941174447794, + "learning_rate": 8.051829409768132e-06, + "loss": 0.2022, + "step": 84190 + }, + { + "epoch": 1.085905711356294, + "grad_norm": 0.8329916695802376, + "learning_rate": 8.051235045879691e-06, + "loss": 0.209, + "step": 84200 + }, + { + "epoch": 1.0860346795462898, + "grad_norm": 0.9275181793719259, + "learning_rate": 8.050640613283021e-06, + "loss": 0.2052, + "step": 84210 + }, + { + "epoch": 1.086163647736286, + "grad_norm": 0.8714565008886094, + "learning_rate": 8.050046111991515e-06, + "loss": 0.2098, + "step": 84220 + }, + { + "epoch": 1.0862926159262818, + "grad_norm": 0.8135827705572224, + "learning_rate": 8.049451542018552e-06, + "loss": 0.2108, + "step": 84230 + }, + { + "epoch": 1.0864215841162776, + "grad_norm": 0.8939093021209038, + "learning_rate": 8.048856903377526e-06, + "loss": 0.197, + "step": 84240 + }, + { + "epoch": 1.0865505523062737, + "grad_norm": 0.8553471613965585, + "learning_rate": 8.048262196081827e-06, + "loss": 0.2051, + "step": 84250 + }, + { + "epoch": 1.0866795204962696, + "grad_norm": 0.871180186307459, + "learning_rate": 8.047667420144844e-06, + "loss": 0.2054, + "step": 84260 + }, + { + "epoch": 1.0868084886862655, + "grad_norm": 0.8199119435660288, + "learning_rate": 8.047072575579973e-06, + "loss": 0.2033, + "step": 84270 + }, + { + "epoch": 1.0869374568762615, + "grad_norm": 0.8438750472919508, + "learning_rate": 8.046477662400607e-06, + "loss": 0.2136, + "step": 84280 + }, + { + "epoch": 1.0870664250662574, + "grad_norm": 0.8445736114042339, + "learning_rate": 8.045882680620143e-06, + "loss": 0.1979, + "step": 84290 + }, + { + "epoch": 1.0871953932562533, + "grad_norm": 0.8801957259032483, + "learning_rate": 8.045287630251978e-06, + "loss": 0.2067, + "step": 84300 + }, + { + "epoch": 1.0873243614462493, + "grad_norm": 0.8747292291170385, + "learning_rate": 8.044692511309513e-06, + "loss": 0.2091, + "step": 84310 + }, + { + "epoch": 1.0874533296362452, + "grad_norm": 0.7988687949985142, + "learning_rate": 8.044097323806149e-06, + "loss": 0.2064, + "step": 84320 + }, + { + "epoch": 1.0875822978262413, + "grad_norm": 0.7717900070177353, + "learning_rate": 8.043502067755286e-06, + "loss": 0.2054, + "step": 84330 + }, + { + "epoch": 1.0877112660162371, + "grad_norm": 0.8576280962841472, + "learning_rate": 8.04290674317033e-06, + "loss": 0.2085, + "step": 84340 + }, + { + "epoch": 1.087840234206233, + "grad_norm": 0.8896880968351601, + "learning_rate": 8.042311350064686e-06, + "loss": 0.2183, + "step": 84350 + }, + { + "epoch": 1.087969202396229, + "grad_norm": 0.8470405088869314, + "learning_rate": 8.041715888451765e-06, + "loss": 0.1977, + "step": 84360 + }, + { + "epoch": 1.088098170586225, + "grad_norm": 0.8890446404070044, + "learning_rate": 8.041120358344969e-06, + "loss": 0.2211, + "step": 84370 + }, + { + "epoch": 1.0882271387762208, + "grad_norm": 0.866990482099921, + "learning_rate": 8.040524759757713e-06, + "loss": 0.2065, + "step": 84380 + }, + { + "epoch": 1.0883561069662169, + "grad_norm": 0.8854195225653927, + "learning_rate": 8.039929092703407e-06, + "loss": 0.2176, + "step": 84390 + }, + { + "epoch": 1.0884850751562127, + "grad_norm": 0.7923052828664545, + "learning_rate": 8.039333357195464e-06, + "loss": 0.2114, + "step": 84400 + }, + { + "epoch": 1.0886140433462086, + "grad_norm": 0.8259080723984638, + "learning_rate": 8.0387375532473e-06, + "loss": 0.2015, + "step": 84410 + }, + { + "epoch": 1.0887430115362047, + "grad_norm": 0.8554694218540443, + "learning_rate": 8.038141680872331e-06, + "loss": 0.2094, + "step": 84420 + }, + { + "epoch": 1.0888719797262005, + "grad_norm": 0.7961761515577931, + "learning_rate": 8.037545740083976e-06, + "loss": 0.215, + "step": 84430 + }, + { + "epoch": 1.0890009479161964, + "grad_norm": 0.7999636202843904, + "learning_rate": 8.036949730895652e-06, + "loss": 0.2134, + "step": 84440 + }, + { + "epoch": 1.0891299161061925, + "grad_norm": 0.7861946899162213, + "learning_rate": 8.036353653320781e-06, + "loss": 0.2044, + "step": 84450 + }, + { + "epoch": 1.0892588842961883, + "grad_norm": 0.8732275339850932, + "learning_rate": 8.035757507372787e-06, + "loss": 0.2096, + "step": 84460 + }, + { + "epoch": 1.0893878524861842, + "grad_norm": 0.8311863628902885, + "learning_rate": 8.035161293065094e-06, + "loss": 0.1968, + "step": 84470 + }, + { + "epoch": 1.0895168206761803, + "grad_norm": 0.7993983410575449, + "learning_rate": 8.034565010411126e-06, + "loss": 0.1997, + "step": 84480 + }, + { + "epoch": 1.0896457888661761, + "grad_norm": 0.9267516384000297, + "learning_rate": 8.03396865942431e-06, + "loss": 0.2042, + "step": 84490 + }, + { + "epoch": 1.089774757056172, + "grad_norm": 0.9467066757862787, + "learning_rate": 8.033372240118077e-06, + "loss": 0.2076, + "step": 84500 + }, + { + "epoch": 1.089903725246168, + "grad_norm": 0.7992757822453602, + "learning_rate": 8.032775752505856e-06, + "loss": 0.2156, + "step": 84510 + }, + { + "epoch": 1.090032693436164, + "grad_norm": 0.7560843592169045, + "learning_rate": 8.032179196601079e-06, + "loss": 0.1981, + "step": 84520 + }, + { + "epoch": 1.0901616616261598, + "grad_norm": 0.8293187892949699, + "learning_rate": 8.031582572417178e-06, + "loss": 0.2103, + "step": 84530 + }, + { + "epoch": 1.0902906298161559, + "grad_norm": 0.9266324774952407, + "learning_rate": 8.03098587996759e-06, + "loss": 0.2063, + "step": 84540 + }, + { + "epoch": 1.0904195980061517, + "grad_norm": 0.8262870850152578, + "learning_rate": 8.030389119265751e-06, + "loss": 0.2086, + "step": 84550 + }, + { + "epoch": 1.0905485661961478, + "grad_norm": 0.8190794632250016, + "learning_rate": 8.029792290325097e-06, + "loss": 0.2155, + "step": 84560 + }, + { + "epoch": 1.0906775343861437, + "grad_norm": 0.9058263030102567, + "learning_rate": 8.02919539315907e-06, + "loss": 0.1999, + "step": 84570 + }, + { + "epoch": 1.0908065025761395, + "grad_norm": 0.8790679131658555, + "learning_rate": 8.02859842778111e-06, + "loss": 0.2149, + "step": 84580 + }, + { + "epoch": 1.0909354707661356, + "grad_norm": 0.8271594341457419, + "learning_rate": 8.028001394204657e-06, + "loss": 0.2018, + "step": 84590 + }, + { + "epoch": 1.0910644389561315, + "grad_norm": 0.8686912427849883, + "learning_rate": 8.02740429244316e-06, + "loss": 0.2072, + "step": 84600 + }, + { + "epoch": 1.0911934071461273, + "grad_norm": 0.8192026507553019, + "learning_rate": 8.026807122510063e-06, + "loss": 0.204, + "step": 84610 + }, + { + "epoch": 1.0913223753361234, + "grad_norm": 0.8106961821194699, + "learning_rate": 8.02620988441881e-06, + "loss": 0.2128, + "step": 84620 + }, + { + "epoch": 1.0914513435261193, + "grad_norm": 0.8715429208536569, + "learning_rate": 8.025612578182854e-06, + "loss": 0.2221, + "step": 84630 + }, + { + "epoch": 1.0915803117161151, + "grad_norm": 0.835829261084886, + "learning_rate": 8.025015203815642e-06, + "loss": 0.2043, + "step": 84640 + }, + { + "epoch": 1.0917092799061112, + "grad_norm": 0.8832909550808953, + "learning_rate": 8.024417761330628e-06, + "loss": 0.2031, + "step": 84650 + }, + { + "epoch": 1.091838248096107, + "grad_norm": 0.9729688772365244, + "learning_rate": 8.023820250741265e-06, + "loss": 0.2196, + "step": 84660 + }, + { + "epoch": 1.091967216286103, + "grad_norm": 0.8680667768734849, + "learning_rate": 8.023222672061006e-06, + "loss": 0.1946, + "step": 84670 + }, + { + "epoch": 1.092096184476099, + "grad_norm": 0.9147038981290739, + "learning_rate": 8.02262502530331e-06, + "loss": 0.2084, + "step": 84680 + }, + { + "epoch": 1.0922251526660949, + "grad_norm": 0.827989706234902, + "learning_rate": 8.022027310481632e-06, + "loss": 0.2072, + "step": 84690 + }, + { + "epoch": 1.0923541208560907, + "grad_norm": 0.8592013543565596, + "learning_rate": 8.021429527609435e-06, + "loss": 0.207, + "step": 84700 + }, + { + "epoch": 1.0924830890460868, + "grad_norm": 0.7398988984706215, + "learning_rate": 8.020831676700177e-06, + "loss": 0.2043, + "step": 84710 + }, + { + "epoch": 1.0926120572360827, + "grad_norm": 0.8308274541004714, + "learning_rate": 8.020233757767321e-06, + "loss": 0.2065, + "step": 84720 + }, + { + "epoch": 1.0927410254260788, + "grad_norm": 0.7504102258416497, + "learning_rate": 8.019635770824334e-06, + "loss": 0.2013, + "step": 84730 + }, + { + "epoch": 1.0928699936160746, + "grad_norm": 0.781143544590738, + "learning_rate": 8.019037715884676e-06, + "loss": 0.2026, + "step": 84740 + }, + { + "epoch": 1.0929989618060705, + "grad_norm": 0.9032264347319853, + "learning_rate": 8.01843959296182e-06, + "loss": 0.2176, + "step": 84750 + }, + { + "epoch": 1.0931279299960666, + "grad_norm": 0.7932608607174018, + "learning_rate": 8.017841402069229e-06, + "loss": 0.206, + "step": 84760 + }, + { + "epoch": 1.0932568981860624, + "grad_norm": 0.8610874771907412, + "learning_rate": 8.017243143220379e-06, + "loss": 0.1955, + "step": 84770 + }, + { + "epoch": 1.0933858663760583, + "grad_norm": 0.8339763850584323, + "learning_rate": 8.016644816428736e-06, + "loss": 0.2019, + "step": 84780 + }, + { + "epoch": 1.0935148345660544, + "grad_norm": 0.9060543977761457, + "learning_rate": 8.016046421707777e-06, + "loss": 0.2061, + "step": 84790 + }, + { + "epoch": 1.0936438027560502, + "grad_norm": 0.8679708157529686, + "learning_rate": 8.015447959070976e-06, + "loss": 0.208, + "step": 84800 + }, + { + "epoch": 1.093772770946046, + "grad_norm": 0.862389648878038, + "learning_rate": 8.014849428531809e-06, + "loss": 0.2105, + "step": 84810 + }, + { + "epoch": 1.0939017391360422, + "grad_norm": 0.8574243614160616, + "learning_rate": 8.014250830103754e-06, + "loss": 0.2067, + "step": 84820 + }, + { + "epoch": 1.094030707326038, + "grad_norm": 0.8743917175316637, + "learning_rate": 8.01365216380029e-06, + "loss": 0.2031, + "step": 84830 + }, + { + "epoch": 1.0941596755160339, + "grad_norm": 0.8765241357184426, + "learning_rate": 8.013053429634899e-06, + "loss": 0.2066, + "step": 84840 + }, + { + "epoch": 1.09428864370603, + "grad_norm": 0.9316318245152584, + "learning_rate": 8.012454627621059e-06, + "loss": 0.1912, + "step": 84850 + }, + { + "epoch": 1.0944176118960258, + "grad_norm": 0.8297871280611431, + "learning_rate": 8.01185575777226e-06, + "loss": 0.2208, + "step": 84860 + }, + { + "epoch": 1.0945465800860217, + "grad_norm": 0.8641350352072954, + "learning_rate": 8.011256820101984e-06, + "loss": 0.2035, + "step": 84870 + }, + { + "epoch": 1.0946755482760178, + "grad_norm": 0.8134207734755969, + "learning_rate": 8.010657814623719e-06, + "loss": 0.1933, + "step": 84880 + }, + { + "epoch": 1.0948045164660136, + "grad_norm": 0.9172475789098083, + "learning_rate": 8.010058741350955e-06, + "loss": 0.2102, + "step": 84890 + }, + { + "epoch": 1.0949334846560097, + "grad_norm": 0.9474629540705117, + "learning_rate": 8.009459600297177e-06, + "loss": 0.204, + "step": 84900 + }, + { + "epoch": 1.0950624528460056, + "grad_norm": 0.8073831266317558, + "learning_rate": 8.008860391475881e-06, + "loss": 0.1845, + "step": 84910 + }, + { + "epoch": 1.0951914210360014, + "grad_norm": 0.841297277081548, + "learning_rate": 8.008261114900559e-06, + "loss": 0.214, + "step": 84920 + }, + { + "epoch": 1.0953203892259975, + "grad_norm": 0.888484383713483, + "learning_rate": 8.007661770584706e-06, + "loss": 0.2135, + "step": 84930 + }, + { + "epoch": 1.0954493574159934, + "grad_norm": 0.8729076128146036, + "learning_rate": 8.007062358541819e-06, + "loss": 0.1964, + "step": 84940 + }, + { + "epoch": 1.0955783256059892, + "grad_norm": 0.8123410859649861, + "learning_rate": 8.006462878785393e-06, + "loss": 0.207, + "step": 84950 + }, + { + "epoch": 1.0957072937959853, + "grad_norm": 0.8080120558669552, + "learning_rate": 8.005863331328929e-06, + "loss": 0.205, + "step": 84960 + }, + { + "epoch": 1.0958362619859812, + "grad_norm": 0.8280175252273813, + "learning_rate": 8.005263716185927e-06, + "loss": 0.206, + "step": 84970 + }, + { + "epoch": 1.095965230175977, + "grad_norm": 0.9749681746388797, + "learning_rate": 8.00466403336989e-06, + "loss": 0.2149, + "step": 84980 + }, + { + "epoch": 1.096094198365973, + "grad_norm": 0.7780403530936095, + "learning_rate": 8.004064282894319e-06, + "loss": 0.2099, + "step": 84990 + }, + { + "epoch": 1.096223166555969, + "grad_norm": 0.8095715394935012, + "learning_rate": 8.003464464772723e-06, + "loss": 0.2005, + "step": 85000 + }, + { + "epoch": 1.0963521347459648, + "grad_norm": 0.8679205989241147, + "learning_rate": 8.002864579018608e-06, + "loss": 0.2036, + "step": 85010 + }, + { + "epoch": 1.096481102935961, + "grad_norm": 0.8758722819943808, + "learning_rate": 8.002264625645482e-06, + "loss": 0.2, + "step": 85020 + }, + { + "epoch": 1.0966100711259568, + "grad_norm": 0.756093552230632, + "learning_rate": 8.001664604666853e-06, + "loss": 0.2058, + "step": 85030 + }, + { + "epoch": 1.0967390393159526, + "grad_norm": 0.8941916713948368, + "learning_rate": 8.001064516096235e-06, + "loss": 0.2101, + "step": 85040 + }, + { + "epoch": 1.0968680075059487, + "grad_norm": 0.8554744710274037, + "learning_rate": 8.000464359947138e-06, + "loss": 0.2045, + "step": 85050 + }, + { + "epoch": 1.0969969756959446, + "grad_norm": 0.8175179118485453, + "learning_rate": 7.99986413623308e-06, + "loss": 0.2054, + "step": 85060 + }, + { + "epoch": 1.0971259438859406, + "grad_norm": 0.8205890368910244, + "learning_rate": 7.999263844967575e-06, + "loss": 0.206, + "step": 85070 + }, + { + "epoch": 1.0972549120759365, + "grad_norm": 0.7095242395797345, + "learning_rate": 7.99866348616414e-06, + "loss": 0.2087, + "step": 85080 + }, + { + "epoch": 1.0973838802659324, + "grad_norm": 0.9221014402844745, + "learning_rate": 7.998063059836296e-06, + "loss": 0.2054, + "step": 85090 + }, + { + "epoch": 1.0975128484559284, + "grad_norm": 0.7824508941656669, + "learning_rate": 7.99746256599756e-06, + "loss": 0.2229, + "step": 85100 + }, + { + "epoch": 1.0976418166459243, + "grad_norm": 0.8195548141108558, + "learning_rate": 7.996862004661457e-06, + "loss": 0.2112, + "step": 85110 + }, + { + "epoch": 1.0977707848359202, + "grad_norm": 0.8286110393410175, + "learning_rate": 7.99626137584151e-06, + "loss": 0.2086, + "step": 85120 + }, + { + "epoch": 1.0978997530259162, + "grad_norm": 0.921590170412004, + "learning_rate": 7.995660679551243e-06, + "loss": 0.2019, + "step": 85130 + }, + { + "epoch": 1.098028721215912, + "grad_norm": 0.8281239094461473, + "learning_rate": 7.995059915804184e-06, + "loss": 0.2063, + "step": 85140 + }, + { + "epoch": 1.098157689405908, + "grad_norm": 0.8327306159703305, + "learning_rate": 7.994459084613859e-06, + "loss": 0.2055, + "step": 85150 + }, + { + "epoch": 1.098286657595904, + "grad_norm": 0.849956349801592, + "learning_rate": 7.993858185993802e-06, + "loss": 0.2054, + "step": 85160 + }, + { + "epoch": 1.0984156257859, + "grad_norm": 0.8986253978263502, + "learning_rate": 7.993257219957537e-06, + "loss": 0.2212, + "step": 85170 + }, + { + "epoch": 1.0985445939758958, + "grad_norm": 0.8769376146594632, + "learning_rate": 7.992656186518603e-06, + "loss": 0.1993, + "step": 85180 + }, + { + "epoch": 1.0986735621658918, + "grad_norm": 0.7956282034967028, + "learning_rate": 7.992055085690533e-06, + "loss": 0.2147, + "step": 85190 + }, + { + "epoch": 1.0988025303558877, + "grad_norm": 0.7857155405875972, + "learning_rate": 7.99145391748686e-06, + "loss": 0.2014, + "step": 85200 + }, + { + "epoch": 1.0989314985458836, + "grad_norm": 0.8450254393554454, + "learning_rate": 7.990852681921124e-06, + "loss": 0.2077, + "step": 85210 + }, + { + "epoch": 1.0990604667358796, + "grad_norm": 0.8318149048879124, + "learning_rate": 7.990251379006862e-06, + "loss": 0.2141, + "step": 85220 + }, + { + "epoch": 1.0991894349258755, + "grad_norm": 0.7352479805291333, + "learning_rate": 7.989650008757615e-06, + "loss": 0.2023, + "step": 85230 + }, + { + "epoch": 1.0993184031158714, + "grad_norm": 0.8883343016668889, + "learning_rate": 7.989048571186926e-06, + "loss": 0.2059, + "step": 85240 + }, + { + "epoch": 1.0994473713058674, + "grad_norm": 0.8285036882488929, + "learning_rate": 7.988447066308334e-06, + "loss": 0.2203, + "step": 85250 + }, + { + "epoch": 1.0995763394958633, + "grad_norm": 0.8682937654705886, + "learning_rate": 7.987845494135387e-06, + "loss": 0.2063, + "step": 85260 + }, + { + "epoch": 1.0997053076858592, + "grad_norm": 0.7859222341144743, + "learning_rate": 7.987243854681632e-06, + "loss": 0.2108, + "step": 85270 + }, + { + "epoch": 1.0998342758758552, + "grad_norm": 0.8959774571630537, + "learning_rate": 7.986642147960617e-06, + "loss": 0.2057, + "step": 85280 + }, + { + "epoch": 1.099963244065851, + "grad_norm": 0.8687029994543995, + "learning_rate": 7.986040373985889e-06, + "loss": 0.2084, + "step": 85290 + }, + { + "epoch": 1.1000922122558472, + "grad_norm": 0.8161827349893814, + "learning_rate": 7.985438532771e-06, + "loss": 0.2031, + "step": 85300 + }, + { + "epoch": 1.100221180445843, + "grad_norm": 0.8685846034432821, + "learning_rate": 7.984836624329502e-06, + "loss": 0.2032, + "step": 85310 + }, + { + "epoch": 1.100350148635839, + "grad_norm": 0.7723374191604826, + "learning_rate": 7.984234648674948e-06, + "loss": 0.2119, + "step": 85320 + }, + { + "epoch": 1.100479116825835, + "grad_norm": 0.8992158309261882, + "learning_rate": 7.983632605820897e-06, + "loss": 0.2104, + "step": 85330 + }, + { + "epoch": 1.1006080850158309, + "grad_norm": 0.8826032179737101, + "learning_rate": 7.983030495780903e-06, + "loss": 0.1958, + "step": 85340 + }, + { + "epoch": 1.1007370532058267, + "grad_norm": 0.8925345850523798, + "learning_rate": 7.982428318568526e-06, + "loss": 0.2038, + "step": 85350 + }, + { + "epoch": 1.1008660213958228, + "grad_norm": 0.7555405761582529, + "learning_rate": 7.981826074197321e-06, + "loss": 0.2054, + "step": 85360 + }, + { + "epoch": 1.1009949895858187, + "grad_norm": 0.7948716797654767, + "learning_rate": 7.981223762680855e-06, + "loss": 0.202, + "step": 85370 + }, + { + "epoch": 1.1011239577758145, + "grad_norm": 0.907910735708237, + "learning_rate": 7.98062138403269e-06, + "loss": 0.209, + "step": 85380 + }, + { + "epoch": 1.1012529259658106, + "grad_norm": 0.8728400797940294, + "learning_rate": 7.980018938266389e-06, + "loss": 0.2087, + "step": 85390 + }, + { + "epoch": 1.1013818941558065, + "grad_norm": 0.8957309211738819, + "learning_rate": 7.979416425395519e-06, + "loss": 0.2141, + "step": 85400 + }, + { + "epoch": 1.1015108623458023, + "grad_norm": 0.8922974623372134, + "learning_rate": 7.978813845433647e-06, + "loss": 0.2044, + "step": 85410 + }, + { + "epoch": 1.1016398305357984, + "grad_norm": 0.815325335794267, + "learning_rate": 7.978211198394342e-06, + "loss": 0.209, + "step": 85420 + }, + { + "epoch": 1.1017687987257943, + "grad_norm": 0.8702109837625572, + "learning_rate": 7.977608484291176e-06, + "loss": 0.1966, + "step": 85430 + }, + { + "epoch": 1.1018977669157901, + "grad_norm": 0.849607494764083, + "learning_rate": 7.977005703137717e-06, + "loss": 0.2095, + "step": 85440 + }, + { + "epoch": 1.1020267351057862, + "grad_norm": 0.8037578893043607, + "learning_rate": 7.976402854947543e-06, + "loss": 0.1984, + "step": 85450 + }, + { + "epoch": 1.102155703295782, + "grad_norm": 0.8373475270234078, + "learning_rate": 7.975799939734227e-06, + "loss": 0.2102, + "step": 85460 + }, + { + "epoch": 1.1022846714857781, + "grad_norm": 0.9182261348296777, + "learning_rate": 7.975196957511345e-06, + "loss": 0.1954, + "step": 85470 + }, + { + "epoch": 1.102413639675774, + "grad_norm": 0.7997704543896383, + "learning_rate": 7.974593908292476e-06, + "loss": 0.1975, + "step": 85480 + }, + { + "epoch": 1.1025426078657699, + "grad_norm": 0.760498689959081, + "learning_rate": 7.973990792091202e-06, + "loss": 0.2082, + "step": 85490 + }, + { + "epoch": 1.102671576055766, + "grad_norm": 0.8302844332283291, + "learning_rate": 7.9733876089211e-06, + "loss": 0.2044, + "step": 85500 + }, + { + "epoch": 1.1028005442457618, + "grad_norm": 0.8566040230189595, + "learning_rate": 7.972784358795754e-06, + "loss": 0.1997, + "step": 85510 + }, + { + "epoch": 1.1029295124357577, + "grad_norm": 0.7956008111932914, + "learning_rate": 7.972181041728745e-06, + "loss": 0.2142, + "step": 85520 + }, + { + "epoch": 1.1030584806257537, + "grad_norm": 0.8799758008492142, + "learning_rate": 7.971577657733664e-06, + "loss": 0.2059, + "step": 85530 + }, + { + "epoch": 1.1031874488157496, + "grad_norm": 0.8109756263067625, + "learning_rate": 7.970974206824098e-06, + "loss": 0.2053, + "step": 85540 + }, + { + "epoch": 1.1033164170057455, + "grad_norm": 0.7857988842271344, + "learning_rate": 7.970370689013632e-06, + "loss": 0.1985, + "step": 85550 + }, + { + "epoch": 1.1034453851957415, + "grad_norm": 0.79518248966874, + "learning_rate": 7.969767104315856e-06, + "loss": 0.2093, + "step": 85560 + }, + { + "epoch": 1.1035743533857374, + "grad_norm": 0.9679194648056586, + "learning_rate": 7.969163452744364e-06, + "loss": 0.2094, + "step": 85570 + }, + { + "epoch": 1.1037033215757333, + "grad_norm": 0.9183979248542148, + "learning_rate": 7.968559734312748e-06, + "loss": 0.2085, + "step": 85580 + }, + { + "epoch": 1.1038322897657293, + "grad_norm": 0.8573134896220854, + "learning_rate": 7.967955949034605e-06, + "loss": 0.2094, + "step": 85590 + }, + { + "epoch": 1.1039612579557252, + "grad_norm": 0.8764883218485346, + "learning_rate": 7.967352096923529e-06, + "loss": 0.2043, + "step": 85600 + }, + { + "epoch": 1.104090226145721, + "grad_norm": 0.7761495745335789, + "learning_rate": 7.966748177993116e-06, + "loss": 0.2037, + "step": 85610 + }, + { + "epoch": 1.1042191943357171, + "grad_norm": 0.813322383924671, + "learning_rate": 7.966144192256965e-06, + "loss": 0.2012, + "step": 85620 + }, + { + "epoch": 1.104348162525713, + "grad_norm": 0.8662168405506413, + "learning_rate": 7.965540139728682e-06, + "loss": 0.212, + "step": 85630 + }, + { + "epoch": 1.104477130715709, + "grad_norm": 0.8623420319161026, + "learning_rate": 7.964936020421865e-06, + "loss": 0.2064, + "step": 85640 + }, + { + "epoch": 1.104606098905705, + "grad_norm": 0.7776201023484433, + "learning_rate": 7.964331834350119e-06, + "loss": 0.2033, + "step": 85650 + }, + { + "epoch": 1.1047350670957008, + "grad_norm": 0.9097121347462557, + "learning_rate": 7.963727581527046e-06, + "loss": 0.2071, + "step": 85660 + }, + { + "epoch": 1.1048640352856969, + "grad_norm": 0.8632820671727807, + "learning_rate": 7.963123261966257e-06, + "loss": 0.2044, + "step": 85670 + }, + { + "epoch": 1.1049930034756927, + "grad_norm": 0.88595701633915, + "learning_rate": 7.962518875681358e-06, + "loss": 0.2018, + "step": 85680 + }, + { + "epoch": 1.1051219716656886, + "grad_norm": 0.9387608083299681, + "learning_rate": 7.961914422685959e-06, + "loss": 0.2098, + "step": 85690 + }, + { + "epoch": 1.1052509398556847, + "grad_norm": 0.936656325843408, + "learning_rate": 7.961309902993672e-06, + "loss": 0.2168, + "step": 85700 + }, + { + "epoch": 1.1053799080456805, + "grad_norm": 0.9049482609011121, + "learning_rate": 7.960705316618107e-06, + "loss": 0.2082, + "step": 85710 + }, + { + "epoch": 1.1055088762356764, + "grad_norm": 0.7972201742217313, + "learning_rate": 7.96010066357288e-06, + "loss": 0.2084, + "step": 85720 + }, + { + "epoch": 1.1056378444256725, + "grad_norm": 0.8253391792357011, + "learning_rate": 7.959495943871609e-06, + "loss": 0.2173, + "step": 85730 + }, + { + "epoch": 1.1057668126156683, + "grad_norm": 0.7689475469974164, + "learning_rate": 7.958891157527908e-06, + "loss": 0.2005, + "step": 85740 + }, + { + "epoch": 1.1058957808056642, + "grad_norm": 0.8771780181607416, + "learning_rate": 7.958286304555395e-06, + "loss": 0.2062, + "step": 85750 + }, + { + "epoch": 1.1060247489956603, + "grad_norm": 0.9101454976689066, + "learning_rate": 7.957681384967696e-06, + "loss": 0.2017, + "step": 85760 + }, + { + "epoch": 1.1061537171856561, + "grad_norm": 0.8087899946773102, + "learning_rate": 7.957076398778422e-06, + "loss": 0.2084, + "step": 85770 + }, + { + "epoch": 1.106282685375652, + "grad_norm": 0.858621975610948, + "learning_rate": 7.956471346001207e-06, + "loss": 0.215, + "step": 85780 + }, + { + "epoch": 1.106411653565648, + "grad_norm": 0.9277259400515363, + "learning_rate": 7.95586622664967e-06, + "loss": 0.2172, + "step": 85790 + }, + { + "epoch": 1.106540621755644, + "grad_norm": 0.8534457260825923, + "learning_rate": 7.95526104073744e-06, + "loss": 0.2072, + "step": 85800 + }, + { + "epoch": 1.10666958994564, + "grad_norm": 0.7827839241955868, + "learning_rate": 7.954655788278143e-06, + "loss": 0.2058, + "step": 85810 + }, + { + "epoch": 1.1067985581356359, + "grad_norm": 0.7853168422912343, + "learning_rate": 7.954050469285406e-06, + "loss": 0.2168, + "step": 85820 + }, + { + "epoch": 1.1069275263256317, + "grad_norm": 0.8274040777891669, + "learning_rate": 7.953445083772863e-06, + "loss": 0.2127, + "step": 85830 + }, + { + "epoch": 1.1070564945156278, + "grad_norm": 0.7745622339498113, + "learning_rate": 7.952839631754146e-06, + "loss": 0.2011, + "step": 85840 + }, + { + "epoch": 1.1071854627056237, + "grad_norm": 0.9172760238469673, + "learning_rate": 7.952234113242889e-06, + "loss": 0.2042, + "step": 85850 + }, + { + "epoch": 1.1073144308956195, + "grad_norm": 0.8521467765850544, + "learning_rate": 7.951628528252725e-06, + "loss": 0.1964, + "step": 85860 + }, + { + "epoch": 1.1074433990856156, + "grad_norm": 0.8313924467330907, + "learning_rate": 7.951022876797291e-06, + "loss": 0.2099, + "step": 85870 + }, + { + "epoch": 1.1075723672756115, + "grad_norm": 0.8356011875746305, + "learning_rate": 7.950417158890226e-06, + "loss": 0.1922, + "step": 85880 + }, + { + "epoch": 1.1077013354656073, + "grad_norm": 0.9241432092073565, + "learning_rate": 7.949811374545169e-06, + "loss": 0.2013, + "step": 85890 + }, + { + "epoch": 1.1078303036556034, + "grad_norm": 0.8188989782628303, + "learning_rate": 7.949205523775763e-06, + "loss": 0.2097, + "step": 85900 + }, + { + "epoch": 1.1079592718455993, + "grad_norm": 0.8885266840898581, + "learning_rate": 7.948599606595648e-06, + "loss": 0.206, + "step": 85910 + }, + { + "epoch": 1.1080882400355951, + "grad_norm": 0.8199889416789712, + "learning_rate": 7.94799362301847e-06, + "loss": 0.2034, + "step": 85920 + }, + { + "epoch": 1.1082172082255912, + "grad_norm": 0.7567681486228337, + "learning_rate": 7.947387573057874e-06, + "loss": 0.2062, + "step": 85930 + }, + { + "epoch": 1.108346176415587, + "grad_norm": 0.815765757985353, + "learning_rate": 7.946781456727508e-06, + "loss": 0.2081, + "step": 85940 + }, + { + "epoch": 1.108475144605583, + "grad_norm": 0.8119042962618531, + "learning_rate": 7.94617527404102e-06, + "loss": 0.2166, + "step": 85950 + }, + { + "epoch": 1.108604112795579, + "grad_norm": 0.9344216410146083, + "learning_rate": 7.945569025012059e-06, + "loss": 0.2145, + "step": 85960 + }, + { + "epoch": 1.1087330809855749, + "grad_norm": 0.7588942357824722, + "learning_rate": 7.94496270965428e-06, + "loss": 0.2053, + "step": 85970 + }, + { + "epoch": 1.108862049175571, + "grad_norm": 0.9460178882825718, + "learning_rate": 7.94435632798133e-06, + "loss": 0.2158, + "step": 85980 + }, + { + "epoch": 1.1089910173655668, + "grad_norm": 0.8716174219336704, + "learning_rate": 7.94374988000687e-06, + "loss": 0.2048, + "step": 85990 + }, + { + "epoch": 1.1091199855555627, + "grad_norm": 0.7962364689006951, + "learning_rate": 7.943143365744552e-06, + "loss": 0.2119, + "step": 86000 + }, + { + "epoch": 1.1092489537455585, + "grad_norm": 0.7874500920041847, + "learning_rate": 7.942536785208035e-06, + "loss": 0.2118, + "step": 86010 + }, + { + "epoch": 1.1093779219355546, + "grad_norm": 0.8490926583972929, + "learning_rate": 7.941930138410979e-06, + "loss": 0.2129, + "step": 86020 + }, + { + "epoch": 1.1095068901255505, + "grad_norm": 0.8368373490500515, + "learning_rate": 7.941323425367042e-06, + "loss": 0.2121, + "step": 86030 + }, + { + "epoch": 1.1096358583155466, + "grad_norm": 0.9266807108962685, + "learning_rate": 7.94071664608989e-06, + "loss": 0.2149, + "step": 86040 + }, + { + "epoch": 1.1097648265055424, + "grad_norm": 0.8150393622359476, + "learning_rate": 7.940109800593184e-06, + "loss": 0.1953, + "step": 86050 + }, + { + "epoch": 1.1098937946955383, + "grad_norm": 0.830734979928367, + "learning_rate": 7.93950288889059e-06, + "loss": 0.2108, + "step": 86060 + }, + { + "epoch": 1.1100227628855344, + "grad_norm": 0.8691477792753249, + "learning_rate": 7.938895910995772e-06, + "loss": 0.2072, + "step": 86070 + }, + { + "epoch": 1.1101517310755302, + "grad_norm": 0.8166059094300055, + "learning_rate": 7.9382888669224e-06, + "loss": 0.2079, + "step": 86080 + }, + { + "epoch": 1.110280699265526, + "grad_norm": 0.8002236181223576, + "learning_rate": 7.937681756684144e-06, + "loss": 0.2078, + "step": 86090 + }, + { + "epoch": 1.1104096674555222, + "grad_norm": 0.8626990906125407, + "learning_rate": 7.937074580294675e-06, + "loss": 0.2048, + "step": 86100 + }, + { + "epoch": 1.110538635645518, + "grad_norm": 0.8284051103681259, + "learning_rate": 7.936467337767663e-06, + "loss": 0.2072, + "step": 86110 + }, + { + "epoch": 1.1106676038355139, + "grad_norm": 0.8925336676079137, + "learning_rate": 7.935860029116787e-06, + "loss": 0.2119, + "step": 86120 + }, + { + "epoch": 1.11079657202551, + "grad_norm": 0.855979724034792, + "learning_rate": 7.935252654355717e-06, + "loss": 0.2075, + "step": 86130 + }, + { + "epoch": 1.1109255402155058, + "grad_norm": 0.7963674752809587, + "learning_rate": 7.934645213498134e-06, + "loss": 0.2045, + "step": 86140 + }, + { + "epoch": 1.1110545084055017, + "grad_norm": 0.8648803558810024, + "learning_rate": 7.934037706557714e-06, + "loss": 0.208, + "step": 86150 + }, + { + "epoch": 1.1111834765954978, + "grad_norm": 0.8071010567856205, + "learning_rate": 7.933430133548137e-06, + "loss": 0.2024, + "step": 86160 + }, + { + "epoch": 1.1113124447854936, + "grad_norm": 0.802130600441889, + "learning_rate": 7.932822494483086e-06, + "loss": 0.2113, + "step": 86170 + }, + { + "epoch": 1.1114414129754895, + "grad_norm": 0.8877637962733108, + "learning_rate": 7.932214789376243e-06, + "loss": 0.2079, + "step": 86180 + }, + { + "epoch": 1.1115703811654856, + "grad_norm": 0.7970991467683975, + "learning_rate": 7.931607018241293e-06, + "loss": 0.2112, + "step": 86190 + }, + { + "epoch": 1.1116993493554814, + "grad_norm": 0.7593708888160963, + "learning_rate": 7.93099918109192e-06, + "loss": 0.2065, + "step": 86200 + }, + { + "epoch": 1.1118283175454775, + "grad_norm": 0.8303135878212596, + "learning_rate": 7.930391277941815e-06, + "loss": 0.211, + "step": 86210 + }, + { + "epoch": 1.1119572857354734, + "grad_norm": 0.8657913713472318, + "learning_rate": 7.929783308804663e-06, + "loss": 0.2189, + "step": 86220 + }, + { + "epoch": 1.1120862539254692, + "grad_norm": 0.8929406669878536, + "learning_rate": 7.929175273694159e-06, + "loss": 0.2124, + "step": 86230 + }, + { + "epoch": 1.1122152221154653, + "grad_norm": 0.9570169112455194, + "learning_rate": 7.928567172623989e-06, + "loss": 0.2022, + "step": 86240 + }, + { + "epoch": 1.1123441903054612, + "grad_norm": 0.8607474448398537, + "learning_rate": 7.92795900560785e-06, + "loss": 0.2045, + "step": 86250 + }, + { + "epoch": 1.112473158495457, + "grad_norm": 0.8288591011065736, + "learning_rate": 7.927350772659434e-06, + "loss": 0.2015, + "step": 86260 + }, + { + "epoch": 1.112602126685453, + "grad_norm": 0.8391026601521439, + "learning_rate": 7.926742473792443e-06, + "loss": 0.195, + "step": 86270 + }, + { + "epoch": 1.112731094875449, + "grad_norm": 0.9066141506606648, + "learning_rate": 7.926134109020569e-06, + "loss": 0.1981, + "step": 86280 + }, + { + "epoch": 1.1128600630654448, + "grad_norm": 0.8481378567843897, + "learning_rate": 7.925525678357512e-06, + "loss": 0.2067, + "step": 86290 + }, + { + "epoch": 1.112989031255441, + "grad_norm": 0.8766676424217275, + "learning_rate": 7.924917181816977e-06, + "loss": 0.217, + "step": 86300 + }, + { + "epoch": 1.1131179994454368, + "grad_norm": 0.7944732583139626, + "learning_rate": 7.924308619412661e-06, + "loss": 0.1994, + "step": 86310 + }, + { + "epoch": 1.1132469676354326, + "grad_norm": 0.9172780382683174, + "learning_rate": 7.923699991158272e-06, + "loss": 0.2131, + "step": 86320 + }, + { + "epoch": 1.1133759358254287, + "grad_norm": 0.8122900131615441, + "learning_rate": 7.923091297067512e-06, + "loss": 0.2117, + "step": 86330 + }, + { + "epoch": 1.1135049040154246, + "grad_norm": 0.7836612192336904, + "learning_rate": 7.922482537154088e-06, + "loss": 0.206, + "step": 86340 + }, + { + "epoch": 1.1136338722054204, + "grad_norm": 0.7910636110093379, + "learning_rate": 7.921873711431711e-06, + "loss": 0.1969, + "step": 86350 + }, + { + "epoch": 1.1137628403954165, + "grad_norm": 0.8081399310169877, + "learning_rate": 7.921264819914088e-06, + "loss": 0.2046, + "step": 86360 + }, + { + "epoch": 1.1138918085854124, + "grad_norm": 0.8111915647857051, + "learning_rate": 7.92065586261493e-06, + "loss": 0.1975, + "step": 86370 + }, + { + "epoch": 1.1140207767754085, + "grad_norm": 0.7834986594431722, + "learning_rate": 7.920046839547952e-06, + "loss": 0.213, + "step": 86380 + }, + { + "epoch": 1.1141497449654043, + "grad_norm": 0.8388674580696593, + "learning_rate": 7.919437750726863e-06, + "loss": 0.1872, + "step": 86390 + }, + { + "epoch": 1.1142787131554002, + "grad_norm": 0.8059652122312945, + "learning_rate": 7.918828596165386e-06, + "loss": 0.2028, + "step": 86400 + }, + { + "epoch": 1.1144076813453963, + "grad_norm": 0.8643890105754213, + "learning_rate": 7.918219375877233e-06, + "loss": 0.2019, + "step": 86410 + }, + { + "epoch": 1.114536649535392, + "grad_norm": 0.7960335435114764, + "learning_rate": 7.917610089876122e-06, + "loss": 0.2055, + "step": 86420 + }, + { + "epoch": 1.114665617725388, + "grad_norm": 0.8573769843105054, + "learning_rate": 7.917000738175777e-06, + "loss": 0.2027, + "step": 86430 + }, + { + "epoch": 1.114794585915384, + "grad_norm": 0.8216967271114536, + "learning_rate": 7.916391320789915e-06, + "loss": 0.2088, + "step": 86440 + }, + { + "epoch": 1.11492355410538, + "grad_norm": 0.8281185146415294, + "learning_rate": 7.915781837732264e-06, + "loss": 0.2047, + "step": 86450 + }, + { + "epoch": 1.1150525222953758, + "grad_norm": 0.83921543115155, + "learning_rate": 7.915172289016544e-06, + "loss": 0.2021, + "step": 86460 + }, + { + "epoch": 1.1151814904853719, + "grad_norm": 0.8203128596385555, + "learning_rate": 7.91456267465648e-06, + "loss": 0.2073, + "step": 86470 + }, + { + "epoch": 1.1153104586753677, + "grad_norm": 0.8309647133794532, + "learning_rate": 7.913952994665805e-06, + "loss": 0.2024, + "step": 86480 + }, + { + "epoch": 1.1154394268653636, + "grad_norm": 0.8743166225668116, + "learning_rate": 7.913343249058243e-06, + "loss": 0.201, + "step": 86490 + }, + { + "epoch": 1.1155683950553597, + "grad_norm": 0.7901652648758325, + "learning_rate": 7.912733437847527e-06, + "loss": 0.22, + "step": 86500 + }, + { + "epoch": 1.1156973632453555, + "grad_norm": 0.8452353288811632, + "learning_rate": 7.912123561047388e-06, + "loss": 0.2043, + "step": 86510 + }, + { + "epoch": 1.1158263314353514, + "grad_norm": 0.8071805863077296, + "learning_rate": 7.91151361867156e-06, + "loss": 0.2071, + "step": 86520 + }, + { + "epoch": 1.1159552996253475, + "grad_norm": 0.827710446153982, + "learning_rate": 7.910903610733776e-06, + "loss": 0.2133, + "step": 86530 + }, + { + "epoch": 1.1160842678153433, + "grad_norm": 0.8439252902501803, + "learning_rate": 7.910293537247772e-06, + "loss": 0.2139, + "step": 86540 + }, + { + "epoch": 1.1162132360053394, + "grad_norm": 0.885673374976495, + "learning_rate": 7.909683398227289e-06, + "loss": 0.1973, + "step": 86550 + }, + { + "epoch": 1.1163422041953353, + "grad_norm": 0.9628917893044495, + "learning_rate": 7.909073193686064e-06, + "loss": 0.1977, + "step": 86560 + }, + { + "epoch": 1.1164711723853311, + "grad_norm": 0.801768305955496, + "learning_rate": 7.908462923637836e-06, + "loss": 0.2143, + "step": 86570 + }, + { + "epoch": 1.1166001405753272, + "grad_norm": 0.9011035741597043, + "learning_rate": 7.907852588096352e-06, + "loss": 0.2031, + "step": 86580 + }, + { + "epoch": 1.116729108765323, + "grad_norm": 0.7936812570203642, + "learning_rate": 7.90724218707535e-06, + "loss": 0.1972, + "step": 86590 + }, + { + "epoch": 1.116858076955319, + "grad_norm": 0.9125165644836867, + "learning_rate": 7.906631720588578e-06, + "loss": 0.1972, + "step": 86600 + }, + { + "epoch": 1.116987045145315, + "grad_norm": 0.851948782575766, + "learning_rate": 7.906021188649785e-06, + "loss": 0.2033, + "step": 86610 + }, + { + "epoch": 1.1171160133353109, + "grad_norm": 0.8598147061980942, + "learning_rate": 7.905410591272714e-06, + "loss": 0.2131, + "step": 86620 + }, + { + "epoch": 1.1172449815253067, + "grad_norm": 0.8939886091475758, + "learning_rate": 7.904799928471118e-06, + "loss": 0.2021, + "step": 86630 + }, + { + "epoch": 1.1173739497153028, + "grad_norm": 0.8768356531637357, + "learning_rate": 7.904189200258748e-06, + "loss": 0.2163, + "step": 86640 + }, + { + "epoch": 1.1175029179052987, + "grad_norm": 0.815374447219805, + "learning_rate": 7.903578406649352e-06, + "loss": 0.2144, + "step": 86650 + }, + { + "epoch": 1.1176318860952945, + "grad_norm": 0.8198767550175529, + "learning_rate": 7.90296754765669e-06, + "loss": 0.2166, + "step": 86660 + }, + { + "epoch": 1.1177608542852906, + "grad_norm": 0.8283348976451264, + "learning_rate": 7.902356623294516e-06, + "loss": 0.1995, + "step": 86670 + }, + { + "epoch": 1.1178898224752865, + "grad_norm": 0.8757715976288817, + "learning_rate": 7.901745633576585e-06, + "loss": 0.2044, + "step": 86680 + }, + { + "epoch": 1.1180187906652823, + "grad_norm": 0.8534557067028192, + "learning_rate": 7.901134578516655e-06, + "loss": 0.2194, + "step": 86690 + }, + { + "epoch": 1.1181477588552784, + "grad_norm": 0.8852816441712105, + "learning_rate": 7.900523458128486e-06, + "loss": 0.2153, + "step": 86700 + }, + { + "epoch": 1.1182767270452743, + "grad_norm": 0.8266073519301679, + "learning_rate": 7.899912272425843e-06, + "loss": 0.2089, + "step": 86710 + }, + { + "epoch": 1.1184056952352703, + "grad_norm": 0.8689669862231455, + "learning_rate": 7.899301021422485e-06, + "loss": 0.2105, + "step": 86720 + }, + { + "epoch": 1.1185346634252662, + "grad_norm": 0.8659615164022124, + "learning_rate": 7.898689705132178e-06, + "loss": 0.2089, + "step": 86730 + }, + { + "epoch": 1.118663631615262, + "grad_norm": 0.8650252500732815, + "learning_rate": 7.898078323568686e-06, + "loss": 0.1958, + "step": 86740 + }, + { + "epoch": 1.1187925998052581, + "grad_norm": 0.7167689285188643, + "learning_rate": 7.897466876745776e-06, + "loss": 0.1919, + "step": 86750 + }, + { + "epoch": 1.118921567995254, + "grad_norm": 0.893111859483239, + "learning_rate": 7.89685536467722e-06, + "loss": 0.1933, + "step": 86760 + }, + { + "epoch": 1.1190505361852499, + "grad_norm": 0.8271582252102733, + "learning_rate": 7.896243787376786e-06, + "loss": 0.2036, + "step": 86770 + }, + { + "epoch": 1.119179504375246, + "grad_norm": 0.7734672230354394, + "learning_rate": 7.895632144858245e-06, + "loss": 0.2087, + "step": 86780 + }, + { + "epoch": 1.1193084725652418, + "grad_norm": 0.8500235679136737, + "learning_rate": 7.89502043713537e-06, + "loss": 0.2077, + "step": 86790 + }, + { + "epoch": 1.1194374407552377, + "grad_norm": 0.8312930454069704, + "learning_rate": 7.894408664221938e-06, + "loss": 0.2081, + "step": 86800 + }, + { + "epoch": 1.1195664089452337, + "grad_norm": 0.8362426589213794, + "learning_rate": 7.893796826131723e-06, + "loss": 0.2028, + "step": 86810 + }, + { + "epoch": 1.1196953771352296, + "grad_norm": 0.8056095567838589, + "learning_rate": 7.8931849228785e-06, + "loss": 0.2005, + "step": 86820 + }, + { + "epoch": 1.1198243453252255, + "grad_norm": 0.8818753324567723, + "learning_rate": 7.892572954476054e-06, + "loss": 0.2202, + "step": 86830 + }, + { + "epoch": 1.1199533135152215, + "grad_norm": 0.8112319691334928, + "learning_rate": 7.891960920938162e-06, + "loss": 0.2084, + "step": 86840 + }, + { + "epoch": 1.1200822817052174, + "grad_norm": 0.7748273089463045, + "learning_rate": 7.891348822278604e-06, + "loss": 0.2116, + "step": 86850 + }, + { + "epoch": 1.1202112498952133, + "grad_norm": 0.8886549274972989, + "learning_rate": 7.890736658511166e-06, + "loss": 0.2109, + "step": 86860 + }, + { + "epoch": 1.1203402180852093, + "grad_norm": 0.934280915644966, + "learning_rate": 7.890124429649632e-06, + "loss": 0.2099, + "step": 86870 + }, + { + "epoch": 1.1204691862752052, + "grad_norm": 0.8174216517937584, + "learning_rate": 7.88951213570779e-06, + "loss": 0.21, + "step": 86880 + }, + { + "epoch": 1.120598154465201, + "grad_norm": 0.8593956423377469, + "learning_rate": 7.888899776699423e-06, + "loss": 0.1972, + "step": 86890 + }, + { + "epoch": 1.1207271226551971, + "grad_norm": 0.7719437204602213, + "learning_rate": 7.888287352638327e-06, + "loss": 0.202, + "step": 86900 + }, + { + "epoch": 1.120856090845193, + "grad_norm": 0.8445625471894842, + "learning_rate": 7.887674863538285e-06, + "loss": 0.2067, + "step": 86910 + }, + { + "epoch": 1.1209850590351889, + "grad_norm": 0.8477946084914841, + "learning_rate": 7.887062309413096e-06, + "loss": 0.2042, + "step": 86920 + }, + { + "epoch": 1.121114027225185, + "grad_norm": 0.8226011634806281, + "learning_rate": 7.88644969027655e-06, + "loss": 0.2156, + "step": 86930 + }, + { + "epoch": 1.1212429954151808, + "grad_norm": 0.7299821115243563, + "learning_rate": 7.885837006142444e-06, + "loss": 0.207, + "step": 86940 + }, + { + "epoch": 1.1213719636051769, + "grad_norm": 0.8311176639437459, + "learning_rate": 7.885224257024571e-06, + "loss": 0.1985, + "step": 86950 + }, + { + "epoch": 1.1215009317951727, + "grad_norm": 0.7438285487896135, + "learning_rate": 7.88461144293673e-06, + "loss": 0.2033, + "step": 86960 + }, + { + "epoch": 1.1216298999851686, + "grad_norm": 0.8731021358779048, + "learning_rate": 7.883998563892726e-06, + "loss": 0.2175, + "step": 86970 + }, + { + "epoch": 1.1217588681751647, + "grad_norm": 0.9287754086810932, + "learning_rate": 7.883385619906352e-06, + "loss": 0.2015, + "step": 86980 + }, + { + "epoch": 1.1218878363651605, + "grad_norm": 0.8278955789548134, + "learning_rate": 7.882772610991416e-06, + "loss": 0.2104, + "step": 86990 + }, + { + "epoch": 1.1220168045551564, + "grad_norm": 0.8126575623419644, + "learning_rate": 7.88215953716172e-06, + "loss": 0.2044, + "step": 87000 + }, + { + "epoch": 1.1221457727451525, + "grad_norm": 0.7816085859887633, + "learning_rate": 7.881546398431068e-06, + "loss": 0.206, + "step": 87010 + }, + { + "epoch": 1.1222747409351483, + "grad_norm": 0.8681265500010168, + "learning_rate": 7.880933194813268e-06, + "loss": 0.2062, + "step": 87020 + }, + { + "epoch": 1.1224037091251442, + "grad_norm": 0.8980208131454472, + "learning_rate": 7.88031992632213e-06, + "loss": 0.2216, + "step": 87030 + }, + { + "epoch": 1.1225326773151403, + "grad_norm": 0.8211159707779346, + "learning_rate": 7.87970659297146e-06, + "loss": 0.2033, + "step": 87040 + }, + { + "epoch": 1.1226616455051361, + "grad_norm": 0.812505052934223, + "learning_rate": 7.879093194775072e-06, + "loss": 0.208, + "step": 87050 + }, + { + "epoch": 1.122790613695132, + "grad_norm": 0.8064991151263601, + "learning_rate": 7.878479731746776e-06, + "loss": 0.2107, + "step": 87060 + }, + { + "epoch": 1.122919581885128, + "grad_norm": 0.8319463768848907, + "learning_rate": 7.877866203900388e-06, + "loss": 0.2085, + "step": 87070 + }, + { + "epoch": 1.123048550075124, + "grad_norm": 0.7626738405549961, + "learning_rate": 7.877252611249726e-06, + "loss": 0.2067, + "step": 87080 + }, + { + "epoch": 1.1231775182651198, + "grad_norm": 0.8594549935802791, + "learning_rate": 7.876638953808602e-06, + "loss": 0.2052, + "step": 87090 + }, + { + "epoch": 1.1233064864551159, + "grad_norm": 0.91013211719883, + "learning_rate": 7.876025231590836e-06, + "loss": 0.2095, + "step": 87100 + }, + { + "epoch": 1.1234354546451117, + "grad_norm": 0.870041027865435, + "learning_rate": 7.875411444610248e-06, + "loss": 0.2018, + "step": 87110 + }, + { + "epoch": 1.1235644228351078, + "grad_norm": 0.8518715669067543, + "learning_rate": 7.87479759288066e-06, + "loss": 0.221, + "step": 87120 + }, + { + "epoch": 1.1236933910251037, + "grad_norm": 0.8136772334871761, + "learning_rate": 7.874183676415897e-06, + "loss": 0.2109, + "step": 87130 + }, + { + "epoch": 1.1238223592150995, + "grad_norm": 0.8902641588394034, + "learning_rate": 7.87356969522978e-06, + "loss": 0.2016, + "step": 87140 + }, + { + "epoch": 1.1239513274050956, + "grad_norm": 0.814400240570049, + "learning_rate": 7.872955649336136e-06, + "loss": 0.2017, + "step": 87150 + }, + { + "epoch": 1.1240802955950915, + "grad_norm": 0.8473669266505194, + "learning_rate": 7.87234153874879e-06, + "loss": 0.1976, + "step": 87160 + }, + { + "epoch": 1.1242092637850873, + "grad_norm": 0.8050339676380989, + "learning_rate": 7.871727363481574e-06, + "loss": 0.2116, + "step": 87170 + }, + { + "epoch": 1.1243382319750834, + "grad_norm": 0.7525697058951849, + "learning_rate": 7.871113123548316e-06, + "loss": 0.2041, + "step": 87180 + }, + { + "epoch": 1.1244672001650793, + "grad_norm": 0.8704387949862823, + "learning_rate": 7.870498818962848e-06, + "loss": 0.2096, + "step": 87190 + }, + { + "epoch": 1.1245961683550751, + "grad_norm": 2.698253355996765, + "learning_rate": 7.869884449739005e-06, + "loss": 0.2131, + "step": 87200 + }, + { + "epoch": 1.1247251365450712, + "grad_norm": 0.8237728562138726, + "learning_rate": 7.869270015890618e-06, + "loss": 0.2001, + "step": 87210 + }, + { + "epoch": 1.124854104735067, + "grad_norm": 0.9270113482051975, + "learning_rate": 7.868655517431524e-06, + "loss": 0.214, + "step": 87220 + }, + { + "epoch": 1.124983072925063, + "grad_norm": 0.8034875366816502, + "learning_rate": 7.868040954375562e-06, + "loss": 0.205, + "step": 87230 + }, + { + "epoch": 1.125112041115059, + "grad_norm": 0.8568487526394726, + "learning_rate": 7.86742632673657e-06, + "loss": 0.2018, + "step": 87240 + }, + { + "epoch": 1.1252410093050549, + "grad_norm": 0.812791645233892, + "learning_rate": 7.866811634528389e-06, + "loss": 0.1959, + "step": 87250 + }, + { + "epoch": 1.1253699774950507, + "grad_norm": 0.7550132470746558, + "learning_rate": 7.866196877764858e-06, + "loss": 0.2073, + "step": 87260 + }, + { + "epoch": 1.1254989456850468, + "grad_norm": 0.8275638900893864, + "learning_rate": 7.865582056459822e-06, + "loss": 0.2078, + "step": 87270 + }, + { + "epoch": 1.1256279138750427, + "grad_norm": 0.8411457493700308, + "learning_rate": 7.864967170627126e-06, + "loss": 0.2056, + "step": 87280 + }, + { + "epoch": 1.1257568820650388, + "grad_norm": 0.7855219617106651, + "learning_rate": 7.864352220280616e-06, + "loss": 0.199, + "step": 87290 + }, + { + "epoch": 1.1258858502550346, + "grad_norm": 0.764613668106822, + "learning_rate": 7.86373720543414e-06, + "loss": 0.2023, + "step": 87300 + }, + { + "epoch": 1.1260148184450305, + "grad_norm": 0.8159096011558077, + "learning_rate": 7.863122126101545e-06, + "loss": 0.2142, + "step": 87310 + }, + { + "epoch": 1.1261437866350266, + "grad_norm": 0.8353307437056549, + "learning_rate": 7.862506982296683e-06, + "loss": 0.2127, + "step": 87320 + }, + { + "epoch": 1.1262727548250224, + "grad_norm": 0.8129767251060046, + "learning_rate": 7.861891774033407e-06, + "loss": 0.2014, + "step": 87330 + }, + { + "epoch": 1.1264017230150183, + "grad_norm": 0.7800901227055317, + "learning_rate": 7.861276501325567e-06, + "loss": 0.2059, + "step": 87340 + }, + { + "epoch": 1.1265306912050144, + "grad_norm": 0.8454627962168667, + "learning_rate": 7.860661164187022e-06, + "loss": 0.199, + "step": 87350 + }, + { + "epoch": 1.1266596593950102, + "grad_norm": 0.781432590457481, + "learning_rate": 7.860045762631626e-06, + "loss": 0.2054, + "step": 87360 + }, + { + "epoch": 1.126788627585006, + "grad_norm": 0.8194269134787147, + "learning_rate": 7.859430296673236e-06, + "loss": 0.1972, + "step": 87370 + }, + { + "epoch": 1.1269175957750022, + "grad_norm": 0.7535497666349112, + "learning_rate": 7.858814766325712e-06, + "loss": 0.1965, + "step": 87380 + }, + { + "epoch": 1.127046563964998, + "grad_norm": 0.8031844168851728, + "learning_rate": 7.858199171602915e-06, + "loss": 0.1959, + "step": 87390 + }, + { + "epoch": 1.1271755321549939, + "grad_norm": 0.8965350325457728, + "learning_rate": 7.857583512518707e-06, + "loss": 0.2041, + "step": 87400 + }, + { + "epoch": 1.12730450034499, + "grad_norm": 0.757025177736361, + "learning_rate": 7.856967789086952e-06, + "loss": 0.2028, + "step": 87410 + }, + { + "epoch": 1.1274334685349858, + "grad_norm": 0.9321683372028411, + "learning_rate": 7.85635200132151e-06, + "loss": 0.1927, + "step": 87420 + }, + { + "epoch": 1.1275624367249817, + "grad_norm": 0.9394947234951317, + "learning_rate": 7.855736149236257e-06, + "loss": 0.2063, + "step": 87430 + }, + { + "epoch": 1.1276914049149778, + "grad_norm": 0.8482314447261261, + "learning_rate": 7.855120232845053e-06, + "loss": 0.2071, + "step": 87440 + }, + { + "epoch": 1.1278203731049736, + "grad_norm": 0.8676857497466136, + "learning_rate": 7.85450425216177e-06, + "loss": 0.2003, + "step": 87450 + }, + { + "epoch": 1.1279493412949697, + "grad_norm": 0.8084682819068088, + "learning_rate": 7.853888207200278e-06, + "loss": 0.203, + "step": 87460 + }, + { + "epoch": 1.1280783094849656, + "grad_norm": 0.7887020814680721, + "learning_rate": 7.85327209797445e-06, + "loss": 0.2143, + "step": 87470 + }, + { + "epoch": 1.1282072776749614, + "grad_norm": 0.8799433421553285, + "learning_rate": 7.85265592449816e-06, + "loss": 0.2041, + "step": 87480 + }, + { + "epoch": 1.1283362458649573, + "grad_norm": 0.8772120548759679, + "learning_rate": 7.852039686785281e-06, + "loss": 0.2061, + "step": 87490 + }, + { + "epoch": 1.1284652140549534, + "grad_norm": 0.8290115131862126, + "learning_rate": 7.851423384849693e-06, + "loss": 0.2032, + "step": 87500 + }, + { + "epoch": 1.1285941822449492, + "grad_norm": 0.8391473538001696, + "learning_rate": 7.850807018705271e-06, + "loss": 0.2058, + "step": 87510 + }, + { + "epoch": 1.1287231504349453, + "grad_norm": 0.8874281313824424, + "learning_rate": 7.850190588365896e-06, + "loss": 0.2214, + "step": 87520 + }, + { + "epoch": 1.1288521186249412, + "grad_norm": 0.8884718396465011, + "learning_rate": 7.849574093845447e-06, + "loss": 0.2121, + "step": 87530 + }, + { + "epoch": 1.128981086814937, + "grad_norm": 0.8259754097274075, + "learning_rate": 7.84895753515781e-06, + "loss": 0.202, + "step": 87540 + }, + { + "epoch": 1.129110055004933, + "grad_norm": 0.8067608255975603, + "learning_rate": 7.848340912316865e-06, + "loss": 0.2089, + "step": 87550 + }, + { + "epoch": 1.129239023194929, + "grad_norm": 0.7850164108151603, + "learning_rate": 7.847724225336499e-06, + "loss": 0.1939, + "step": 87560 + }, + { + "epoch": 1.1293679913849248, + "grad_norm": 0.7974352644816166, + "learning_rate": 7.847107474230597e-06, + "loss": 0.1945, + "step": 87570 + }, + { + "epoch": 1.129496959574921, + "grad_norm": 0.9583277451208226, + "learning_rate": 7.846490659013049e-06, + "loss": 0.2209, + "step": 87580 + }, + { + "epoch": 1.1296259277649168, + "grad_norm": 0.8466863010429274, + "learning_rate": 7.845873779697746e-06, + "loss": 0.2066, + "step": 87590 + }, + { + "epoch": 1.1297548959549126, + "grad_norm": 0.7393843284487273, + "learning_rate": 7.845256836298576e-06, + "loss": 0.1991, + "step": 87600 + }, + { + "epoch": 1.1298838641449087, + "grad_norm": 0.836742457575484, + "learning_rate": 7.844639828829431e-06, + "loss": 0.2023, + "step": 87610 + }, + { + "epoch": 1.1300128323349046, + "grad_norm": 0.8684938289228042, + "learning_rate": 7.844022757304208e-06, + "loss": 0.2071, + "step": 87620 + }, + { + "epoch": 1.1301418005249007, + "grad_norm": 0.8831362684668244, + "learning_rate": 7.843405621736798e-06, + "loss": 0.2053, + "step": 87630 + }, + { + "epoch": 1.1302707687148965, + "grad_norm": 0.850871145313648, + "learning_rate": 7.842788422141103e-06, + "loss": 0.1961, + "step": 87640 + }, + { + "epoch": 1.1303997369048924, + "grad_norm": 0.8359882028242063, + "learning_rate": 7.842171158531018e-06, + "loss": 0.205, + "step": 87650 + }, + { + "epoch": 1.1305287050948882, + "grad_norm": 0.9141094207659679, + "learning_rate": 7.841553830920444e-06, + "loss": 0.2038, + "step": 87660 + }, + { + "epoch": 1.1306576732848843, + "grad_norm": 0.9714391819513067, + "learning_rate": 7.840936439323279e-06, + "loss": 0.2066, + "step": 87670 + }, + { + "epoch": 1.1307866414748802, + "grad_norm": 0.8514429275186719, + "learning_rate": 7.840318983753429e-06, + "loss": 0.2034, + "step": 87680 + }, + { + "epoch": 1.1309156096648763, + "grad_norm": 0.8096470209459958, + "learning_rate": 7.839701464224797e-06, + "loss": 0.2149, + "step": 87690 + }, + { + "epoch": 1.1310445778548721, + "grad_norm": 0.8517667006312757, + "learning_rate": 7.839083880751289e-06, + "loss": 0.2081, + "step": 87700 + }, + { + "epoch": 1.131173546044868, + "grad_norm": 0.9117075124466274, + "learning_rate": 7.83846623334681e-06, + "loss": 0.2126, + "step": 87710 + }, + { + "epoch": 1.131302514234864, + "grad_norm": 0.8632947703069646, + "learning_rate": 7.83784852202527e-06, + "loss": 0.2017, + "step": 87720 + }, + { + "epoch": 1.13143148242486, + "grad_norm": 0.7841177470222799, + "learning_rate": 7.837230746800578e-06, + "loss": 0.1953, + "step": 87730 + }, + { + "epoch": 1.1315604506148558, + "grad_norm": 0.8290895778112073, + "learning_rate": 7.836612907686644e-06, + "loss": 0.2107, + "step": 87740 + }, + { + "epoch": 1.1316894188048519, + "grad_norm": 0.896777014643119, + "learning_rate": 7.835995004697383e-06, + "loss": 0.2103, + "step": 87750 + }, + { + "epoch": 1.1318183869948477, + "grad_norm": 0.8789493328750474, + "learning_rate": 7.835377037846708e-06, + "loss": 0.214, + "step": 87760 + }, + { + "epoch": 1.1319473551848436, + "grad_norm": 0.8837815663316199, + "learning_rate": 7.834759007148534e-06, + "loss": 0.2132, + "step": 87770 + }, + { + "epoch": 1.1320763233748397, + "grad_norm": 0.8483935777068594, + "learning_rate": 7.834140912616777e-06, + "loss": 0.1943, + "step": 87780 + }, + { + "epoch": 1.1322052915648355, + "grad_norm": 0.8970630727909936, + "learning_rate": 7.833522754265359e-06, + "loss": 0.2028, + "step": 87790 + }, + { + "epoch": 1.1323342597548314, + "grad_norm": 0.8276115734369567, + "learning_rate": 7.832904532108197e-06, + "loss": 0.196, + "step": 87800 + }, + { + "epoch": 1.1324632279448275, + "grad_norm": 0.8463506903609239, + "learning_rate": 7.83228624615921e-06, + "loss": 0.205, + "step": 87810 + }, + { + "epoch": 1.1325921961348233, + "grad_norm": 0.8251959263210206, + "learning_rate": 7.831667896432327e-06, + "loss": 0.2013, + "step": 87820 + }, + { + "epoch": 1.1327211643248192, + "grad_norm": 0.8413164835995552, + "learning_rate": 7.831049482941467e-06, + "loss": 0.2004, + "step": 87830 + }, + { + "epoch": 1.1328501325148153, + "grad_norm": 0.8566934478504136, + "learning_rate": 7.830431005700556e-06, + "loss": 0.2034, + "step": 87840 + }, + { + "epoch": 1.1329791007048111, + "grad_norm": 0.8245540868563324, + "learning_rate": 7.829812464723524e-06, + "loss": 0.1988, + "step": 87850 + }, + { + "epoch": 1.1331080688948072, + "grad_norm": 0.8014299474238737, + "learning_rate": 7.829193860024295e-06, + "loss": 0.1986, + "step": 87860 + }, + { + "epoch": 1.133237037084803, + "grad_norm": 0.8236685410745723, + "learning_rate": 7.828575191616803e-06, + "loss": 0.2048, + "step": 87870 + }, + { + "epoch": 1.133366005274799, + "grad_norm": 0.76496283410435, + "learning_rate": 7.827956459514977e-06, + "loss": 0.2086, + "step": 87880 + }, + { + "epoch": 1.133494973464795, + "grad_norm": 0.7836759286222581, + "learning_rate": 7.82733766373275e-06, + "loss": 0.2088, + "step": 87890 + }, + { + "epoch": 1.1336239416547909, + "grad_norm": 0.8724625563258527, + "learning_rate": 7.826718804284057e-06, + "loss": 0.2095, + "step": 87900 + }, + { + "epoch": 1.1337529098447867, + "grad_norm": 0.7975481823415775, + "learning_rate": 7.826099881182834e-06, + "loss": 0.1947, + "step": 87910 + }, + { + "epoch": 1.1338818780347828, + "grad_norm": 0.8006780203646995, + "learning_rate": 7.825480894443015e-06, + "loss": 0.2063, + "step": 87920 + }, + { + "epoch": 1.1340108462247787, + "grad_norm": 0.7855317793138274, + "learning_rate": 7.824861844078542e-06, + "loss": 0.1892, + "step": 87930 + }, + { + "epoch": 1.1341398144147745, + "grad_norm": 0.8281661901872116, + "learning_rate": 7.824242730103351e-06, + "loss": 0.1957, + "step": 87940 + }, + { + "epoch": 1.1342687826047706, + "grad_norm": 0.7827172986239396, + "learning_rate": 7.823623552531386e-06, + "loss": 0.2116, + "step": 87950 + }, + { + "epoch": 1.1343977507947665, + "grad_norm": 0.8786537327925564, + "learning_rate": 7.823004311376591e-06, + "loss": 0.1991, + "step": 87960 + }, + { + "epoch": 1.1345267189847623, + "grad_norm": 0.7967480918105564, + "learning_rate": 7.822385006652907e-06, + "loss": 0.2049, + "step": 87970 + }, + { + "epoch": 1.1346556871747584, + "grad_norm": 0.8194755287221641, + "learning_rate": 7.821765638374283e-06, + "loss": 0.2085, + "step": 87980 + }, + { + "epoch": 1.1347846553647543, + "grad_norm": 0.7211277582997345, + "learning_rate": 7.821146206554662e-06, + "loss": 0.2054, + "step": 87990 + }, + { + "epoch": 1.1349136235547501, + "grad_norm": 0.7689581534543233, + "learning_rate": 7.820526711207996e-06, + "loss": 0.1951, + "step": 88000 + }, + { + "epoch": 1.1350425917447462, + "grad_norm": 0.8481670437701457, + "learning_rate": 7.819907152348232e-06, + "loss": 0.2196, + "step": 88010 + }, + { + "epoch": 1.135171559934742, + "grad_norm": 0.8510070894513836, + "learning_rate": 7.819287529989326e-06, + "loss": 0.1981, + "step": 88020 + }, + { + "epoch": 1.1353005281247381, + "grad_norm": 0.9728895130429058, + "learning_rate": 7.818667844145224e-06, + "loss": 0.2176, + "step": 88030 + }, + { + "epoch": 1.135429496314734, + "grad_norm": 0.8218543023448446, + "learning_rate": 7.818048094829887e-06, + "loss": 0.2128, + "step": 88040 + }, + { + "epoch": 1.1355584645047299, + "grad_norm": 0.87545625749022, + "learning_rate": 7.817428282057264e-06, + "loss": 0.2079, + "step": 88050 + }, + { + "epoch": 1.135687432694726, + "grad_norm": 0.8234019754453064, + "learning_rate": 7.816808405841318e-06, + "loss": 0.1986, + "step": 88060 + }, + { + "epoch": 1.1358164008847218, + "grad_norm": 0.8160515801689786, + "learning_rate": 7.816188466196004e-06, + "loss": 0.2063, + "step": 88070 + }, + { + "epoch": 1.1359453690747177, + "grad_norm": 0.987743361411049, + "learning_rate": 7.815568463135284e-06, + "loss": 0.2159, + "step": 88080 + }, + { + "epoch": 1.1360743372647137, + "grad_norm": 0.7895026100952844, + "learning_rate": 7.814948396673117e-06, + "loss": 0.1966, + "step": 88090 + }, + { + "epoch": 1.1362033054547096, + "grad_norm": 0.8897210344972081, + "learning_rate": 7.814328266823468e-06, + "loss": 0.1978, + "step": 88100 + }, + { + "epoch": 1.1363322736447055, + "grad_norm": 0.8527338163433316, + "learning_rate": 7.813708073600298e-06, + "loss": 0.203, + "step": 88110 + }, + { + "epoch": 1.1364612418347015, + "grad_norm": 0.827722135351471, + "learning_rate": 7.813087817017578e-06, + "loss": 0.2078, + "step": 88120 + }, + { + "epoch": 1.1365902100246974, + "grad_norm": 0.7955538388932987, + "learning_rate": 7.81246749708927e-06, + "loss": 0.2003, + "step": 88130 + }, + { + "epoch": 1.1367191782146933, + "grad_norm": 0.8635094890441416, + "learning_rate": 7.811847113829345e-06, + "loss": 0.2118, + "step": 88140 + }, + { + "epoch": 1.1368481464046893, + "grad_norm": 0.8514574786864696, + "learning_rate": 7.811226667251771e-06, + "loss": 0.2058, + "step": 88150 + }, + { + "epoch": 1.1369771145946852, + "grad_norm": 0.8031383861395028, + "learning_rate": 7.810606157370521e-06, + "loss": 0.208, + "step": 88160 + }, + { + "epoch": 1.137106082784681, + "grad_norm": 0.7906617908609981, + "learning_rate": 7.80998558419957e-06, + "loss": 0.2069, + "step": 88170 + }, + { + "epoch": 1.1372350509746771, + "grad_norm": 0.7244213357720102, + "learning_rate": 7.809364947752885e-06, + "loss": 0.2023, + "step": 88180 + }, + { + "epoch": 1.137364019164673, + "grad_norm": 0.7830405203074732, + "learning_rate": 7.808744248044448e-06, + "loss": 0.2106, + "step": 88190 + }, + { + "epoch": 1.137492987354669, + "grad_norm": 0.8384139433557204, + "learning_rate": 7.808123485088232e-06, + "loss": 0.2107, + "step": 88200 + }, + { + "epoch": 1.137621955544665, + "grad_norm": 0.9133482814470649, + "learning_rate": 7.80750265889822e-06, + "loss": 0.213, + "step": 88210 + }, + { + "epoch": 1.1377509237346608, + "grad_norm": 0.8842339317931284, + "learning_rate": 7.80688176948839e-06, + "loss": 0.2032, + "step": 88220 + }, + { + "epoch": 1.1378798919246567, + "grad_norm": 0.8384946271541496, + "learning_rate": 7.806260816872722e-06, + "loss": 0.2065, + "step": 88230 + }, + { + "epoch": 1.1380088601146527, + "grad_norm": 0.7653856183194309, + "learning_rate": 7.805639801065196e-06, + "loss": 0.2076, + "step": 88240 + }, + { + "epoch": 1.1381378283046486, + "grad_norm": 0.8432576611040082, + "learning_rate": 7.805018722079804e-06, + "loss": 0.2025, + "step": 88250 + }, + { + "epoch": 1.1382667964946447, + "grad_norm": 0.8607480305612033, + "learning_rate": 7.804397579930524e-06, + "loss": 0.2072, + "step": 88260 + }, + { + "epoch": 1.1383957646846405, + "grad_norm": 0.8312289757993409, + "learning_rate": 7.803776374631348e-06, + "loss": 0.2099, + "step": 88270 + }, + { + "epoch": 1.1385247328746364, + "grad_norm": 0.866807172558965, + "learning_rate": 7.803155106196263e-06, + "loss": 0.2201, + "step": 88280 + }, + { + "epoch": 1.1386537010646325, + "grad_norm": 0.8301068649411371, + "learning_rate": 7.802533774639256e-06, + "loss": 0.2073, + "step": 88290 + }, + { + "epoch": 1.1387826692546283, + "grad_norm": 0.8679510910552903, + "learning_rate": 7.801912379974322e-06, + "loss": 0.1879, + "step": 88300 + }, + { + "epoch": 1.1389116374446242, + "grad_norm": 0.8292579705948313, + "learning_rate": 7.801290922215451e-06, + "loss": 0.2079, + "step": 88310 + }, + { + "epoch": 1.1390406056346203, + "grad_norm": 0.8962016601658881, + "learning_rate": 7.80066940137664e-06, + "loss": 0.2084, + "step": 88320 + }, + { + "epoch": 1.1391695738246161, + "grad_norm": 0.8891488923044135, + "learning_rate": 7.80004781747188e-06, + "loss": 0.2062, + "step": 88330 + }, + { + "epoch": 1.139298542014612, + "grad_norm": 0.7889538385158924, + "learning_rate": 7.799426170515174e-06, + "loss": 0.1983, + "step": 88340 + }, + { + "epoch": 1.139427510204608, + "grad_norm": 0.8991558807162761, + "learning_rate": 7.798804460520516e-06, + "loss": 0.2037, + "step": 88350 + }, + { + "epoch": 1.139556478394604, + "grad_norm": 0.8944542580059831, + "learning_rate": 7.798182687501905e-06, + "loss": 0.1984, + "step": 88360 + }, + { + "epoch": 1.1396854465846, + "grad_norm": 0.8855338570417208, + "learning_rate": 7.797560851473347e-06, + "loss": 0.2212, + "step": 88370 + }, + { + "epoch": 1.1398144147745959, + "grad_norm": 0.8523317768651998, + "learning_rate": 7.79693895244884e-06, + "loss": 0.2065, + "step": 88380 + }, + { + "epoch": 1.1399433829645917, + "grad_norm": 0.8181148848195231, + "learning_rate": 7.79631699044239e-06, + "loss": 0.2006, + "step": 88390 + }, + { + "epoch": 1.1400723511545876, + "grad_norm": 0.7707486462273586, + "learning_rate": 7.795694965468002e-06, + "loss": 0.2066, + "step": 88400 + }, + { + "epoch": 1.1402013193445837, + "grad_norm": 0.8617366959879839, + "learning_rate": 7.795072877539682e-06, + "loss": 0.2053, + "step": 88410 + }, + { + "epoch": 1.1403302875345795, + "grad_norm": 0.82735825664558, + "learning_rate": 7.79445072667144e-06, + "loss": 0.201, + "step": 88420 + }, + { + "epoch": 1.1404592557245756, + "grad_norm": 0.9137341863218924, + "learning_rate": 7.793828512877285e-06, + "loss": 0.2145, + "step": 88430 + }, + { + "epoch": 1.1405882239145715, + "grad_norm": 0.8341391802590583, + "learning_rate": 7.793206236171229e-06, + "loss": 0.2162, + "step": 88440 + }, + { + "epoch": 1.1407171921045673, + "grad_norm": 0.8859413244040352, + "learning_rate": 7.792583896567282e-06, + "loss": 0.2085, + "step": 88450 + }, + { + "epoch": 1.1408461602945634, + "grad_norm": 0.8310478838320211, + "learning_rate": 7.79196149407946e-06, + "loss": 0.2045, + "step": 88460 + }, + { + "epoch": 1.1409751284845593, + "grad_norm": 0.7782943128265263, + "learning_rate": 7.791339028721779e-06, + "loss": 0.2002, + "step": 88470 + }, + { + "epoch": 1.1411040966745551, + "grad_norm": 0.9238875583952748, + "learning_rate": 7.790716500508255e-06, + "loss": 0.2063, + "step": 88480 + }, + { + "epoch": 1.1412330648645512, + "grad_norm": 0.8042011105823618, + "learning_rate": 7.790093909452905e-06, + "loss": 0.2138, + "step": 88490 + }, + { + "epoch": 1.141362033054547, + "grad_norm": 0.6937625325390632, + "learning_rate": 7.78947125556975e-06, + "loss": 0.2114, + "step": 88500 + }, + { + "epoch": 1.141491001244543, + "grad_norm": 0.8664457929571795, + "learning_rate": 7.788848538872809e-06, + "loss": 0.2061, + "step": 88510 + }, + { + "epoch": 1.141619969434539, + "grad_norm": 0.8593136884594942, + "learning_rate": 7.788225759376107e-06, + "loss": 0.1977, + "step": 88520 + }, + { + "epoch": 1.1417489376245349, + "grad_norm": 0.8988813373947915, + "learning_rate": 7.787602917093667e-06, + "loss": 0.2042, + "step": 88530 + }, + { + "epoch": 1.141877905814531, + "grad_norm": 0.8433590923513545, + "learning_rate": 7.786980012039513e-06, + "loss": 0.2192, + "step": 88540 + }, + { + "epoch": 1.1420068740045268, + "grad_norm": 0.8759359998753499, + "learning_rate": 7.786357044227674e-06, + "loss": 0.1882, + "step": 88550 + }, + { + "epoch": 1.1421358421945227, + "grad_norm": 0.8112076002167173, + "learning_rate": 7.785734013672178e-06, + "loss": 0.2006, + "step": 88560 + }, + { + "epoch": 1.1422648103845185, + "grad_norm": 0.9277583506192875, + "learning_rate": 7.785110920387051e-06, + "loss": 0.2222, + "step": 88570 + }, + { + "epoch": 1.1423937785745146, + "grad_norm": 0.8768790437077604, + "learning_rate": 7.784487764386327e-06, + "loss": 0.1976, + "step": 88580 + }, + { + "epoch": 1.1425227467645105, + "grad_norm": 0.7973150118758809, + "learning_rate": 7.783864545684037e-06, + "loss": 0.2087, + "step": 88590 + }, + { + "epoch": 1.1426517149545066, + "grad_norm": 0.8328054700242085, + "learning_rate": 7.783241264294216e-06, + "loss": 0.2132, + "step": 88600 + }, + { + "epoch": 1.1427806831445024, + "grad_norm": 0.8016269250044161, + "learning_rate": 7.782617920230899e-06, + "loss": 0.2087, + "step": 88610 + }, + { + "epoch": 1.1429096513344983, + "grad_norm": 0.7648236794025475, + "learning_rate": 7.781994513508122e-06, + "loss": 0.2085, + "step": 88620 + }, + { + "epoch": 1.1430386195244944, + "grad_norm": 0.831413704449312, + "learning_rate": 7.781371044139922e-06, + "loss": 0.2088, + "step": 88630 + }, + { + "epoch": 1.1431675877144902, + "grad_norm": 0.8343649656837651, + "learning_rate": 7.780747512140342e-06, + "loss": 0.2134, + "step": 88640 + }, + { + "epoch": 1.143296555904486, + "grad_norm": 0.7519256308176611, + "learning_rate": 7.780123917523419e-06, + "loss": 0.2026, + "step": 88650 + }, + { + "epoch": 1.1434255240944822, + "grad_norm": 0.7642474922670996, + "learning_rate": 7.779500260303195e-06, + "loss": 0.197, + "step": 88660 + }, + { + "epoch": 1.143554492284478, + "grad_norm": 0.8846036718419246, + "learning_rate": 7.778876540493716e-06, + "loss": 0.2179, + "step": 88670 + }, + { + "epoch": 1.143683460474474, + "grad_norm": 0.8215737440459973, + "learning_rate": 7.778252758109026e-06, + "loss": 0.1885, + "step": 88680 + }, + { + "epoch": 1.14381242866447, + "grad_norm": 0.9487524339478692, + "learning_rate": 7.777628913163172e-06, + "loss": 0.1988, + "step": 88690 + }, + { + "epoch": 1.1439413968544658, + "grad_norm": 0.8189467652980249, + "learning_rate": 7.777005005670202e-06, + "loss": 0.1971, + "step": 88700 + }, + { + "epoch": 1.1440703650444617, + "grad_norm": 0.8559924020947999, + "learning_rate": 7.776381035644163e-06, + "loss": 0.216, + "step": 88710 + }, + { + "epoch": 1.1441993332344578, + "grad_norm": 0.8363204323305214, + "learning_rate": 7.775757003099109e-06, + "loss": 0.2152, + "step": 88720 + }, + { + "epoch": 1.1443283014244536, + "grad_norm": 0.8032669294942807, + "learning_rate": 7.77513290804909e-06, + "loss": 0.2134, + "step": 88730 + }, + { + "epoch": 1.1444572696144495, + "grad_norm": 0.7982643233348855, + "learning_rate": 7.77450875050816e-06, + "loss": 0.2097, + "step": 88740 + }, + { + "epoch": 1.1445862378044456, + "grad_norm": 0.7936458571493492, + "learning_rate": 7.773884530490372e-06, + "loss": 0.205, + "step": 88750 + }, + { + "epoch": 1.1447152059944414, + "grad_norm": 0.858468008104845, + "learning_rate": 7.773260248009784e-06, + "loss": 0.2131, + "step": 88760 + }, + { + "epoch": 1.1448441741844375, + "grad_norm": 0.8693086436760459, + "learning_rate": 7.772635903080457e-06, + "loss": 0.2155, + "step": 88770 + }, + { + "epoch": 1.1449731423744334, + "grad_norm": 0.7803981270450887, + "learning_rate": 7.772011495716444e-06, + "loss": 0.2058, + "step": 88780 + }, + { + "epoch": 1.1451021105644292, + "grad_norm": 0.8495682738369456, + "learning_rate": 7.77138702593181e-06, + "loss": 0.2004, + "step": 88790 + }, + { + "epoch": 1.1452310787544253, + "grad_norm": 0.8577038971031571, + "learning_rate": 7.770762493740614e-06, + "loss": 0.2085, + "step": 88800 + }, + { + "epoch": 1.1453600469444212, + "grad_norm": 0.8333021059737613, + "learning_rate": 7.77013789915692e-06, + "loss": 0.2019, + "step": 88810 + }, + { + "epoch": 1.145489015134417, + "grad_norm": 0.9348591073664526, + "learning_rate": 7.769513242194795e-06, + "loss": 0.2109, + "step": 88820 + }, + { + "epoch": 1.1456179833244131, + "grad_norm": 0.8041596066418251, + "learning_rate": 7.768888522868304e-06, + "loss": 0.1997, + "step": 88830 + }, + { + "epoch": 1.145746951514409, + "grad_norm": 0.8941018605697832, + "learning_rate": 7.76826374119151e-06, + "loss": 0.2149, + "step": 88840 + }, + { + "epoch": 1.1458759197044048, + "grad_norm": 0.8960320171267908, + "learning_rate": 7.76763889717849e-06, + "loss": 0.2106, + "step": 88850 + }, + { + "epoch": 1.146004887894401, + "grad_norm": 0.783274248499107, + "learning_rate": 7.767013990843308e-06, + "loss": 0.1921, + "step": 88860 + }, + { + "epoch": 1.1461338560843968, + "grad_norm": 0.8506821257202295, + "learning_rate": 7.76638902220004e-06, + "loss": 0.2092, + "step": 88870 + }, + { + "epoch": 1.1462628242743926, + "grad_norm": 0.8850570745338378, + "learning_rate": 7.765763991262756e-06, + "loss": 0.2089, + "step": 88880 + }, + { + "epoch": 1.1463917924643887, + "grad_norm": 0.8588069827325622, + "learning_rate": 7.765138898045532e-06, + "loss": 0.209, + "step": 88890 + }, + { + "epoch": 1.1465207606543846, + "grad_norm": 0.8929415843266683, + "learning_rate": 7.764513742562443e-06, + "loss": 0.2227, + "step": 88900 + }, + { + "epoch": 1.1466497288443804, + "grad_norm": 0.8422765795446305, + "learning_rate": 7.763888524827568e-06, + "loss": 0.1983, + "step": 88910 + }, + { + "epoch": 1.1467786970343765, + "grad_norm": 0.8809496855435535, + "learning_rate": 7.763263244854983e-06, + "loss": 0.2049, + "step": 88920 + }, + { + "epoch": 1.1469076652243724, + "grad_norm": 0.8387825033997525, + "learning_rate": 7.762637902658772e-06, + "loss": 0.1958, + "step": 88930 + }, + { + "epoch": 1.1470366334143685, + "grad_norm": 0.7908907641495756, + "learning_rate": 7.762012498253014e-06, + "loss": 0.1981, + "step": 88940 + }, + { + "epoch": 1.1471656016043643, + "grad_norm": 0.8016332385945203, + "learning_rate": 7.761387031651793e-06, + "loss": 0.2018, + "step": 88950 + }, + { + "epoch": 1.1472945697943602, + "grad_norm": 0.8105471361017599, + "learning_rate": 7.76076150286919e-06, + "loss": 0.2098, + "step": 88960 + }, + { + "epoch": 1.147423537984356, + "grad_norm": 0.8956742997349489, + "learning_rate": 7.760135911919296e-06, + "loss": 0.2035, + "step": 88970 + }, + { + "epoch": 1.1475525061743521, + "grad_norm": 0.9831434120291971, + "learning_rate": 7.759510258816196e-06, + "loss": 0.2192, + "step": 88980 + }, + { + "epoch": 1.147681474364348, + "grad_norm": 0.8770044446245808, + "learning_rate": 7.758884543573977e-06, + "loss": 0.196, + "step": 88990 + }, + { + "epoch": 1.147810442554344, + "grad_norm": 0.8133931278525613, + "learning_rate": 7.758258766206732e-06, + "loss": 0.2115, + "step": 89000 + }, + { + "epoch": 1.14793941074434, + "grad_norm": 0.8249395640550502, + "learning_rate": 7.757632926728549e-06, + "loss": 0.2049, + "step": 89010 + }, + { + "epoch": 1.1480683789343358, + "grad_norm": 0.801799771257564, + "learning_rate": 7.757007025153522e-06, + "loss": 0.2132, + "step": 89020 + }, + { + "epoch": 1.1481973471243319, + "grad_norm": 0.8044597766854575, + "learning_rate": 7.756381061495746e-06, + "loss": 0.1977, + "step": 89030 + }, + { + "epoch": 1.1483263153143277, + "grad_norm": 0.9868695622550188, + "learning_rate": 7.755755035769318e-06, + "loss": 0.2112, + "step": 89040 + }, + { + "epoch": 1.1484552835043236, + "grad_norm": 0.7504272314910899, + "learning_rate": 7.755128947988332e-06, + "loss": 0.2047, + "step": 89050 + }, + { + "epoch": 1.1485842516943197, + "grad_norm": 0.7860249542945551, + "learning_rate": 7.754502798166887e-06, + "loss": 0.1854, + "step": 89060 + }, + { + "epoch": 1.1487132198843155, + "grad_norm": 0.9047295742780226, + "learning_rate": 7.753876586319082e-06, + "loss": 0.2134, + "step": 89070 + }, + { + "epoch": 1.1488421880743114, + "grad_norm": 0.9336346363736487, + "learning_rate": 7.753250312459022e-06, + "loss": 0.2099, + "step": 89080 + }, + { + "epoch": 1.1489711562643075, + "grad_norm": 0.9456914582375489, + "learning_rate": 7.752623976600805e-06, + "loss": 0.2103, + "step": 89090 + }, + { + "epoch": 1.1491001244543033, + "grad_norm": 0.8920454199220397, + "learning_rate": 7.751997578758537e-06, + "loss": 0.1965, + "step": 89100 + }, + { + "epoch": 1.1492290926442994, + "grad_norm": 0.8474458019168445, + "learning_rate": 7.751371118946323e-06, + "loss": 0.205, + "step": 89110 + }, + { + "epoch": 1.1493580608342953, + "grad_norm": 0.7698184317030762, + "learning_rate": 7.750744597178271e-06, + "loss": 0.1977, + "step": 89120 + }, + { + "epoch": 1.1494870290242911, + "grad_norm": 0.8297331774470433, + "learning_rate": 7.750118013468486e-06, + "loss": 0.1967, + "step": 89130 + }, + { + "epoch": 1.149615997214287, + "grad_norm": 0.8229145750687394, + "learning_rate": 7.74949136783108e-06, + "loss": 0.211, + "step": 89140 + }, + { + "epoch": 1.149744965404283, + "grad_norm": 0.9000976439119298, + "learning_rate": 7.748864660280166e-06, + "loss": 0.2056, + "step": 89150 + }, + { + "epoch": 1.149873933594279, + "grad_norm": 0.8638098107137067, + "learning_rate": 7.74823789082985e-06, + "loss": 0.2134, + "step": 89160 + }, + { + "epoch": 1.150002901784275, + "grad_norm": 0.8327700080129443, + "learning_rate": 7.747611059494251e-06, + "loss": 0.1956, + "step": 89170 + }, + { + "epoch": 1.1501318699742709, + "grad_norm": 0.8822294729575633, + "learning_rate": 7.746984166287484e-06, + "loss": 0.2126, + "step": 89180 + }, + { + "epoch": 1.1502608381642667, + "grad_norm": 0.8616208963620534, + "learning_rate": 7.746357211223662e-06, + "loss": 0.2006, + "step": 89190 + }, + { + "epoch": 1.1503898063542628, + "grad_norm": 0.7743671020867285, + "learning_rate": 7.745730194316905e-06, + "loss": 0.2041, + "step": 89200 + }, + { + "epoch": 1.1505187745442587, + "grad_norm": 0.8132390397418973, + "learning_rate": 7.745103115581333e-06, + "loss": 0.2044, + "step": 89210 + }, + { + "epoch": 1.1506477427342545, + "grad_norm": 0.8289574742253573, + "learning_rate": 7.744475975031066e-06, + "loss": 0.2084, + "step": 89220 + }, + { + "epoch": 1.1507767109242506, + "grad_norm": 0.835557094156466, + "learning_rate": 7.743848772680224e-06, + "loss": 0.2052, + "step": 89230 + }, + { + "epoch": 1.1509056791142465, + "grad_norm": 0.8618517748378227, + "learning_rate": 7.743221508542933e-06, + "loss": 0.2129, + "step": 89240 + }, + { + "epoch": 1.1510346473042423, + "grad_norm": 0.8581801507255171, + "learning_rate": 7.742594182633319e-06, + "loss": 0.2107, + "step": 89250 + }, + { + "epoch": 1.1511636154942384, + "grad_norm": 0.7974489226007155, + "learning_rate": 7.741966794965505e-06, + "loss": 0.2053, + "step": 89260 + }, + { + "epoch": 1.1512925836842343, + "grad_norm": 0.797819954154643, + "learning_rate": 7.741339345553618e-06, + "loss": 0.2101, + "step": 89270 + }, + { + "epoch": 1.1514215518742303, + "grad_norm": 0.8170615937758966, + "learning_rate": 7.74071183441179e-06, + "loss": 0.2022, + "step": 89280 + }, + { + "epoch": 1.1515505200642262, + "grad_norm": 0.8411342819968683, + "learning_rate": 7.740084261554152e-06, + "loss": 0.2215, + "step": 89290 + }, + { + "epoch": 1.151679488254222, + "grad_norm": 0.8210984225491573, + "learning_rate": 7.739456626994835e-06, + "loss": 0.2201, + "step": 89300 + }, + { + "epoch": 1.151808456444218, + "grad_norm": 0.8648644728595853, + "learning_rate": 7.73882893074797e-06, + "loss": 0.2088, + "step": 89310 + }, + { + "epoch": 1.151937424634214, + "grad_norm": 0.9003329375919455, + "learning_rate": 7.738201172827692e-06, + "loss": 0.202, + "step": 89320 + }, + { + "epoch": 1.1520663928242099, + "grad_norm": 0.8156294506608199, + "learning_rate": 7.73757335324814e-06, + "loss": 0.2167, + "step": 89330 + }, + { + "epoch": 1.152195361014206, + "grad_norm": 0.7829401065305925, + "learning_rate": 7.736945472023447e-06, + "loss": 0.1983, + "step": 89340 + }, + { + "epoch": 1.1523243292042018, + "grad_norm": 0.8102126478152529, + "learning_rate": 7.736317529167756e-06, + "loss": 0.2046, + "step": 89350 + }, + { + "epoch": 1.1524532973941977, + "grad_norm": 0.7984506946602128, + "learning_rate": 7.735689524695204e-06, + "loss": 0.2062, + "step": 89360 + }, + { + "epoch": 1.1525822655841937, + "grad_norm": 0.8285005199932878, + "learning_rate": 7.735061458619935e-06, + "loss": 0.2114, + "step": 89370 + }, + { + "epoch": 1.1527112337741896, + "grad_norm": 0.821365340609704, + "learning_rate": 7.734433330956088e-06, + "loss": 0.2061, + "step": 89380 + }, + { + "epoch": 1.1528402019641855, + "grad_norm": 0.8694276406819245, + "learning_rate": 7.733805141717812e-06, + "loss": 0.1978, + "step": 89390 + }, + { + "epoch": 1.1529691701541815, + "grad_norm": 0.9370139824112894, + "learning_rate": 7.733176890919249e-06, + "loss": 0.2158, + "step": 89400 + }, + { + "epoch": 1.1530981383441774, + "grad_norm": 0.911883186239221, + "learning_rate": 7.732548578574547e-06, + "loss": 0.2076, + "step": 89410 + }, + { + "epoch": 1.1532271065341733, + "grad_norm": 0.8510369607407524, + "learning_rate": 7.731920204697857e-06, + "loss": 0.2105, + "step": 89420 + }, + { + "epoch": 1.1533560747241693, + "grad_norm": 0.783952034889638, + "learning_rate": 7.731291769303326e-06, + "loss": 0.2085, + "step": 89430 + }, + { + "epoch": 1.1534850429141652, + "grad_norm": 0.8401801963221541, + "learning_rate": 7.730663272405105e-06, + "loss": 0.2072, + "step": 89440 + }, + { + "epoch": 1.153614011104161, + "grad_norm": 0.8067177518791326, + "learning_rate": 7.73003471401735e-06, + "loss": 0.1982, + "step": 89450 + }, + { + "epoch": 1.1537429792941571, + "grad_norm": 0.9214717021029327, + "learning_rate": 7.729406094154211e-06, + "loss": 0.2108, + "step": 89460 + }, + { + "epoch": 1.153871947484153, + "grad_norm": 0.8930342476881223, + "learning_rate": 7.728777412829844e-06, + "loss": 0.2124, + "step": 89470 + }, + { + "epoch": 1.1540009156741489, + "grad_norm": 0.8721025340557984, + "learning_rate": 7.72814867005841e-06, + "loss": 0.2175, + "step": 89480 + }, + { + "epoch": 1.154129883864145, + "grad_norm": 0.8591284342441428, + "learning_rate": 7.72751986585406e-06, + "loss": 0.1957, + "step": 89490 + }, + { + "epoch": 1.1542588520541408, + "grad_norm": 0.8625413192438389, + "learning_rate": 7.72689100023096e-06, + "loss": 0.2064, + "step": 89500 + }, + { + "epoch": 1.1543878202441369, + "grad_norm": 0.7915477139126684, + "learning_rate": 7.726262073203268e-06, + "loss": 0.2079, + "step": 89510 + }, + { + "epoch": 1.1545167884341327, + "grad_norm": 0.7698487199756174, + "learning_rate": 7.725633084785145e-06, + "loss": 0.1978, + "step": 89520 + }, + { + "epoch": 1.1546457566241286, + "grad_norm": 0.8521261467987237, + "learning_rate": 7.725004034990757e-06, + "loss": 0.2034, + "step": 89530 + }, + { + "epoch": 1.1547747248141247, + "grad_norm": 0.8113893726808263, + "learning_rate": 7.724374923834267e-06, + "loss": 0.2108, + "step": 89540 + }, + { + "epoch": 1.1549036930041205, + "grad_norm": 0.8676053124989869, + "learning_rate": 7.723745751329846e-06, + "loss": 0.1999, + "step": 89550 + }, + { + "epoch": 1.1550326611941164, + "grad_norm": 0.8500597603685822, + "learning_rate": 7.723116517491656e-06, + "loss": 0.2074, + "step": 89560 + }, + { + "epoch": 1.1551616293841125, + "grad_norm": 0.7516677913176376, + "learning_rate": 7.72248722233387e-06, + "loss": 0.2049, + "step": 89570 + }, + { + "epoch": 1.1552905975741083, + "grad_norm": 0.8639711025818044, + "learning_rate": 7.721857865870657e-06, + "loss": 0.1996, + "step": 89580 + }, + { + "epoch": 1.1554195657641042, + "grad_norm": 0.8722254234402114, + "learning_rate": 7.721228448116189e-06, + "loss": 0.2064, + "step": 89590 + }, + { + "epoch": 1.1555485339541003, + "grad_norm": 0.7627074882529626, + "learning_rate": 7.720598969084638e-06, + "loss": 0.2021, + "step": 89600 + }, + { + "epoch": 1.1556775021440961, + "grad_norm": 0.7378480608971141, + "learning_rate": 7.719969428790185e-06, + "loss": 0.2024, + "step": 89610 + }, + { + "epoch": 1.155806470334092, + "grad_norm": 0.9242979111120367, + "learning_rate": 7.719339827246997e-06, + "loss": 0.2148, + "step": 89620 + }, + { + "epoch": 1.155935438524088, + "grad_norm": 0.9152602015129611, + "learning_rate": 7.718710164469257e-06, + "loss": 0.2036, + "step": 89630 + }, + { + "epoch": 1.156064406714084, + "grad_norm": 0.8340716772613711, + "learning_rate": 7.718080440471143e-06, + "loss": 0.2021, + "step": 89640 + }, + { + "epoch": 1.1561933749040798, + "grad_norm": 0.9251273060187298, + "learning_rate": 7.717450655266834e-06, + "loss": 0.205, + "step": 89650 + }, + { + "epoch": 1.1563223430940759, + "grad_norm": 0.8149667639480287, + "learning_rate": 7.716820808870513e-06, + "loss": 0.213, + "step": 89660 + }, + { + "epoch": 1.1564513112840717, + "grad_norm": 0.796235979583692, + "learning_rate": 7.716190901296365e-06, + "loss": 0.1966, + "step": 89670 + }, + { + "epoch": 1.1565802794740678, + "grad_norm": 0.8850299068315125, + "learning_rate": 7.715560932558569e-06, + "loss": 0.193, + "step": 89680 + }, + { + "epoch": 1.1567092476640637, + "grad_norm": 0.988503077494892, + "learning_rate": 7.714930902671314e-06, + "loss": 0.2172, + "step": 89690 + }, + { + "epoch": 1.1568382158540595, + "grad_norm": 0.8904025930355437, + "learning_rate": 7.714300811648788e-06, + "loss": 0.204, + "step": 89700 + }, + { + "epoch": 1.1569671840440556, + "grad_norm": 0.8482092373454638, + "learning_rate": 7.713670659505177e-06, + "loss": 0.2026, + "step": 89710 + }, + { + "epoch": 1.1570961522340515, + "grad_norm": 0.8651133001405313, + "learning_rate": 7.713040446254673e-06, + "loss": 0.2081, + "step": 89720 + }, + { + "epoch": 1.1572251204240473, + "grad_norm": 0.7797085663776366, + "learning_rate": 7.712410171911466e-06, + "loss": 0.2155, + "step": 89730 + }, + { + "epoch": 1.1573540886140434, + "grad_norm": 0.8146088419665851, + "learning_rate": 7.711779836489749e-06, + "loss": 0.2141, + "step": 89740 + }, + { + "epoch": 1.1574830568040393, + "grad_norm": 0.8501747014571595, + "learning_rate": 7.711149440003716e-06, + "loss": 0.2033, + "step": 89750 + }, + { + "epoch": 1.1576120249940351, + "grad_norm": 0.9472457114060411, + "learning_rate": 7.710518982467564e-06, + "loss": 0.2174, + "step": 89760 + }, + { + "epoch": 1.1577409931840312, + "grad_norm": 0.8486666996399097, + "learning_rate": 7.709888463895486e-06, + "loss": 0.2083, + "step": 89770 + }, + { + "epoch": 1.157869961374027, + "grad_norm": 0.7597879497118868, + "learning_rate": 7.709257884301684e-06, + "loss": 0.1932, + "step": 89780 + }, + { + "epoch": 1.157998929564023, + "grad_norm": 0.8873842333989376, + "learning_rate": 7.708627243700352e-06, + "loss": 0.21, + "step": 89790 + }, + { + "epoch": 1.158127897754019, + "grad_norm": 0.7972326639969247, + "learning_rate": 7.707996542105698e-06, + "loss": 0.2054, + "step": 89800 + }, + { + "epoch": 1.158256865944015, + "grad_norm": 0.885373911159646, + "learning_rate": 7.707365779531922e-06, + "loss": 0.2159, + "step": 89810 + }, + { + "epoch": 1.1583858341340108, + "grad_norm": 0.8070871663179026, + "learning_rate": 7.706734955993224e-06, + "loss": 0.1984, + "step": 89820 + }, + { + "epoch": 1.1585148023240068, + "grad_norm": 0.7960117828234323, + "learning_rate": 7.70610407150381e-06, + "loss": 0.1935, + "step": 89830 + }, + { + "epoch": 1.1586437705140027, + "grad_norm": 0.7665266157235017, + "learning_rate": 7.705473126077892e-06, + "loss": 0.2078, + "step": 89840 + }, + { + "epoch": 1.1587727387039988, + "grad_norm": 0.9395390231917734, + "learning_rate": 7.70484211972967e-06, + "loss": 0.2045, + "step": 89850 + }, + { + "epoch": 1.1589017068939946, + "grad_norm": 0.8305566107472621, + "learning_rate": 7.70421105247336e-06, + "loss": 0.2067, + "step": 89860 + }, + { + "epoch": 1.1590306750839905, + "grad_norm": 0.8815396499412403, + "learning_rate": 7.703579924323168e-06, + "loss": 0.2213, + "step": 89870 + }, + { + "epoch": 1.1591596432739864, + "grad_norm": 0.7945654320924442, + "learning_rate": 7.702948735293304e-06, + "loss": 0.1994, + "step": 89880 + }, + { + "epoch": 1.1592886114639824, + "grad_norm": 0.9376142104157472, + "learning_rate": 7.702317485397987e-06, + "loss": 0.2039, + "step": 89890 + }, + { + "epoch": 1.1594175796539783, + "grad_norm": 0.8280751411852246, + "learning_rate": 7.70168617465143e-06, + "loss": 0.1968, + "step": 89900 + }, + { + "epoch": 1.1595465478439744, + "grad_norm": 0.8613236876286859, + "learning_rate": 7.701054803067847e-06, + "loss": 0.2008, + "step": 89910 + }, + { + "epoch": 1.1596755160339702, + "grad_norm": 0.835595992266913, + "learning_rate": 7.700423370661455e-06, + "loss": 0.2097, + "step": 89920 + }, + { + "epoch": 1.159804484223966, + "grad_norm": 0.7916192079919658, + "learning_rate": 7.699791877446476e-06, + "loss": 0.2047, + "step": 89930 + }, + { + "epoch": 1.1599334524139622, + "grad_norm": 0.8371005519324267, + "learning_rate": 7.699160323437125e-06, + "loss": 0.1965, + "step": 89940 + }, + { + "epoch": 1.160062420603958, + "grad_norm": 0.8159156859797243, + "learning_rate": 7.698528708647627e-06, + "loss": 0.2054, + "step": 89950 + }, + { + "epoch": 1.160191388793954, + "grad_norm": 0.7624650985696901, + "learning_rate": 7.697897033092205e-06, + "loss": 0.2058, + "step": 89960 + }, + { + "epoch": 1.16032035698395, + "grad_norm": 0.9020885469648122, + "learning_rate": 7.697265296785082e-06, + "loss": 0.2137, + "step": 89970 + }, + { + "epoch": 1.1604493251739458, + "grad_norm": 0.862659261668591, + "learning_rate": 7.696633499740484e-06, + "loss": 0.2074, + "step": 89980 + }, + { + "epoch": 1.1605782933639417, + "grad_norm": 0.9360830028638365, + "learning_rate": 7.696001641972639e-06, + "loss": 0.2158, + "step": 89990 + }, + { + "epoch": 1.1607072615539378, + "grad_norm": 0.838484997746353, + "learning_rate": 7.695369723495771e-06, + "loss": 0.2092, + "step": 90000 + }, + { + "epoch": 1.1608362297439336, + "grad_norm": 0.9020711801916813, + "learning_rate": 7.694737744324116e-06, + "loss": 0.2161, + "step": 90010 + }, + { + "epoch": 1.1609651979339297, + "grad_norm": 0.8769187778418519, + "learning_rate": 7.6941057044719e-06, + "loss": 0.2077, + "step": 90020 + }, + { + "epoch": 1.1610941661239256, + "grad_norm": 0.8562094265177906, + "learning_rate": 7.693473603953356e-06, + "loss": 0.2096, + "step": 90030 + }, + { + "epoch": 1.1612231343139214, + "grad_norm": 0.8635392555800797, + "learning_rate": 7.692841442782723e-06, + "loss": 0.2045, + "step": 90040 + }, + { + "epoch": 1.1613521025039173, + "grad_norm": 0.7689746761437045, + "learning_rate": 7.692209220974227e-06, + "loss": 0.1899, + "step": 90050 + }, + { + "epoch": 1.1614810706939134, + "grad_norm": 0.8828415338916697, + "learning_rate": 7.691576938542113e-06, + "loss": 0.2153, + "step": 90060 + }, + { + "epoch": 1.1616100388839092, + "grad_norm": 0.8555653305373185, + "learning_rate": 7.690944595500613e-06, + "loss": 0.201, + "step": 90070 + }, + { + "epoch": 1.1617390070739053, + "grad_norm": 0.8517911266582954, + "learning_rate": 7.690312191863968e-06, + "loss": 0.2071, + "step": 90080 + }, + { + "epoch": 1.1618679752639012, + "grad_norm": 0.8466051566345667, + "learning_rate": 7.689679727646423e-06, + "loss": 0.2014, + "step": 90090 + }, + { + "epoch": 1.161996943453897, + "grad_norm": 0.7839416794124037, + "learning_rate": 7.689047202862211e-06, + "loss": 0.2052, + "step": 90100 + }, + { + "epoch": 1.1621259116438931, + "grad_norm": 0.9251461518923464, + "learning_rate": 7.688414617525583e-06, + "loss": 0.2038, + "step": 90110 + }, + { + "epoch": 1.162254879833889, + "grad_norm": 0.7022986608079076, + "learning_rate": 7.687781971650783e-06, + "loss": 0.1942, + "step": 90120 + }, + { + "epoch": 1.1623838480238848, + "grad_norm": 0.8778748209212188, + "learning_rate": 7.687149265252052e-06, + "loss": 0.2162, + "step": 90130 + }, + { + "epoch": 1.162512816213881, + "grad_norm": 0.9420146252432076, + "learning_rate": 7.68651649834364e-06, + "loss": 0.2181, + "step": 90140 + }, + { + "epoch": 1.1626417844038768, + "grad_norm": 0.87238164784377, + "learning_rate": 7.685883670939797e-06, + "loss": 0.2052, + "step": 90150 + }, + { + "epoch": 1.1627707525938726, + "grad_norm": 0.7656670930805942, + "learning_rate": 7.685250783054774e-06, + "loss": 0.2047, + "step": 90160 + }, + { + "epoch": 1.1628997207838687, + "grad_norm": 0.8735410252752427, + "learning_rate": 7.684617834702818e-06, + "loss": 0.2062, + "step": 90170 + }, + { + "epoch": 1.1630286889738646, + "grad_norm": 0.7848398671048565, + "learning_rate": 7.683984825898187e-06, + "loss": 0.2069, + "step": 90180 + }, + { + "epoch": 1.1631576571638604, + "grad_norm": 0.7673863472774483, + "learning_rate": 7.68335175665513e-06, + "loss": 0.2142, + "step": 90190 + }, + { + "epoch": 1.1632866253538565, + "grad_norm": 0.7680486350136927, + "learning_rate": 7.682718626987909e-06, + "loss": 0.2001, + "step": 90200 + }, + { + "epoch": 1.1634155935438524, + "grad_norm": 0.8167702877569868, + "learning_rate": 7.682085436910773e-06, + "loss": 0.2027, + "step": 90210 + }, + { + "epoch": 1.1635445617338482, + "grad_norm": 0.8218328413857301, + "learning_rate": 7.681452186437987e-06, + "loss": 0.2101, + "step": 90220 + }, + { + "epoch": 1.1636735299238443, + "grad_norm": 0.9211459010585156, + "learning_rate": 7.68081887558381e-06, + "loss": 0.1987, + "step": 90230 + }, + { + "epoch": 1.1638024981138402, + "grad_norm": 0.7675339096325425, + "learning_rate": 7.680185504362498e-06, + "loss": 0.2008, + "step": 90240 + }, + { + "epoch": 1.1639314663038363, + "grad_norm": 0.9038984559930612, + "learning_rate": 7.679552072788317e-06, + "loss": 0.2023, + "step": 90250 + }, + { + "epoch": 1.1640604344938321, + "grad_norm": 0.8573353045483367, + "learning_rate": 7.67891858087553e-06, + "loss": 0.2048, + "step": 90260 + }, + { + "epoch": 1.164189402683828, + "grad_norm": 0.8857302138290657, + "learning_rate": 7.678285028638404e-06, + "loss": 0.2073, + "step": 90270 + }, + { + "epoch": 1.164318370873824, + "grad_norm": 0.8905326847013029, + "learning_rate": 7.677651416091204e-06, + "loss": 0.1944, + "step": 90280 + }, + { + "epoch": 1.16444733906382, + "grad_norm": 0.8766508622891035, + "learning_rate": 7.677017743248193e-06, + "loss": 0.2085, + "step": 90290 + }, + { + "epoch": 1.1645763072538158, + "grad_norm": 0.8543994022985812, + "learning_rate": 7.676384010123649e-06, + "loss": 0.211, + "step": 90300 + }, + { + "epoch": 1.1647052754438119, + "grad_norm": 0.879203154928959, + "learning_rate": 7.675750216731838e-06, + "loss": 0.2, + "step": 90310 + }, + { + "epoch": 1.1648342436338077, + "grad_norm": 0.8355696903122338, + "learning_rate": 7.675116363087032e-06, + "loss": 0.2118, + "step": 90320 + }, + { + "epoch": 1.1649632118238036, + "grad_norm": 0.8303223644198576, + "learning_rate": 7.674482449203501e-06, + "loss": 0.2238, + "step": 90330 + }, + { + "epoch": 1.1650921800137997, + "grad_norm": 0.8335030705361465, + "learning_rate": 7.673848475095527e-06, + "loss": 0.2125, + "step": 90340 + }, + { + "epoch": 1.1652211482037955, + "grad_norm": 0.8789319017381128, + "learning_rate": 7.673214440777382e-06, + "loss": 0.2078, + "step": 90350 + }, + { + "epoch": 1.1653501163937914, + "grad_norm": 0.7957389878228726, + "learning_rate": 7.67258034626334e-06, + "loss": 0.2065, + "step": 90360 + }, + { + "epoch": 1.1654790845837875, + "grad_norm": 0.8257424780721617, + "learning_rate": 7.671946191567685e-06, + "loss": 0.1888, + "step": 90370 + }, + { + "epoch": 1.1656080527737833, + "grad_norm": 0.8319252971579582, + "learning_rate": 7.671311976704693e-06, + "loss": 0.2058, + "step": 90380 + }, + { + "epoch": 1.1657370209637792, + "grad_norm": 0.7793214307638604, + "learning_rate": 7.670677701688648e-06, + "loss": 0.1988, + "step": 90390 + }, + { + "epoch": 1.1658659891537753, + "grad_norm": 0.9027100494483223, + "learning_rate": 7.670043366533832e-06, + "loss": 0.213, + "step": 90400 + }, + { + "epoch": 1.1659949573437711, + "grad_norm": 0.908160744436715, + "learning_rate": 7.669408971254529e-06, + "loss": 0.2163, + "step": 90410 + }, + { + "epoch": 1.1661239255337672, + "grad_norm": 0.7893577749116171, + "learning_rate": 7.668774515865022e-06, + "loss": 0.2124, + "step": 90420 + }, + { + "epoch": 1.166252893723763, + "grad_norm": 0.917461556219453, + "learning_rate": 7.668140000379604e-06, + "loss": 0.2023, + "step": 90430 + }, + { + "epoch": 1.166381861913759, + "grad_norm": 0.882986750881463, + "learning_rate": 7.667505424812557e-06, + "loss": 0.2087, + "step": 90440 + }, + { + "epoch": 1.166510830103755, + "grad_norm": 0.8466568692213753, + "learning_rate": 7.666870789178173e-06, + "loss": 0.2122, + "step": 90450 + }, + { + "epoch": 1.1666397982937509, + "grad_norm": 0.8214893721052636, + "learning_rate": 7.666236093490743e-06, + "loss": 0.1973, + "step": 90460 + }, + { + "epoch": 1.1667687664837467, + "grad_norm": 0.8193065335022821, + "learning_rate": 7.665601337764558e-06, + "loss": 0.1999, + "step": 90470 + }, + { + "epoch": 1.1668977346737428, + "grad_norm": 0.8821955787281937, + "learning_rate": 7.664966522013912e-06, + "loss": 0.1959, + "step": 90480 + }, + { + "epoch": 1.1670267028637387, + "grad_norm": 0.8100260771502747, + "learning_rate": 7.6643316462531e-06, + "loss": 0.2026, + "step": 90490 + }, + { + "epoch": 1.1671556710537345, + "grad_norm": 0.7918126827349438, + "learning_rate": 7.66369671049642e-06, + "loss": 0.2086, + "step": 90500 + }, + { + "epoch": 1.1672846392437306, + "grad_norm": 0.8599948182987844, + "learning_rate": 7.663061714758166e-06, + "loss": 0.2078, + "step": 90510 + }, + { + "epoch": 1.1674136074337265, + "grad_norm": 0.8741215470638873, + "learning_rate": 7.66242665905264e-06, + "loss": 0.2168, + "step": 90520 + }, + { + "epoch": 1.1675425756237223, + "grad_norm": 0.8237919958562303, + "learning_rate": 7.661791543394142e-06, + "loss": 0.2068, + "step": 90530 + }, + { + "epoch": 1.1676715438137184, + "grad_norm": 0.8791191775815743, + "learning_rate": 7.661156367796971e-06, + "loss": 0.2031, + "step": 90540 + }, + { + "epoch": 1.1678005120037143, + "grad_norm": 0.8380902311400971, + "learning_rate": 7.660521132275433e-06, + "loss": 0.203, + "step": 90550 + }, + { + "epoch": 1.1679294801937101, + "grad_norm": 0.7887324066847248, + "learning_rate": 7.65988583684383e-06, + "loss": 0.2041, + "step": 90560 + }, + { + "epoch": 1.1680584483837062, + "grad_norm": 0.9331477347946526, + "learning_rate": 7.65925048151647e-06, + "loss": 0.2152, + "step": 90570 + }, + { + "epoch": 1.168187416573702, + "grad_norm": 0.8452165409965509, + "learning_rate": 7.65861506630766e-06, + "loss": 0.2153, + "step": 90580 + }, + { + "epoch": 1.1683163847636981, + "grad_norm": 0.8733353471250279, + "learning_rate": 7.657979591231705e-06, + "loss": 0.2048, + "step": 90590 + }, + { + "epoch": 1.168445352953694, + "grad_norm": 0.9010645268389658, + "learning_rate": 7.65734405630292e-06, + "loss": 0.201, + "step": 90600 + }, + { + "epoch": 1.1685743211436899, + "grad_norm": 0.7580773302336943, + "learning_rate": 7.65670846153561e-06, + "loss": 0.2098, + "step": 90610 + }, + { + "epoch": 1.1687032893336857, + "grad_norm": 0.823733710557387, + "learning_rate": 7.65607280694409e-06, + "loss": 0.2097, + "step": 90620 + }, + { + "epoch": 1.1688322575236818, + "grad_norm": 0.79742786321426, + "learning_rate": 7.655437092542677e-06, + "loss": 0.2034, + "step": 90630 + }, + { + "epoch": 1.1689612257136777, + "grad_norm": 0.9341523925271249, + "learning_rate": 7.654801318345684e-06, + "loss": 0.2083, + "step": 90640 + }, + { + "epoch": 1.1690901939036737, + "grad_norm": 0.94135089947497, + "learning_rate": 7.654165484367423e-06, + "loss": 0.2152, + "step": 90650 + }, + { + "epoch": 1.1692191620936696, + "grad_norm": 0.826561591694064, + "learning_rate": 7.653529590622218e-06, + "loss": 0.2069, + "step": 90660 + }, + { + "epoch": 1.1693481302836655, + "grad_norm": 0.796631825013642, + "learning_rate": 7.652893637124385e-06, + "loss": 0.2054, + "step": 90670 + }, + { + "epoch": 1.1694770984736615, + "grad_norm": 0.8346097644682445, + "learning_rate": 7.652257623888247e-06, + "loss": 0.2073, + "step": 90680 + }, + { + "epoch": 1.1696060666636574, + "grad_norm": 0.8857036665249169, + "learning_rate": 7.651621550928126e-06, + "loss": 0.202, + "step": 90690 + }, + { + "epoch": 1.1697350348536533, + "grad_norm": 0.8331777486380757, + "learning_rate": 7.65098541825834e-06, + "loss": 0.2108, + "step": 90700 + }, + { + "epoch": 1.1698640030436493, + "grad_norm": 0.8369193810453014, + "learning_rate": 7.650349225893217e-06, + "loss": 0.1934, + "step": 90710 + }, + { + "epoch": 1.1699929712336452, + "grad_norm": 0.8332745445963365, + "learning_rate": 7.649712973847083e-06, + "loss": 0.2016, + "step": 90720 + }, + { + "epoch": 1.170121939423641, + "grad_norm": 0.8126651204711028, + "learning_rate": 7.649076662134268e-06, + "loss": 0.2052, + "step": 90730 + }, + { + "epoch": 1.1702509076136371, + "grad_norm": 0.752145811239181, + "learning_rate": 7.648440290769095e-06, + "loss": 0.2017, + "step": 90740 + }, + { + "epoch": 1.170379875803633, + "grad_norm": 0.8813224820603681, + "learning_rate": 7.647803859765897e-06, + "loss": 0.1929, + "step": 90750 + }, + { + "epoch": 1.170508843993629, + "grad_norm": 0.8039120782513366, + "learning_rate": 7.647167369139007e-06, + "loss": 0.1994, + "step": 90760 + }, + { + "epoch": 1.170637812183625, + "grad_norm": 0.8631989022837154, + "learning_rate": 7.646530818902752e-06, + "loss": 0.2086, + "step": 90770 + }, + { + "epoch": 1.1707667803736208, + "grad_norm": 0.7983421011071713, + "learning_rate": 7.64589420907147e-06, + "loss": 0.2125, + "step": 90780 + }, + { + "epoch": 1.1708957485636167, + "grad_norm": 0.9354999516703503, + "learning_rate": 7.645257539659497e-06, + "loss": 0.199, + "step": 90790 + }, + { + "epoch": 1.1710247167536127, + "grad_norm": 0.8351404300144227, + "learning_rate": 7.644620810681169e-06, + "loss": 0.2061, + "step": 90800 + }, + { + "epoch": 1.1711536849436086, + "grad_norm": 0.897108897494354, + "learning_rate": 7.64398402215082e-06, + "loss": 0.2055, + "step": 90810 + }, + { + "epoch": 1.1712826531336047, + "grad_norm": 0.9134874839524535, + "learning_rate": 7.643347174082796e-06, + "loss": 0.2033, + "step": 90820 + }, + { + "epoch": 1.1714116213236005, + "grad_norm": 0.8456683579682194, + "learning_rate": 7.642710266491434e-06, + "loss": 0.1971, + "step": 90830 + }, + { + "epoch": 1.1715405895135964, + "grad_norm": 0.8710653597884328, + "learning_rate": 7.642073299391073e-06, + "loss": 0.212, + "step": 90840 + }, + { + "epoch": 1.1716695577035925, + "grad_norm": 0.8852536313387712, + "learning_rate": 7.641436272796064e-06, + "loss": 0.1996, + "step": 90850 + }, + { + "epoch": 1.1717985258935883, + "grad_norm": 0.8903358084561649, + "learning_rate": 7.640799186720745e-06, + "loss": 0.2031, + "step": 90860 + }, + { + "epoch": 1.1719274940835842, + "grad_norm": 0.8873112916591638, + "learning_rate": 7.640162041179463e-06, + "loss": 0.2189, + "step": 90870 + }, + { + "epoch": 1.1720564622735803, + "grad_norm": 0.8549940773287454, + "learning_rate": 7.63952483618657e-06, + "loss": 0.1968, + "step": 90880 + }, + { + "epoch": 1.1721854304635762, + "grad_norm": 0.855693751182055, + "learning_rate": 7.638887571756408e-06, + "loss": 0.2036, + "step": 90890 + }, + { + "epoch": 1.172314398653572, + "grad_norm": 0.8919943684372104, + "learning_rate": 7.63825024790333e-06, + "loss": 0.2105, + "step": 90900 + }, + { + "epoch": 1.172443366843568, + "grad_norm": 0.8173989105106573, + "learning_rate": 7.637612864641692e-06, + "loss": 0.1975, + "step": 90910 + }, + { + "epoch": 1.172572335033564, + "grad_norm": 0.885741595783728, + "learning_rate": 7.63697542198584e-06, + "loss": 0.2188, + "step": 90920 + }, + { + "epoch": 1.17270130322356, + "grad_norm": 0.8631876190476864, + "learning_rate": 7.63633791995013e-06, + "loss": 0.195, + "step": 90930 + }, + { + "epoch": 1.172830271413556, + "grad_norm": 0.8934973119250649, + "learning_rate": 7.635700358548918e-06, + "loss": 0.1936, + "step": 90940 + }, + { + "epoch": 1.1729592396035518, + "grad_norm": 0.8712851110966118, + "learning_rate": 7.63506273779656e-06, + "loss": 0.2076, + "step": 90950 + }, + { + "epoch": 1.1730882077935476, + "grad_norm": 0.8225555983894988, + "learning_rate": 7.634425057707417e-06, + "loss": 0.2051, + "step": 90960 + }, + { + "epoch": 1.1732171759835437, + "grad_norm": 0.8258428356290445, + "learning_rate": 7.633787318295843e-06, + "loss": 0.2029, + "step": 90970 + }, + { + "epoch": 1.1733461441735396, + "grad_norm": 0.8417669643433126, + "learning_rate": 7.633149519576202e-06, + "loss": 0.1987, + "step": 90980 + }, + { + "epoch": 1.1734751123635356, + "grad_norm": 0.8387949735810196, + "learning_rate": 7.632511661562858e-06, + "loss": 0.2079, + "step": 90990 + }, + { + "epoch": 1.1736040805535315, + "grad_norm": 0.8519753607428053, + "learning_rate": 7.631873744270172e-06, + "loss": 0.2034, + "step": 91000 + }, + { + "epoch": 1.1737330487435274, + "grad_norm": 0.8254939722550971, + "learning_rate": 7.631235767712506e-06, + "loss": 0.2097, + "step": 91010 + }, + { + "epoch": 1.1738620169335234, + "grad_norm": 0.8839870265869235, + "learning_rate": 7.63059773190423e-06, + "loss": 0.2051, + "step": 91020 + }, + { + "epoch": 1.1739909851235193, + "grad_norm": 0.7210541969481492, + "learning_rate": 7.629959636859712e-06, + "loss": 0.1993, + "step": 91030 + }, + { + "epoch": 1.1741199533135152, + "grad_norm": 0.828172456683699, + "learning_rate": 7.629321482593317e-06, + "loss": 0.1982, + "step": 91040 + }, + { + "epoch": 1.1742489215035112, + "grad_norm": 0.7988331331490782, + "learning_rate": 7.628683269119419e-06, + "loss": 0.2055, + "step": 91050 + }, + { + "epoch": 1.174377889693507, + "grad_norm": 0.8466916389421648, + "learning_rate": 7.6280449964523866e-06, + "loss": 0.2053, + "step": 91060 + }, + { + "epoch": 1.174506857883503, + "grad_norm": 0.8215852787781319, + "learning_rate": 7.627406664606594e-06, + "loss": 0.195, + "step": 91070 + }, + { + "epoch": 1.174635826073499, + "grad_norm": 0.9354567345343459, + "learning_rate": 7.6267682735964145e-06, + "loss": 0.2176, + "step": 91080 + }, + { + "epoch": 1.174764794263495, + "grad_norm": 0.8504553045995504, + "learning_rate": 7.626129823436224e-06, + "loss": 0.2097, + "step": 91090 + }, + { + "epoch": 1.1748937624534908, + "grad_norm": 0.7980527218428299, + "learning_rate": 7.625491314140399e-06, + "loss": 0.2024, + "step": 91100 + }, + { + "epoch": 1.1750227306434868, + "grad_norm": 0.8129556266583078, + "learning_rate": 7.624852745723317e-06, + "loss": 0.2067, + "step": 91110 + }, + { + "epoch": 1.1751516988334827, + "grad_norm": 0.7566399326154684, + "learning_rate": 7.624214118199358e-06, + "loss": 0.214, + "step": 91120 + }, + { + "epoch": 1.1752806670234786, + "grad_norm": 0.8566861659375863, + "learning_rate": 7.623575431582902e-06, + "loss": 0.2171, + "step": 91130 + }, + { + "epoch": 1.1754096352134746, + "grad_norm": 0.8465543534718324, + "learning_rate": 7.622936685888333e-06, + "loss": 0.2156, + "step": 91140 + }, + { + "epoch": 1.1755386034034705, + "grad_norm": 0.8315208226448265, + "learning_rate": 7.6222978811300315e-06, + "loss": 0.1962, + "step": 91150 + }, + { + "epoch": 1.1756675715934666, + "grad_norm": 0.805300157714066, + "learning_rate": 7.621659017322385e-06, + "loss": 0.21, + "step": 91160 + }, + { + "epoch": 1.1757965397834624, + "grad_norm": 0.7954775174504151, + "learning_rate": 7.621020094479779e-06, + "loss": 0.209, + "step": 91170 + }, + { + "epoch": 1.1759255079734583, + "grad_norm": 0.7538614965578772, + "learning_rate": 7.620381112616599e-06, + "loss": 0.2015, + "step": 91180 + }, + { + "epoch": 1.1760544761634544, + "grad_norm": 0.8497188801870259, + "learning_rate": 7.619742071747235e-06, + "loss": 0.2082, + "step": 91190 + }, + { + "epoch": 1.1761834443534502, + "grad_norm": 0.8073575456766215, + "learning_rate": 7.619102971886076e-06, + "loss": 0.1899, + "step": 91200 + }, + { + "epoch": 1.176312412543446, + "grad_norm": 0.8858971656854869, + "learning_rate": 7.618463813047517e-06, + "loss": 0.1939, + "step": 91210 + }, + { + "epoch": 1.1764413807334422, + "grad_norm": 0.8417956326179762, + "learning_rate": 7.617824595245946e-06, + "loss": 0.2149, + "step": 91220 + }, + { + "epoch": 1.176570348923438, + "grad_norm": 0.7872638382686797, + "learning_rate": 7.617185318495759e-06, + "loss": 0.2013, + "step": 91230 + }, + { + "epoch": 1.176699317113434, + "grad_norm": 0.7900000783799865, + "learning_rate": 7.616545982811352e-06, + "loss": 0.197, + "step": 91240 + }, + { + "epoch": 1.17682828530343, + "grad_norm": 0.7390543481219418, + "learning_rate": 7.615906588207121e-06, + "loss": 0.1952, + "step": 91250 + }, + { + "epoch": 1.1769572534934258, + "grad_norm": 0.8097507055977425, + "learning_rate": 7.615267134697464e-06, + "loss": 0.2082, + "step": 91260 + }, + { + "epoch": 1.1770862216834217, + "grad_norm": 0.8997803874768474, + "learning_rate": 7.6146276222967795e-06, + "loss": 0.2158, + "step": 91270 + }, + { + "epoch": 1.1772151898734178, + "grad_norm": 0.8634202766675605, + "learning_rate": 7.613988051019469e-06, + "loss": 0.2117, + "step": 91280 + }, + { + "epoch": 1.1773441580634136, + "grad_norm": 0.781785110629373, + "learning_rate": 7.613348420879935e-06, + "loss": 0.2139, + "step": 91290 + }, + { + "epoch": 1.1774731262534095, + "grad_norm": 0.8945861784978969, + "learning_rate": 7.612708731892582e-06, + "loss": 0.2074, + "step": 91300 + }, + { + "epoch": 1.1776020944434056, + "grad_norm": 0.7978861053065743, + "learning_rate": 7.612068984071809e-06, + "loss": 0.2118, + "step": 91310 + }, + { + "epoch": 1.1777310626334014, + "grad_norm": 0.7734051937114874, + "learning_rate": 7.611429177432029e-06, + "loss": 0.205, + "step": 91320 + }, + { + "epoch": 1.1778600308233975, + "grad_norm": 0.8619196610702987, + "learning_rate": 7.610789311987645e-06, + "loss": 0.1959, + "step": 91330 + }, + { + "epoch": 1.1779889990133934, + "grad_norm": 0.8888351356915063, + "learning_rate": 7.610149387753067e-06, + "loss": 0.2123, + "step": 91340 + }, + { + "epoch": 1.1781179672033892, + "grad_norm": 0.7958116376513468, + "learning_rate": 7.609509404742705e-06, + "loss": 0.2018, + "step": 91350 + }, + { + "epoch": 1.178246935393385, + "grad_norm": 0.8029202734836907, + "learning_rate": 7.608869362970969e-06, + "loss": 0.2035, + "step": 91360 + }, + { + "epoch": 1.1783759035833812, + "grad_norm": 0.9198457279899693, + "learning_rate": 7.608229262452272e-06, + "loss": 0.2093, + "step": 91370 + }, + { + "epoch": 1.178504871773377, + "grad_norm": 0.9728329298329641, + "learning_rate": 7.607589103201029e-06, + "loss": 0.2155, + "step": 91380 + }, + { + "epoch": 1.1786338399633731, + "grad_norm": 0.8822139203764081, + "learning_rate": 7.606948885231655e-06, + "loss": 0.2048, + "step": 91390 + }, + { + "epoch": 1.178762808153369, + "grad_norm": 0.826448970791116, + "learning_rate": 7.606308608558566e-06, + "loss": 0.2011, + "step": 91400 + }, + { + "epoch": 1.1788917763433648, + "grad_norm": 0.7997604368294912, + "learning_rate": 7.605668273196178e-06, + "loss": 0.211, + "step": 91410 + }, + { + "epoch": 1.179020744533361, + "grad_norm": 0.813972707216253, + "learning_rate": 7.605027879158915e-06, + "loss": 0.2074, + "step": 91420 + }, + { + "epoch": 1.1791497127233568, + "grad_norm": 0.8237725987290404, + "learning_rate": 7.604387426461193e-06, + "loss": 0.2164, + "step": 91430 + }, + { + "epoch": 1.1792786809133526, + "grad_norm": 0.7803111064329621, + "learning_rate": 7.603746915117436e-06, + "loss": 0.2081, + "step": 91440 + }, + { + "epoch": 1.1794076491033487, + "grad_norm": 0.9001059037418175, + "learning_rate": 7.603106345142066e-06, + "loss": 0.2135, + "step": 91450 + }, + { + "epoch": 1.1795366172933446, + "grad_norm": 0.8507370815102676, + "learning_rate": 7.602465716549508e-06, + "loss": 0.2162, + "step": 91460 + }, + { + "epoch": 1.1796655854833404, + "grad_norm": 0.7955651065492, + "learning_rate": 7.601825029354188e-06, + "loss": 0.1878, + "step": 91470 + }, + { + "epoch": 1.1797945536733365, + "grad_norm": 0.8312573808176674, + "learning_rate": 7.601184283570533e-06, + "loss": 0.2172, + "step": 91480 + }, + { + "epoch": 1.1799235218633324, + "grad_norm": 0.7566609119200125, + "learning_rate": 7.60054347921297e-06, + "loss": 0.2128, + "step": 91490 + }, + { + "epoch": 1.1800524900533285, + "grad_norm": 0.7394655938740523, + "learning_rate": 7.599902616295933e-06, + "loss": 0.2096, + "step": 91500 + }, + { + "epoch": 1.1801814582433243, + "grad_norm": 0.8548510660418239, + "learning_rate": 7.5992616948338484e-06, + "loss": 0.2133, + "step": 91510 + }, + { + "epoch": 1.1803104264333202, + "grad_norm": 0.8619881432897174, + "learning_rate": 7.598620714841149e-06, + "loss": 0.2007, + "step": 91520 + }, + { + "epoch": 1.180439394623316, + "grad_norm": 0.7323127854280791, + "learning_rate": 7.597979676332272e-06, + "loss": 0.2034, + "step": 91530 + }, + { + "epoch": 1.1805683628133121, + "grad_norm": 0.9176717467565029, + "learning_rate": 7.597338579321649e-06, + "loss": 0.2257, + "step": 91540 + }, + { + "epoch": 1.180697331003308, + "grad_norm": 0.8763684146072518, + "learning_rate": 7.596697423823718e-06, + "loss": 0.2192, + "step": 91550 + }, + { + "epoch": 1.180826299193304, + "grad_norm": 0.9143199131912482, + "learning_rate": 7.596056209852918e-06, + "loss": 0.196, + "step": 91560 + }, + { + "epoch": 1.1809552673833, + "grad_norm": 0.7856029436902983, + "learning_rate": 7.595414937423684e-06, + "loss": 0.1993, + "step": 91570 + }, + { + "epoch": 1.1810842355732958, + "grad_norm": 0.8666507448723622, + "learning_rate": 7.594773606550459e-06, + "loss": 0.2042, + "step": 91580 + }, + { + "epoch": 1.1812132037632919, + "grad_norm": 0.7926290405448836, + "learning_rate": 7.594132217247683e-06, + "loss": 0.2051, + "step": 91590 + }, + { + "epoch": 1.1813421719532877, + "grad_norm": 0.8815338301393433, + "learning_rate": 7.593490769529801e-06, + "loss": 0.2101, + "step": 91600 + }, + { + "epoch": 1.1814711401432836, + "grad_norm": 0.8060217554701219, + "learning_rate": 7.592849263411257e-06, + "loss": 0.203, + "step": 91610 + }, + { + "epoch": 1.1816001083332797, + "grad_norm": 0.7742934489468702, + "learning_rate": 7.592207698906495e-06, + "loss": 0.2072, + "step": 91620 + }, + { + "epoch": 1.1817290765232755, + "grad_norm": 0.8737285543606835, + "learning_rate": 7.591566076029962e-06, + "loss": 0.2108, + "step": 91630 + }, + { + "epoch": 1.1818580447132714, + "grad_norm": 0.9515530884112601, + "learning_rate": 7.590924394796107e-06, + "loss": 0.2097, + "step": 91640 + }, + { + "epoch": 1.1819870129032675, + "grad_norm": 0.9092508528592013, + "learning_rate": 7.590282655219378e-06, + "loss": 0.2063, + "step": 91650 + }, + { + "epoch": 1.1821159810932633, + "grad_norm": 0.925328253415486, + "learning_rate": 7.5896408573142285e-06, + "loss": 0.2174, + "step": 91660 + }, + { + "epoch": 1.1822449492832594, + "grad_norm": 0.8957779650398513, + "learning_rate": 7.5889990010951105e-06, + "loss": 0.2182, + "step": 91670 + }, + { + "epoch": 1.1823739174732553, + "grad_norm": 0.7964423770368095, + "learning_rate": 7.588357086576474e-06, + "loss": 0.2036, + "step": 91680 + }, + { + "epoch": 1.1825028856632511, + "grad_norm": 0.8197959192407775, + "learning_rate": 7.587715113772776e-06, + "loss": 0.2035, + "step": 91690 + }, + { + "epoch": 1.182631853853247, + "grad_norm": 0.8870996970847979, + "learning_rate": 7.587073082698473e-06, + "loss": 0.1935, + "step": 91700 + }, + { + "epoch": 1.182760822043243, + "grad_norm": 0.8789175797095635, + "learning_rate": 7.586430993368022e-06, + "loss": 0.2054, + "step": 91710 + }, + { + "epoch": 1.182889790233239, + "grad_norm": 0.8640007219190753, + "learning_rate": 7.585788845795883e-06, + "loss": 0.2, + "step": 91720 + }, + { + "epoch": 1.183018758423235, + "grad_norm": 0.8523365895315318, + "learning_rate": 7.585146639996512e-06, + "loss": 0.2101, + "step": 91730 + }, + { + "epoch": 1.1831477266132309, + "grad_norm": 0.8555825822494544, + "learning_rate": 7.584504375984372e-06, + "loss": 0.2099, + "step": 91740 + }, + { + "epoch": 1.1832766948032267, + "grad_norm": 0.7859931264327162, + "learning_rate": 7.583862053773927e-06, + "loss": 0.2039, + "step": 91750 + }, + { + "epoch": 1.1834056629932228, + "grad_norm": 0.8657759658914943, + "learning_rate": 7.583219673379641e-06, + "loss": 0.2112, + "step": 91760 + }, + { + "epoch": 1.1835346311832187, + "grad_norm": 0.712867887003281, + "learning_rate": 7.5825772348159784e-06, + "loss": 0.2038, + "step": 91770 + }, + { + "epoch": 1.1836635993732145, + "grad_norm": 0.8370504562221991, + "learning_rate": 7.581934738097408e-06, + "loss": 0.2082, + "step": 91780 + }, + { + "epoch": 1.1837925675632106, + "grad_norm": 0.7687578766806219, + "learning_rate": 7.581292183238391e-06, + "loss": 0.1973, + "step": 91790 + }, + { + "epoch": 1.1839215357532065, + "grad_norm": 0.845838477238964, + "learning_rate": 7.580649570253403e-06, + "loss": 0.2094, + "step": 91800 + }, + { + "epoch": 1.1840505039432023, + "grad_norm": 0.7492779713371946, + "learning_rate": 7.580006899156913e-06, + "loss": 0.2128, + "step": 91810 + }, + { + "epoch": 1.1841794721331984, + "grad_norm": 0.859106751020175, + "learning_rate": 7.579364169963392e-06, + "loss": 0.1939, + "step": 91820 + }, + { + "epoch": 1.1843084403231943, + "grad_norm": 0.8272426753206036, + "learning_rate": 7.578721382687314e-06, + "loss": 0.2011, + "step": 91830 + }, + { + "epoch": 1.1844374085131901, + "grad_norm": 0.88703890847305, + "learning_rate": 7.578078537343152e-06, + "loss": 0.2008, + "step": 91840 + }, + { + "epoch": 1.1845663767031862, + "grad_norm": 0.8320343844429705, + "learning_rate": 7.5774356339453816e-06, + "loss": 0.2153, + "step": 91850 + }, + { + "epoch": 1.184695344893182, + "grad_norm": 0.760257134439281, + "learning_rate": 7.576792672508482e-06, + "loss": 0.2048, + "step": 91860 + }, + { + "epoch": 1.184824313083178, + "grad_norm": 0.8559552103816084, + "learning_rate": 7.576149653046929e-06, + "loss": 0.1975, + "step": 91870 + }, + { + "epoch": 1.184953281273174, + "grad_norm": 0.7960099386556956, + "learning_rate": 7.5755065755752035e-06, + "loss": 0.1998, + "step": 91880 + }, + { + "epoch": 1.1850822494631699, + "grad_norm": 0.6947755138455524, + "learning_rate": 7.574863440107788e-06, + "loss": 0.1955, + "step": 91890 + }, + { + "epoch": 1.185211217653166, + "grad_norm": 0.8773231552352583, + "learning_rate": 7.57422024665916e-06, + "loss": 0.2016, + "step": 91900 + }, + { + "epoch": 1.1853401858431618, + "grad_norm": 0.825654097445776, + "learning_rate": 7.5735769952438086e-06, + "loss": 0.2156, + "step": 91910 + }, + { + "epoch": 1.1854691540331577, + "grad_norm": 0.7420652787264918, + "learning_rate": 7.572933685876216e-06, + "loss": 0.2066, + "step": 91920 + }, + { + "epoch": 1.1855981222231537, + "grad_norm": 0.809276584348618, + "learning_rate": 7.572290318570867e-06, + "loss": 0.2141, + "step": 91930 + }, + { + "epoch": 1.1857270904131496, + "grad_norm": 0.8167837807479589, + "learning_rate": 7.571646893342252e-06, + "loss": 0.2066, + "step": 91940 + }, + { + "epoch": 1.1858560586031455, + "grad_norm": 0.7998131234908007, + "learning_rate": 7.571003410204858e-06, + "loss": 0.2131, + "step": 91950 + }, + { + "epoch": 1.1859850267931416, + "grad_norm": 0.8748492090007636, + "learning_rate": 7.570359869173174e-06, + "loss": 0.2144, + "step": 91960 + }, + { + "epoch": 1.1861139949831374, + "grad_norm": 0.9177160415839951, + "learning_rate": 7.569716270261693e-06, + "loss": 0.2072, + "step": 91970 + }, + { + "epoch": 1.1862429631731333, + "grad_norm": 0.782084657231375, + "learning_rate": 7.569072613484909e-06, + "loss": 0.1986, + "step": 91980 + }, + { + "epoch": 1.1863719313631294, + "grad_norm": 0.8381935362867535, + "learning_rate": 7.568428898857314e-06, + "loss": 0.2183, + "step": 91990 + }, + { + "epoch": 1.1865008995531252, + "grad_norm": 0.8471037751194335, + "learning_rate": 7.567785126393403e-06, + "loss": 0.2166, + "step": 92000 + }, + { + "epoch": 1.186629867743121, + "grad_norm": 0.7934980767653307, + "learning_rate": 7.567141296107673e-06, + "loss": 0.1979, + "step": 92010 + }, + { + "epoch": 1.1867588359331172, + "grad_norm": 0.8328916103810655, + "learning_rate": 7.566497408014622e-06, + "loss": 0.1986, + "step": 92020 + }, + { + "epoch": 1.186887804123113, + "grad_norm": 0.8613647597342552, + "learning_rate": 7.56585346212875e-06, + "loss": 0.1892, + "step": 92030 + }, + { + "epoch": 1.1870167723131089, + "grad_norm": 0.9197015912841368, + "learning_rate": 7.565209458464556e-06, + "loss": 0.2016, + "step": 92040 + }, + { + "epoch": 1.187145740503105, + "grad_norm": 0.7920780096181796, + "learning_rate": 7.564565397036543e-06, + "loss": 0.2085, + "step": 92050 + }, + { + "epoch": 1.1872747086931008, + "grad_norm": 0.8438558672152735, + "learning_rate": 7.563921277859214e-06, + "loss": 0.2058, + "step": 92060 + }, + { + "epoch": 1.187403676883097, + "grad_norm": 0.9498535134703198, + "learning_rate": 7.563277100947072e-06, + "loss": 0.2101, + "step": 92070 + }, + { + "epoch": 1.1875326450730928, + "grad_norm": 0.8578204309845916, + "learning_rate": 7.562632866314624e-06, + "loss": 0.2171, + "step": 92080 + }, + { + "epoch": 1.1876616132630886, + "grad_norm": 0.7957560415042734, + "learning_rate": 7.561988573976378e-06, + "loss": 0.1998, + "step": 92090 + }, + { + "epoch": 1.1877905814530847, + "grad_norm": 0.7121370468644647, + "learning_rate": 7.561344223946838e-06, + "loss": 0.1953, + "step": 92100 + }, + { + "epoch": 1.1879195496430806, + "grad_norm": 0.8308569474413625, + "learning_rate": 7.560699816240519e-06, + "loss": 0.2047, + "step": 92110 + }, + { + "epoch": 1.1880485178330764, + "grad_norm": 0.7898509597118719, + "learning_rate": 7.560055350871929e-06, + "loss": 0.2063, + "step": 92120 + }, + { + "epoch": 1.1881774860230725, + "grad_norm": 0.8211667022717272, + "learning_rate": 7.55941082785558e-06, + "loss": 0.2043, + "step": 92130 + }, + { + "epoch": 1.1883064542130684, + "grad_norm": 0.7876982836841627, + "learning_rate": 7.558766247205986e-06, + "loss": 0.207, + "step": 92140 + }, + { + "epoch": 1.1884354224030642, + "grad_norm": 0.8053484704083353, + "learning_rate": 7.558121608937663e-06, + "loss": 0.2072, + "step": 92150 + }, + { + "epoch": 1.1885643905930603, + "grad_norm": 0.8845630754023023, + "learning_rate": 7.557476913065125e-06, + "loss": 0.2042, + "step": 92160 + }, + { + "epoch": 1.1886933587830562, + "grad_norm": 0.8214932086649763, + "learning_rate": 7.556832159602889e-06, + "loss": 0.2079, + "step": 92170 + }, + { + "epoch": 1.188822326973052, + "grad_norm": 0.9117513423916166, + "learning_rate": 7.556187348565478e-06, + "loss": 0.2006, + "step": 92180 + }, + { + "epoch": 1.188951295163048, + "grad_norm": 0.8445268902492903, + "learning_rate": 7.555542479967406e-06, + "loss": 0.2045, + "step": 92190 + }, + { + "epoch": 1.189080263353044, + "grad_norm": 0.944351258128304, + "learning_rate": 7.554897553823199e-06, + "loss": 0.2141, + "step": 92200 + }, + { + "epoch": 1.1892092315430398, + "grad_norm": 0.8646139539492783, + "learning_rate": 7.554252570147378e-06, + "loss": 0.2236, + "step": 92210 + }, + { + "epoch": 1.189338199733036, + "grad_norm": 0.9133189569158239, + "learning_rate": 7.553607528954465e-06, + "loss": 0.2037, + "step": 92220 + }, + { + "epoch": 1.1894671679230318, + "grad_norm": 0.7798447441752857, + "learning_rate": 7.552962430258988e-06, + "loss": 0.2005, + "step": 92230 + }, + { + "epoch": 1.1895961361130278, + "grad_norm": 0.8471475814683287, + "learning_rate": 7.552317274075471e-06, + "loss": 0.1956, + "step": 92240 + }, + { + "epoch": 1.1897251043030237, + "grad_norm": 0.8559684062232713, + "learning_rate": 7.5516720604184435e-06, + "loss": 0.2096, + "step": 92250 + }, + { + "epoch": 1.1898540724930196, + "grad_norm": 0.8420769985683549, + "learning_rate": 7.551026789302434e-06, + "loss": 0.2152, + "step": 92260 + }, + { + "epoch": 1.1899830406830154, + "grad_norm": 0.7529101435730944, + "learning_rate": 7.550381460741974e-06, + "loss": 0.2073, + "step": 92270 + }, + { + "epoch": 1.1901120088730115, + "grad_norm": 0.7173535814050499, + "learning_rate": 7.549736074751591e-06, + "loss": 0.2029, + "step": 92280 + }, + { + "epoch": 1.1902409770630074, + "grad_norm": 0.7733555713620086, + "learning_rate": 7.549090631345823e-06, + "loss": 0.2045, + "step": 92290 + }, + { + "epoch": 1.1903699452530034, + "grad_norm": 0.802191545848666, + "learning_rate": 7.548445130539201e-06, + "loss": 0.1907, + "step": 92300 + }, + { + "epoch": 1.1904989134429993, + "grad_norm": 0.9292471478300355, + "learning_rate": 7.547799572346262e-06, + "loss": 0.2111, + "step": 92310 + }, + { + "epoch": 1.1906278816329952, + "grad_norm": 0.7921251586544501, + "learning_rate": 7.5471539567815435e-06, + "loss": 0.2056, + "step": 92320 + }, + { + "epoch": 1.1907568498229912, + "grad_norm": 0.8306485273093515, + "learning_rate": 7.54650828385958e-06, + "loss": 0.1969, + "step": 92330 + }, + { + "epoch": 1.190885818012987, + "grad_norm": 0.921920780817895, + "learning_rate": 7.545862553594914e-06, + "loss": 0.2047, + "step": 92340 + }, + { + "epoch": 1.191014786202983, + "grad_norm": 0.8487884324721238, + "learning_rate": 7.545216766002086e-06, + "loss": 0.2074, + "step": 92350 + }, + { + "epoch": 1.191143754392979, + "grad_norm": 0.836215278988711, + "learning_rate": 7.544570921095636e-06, + "loss": 0.2088, + "step": 92360 + }, + { + "epoch": 1.191272722582975, + "grad_norm": 0.8378347378515404, + "learning_rate": 7.54392501889011e-06, + "loss": 0.2059, + "step": 92370 + }, + { + "epoch": 1.1914016907729708, + "grad_norm": 0.8125755183731314, + "learning_rate": 7.543279059400052e-06, + "loss": 0.2178, + "step": 92380 + }, + { + "epoch": 1.1915306589629668, + "grad_norm": 0.8184318841396361, + "learning_rate": 7.542633042640005e-06, + "loss": 0.2049, + "step": 92390 + }, + { + "epoch": 1.1916596271529627, + "grad_norm": 0.9047550125318855, + "learning_rate": 7.541986968624519e-06, + "loss": 0.2139, + "step": 92400 + }, + { + "epoch": 1.1917885953429588, + "grad_norm": 0.8225079689264425, + "learning_rate": 7.541340837368141e-06, + "loss": 0.2062, + "step": 92410 + }, + { + "epoch": 1.1919175635329546, + "grad_norm": 0.8327437609473003, + "learning_rate": 7.540694648885422e-06, + "loss": 0.1959, + "step": 92420 + }, + { + "epoch": 1.1920465317229505, + "grad_norm": 0.8963055273070509, + "learning_rate": 7.540048403190911e-06, + "loss": 0.2186, + "step": 92430 + }, + { + "epoch": 1.1921754999129464, + "grad_norm": 0.8478289805864105, + "learning_rate": 7.5394021002991625e-06, + "loss": 0.2034, + "step": 92440 + }, + { + "epoch": 1.1923044681029424, + "grad_norm": 0.8434472935328692, + "learning_rate": 7.538755740224728e-06, + "loss": 0.2063, + "step": 92450 + }, + { + "epoch": 1.1924334362929383, + "grad_norm": 0.8770649169800132, + "learning_rate": 7.538109322982163e-06, + "loss": 0.2003, + "step": 92460 + }, + { + "epoch": 1.1925624044829344, + "grad_norm": 0.8819957858034775, + "learning_rate": 7.537462848586025e-06, + "loss": 0.2089, + "step": 92470 + }, + { + "epoch": 1.1926913726729302, + "grad_norm": 0.8487069719883635, + "learning_rate": 7.5368163170508705e-06, + "loss": 0.2035, + "step": 92480 + }, + { + "epoch": 1.192820340862926, + "grad_norm": 0.8175242819461039, + "learning_rate": 7.536169728391258e-06, + "loss": 0.2039, + "step": 92490 + }, + { + "epoch": 1.1929493090529222, + "grad_norm": 0.816436602389567, + "learning_rate": 7.535523082621747e-06, + "loss": 0.2077, + "step": 92500 + }, + { + "epoch": 1.193078277242918, + "grad_norm": 0.8879111411733098, + "learning_rate": 7.534876379756899e-06, + "loss": 0.2159, + "step": 92510 + }, + { + "epoch": 1.193207245432914, + "grad_norm": 0.8663401760902727, + "learning_rate": 7.534229619811278e-06, + "loss": 0.2044, + "step": 92520 + }, + { + "epoch": 1.19333621362291, + "grad_norm": 0.8332062776248375, + "learning_rate": 7.533582802799446e-06, + "loss": 0.2116, + "step": 92530 + }, + { + "epoch": 1.1934651818129058, + "grad_norm": 0.8027610681553551, + "learning_rate": 7.532935928735969e-06, + "loss": 0.202, + "step": 92540 + }, + { + "epoch": 1.1935941500029017, + "grad_norm": 0.9240170501337809, + "learning_rate": 7.532288997635414e-06, + "loss": 0.2119, + "step": 92550 + }, + { + "epoch": 1.1937231181928978, + "grad_norm": 0.8000394397093118, + "learning_rate": 7.531642009512348e-06, + "loss": 0.2015, + "step": 92560 + }, + { + "epoch": 1.1938520863828936, + "grad_norm": 0.8792904122202981, + "learning_rate": 7.530994964381337e-06, + "loss": 0.206, + "step": 92570 + }, + { + "epoch": 1.1939810545728895, + "grad_norm": 0.7709242299272666, + "learning_rate": 7.530347862256957e-06, + "loss": 0.2023, + "step": 92580 + }, + { + "epoch": 1.1941100227628856, + "grad_norm": 0.8486651600708504, + "learning_rate": 7.529700703153779e-06, + "loss": 0.1956, + "step": 92590 + }, + { + "epoch": 1.1942389909528814, + "grad_norm": 0.7483792767008802, + "learning_rate": 7.529053487086371e-06, + "loss": 0.21, + "step": 92600 + }, + { + "epoch": 1.1943679591428773, + "grad_norm": 0.8299166201520342, + "learning_rate": 7.528406214069309e-06, + "loss": 0.2122, + "step": 92610 + }, + { + "epoch": 1.1944969273328734, + "grad_norm": 0.8314213386555014, + "learning_rate": 7.52775888411717e-06, + "loss": 0.2037, + "step": 92620 + }, + { + "epoch": 1.1946258955228692, + "grad_norm": 0.731886020322836, + "learning_rate": 7.52711149724453e-06, + "loss": 0.2, + "step": 92630 + }, + { + "epoch": 1.1947548637128653, + "grad_norm": 0.8536936498246218, + "learning_rate": 7.526464053465967e-06, + "loss": 0.1997, + "step": 92640 + }, + { + "epoch": 1.1948838319028612, + "grad_norm": 0.8099396418325994, + "learning_rate": 7.525816552796061e-06, + "loss": 0.2086, + "step": 92650 + }, + { + "epoch": 1.195012800092857, + "grad_norm": 0.9516116628360908, + "learning_rate": 7.52516899524939e-06, + "loss": 0.2087, + "step": 92660 + }, + { + "epoch": 1.1951417682828531, + "grad_norm": 0.8060873400828943, + "learning_rate": 7.524521380840537e-06, + "loss": 0.2015, + "step": 92670 + }, + { + "epoch": 1.195270736472849, + "grad_norm": 0.7260266185160659, + "learning_rate": 7.523873709584087e-06, + "loss": 0.2025, + "step": 92680 + }, + { + "epoch": 1.1953997046628448, + "grad_norm": 0.8737876707057739, + "learning_rate": 7.523225981494623e-06, + "loss": 0.197, + "step": 92690 + }, + { + "epoch": 1.195528672852841, + "grad_norm": 0.7792297990135817, + "learning_rate": 7.52257819658673e-06, + "loss": 0.1937, + "step": 92700 + }, + { + "epoch": 1.1956576410428368, + "grad_norm": 0.8772519533943938, + "learning_rate": 7.521930354874996e-06, + "loss": 0.2102, + "step": 92710 + }, + { + "epoch": 1.1957866092328326, + "grad_norm": 0.8393353888906033, + "learning_rate": 7.5212824563740084e-06, + "loss": 0.2108, + "step": 92720 + }, + { + "epoch": 1.1959155774228287, + "grad_norm": 0.8058451482818336, + "learning_rate": 7.520634501098356e-06, + "loss": 0.2112, + "step": 92730 + }, + { + "epoch": 1.1960445456128246, + "grad_norm": 0.8183007988144495, + "learning_rate": 7.519986489062632e-06, + "loss": 0.1891, + "step": 92740 + }, + { + "epoch": 1.1961735138028204, + "grad_norm": 0.8595975346362087, + "learning_rate": 7.519338420281426e-06, + "loss": 0.216, + "step": 92750 + }, + { + "epoch": 1.1963024819928165, + "grad_norm": 0.7985788507210686, + "learning_rate": 7.518690294769333e-06, + "loss": 0.201, + "step": 92760 + }, + { + "epoch": 1.1964314501828124, + "grad_norm": 0.7787903773312846, + "learning_rate": 7.518042112540947e-06, + "loss": 0.2082, + "step": 92770 + }, + { + "epoch": 1.1965604183728082, + "grad_norm": 0.8956998993992402, + "learning_rate": 7.517393873610862e-06, + "loss": 0.215, + "step": 92780 + }, + { + "epoch": 1.1966893865628043, + "grad_norm": 0.7929752309487528, + "learning_rate": 7.516745577993678e-06, + "loss": 0.2049, + "step": 92790 + }, + { + "epoch": 1.1968183547528002, + "grad_norm": 0.8594206001251155, + "learning_rate": 7.516097225703993e-06, + "loss": 0.2156, + "step": 92800 + }, + { + "epoch": 1.1969473229427963, + "grad_norm": 0.8543820465570167, + "learning_rate": 7.5154488167564065e-06, + "loss": 0.2019, + "step": 92810 + }, + { + "epoch": 1.1970762911327921, + "grad_norm": 0.7947098838236916, + "learning_rate": 7.514800351165517e-06, + "loss": 0.2069, + "step": 92820 + }, + { + "epoch": 1.197205259322788, + "grad_norm": 0.8214181934368221, + "learning_rate": 7.51415182894593e-06, + "loss": 0.2055, + "step": 92830 + }, + { + "epoch": 1.197334227512784, + "grad_norm": 0.8035256395029866, + "learning_rate": 7.513503250112249e-06, + "loss": 0.1951, + "step": 92840 + }, + { + "epoch": 1.19746319570278, + "grad_norm": 0.8712477992201704, + "learning_rate": 7.512854614679075e-06, + "loss": 0.2036, + "step": 92850 + }, + { + "epoch": 1.1975921638927758, + "grad_norm": 0.8448877851445117, + "learning_rate": 7.5122059226610185e-06, + "loss": 0.2012, + "step": 92860 + }, + { + "epoch": 1.1977211320827719, + "grad_norm": 0.825806391285683, + "learning_rate": 7.511557174072686e-06, + "loss": 0.2016, + "step": 92870 + }, + { + "epoch": 1.1978501002727677, + "grad_norm": 0.8957126178574788, + "learning_rate": 7.510908368928683e-06, + "loss": 0.2192, + "step": 92880 + }, + { + "epoch": 1.1979790684627636, + "grad_norm": 0.7958565822072478, + "learning_rate": 7.5102595072436224e-06, + "loss": 0.209, + "step": 92890 + }, + { + "epoch": 1.1981080366527597, + "grad_norm": 0.8021162237195552, + "learning_rate": 7.509610589032115e-06, + "loss": 0.1923, + "step": 92900 + }, + { + "epoch": 1.1982370048427555, + "grad_norm": 0.7772460448049154, + "learning_rate": 7.508961614308773e-06, + "loss": 0.2016, + "step": 92910 + }, + { + "epoch": 1.1983659730327514, + "grad_norm": 0.7902480415738674, + "learning_rate": 7.508312583088211e-06, + "loss": 0.2061, + "step": 92920 + }, + { + "epoch": 1.1984949412227475, + "grad_norm": 0.9487815265344876, + "learning_rate": 7.50766349538504e-06, + "loss": 0.2195, + "step": 92930 + }, + { + "epoch": 1.1986239094127433, + "grad_norm": 0.8112863762568657, + "learning_rate": 7.507014351213881e-06, + "loss": 0.2111, + "step": 92940 + }, + { + "epoch": 1.1987528776027392, + "grad_norm": 0.7973855286490279, + "learning_rate": 7.50636515058935e-06, + "loss": 0.2042, + "step": 92950 + }, + { + "epoch": 1.1988818457927353, + "grad_norm": 0.8174248380911946, + "learning_rate": 7.505715893526066e-06, + "loss": 0.2048, + "step": 92960 + }, + { + "epoch": 1.1990108139827311, + "grad_norm": 0.8674550961748214, + "learning_rate": 7.5050665800386466e-06, + "loss": 0.2025, + "step": 92970 + }, + { + "epoch": 1.1991397821727272, + "grad_norm": 0.7934626344249764, + "learning_rate": 7.504417210141717e-06, + "loss": 0.2004, + "step": 92980 + }, + { + "epoch": 1.199268750362723, + "grad_norm": 0.8274425428139734, + "learning_rate": 7.503767783849898e-06, + "loss": 0.2093, + "step": 92990 + }, + { + "epoch": 1.199397718552719, + "grad_norm": 0.8271613278787738, + "learning_rate": 7.503118301177812e-06, + "loss": 0.2071, + "step": 93000 + }, + { + "epoch": 1.1995266867427148, + "grad_norm": 0.7774663705501322, + "learning_rate": 7.502468762140087e-06, + "loss": 0.1962, + "step": 93010 + }, + { + "epoch": 1.1996556549327109, + "grad_norm": 0.9234350525814508, + "learning_rate": 7.501819166751348e-06, + "loss": 0.2013, + "step": 93020 + }, + { + "epoch": 1.1997846231227067, + "grad_norm": 0.9013009386813539, + "learning_rate": 7.501169515026223e-06, + "loss": 0.209, + "step": 93030 + }, + { + "epoch": 1.1999135913127028, + "grad_norm": 0.8094858819096226, + "learning_rate": 7.500519806979341e-06, + "loss": 0.2005, + "step": 93040 + } + ], + "logging_steps": 10, + "max_steps": 232617, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 23262, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3264766508466176e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}