{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.619795885306982, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008099789405475458, "grad_norm": 8.217827796936035, "learning_rate": 5.000000000000001e-07, "loss": 1.6162, "step": 25 }, { "epoch": 0.016199578810950917, "grad_norm": 5.221391677856445, "learning_rate": 1.0000000000000002e-06, "loss": 1.3426, "step": 50 }, { "epoch": 0.02429936821642637, "grad_norm": 6.4570441246032715, "learning_rate": 1.5e-06, "loss": 1.0591, "step": 75 }, { "epoch": 0.03239915762190183, "grad_norm": 4.746372699737549, "learning_rate": 2.0000000000000003e-06, "loss": 0.9419, "step": 100 }, { "epoch": 0.04049894702737729, "grad_norm": 5.625121593475342, "learning_rate": 2.5e-06, "loss": 0.8389, "step": 125 }, { "epoch": 0.04859873643285274, "grad_norm": 6.477604866027832, "learning_rate": 3e-06, "loss": 0.8019, "step": 150 }, { "epoch": 0.056698525838328205, "grad_norm": 4.935534954071045, "learning_rate": 3.5e-06, "loss": 0.8007, "step": 175 }, { "epoch": 0.06479831524380367, "grad_norm": 5.263591289520264, "learning_rate": 4.000000000000001e-06, "loss": 0.7419, "step": 200 }, { "epoch": 0.07289810464927912, "grad_norm": 5.378949165344238, "learning_rate": 4.5e-06, "loss": 0.7391, "step": 225 }, { "epoch": 0.08099789405475458, "grad_norm": 5.626790523529053, "learning_rate": 5e-06, "loss": 0.7243, "step": 250 }, { "epoch": 0.08909768346023003, "grad_norm": 5.457970142364502, "learning_rate": 5.500000000000001e-06, "loss": 0.7273, "step": 275 }, { "epoch": 0.09719747286570549, "grad_norm": 4.948482036590576, "learning_rate": 6e-06, "loss": 0.7105, "step": 300 }, { "epoch": 0.10529726227118096, "grad_norm": 4.352142333984375, "learning_rate": 6.5000000000000004e-06, "loss": 0.7075, "step": 325 }, { "epoch": 0.11339705167665641, "grad_norm": 4.84460973739624, "learning_rate": 7e-06, "loss": 0.6808, "step": 350 }, { "epoch": 0.12149684108213187, "grad_norm": 5.333044052124023, "learning_rate": 7.500000000000001e-06, "loss": 0.6855, "step": 375 }, { "epoch": 0.12959663048760733, "grad_norm": 5.835122585296631, "learning_rate": 8.000000000000001e-06, "loss": 0.642, "step": 400 }, { "epoch": 0.1376964198930828, "grad_norm": 6.060296058654785, "learning_rate": 8.5e-06, "loss": 0.6443, "step": 425 }, { "epoch": 0.14579620929855824, "grad_norm": 4.375275135040283, "learning_rate": 9e-06, "loss": 0.6477, "step": 450 }, { "epoch": 0.1538959987040337, "grad_norm": 4.223667144775391, "learning_rate": 9.5e-06, "loss": 0.6369, "step": 475 }, { "epoch": 0.16199578810950915, "grad_norm": 10.034436225891113, "learning_rate": 1e-05, "loss": 0.6853, "step": 500 }, { "epoch": 0.1700955775149846, "grad_norm": 4.590435028076172, "learning_rate": 9.944444444444445e-06, "loss": 0.6149, "step": 525 }, { "epoch": 0.17819536692046006, "grad_norm": 4.104828357696533, "learning_rate": 9.88888888888889e-06, "loss": 0.6267, "step": 550 }, { "epoch": 0.18629515632593552, "grad_norm": 3.9677858352661133, "learning_rate": 9.833333333333333e-06, "loss": 0.5788, "step": 575 }, { "epoch": 0.19439494573141097, "grad_norm": 3.9867312908172607, "learning_rate": 9.777777777777779e-06, "loss": 0.616, "step": 600 }, { "epoch": 0.20249473513688643, "grad_norm": 4.993474960327148, "learning_rate": 9.722222222222223e-06, "loss": 0.6151, "step": 625 }, { "epoch": 0.2105945245423619, "grad_norm": 4.049670219421387, "learning_rate": 9.666666666666667e-06, "loss": 0.5898, "step": 650 }, { "epoch": 0.21869431394783737, "grad_norm": 3.711517095565796, "learning_rate": 9.611111111111112e-06, "loss": 0.5654, "step": 675 }, { "epoch": 0.22679410335331282, "grad_norm": 4.081414222717285, "learning_rate": 9.555555555555556e-06, "loss": 0.5608, "step": 700 }, { "epoch": 0.23489389275878828, "grad_norm": 4.10880708694458, "learning_rate": 9.5e-06, "loss": 0.5576, "step": 725 }, { "epoch": 0.24299368216426373, "grad_norm": 3.764904737472534, "learning_rate": 9.444444444444445e-06, "loss": 0.5249, "step": 750 }, { "epoch": 0.2510934715697392, "grad_norm": 3.7500803470611572, "learning_rate": 9.38888888888889e-06, "loss": 0.5724, "step": 775 }, { "epoch": 0.25919326097521467, "grad_norm": 4.319125175476074, "learning_rate": 9.333333333333334e-06, "loss": 0.5368, "step": 800 }, { "epoch": 0.2672930503806901, "grad_norm": 3.590097665786743, "learning_rate": 9.277777777777778e-06, "loss": 0.5494, "step": 825 }, { "epoch": 0.2753928397861656, "grad_norm": 5.081935405731201, "learning_rate": 9.222222222222224e-06, "loss": 0.5443, "step": 850 }, { "epoch": 0.28349262919164103, "grad_norm": 3.8603386878967285, "learning_rate": 9.166666666666666e-06, "loss": 0.5378, "step": 875 }, { "epoch": 0.2915924185971165, "grad_norm": 3.7635509967803955, "learning_rate": 9.111111111111112e-06, "loss": 0.5121, "step": 900 }, { "epoch": 0.29969220800259194, "grad_norm": 4.4741692543029785, "learning_rate": 9.055555555555556e-06, "loss": 0.5313, "step": 925 }, { "epoch": 0.3077919974080674, "grad_norm": 4.126248836517334, "learning_rate": 9e-06, "loss": 0.5036, "step": 950 }, { "epoch": 0.31589178681354285, "grad_norm": 3.2901930809020996, "learning_rate": 8.944444444444446e-06, "loss": 0.537, "step": 975 }, { "epoch": 0.3239915762190183, "grad_norm": 4.152955532073975, "learning_rate": 8.888888888888888e-06, "loss": 0.5304, "step": 1000 }, { "epoch": 0.3239915762190183, "eval_loss": 0.5235934853553772, "eval_runtime": 2073.7226, "eval_samples_per_second": 1.879, "eval_steps_per_second": 0.118, "eval_wer": 0.3785823799976412, "step": 1000 }, { "epoch": 0.33209136562449376, "grad_norm": 3.5854649543762207, "learning_rate": 8.833333333333334e-06, "loss": 0.5265, "step": 1025 }, { "epoch": 0.3401911550299692, "grad_norm": 3.5874733924865723, "learning_rate": 8.777777777777778e-06, "loss": 0.4957, "step": 1050 }, { "epoch": 0.34829094443544467, "grad_norm": 4.2191338539123535, "learning_rate": 8.722222222222224e-06, "loss": 0.4892, "step": 1075 }, { "epoch": 0.3563907338409201, "grad_norm": 4.080557823181152, "learning_rate": 8.666666666666668e-06, "loss": 0.4992, "step": 1100 }, { "epoch": 0.3644905232463956, "grad_norm": 3.3749568462371826, "learning_rate": 8.611111111111112e-06, "loss": 0.4999, "step": 1125 }, { "epoch": 0.37259031265187104, "grad_norm": 4.65897798538208, "learning_rate": 8.555555555555556e-06, "loss": 0.4893, "step": 1150 }, { "epoch": 0.3806901020573465, "grad_norm": 4.345097541809082, "learning_rate": 8.5e-06, "loss": 0.5078, "step": 1175 }, { "epoch": 0.38878989146282195, "grad_norm": 3.885808229446411, "learning_rate": 8.444444444444446e-06, "loss": 0.4683, "step": 1200 }, { "epoch": 0.3968896808682974, "grad_norm": 3.44722318649292, "learning_rate": 8.38888888888889e-06, "loss": 0.488, "step": 1225 }, { "epoch": 0.40498947027377286, "grad_norm": 3.6947743892669678, "learning_rate": 8.333333333333334e-06, "loss": 0.4743, "step": 1250 }, { "epoch": 0.41308925967924837, "grad_norm": 4.338139533996582, "learning_rate": 8.277777777777778e-06, "loss": 0.4913, "step": 1275 }, { "epoch": 0.4211890490847238, "grad_norm": 3.298166513442993, "learning_rate": 8.222222222222222e-06, "loss": 0.4865, "step": 1300 }, { "epoch": 0.4292888384901993, "grad_norm": 3.604741334915161, "learning_rate": 8.166666666666668e-06, "loss": 0.4826, "step": 1325 }, { "epoch": 0.43738862789567473, "grad_norm": 3.3499221801757812, "learning_rate": 8.111111111111112e-06, "loss": 0.4722, "step": 1350 }, { "epoch": 0.4454884173011502, "grad_norm": 3.690180778503418, "learning_rate": 8.055555555555557e-06, "loss": 0.4911, "step": 1375 }, { "epoch": 0.45358820670662564, "grad_norm": 2.948256492614746, "learning_rate": 8.000000000000001e-06, "loss": 0.4757, "step": 1400 }, { "epoch": 0.4616879961121011, "grad_norm": 3.8048160076141357, "learning_rate": 7.944444444444445e-06, "loss": 0.4637, "step": 1425 }, { "epoch": 0.46978778551757655, "grad_norm": 3.5490314960479736, "learning_rate": 7.88888888888889e-06, "loss": 0.4889, "step": 1450 }, { "epoch": 0.477887574923052, "grad_norm": 5.1668009757995605, "learning_rate": 7.833333333333333e-06, "loss": 0.4595, "step": 1475 }, { "epoch": 0.48598736432852746, "grad_norm": 3.1844429969787598, "learning_rate": 7.77777777777778e-06, "loss": 0.4799, "step": 1500 }, { "epoch": 0.4940871537340029, "grad_norm": 3.055802583694458, "learning_rate": 7.722222222222223e-06, "loss": 0.4701, "step": 1525 }, { "epoch": 0.5021869431394784, "grad_norm": 3.7342355251312256, "learning_rate": 7.666666666666667e-06, "loss": 0.4866, "step": 1550 }, { "epoch": 0.5102867325449538, "grad_norm": 3.3651347160339355, "learning_rate": 7.611111111111111e-06, "loss": 0.4462, "step": 1575 }, { "epoch": 0.5183865219504293, "grad_norm": 3.7382657527923584, "learning_rate": 7.555555555555556e-06, "loss": 0.4491, "step": 1600 }, { "epoch": 0.5264863113559047, "grad_norm": 3.2548789978027344, "learning_rate": 7.500000000000001e-06, "loss": 0.4604, "step": 1625 }, { "epoch": 0.5345861007613802, "grad_norm": 3.0430402755737305, "learning_rate": 7.444444444444445e-06, "loss": 0.4354, "step": 1650 }, { "epoch": 0.5426858901668556, "grad_norm": 3.766554832458496, "learning_rate": 7.38888888888889e-06, "loss": 0.465, "step": 1675 }, { "epoch": 0.5507856795723312, "grad_norm": 3.198700428009033, "learning_rate": 7.333333333333333e-06, "loss": 0.4695, "step": 1700 }, { "epoch": 0.5588854689778066, "grad_norm": 2.7915539741516113, "learning_rate": 7.277777777777778e-06, "loss": 0.4511, "step": 1725 }, { "epoch": 0.5669852583832821, "grad_norm": 3.3938117027282715, "learning_rate": 7.222222222222223e-06, "loss": 0.4266, "step": 1750 }, { "epoch": 0.5750850477887575, "grad_norm": 3.4623613357543945, "learning_rate": 7.166666666666667e-06, "loss": 0.4483, "step": 1775 }, { "epoch": 0.583184837194233, "grad_norm": 2.7853918075561523, "learning_rate": 7.111111111111112e-06, "loss": 0.4197, "step": 1800 }, { "epoch": 0.5912846265997084, "grad_norm": 3.4827404022216797, "learning_rate": 7.055555555555557e-06, "loss": 0.4319, "step": 1825 }, { "epoch": 0.5993844160051839, "grad_norm": 3.458853244781494, "learning_rate": 7e-06, "loss": 0.4301, "step": 1850 }, { "epoch": 0.6074842054106593, "grad_norm": 3.726768970489502, "learning_rate": 6.944444444444445e-06, "loss": 0.4339, "step": 1875 }, { "epoch": 0.6155839948161348, "grad_norm": 3.7597055435180664, "learning_rate": 6.88888888888889e-06, "loss": 0.4265, "step": 1900 }, { "epoch": 0.6236837842216102, "grad_norm": 3.6740312576293945, "learning_rate": 6.833333333333334e-06, "loss": 0.4302, "step": 1925 }, { "epoch": 0.6317835736270857, "grad_norm": 3.0535056591033936, "learning_rate": 6.777777777777779e-06, "loss": 0.4336, "step": 1950 }, { "epoch": 0.6398833630325611, "grad_norm": 3.295497417449951, "learning_rate": 6.7222222222222235e-06, "loss": 0.4526, "step": 1975 }, { "epoch": 0.6479831524380366, "grad_norm": 3.080634832382202, "learning_rate": 6.666666666666667e-06, "loss": 0.4409, "step": 2000 }, { "epoch": 0.6479831524380366, "eval_loss": 0.4457571506500244, "eval_runtime": 1994.8072, "eval_samples_per_second": 1.954, "eval_steps_per_second": 0.122, "eval_wer": 0.35629201556787354, "step": 2000 }, { "epoch": 0.6560829418435121, "grad_norm": 3.1193337440490723, "learning_rate": 6.6111111111111115e-06, "loss": 0.4111, "step": 2025 }, { "epoch": 0.6641827312489875, "grad_norm": 3.554410934448242, "learning_rate": 6.555555555555556e-06, "loss": 0.4192, "step": 2050 }, { "epoch": 0.672282520654463, "grad_norm": 4.468347549438477, "learning_rate": 6.5000000000000004e-06, "loss": 0.4329, "step": 2075 }, { "epoch": 0.6803823100599384, "grad_norm": 3.5781023502349854, "learning_rate": 6.444444444444445e-06, "loss": 0.4191, "step": 2100 }, { "epoch": 0.6884820994654139, "grad_norm": 3.257659912109375, "learning_rate": 6.3888888888888885e-06, "loss": 0.4046, "step": 2125 }, { "epoch": 0.6965818888708893, "grad_norm": 3.030454397201538, "learning_rate": 6.333333333333333e-06, "loss": 0.4033, "step": 2150 }, { "epoch": 0.7046816782763649, "grad_norm": 3.3289663791656494, "learning_rate": 6.277777777777778e-06, "loss": 0.4342, "step": 2175 }, { "epoch": 0.7127814676818403, "grad_norm": 3.4743051528930664, "learning_rate": 6.222222222222223e-06, "loss": 0.4102, "step": 2200 }, { "epoch": 0.7208812570873158, "grad_norm": 3.300485610961914, "learning_rate": 6.166666666666667e-06, "loss": 0.3951, "step": 2225 }, { "epoch": 0.7289810464927912, "grad_norm": 3.374356746673584, "learning_rate": 6.111111111111112e-06, "loss": 0.3993, "step": 2250 }, { "epoch": 0.7370808358982667, "grad_norm": 3.108508586883545, "learning_rate": 6.055555555555555e-06, "loss": 0.3927, "step": 2275 }, { "epoch": 0.7451806253037421, "grad_norm": 3.448174238204956, "learning_rate": 6e-06, "loss": 0.4254, "step": 2300 }, { "epoch": 0.7532804147092176, "grad_norm": 3.17012095451355, "learning_rate": 5.944444444444445e-06, "loss": 0.396, "step": 2325 }, { "epoch": 0.761380204114693, "grad_norm": 3.3391034603118896, "learning_rate": 5.88888888888889e-06, "loss": 0.4035, "step": 2350 }, { "epoch": 0.7694799935201685, "grad_norm": 3.1390321254730225, "learning_rate": 5.833333333333334e-06, "loss": 0.4387, "step": 2375 }, { "epoch": 0.7775797829256439, "grad_norm": 2.801060199737549, "learning_rate": 5.78e-06, "loss": 0.4101, "step": 2400 }, { "epoch": 0.7856795723311194, "grad_norm": 3.169654130935669, "learning_rate": 5.724444444444445e-06, "loss": 0.4019, "step": 2425 }, { "epoch": 0.7937793617365948, "grad_norm": 3.1517333984375, "learning_rate": 5.6688888888888895e-06, "loss": 0.3955, "step": 2450 }, { "epoch": 0.8018791511420703, "grad_norm": 2.812309980392456, "learning_rate": 5.613333333333334e-06, "loss": 0.3976, "step": 2475 }, { "epoch": 0.8099789405475457, "grad_norm": 3.095435619354248, "learning_rate": 5.557777777777778e-06, "loss": 0.4173, "step": 2500 }, { "epoch": 0.8180787299530212, "grad_norm": 3.530505418777466, "learning_rate": 5.5022222222222224e-06, "loss": 0.3968, "step": 2525 }, { "epoch": 0.8261785193584967, "grad_norm": 3.6718244552612305, "learning_rate": 5.4466666666666665e-06, "loss": 0.4365, "step": 2550 }, { "epoch": 0.8342783087639721, "grad_norm": 3.455793857574463, "learning_rate": 5.391111111111111e-06, "loss": 0.3995, "step": 2575 }, { "epoch": 0.8423780981694476, "grad_norm": 3.074331760406494, "learning_rate": 5.335555555555556e-06, "loss": 0.3995, "step": 2600 }, { "epoch": 0.850477887574923, "grad_norm": 3.732499361038208, "learning_rate": 5.28e-06, "loss": 0.3873, "step": 2625 }, { "epoch": 0.8585776769803986, "grad_norm": 2.719492197036743, "learning_rate": 5.224444444444445e-06, "loss": 0.3938, "step": 2650 }, { "epoch": 0.866677466385874, "grad_norm": 3.279792308807373, "learning_rate": 5.168888888888889e-06, "loss": 0.4087, "step": 2675 }, { "epoch": 0.8747772557913495, "grad_norm": 3.178786516189575, "learning_rate": 5.113333333333333e-06, "loss": 0.3771, "step": 2700 }, { "epoch": 0.8828770451968249, "grad_norm": 2.70697283744812, "learning_rate": 5.057777777777778e-06, "loss": 0.3991, "step": 2725 }, { "epoch": 0.8909768346023004, "grad_norm": 3.0300474166870117, "learning_rate": 5.002222222222223e-06, "loss": 0.4069, "step": 2750 }, { "epoch": 0.8990766240077758, "grad_norm": 4.001626491546631, "learning_rate": 4.946666666666667e-06, "loss": 0.4153, "step": 2775 }, { "epoch": 0.9071764134132513, "grad_norm": 3.1544246673583984, "learning_rate": 4.891111111111111e-06, "loss": 0.3733, "step": 2800 }, { "epoch": 0.9152762028187267, "grad_norm": 3.553840398788452, "learning_rate": 4.835555555555556e-06, "loss": 0.409, "step": 2825 }, { "epoch": 0.9233759922242022, "grad_norm": 3.6127443313598633, "learning_rate": 4.78e-06, "loss": 0.3728, "step": 2850 }, { "epoch": 0.9314757816296776, "grad_norm": 3.1611216068267822, "learning_rate": 4.724444444444445e-06, "loss": 0.3923, "step": 2875 }, { "epoch": 0.9395755710351531, "grad_norm": 3.2189152240753174, "learning_rate": 4.66888888888889e-06, "loss": 0.4179, "step": 2900 }, { "epoch": 0.9476753604406285, "grad_norm": 3.3200769424438477, "learning_rate": 4.613333333333334e-06, "loss": 0.4067, "step": 2925 }, { "epoch": 0.955775149846104, "grad_norm": 3.38618540763855, "learning_rate": 4.557777777777778e-06, "loss": 0.384, "step": 2950 }, { "epoch": 0.9638749392515794, "grad_norm": 3.5702147483825684, "learning_rate": 4.502222222222223e-06, "loss": 0.3949, "step": 2975 }, { "epoch": 0.9719747286570549, "grad_norm": 2.6733343601226807, "learning_rate": 4.446666666666667e-06, "loss": 0.3914, "step": 3000 }, { "epoch": 0.9719747286570549, "eval_loss": 0.40348634123802185, "eval_runtime": 1837.3277, "eval_samples_per_second": 2.121, "eval_steps_per_second": 0.133, "eval_wer": 0.3089751149899752, "step": 3000 }, { "epoch": 0.9800745180625303, "grad_norm": 3.1163229942321777, "learning_rate": 4.391111111111112e-06, "loss": 0.3614, "step": 3025 }, { "epoch": 0.9881743074680058, "grad_norm": 4.919127941131592, "learning_rate": 4.3355555555555565e-06, "loss": 0.3815, "step": 3050 }, { "epoch": 0.9962740968734813, "grad_norm": 3.523918867111206, "learning_rate": 4.2800000000000005e-06, "loss": 0.3679, "step": 3075 }, { "epoch": 1.0042118904908472, "grad_norm": 2.188014268875122, "learning_rate": 4.2244444444444446e-06, "loss": 0.3305, "step": 3100 }, { "epoch": 1.0123116798963228, "grad_norm": 3.0015525817871094, "learning_rate": 4.168888888888889e-06, "loss": 0.2856, "step": 3125 }, { "epoch": 1.020411469301798, "grad_norm": 3.565284490585327, "learning_rate": 4.1133333333333335e-06, "loss": 0.296, "step": 3150 }, { "epoch": 1.0285112587072736, "grad_norm": 2.8771865367889404, "learning_rate": 4.057777777777778e-06, "loss": 0.2912, "step": 3175 }, { "epoch": 1.036611048112749, "grad_norm": 2.7181403636932373, "learning_rate": 4.002222222222222e-06, "loss": 0.2871, "step": 3200 }, { "epoch": 1.0447108375182246, "grad_norm": 2.652308225631714, "learning_rate": 3.946666666666667e-06, "loss": 0.2669, "step": 3225 }, { "epoch": 1.0528106269237, "grad_norm": 2.390458345413208, "learning_rate": 3.891111111111111e-06, "loss": 0.2852, "step": 3250 }, { "epoch": 1.0609104163291754, "grad_norm": 3.036996603012085, "learning_rate": 3.835555555555555e-06, "loss": 0.2835, "step": 3275 }, { "epoch": 1.0690102057346509, "grad_norm": 3.0076892375946045, "learning_rate": 3.7800000000000002e-06, "loss": 0.2853, "step": 3300 }, { "epoch": 1.0771099951401264, "grad_norm": 2.6001665592193604, "learning_rate": 3.724444444444445e-06, "loss": 0.2552, "step": 3325 }, { "epoch": 1.085209784545602, "grad_norm": 2.9106733798980713, "learning_rate": 3.668888888888889e-06, "loss": 0.2921, "step": 3350 }, { "epoch": 1.0933095739510772, "grad_norm": 2.579561710357666, "learning_rate": 3.6133333333333336e-06, "loss": 0.2904, "step": 3375 }, { "epoch": 1.1014093633565527, "grad_norm": 2.4961764812469482, "learning_rate": 3.5577777777777785e-06, "loss": 0.2699, "step": 3400 }, { "epoch": 1.1095091527620282, "grad_norm": 2.9800219535827637, "learning_rate": 3.5022222222222225e-06, "loss": 0.2867, "step": 3425 }, { "epoch": 1.1176089421675037, "grad_norm": 2.7929482460021973, "learning_rate": 3.446666666666667e-06, "loss": 0.2707, "step": 3450 }, { "epoch": 1.125708731572979, "grad_norm": 3.021549940109253, "learning_rate": 3.391111111111111e-06, "loss": 0.2793, "step": 3475 }, { "epoch": 1.1338085209784545, "grad_norm": 2.6613991260528564, "learning_rate": 3.335555555555556e-06, "loss": 0.2705, "step": 3500 }, { "epoch": 1.14190831038393, "grad_norm": 2.4502217769622803, "learning_rate": 3.2800000000000004e-06, "loss": 0.2776, "step": 3525 }, { "epoch": 1.1500080997894055, "grad_norm": 2.698561668395996, "learning_rate": 3.2244444444444444e-06, "loss": 0.2685, "step": 3550 }, { "epoch": 1.158107889194881, "grad_norm": 2.5799190998077393, "learning_rate": 3.1688888888888893e-06, "loss": 0.2788, "step": 3575 }, { "epoch": 1.1662076786003563, "grad_norm": 2.5856029987335205, "learning_rate": 3.1133333333333337e-06, "loss": 0.2691, "step": 3600 }, { "epoch": 1.1743074680058319, "grad_norm": 2.861074209213257, "learning_rate": 3.0577777777777778e-06, "loss": 0.2862, "step": 3625 }, { "epoch": 1.1824072574113074, "grad_norm": 2.6794049739837646, "learning_rate": 3.0022222222222227e-06, "loss": 0.269, "step": 3650 }, { "epoch": 1.1905070468167827, "grad_norm": 2.6813509464263916, "learning_rate": 2.946666666666667e-06, "loss": 0.2787, "step": 3675 }, { "epoch": 1.1986068362222582, "grad_norm": 2.5139195919036865, "learning_rate": 2.891111111111111e-06, "loss": 0.2876, "step": 3700 }, { "epoch": 1.2067066256277337, "grad_norm": 2.5221784114837646, "learning_rate": 2.835555555555556e-06, "loss": 0.2629, "step": 3725 }, { "epoch": 1.2148064150332092, "grad_norm": 2.6317663192749023, "learning_rate": 2.7800000000000005e-06, "loss": 0.2792, "step": 3750 }, { "epoch": 1.2229062044386847, "grad_norm": 3.165544271469116, "learning_rate": 2.7244444444444445e-06, "loss": 0.2798, "step": 3775 }, { "epoch": 1.23100599384416, "grad_norm": 2.9240896701812744, "learning_rate": 2.6688888888888894e-06, "loss": 0.2745, "step": 3800 }, { "epoch": 1.2391057832496355, "grad_norm": 2.7815427780151367, "learning_rate": 2.6133333333333334e-06, "loss": 0.2768, "step": 3825 }, { "epoch": 1.247205572655111, "grad_norm": 3.016592025756836, "learning_rate": 2.557777777777778e-06, "loss": 0.2735, "step": 3850 }, { "epoch": 1.2553053620605863, "grad_norm": 2.7359392642974854, "learning_rate": 2.5022222222222224e-06, "loss": 0.2757, "step": 3875 }, { "epoch": 1.2634051514660618, "grad_norm": 2.575500726699829, "learning_rate": 2.446666666666667e-06, "loss": 0.2635, "step": 3900 }, { "epoch": 1.2715049408715373, "grad_norm": 2.202298164367676, "learning_rate": 2.3911111111111113e-06, "loss": 0.2682, "step": 3925 }, { "epoch": 1.2796047302770128, "grad_norm": 3.1898550987243652, "learning_rate": 2.3355555555555557e-06, "loss": 0.2708, "step": 3950 }, { "epoch": 1.2877045196824883, "grad_norm": 2.5168306827545166, "learning_rate": 2.28e-06, "loss": 0.2639, "step": 3975 }, { "epoch": 1.2958043090879636, "grad_norm": 2.682749032974243, "learning_rate": 2.2244444444444447e-06, "loss": 0.296, "step": 4000 }, { "epoch": 1.2958043090879636, "eval_loss": 0.3868160843849182, "eval_runtime": 1836.049, "eval_samples_per_second": 2.122, "eval_steps_per_second": 0.133, "eval_wer": 0.2977002004953414, "step": 4000 }, { "epoch": 1.3039040984934391, "grad_norm": 2.805448293685913, "learning_rate": 2.168888888888889e-06, "loss": 0.2728, "step": 4025 }, { "epoch": 1.3120038878989146, "grad_norm": 2.2613837718963623, "learning_rate": 2.1133333333333336e-06, "loss": 0.2695, "step": 4050 }, { "epoch": 1.3201036773043902, "grad_norm": 2.4388859272003174, "learning_rate": 2.057777777777778e-06, "loss": 0.286, "step": 4075 }, { "epoch": 1.3282034667098657, "grad_norm": 2.807706832885742, "learning_rate": 2.0022222222222225e-06, "loss": 0.251, "step": 4100 }, { "epoch": 1.336303256115341, "grad_norm": 2.927849292755127, "learning_rate": 1.9466666666666665e-06, "loss": 0.2747, "step": 4125 }, { "epoch": 1.3444030455208165, "grad_norm": 2.2061877250671387, "learning_rate": 1.8911111111111114e-06, "loss": 0.267, "step": 4150 }, { "epoch": 1.352502834926292, "grad_norm": 2.5290238857269287, "learning_rate": 1.8355555555555557e-06, "loss": 0.2727, "step": 4175 }, { "epoch": 1.3606026243317673, "grad_norm": 2.74303936958313, "learning_rate": 1.7800000000000001e-06, "loss": 0.2726, "step": 4200 }, { "epoch": 1.3687024137372428, "grad_norm": 2.194861888885498, "learning_rate": 1.7244444444444448e-06, "loss": 0.2645, "step": 4225 }, { "epoch": 1.3768022031427183, "grad_norm": 2.4960832595825195, "learning_rate": 1.668888888888889e-06, "loss": 0.2788, "step": 4250 }, { "epoch": 1.3849019925481938, "grad_norm": 2.3262386322021484, "learning_rate": 1.6133333333333335e-06, "loss": 0.2586, "step": 4275 }, { "epoch": 1.3930017819536693, "grad_norm": 2.5423128604888916, "learning_rate": 1.5577777777777777e-06, "loss": 0.2719, "step": 4300 }, { "epoch": 1.4011015713591446, "grad_norm": 2.9188232421875, "learning_rate": 1.5022222222222224e-06, "loss": 0.2671, "step": 4325 }, { "epoch": 1.40920136076462, "grad_norm": 2.624691963195801, "learning_rate": 1.4466666666666669e-06, "loss": 0.2809, "step": 4350 }, { "epoch": 1.4173011501700956, "grad_norm": 2.674189329147339, "learning_rate": 1.3911111111111111e-06, "loss": 0.2651, "step": 4375 }, { "epoch": 1.425400939575571, "grad_norm": 2.832871913909912, "learning_rate": 1.3355555555555558e-06, "loss": 0.2676, "step": 4400 }, { "epoch": 1.4335007289810464, "grad_norm": 2.8886163234710693, "learning_rate": 1.28e-06, "loss": 0.2717, "step": 4425 }, { "epoch": 1.441600518386522, "grad_norm": 2.9759998321533203, "learning_rate": 1.2244444444444445e-06, "loss": 0.2709, "step": 4450 }, { "epoch": 1.4497003077919974, "grad_norm": 2.8396642208099365, "learning_rate": 1.168888888888889e-06, "loss": 0.2701, "step": 4475 }, { "epoch": 1.457800097197473, "grad_norm": 3.0574100017547607, "learning_rate": 1.1155555555555558e-06, "loss": 0.2592, "step": 4500 }, { "epoch": 1.4658998866029482, "grad_norm": 2.318142890930176, "learning_rate": 1.06e-06, "loss": 0.2731, "step": 4525 }, { "epoch": 1.4739996760084237, "grad_norm": 2.372235059738159, "learning_rate": 1.0044444444444445e-06, "loss": 0.2694, "step": 4550 }, { "epoch": 1.4820994654138993, "grad_norm": 2.6232292652130127, "learning_rate": 9.488888888888889e-07, "loss": 0.2529, "step": 4575 }, { "epoch": 1.4901992548193748, "grad_norm": 2.717992067337036, "learning_rate": 8.933333333333334e-07, "loss": 0.2779, "step": 4600 }, { "epoch": 1.4982990442248503, "grad_norm": 2.852125406265259, "learning_rate": 8.37777777777778e-07, "loss": 0.2596, "step": 4625 }, { "epoch": 1.5063988336303256, "grad_norm": 2.2071590423583984, "learning_rate": 7.822222222222223e-07, "loss": 0.2765, "step": 4650 }, { "epoch": 1.514498623035801, "grad_norm": 2.871159315109253, "learning_rate": 7.266666666666668e-07, "loss": 0.2594, "step": 4675 }, { "epoch": 1.5225984124412766, "grad_norm": 2.287871837615967, "learning_rate": 6.711111111111111e-07, "loss": 0.2703, "step": 4700 }, { "epoch": 1.5306982018467519, "grad_norm": 2.345743417739868, "learning_rate": 6.155555555555556e-07, "loss": 0.2641, "step": 4725 }, { "epoch": 1.5387979912522276, "grad_norm": 2.734402656555176, "learning_rate": 5.6e-07, "loss": 0.2644, "step": 4750 }, { "epoch": 1.546897780657703, "grad_norm": 2.6672370433807373, "learning_rate": 5.044444444444445e-07, "loss": 0.2568, "step": 4775 }, { "epoch": 1.5549975700631784, "grad_norm": 2.9469447135925293, "learning_rate": 4.488888888888889e-07, "loss": 0.2646, "step": 4800 }, { "epoch": 1.563097359468654, "grad_norm": 3.1269147396087646, "learning_rate": 3.9333333333333336e-07, "loss": 0.2643, "step": 4825 }, { "epoch": 1.5711971488741292, "grad_norm": 2.543921709060669, "learning_rate": 3.3777777777777777e-07, "loss": 0.2738, "step": 4850 }, { "epoch": 1.5792969382796047, "grad_norm": 2.7174246311187744, "learning_rate": 2.822222222222222e-07, "loss": 0.2439, "step": 4875 }, { "epoch": 1.5873967276850802, "grad_norm": 2.381450891494751, "learning_rate": 2.266666666666667e-07, "loss": 0.2773, "step": 4900 }, { "epoch": 1.5954965170905555, "grad_norm": 2.9978244304656982, "learning_rate": 1.7111111111111114e-07, "loss": 0.2653, "step": 4925 }, { "epoch": 1.6035963064960312, "grad_norm": 2.594560384750366, "learning_rate": 1.1555555555555556e-07, "loss": 0.2472, "step": 4950 }, { "epoch": 1.6116960959015065, "grad_norm": 2.8399264812469482, "learning_rate": 6.000000000000001e-08, "loss": 0.258, "step": 4975 }, { "epoch": 1.619795885306982, "grad_norm": 2.9382212162017822, "learning_rate": 4.444444444444445e-09, "loss": 0.274, "step": 5000 }, { "epoch": 1.619795885306982, "eval_loss": 0.3738669753074646, "eval_runtime": 1856.8663, "eval_samples_per_second": 2.099, "eval_steps_per_second": 0.131, "eval_wer": 0.29154381412902464, "step": 5000 }, { "epoch": 1.619795885306982, "step": 5000, "total_flos": 5.435419715783885e+20, "train_loss": 0.4225531764030456, "train_runtime": 64153.0147, "train_samples_per_second": 2.494, "train_steps_per_second": 0.078 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.435419715783885e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }