| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.8714672861014323, | |
| "eval_steps": 100, | |
| "global_step": 20000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.019357336430507164, | |
| "eval_loss": 3.5565404891967773, | |
| "eval_runtime": 151.5266, | |
| "eval_samples_per_second": 37.327, | |
| "eval_steps_per_second": 4.666, | |
| "eval_wer": 1.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03871467286101433, | |
| "eval_loss": 3.0301756858825684, | |
| "eval_runtime": 150.582, | |
| "eval_samples_per_second": 37.561, | |
| "eval_steps_per_second": 4.695, | |
| "eval_wer": 1.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05807200929152149, | |
| "eval_loss": 2.9460911750793457, | |
| "eval_runtime": 148.9065, | |
| "eval_samples_per_second": 37.984, | |
| "eval_steps_per_second": 4.748, | |
| "eval_wer": 1.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.07742934572202866, | |
| "eval_loss": 1.8142520189285278, | |
| "eval_runtime": 149.8655, | |
| "eval_samples_per_second": 37.741, | |
| "eval_steps_per_second": 4.718, | |
| "eval_wer": 0.940732775914365, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.09678668215253582, | |
| "grad_norm": 3.132490396499634, | |
| "learning_rate": 0.00029759999999999997, | |
| "loss": 3.9521, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09678668215253582, | |
| "eval_loss": 1.4195518493652344, | |
| "eval_runtime": 150.5171, | |
| "eval_samples_per_second": 37.577, | |
| "eval_steps_per_second": 4.697, | |
| "eval_wer": 0.8693007655149171, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.11614401858304298, | |
| "eval_loss": 1.16689133644104, | |
| "eval_runtime": 150.5387, | |
| "eval_samples_per_second": 37.572, | |
| "eval_steps_per_second": 4.696, | |
| "eval_wer": 0.8055239042865626, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.13550135501355012, | |
| "eval_loss": 1.0756505727767944, | |
| "eval_runtime": 151.2385, | |
| "eval_samples_per_second": 37.398, | |
| "eval_steps_per_second": 4.675, | |
| "eval_wer": 0.7596251063215163, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.1548586914440573, | |
| "eval_loss": 0.9944618344306946, | |
| "eval_runtime": 151.1646, | |
| "eval_samples_per_second": 37.416, | |
| "eval_steps_per_second": 4.677, | |
| "eval_wer": 0.7223925149652549, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.17421602787456447, | |
| "eval_loss": 0.9381263256072998, | |
| "eval_runtime": 151.6289, | |
| "eval_samples_per_second": 37.302, | |
| "eval_steps_per_second": 4.663, | |
| "eval_wer": 0.6870857472998347, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.19357336430507163, | |
| "grad_norm": 7.335289001464844, | |
| "learning_rate": 0.0002844, | |
| "loss": 1.0266, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.19357336430507163, | |
| "eval_loss": 0.8977694511413574, | |
| "eval_runtime": 156.0202, | |
| "eval_samples_per_second": 36.252, | |
| "eval_steps_per_second": 4.531, | |
| "eval_wer": 0.661472292211648, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2129307007355788, | |
| "eval_loss": 0.8770694136619568, | |
| "eval_runtime": 151.6589, | |
| "eval_samples_per_second": 37.294, | |
| "eval_steps_per_second": 4.662, | |
| "eval_wer": 0.6450385967164706, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.23228803716608595, | |
| "eval_loss": 0.851553201675415, | |
| "eval_runtime": 151.5945, | |
| "eval_samples_per_second": 37.31, | |
| "eval_steps_per_second": 4.664, | |
| "eval_wer": 0.640432668389209, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.2516453735965931, | |
| "eval_loss": 0.8273979425430298, | |
| "eval_runtime": 151.4524, | |
| "eval_samples_per_second": 37.345, | |
| "eval_steps_per_second": 4.668, | |
| "eval_wer": 0.6138081558633307, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.27100271002710025, | |
| "eval_loss": 0.7992698550224304, | |
| "eval_runtime": 152.8076, | |
| "eval_samples_per_second": 37.014, | |
| "eval_steps_per_second": 4.627, | |
| "eval_wer": 0.596973247099228, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.29036004645760743, | |
| "grad_norm": 4.0737223625183105, | |
| "learning_rate": 0.00026861052631578947, | |
| "loss": 0.8454, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.29036004645760743, | |
| "eval_loss": 0.7768516540527344, | |
| "eval_runtime": 152.3743, | |
| "eval_samples_per_second": 37.119, | |
| "eval_steps_per_second": 4.64, | |
| "eval_wer": 0.5887563993516394, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.3097173828881146, | |
| "eval_loss": 0.7664207220077515, | |
| "eval_runtime": 154.3668, | |
| "eval_samples_per_second": 36.64, | |
| "eval_steps_per_second": 4.58, | |
| "eval_wer": 0.5997977885124617, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.32907471931862176, | |
| "eval_loss": 0.7400562763214111, | |
| "eval_runtime": 153.7228, | |
| "eval_samples_per_second": 36.793, | |
| "eval_steps_per_second": 4.599, | |
| "eval_wer": 0.5592110542279854, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.34843205574912894, | |
| "eval_loss": 0.746478796005249, | |
| "eval_runtime": 151.7535, | |
| "eval_samples_per_second": 37.271, | |
| "eval_steps_per_second": 4.659, | |
| "eval_wer": 0.5650206223620228, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.3677893921796361, | |
| "eval_loss": 0.7252949476242065, | |
| "eval_runtime": 151.7548, | |
| "eval_samples_per_second": 37.271, | |
| "eval_steps_per_second": 4.659, | |
| "eval_wer": 0.5791272808974338, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.38714672861014326, | |
| "grad_norm": 2.4802448749542236, | |
| "learning_rate": 0.0002528210526315789, | |
| "loss": 0.7537, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.38714672861014326, | |
| "eval_loss": 0.7039346098899841, | |
| "eval_runtime": 152.7969, | |
| "eval_samples_per_second": 37.016, | |
| "eval_steps_per_second": 4.627, | |
| "eval_wer": 0.5343518800853782, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.4065040650406504, | |
| "eval_loss": 0.6932350397109985, | |
| "eval_runtime": 152.4406, | |
| "eval_samples_per_second": 37.103, | |
| "eval_steps_per_second": 4.638, | |
| "eval_wer": 0.5168429330294811, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.4258614014711576, | |
| "eval_loss": 0.696869432926178, | |
| "eval_runtime": 153.0527, | |
| "eval_samples_per_second": 36.955, | |
| "eval_steps_per_second": 4.619, | |
| "eval_wer": 0.5364381890837894, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.4452187379016647, | |
| "eval_loss": 0.6781283617019653, | |
| "eval_runtime": 152.1378, | |
| "eval_samples_per_second": 37.177, | |
| "eval_steps_per_second": 4.647, | |
| "eval_wer": 0.5173725345444624, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.4645760743321719, | |
| "eval_loss": 0.6760829091072083, | |
| "eval_runtime": 151.9712, | |
| "eval_samples_per_second": 37.218, | |
| "eval_steps_per_second": 4.652, | |
| "eval_wer": 0.5050312143923223, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.48393341076267904, | |
| "grad_norm": 3.791292667388916, | |
| "learning_rate": 0.0002370315789473684, | |
| "loss": 0.681, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.48393341076267904, | |
| "eval_loss": 0.6720712780952454, | |
| "eval_runtime": 152.2414, | |
| "eval_samples_per_second": 37.152, | |
| "eval_steps_per_second": 4.644, | |
| "eval_wer": 0.528718845789668, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.5032907471931862, | |
| "eval_loss": 0.6598270535469055, | |
| "eval_runtime": 151.7192, | |
| "eval_samples_per_second": 37.279, | |
| "eval_steps_per_second": 4.66, | |
| "eval_wer": 0.5195069891351447, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.5226480836236934, | |
| "eval_loss": 0.6555168628692627, | |
| "eval_runtime": 152.5678, | |
| "eval_samples_per_second": 37.072, | |
| "eval_steps_per_second": 4.634, | |
| "eval_wer": 0.4975846961210701, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.5420054200542005, | |
| "eval_loss": 0.6535276770591736, | |
| "eval_runtime": 152.5246, | |
| "eval_samples_per_second": 37.083, | |
| "eval_steps_per_second": 4.635, | |
| "eval_wer": 0.49936608303509816, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.5613627564847077, | |
| "eval_loss": 0.6258506178855896, | |
| "eval_runtime": 151.843, | |
| "eval_samples_per_second": 37.249, | |
| "eval_steps_per_second": 4.656, | |
| "eval_wer": 0.48192133010222915, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.5807200929152149, | |
| "grad_norm": 9.4619779586792, | |
| "learning_rate": 0.00022124210526315786, | |
| "loss": 0.6737, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.5807200929152149, | |
| "eval_loss": 0.629943311214447, | |
| "eval_runtime": 151.8389, | |
| "eval_samples_per_second": 37.25, | |
| "eval_steps_per_second": 4.656, | |
| "eval_wer": 0.48022018584198617, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.6000774293457221, | |
| "eval_loss": 0.6378594636917114, | |
| "eval_runtime": 151.6255, | |
| "eval_samples_per_second": 37.302, | |
| "eval_steps_per_second": 4.663, | |
| "eval_wer": 0.4893197027812104, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.6194347657762292, | |
| "eval_loss": 0.6225672364234924, | |
| "eval_runtime": 153.0144, | |
| "eval_samples_per_second": 36.964, | |
| "eval_steps_per_second": 4.62, | |
| "eval_wer": 0.4806053505801544, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.6387921022067363, | |
| "eval_loss": 0.6088670492172241, | |
| "eval_runtime": 152.2222, | |
| "eval_samples_per_second": 37.156, | |
| "eval_steps_per_second": 4.645, | |
| "eval_wer": 0.4627112387860891, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.6581494386372435, | |
| "eval_loss": 0.6028585433959961, | |
| "eval_runtime": 153.0615, | |
| "eval_samples_per_second": 36.952, | |
| "eval_steps_per_second": 4.619, | |
| "eval_wer": 0.47354399704707034, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.6775067750677507, | |
| "grad_norm": 3.4705822467803955, | |
| "learning_rate": 0.00020545263157894736, | |
| "loss": 0.6419, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.6775067750677507, | |
| "eval_loss": 0.5871421694755554, | |
| "eval_runtime": 152.5739, | |
| "eval_samples_per_second": 37.071, | |
| "eval_steps_per_second": 4.634, | |
| "eval_wer": 0.4592126590810611, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.6968641114982579, | |
| "eval_loss": 0.6001027226448059, | |
| "eval_runtime": 152.1697, | |
| "eval_samples_per_second": 37.169, | |
| "eval_steps_per_second": 4.646, | |
| "eval_wer": 0.4610742886488742, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.716221447928765, | |
| "eval_loss": 0.5848923921585083, | |
| "eval_runtime": 152.6563, | |
| "eval_samples_per_second": 37.051, | |
| "eval_steps_per_second": 4.631, | |
| "eval_wer": 0.4472565036670893, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.7355787843592722, | |
| "eval_loss": 0.5923960208892822, | |
| "eval_runtime": 152.6559, | |
| "eval_samples_per_second": 37.051, | |
| "eval_steps_per_second": 4.631, | |
| "eval_wer": 0.46377044181605176, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.7549361207897793, | |
| "eval_loss": 0.5767965316772461, | |
| "eval_runtime": 152.1652, | |
| "eval_samples_per_second": 37.17, | |
| "eval_steps_per_second": 4.646, | |
| "eval_wer": 0.4584904751969957, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.7742934572202865, | |
| "grad_norm": 3.628082275390625, | |
| "learning_rate": 0.00018966315789473683, | |
| "loss": 0.6183, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.7742934572202865, | |
| "eval_loss": 0.5672534704208374, | |
| "eval_runtime": 152.4329, | |
| "eval_samples_per_second": 37.105, | |
| "eval_steps_per_second": 4.638, | |
| "eval_wer": 0.44531463144549116, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.7936507936507936, | |
| "eval_loss": 0.5575382113456726, | |
| "eval_runtime": 152.2388, | |
| "eval_samples_per_second": 37.152, | |
| "eval_steps_per_second": 4.644, | |
| "eval_wer": 0.4451862431994351, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.8130081300813008, | |
| "eval_loss": 0.5631808042526245, | |
| "eval_runtime": 152.7545, | |
| "eval_samples_per_second": 37.027, | |
| "eval_steps_per_second": 4.628, | |
| "eval_wer": 0.4474972316284444, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.832365466511808, | |
| "eval_loss": 0.5498641729354858, | |
| "eval_runtime": 153.7788, | |
| "eval_samples_per_second": 36.78, | |
| "eval_steps_per_second": 4.598, | |
| "eval_wer": 0.44008281041870617, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.8517228029423152, | |
| "eval_loss": 0.5662574172019958, | |
| "eval_runtime": 152.5034, | |
| "eval_samples_per_second": 37.088, | |
| "eval_steps_per_second": 4.636, | |
| "eval_wer": 0.43101539054099597, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.8710801393728222, | |
| "grad_norm": 2.376349925994873, | |
| "learning_rate": 0.0001738736842105263, | |
| "loss": 0.5877, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.8710801393728222, | |
| "eval_loss": 0.5584732294082642, | |
| "eval_runtime": 152.1714, | |
| "eval_samples_per_second": 37.169, | |
| "eval_steps_per_second": 4.646, | |
| "eval_wer": 0.4317215258943044, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.8904374758033294, | |
| "eval_loss": 0.5463821291923523, | |
| "eval_runtime": 152.4923, | |
| "eval_samples_per_second": 37.09, | |
| "eval_steps_per_second": 4.636, | |
| "eval_wer": 0.41997400138017366, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.9097948122338366, | |
| "eval_loss": 0.5381494164466858, | |
| "eval_runtime": 153.2139, | |
| "eval_samples_per_second": 36.916, | |
| "eval_steps_per_second": 4.614, | |
| "eval_wer": 0.4192197204345942, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.9291521486643438, | |
| "eval_loss": 0.5453722476959229, | |
| "eval_runtime": 151.9737, | |
| "eval_samples_per_second": 37.217, | |
| "eval_steps_per_second": 4.652, | |
| "eval_wer": 0.4201986808107718, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.948509485094851, | |
| "eval_loss": 0.5237515568733215, | |
| "eval_runtime": 151.8558, | |
| "eval_samples_per_second": 37.246, | |
| "eval_steps_per_second": 4.656, | |
| "eval_wer": 0.41241514339362234, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.9678668215253581, | |
| "grad_norm": 2.5489518642425537, | |
| "learning_rate": 0.0001581157894736842, | |
| "loss": 0.5621, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.9678668215253581, | |
| "eval_loss": 0.5303541421890259, | |
| "eval_runtime": 152.515, | |
| "eval_samples_per_second": 37.085, | |
| "eval_steps_per_second": 4.636, | |
| "eval_wer": 0.41353854054661293, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.9872241579558653, | |
| "eval_loss": 0.5163344740867615, | |
| "eval_runtime": 156.7945, | |
| "eval_samples_per_second": 36.073, | |
| "eval_steps_per_second": 4.509, | |
| "eval_wer": 0.4061080708061177, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.0065814943863725, | |
| "eval_loss": 0.51596599817276, | |
| "eval_runtime": 153.2891, | |
| "eval_samples_per_second": 36.898, | |
| "eval_steps_per_second": 4.612, | |
| "eval_wer": 0.39927139670363176, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.0259388308168795, | |
| "eval_loss": 0.5088583827018738, | |
| "eval_runtime": 152.7112, | |
| "eval_samples_per_second": 37.037, | |
| "eval_steps_per_second": 4.63, | |
| "eval_wer": 0.3898509091492674, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.0452961672473868, | |
| "eval_loss": 0.5110610723495483, | |
| "eval_runtime": 152.5555, | |
| "eval_samples_per_second": 37.075, | |
| "eval_steps_per_second": 4.634, | |
| "eval_wer": 0.3985652613503234, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.064653503677894, | |
| "grad_norm": 1.1362248659133911, | |
| "learning_rate": 0.0001423578947368421, | |
| "loss": 0.4882, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.064653503677894, | |
| "eval_loss": 0.5010027885437012, | |
| "eval_runtime": 152.1249, | |
| "eval_samples_per_second": 37.18, | |
| "eval_steps_per_second": 4.647, | |
| "eval_wer": 0.38574248527547306, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.084010840108401, | |
| "eval_loss": 0.49406561255455017, | |
| "eval_runtime": 151.5623, | |
| "eval_samples_per_second": 37.318, | |
| "eval_steps_per_second": 4.665, | |
| "eval_wer": 0.3858548249907721, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.1033681765389083, | |
| "eval_loss": 0.49403733015060425, | |
| "eval_runtime": 152.7631, | |
| "eval_samples_per_second": 37.025, | |
| "eval_steps_per_second": 4.628, | |
| "eval_wer": 0.3813451878480525, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.1227255129694154, | |
| "eval_loss": 0.4913772642612457, | |
| "eval_runtime": 152.1406, | |
| "eval_samples_per_second": 37.176, | |
| "eval_steps_per_second": 4.647, | |
| "eval_wer": 0.37815153022740766, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.1420828493999227, | |
| "eval_loss": 0.48747047781944275, | |
| "eval_runtime": 151.3195, | |
| "eval_samples_per_second": 37.378, | |
| "eval_steps_per_second": 4.672, | |
| "eval_wer": 0.3745406108070806, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.1614401858304297, | |
| "grad_norm": 1.0150744915008545, | |
| "learning_rate": 0.00012656842105263156, | |
| "loss": 0.4569, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.1614401858304297, | |
| "eval_loss": 0.4841971695423126, | |
| "eval_runtime": 151.8567, | |
| "eval_samples_per_second": 37.246, | |
| "eval_steps_per_second": 4.656, | |
| "eval_wer": 0.38071929514852915, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.1807975222609368, | |
| "eval_loss": 0.48611822724342346, | |
| "eval_runtime": 150.971, | |
| "eval_samples_per_second": 37.464, | |
| "eval_steps_per_second": 4.683, | |
| "eval_wer": 0.37370608720771614, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.2001548586914441, | |
| "eval_loss": 0.48144644498825073, | |
| "eval_runtime": 151.4548, | |
| "eval_samples_per_second": 37.344, | |
| "eval_steps_per_second": 4.668, | |
| "eval_wer": 0.3760973182905105, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.2195121951219512, | |
| "eval_loss": 0.47813892364501953, | |
| "eval_runtime": 151.1935, | |
| "eval_samples_per_second": 37.409, | |
| "eval_steps_per_second": 4.676, | |
| "eval_wer": 0.37409125194588433, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.2388695315524583, | |
| "eval_loss": 0.4771001935005188, | |
| "eval_runtime": 151.1732, | |
| "eval_samples_per_second": 37.414, | |
| "eval_steps_per_second": 4.677, | |
| "eval_wer": 0.36815329556579096, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.2582268679829656, | |
| "grad_norm": 1.3292571306228638, | |
| "learning_rate": 0.00011077894736842105, | |
| "loss": 0.4416, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.2582268679829656, | |
| "eval_loss": 0.47095027565956116, | |
| "eval_runtime": 151.5037, | |
| "eval_samples_per_second": 37.332, | |
| "eval_steps_per_second": 4.667, | |
| "eval_wer": 0.37338511659257595, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.2775842044134726, | |
| "eval_loss": 0.47211408615112305, | |
| "eval_runtime": 150.9455, | |
| "eval_samples_per_second": 37.47, | |
| "eval_steps_per_second": 4.684, | |
| "eval_wer": 0.3659706953828377, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.29694154084398, | |
| "eval_loss": 0.4679400622844696, | |
| "eval_runtime": 151.4191, | |
| "eval_samples_per_second": 37.353, | |
| "eval_steps_per_second": 4.669, | |
| "eval_wer": 0.3638843863844265, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.316298877274487, | |
| "eval_loss": 0.46228036284446716, | |
| "eval_runtime": 151.3839, | |
| "eval_samples_per_second": 37.362, | |
| "eval_steps_per_second": 4.67, | |
| "eval_wer": 0.366532393959333, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.3356562137049943, | |
| "eval_loss": 0.46108925342559814, | |
| "eval_runtime": 151.8163, | |
| "eval_samples_per_second": 37.256, | |
| "eval_steps_per_second": 4.657, | |
| "eval_wer": 0.3601771757795574, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.3550135501355014, | |
| "grad_norm": 0.8062695860862732, | |
| "learning_rate": 9.498947368421052e-05, | |
| "loss": 0.4324, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.3550135501355014, | |
| "eval_loss": 0.46888086199760437, | |
| "eval_runtime": 152.4379, | |
| "eval_samples_per_second": 37.104, | |
| "eval_steps_per_second": 4.638, | |
| "eval_wer": 0.3609314567251368, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.3743708865660085, | |
| "eval_loss": 0.4573034346103668, | |
| "eval_runtime": 151.3077, | |
| "eval_samples_per_second": 37.381, | |
| "eval_steps_per_second": 4.673, | |
| "eval_wer": 0.3602574184333424, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.3937282229965158, | |
| "eval_loss": 0.45749789476394653, | |
| "eval_runtime": 151.5824, | |
| "eval_samples_per_second": 37.313, | |
| "eval_steps_per_second": 4.664, | |
| "eval_wer": 0.3546083356068752, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.4130855594270229, | |
| "eval_loss": 0.4555954933166504, | |
| "eval_runtime": 151.6035, | |
| "eval_samples_per_second": 37.308, | |
| "eval_steps_per_second": 4.663, | |
| "eval_wer": 0.35836369180401534, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.43244289585753, | |
| "eval_loss": 0.4495578408241272, | |
| "eval_runtime": 152.5621, | |
| "eval_samples_per_second": 37.073, | |
| "eval_steps_per_second": 4.634, | |
| "eval_wer": 0.350724591163679, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.4518002322880372, | |
| "grad_norm": 0.7916799187660217, | |
| "learning_rate": 7.92e-05, | |
| "loss": 0.4255, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.4518002322880372, | |
| "eval_loss": 0.44609567523002625, | |
| "eval_runtime": 151.8498, | |
| "eval_samples_per_second": 37.247, | |
| "eval_steps_per_second": 4.656, | |
| "eval_wer": 0.34671245847442667, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.4711575687185443, | |
| "eval_loss": 0.44341230392456055, | |
| "eval_runtime": 152.528, | |
| "eval_samples_per_second": 37.082, | |
| "eval_steps_per_second": 4.635, | |
| "eval_wer": 0.3462470510824734, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.4905149051490514, | |
| "eval_loss": 0.44362780451774597, | |
| "eval_runtime": 152.5253, | |
| "eval_samples_per_second": 37.082, | |
| "eval_steps_per_second": 4.635, | |
| "eval_wer": 0.3516393574168285, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.5098722415795587, | |
| "eval_loss": 0.4406072199344635, | |
| "eval_runtime": 152.4039, | |
| "eval_samples_per_second": 37.112, | |
| "eval_steps_per_second": 4.639, | |
| "eval_wer": 0.34579769222127715, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.5292295780100658, | |
| "eval_loss": 0.43874725699424744, | |
| "eval_runtime": 152.6604, | |
| "eval_samples_per_second": 37.05, | |
| "eval_steps_per_second": 4.631, | |
| "eval_wer": 0.3439360626534641, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.5485869144405728, | |
| "grad_norm": 0.7491864562034607, | |
| "learning_rate": 6.344210526315788e-05, | |
| "loss": 0.4094, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.5485869144405728, | |
| "eval_loss": 0.43253499269485474, | |
| "eval_runtime": 153.8006, | |
| "eval_samples_per_second": 36.775, | |
| "eval_steps_per_second": 4.597, | |
| "eval_wer": 0.3409831329941744, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.5679442508710801, | |
| "eval_loss": 0.4359830617904663, | |
| "eval_runtime": 153.3674, | |
| "eval_samples_per_second": 36.879, | |
| "eval_steps_per_second": 4.61, | |
| "eval_wer": 0.3419299963088379, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.5873015873015874, | |
| "eval_loss": 0.4285949170589447, | |
| "eval_runtime": 153.3711, | |
| "eval_samples_per_second": 36.878, | |
| "eval_steps_per_second": 4.61, | |
| "eval_wer": 0.3377252812505015, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.6066589237320945, | |
| "eval_loss": 0.43007034063339233, | |
| "eval_runtime": 152.2201, | |
| "eval_samples_per_second": 37.157, | |
| "eval_steps_per_second": 4.645, | |
| "eval_wer": 0.3335526632536791, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.6260162601626016, | |
| "eval_loss": 0.42966797947883606, | |
| "eval_runtime": 152.0163, | |
| "eval_samples_per_second": 37.207, | |
| "eval_steps_per_second": 4.651, | |
| "eval_wer": 0.3322848293238754, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.645373596593109, | |
| "grad_norm": 1.047472596168518, | |
| "learning_rate": 4.765263157894736e-05, | |
| "loss": 0.4018, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.645373596593109, | |
| "eval_loss": 0.4270441234111786, | |
| "eval_runtime": 152.8058, | |
| "eval_samples_per_second": 37.014, | |
| "eval_steps_per_second": 4.627, | |
| "eval_wer": 0.3338575853380623, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.664730933023616, | |
| "eval_loss": 0.4267289638519287, | |
| "eval_runtime": 152.5032, | |
| "eval_samples_per_second": 37.088, | |
| "eval_steps_per_second": 4.636, | |
| "eval_wer": 0.3319959557702492, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.684088269454123, | |
| "eval_loss": 0.4224300980567932, | |
| "eval_runtime": 152.5862, | |
| "eval_samples_per_second": 37.068, | |
| "eval_steps_per_second": 4.633, | |
| "eval_wer": 0.33275023671582865, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.7034456058846303, | |
| "eval_loss": 0.4207303822040558, | |
| "eval_runtime": 154.5205, | |
| "eval_samples_per_second": 36.604, | |
| "eval_steps_per_second": 4.575, | |
| "eval_wer": 0.32984545264881, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.7228029423151374, | |
| "eval_loss": 0.4197385013103485, | |
| "eval_runtime": 152.0624, | |
| "eval_samples_per_second": 37.195, | |
| "eval_steps_per_second": 4.649, | |
| "eval_wer": 0.32978125852578194, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.7421602787456445, | |
| "grad_norm": 1.4507739543914795, | |
| "learning_rate": 3.189473684210526e-05, | |
| "loss": 0.3899, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.7421602787456445, | |
| "eval_loss": 0.4183507561683655, | |
| "eval_runtime": 157.4278, | |
| "eval_samples_per_second": 35.928, | |
| "eval_steps_per_second": 4.491, | |
| "eval_wer": 0.3258493684903147, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.7615176151761518, | |
| "eval_loss": 0.4164830148220062, | |
| "eval_runtime": 153.0475, | |
| "eval_samples_per_second": 36.956, | |
| "eval_steps_per_second": 4.619, | |
| "eval_wer": 0.3262024361669689, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.7808749516066589, | |
| "eval_loss": 0.41182050108909607, | |
| "eval_runtime": 152.4839, | |
| "eval_samples_per_second": 37.092, | |
| "eval_steps_per_second": 4.637, | |
| "eval_wer": 0.322864341769511, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.800232288037166, | |
| "eval_loss": 0.4134317636489868, | |
| "eval_runtime": 152.6353, | |
| "eval_samples_per_second": 37.056, | |
| "eval_steps_per_second": 4.632, | |
| "eval_wer": 0.3232334579769222, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.8195896244676733, | |
| "eval_loss": 0.4126824736595154, | |
| "eval_runtime": 152.5246, | |
| "eval_samples_per_second": 37.083, | |
| "eval_steps_per_second": 4.635, | |
| "eval_wer": 0.3209064210171559, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.8389469608981805, | |
| "grad_norm": 1.0012460947036743, | |
| "learning_rate": 1.6105263157894736e-05, | |
| "loss": 0.3665, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.8389469608981805, | |
| "eval_loss": 0.41083237528800964, | |
| "eval_runtime": 152.9993, | |
| "eval_samples_per_second": 36.967, | |
| "eval_steps_per_second": 4.621, | |
| "eval_wer": 0.32109900338624, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.8583042973286876, | |
| "eval_loss": 0.4090138077735901, | |
| "eval_runtime": 152.5291, | |
| "eval_samples_per_second": 37.081, | |
| "eval_steps_per_second": 4.635, | |
| "eval_wer": 0.3199114121102213, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.8776616337591947, | |
| "eval_loss": 0.407578706741333, | |
| "eval_runtime": 153.0711, | |
| "eval_samples_per_second": 36.95, | |
| "eval_steps_per_second": 4.619, | |
| "eval_wer": 0.32087432395564186, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.897018970189702, | |
| "eval_loss": 0.40649694204330444, | |
| "eval_runtime": 154.4136, | |
| "eval_samples_per_second": 36.629, | |
| "eval_steps_per_second": 4.579, | |
| "eval_wer": 0.31981512092567926, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.916376306620209, | |
| "eval_loss": 0.40620651841163635, | |
| "eval_runtime": 153.7508, | |
| "eval_samples_per_second": 36.787, | |
| "eval_steps_per_second": 4.598, | |
| "eval_wer": 0.31923737381842693, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.9357336430507162, | |
| "grad_norm": 0.7244949340820312, | |
| "learning_rate": 3.157894736842105e-07, | |
| "loss": 0.3698, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.9357336430507162, | |
| "eval_loss": 0.4060620963573456, | |
| "eval_runtime": 153.976, | |
| "eval_samples_per_second": 36.733, | |
| "eval_steps_per_second": 4.592, | |
| "eval_wer": 0.31928551941069794, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.9550909794812235, | |
| "eval_loss": 0.45229342579841614, | |
| "eval_runtime": 154.2948, | |
| "eval_samples_per_second": 36.657, | |
| "eval_steps_per_second": 4.582, | |
| "eval_wer": 0.3406140167867632, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.9744483159117305, | |
| "eval_loss": 0.4579542577266693, | |
| "eval_runtime": 151.5074, | |
| "eval_samples_per_second": 37.331, | |
| "eval_steps_per_second": 4.666, | |
| "eval_wer": 0.3517837941936416, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.9938056523422376, | |
| "eval_loss": 0.46043792366981506, | |
| "eval_runtime": 151.4438, | |
| "eval_samples_per_second": 37.347, | |
| "eval_steps_per_second": 4.668, | |
| "eval_wer": 0.35115790149411824, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.013162988772745, | |
| "eval_loss": 0.46549099683761597, | |
| "eval_runtime": 151.5994, | |
| "eval_samples_per_second": 37.309, | |
| "eval_steps_per_second": 4.664, | |
| "eval_wer": 0.3552181797756415, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.032520325203252, | |
| "grad_norm": 0.703632652759552, | |
| "learning_rate": 0.0001463076923076923, | |
| "loss": 0.3624, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.032520325203252, | |
| "eval_loss": 0.4670031666755676, | |
| "eval_runtime": 151.5063, | |
| "eval_samples_per_second": 37.332, | |
| "eval_steps_per_second": 4.666, | |
| "eval_wer": 0.35144677504774435, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.051877661633759, | |
| "eval_loss": 0.459250271320343, | |
| "eval_runtime": 153.0971, | |
| "eval_samples_per_second": 36.944, | |
| "eval_steps_per_second": 4.618, | |
| "eval_wer": 0.3628251833544639, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.0712349980642664, | |
| "eval_loss": 0.46061432361602783, | |
| "eval_runtime": 152.0732, | |
| "eval_samples_per_second": 37.193, | |
| "eval_steps_per_second": 4.649, | |
| "eval_wer": 0.3545922870761182, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.0905923344947737, | |
| "eval_loss": 0.46500489115715027, | |
| "eval_runtime": 151.985, | |
| "eval_samples_per_second": 37.214, | |
| "eval_steps_per_second": 4.652, | |
| "eval_wer": 0.35905377862656673, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.1099496709252805, | |
| "eval_loss": 0.46085453033447266, | |
| "eval_runtime": 152.4835, | |
| "eval_samples_per_second": 37.093, | |
| "eval_steps_per_second": 4.637, | |
| "eval_wer": 0.35483301503747333, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.129307007355788, | |
| "grad_norm": 0.5008242726325989, | |
| "learning_rate": 0.00013863076923076922, | |
| "loss": 0.3755, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.129307007355788, | |
| "eval_loss": 0.4708138406276703, | |
| "eval_runtime": 152.3457, | |
| "eval_samples_per_second": 37.126, | |
| "eval_steps_per_second": 4.641, | |
| "eval_wer": 0.35573173275986586, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.148664343786295, | |
| "eval_loss": 0.4649392366409302, | |
| "eval_runtime": 152.7087, | |
| "eval_samples_per_second": 37.038, | |
| "eval_steps_per_second": 4.63, | |
| "eval_wer": 0.3548009179759593, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.168021680216802, | |
| "eval_loss": 0.4624271094799042, | |
| "eval_runtime": 153.153, | |
| "eval_samples_per_second": 36.93, | |
| "eval_steps_per_second": 4.616, | |
| "eval_wer": 0.355956412190464, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.1873790166473093, | |
| "eval_loss": 0.45822229981422424, | |
| "eval_runtime": 156.1964, | |
| "eval_samples_per_second": 36.211, | |
| "eval_steps_per_second": 4.526, | |
| "eval_wer": 0.35229734717786587, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 2.2067363530778166, | |
| "eval_loss": 0.466250479221344, | |
| "eval_runtime": 152.6707, | |
| "eval_samples_per_second": 37.047, | |
| "eval_steps_per_second": 4.631, | |
| "eval_wer": 0.3586044197653705, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.226093689508324, | |
| "grad_norm": 0.9631055593490601, | |
| "learning_rate": 0.00013093846153846151, | |
| "loss": 0.3891, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.226093689508324, | |
| "eval_loss": 0.46153655648231506, | |
| "eval_runtime": 153.1909, | |
| "eval_samples_per_second": 36.921, | |
| "eval_steps_per_second": 4.615, | |
| "eval_wer": 0.3552181797756415, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.2454510259388307, | |
| "eval_loss": 0.4631531238555908, | |
| "eval_runtime": 152.9395, | |
| "eval_samples_per_second": 36.982, | |
| "eval_steps_per_second": 4.623, | |
| "eval_wer": 0.35886119625748264, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.264808362369338, | |
| "eval_loss": 0.4495234191417694, | |
| "eval_runtime": 153.0237, | |
| "eval_samples_per_second": 36.962, | |
| "eval_steps_per_second": 4.62, | |
| "eval_wer": 0.3425398404776043, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.2841656987998453, | |
| "eval_loss": 0.462666779756546, | |
| "eval_runtime": 152.4714, | |
| "eval_samples_per_second": 37.095, | |
| "eval_steps_per_second": 4.637, | |
| "eval_wer": 0.34942466017236123, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.303523035230352, | |
| "eval_loss": 0.4550352096557617, | |
| "eval_runtime": 152.8072, | |
| "eval_samples_per_second": 37.014, | |
| "eval_steps_per_second": 4.627, | |
| "eval_wer": 0.3451717995217538, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.3228803716608595, | |
| "grad_norm": 0.7961182594299316, | |
| "learning_rate": 0.00012324615384615384, | |
| "loss": 0.3946, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.3228803716608595, | |
| "eval_loss": 0.44988927245140076, | |
| "eval_runtime": 152.9644, | |
| "eval_samples_per_second": 36.976, | |
| "eval_steps_per_second": 4.622, | |
| "eval_wer": 0.3462310025517164, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.3422377080913668, | |
| "eval_loss": 0.4501667320728302, | |
| "eval_runtime": 153.061, | |
| "eval_samples_per_second": 36.953, | |
| "eval_steps_per_second": 4.619, | |
| "eval_wer": 0.341978141901109, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.3615950445218736, | |
| "eval_loss": 0.4580215513706207, | |
| "eval_runtime": 153.2108, | |
| "eval_samples_per_second": 36.916, | |
| "eval_steps_per_second": 4.615, | |
| "eval_wer": 0.3412399094862865, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "eval_loss": 0.4506891667842865, | |
| "eval_runtime": 153.6611, | |
| "eval_samples_per_second": 36.808, | |
| "eval_steps_per_second": 4.601, | |
| "eval_wer": 0.34339041260772574, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.4003097173828882, | |
| "eval_loss": 0.44618555903434753, | |
| "eval_runtime": 153.273, | |
| "eval_samples_per_second": 36.901, | |
| "eval_steps_per_second": 4.613, | |
| "eval_wer": 0.34475453772207154, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.419667053813395, | |
| "grad_norm": 0.828158974647522, | |
| "learning_rate": 0.00011556923076923076, | |
| "loss": 0.3824, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.419667053813395, | |
| "eval_loss": 0.44126543402671814, | |
| "eval_runtime": 153.3979, | |
| "eval_samples_per_second": 36.871, | |
| "eval_steps_per_second": 4.609, | |
| "eval_wer": 0.34127200654780054, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.4390243902439024, | |
| "eval_loss": 0.44880929589271545, | |
| "eval_runtime": 153.7143, | |
| "eval_samples_per_second": 36.796, | |
| "eval_steps_per_second": 4.599, | |
| "eval_wer": 0.3443212273916323, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.4583817266744097, | |
| "eval_loss": 0.44148463010787964, | |
| "eval_runtime": 153.647, | |
| "eval_samples_per_second": 36.812, | |
| "eval_steps_per_second": 4.601, | |
| "eval_wer": 0.3431657331771276, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.4777390631049165, | |
| "eval_loss": 0.44202256202697754, | |
| "eval_runtime": 153.5743, | |
| "eval_samples_per_second": 36.829, | |
| "eval_steps_per_second": 4.604, | |
| "eval_wer": 0.34093498740190337, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.497096399535424, | |
| "eval_loss": 0.4379221200942993, | |
| "eval_runtime": 153.5736, | |
| "eval_samples_per_second": 36.829, | |
| "eval_steps_per_second": 4.604, | |
| "eval_wer": 0.3361204281748006, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.516453735965931, | |
| "grad_norm": 1.2163615226745605, | |
| "learning_rate": 0.00010787692307692307, | |
| "loss": 0.372, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.516453735965931, | |
| "eval_loss": 0.43855318427085876, | |
| "eval_runtime": 153.5476, | |
| "eval_samples_per_second": 36.835, | |
| "eval_steps_per_second": 4.604, | |
| "eval_wer": 0.3334884691306511, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.535811072396438, | |
| "eval_loss": 0.44449883699417114, | |
| "eval_runtime": 153.7016, | |
| "eval_samples_per_second": 36.799, | |
| "eval_steps_per_second": 4.6, | |
| "eval_wer": 0.3397794931873987, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 2.5551684088269453, | |
| "eval_loss": 0.4401286542415619, | |
| "eval_runtime": 154.2488, | |
| "eval_samples_per_second": 36.668, | |
| "eval_steps_per_second": 4.584, | |
| "eval_wer": 0.3392819887339314, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.5745257452574526, | |
| "eval_loss": 0.437770813703537, | |
| "eval_runtime": 153.8927, | |
| "eval_samples_per_second": 36.753, | |
| "eval_steps_per_second": 4.594, | |
| "eval_wer": 0.335077273675595, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 2.59388308168796, | |
| "eval_loss": 0.4315861463546753, | |
| "eval_runtime": 153.7886, | |
| "eval_samples_per_second": 36.778, | |
| "eval_steps_per_second": 4.597, | |
| "eval_wer": 0.33517356486013705, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 2.6132404181184667, | |
| "grad_norm": 1.084632158279419, | |
| "learning_rate": 0.0001002, | |
| "loss": 0.3521, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.6132404181184667, | |
| "eval_loss": 0.43864014744758606, | |
| "eval_runtime": 153.9711, | |
| "eval_samples_per_second": 36.734, | |
| "eval_steps_per_second": 4.592, | |
| "eval_wer": 0.33398597358411836, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.632597754548974, | |
| "eval_loss": 0.43551018834114075, | |
| "eval_runtime": 154.3017, | |
| "eval_samples_per_second": 36.655, | |
| "eval_steps_per_second": 4.582, | |
| "eval_wer": 0.33096884980180064, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 2.6519550909794813, | |
| "eval_loss": 0.4325660765171051, | |
| "eval_runtime": 154.4812, | |
| "eval_samples_per_second": 36.613, | |
| "eval_steps_per_second": 4.577, | |
| "eval_wer": 0.3343871868530436, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 2.6713124274099886, | |
| "eval_loss": 0.4263465404510498, | |
| "eval_runtime": 154.0733, | |
| "eval_samples_per_second": 36.71, | |
| "eval_steps_per_second": 4.589, | |
| "eval_wer": 0.32629872735151094, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.6906697638404955, | |
| "eval_loss": 0.42636117339134216, | |
| "eval_runtime": 154.1615, | |
| "eval_samples_per_second": 36.689, | |
| "eval_steps_per_second": 4.586, | |
| "eval_wer": 0.32353838006130536, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 2.710027100271003, | |
| "grad_norm": 1.1979655027389526, | |
| "learning_rate": 9.25076923076923e-05, | |
| "loss": 0.3592, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.710027100271003, | |
| "eval_loss": 0.4322036802768707, | |
| "eval_runtime": 154.5242, | |
| "eval_samples_per_second": 36.603, | |
| "eval_steps_per_second": 4.575, | |
| "eval_wer": 0.3299738408948661, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.72938443670151, | |
| "eval_loss": 0.4294193983078003, | |
| "eval_runtime": 154.4329, | |
| "eval_samples_per_second": 36.624, | |
| "eval_steps_per_second": 4.578, | |
| "eval_wer": 0.3261542905746979, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.748741773132017, | |
| "eval_loss": 0.43099814653396606, | |
| "eval_runtime": 154.4209, | |
| "eval_samples_per_second": 36.627, | |
| "eval_steps_per_second": 4.578, | |
| "eval_wer": 0.32329765209995026, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 2.7680991095625243, | |
| "eval_loss": 0.42700281739234924, | |
| "eval_runtime": 155.4008, | |
| "eval_samples_per_second": 36.396, | |
| "eval_steps_per_second": 4.55, | |
| "eval_wer": 0.3268122803357353, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 2.7874564459930316, | |
| "eval_loss": 0.4209098219871521, | |
| "eval_runtime": 156.5271, | |
| "eval_samples_per_second": 36.134, | |
| "eval_steps_per_second": 4.517, | |
| "eval_wer": 0.3254321066906325, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 2.8068137824235384, | |
| "grad_norm": 0.6974443793296814, | |
| "learning_rate": 8.48153846153846e-05, | |
| "loss": 0.3459, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.8068137824235384, | |
| "eval_loss": 0.42542555928230286, | |
| "eval_runtime": 157.9392, | |
| "eval_samples_per_second": 35.811, | |
| "eval_steps_per_second": 4.476, | |
| "eval_wer": 0.32729373625844554, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.8261711188540457, | |
| "eval_loss": 0.42783817648887634, | |
| "eval_runtime": 155.0217, | |
| "eval_samples_per_second": 36.485, | |
| "eval_steps_per_second": 4.561, | |
| "eval_wer": 0.3231532153231372, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 2.845528455284553, | |
| "eval_loss": 0.4212438464164734, | |
| "eval_runtime": 154.853, | |
| "eval_samples_per_second": 36.525, | |
| "eval_steps_per_second": 4.566, | |
| "eval_wer": 0.3215002166551652, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 2.86488579171506, | |
| "eval_loss": 0.4169256389141083, | |
| "eval_runtime": 154.5142, | |
| "eval_samples_per_second": 36.605, | |
| "eval_steps_per_second": 4.576, | |
| "eval_wer": 0.31928551941069794, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 2.884243128145567, | |
| "eval_loss": 0.42132049798965454, | |
| "eval_runtime": 154.8091, | |
| "eval_samples_per_second": 36.535, | |
| "eval_steps_per_second": 4.567, | |
| "eval_wer": 0.3195262473720531, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 2.9036004645760745, | |
| "grad_norm": 1.099702000617981, | |
| "learning_rate": 7.713846153846152e-05, | |
| "loss": 0.3483, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.9036004645760745, | |
| "eval_loss": 0.41696369647979736, | |
| "eval_runtime": 155.3223, | |
| "eval_samples_per_second": 36.415, | |
| "eval_steps_per_second": 4.552, | |
| "eval_wer": 0.31652517212049236, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.9229578010065813, | |
| "eval_loss": 0.41230952739715576, | |
| "eval_runtime": 154.9181, | |
| "eval_samples_per_second": 36.51, | |
| "eval_steps_per_second": 4.564, | |
| "eval_wer": 0.31418208662996905, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 2.9423151374370886, | |
| "eval_loss": 0.4116990566253662, | |
| "eval_runtime": 154.97, | |
| "eval_samples_per_second": 36.497, | |
| "eval_steps_per_second": 4.562, | |
| "eval_wer": 0.31337966009211854, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 2.961672473867596, | |
| "eval_loss": 0.410386323928833, | |
| "eval_runtime": 155.0232, | |
| "eval_samples_per_second": 36.485, | |
| "eval_steps_per_second": 4.561, | |
| "eval_wer": 0.31158222464733354, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 2.9810298102981028, | |
| "eval_loss": 0.41244322061538696, | |
| "eval_runtime": 154.4682, | |
| "eval_samples_per_second": 36.616, | |
| "eval_steps_per_second": 4.577, | |
| "eval_wer": 0.31419813516072603, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 3.00038714672861, | |
| "grad_norm": 0.725528359413147, | |
| "learning_rate": 6.946153846153845e-05, | |
| "loss": 0.3501, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 3.00038714672861, | |
| "eval_loss": 0.40684688091278076, | |
| "eval_runtime": 154.5863, | |
| "eval_samples_per_second": 36.588, | |
| "eval_steps_per_second": 4.573, | |
| "eval_wer": 0.31272167033108117, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 3.0197444831591174, | |
| "eval_loss": 0.4200752079486847, | |
| "eval_runtime": 154.5821, | |
| "eval_samples_per_second": 36.589, | |
| "eval_steps_per_second": 4.574, | |
| "eval_wer": 0.3087416347033429, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 3.0391018195896247, | |
| "eval_loss": 0.4186869263648987, | |
| "eval_runtime": 154.7417, | |
| "eval_samples_per_second": 36.551, | |
| "eval_steps_per_second": 4.569, | |
| "eval_wer": 0.3137808733610438, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 3.0584591560201315, | |
| "eval_loss": 0.41133585572242737, | |
| "eval_runtime": 155.21, | |
| "eval_samples_per_second": 36.441, | |
| "eval_steps_per_second": 4.555, | |
| "eval_wer": 0.31092423488629617, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 3.077816492450639, | |
| "eval_loss": 0.4191639721393585, | |
| "eval_runtime": 155.276, | |
| "eval_samples_per_second": 36.425, | |
| "eval_steps_per_second": 4.553, | |
| "eval_wer": 0.30851695527274475, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 3.097173828881146, | |
| "grad_norm": 0.5114701390266418, | |
| "learning_rate": 6.176923076923076e-05, | |
| "loss": 0.2754, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.097173828881146, | |
| "eval_loss": 0.4161028265953064, | |
| "eval_runtime": 154.597, | |
| "eval_samples_per_second": 36.585, | |
| "eval_steps_per_second": 4.573, | |
| "eval_wer": 0.30901445972621205, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.116531165311653, | |
| "eval_loss": 0.4183988571166992, | |
| "eval_runtime": 155.0124, | |
| "eval_samples_per_second": 36.487, | |
| "eval_steps_per_second": 4.561, | |
| "eval_wer": 0.307152830158399, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 3.1358885017421603, | |
| "eval_loss": 0.4186756908893585, | |
| "eval_runtime": 154.8535, | |
| "eval_samples_per_second": 36.525, | |
| "eval_steps_per_second": 4.566, | |
| "eval_wer": 0.3060936271284364, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 3.1552458381726676, | |
| "eval_loss": 0.4193824827671051, | |
| "eval_runtime": 154.3195, | |
| "eval_samples_per_second": 36.651, | |
| "eval_steps_per_second": 4.581, | |
| "eval_wer": 0.3059652388823803, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 3.1746031746031744, | |
| "eval_loss": 0.40788766741752625, | |
| "eval_runtime": 154.9673, | |
| "eval_samples_per_second": 36.498, | |
| "eval_steps_per_second": 4.562, | |
| "eval_wer": 0.3038949784147261, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 3.1939605110336817, | |
| "grad_norm": 0.5594165325164795, | |
| "learning_rate": 5.4076923076923074e-05, | |
| "loss": 0.2802, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 3.1939605110336817, | |
| "eval_loss": 0.41461309790611267, | |
| "eval_runtime": 154.8662, | |
| "eval_samples_per_second": 36.522, | |
| "eval_steps_per_second": 4.565, | |
| "eval_wer": 0.30424804609138034, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 3.213317847464189, | |
| "eval_loss": 0.4168522357940674, | |
| "eval_runtime": 155.0374, | |
| "eval_samples_per_second": 36.482, | |
| "eval_steps_per_second": 4.56, | |
| "eval_wer": 0.30116672818603457, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 3.2326751838946963, | |
| "eval_loss": 0.40926745533943176, | |
| "eval_runtime": 154.7423, | |
| "eval_samples_per_second": 36.551, | |
| "eval_steps_per_second": 4.569, | |
| "eval_wer": 0.3023864165235673, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 3.252032520325203, | |
| "eval_loss": 0.4115259051322937, | |
| "eval_runtime": 154.7933, | |
| "eval_samples_per_second": 36.539, | |
| "eval_steps_per_second": 4.567, | |
| "eval_wer": 0.3005408354865112, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 3.2713898567557105, | |
| "eval_loss": 0.40197211503982544, | |
| "eval_runtime": 155.5964, | |
| "eval_samples_per_second": 36.35, | |
| "eval_steps_per_second": 4.544, | |
| "eval_wer": 0.30410360931456726, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 3.290747193186218, | |
| "grad_norm": 1.4730154275894165, | |
| "learning_rate": 4.6384615384615385e-05, | |
| "loss": 0.2723, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.290747193186218, | |
| "eval_loss": 0.4058869779109955, | |
| "eval_runtime": 155.0898, | |
| "eval_samples_per_second": 36.469, | |
| "eval_steps_per_second": 4.559, | |
| "eval_wer": 0.30442457992970745, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.3101045296167246, | |
| "eval_loss": 0.40676185488700867, | |
| "eval_runtime": 155.0576, | |
| "eval_samples_per_second": 36.477, | |
| "eval_steps_per_second": 4.56, | |
| "eval_wer": 0.3013753590858757, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 3.329461866047232, | |
| "eval_loss": 0.40653425455093384, | |
| "eval_runtime": 155.8377, | |
| "eval_samples_per_second": 36.294, | |
| "eval_steps_per_second": 4.537, | |
| "eval_wer": 0.30878978029561394, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 3.3488192024777392, | |
| "eval_loss": 0.4082197844982147, | |
| "eval_runtime": 155.7924, | |
| "eval_samples_per_second": 36.305, | |
| "eval_steps_per_second": 4.538, | |
| "eval_wer": 0.3010543884707355, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 3.368176538908246, | |
| "eval_loss": 0.4083554446697235, | |
| "eval_runtime": 155.6775, | |
| "eval_samples_per_second": 36.332, | |
| "eval_steps_per_second": 4.541, | |
| "eval_wer": 0.3007494663863523, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 3.3875338753387534, | |
| "grad_norm": 0.5211097598075867, | |
| "learning_rate": 3.87076923076923e-05, | |
| "loss": 0.2557, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 3.3875338753387534, | |
| "eval_loss": 0.4009736180305481, | |
| "eval_runtime": 155.105, | |
| "eval_samples_per_second": 36.466, | |
| "eval_steps_per_second": 4.558, | |
| "eval_wer": 0.29924090449519347, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 3.4068912117692607, | |
| "eval_loss": 0.4061805009841919, | |
| "eval_runtime": 154.8792, | |
| "eval_samples_per_second": 36.519, | |
| "eval_steps_per_second": 4.565, | |
| "eval_wer": 0.2999951854407729, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 3.4262485481997675, | |
| "eval_loss": 0.40264037251472473, | |
| "eval_runtime": 155.9957, | |
| "eval_samples_per_second": 36.257, | |
| "eval_steps_per_second": 4.532, | |
| "eval_wer": 0.2980533132191748, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 3.445605884630275, | |
| "eval_loss": 0.40035372972488403, | |
| "eval_runtime": 155.0928, | |
| "eval_samples_per_second": 36.468, | |
| "eval_steps_per_second": 4.559, | |
| "eval_wer": 0.29893598241081026, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 3.464963221060782, | |
| "eval_loss": 0.40443336963653564, | |
| "eval_runtime": 154.9305, | |
| "eval_samples_per_second": 36.507, | |
| "eval_steps_per_second": 4.563, | |
| "eval_wer": 0.29906437065686636, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 3.484320557491289, | |
| "grad_norm": 0.7458967566490173, | |
| "learning_rate": 3.101538461538461e-05, | |
| "loss": 0.2578, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.484320557491289, | |
| "eval_loss": 0.4003549814224243, | |
| "eval_runtime": 155.7394, | |
| "eval_samples_per_second": 36.317, | |
| "eval_steps_per_second": 4.54, | |
| "eval_wer": 0.29660894545104394, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.5036778939217963, | |
| "eval_loss": 0.40592488646507263, | |
| "eval_runtime": 159.2644, | |
| "eval_samples_per_second": 35.513, | |
| "eval_steps_per_second": 4.439, | |
| "eval_wer": 0.29449053939111874, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 3.5230352303523036, | |
| "eval_loss": 0.4014962613582611, | |
| "eval_runtime": 155.6654, | |
| "eval_samples_per_second": 36.334, | |
| "eval_steps_per_second": 4.542, | |
| "eval_wer": 0.29632007189741777, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 3.5423925667828104, | |
| "eval_loss": 0.396659791469574, | |
| "eval_runtime": 156.1536, | |
| "eval_samples_per_second": 36.221, | |
| "eval_steps_per_second": 4.528, | |
| "eval_wer": 0.29585466450546455, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 3.5617499032133177, | |
| "eval_loss": 0.4001907706260681, | |
| "eval_runtime": 155.7578, | |
| "eval_samples_per_second": 36.313, | |
| "eval_steps_per_second": 4.539, | |
| "eval_wer": 0.29412142318370754, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 3.581107239643825, | |
| "grad_norm": 0.6122294664382935, | |
| "learning_rate": 2.3338461538461535e-05, | |
| "loss": 0.2508, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 3.581107239643825, | |
| "eval_loss": 0.39826107025146484, | |
| "eval_runtime": 155.467, | |
| "eval_samples_per_second": 36.381, | |
| "eval_steps_per_second": 4.548, | |
| "eval_wer": 0.2945547335141468, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 3.600464576074332, | |
| "eval_loss": 0.3958674967288971, | |
| "eval_runtime": 155.8242, | |
| "eval_samples_per_second": 36.297, | |
| "eval_steps_per_second": 4.537, | |
| "eval_wer": 0.29365601579175427, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 3.619821912504839, | |
| "eval_loss": 0.3970955014228821, | |
| "eval_runtime": 155.4329, | |
| "eval_samples_per_second": 36.389, | |
| "eval_steps_per_second": 4.549, | |
| "eval_wer": 0.2942016658374926, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 3.6391792489353465, | |
| "eval_loss": 0.3906669616699219, | |
| "eval_runtime": 155.4929, | |
| "eval_samples_per_second": 36.375, | |
| "eval_steps_per_second": 4.547, | |
| "eval_wer": 0.2923239877389225, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 3.658536585365854, | |
| "eval_loss": 0.39506247639656067, | |
| "eval_runtime": 155.5246, | |
| "eval_samples_per_second": 36.367, | |
| "eval_steps_per_second": 4.546, | |
| "eval_wer": 0.2903981640480814, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 3.6778939217963607, | |
| "grad_norm": 0.33715635538101196, | |
| "learning_rate": 1.5646153846153846e-05, | |
| "loss": 0.2659, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.6778939217963607, | |
| "eval_loss": 0.3892674744129181, | |
| "eval_runtime": 155.5533, | |
| "eval_samples_per_second": 36.361, | |
| "eval_steps_per_second": 4.545, | |
| "eval_wer": 0.29309431721525897, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.697251258226868, | |
| "eval_loss": 0.39077267050743103, | |
| "eval_runtime": 155.448, | |
| "eval_samples_per_second": 36.385, | |
| "eval_steps_per_second": 4.548, | |
| "eval_wer": 0.2900771934329412, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 3.7166085946573753, | |
| "eval_loss": 0.39407432079315186, | |
| "eval_runtime": 155.4696, | |
| "eval_samples_per_second": 36.38, | |
| "eval_steps_per_second": 4.548, | |
| "eval_wer": 0.2884241947649693, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 3.7359659310878826, | |
| "eval_loss": 0.3924821615219116, | |
| "eval_runtime": 155.4791, | |
| "eval_samples_per_second": 36.378, | |
| "eval_steps_per_second": 4.547, | |
| "eval_wer": 0.2890019418722216, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 3.7553232675183894, | |
| "eval_loss": 0.3916691243648529, | |
| "eval_runtime": 155.9516, | |
| "eval_samples_per_second": 36.268, | |
| "eval_steps_per_second": 4.533, | |
| "eval_wer": 0.2892908154258478, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 3.7746806039488967, | |
| "grad_norm": 0.4647356867790222, | |
| "learning_rate": 7.953846153846153e-06, | |
| "loss": 0.2488, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.7746806039488967, | |
| "eval_loss": 0.39043277502059937, | |
| "eval_runtime": 155.3552, | |
| "eval_samples_per_second": 36.407, | |
| "eval_steps_per_second": 4.551, | |
| "eval_wer": 0.2884562918264833, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.794037940379404, | |
| "eval_loss": 0.39014604687690735, | |
| "eval_runtime": 155.2137, | |
| "eval_samples_per_second": 36.44, | |
| "eval_steps_per_second": 4.555, | |
| "eval_wer": 0.2887933109723805, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 3.813395276809911, | |
| "eval_loss": 0.3883425295352936, | |
| "eval_runtime": 155.5369, | |
| "eval_samples_per_second": 36.364, | |
| "eval_steps_per_second": 4.546, | |
| "eval_wer": 0.28922662130281973, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 3.832752613240418, | |
| "eval_loss": 0.38913780450820923, | |
| "eval_runtime": 155.8958, | |
| "eval_samples_per_second": 36.281, | |
| "eval_steps_per_second": 4.535, | |
| "eval_wer": 0.28903403893373564, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 3.8521099496709255, | |
| "eval_loss": 0.3888201415538788, | |
| "eval_runtime": 155.6372, | |
| "eval_samples_per_second": 36.341, | |
| "eval_steps_per_second": 4.543, | |
| "eval_wer": 0.2888254080338945, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 3.8714672861014323, | |
| "grad_norm": 0.3741956055164337, | |
| "learning_rate": 2.615384615384615e-07, | |
| "loss": 0.2602, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.8714672861014323, | |
| "eval_loss": 0.38884833455085754, | |
| "eval_runtime": 155.0772, | |
| "eval_samples_per_second": 36.472, | |
| "eval_steps_per_second": 4.559, | |
| "eval_wer": 0.2884883888879973, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.8714672861014323, | |
| "step": 20000, | |
| "total_flos": 2.249387574100498e+19, | |
| "train_loss": 0.15996522521972656, | |
| "train_runtime": 19346.8732, | |
| "train_samples_per_second": 8.27, | |
| "train_steps_per_second": 1.034 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 20000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 400, | |
| "total_flos": 2.249387574100498e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |