| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 100, | |
| "global_step": 5300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.18867924528301888, | |
| "grad_norm": 4.437798503453146, | |
| "learning_rate": 7.341400415124174e-07, | |
| "loss": 5.3755, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18867924528301888, | |
| "eval_loss": 1.0850828886032104, | |
| "eval_runtime": 149.7427, | |
| "eval_samples_per_second": 452.857, | |
| "eval_steps_per_second": 0.888, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.37735849056603776, | |
| "grad_norm": 1.6459101244560304, | |
| "learning_rate": 8.446391282690362e-07, | |
| "loss": 0.9681, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.37735849056603776, | |
| "eval_loss": 0.9421371817588806, | |
| "eval_runtime": 149.748, | |
| "eval_samples_per_second": 452.841, | |
| "eval_steps_per_second": 0.888, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5660377358490566, | |
| "grad_norm": 4.060133963944513, | |
| "learning_rate": 9.09276950385592e-07, | |
| "loss": 0.9538, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5660377358490566, | |
| "eval_loss": 0.9655478596687317, | |
| "eval_runtime": 149.5973, | |
| "eval_samples_per_second": 453.297, | |
| "eval_steps_per_second": 0.889, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 41.89172794379802, | |
| "learning_rate": 9.551382150256551e-07, | |
| "loss": 0.9761, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "eval_loss": 0.976280152797699, | |
| "eval_runtime": 149.6311, | |
| "eval_samples_per_second": 453.195, | |
| "eval_steps_per_second": 0.889, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9433962264150944, | |
| "grad_norm": 70.58972777939259, | |
| "learning_rate": 9.907109755120069e-07, | |
| "loss": 0.9786, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9433962264150944, | |
| "eval_loss": 0.9799396991729736, | |
| "eval_runtime": 149.6428, | |
| "eval_samples_per_second": 453.159, | |
| "eval_steps_per_second": 0.889, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.1320754716981132, | |
| "grad_norm": 154.52662720587995, | |
| "learning_rate": 1e-06, | |
| "loss": 0.9851, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1320754716981132, | |
| "eval_loss": 0.9846892952919006, | |
| "eval_runtime": 150.8433, | |
| "eval_samples_per_second": 449.553, | |
| "eval_steps_per_second": 0.882, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.320754716981132, | |
| "grad_norm": 147.98867672055184, | |
| "learning_rate": 1e-06, | |
| "loss": 0.9876, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.320754716981132, | |
| "eval_loss": 0.9889557957649231, | |
| "eval_runtime": 149.6471, | |
| "eval_samples_per_second": 453.146, | |
| "eval_steps_per_second": 0.889, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.509433962264151, | |
| "grad_norm": 93.50922205523368, | |
| "learning_rate": 1e-06, | |
| "loss": 0.9892, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.509433962264151, | |
| "eval_loss": 0.9961430430412292, | |
| "eval_runtime": 149.7573, | |
| "eval_samples_per_second": 452.813, | |
| "eval_steps_per_second": 0.888, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6981132075471699, | |
| "grad_norm": 53.06237347231676, | |
| "learning_rate": 1e-06, | |
| "loss": 0.9983, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.6981132075471699, | |
| "eval_loss": 1.0119000673294067, | |
| "eval_runtime": 149.7765, | |
| "eval_samples_per_second": 452.755, | |
| "eval_steps_per_second": 0.888, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.8867924528301887, | |
| "grad_norm": 65.28104430915417, | |
| "learning_rate": 1e-06, | |
| "loss": 1.0228, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.8867924528301887, | |
| "eval_loss": 1.027756690979004, | |
| "eval_runtime": 149.4845, | |
| "eval_samples_per_second": 453.639, | |
| "eval_steps_per_second": 0.89, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0754716981132075, | |
| "grad_norm": 84.50132037152585, | |
| "learning_rate": 1e-06, | |
| "loss": 1.0433, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.0754716981132075, | |
| "eval_loss": 1.059239149093628, | |
| "eval_runtime": 149.7269, | |
| "eval_samples_per_second": 452.905, | |
| "eval_steps_per_second": 0.888, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.2641509433962264, | |
| "grad_norm": 29.00236133301261, | |
| "learning_rate": 1e-06, | |
| "loss": 1.0644, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.2641509433962264, | |
| "eval_loss": 1.0800738334655762, | |
| "eval_runtime": 149.9162, | |
| "eval_samples_per_second": 452.333, | |
| "eval_steps_per_second": 0.887, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.452830188679245, | |
| "grad_norm": 411.7064146634104, | |
| "learning_rate": 1e-06, | |
| "loss": 1.0917, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.452830188679245, | |
| "eval_loss": 1.1134124994277954, | |
| "eval_runtime": 149.7734, | |
| "eval_samples_per_second": 452.764, | |
| "eval_steps_per_second": 0.888, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.641509433962264, | |
| "grad_norm": 229.88146571929195, | |
| "learning_rate": 1e-06, | |
| "loss": 1.1147, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.641509433962264, | |
| "eval_loss": 1.1160598993301392, | |
| "eval_runtime": 149.7257, | |
| "eval_samples_per_second": 452.908, | |
| "eval_steps_per_second": 0.888, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.830188679245283, | |
| "grad_norm": 7.414451982821549, | |
| "learning_rate": 1e-06, | |
| "loss": 1.1581, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.830188679245283, | |
| "eval_loss": 1.2751212120056152, | |
| "eval_runtime": 149.6311, | |
| "eval_samples_per_second": 453.195, | |
| "eval_steps_per_second": 0.889, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.018867924528302, | |
| "grad_norm": 51.75196724068806, | |
| "learning_rate": 1e-06, | |
| "loss": 1.4543, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.018867924528302, | |
| "eval_loss": 1.5799795389175415, | |
| "eval_runtime": 149.5252, | |
| "eval_samples_per_second": 453.516, | |
| "eval_steps_per_second": 0.889, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.207547169811321, | |
| "grad_norm": 8.197678623516119, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6332, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.207547169811321, | |
| "eval_loss": 1.7048678398132324, | |
| "eval_runtime": 149.7414, | |
| "eval_samples_per_second": 452.861, | |
| "eval_steps_per_second": 0.888, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.3962264150943398, | |
| "grad_norm": 9.609718378646408, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7915, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.3962264150943398, | |
| "eval_loss": 1.8891370296478271, | |
| "eval_runtime": 149.8519, | |
| "eval_samples_per_second": 452.527, | |
| "eval_steps_per_second": 0.888, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.5849056603773586, | |
| "grad_norm": 87.68727169428583, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9812, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.5849056603773586, | |
| "eval_loss": 2.057527780532837, | |
| "eval_runtime": 149.7699, | |
| "eval_samples_per_second": 452.774, | |
| "eval_steps_per_second": 0.888, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.7735849056603774, | |
| "grad_norm": 12.995456781993322, | |
| "learning_rate": 1e-06, | |
| "loss": 2.0982, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.7735849056603774, | |
| "eval_loss": 2.1332101821899414, | |
| "eval_runtime": 149.6771, | |
| "eval_samples_per_second": 453.055, | |
| "eval_steps_per_second": 0.889, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.9622641509433962, | |
| "grad_norm": 13.605685201407708, | |
| "learning_rate": 1e-06, | |
| "loss": 2.1784, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.9622641509433962, | |
| "eval_loss": 2.2318856716156006, | |
| "eval_runtime": 149.8102, | |
| "eval_samples_per_second": 452.653, | |
| "eval_steps_per_second": 0.888, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.150943396226415, | |
| "grad_norm": 12.732750088048045, | |
| "learning_rate": 1e-06, | |
| "loss": 2.2661, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.150943396226415, | |
| "eval_loss": 2.3092291355133057, | |
| "eval_runtime": 149.6232, | |
| "eval_samples_per_second": 453.219, | |
| "eval_steps_per_second": 0.889, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.339622641509434, | |
| "grad_norm": 13.451602224902139, | |
| "learning_rate": 1e-06, | |
| "loss": 2.3259, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.339622641509434, | |
| "eval_loss": 2.3665931224823, | |
| "eval_runtime": 149.8265, | |
| "eval_samples_per_second": 452.604, | |
| "eval_steps_per_second": 0.888, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.528301886792453, | |
| "grad_norm": 13.331046925646502, | |
| "learning_rate": 1e-06, | |
| "loss": 2.3908, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.528301886792453, | |
| "eval_loss": 2.4305644035339355, | |
| "eval_runtime": 149.9235, | |
| "eval_samples_per_second": 452.311, | |
| "eval_steps_per_second": 0.887, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.716981132075472, | |
| "grad_norm": 17.21909294548451, | |
| "learning_rate": 1e-06, | |
| "loss": 2.4521, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.716981132075472, | |
| "eval_loss": 2.545186758041382, | |
| "eval_runtime": 149.7293, | |
| "eval_samples_per_second": 452.897, | |
| "eval_steps_per_second": 0.888, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.90566037735849, | |
| "grad_norm": 16.704993975768705, | |
| "learning_rate": 1e-06, | |
| "loss": 2.5541, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.90566037735849, | |
| "eval_loss": 2.6449944972991943, | |
| "eval_runtime": 149.6085, | |
| "eval_samples_per_second": 453.263, | |
| "eval_steps_per_second": 0.889, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.09433962264151, | |
| "grad_norm": 17.23044971023092, | |
| "learning_rate": 1e-06, | |
| "loss": 2.7022, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 5.09433962264151, | |
| "eval_loss": 2.7793149948120117, | |
| "eval_runtime": 149.7512, | |
| "eval_samples_per_second": 452.831, | |
| "eval_steps_per_second": 0.888, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 5.283018867924528, | |
| "grad_norm": 17.47208816215434, | |
| "learning_rate": 1e-06, | |
| "loss": 2.8302, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.283018867924528, | |
| "eval_loss": 2.888209581375122, | |
| "eval_runtime": 149.8872, | |
| "eval_samples_per_second": 452.42, | |
| "eval_steps_per_second": 0.887, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.471698113207547, | |
| "grad_norm": 17.705909664870283, | |
| "learning_rate": 1e-06, | |
| "loss": 2.9344, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 5.471698113207547, | |
| "eval_loss": 2.9951322078704834, | |
| "eval_runtime": 149.9777, | |
| "eval_samples_per_second": 452.147, | |
| "eval_steps_per_second": 0.887, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 5.660377358490566, | |
| "grad_norm": 19.101859660265248, | |
| "learning_rate": 1e-06, | |
| "loss": 3.0392, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.660377358490566, | |
| "eval_loss": 3.0882513523101807, | |
| "eval_runtime": 149.9153, | |
| "eval_samples_per_second": 452.335, | |
| "eval_steps_per_second": 0.887, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.849056603773585, | |
| "grad_norm": 19.07389675964658, | |
| "learning_rate": 1e-06, | |
| "loss": 3.0997, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 5.849056603773585, | |
| "eval_loss": 3.1218082904815674, | |
| "eval_runtime": 149.7905, | |
| "eval_samples_per_second": 452.712, | |
| "eval_steps_per_second": 0.888, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 6.037735849056604, | |
| "grad_norm": 18.92461175759707, | |
| "learning_rate": 1e-06, | |
| "loss": 3.1321, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 6.037735849056604, | |
| "eval_loss": 3.153890609741211, | |
| "eval_runtime": 149.8617, | |
| "eval_samples_per_second": 452.497, | |
| "eval_steps_per_second": 0.887, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 6.226415094339623, | |
| "grad_norm": 20.348062318970786, | |
| "learning_rate": 1e-06, | |
| "loss": 3.1567, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 6.226415094339623, | |
| "eval_loss": 3.180058240890503, | |
| "eval_runtime": 149.6522, | |
| "eval_samples_per_second": 453.131, | |
| "eval_steps_per_second": 0.889, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 6.415094339622642, | |
| "grad_norm": 19.380403417398043, | |
| "learning_rate": 1e-06, | |
| "loss": 3.1885, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 6.415094339622642, | |
| "eval_loss": 3.2085678577423096, | |
| "eval_runtime": 149.9193, | |
| "eval_samples_per_second": 452.323, | |
| "eval_steps_per_second": 0.887, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 6.60377358490566, | |
| "grad_norm": 19.172157540450666, | |
| "learning_rate": 1e-06, | |
| "loss": 3.2066, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 6.60377358490566, | |
| "eval_loss": 3.2430124282836914, | |
| "eval_runtime": 149.5804, | |
| "eval_samples_per_second": 453.348, | |
| "eval_steps_per_second": 0.889, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 6.7924528301886795, | |
| "grad_norm": 20.834808216022473, | |
| "learning_rate": 1e-06, | |
| "loss": 3.2529, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 6.7924528301886795, | |
| "eval_loss": 3.2701263427734375, | |
| "eval_runtime": 149.8287, | |
| "eval_samples_per_second": 452.597, | |
| "eval_steps_per_second": 0.888, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 6.981132075471698, | |
| "grad_norm": 20.71072973537811, | |
| "learning_rate": 1e-06, | |
| "loss": 3.2788, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 6.981132075471698, | |
| "eval_loss": 3.3030686378479004, | |
| "eval_runtime": 149.8206, | |
| "eval_samples_per_second": 452.621, | |
| "eval_steps_per_second": 0.888, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 7.169811320754717, | |
| "grad_norm": 20.550695492004113, | |
| "learning_rate": 1e-06, | |
| "loss": 3.3094, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 7.169811320754717, | |
| "eval_loss": 3.3228628635406494, | |
| "eval_runtime": 149.8243, | |
| "eval_samples_per_second": 452.61, | |
| "eval_steps_per_second": 0.888, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 7.3584905660377355, | |
| "grad_norm": 22.750882645137363, | |
| "learning_rate": 1e-06, | |
| "loss": 3.3288, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 7.3584905660377355, | |
| "eval_loss": 3.355827808380127, | |
| "eval_runtime": 149.6529, | |
| "eval_samples_per_second": 453.129, | |
| "eval_steps_per_second": 0.889, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 7.547169811320755, | |
| "grad_norm": 21.426519401026805, | |
| "learning_rate": 1e-06, | |
| "loss": 3.3727, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.547169811320755, | |
| "eval_loss": 3.3865537643432617, | |
| "eval_runtime": 150.2322, | |
| "eval_samples_per_second": 451.381, | |
| "eval_steps_per_second": 0.885, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.735849056603773, | |
| "grad_norm": 21.858010980941717, | |
| "learning_rate": 1e-06, | |
| "loss": 3.3938, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 7.735849056603773, | |
| "eval_loss": 3.4126923084259033, | |
| "eval_runtime": 149.7102, | |
| "eval_samples_per_second": 452.955, | |
| "eval_steps_per_second": 0.888, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 7.9245283018867925, | |
| "grad_norm": 21.32876029273619, | |
| "learning_rate": 1e-06, | |
| "loss": 3.4334, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 7.9245283018867925, | |
| "eval_loss": 3.4582273960113525, | |
| "eval_runtime": 149.6771, | |
| "eval_samples_per_second": 453.055, | |
| "eval_steps_per_second": 0.889, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 8.11320754716981, | |
| "grad_norm": 23.89601752008716, | |
| "learning_rate": 1e-06, | |
| "loss": 3.4769, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 8.11320754716981, | |
| "eval_loss": 3.505073308944702, | |
| "eval_runtime": 149.8114, | |
| "eval_samples_per_second": 452.649, | |
| "eval_steps_per_second": 0.888, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 8.30188679245283, | |
| "grad_norm": 22.427822055394444, | |
| "learning_rate": 1e-06, | |
| "loss": 3.5019, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 8.30188679245283, | |
| "eval_loss": 3.5005218982696533, | |
| "eval_runtime": 149.7083, | |
| "eval_samples_per_second": 452.961, | |
| "eval_steps_per_second": 0.888, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 8.49056603773585, | |
| "grad_norm": 20.122035975196678, | |
| "learning_rate": 1e-06, | |
| "loss": 3.4841, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 8.49056603773585, | |
| "eval_loss": 3.4907851219177246, | |
| "eval_runtime": 149.5966, | |
| "eval_samples_per_second": 453.299, | |
| "eval_steps_per_second": 0.889, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 8.679245283018869, | |
| "grad_norm": 7.060014135330357, | |
| "learning_rate": 1e-06, | |
| "loss": 3.4805, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 8.679245283018869, | |
| "eval_loss": 3.2945797443389893, | |
| "eval_runtime": 149.8934, | |
| "eval_samples_per_second": 452.402, | |
| "eval_steps_per_second": 0.887, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 8.867924528301886, | |
| "grad_norm": 17.493433128940666, | |
| "learning_rate": 1e-06, | |
| "loss": 3.332, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 8.867924528301886, | |
| "eval_loss": 3.3459222316741943, | |
| "eval_runtime": 149.8771, | |
| "eval_samples_per_second": 452.451, | |
| "eval_steps_per_second": 0.887, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 9.056603773584905, | |
| "grad_norm": 24.996170360974173, | |
| "learning_rate": 1e-06, | |
| "loss": 3.3797, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 9.056603773584905, | |
| "eval_loss": 3.4055655002593994, | |
| "eval_runtime": 149.7316, | |
| "eval_samples_per_second": 452.89, | |
| "eval_steps_per_second": 0.888, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 9.245283018867925, | |
| "grad_norm": 215.6769864867367, | |
| "learning_rate": 1e-06, | |
| "loss": 3.4551, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 9.245283018867925, | |
| "eval_loss": 3.5277042388916016, | |
| "eval_runtime": 149.7551, | |
| "eval_samples_per_second": 452.819, | |
| "eval_steps_per_second": 0.888, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 9.433962264150944, | |
| "grad_norm": 329.1368446406449, | |
| "learning_rate": 1e-06, | |
| "loss": 3.4127, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 9.433962264150944, | |
| "eval_loss": 3.399594783782959, | |
| "eval_runtime": 150.0027, | |
| "eval_samples_per_second": 452.072, | |
| "eval_steps_per_second": 0.887, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 9.622641509433961, | |
| "grad_norm": 189.47715891368014, | |
| "learning_rate": 1e-06, | |
| "loss": 3.5646, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 9.622641509433961, | |
| "eval_loss": 3.6223909854888916, | |
| "eval_runtime": 149.6317, | |
| "eval_samples_per_second": 453.193, | |
| "eval_steps_per_second": 0.889, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 9.81132075471698, | |
| "grad_norm": 154.57704389721005, | |
| "learning_rate": 1e-06, | |
| "loss": 3.6037, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 9.81132075471698, | |
| "eval_loss": 3.698052406311035, | |
| "eval_runtime": 149.5975, | |
| "eval_samples_per_second": 453.296, | |
| "eval_steps_per_second": 0.889, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 201.5213039296342, | |
| "learning_rate": 1e-06, | |
| "loss": 3.698, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 3.721698522567749, | |
| "eval_runtime": 149.7798, | |
| "eval_samples_per_second": 452.745, | |
| "eval_steps_per_second": 0.888, | |
| "step": 5300 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 5300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1664568262656000.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |