| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 10560, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.3427704572677612, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.7018, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9115075027327835, | |
| "eval_f1": 0.5138413685847589, | |
| "eval_loss": 0.33528366684913635, | |
| "eval_precision": 0.5528781793842035, | |
| "eval_recall": 0.4799535153980244, | |
| "eval_runtime": 4.4519, | |
| "eval_samples_per_second": 210.021, | |
| "eval_steps_per_second": 3.369, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.260859727859497, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.2639, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9412203120341847, | |
| "eval_f1": 0.7251732101616628, | |
| "eval_loss": 0.1912250965833664, | |
| "eval_precision": 0.6493566176470589, | |
| "eval_recall": 0.821034282393957, | |
| "eval_runtime": 4.5644, | |
| "eval_samples_per_second": 204.846, | |
| "eval_steps_per_second": 3.286, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.1796255111694336, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.1862, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9465865050183842, | |
| "eval_f1": 0.7531402204562931, | |
| "eval_loss": 0.1671685427427292, | |
| "eval_precision": 0.6738532110091743, | |
| "eval_recall": 0.8535735037768739, | |
| "eval_runtime": 4.5056, | |
| "eval_samples_per_second": 207.52, | |
| "eval_steps_per_second": 3.329, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.1285958290100098, | |
| "learning_rate": 4e-05, | |
| "loss": 0.1612, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9539401768856206, | |
| "eval_f1": 0.7823765020026703, | |
| "eval_loss": 0.14461010694503784, | |
| "eval_precision": 0.7238142292490118, | |
| "eval_recall": 0.8512492736780941, | |
| "eval_runtime": 4.5298, | |
| "eval_samples_per_second": 206.41, | |
| "eval_steps_per_second": 3.311, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.1812392473220825, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.1439, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9545364205505317, | |
| "eval_f1": 0.7862656374767102, | |
| "eval_loss": 0.13903872668743134, | |
| "eval_precision": 0.7254420432220039, | |
| "eval_recall": 0.8582219639744335, | |
| "eval_runtime": 4.6125, | |
| "eval_samples_per_second": 202.712, | |
| "eval_steps_per_second": 3.252, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 1.9920209646224976, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.1358, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9551326642154427, | |
| "eval_f1": 0.7892923403127484, | |
| "eval_loss": 0.1392282098531723, | |
| "eval_precision": 0.7256335282651072, | |
| "eval_recall": 0.8651946542707728, | |
| "eval_runtime": 4.8436, | |
| "eval_samples_per_second": 193.037, | |
| "eval_steps_per_second": 3.097, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 2.6399483680725098, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.129, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9561264036569611, | |
| "eval_f1": 0.7918540068764879, | |
| "eval_loss": 0.13837336003780365, | |
| "eval_precision": 0.7266990291262136, | |
| "eval_recall": 0.8698431144683324, | |
| "eval_runtime": 4.5425, | |
| "eval_samples_per_second": 205.834, | |
| "eval_steps_per_second": 3.302, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.901687741279602, | |
| "learning_rate": 3e-05, | |
| "loss": 0.1228, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.957517638875087, | |
| "eval_f1": 0.7969124301304232, | |
| "eval_loss": 0.13390584290027618, | |
| "eval_precision": 0.7352652259332023, | |
| "eval_recall": 0.8698431144683324, | |
| "eval_runtime": 4.5586, | |
| "eval_samples_per_second": 205.105, | |
| "eval_steps_per_second": 3.29, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 1.3437026739120483, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.1168, | |
| "step": 4752 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9577163867633907, | |
| "eval_f1": 0.7960010807889759, | |
| "eval_loss": 0.13209262490272522, | |
| "eval_precision": 0.7439393939393939, | |
| "eval_recall": 0.8558977338756537, | |
| "eval_runtime": 4.5415, | |
| "eval_samples_per_second": 205.88, | |
| "eval_steps_per_second": 3.303, | |
| "step": 4752 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 3.851469039916992, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.1146, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.958113882539998, | |
| "eval_f1": 0.7973009446693656, | |
| "eval_loss": 0.12995323538780212, | |
| "eval_precision": 0.7444556451612904, | |
| "eval_recall": 0.8582219639744335, | |
| "eval_runtime": 4.5491, | |
| "eval_samples_per_second": 205.534, | |
| "eval_steps_per_second": 3.297, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 2.413081645965576, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.1105, | |
| "step": 5808 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9571201430984796, | |
| "eval_f1": 0.7947830715996806, | |
| "eval_loss": 0.13270916044712067, | |
| "eval_precision": 0.7333005893909627, | |
| "eval_recall": 0.8675188843695526, | |
| "eval_runtime": 4.536, | |
| "eval_samples_per_second": 206.127, | |
| "eval_steps_per_second": 3.307, | |
| "step": 5808 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 1.1331512928009033, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1083, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9569213952101759, | |
| "eval_f1": 0.794345158708989, | |
| "eval_loss": 0.1333465278148651, | |
| "eval_precision": 0.7342209072978304, | |
| "eval_recall": 0.8651946542707728, | |
| "eval_runtime": 4.5053, | |
| "eval_samples_per_second": 207.533, | |
| "eval_steps_per_second": 3.329, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 1.8731575012207031, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.106, | |
| "step": 6864 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.9591076219815164, | |
| "eval_f1": 0.7998916869753587, | |
| "eval_loss": 0.12651574611663818, | |
| "eval_precision": 0.7489858012170385, | |
| "eval_recall": 0.8582219639744335, | |
| "eval_runtime": 4.4726, | |
| "eval_samples_per_second": 209.052, | |
| "eval_steps_per_second": 3.354, | |
| "step": 6864 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.8700233697891235, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.1032, | |
| "step": 7392 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9589088740932128, | |
| "eval_f1": 0.7973009446693656, | |
| "eval_loss": 0.12690864503383636, | |
| "eval_precision": 0.7444556451612904, | |
| "eval_recall": 0.8582219639744335, | |
| "eval_runtime": 4.5513, | |
| "eval_samples_per_second": 205.436, | |
| "eval_steps_per_second": 3.296, | |
| "step": 7392 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 1.2827842235565186, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.1023, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.9585113783166054, | |
| "eval_f1": 0.7998922704012928, | |
| "eval_loss": 0.12912563979625702, | |
| "eval_precision": 0.7454819277108434, | |
| "eval_recall": 0.862870424171993, | |
| "eval_runtime": 4.5324, | |
| "eval_samples_per_second": 206.293, | |
| "eval_steps_per_second": 3.31, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 1.694359302520752, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1014, | |
| "step": 8448 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.957517638875087, | |
| "eval_f1": 0.7947269303201507, | |
| "eval_loss": 0.12707427144050598, | |
| "eval_precision": 0.7399799599198397, | |
| "eval_recall": 0.8582219639744335, | |
| "eval_runtime": 4.562, | |
| "eval_samples_per_second": 204.955, | |
| "eval_steps_per_second": 3.288, | |
| "step": 8448 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 1.139172911643982, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.1002, | |
| "step": 8976 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.9589088740932128, | |
| "eval_f1": 0.8041789445486203, | |
| "eval_loss": 0.12810933589935303, | |
| "eval_precision": 0.7460238568588469, | |
| "eval_recall": 0.8721673445671121, | |
| "eval_runtime": 4.5254, | |
| "eval_samples_per_second": 206.614, | |
| "eval_steps_per_second": 3.315, | |
| "step": 8976 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 1.6876777410507202, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0986, | |
| "step": 9504 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.9573188909867832, | |
| "eval_f1": 0.8016021361815754, | |
| "eval_loss": 0.13038571178913116, | |
| "eval_precision": 0.741600790513834, | |
| "eval_recall": 0.8721673445671121, | |
| "eval_runtime": 4.5245, | |
| "eval_samples_per_second": 206.653, | |
| "eval_steps_per_second": 3.315, | |
| "step": 9504 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.5088372230529785, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0978, | |
| "step": 10032 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.9589088740932128, | |
| "eval_f1": 0.8046473925965956, | |
| "eval_loss": 0.1270894557237625, | |
| "eval_precision": 0.752020202020202, | |
| "eval_recall": 0.8651946542707728, | |
| "eval_runtime": 4.5248, | |
| "eval_samples_per_second": 206.64, | |
| "eval_steps_per_second": 3.315, | |
| "step": 10032 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 2.988950490951538, | |
| "learning_rate": 0.0, | |
| "loss": 0.0984, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9579151346516943, | |
| "eval_f1": 0.8007549204637368, | |
| "eval_loss": 0.12812790274620056, | |
| "eval_precision": 0.7469818913480886, | |
| "eval_recall": 0.862870424171993, | |
| "eval_runtime": 4.5277, | |
| "eval_samples_per_second": 206.507, | |
| "eval_steps_per_second": 3.313, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 10560, | |
| "total_flos": 4552961808488766.0, | |
| "train_loss": 0.1551312410470211, | |
| "train_runtime": 1231.1625, | |
| "train_samples_per_second": 137.057, | |
| "train_steps_per_second": 8.577 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 10560, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 4552961808488766.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |