| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 10560, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.1915496587753296, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.7886, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.859733677829673, | |
| "eval_f1": 0.13302540415704386, | |
| "eval_loss": 0.4607163071632385, | |
| "eval_precision": 0.32432432432432434, | |
| "eval_recall": 0.08367228355607205, | |
| "eval_runtime": 4.8194, | |
| "eval_samples_per_second": 194.007, | |
| "eval_steps_per_second": 3.112, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.8614588975906372, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.3911, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9292954387359634, | |
| "eval_f1": 0.6470908102229471, | |
| "eval_loss": 0.254240483045578, | |
| "eval_precision": 0.6080735820132857, | |
| "eval_recall": 0.6914584543869843, | |
| "eval_runtime": 4.6665, | |
| "eval_samples_per_second": 200.364, | |
| "eval_steps_per_second": 3.214, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.2399094104766846, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.2384, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9376428500447183, | |
| "eval_f1": 0.7163083377031987, | |
| "eval_loss": 0.19337689876556396, | |
| "eval_precision": 0.652651696129957, | |
| "eval_recall": 0.7937245787332946, | |
| "eval_runtime": 4.7228, | |
| "eval_samples_per_second": 197.975, | |
| "eval_steps_per_second": 3.176, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.5993666648864746, | |
| "learning_rate": 4e-05, | |
| "loss": 0.1934, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9445990261353473, | |
| "eval_f1": 0.7476784292915892, | |
| "eval_loss": 0.1678435504436493, | |
| "eval_precision": 0.68798828125, | |
| "eval_recall": 0.8187100522951772, | |
| "eval_runtime": 4.575, | |
| "eval_samples_per_second": 204.372, | |
| "eval_steps_per_second": 3.279, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.2465081214904785, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.172, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9467852529066879, | |
| "eval_f1": 0.7566290364925177, | |
| "eval_loss": 0.1589244157075882, | |
| "eval_precision": 0.6901340996168582, | |
| "eval_recall": 0.8373038930854154, | |
| "eval_runtime": 4.593, | |
| "eval_samples_per_second": 203.572, | |
| "eval_steps_per_second": 3.266, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 1.3054755926132202, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.1602, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9487727317897248, | |
| "eval_f1": 0.7631235309480282, | |
| "eval_loss": 0.15331855416297913, | |
| "eval_precision": 0.6930740037950665, | |
| "eval_recall": 0.8489250435793143, | |
| "eval_runtime": 4.5844, | |
| "eval_samples_per_second": 203.951, | |
| "eval_steps_per_second": 3.272, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 3.6129133701324463, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.1532, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9497664712312431, | |
| "eval_f1": 0.7661898569570872, | |
| "eval_loss": 0.15049894154071808, | |
| "eval_precision": 0.693502824858757, | |
| "eval_recall": 0.8558977338756537, | |
| "eval_runtime": 4.6147, | |
| "eval_samples_per_second": 202.613, | |
| "eval_steps_per_second": 3.25, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.5060432553291321, | |
| "learning_rate": 3e-05, | |
| "loss": 0.1457, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9521514458908874, | |
| "eval_f1": 0.7754024808656638, | |
| "eval_loss": 0.14558807015419006, | |
| "eval_precision": 0.710348162475822, | |
| "eval_recall": 0.8535735037768739, | |
| "eval_runtime": 4.5383, | |
| "eval_samples_per_second": 206.026, | |
| "eval_steps_per_second": 3.305, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 1.1229195594787598, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.1401, | |
| "step": 4752 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9543376726622279, | |
| "eval_f1": 0.7870345566568443, | |
| "eval_loss": 0.1418333500623703, | |
| "eval_precision": 0.7301192842942346, | |
| "eval_recall": 0.8535735037768739, | |
| "eval_runtime": 4.5595, | |
| "eval_samples_per_second": 205.064, | |
| "eval_steps_per_second": 3.29, | |
| "step": 4752 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 1.4011176824569702, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.1375, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9550829772433668, | |
| "eval_f1": 0.7894174238375201, | |
| "eval_loss": 0.13877230882644653, | |
| "eval_precision": 0.7308263236021771, | |
| "eval_recall": 0.8582219639744335, | |
| "eval_runtime": 4.5606, | |
| "eval_samples_per_second": 205.015, | |
| "eval_steps_per_second": 3.289, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 1.9114675521850586, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.1331, | |
| "step": 5808 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9554804730199742, | |
| "eval_f1": 0.7894174238375201, | |
| "eval_loss": 0.1359723061323166, | |
| "eval_precision": 0.7308263236021771, | |
| "eval_recall": 0.8582219639744335, | |
| "eval_runtime": 4.6464, | |
| "eval_samples_per_second": 201.233, | |
| "eval_steps_per_second": 3.228, | |
| "step": 5808 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 1.272605061531067, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1304, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.954933916327139, | |
| "eval_f1": 0.784512683578104, | |
| "eval_loss": 0.13654367625713348, | |
| "eval_precision": 0.7257905138339921, | |
| "eval_recall": 0.8535735037768739, | |
| "eval_runtime": 4.5658, | |
| "eval_samples_per_second": 204.783, | |
| "eval_steps_per_second": 3.285, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 1.4920827150344849, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.1285, | |
| "step": 6864 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.9558779687965815, | |
| "eval_f1": 0.7906098219104155, | |
| "eval_loss": 0.13434641063213348, | |
| "eval_precision": 0.7380352644836272, | |
| "eval_recall": 0.8512492736780941, | |
| "eval_runtime": 4.5768, | |
| "eval_samples_per_second": 204.289, | |
| "eval_steps_per_second": 3.277, | |
| "step": 6864 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 1.0049793720245361, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.1255, | |
| "step": 7392 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9558779687965815, | |
| "eval_f1": 0.7958087049973133, | |
| "eval_loss": 0.13445836305618286, | |
| "eval_precision": 0.7401299350324838, | |
| "eval_recall": 0.8605461940732132, | |
| "eval_runtime": 4.694, | |
| "eval_samples_per_second": 199.19, | |
| "eval_steps_per_second": 3.196, | |
| "step": 7392 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 1.0454351902008057, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.1249, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.954933916327139, | |
| "eval_f1": 0.7917669072440523, | |
| "eval_loss": 0.13459959626197815, | |
| "eval_precision": 0.7331683168316832, | |
| "eval_recall": 0.8605461940732132, | |
| "eval_runtime": 4.5284, | |
| "eval_samples_per_second": 206.472, | |
| "eval_steps_per_second": 3.312, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 1.3529750108718872, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1238, | |
| "step": 8448 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.9551326642154427, | |
| "eval_f1": 0.7883328873427883, | |
| "eval_loss": 0.13417887687683105, | |
| "eval_precision": 0.7306547619047619, | |
| "eval_recall": 0.8558977338756537, | |
| "eval_runtime": 4.5755, | |
| "eval_samples_per_second": 204.351, | |
| "eval_steps_per_second": 3.278, | |
| "step": 8448 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.9557709693908691, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.1232, | |
| "step": 8976 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.9557289078803538, | |
| "eval_f1": 0.7904736419587904, | |
| "eval_loss": 0.13424266874790192, | |
| "eval_precision": 0.7326388888888888, | |
| "eval_recall": 0.8582219639744335, | |
| "eval_runtime": 4.5649, | |
| "eval_samples_per_second": 204.825, | |
| "eval_steps_per_second": 3.286, | |
| "step": 8976 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 1.4014308452606201, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1215, | |
| "step": 9504 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.954933916327139, | |
| "eval_f1": 0.7909212283044059, | |
| "eval_loss": 0.13506156206130981, | |
| "eval_precision": 0.7317193675889329, | |
| "eval_recall": 0.8605461940732132, | |
| "eval_runtime": 4.5303, | |
| "eval_samples_per_second": 206.389, | |
| "eval_steps_per_second": 3.311, | |
| "step": 9504 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.8569299578666687, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.1209, | |
| "step": 10032 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.9547351684388353, | |
| "eval_f1": 0.786648865153538, | |
| "eval_loss": 0.1337263584136963, | |
| "eval_precision": 0.7277667984189723, | |
| "eval_recall": 0.8558977338756537, | |
| "eval_runtime": 4.5121, | |
| "eval_samples_per_second": 207.22, | |
| "eval_steps_per_second": 3.324, | |
| "step": 10032 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 1.4876703023910522, | |
| "learning_rate": 0.0, | |
| "loss": 0.1207, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.95553015999205, | |
| "eval_f1": 0.7861921327267862, | |
| "eval_loss": 0.133191779255867, | |
| "eval_precision": 0.7286706349206349, | |
| "eval_recall": 0.8535735037768739, | |
| "eval_runtime": 4.5648, | |
| "eval_samples_per_second": 204.83, | |
| "eval_steps_per_second": 3.286, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 10560, | |
| "total_flos": 4541164131293502.0, | |
| "train_loss": 0.18862926418131049, | |
| "train_runtime": 1259.7518, | |
| "train_samples_per_second": 133.947, | |
| "train_steps_per_second": 8.383 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 10560, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 4541164131293502.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |