| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 2440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.1851325035095215, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.5657, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7243107769423559, | |
| "eval_f1": 0.6528788358114521, | |
| "eval_loss": 0.5160595774650574, | |
| "eval_precision": 0.6616220346866901, | |
| "eval_recall": 0.6474358974358975, | |
| "eval_runtime": 5.1403, | |
| "eval_samples_per_second": 77.623, | |
| "eval_steps_per_second": 9.727, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.9789302349090576, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.5088, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7393483709273183, | |
| "eval_f1": 0.6971094890510949, | |
| "eval_loss": 0.49126291275024414, | |
| "eval_precision": 0.6917293233082706, | |
| "eval_recall": 0.7055828332424077, | |
| "eval_runtime": 5.0447, | |
| "eval_samples_per_second": 79.093, | |
| "eval_steps_per_second": 9.911, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 4.5484442710876465, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.4682, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7844611528822055, | |
| "eval_f1": 0.7412841546534773, | |
| "eval_loss": 0.44238051772117615, | |
| "eval_precision": 0.7401260504201681, | |
| "eval_recall": 0.7424986361156574, | |
| "eval_runtime": 5.0582, | |
| "eval_samples_per_second": 78.882, | |
| "eval_steps_per_second": 9.885, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.417043685913086, | |
| "learning_rate": 4e-05, | |
| "loss": 0.4114, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8095238095238095, | |
| "eval_f1": 0.7702309510820149, | |
| "eval_loss": 0.39798638224601746, | |
| "eval_precision": 0.7702309510820149, | |
| "eval_recall": 0.7702309510820149, | |
| "eval_runtime": 5.0628, | |
| "eval_samples_per_second": 78.81, | |
| "eval_steps_per_second": 9.876, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 3.0948269367218018, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.3862, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8145363408521303, | |
| "eval_f1": 0.7889190734915642, | |
| "eval_loss": 0.3889566659927368, | |
| "eval_precision": 0.7782998251748252, | |
| "eval_recall": 0.8087834151663938, | |
| "eval_runtime": 5.0983, | |
| "eval_samples_per_second": 78.262, | |
| "eval_steps_per_second": 9.807, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 2.5050086975097656, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.3512, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.849624060150376, | |
| "eval_f1": 0.8127815315315315, | |
| "eval_loss": 0.35834890604019165, | |
| "eval_precision": 0.8244897959183674, | |
| "eval_recall": 0.8036006546644845, | |
| "eval_runtime": 5.0754, | |
| "eval_samples_per_second": 78.614, | |
| "eval_steps_per_second": 9.851, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 1.839920163154602, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.3428, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8521303258145363, | |
| "eval_f1": 0.8229427559286084, | |
| "eval_loss": 0.34960630536079407, | |
| "eval_precision": 0.8206541218637993, | |
| "eval_recall": 0.8253773413347881, | |
| "eval_runtime": 5.0545, | |
| "eval_samples_per_second": 78.939, | |
| "eval_steps_per_second": 9.892, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 3.5413811206817627, | |
| "learning_rate": 3e-05, | |
| "loss": 0.3254, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.849624060150376, | |
| "eval_f1": 0.8127815315315315, | |
| "eval_loss": 0.3425041735172272, | |
| "eval_precision": 0.8244897959183674, | |
| "eval_recall": 0.8036006546644845, | |
| "eval_runtime": 5.1016, | |
| "eval_samples_per_second": 78.21, | |
| "eval_steps_per_second": 9.801, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 6.189133644104004, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.3226, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8571428571428571, | |
| "eval_f1": 0.8245369048813042, | |
| "eval_loss": 0.338846355676651, | |
| "eval_precision": 0.8310003145643283, | |
| "eval_recall": 0.8189216221131115, | |
| "eval_runtime": 5.0603, | |
| "eval_samples_per_second": 78.849, | |
| "eval_steps_per_second": 9.881, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 6.058858394622803, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.3063, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8646616541353384, | |
| "eval_f1": 0.8315033783783784, | |
| "eval_loss": 0.3375921845436096, | |
| "eval_precision": 0.8438775510204082, | |
| "eval_recall": 0.8217403164211674, | |
| "eval_runtime": 5.0475, | |
| "eval_samples_per_second": 79.049, | |
| "eval_steps_per_second": 9.906, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 4.5873308181762695, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.2939, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.8671679197994987, | |
| "eval_f1": 0.8350789627607721, | |
| "eval_loss": 0.33186405897140503, | |
| "eval_precision": 0.8463358876939919, | |
| "eval_recall": 0.8260138206946717, | |
| "eval_runtime": 5.0675, | |
| "eval_samples_per_second": 78.738, | |
| "eval_steps_per_second": 9.867, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 9.944389343261719, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2838, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8546365914786967, | |
| "eval_f1": 0.8228567054500919, | |
| "eval_loss": 0.33234164118766785, | |
| "eval_precision": 0.8263351692555232, | |
| "eval_recall": 0.8196490270958356, | |
| "eval_runtime": 5.0657, | |
| "eval_samples_per_second": 78.765, | |
| "eval_steps_per_second": 9.87, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 3.0918514728546143, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.2916, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.8646616541353384, | |
| "eval_f1": 0.8295950648528947, | |
| "eval_loss": 0.3283240497112274, | |
| "eval_precision": 0.8472157618446409, | |
| "eval_recall": 0.816739407164939, | |
| "eval_runtime": 5.0481, | |
| "eval_samples_per_second": 79.039, | |
| "eval_steps_per_second": 9.905, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 8.040557861328125, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.2826, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8671679197994987, | |
| "eval_f1": 0.8350789627607721, | |
| "eval_loss": 0.32443901896476746, | |
| "eval_precision": 0.8463358876939919, | |
| "eval_recall": 0.8260138206946717, | |
| "eval_runtime": 5.0572, | |
| "eval_samples_per_second": 78.897, | |
| "eval_steps_per_second": 9.887, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.8433086276054382, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.2739, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8696741854636592, | |
| "eval_f1": 0.8411818738518064, | |
| "eval_loss": 0.32310473918914795, | |
| "eval_precision": 0.8448542607834644, | |
| "eval_recall": 0.8377886888525186, | |
| "eval_runtime": 5.047, | |
| "eval_samples_per_second": 79.056, | |
| "eval_steps_per_second": 9.907, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 7.48613166809082, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2674, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8696741854636592, | |
| "eval_f1": 0.8411818738518064, | |
| "eval_loss": 0.322089284658432, | |
| "eval_precision": 0.8448542607834644, | |
| "eval_recall": 0.8377886888525186, | |
| "eval_runtime": 5.0922, | |
| "eval_samples_per_second": 78.356, | |
| "eval_steps_per_second": 9.819, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 6.076572895050049, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.2648, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8721804511278195, | |
| "eval_f1": 0.8413023981282901, | |
| "eval_loss": 0.3192753493785858, | |
| "eval_precision": 0.8527593534677056, | |
| "eval_recall": 0.8320603746135662, | |
| "eval_runtime": 5.0505, | |
| "eval_samples_per_second": 79.002, | |
| "eval_steps_per_second": 9.9, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 3.7677204608917236, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2687, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8696741854636592, | |
| "eval_f1": 0.8403508771929824, | |
| "eval_loss": 0.31721195578575134, | |
| "eval_precision": 0.8460491741741742, | |
| "eval_recall": 0.8352882342244045, | |
| "eval_runtime": 5.0513, | |
| "eval_samples_per_second": 78.99, | |
| "eval_steps_per_second": 9.899, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 3.890503406524658, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.264, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.87468671679198, | |
| "eval_f1": 0.8448388501742161, | |
| "eval_loss": 0.31702518463134766, | |
| "eval_precision": 0.8551721930610677, | |
| "eval_recall": 0.8363338788870704, | |
| "eval_runtime": 5.0604, | |
| "eval_samples_per_second": 78.847, | |
| "eval_steps_per_second": 9.881, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 8.532156944274902, | |
| "learning_rate": 0.0, | |
| "loss": 0.2637, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8696741854636592, | |
| "eval_f1": 0.8395012067578439, | |
| "eval_loss": 0.3147731125354767, | |
| "eval_precision": 0.8473584308763049, | |
| "eval_recall": 0.8327877795962902, | |
| "eval_runtime": 5.0483, | |
| "eval_samples_per_second": 79.036, | |
| "eval_steps_per_second": 9.904, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2440, | |
| "total_flos": 7609911792720000.0, | |
| "train_loss": 0.33715896919125415, | |
| "train_runtime": 1952.8505, | |
| "train_samples_per_second": 37.258, | |
| "train_steps_per_second": 1.249 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 7609911792720000.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |