| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 2440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.089293003082275, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.566, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7167919799498746, | |
| "eval_f1": 0.6444270944235455, | |
| "eval_loss": 0.5211467742919922, | |
| "eval_precision": 0.6520598138245197, | |
| "eval_recall": 0.639616293871613, | |
| "eval_runtime": 5.162, | |
| "eval_samples_per_second": 77.296, | |
| "eval_steps_per_second": 9.686, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 4.061326026916504, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.5148, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7243107769423559, | |
| "eval_f1": 0.6849698535745048, | |
| "eval_loss": 0.516937255859375, | |
| "eval_precision": 0.6790780141843972, | |
| "eval_recall": 0.6974449899981815, | |
| "eval_runtime": 5.0571, | |
| "eval_samples_per_second": 78.899, | |
| "eval_steps_per_second": 9.887, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 4.013665199279785, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.4927, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7543859649122807, | |
| "eval_f1": 0.6942098348348349, | |
| "eval_loss": 0.48614969849586487, | |
| "eval_precision": 0.7017006802721089, | |
| "eval_recall": 0.6887161302054918, | |
| "eval_runtime": 5.055, | |
| "eval_samples_per_second": 78.931, | |
| "eval_steps_per_second": 9.891, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.659437417984009, | |
| "learning_rate": 4e-05, | |
| "loss": 0.4627, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_f1": 0.7090966301102831, | |
| "eval_loss": 0.4655974805355072, | |
| "eval_precision": 0.7119991289198606, | |
| "eval_recall": 0.7065375522822331, | |
| "eval_runtime": 5.1983, | |
| "eval_samples_per_second": 76.756, | |
| "eval_steps_per_second": 9.619, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.7017691135406494, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.4504, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7543859649122807, | |
| "eval_f1": 0.7193367786391043, | |
| "eval_loss": 0.4610688388347626, | |
| "eval_precision": 0.7119825169058223, | |
| "eval_recall": 0.7337243135115475, | |
| "eval_runtime": 5.06, | |
| "eval_samples_per_second": 78.853, | |
| "eval_steps_per_second": 9.881, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 4.450038909912109, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.4276, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7894736842105263, | |
| "eval_f1": 0.743447642375995, | |
| "eval_loss": 0.4303344488143921, | |
| "eval_precision": 0.7460857726344452, | |
| "eval_recall": 0.7410438261502091, | |
| "eval_runtime": 5.0829, | |
| "eval_samples_per_second": 78.498, | |
| "eval_steps_per_second": 9.837, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 4.093283176422119, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.4176, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7944862155388471, | |
| "eval_f1": 0.7533174497858737, | |
| "eval_loss": 0.4162527918815613, | |
| "eval_precision": 0.7521008403361344, | |
| "eval_recall": 0.7545917439534461, | |
| "eval_runtime": 5.0538, | |
| "eval_samples_per_second": 78.95, | |
| "eval_steps_per_second": 9.894, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 4.816234588623047, | |
| "learning_rate": 3e-05, | |
| "loss": 0.397, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8170426065162907, | |
| "eval_f1": 0.7740779522978476, | |
| "eval_loss": 0.3960316479206085, | |
| "eval_precision": 0.7814051164566629, | |
| "eval_recall": 0.7680487361338425, | |
| "eval_runtime": 5.0577, | |
| "eval_samples_per_second": 78.889, | |
| "eval_steps_per_second": 9.886, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 5.539109706878662, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.3904, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8270676691729323, | |
| "eval_f1": 0.7828802611966783, | |
| "eval_loss": 0.39395618438720703, | |
| "eval_precision": 0.7969399881164587, | |
| "eval_recall": 0.7726404800872886, | |
| "eval_runtime": 5.0627, | |
| "eval_samples_per_second": 78.812, | |
| "eval_steps_per_second": 9.876, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 5.052578449249268, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.3743, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8270676691729323, | |
| "eval_f1": 0.7803582113367107, | |
| "eval_loss": 0.3900292217731476, | |
| "eval_precision": 0.7993592785951591, | |
| "eval_recall": 0.7676395708310602, | |
| "eval_runtime": 5.0505, | |
| "eval_samples_per_second": 79.002, | |
| "eval_steps_per_second": 9.9, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 3.9592137336730957, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.3632, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.8345864661654135, | |
| "eval_f1": 0.7929065743944637, | |
| "eval_loss": 0.38483926653862, | |
| "eval_precision": 0.8062188401994228, | |
| "eval_recall": 0.7829605382796871, | |
| "eval_runtime": 5.0602, | |
| "eval_samples_per_second": 78.851, | |
| "eval_steps_per_second": 9.881, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 13.379903793334961, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3599, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8270676691729323, | |
| "eval_f1": 0.784099374985296, | |
| "eval_loss": 0.37946680188179016, | |
| "eval_precision": 0.7958930899608865, | |
| "eval_recall": 0.7751409347154028, | |
| "eval_runtime": 5.1036, | |
| "eval_samples_per_second": 78.18, | |
| "eval_steps_per_second": 9.797, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 4.531927108764648, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.3597, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.8345864661654135, | |
| "eval_f1": 0.786734693877551, | |
| "eval_loss": 0.37652111053466797, | |
| "eval_precision": 0.8135673624288424, | |
| "eval_recall": 0.7704582651391162, | |
| "eval_runtime": 5.0432, | |
| "eval_samples_per_second": 79.117, | |
| "eval_steps_per_second": 9.914, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 6.193031311035156, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.3461, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8320802005012531, | |
| "eval_f1": 0.7867246399936176, | |
| "eval_loss": 0.3729116916656494, | |
| "eval_precision": 0.8061224489795917, | |
| "eval_recall": 0.7736861247499545, | |
| "eval_runtime": 5.0678, | |
| "eval_samples_per_second": 78.732, | |
| "eval_steps_per_second": 9.866, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 1.2145860195159912, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.3432, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8370927318295739, | |
| "eval_f1": 0.7954669127215085, | |
| "eval_loss": 0.37140053510665894, | |
| "eval_precision": 0.8101109130520895, | |
| "eval_recall": 0.7847335879250773, | |
| "eval_runtime": 5.045, | |
| "eval_samples_per_second": 79.088, | |
| "eval_steps_per_second": 9.911, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 6.050211429595947, | |
| "learning_rate": 1e-05, | |
| "loss": 0.333, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8421052631578947, | |
| "eval_f1": 0.8006218422075398, | |
| "eval_loss": 0.3706248998641968, | |
| "eval_precision": 0.8181103678929766, | |
| "eval_recall": 0.7882796872158575, | |
| "eval_runtime": 5.0703, | |
| "eval_samples_per_second": 78.694, | |
| "eval_steps_per_second": 9.861, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 2.7747299671173096, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.3323, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8395989974937343, | |
| "eval_f1": 0.796869033982436, | |
| "eval_loss": 0.3699536621570587, | |
| "eval_precision": 0.8155050505050505, | |
| "eval_recall": 0.7840061829423532, | |
| "eval_runtime": 5.0529, | |
| "eval_samples_per_second": 78.964, | |
| "eval_steps_per_second": 9.895, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 2.8045554161071777, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3337, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8395989974937343, | |
| "eval_f1": 0.7980385953812085, | |
| "eval_loss": 0.3686981499195099, | |
| "eval_precision": 0.8140740248521496, | |
| "eval_recall": 0.7865066375704673, | |
| "eval_runtime": 5.0589, | |
| "eval_samples_per_second": 78.87, | |
| "eval_steps_per_second": 9.883, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 4.2901482582092285, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.3298, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8395989974937343, | |
| "eval_f1": 0.7980385953812085, | |
| "eval_loss": 0.36837807297706604, | |
| "eval_precision": 0.8140740248521496, | |
| "eval_recall": 0.7865066375704673, | |
| "eval_runtime": 5.0488, | |
| "eval_samples_per_second": 79.029, | |
| "eval_steps_per_second": 9.903, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 5.859314441680908, | |
| "learning_rate": 0.0, | |
| "loss": 0.3309, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8395989974937343, | |
| "eval_f1": 0.7980385953812085, | |
| "eval_loss": 0.368120402097702, | |
| "eval_precision": 0.8140740248521496, | |
| "eval_recall": 0.7865066375704673, | |
| "eval_runtime": 5.1179, | |
| "eval_samples_per_second": 77.961, | |
| "eval_steps_per_second": 9.77, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2440, | |
| "total_flos": 7590599775312000.0, | |
| "train_loss": 0.39626741878321914, | |
| "train_runtime": 1951.4944, | |
| "train_samples_per_second": 37.284, | |
| "train_steps_per_second": 1.25 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 7590599775312000.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |