| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 2440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.044961452484131, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.5657, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7243107769423559, | |
| "eval_f1": 0.6488125720138267, | |
| "eval_loss": 0.518221914768219, | |
| "eval_precision": 0.660425343073667, | |
| "eval_recall": 0.642434988179669, | |
| "eval_runtime": 5.1317, | |
| "eval_samples_per_second": 77.752, | |
| "eval_steps_per_second": 9.743, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.9827260971069336, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.5109, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7243107769423559, | |
| "eval_f1": 0.6796350364963504, | |
| "eval_loss": 0.5051248073577881, | |
| "eval_precision": 0.674812030075188, | |
| "eval_recall": 0.6874431714857246, | |
| "eval_runtime": 5.0559, | |
| "eval_samples_per_second": 78.917, | |
| "eval_steps_per_second": 9.889, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 3.8286046981811523, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.48, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7568922305764411, | |
| "eval_f1": 0.6947737005228665, | |
| "eval_loss": 0.4642585515975952, | |
| "eval_precision": 0.704743513567043, | |
| "eval_recall": 0.6879887252227678, | |
| "eval_runtime": 5.0508, | |
| "eval_samples_per_second": 78.997, | |
| "eval_steps_per_second": 9.899, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.501376152038574, | |
| "learning_rate": 4e-05, | |
| "loss": 0.434, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7919799498746867, | |
| "eval_f1": 0.7431297265852239, | |
| "eval_loss": 0.4281364977359772, | |
| "eval_precision": 0.7496659030164186, | |
| "eval_recall": 0.7378159665393708, | |
| "eval_runtime": 5.1744, | |
| "eval_samples_per_second": 77.111, | |
| "eval_steps_per_second": 9.663, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.9612770080566406, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.4106, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7919799498746867, | |
| "eval_f1": 0.761811604105382, | |
| "eval_loss": 0.4194311499595642, | |
| "eval_precision": 0.7527992277992278, | |
| "eval_recall": 0.777823240589198, | |
| "eval_runtime": 5.0693, | |
| "eval_samples_per_second": 78.709, | |
| "eval_steps_per_second": 9.863, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 2.141845941543579, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.3812, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8295739348370927, | |
| "eval_f1": 0.785416007592534, | |
| "eval_loss": 0.3935754895210266, | |
| "eval_precision": 0.8008173300551531, | |
| "eval_recall": 0.7744135297326786, | |
| "eval_runtime": 5.0513, | |
| "eval_samples_per_second": 78.989, | |
| "eval_steps_per_second": 9.898, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.9107286930084229, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.3689, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8521303258145363, | |
| "eval_f1": 0.8211781685593832, | |
| "eval_loss": 0.37001386284828186, | |
| "eval_precision": 0.8219964664310955, | |
| "eval_recall": 0.8203764320785598, | |
| "eval_runtime": 5.0579, | |
| "eval_samples_per_second": 78.886, | |
| "eval_steps_per_second": 9.885, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 4.303086280822754, | |
| "learning_rate": 3e-05, | |
| "loss": 0.3489, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8345864661654135, | |
| "eval_f1": 0.7905211912943871, | |
| "eval_loss": 0.3656045198440552, | |
| "eval_precision": 0.8087878787878788, | |
| "eval_recall": 0.7779596290234588, | |
| "eval_runtime": 5.1317, | |
| "eval_samples_per_second": 77.752, | |
| "eval_steps_per_second": 9.743, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 4.8105268478393555, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.3502, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8370927318295739, | |
| "eval_f1": 0.7954669127215085, | |
| "eval_loss": 0.3640279769897461, | |
| "eval_precision": 0.8101109130520895, | |
| "eval_recall": 0.7847335879250773, | |
| "eval_runtime": 5.0641, | |
| "eval_samples_per_second": 78.79, | |
| "eval_steps_per_second": 9.873, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 7.1581597328186035, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.3349, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8345864661654135, | |
| "eval_f1": 0.7917273014868713, | |
| "eval_loss": 0.3607986867427826, | |
| "eval_precision": 0.8074456774536514, | |
| "eval_recall": 0.780460083651573, | |
| "eval_runtime": 5.0646, | |
| "eval_samples_per_second": 78.782, | |
| "eval_steps_per_second": 9.872, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 4.5321431159973145, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.3189, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.8395989974937343, | |
| "eval_f1": 0.7991821327461466, | |
| "eval_loss": 0.3574356436729431, | |
| "eval_precision": 0.8127623983206507, | |
| "eval_recall": 0.7890070921985816, | |
| "eval_runtime": 5.0619, | |
| "eval_samples_per_second": 78.824, | |
| "eval_steps_per_second": 9.878, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 10.805797576904297, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3121, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8471177944862155, | |
| "eval_f1": 0.813209415123445, | |
| "eval_loss": 0.3547250032424927, | |
| "eval_precision": 0.8175087108013936, | |
| "eval_recall": 0.809328968903437, | |
| "eval_runtime": 5.0575, | |
| "eval_samples_per_second": 78.893, | |
| "eval_steps_per_second": 9.886, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 4.36875057220459, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.3181, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.8521303258145363, | |
| "eval_f1": 0.8121903546212454, | |
| "eval_loss": 0.347785085439682, | |
| "eval_precision": 0.8331751305173232, | |
| "eval_recall": 0.7978723404255319, | |
| "eval_runtime": 5.0847, | |
| "eval_samples_per_second": 78.471, | |
| "eval_steps_per_second": 9.833, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 10.049259185791016, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.3092, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8596491228070176, | |
| "eval_f1": 0.8252627627627628, | |
| "eval_loss": 0.34348130226135254, | |
| "eval_precision": 0.8374149659863945, | |
| "eval_recall": 0.8156937625022731, | |
| "eval_runtime": 5.0603, | |
| "eval_samples_per_second": 78.849, | |
| "eval_steps_per_second": 9.881, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 1.0126718282699585, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.3018, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8546365914786967, | |
| "eval_f1": 0.8200130662020906, | |
| "eval_loss": 0.34661754965782166, | |
| "eval_precision": 0.8296312892075278, | |
| "eval_recall": 0.812147663211493, | |
| "eval_runtime": 5.0762, | |
| "eval_samples_per_second": 78.603, | |
| "eval_steps_per_second": 9.85, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 7.444075584411621, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2955, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8596491228070176, | |
| "eval_f1": 0.8271551457392166, | |
| "eval_loss": 0.33646759390830994, | |
| "eval_precision": 0.8347358430876305, | |
| "eval_recall": 0.8206946717585015, | |
| "eval_runtime": 5.138, | |
| "eval_samples_per_second": 77.657, | |
| "eval_steps_per_second": 9.731, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 4.367713451385498, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.2917, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8596491228070176, | |
| "eval_f1": 0.8252627627627628, | |
| "eval_loss": 0.33527326583862305, | |
| "eval_precision": 0.8374149659863945, | |
| "eval_recall": 0.8156937625022731, | |
| "eval_runtime": 5.05, | |
| "eval_samples_per_second": 79.01, | |
| "eval_steps_per_second": 9.901, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 3.2525553703308105, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2956, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8596491228070176, | |
| "eval_f1": 0.8262195121951219, | |
| "eval_loss": 0.3378874957561493, | |
| "eval_precision": 0.8360165151709128, | |
| "eval_recall": 0.8181942171303873, | |
| "eval_runtime": 5.0525, | |
| "eval_samples_per_second": 78.971, | |
| "eval_steps_per_second": 9.896, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 4.5347208976745605, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.2899, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8646616541353384, | |
| "eval_f1": 0.8305599245045612, | |
| "eval_loss": 0.3353268504142761, | |
| "eval_precision": 0.8454801889267909, | |
| "eval_recall": 0.8192398617930533, | |
| "eval_runtime": 5.0467, | |
| "eval_samples_per_second": 79.061, | |
| "eval_steps_per_second": 9.907, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 8.835315704345703, | |
| "learning_rate": 0.0, | |
| "loss": 0.2885, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8621553884711779, | |
| "eval_f1": 0.8288555273932541, | |
| "eval_loss": 0.3355979323387146, | |
| "eval_precision": 0.8399124219202783, | |
| "eval_recall": 0.8199672667757774, | |
| "eval_runtime": 5.0576, | |
| "eval_samples_per_second": 78.891, | |
| "eval_steps_per_second": 9.886, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2440, | |
| "total_flos": 7597037114448000.0, | |
| "train_loss": 0.3603187435963115, | |
| "train_runtime": 1953.3721, | |
| "train_samples_per_second": 37.248, | |
| "train_steps_per_second": 1.249 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 7597037114448000.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |