{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 112,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17937219730941703,
      "grad_norm": 1.5203849077224731,
      "learning_rate": 4.984280524733107e-05,
      "loss": 3.5214,
      "num_input_tokens_seen": 111952,
      "step": 5,
      "train_runtime": 27.5462,
      "train_tokens_per_second": 4064.161
    },
    {
      "epoch": 0.35874439461883406,
      "grad_norm": 0.9002476334571838,
      "learning_rate": 4.9207588053056545e-05,
      "loss": 3.3636,
      "num_input_tokens_seen": 223776,
      "step": 10,
      "train_runtime": 53.4438,
      "train_tokens_per_second": 4187.127
    },
    {
      "epoch": 0.5381165919282511,
      "grad_norm": 0.6985305547714233,
      "learning_rate": 4.8096988312782174e-05,
      "loss": 3.0641,
      "num_input_tokens_seen": 335600,
      "step": 15,
      "train_runtime": 79.4605,
      "train_tokens_per_second": 4223.48
    },
    {
      "epoch": 0.7174887892376681,
      "grad_norm": 0.5677102208137512,
      "learning_rate": 4.653281570581023e-05,
      "loss": 2.9922,
      "num_input_tokens_seen": 447424,
      "step": 20,
      "train_runtime": 105.6383,
      "train_tokens_per_second": 4235.433
    },
    {
      "epoch": 0.8968609865470852,
      "grad_norm": 0.6143746376037598,
      "learning_rate": 4.454578706170075e-05,
      "loss": 3.0359,
      "num_input_tokens_seen": 559328,
      "step": 25,
      "train_runtime": 131.8723,
      "train_tokens_per_second": 4241.437
    },
    {
      "epoch": 1.0717488789237668,
      "grad_norm": 0.5885606408119202,
      "learning_rate": 4.2174923150872544e-05,
      "loss": 2.825,
      "num_input_tokens_seen": 668544,
      "step": 30,
      "train_runtime": 157.5044,
      "train_tokens_per_second": 4244.606
    },
    {
      "epoch": 1.251121076233184,
      "grad_norm": 0.5807215571403503,
      "learning_rate": 3.946678240449515e-05,
      "loss": 2.8427,
      "num_input_tokens_seen": 780496,
      "step": 35,
      "train_runtime": 183.817,
      "train_tokens_per_second": 4246.05
    },
    {
      "epoch": 1.4304932735426008,
      "grad_norm": 0.6312059164047241,
      "learning_rate": 3.6474546611688445e-05,
      "loss": 2.7976,
      "num_input_tokens_seen": 892128,
      "step": 40,
      "train_runtime": 210.129,
      "train_tokens_per_second": 4245.62
    },
    {
      "epoch": 1.609865470852018,
      "grad_norm": 0.6633515357971191,
      "learning_rate": 3.3256976548879184e-05,
      "loss": 2.6764,
      "num_input_tokens_seen": 1004112,
      "step": 45,
      "train_runtime": 236.4964,
      "train_tokens_per_second": 4245.781
    },
    {
      "epoch": 1.789237668161435,
      "grad_norm": 0.7430306077003479,
      "learning_rate": 2.9877258050403212e-05,
      "loss": 2.7217,
      "num_input_tokens_seen": 1116064,
      "step": 50,
      "train_runtime": 262.8011,
      "train_tokens_per_second": 4246.801
    },
    {
      "epoch": 1.9686098654708521,
      "grad_norm": 0.7268422245979309,
      "learning_rate": 2.6401761180929797e-05,
      "loss": 2.7066,
      "num_input_tokens_seen": 1227808,
      "step": 55,
      "train_runtime": 289.1347,
      "train_tokens_per_second": 4246.491
    },
    {
      "epoch": 2.1434977578475336,
      "grad_norm": 0.7635470032691956,
      "learning_rate": 2.2898736876768815e-05,
      "loss": 2.6038,
      "num_input_tokens_seen": 1337104,
      "step": 60,
      "train_runtime": 314.8221,
      "train_tokens_per_second": 4247.173
    },
    {
      "epoch": 2.3228699551569507,
      "grad_norm": 0.8516287207603455,
      "learning_rate": 1.9436976651092144e-05,
      "loss": 2.5954,
      "num_input_tokens_seen": 1448976,
      "step": 65,
      "train_runtime": 341.1436,
      "train_tokens_per_second": 4247.407
    },
    {
      "epoch": 2.502242152466368,
      "grad_norm": 0.8331743478775024,
      "learning_rate": 1.6084461683442176e-05,
      "loss": 2.6679,
      "num_input_tokens_seen": 1560352,
      "step": 70,
      "train_runtime": 367.4136,
      "train_tokens_per_second": 4246.854
    },
    {
      "epoch": 2.681614349775785,
      "grad_norm": 0.8733316659927368,
      "learning_rate": 1.2907027822369005e-05,
      "loss": 2.5975,
      "num_input_tokens_seen": 1672176,
      "step": 75,
      "train_runtime": 393.7208,
      "train_tokens_per_second": 4247.111
    },
    {
      "epoch": 2.8609865470852016,
      "grad_norm": 0.927335798740387,
      "learning_rate": 9.967072717539851e-06,
      "loss": 2.6389,
      "num_input_tokens_seen": 1784448,
      "step": 80,
      "train_runtime": 420.1259,
      "train_tokens_per_second": 4247.412
    },
    {
      "epoch": 3.0358744394618835,
      "grad_norm": 0.821441650390625,
      "learning_rate": 7.3223304703363135e-06,
      "loss": 2.4908,
      "num_input_tokens_seen": 1893216,
      "step": 85,
      "train_runtime": 445.7557,
      "train_tokens_per_second": 4247.205
    },
    {
      "epoch": 3.2152466367713006,
      "grad_norm": 0.8318443894386292,
      "learning_rate": 5.02473786604378e-06,
      "loss": 2.6384,
      "num_input_tokens_seen": 2005392,
      "step": 90,
      "train_runtime": 472.0937,
      "train_tokens_per_second": 4247.869
    },
    {
      "epoch": 3.3946188340807173,
      "grad_norm": 0.8994652628898621,
      "learning_rate": 3.119414452281158e-06,
      "loss": 2.5131,
      "num_input_tokens_seen": 2117168,
      "step": 95,
      "train_runtime": 498.4113,
      "train_tokens_per_second": 4247.833
    },
    {
      "epoch": 3.5739910313901344,
      "grad_norm": 0.9667345881462097,
      "learning_rate": 1.6437764926350074e-06,
      "loss": 2.5565,
      "num_input_tokens_seen": 2229024,
      "step": 100,
      "train_runtime": 524.7221,
      "train_tokens_per_second": 4248.009
    },
    {
      "epoch": 3.7533632286995515,
      "grad_norm": 0.8132877349853516,
      "learning_rate": 6.268021954544096e-07,
      "loss": 2.5378,
      "num_input_tokens_seen": 2340688,
      "step": 105,
      "train_runtime": 552.8372,
      "train_tokens_per_second": 4233.955
    },
    {
      "epoch": 3.9327354260089686,
      "grad_norm": 0.9859150648117065,
      "learning_rate": 8.846264705952289e-08,
      "loss": 2.5846,
      "num_input_tokens_seen": 2452784,
      "step": 110,
      "train_runtime": 579.23,
      "train_tokens_per_second": 4234.56
    },
    {
      "epoch": 4.0,
      "num_input_tokens_seen": 2494624,
      "step": 112,
      "total_flos": 1.0404813073494835e+17,
      "train_loss": 2.765545678990228,
      "train_runtime": 591.3728,
      "train_samples_per_second": 6.02,
      "train_steps_per_second": 0.189
    }
  ],
  "logging_steps": 5,
  "max_steps": 112,
  "num_input_tokens_seen": 2494624,
  "num_train_epochs": 4,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0404813073494835e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}