{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5296610169491526, "eval_steps": 500, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0211864406779661, "grad_norm": 2.184818744659424, "learning_rate": 9e-05, "loss": 2.2934, "step": 10 }, { "epoch": 0.0423728813559322, "grad_norm": 2.2629222869873047, "learning_rate": 9.816326530612245e-05, "loss": 2.2189, "step": 20 }, { "epoch": 0.0635593220338983, "grad_norm": 2.3219990730285645, "learning_rate": 9.612244897959184e-05, "loss": 2.1761, "step": 30 }, { "epoch": 0.0847457627118644, "grad_norm": 1.970227837562561, "learning_rate": 9.408163265306123e-05, "loss": 2.1154, "step": 40 }, { "epoch": 0.1059322033898305, "grad_norm": 2.4447333812713623, "learning_rate": 9.204081632653062e-05, "loss": 2.0372, "step": 50 }, { "epoch": 0.1271186440677966, "grad_norm": 1.99861741065979, "learning_rate": 9e-05, "loss": 2.0285, "step": 60 }, { "epoch": 0.1483050847457627, "grad_norm": 2.0891239643096924, "learning_rate": 8.79591836734694e-05, "loss": 2.012, "step": 70 }, { "epoch": 0.1694915254237288, "grad_norm": 2.4882593154907227, "learning_rate": 8.591836734693878e-05, "loss": 1.9419, "step": 80 }, { "epoch": 0.1906779661016949, "grad_norm": 2.932203769683838, "learning_rate": 8.387755102040816e-05, "loss": 1.9201, "step": 90 }, { "epoch": 0.211864406779661, "grad_norm": 2.5325565338134766, "learning_rate": 8.183673469387756e-05, "loss": 1.9257, "step": 100 }, { "epoch": 0.2330508474576271, "grad_norm": 2.3659908771514893, "learning_rate": 7.979591836734695e-05, "loss": 1.8988, "step": 110 }, { "epoch": 0.2542372881355932, "grad_norm": 2.7854678630828857, "learning_rate": 7.775510204081632e-05, "loss": 1.8925, "step": 120 }, { "epoch": 0.2754237288135593, "grad_norm": 3.742159366607666, "learning_rate": 7.571428571428571e-05, "loss": 1.8514, "step": 130 }, { "epoch": 0.2966101694915254, "grad_norm": 2.687325954437256, "learning_rate": 7.36734693877551e-05, "loss": 1.8296, "step": 140 }, { "epoch": 0.3177966101694915, "grad_norm": 2.8193745613098145, "learning_rate": 7.163265306122449e-05, "loss": 1.8542, "step": 150 }, { "epoch": 0.3389830508474576, "grad_norm": 2.9093828201293945, "learning_rate": 6.959183673469388e-05, "loss": 1.8531, "step": 160 }, { "epoch": 0.3601694915254237, "grad_norm": 2.950552225112915, "learning_rate": 6.755102040816327e-05, "loss": 1.7967, "step": 170 }, { "epoch": 0.3813559322033898, "grad_norm": 3.1596662998199463, "learning_rate": 6.551020408163266e-05, "loss": 1.7983, "step": 180 }, { "epoch": 0.4025423728813559, "grad_norm": 2.8439338207244873, "learning_rate": 6.346938775510203e-05, "loss": 1.7886, "step": 190 }, { "epoch": 0.423728813559322, "grad_norm": 2.8642873764038086, "learning_rate": 6.142857142857143e-05, "loss": 1.7644, "step": 200 }, { "epoch": 0.4449152542372881, "grad_norm": 4.093614101409912, "learning_rate": 5.9387755102040824e-05, "loss": 1.7496, "step": 210 }, { "epoch": 0.4661016949152542, "grad_norm": 2.93874454498291, "learning_rate": 5.7346938775510206e-05, "loss": 1.7558, "step": 220 }, { "epoch": 0.4872881355932203, "grad_norm": 4.743330001831055, "learning_rate": 5.5306122448979594e-05, "loss": 1.7907, "step": 230 }, { "epoch": 0.5084745762711864, "grad_norm": 3.5946221351623535, "learning_rate": 5.3265306122448976e-05, "loss": 1.7615, "step": 240 }, { "epoch": 0.5296610169491526, "grad_norm": 3.4407248497009277, "learning_rate": 5.122448979591837e-05, 
"loss": 1.7587, "step": 250 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2316468707131392.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }