| { | |
| "best_metric": 0.0, | |
| "best_model_checkpoint": "/home1/wangyongqi/codes/text_encoder_finetuning/t5_finetuningg/flant5_large_t2t/checkpoint-20000", | |
| "epoch": 22.321428571428573, | |
| "eval_steps": 20000, | |
| "global_step": 40000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00019825613839285716, | |
| "loss": 0.0216, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00019651227678571428, | |
| "loss": 0.0002, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00019476841517857143, | |
| "loss": 0.0, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00019302455357142858, | |
| "loss": 0.0018, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00019128069196428573, | |
| "loss": 0.0, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00018953683035714288, | |
| "loss": 0.0, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.00018779296875, | |
| "loss": 0.0, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 0.00018604910714285715, | |
| "loss": 0.0001, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 0.0001843052455357143, | |
| "loss": 0.0, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 0.00018256138392857142, | |
| "loss": 0.0011, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 0.0001808175223214286, | |
| "loss": 0.0, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 0.00017907366071428572, | |
| "loss": 0.0, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 0.00017732979910714287, | |
| "loss": 0.0, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 0.00017558593750000002, | |
| "loss": 0.0, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 0.00017384207589285714, | |
| "loss": 0.0002, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 0.00017209821428571429, | |
| "loss": 0.0001, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 0.00017035435267857144, | |
| "loss": 0.0, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 0.00016861049107142858, | |
| "loss": 0.0, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 0.00016686662946428573, | |
| "loss": 0.0, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 0.00016512276785714286, | |
| "loss": 0.0, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 0.00016337890625, | |
| "loss": 0.0, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 0.00016163504464285715, | |
| "loss": 0.0, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 0.00015989118303571428, | |
| "loss": 0.0, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 0.00015814732142857142, | |
| "loss": 0.0, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 0.0001564034598214286, | |
| "loss": 0.0, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 0.00015465959821428572, | |
| "loss": 0.0, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 0.00015291573660714287, | |
| "loss": 0.0, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 0.00015117187500000002, | |
| "loss": 0.0, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "learning_rate": 0.00014942801339285714, | |
| "loss": 0.0, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 0.0001476841517857143, | |
| "loss": 0.0, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 0.0001459402901785714, | |
| "loss": 0.0, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 0.0001441964285714286, | |
| "loss": 0.0, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "learning_rate": 0.00014245256696428574, | |
| "loss": 0.0, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 0.00014070870535714286, | |
| "loss": 0.0, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 0.00013896484375, | |
| "loss": 0.0, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "learning_rate": 0.00013722098214285716, | |
| "loss": 0.0, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "learning_rate": 0.00013547712053571428, | |
| "loss": 0.0, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 10.6, | |
| "learning_rate": 0.00013373325892857143, | |
| "loss": 0.0, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "learning_rate": 0.00013198939732142858, | |
| "loss": 0.0, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 11.16, | |
| "learning_rate": 0.00013024553571428573, | |
| "loss": 0.0, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 11.16, | |
| "eval_loss": 0.0, | |
| "eval_runtime": 18.1, | |
| "eval_samples_per_second": 276.243, | |
| "eval_steps_per_second": 2.21, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 11.44, | |
| "learning_rate": 0.00012850167410714288, | |
| "loss": 0.0, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 11.72, | |
| "learning_rate": 0.0001267578125, | |
| "loss": 0.0, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 0.00012501395089285715, | |
| "loss": 0.0, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 12.28, | |
| "learning_rate": 0.0001232700892857143, | |
| "loss": 0.0, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 12.56, | |
| "learning_rate": 0.00012152622767857142, | |
| "loss": 0.0, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "learning_rate": 0.00011978236607142858, | |
| "loss": 0.0, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 13.11, | |
| "learning_rate": 0.00011803850446428573, | |
| "loss": 0.0, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 13.39, | |
| "learning_rate": 0.00011629464285714287, | |
| "loss": 0.0, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 13.67, | |
| "learning_rate": 0.00011455078125, | |
| "loss": 0.0, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "learning_rate": 0.00011280691964285715, | |
| "loss": 0.0, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 14.23, | |
| "learning_rate": 0.00011106305803571429, | |
| "loss": 0.0, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 14.51, | |
| "learning_rate": 0.00010931919642857142, | |
| "loss": 0.0, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 14.79, | |
| "learning_rate": 0.00010757533482142858, | |
| "loss": 0.0, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 15.07, | |
| "learning_rate": 0.00010583147321428572, | |
| "loss": 0.0004, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 15.35, | |
| "learning_rate": 0.00010408761160714287, | |
| "loss": 0.0, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 15.62, | |
| "learning_rate": 0.00010234375, | |
| "loss": 0.0, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 15.9, | |
| "learning_rate": 0.00010059988839285714, | |
| "loss": 0.0, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 16.18, | |
| "learning_rate": 9.885602678571429e-05, | |
| "loss": 0.0, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 16.46, | |
| "learning_rate": 9.711216517857144e-05, | |
| "loss": 0.0, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 16.74, | |
| "learning_rate": 9.536830357142857e-05, | |
| "loss": 0.0, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 17.02, | |
| "learning_rate": 9.362444196428571e-05, | |
| "loss": 0.0, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 17.3, | |
| "learning_rate": 9.188058035714287e-05, | |
| "loss": 0.0, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 17.58, | |
| "learning_rate": 9.013671875000001e-05, | |
| "loss": 0.0, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 17.86, | |
| "learning_rate": 8.839285714285714e-05, | |
| "loss": 0.0, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 18.14, | |
| "learning_rate": 8.664899553571429e-05, | |
| "loss": 0.0, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 18.42, | |
| "learning_rate": 8.490513392857144e-05, | |
| "loss": 0.0, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 18.69, | |
| "learning_rate": 8.316127232142858e-05, | |
| "loss": 0.0, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 18.97, | |
| "learning_rate": 8.141741071428571e-05, | |
| "loss": 0.0, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 19.25, | |
| "learning_rate": 7.967354910714286e-05, | |
| "loss": 0.0, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 19.53, | |
| "learning_rate": 7.792968750000001e-05, | |
| "loss": 0.0, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "learning_rate": 7.618582589285715e-05, | |
| "loss": 0.0, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 20.09, | |
| "learning_rate": 7.44419642857143e-05, | |
| "loss": 0.0, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 20.37, | |
| "learning_rate": 7.269810267857143e-05, | |
| "loss": 0.0, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "learning_rate": 7.095424107142858e-05, | |
| "loss": 0.0, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 20.93, | |
| "learning_rate": 6.921037946428571e-05, | |
| "loss": 0.0, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 21.21, | |
| "learning_rate": 6.746651785714286e-05, | |
| "loss": 0.0, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 21.48, | |
| "learning_rate": 6.572265625e-05, | |
| "loss": 0.0, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 21.76, | |
| "learning_rate": 6.397879464285715e-05, | |
| "loss": 0.0, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 22.04, | |
| "learning_rate": 6.22349330357143e-05, | |
| "loss": 0.0, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 22.32, | |
| "learning_rate": 6.049107142857143e-05, | |
| "loss": 0.0, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 22.32, | |
| "eval_loss": 0.0, | |
| "eval_runtime": 18.0032, | |
| "eval_samples_per_second": 277.729, | |
| "eval_steps_per_second": 2.222, | |
| "step": 40000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 57344, | |
| "num_train_epochs": 32, | |
| "save_steps": 20000, | |
| "total_flos": 1.7738935169875476e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |