{
  "best_metric": 0.0,
  "best_model_checkpoint": "/home1/wangyongqi/codes/text_encoder_finetuning/t5_finetuningg/flant5_large_t2t/checkpoint-20000",
  "epoch": 22.321428571428573,
  "eval_steps": 20000,
  "global_step": 40000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.28,
      "learning_rate": 0.00019825613839285716,
      "loss": 0.0216,
      "step": 500
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00019651227678571428,
      "loss": 0.0002,
      "step": 1000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00019476841517857143,
      "loss": 0.0,
      "step": 1500
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.00019302455357142858,
      "loss": 0.0018,
      "step": 2000
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.00019128069196428573,
      "loss": 0.0,
      "step": 2500
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00018953683035714288,
      "loss": 0.0,
      "step": 3000
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.00018779296875,
      "loss": 0.0,
      "step": 3500
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.00018604910714285715,
      "loss": 0.0001,
      "step": 4000
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.0001843052455357143,
      "loss": 0.0,
      "step": 4500
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.00018256138392857142,
      "loss": 0.0011,
      "step": 5000
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.0001808175223214286,
      "loss": 0.0,
      "step": 5500
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.00017907366071428572,
      "loss": 0.0,
      "step": 6000
    },
    {
      "epoch": 3.63,
      "learning_rate": 0.00017732979910714287,
      "loss": 0.0,
      "step": 6500
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.00017558593750000002,
      "loss": 0.0,
      "step": 7000
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.00017384207589285714,
      "loss": 0.0002,
      "step": 7500
    },
    {
      "epoch": 4.46,
      "learning_rate": 0.00017209821428571429,
      "loss": 0.0001,
      "step": 8000
    },
    {
      "epoch": 4.74,
      "learning_rate": 0.00017035435267857144,
      "loss": 0.0,
      "step": 8500
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.00016861049107142858,
      "loss": 0.0,
      "step": 9000
    },
    {
      "epoch": 5.3,
      "learning_rate": 0.00016686662946428573,
      "loss": 0.0,
      "step": 9500
    },
    {
      "epoch": 5.58,
      "learning_rate": 0.00016512276785714286,
      "loss": 0.0,
      "step": 10000
    },
    {
      "epoch": 5.86,
      "learning_rate": 0.00016337890625,
      "loss": 0.0,
      "step": 10500
    },
    {
      "epoch": 6.14,
      "learning_rate": 0.00016163504464285715,
      "loss": 0.0,
      "step": 11000
    },
    {
      "epoch": 6.42,
      "learning_rate": 0.00015989118303571428,
      "loss": 0.0,
      "step": 11500
    },
    {
      "epoch": 6.7,
      "learning_rate": 0.00015814732142857142,
      "loss": 0.0,
      "step": 12000
    },
    {
      "epoch": 6.98,
      "learning_rate": 0.0001564034598214286,
      "loss": 0.0,
      "step": 12500
    },
    {
      "epoch": 7.25,
      "learning_rate": 0.00015465959821428572,
      "loss": 0.0,
      "step": 13000
    },
    {
      "epoch": 7.53,
      "learning_rate": 0.00015291573660714287,
      "loss": 0.0,
      "step": 13500
    },
    {
      "epoch": 7.81,
      "learning_rate": 0.00015117187500000002,
      "loss": 0.0,
      "step": 14000
    },
    {
      "epoch": 8.09,
      "learning_rate": 0.00014942801339285714,
      "loss": 0.0,
      "step": 14500
    },
    {
      "epoch": 8.37,
      "learning_rate": 0.0001476841517857143,
      "loss": 0.0,
      "step": 15000
    },
    {
      "epoch": 8.65,
      "learning_rate": 0.0001459402901785714,
      "loss": 0.0,
      "step": 15500
    },
    {
      "epoch": 8.93,
      "learning_rate": 0.0001441964285714286,
      "loss": 0.0,
      "step": 16000
    },
    {
      "epoch": 9.21,
      "learning_rate": 0.00014245256696428574,
      "loss": 0.0,
      "step": 16500
    },
    {
      "epoch": 9.49,
      "learning_rate": 0.00014070870535714286,
      "loss": 0.0,
      "step": 17000
    },
    {
      "epoch": 9.77,
      "learning_rate": 0.00013896484375,
      "loss": 0.0,
      "step": 17500
    },
    {
      "epoch": 10.04,
      "learning_rate": 0.00013722098214285716,
      "loss": 0.0,
      "step": 18000
    },
    {
      "epoch": 10.32,
      "learning_rate": 0.00013547712053571428,
      "loss": 0.0,
      "step": 18500
    },
    {
      "epoch": 10.6,
      "learning_rate": 0.00013373325892857143,
      "loss": 0.0,
      "step": 19000
    },
    {
      "epoch": 10.88,
      "learning_rate": 0.00013198939732142858,
      "loss": 0.0,
      "step": 19500
    },
    {
      "epoch": 11.16,
      "learning_rate": 0.00013024553571428573,
      "loss": 0.0,
      "step": 20000
    },
    {
      "epoch": 11.16,
      "eval_loss": 0.0,
      "eval_runtime": 18.1,
      "eval_samples_per_second": 276.243,
      "eval_steps_per_second": 2.21,
      "step": 20000
    },
    {
      "epoch": 11.44,
      "learning_rate": 0.00012850167410714288,
      "loss": 0.0,
      "step": 20500
    },
    {
      "epoch": 11.72,
      "learning_rate": 0.0001267578125,
      "loss": 0.0,
      "step": 21000
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.00012501395089285715,
      "loss": 0.0,
      "step": 21500
    },
    {
      "epoch": 12.28,
      "learning_rate": 0.0001232700892857143,
      "loss": 0.0,
      "step": 22000
    },
    {
      "epoch": 12.56,
      "learning_rate": 0.00012152622767857142,
      "loss": 0.0,
      "step": 22500
    },
    {
      "epoch": 12.83,
      "learning_rate": 0.00011978236607142858,
      "loss": 0.0,
      "step": 23000
    },
    {
      "epoch": 13.11,
      "learning_rate": 0.00011803850446428573,
      "loss": 0.0,
      "step": 23500
    },
    {
      "epoch": 13.39,
      "learning_rate": 0.00011629464285714287,
      "loss": 0.0,
      "step": 24000
    },
    {
      "epoch": 13.67,
      "learning_rate": 0.00011455078125,
      "loss": 0.0,
      "step": 24500
    },
    {
      "epoch": 13.95,
      "learning_rate": 0.00011280691964285715,
      "loss": 0.0,
      "step": 25000
    },
    {
      "epoch": 14.23,
      "learning_rate": 0.00011106305803571429,
      "loss": 0.0,
      "step": 25500
    },
    {
      "epoch": 14.51,
      "learning_rate": 0.00010931919642857142,
      "loss": 0.0,
      "step": 26000
    },
    {
      "epoch": 14.79,
      "learning_rate": 0.00010757533482142858,
      "loss": 0.0,
      "step": 26500
    },
    {
      "epoch": 15.07,
      "learning_rate": 0.00010583147321428572,
      "loss": 0.0004,
      "step": 27000
    },
    {
      "epoch": 15.35,
      "learning_rate": 0.00010408761160714287,
      "loss": 0.0,
      "step": 27500
    },
    {
      "epoch": 15.62,
      "learning_rate": 0.00010234375,
      "loss": 0.0,
      "step": 28000
    },
    {
      "epoch": 15.9,
      "learning_rate": 0.00010059988839285714,
      "loss": 0.0,
      "step": 28500
    },
    {
      "epoch": 16.18,
      "learning_rate": 9.885602678571429e-05,
      "loss": 0.0,
      "step": 29000
    },
    {
      "epoch": 16.46,
      "learning_rate": 9.711216517857144e-05,
      "loss": 0.0,
      "step": 29500
    },
    {
      "epoch": 16.74,
      "learning_rate": 9.536830357142857e-05,
      "loss": 0.0,
      "step": 30000
    },
    {
      "epoch": 17.02,
      "learning_rate": 9.362444196428571e-05,
      "loss": 0.0,
      "step": 30500
    },
    {
      "epoch": 17.3,
      "learning_rate": 9.188058035714287e-05,
      "loss": 0.0,
      "step": 31000
    },
    {
      "epoch": 17.58,
      "learning_rate": 9.013671875000001e-05,
      "loss": 0.0,
      "step": 31500
    },
    {
      "epoch": 17.86,
      "learning_rate": 8.839285714285714e-05,
      "loss": 0.0,
      "step": 32000
    },
    {
      "epoch": 18.14,
      "learning_rate": 8.664899553571429e-05,
      "loss": 0.0,
      "step": 32500
    },
    {
      "epoch": 18.42,
      "learning_rate": 8.490513392857144e-05,
      "loss": 0.0,
      "step": 33000
    },
    {
      "epoch": 18.69,
      "learning_rate": 8.316127232142858e-05,
      "loss": 0.0,
      "step": 33500
    },
    {
      "epoch": 18.97,
      "learning_rate": 8.141741071428571e-05,
      "loss": 0.0,
      "step": 34000
    },
    {
      "epoch": 19.25,
      "learning_rate": 7.967354910714286e-05,
      "loss": 0.0,
      "step": 34500
    },
    {
      "epoch": 19.53,
      "learning_rate": 7.792968750000001e-05,
      "loss": 0.0,
      "step": 35000
    },
    {
      "epoch": 19.81,
      "learning_rate": 7.618582589285715e-05,
      "loss": 0.0,
      "step": 35500
    },
    {
      "epoch": 20.09,
      "learning_rate": 7.44419642857143e-05,
      "loss": 0.0,
      "step": 36000
    },
    {
      "epoch": 20.37,
      "learning_rate": 7.269810267857143e-05,
      "loss": 0.0,
      "step": 36500
    },
    {
      "epoch": 20.65,
      "learning_rate": 7.095424107142858e-05,
      "loss": 0.0,
      "step": 37000
    },
    {
      "epoch": 20.93,
      "learning_rate": 6.921037946428571e-05,
      "loss": 0.0,
      "step": 37500
    },
    {
      "epoch": 21.21,
      "learning_rate": 6.746651785714286e-05,
      "loss": 0.0,
      "step": 38000
    },
    {
      "epoch": 21.48,
      "learning_rate": 6.572265625e-05,
      "loss": 0.0,
      "step": 38500
    },
    {
      "epoch": 21.76,
      "learning_rate": 6.397879464285715e-05,
      "loss": 0.0,
      "step": 39000
    },
    {
      "epoch": 22.04,
      "learning_rate": 6.22349330357143e-05,
      "loss": 0.0,
      "step": 39500
    },
    {
      "epoch": 22.32,
      "learning_rate": 6.049107142857143e-05,
      "loss": 0.0,
      "step": 40000
    },
    {
      "epoch": 22.32,
      "eval_loss": 0.0,
      "eval_runtime": 18.0032,
      "eval_samples_per_second": 277.729,
      "eval_steps_per_second": 2.222,
      "step": 40000
    }
  ],
  "logging_steps": 500,
  "max_steps": 57344,
  "num_train_epochs": 32,
  "save_steps": 20000,
  "total_flos": 1.7738935169875476e+18,
  "trial_name": null,
  "trial_params": null
}