sticker-query-generator-en / trainer_state.json
metchee's picture
Upload 54 files
e93bae1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 112,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17937219730941703,
"grad_norm": 1.5203849077224731,
"learning_rate": 4.984280524733107e-05,
"loss": 3.5214,
"num_input_tokens_seen": 111952,
"step": 5,
"train_runtime": 27.5462,
"train_tokens_per_second": 4064.161
},
{
"epoch": 0.35874439461883406,
"grad_norm": 0.9002476334571838,
"learning_rate": 4.9207588053056545e-05,
"loss": 3.3636,
"num_input_tokens_seen": 223776,
"step": 10,
"train_runtime": 53.4438,
"train_tokens_per_second": 4187.127
},
{
"epoch": 0.5381165919282511,
"grad_norm": 0.6985305547714233,
"learning_rate": 4.8096988312782174e-05,
"loss": 3.0641,
"num_input_tokens_seen": 335600,
"step": 15,
"train_runtime": 79.4605,
"train_tokens_per_second": 4223.48
},
{
"epoch": 0.7174887892376681,
"grad_norm": 0.5677102208137512,
"learning_rate": 4.653281570581023e-05,
"loss": 2.9922,
"num_input_tokens_seen": 447424,
"step": 20,
"train_runtime": 105.6383,
"train_tokens_per_second": 4235.433
},
{
"epoch": 0.8968609865470852,
"grad_norm": 0.6143746376037598,
"learning_rate": 4.454578706170075e-05,
"loss": 3.0359,
"num_input_tokens_seen": 559328,
"step": 25,
"train_runtime": 131.8723,
"train_tokens_per_second": 4241.437
},
{
"epoch": 1.0717488789237668,
"grad_norm": 0.5885606408119202,
"learning_rate": 4.2174923150872544e-05,
"loss": 2.825,
"num_input_tokens_seen": 668544,
"step": 30,
"train_runtime": 157.5044,
"train_tokens_per_second": 4244.606
},
{
"epoch": 1.251121076233184,
"grad_norm": 0.5807215571403503,
"learning_rate": 3.946678240449515e-05,
"loss": 2.8427,
"num_input_tokens_seen": 780496,
"step": 35,
"train_runtime": 183.817,
"train_tokens_per_second": 4246.05
},
{
"epoch": 1.4304932735426008,
"grad_norm": 0.6312059164047241,
"learning_rate": 3.6474546611688445e-05,
"loss": 2.7976,
"num_input_tokens_seen": 892128,
"step": 40,
"train_runtime": 210.129,
"train_tokens_per_second": 4245.62
},
{
"epoch": 1.609865470852018,
"grad_norm": 0.6633515357971191,
"learning_rate": 3.3256976548879184e-05,
"loss": 2.6764,
"num_input_tokens_seen": 1004112,
"step": 45,
"train_runtime": 236.4964,
"train_tokens_per_second": 4245.781
},
{
"epoch": 1.789237668161435,
"grad_norm": 0.7430306077003479,
"learning_rate": 2.9877258050403212e-05,
"loss": 2.7217,
"num_input_tokens_seen": 1116064,
"step": 50,
"train_runtime": 262.8011,
"train_tokens_per_second": 4246.801
},
{
"epoch": 1.9686098654708521,
"grad_norm": 0.7268422245979309,
"learning_rate": 2.6401761180929797e-05,
"loss": 2.7066,
"num_input_tokens_seen": 1227808,
"step": 55,
"train_runtime": 289.1347,
"train_tokens_per_second": 4246.491
},
{
"epoch": 2.1434977578475336,
"grad_norm": 0.7635470032691956,
"learning_rate": 2.2898736876768815e-05,
"loss": 2.6038,
"num_input_tokens_seen": 1337104,
"step": 60,
"train_runtime": 314.8221,
"train_tokens_per_second": 4247.173
},
{
"epoch": 2.3228699551569507,
"grad_norm": 0.8516287207603455,
"learning_rate": 1.9436976651092144e-05,
"loss": 2.5954,
"num_input_tokens_seen": 1448976,
"step": 65,
"train_runtime": 341.1436,
"train_tokens_per_second": 4247.407
},
{
"epoch": 2.502242152466368,
"grad_norm": 0.8331743478775024,
"learning_rate": 1.6084461683442176e-05,
"loss": 2.6679,
"num_input_tokens_seen": 1560352,
"step": 70,
"train_runtime": 367.4136,
"train_tokens_per_second": 4246.854
},
{
"epoch": 2.681614349775785,
"grad_norm": 0.8733316659927368,
"learning_rate": 1.2907027822369005e-05,
"loss": 2.5975,
"num_input_tokens_seen": 1672176,
"step": 75,
"train_runtime": 393.7208,
"train_tokens_per_second": 4247.111
},
{
"epoch": 2.8609865470852016,
"grad_norm": 0.927335798740387,
"learning_rate": 9.967072717539851e-06,
"loss": 2.6389,
"num_input_tokens_seen": 1784448,
"step": 80,
"train_runtime": 420.1259,
"train_tokens_per_second": 4247.412
},
{
"epoch": 3.0358744394618835,
"grad_norm": 0.821441650390625,
"learning_rate": 7.3223304703363135e-06,
"loss": 2.4908,
"num_input_tokens_seen": 1893216,
"step": 85,
"train_runtime": 445.7557,
"train_tokens_per_second": 4247.205
},
{
"epoch": 3.2152466367713006,
"grad_norm": 0.8318443894386292,
"learning_rate": 5.02473786604378e-06,
"loss": 2.6384,
"num_input_tokens_seen": 2005392,
"step": 90,
"train_runtime": 472.0937,
"train_tokens_per_second": 4247.869
},
{
"epoch": 3.3946188340807173,
"grad_norm": 0.8994652628898621,
"learning_rate": 3.119414452281158e-06,
"loss": 2.5131,
"num_input_tokens_seen": 2117168,
"step": 95,
"train_runtime": 498.4113,
"train_tokens_per_second": 4247.833
},
{
"epoch": 3.5739910313901344,
"grad_norm": 0.9667345881462097,
"learning_rate": 1.6437764926350074e-06,
"loss": 2.5565,
"num_input_tokens_seen": 2229024,
"step": 100,
"train_runtime": 524.7221,
"train_tokens_per_second": 4248.009
},
{
"epoch": 3.7533632286995515,
"grad_norm": 0.8132877349853516,
"learning_rate": 6.268021954544096e-07,
"loss": 2.5378,
"num_input_tokens_seen": 2340688,
"step": 105,
"train_runtime": 552.8372,
"train_tokens_per_second": 4233.955
},
{
"epoch": 3.9327354260089686,
"grad_norm": 0.9859150648117065,
"learning_rate": 8.846264705952289e-08,
"loss": 2.5846,
"num_input_tokens_seen": 2452784,
"step": 110,
"train_runtime": 579.23,
"train_tokens_per_second": 4234.56
},
{
"epoch": 4.0,
"num_input_tokens_seen": 2494624,
"step": 112,
"total_flos": 1.0404813073494835e+17,
"train_loss": 2.765545678990228,
"train_runtime": 591.3728,
"train_samples_per_second": 6.02,
"train_steps_per_second": 0.189
}
],
"logging_steps": 5,
"max_steps": 112,
"num_input_tokens_seen": 2494624,
"num_train_epochs": 4,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0404813073494835e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}