Qwen2-7B-S2R-PRL / trainer_state.json
S2R-data's picture
Upload folder using huggingface_hub
97c357b verified
raw
history blame
39.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3536425767049586,
"eval_steps": 500,
"global_step": 152,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"importance_ratio": 1.0001533031463623,
"kl_div_avg": 0.0022537275217473507,
"learning_rate": 0.0,
"loss_func": "stage2",
"step": 1,
"total_loss": 0.20475485920906067
},
{
"epoch": 0.01,
"importance_ratio": 1.0000330209732056,
"kl_div_avg": 0.0011816158657893538,
"learning_rate": 2.153382790366965e-07,
"loss_func": "stage2",
"step": 2,
"total_loss": 0.17304854094982147
},
{
"epoch": 0.01,
"importance_ratio": 0.9998020529747009,
"kl_div_avg": 0.0012814232613891363,
"learning_rate": 3.4130309724299266e-07,
"loss_func": "stage2",
"step": 3,
"total_loss": 0.39294394850730896
},
{
"epoch": 0.01,
"importance_ratio": 1.0002059936523438,
"kl_div_avg": 0.001280196476727724,
"learning_rate": 4.30676558073393e-07,
"loss_func": "stage2",
"step": 4,
"total_loss": 0.2844714820384979
},
{
"epoch": 0.02,
"importance_ratio": 1.0000617504119873,
"kl_div_avg": 0.0033944130409508944,
"learning_rate": 5e-07,
"loss_func": "stage2",
"step": 5,
"total_loss": 0.38719698786735535
},
{
"epoch": 0.02,
"importance_ratio": 1.0002291202545166,
"kl_div_avg": 0.0006711427122354507,
"learning_rate": 5e-07,
"loss_func": "stage2",
"step": 6,
"total_loss": -0.7354744672775269
},
{
"epoch": 0.02,
"importance_ratio": 1.0001697540283203,
"kl_div_avg": 0.0009293262264691293,
"learning_rate": 4.997668997668998e-07,
"loss_func": "stage2",
"step": 7,
"total_loss": 0.10072920471429825
},
{
"epoch": 0.02,
"importance_ratio": 1.0003812313079834,
"kl_div_avg": 0.0006424246821552515,
"learning_rate": 4.995337995337996e-07,
"loss_func": "stage2",
"step": 8,
"total_loss": -0.7356305122375488
},
{
"epoch": 0.03,
"importance_ratio": 1.0002334117889404,
"kl_div_avg": 0.001079258043318987,
"learning_rate": 4.993006993006993e-07,
"loss_func": "stage2",
"step": 9,
"total_loss": 0.1572389006614685
},
{
"epoch": 0.03,
"importance_ratio": 0.9999626874923706,
"kl_div_avg": 0.0012779454700648785,
"learning_rate": 4.990675990675991e-07,
"loss_func": "stage2",
"step": 10,
"total_loss": -0.2823958396911621
},
{
"epoch": 0.03,
"importance_ratio": 0.9999264478683472,
"kl_div_avg": 0.0010424605570733547,
"learning_rate": 4.988344988344988e-07,
"loss_func": "stage2",
"step": 11,
"total_loss": -0.4896008372306824
},
{
"epoch": 0.03,
"importance_ratio": 0.9999059438705444,
"kl_div_avg": 0.0011010458692908287,
"learning_rate": 4.986013986013987e-07,
"loss_func": "stage2",
"step": 12,
"total_loss": 0.4140966832637787
},
{
"epoch": 0.04,
"importance_ratio": 0.9999853372573853,
"kl_div_avg": 0.0014636358246207237,
"learning_rate": 4.983682983682983e-07,
"loss_func": "stage2",
"step": 13,
"total_loss": 0.4479982852935791
},
{
"epoch": 0.04,
"importance_ratio": 1.0001029968261719,
"kl_div_avg": 0.0010808318620547652,
"learning_rate": 4.981351981351981e-07,
"loss_func": "stage2",
"step": 14,
"total_loss": -0.8298860192298889
},
{
"epoch": 0.04,
"importance_ratio": 1.0000591278076172,
"kl_div_avg": 0.0012179139303043485,
"learning_rate": 4.979020979020978e-07,
"loss_func": "stage2",
"step": 15,
"total_loss": -0.8044635057449341
},
{
"epoch": 0.04,
"importance_ratio": 1.0000298023223877,
"kl_div_avg": 0.004863352049142122,
"learning_rate": 4.976689976689976e-07,
"loss_func": "stage2",
"step": 16,
"total_loss": 0.24484601616859436
},
{
"epoch": 0.05,
"importance_ratio": 1.0002245903015137,
"kl_div_avg": 0.0017589405179023743,
"learning_rate": 4.974358974358974e-07,
"loss_func": "stage2",
"step": 17,
"total_loss": -0.0013702064752578735
},
{
"epoch": 0.05,
"importance_ratio": 0.9999561309814453,
"kl_div_avg": 0.0018663634546101093,
"learning_rate": 4.972027972027972e-07,
"loss_func": "stage2",
"step": 18,
"total_loss": -0.10264579951763153
},
{
"epoch": 0.05,
"importance_ratio": 0.9998416900634766,
"kl_div_avg": 0.0018756331410259008,
"learning_rate": 4.969696969696969e-07,
"loss_func": "stage2",
"step": 19,
"total_loss": 0.48283857107162476
},
{
"epoch": 0.05,
"importance_ratio": 0.999815821647644,
"kl_div_avg": 0.0019035658333450556,
"learning_rate": 4.967365967365967e-07,
"loss_func": "stage2",
"step": 20,
"total_loss": -0.1848379671573639
},
{
"epoch": 0.06,
"importance_ratio": 0.999942421913147,
"kl_div_avg": 0.009937961585819721,
"learning_rate": 4.965034965034965e-07,
"loss_func": "stage2",
"step": 21,
"total_loss": 0.14985397458076477
},
{
"epoch": 0.06,
"importance_ratio": 0.9997897148132324,
"kl_div_avg": 0.00241913297213614,
"learning_rate": 4.962703962703962e-07,
"loss_func": "stage2",
"step": 22,
"total_loss": -0.29083502292633057
},
{
"epoch": 0.06,
"importance_ratio": 0.9998656511306763,
"kl_div_avg": 0.00263982149772346,
"learning_rate": 4.96037296037296e-07,
"loss_func": "stage2",
"step": 23,
"total_loss": -0.02688920497894287
},
{
"epoch": 0.06,
"importance_ratio": 1.0000394582748413,
"kl_div_avg": 0.0032202559523284435,
"learning_rate": 4.958041958041958e-07,
"loss_func": "stage2",
"step": 24,
"total_loss": 0.5130484104156494
},
{
"epoch": 0.07,
"importance_ratio": 1.0000278949737549,
"kl_div_avg": 0.00243174796923995,
"learning_rate": 4.955710955710956e-07,
"loss_func": "stage2",
"step": 25,
"total_loss": 0.309948205947876
},
{
"epoch": 0.07,
"importance_ratio": 0.9999406337738037,
"kl_div_avg": 0.003059545997530222,
"learning_rate": 4.953379953379953e-07,
"loss_func": "stage2",
"step": 26,
"total_loss": 0.11305176466703415
},
{
"epoch": 0.07,
"importance_ratio": 0.9999201893806458,
"kl_div_avg": 0.003822761122137308,
"learning_rate": 4.951048951048951e-07,
"loss_func": "stage2",
"step": 27,
"total_loss": 0.38959354162216187
},
{
"epoch": 0.07,
"importance_ratio": 0.9994360208511353,
"kl_div_avg": 0.0033927513286471367,
"learning_rate": 4.948717948717949e-07,
"loss_func": "stage2",
"step": 28,
"total_loss": 0.4653158485889435
},
{
"epoch": 0.07,
"importance_ratio": 0.9999792575836182,
"kl_div_avg": 0.0032504587434232235,
"learning_rate": 4.946386946386946e-07,
"loss_func": "stage2",
"step": 29,
"total_loss": 0.4534304141998291
},
{
"epoch": 0.07,
"importance_ratio": 1.0000840425491333,
"kl_div_avg": 0.002915366552770138,
"learning_rate": 4.944055944055944e-07,
"loss_func": "stage2",
"step": 30,
"total_loss": 0.46045419573783875
},
{
"epoch": 0.07,
"importance_ratio": 0.9996820688247681,
"kl_div_avg": 0.0030386601574718952,
"learning_rate": 4.941724941724942e-07,
"loss_func": "stage2",
"step": 31,
"total_loss": -0.8699095845222473
},
{
"epoch": 0.07,
"importance_ratio": 0.9997460842132568,
"kl_div_avg": 0.0037004691548645496,
"learning_rate": 4.93939393939394e-07,
"loss_func": "stage2",
"step": 32,
"total_loss": -0.23050172626972198
},
{
"epoch": 0.08,
"importance_ratio": 0.9999768137931824,
"kl_div_avg": 0.0046552978456020355,
"learning_rate": 4.937062937062936e-07,
"loss_func": "stage2",
"step": 33,
"total_loss": 0.3776797950267792
},
{
"epoch": 0.08,
"importance_ratio": 0.9999284148216248,
"kl_div_avg": 0.004839582834392786,
"learning_rate": 4.934731934731934e-07,
"loss_func": "stage2",
"step": 34,
"total_loss": 0.5804722309112549
},
{
"epoch": 0.08,
"importance_ratio": 0.999695897102356,
"kl_div_avg": 0.004378842655569315,
"learning_rate": 4.932400932400932e-07,
"loss_func": "stage2",
"step": 35,
"total_loss": 0.5690972805023193
},
{
"epoch": 0.08,
"importance_ratio": 0.9998815059661865,
"kl_div_avg": 0.0047516971826553345,
"learning_rate": 4.93006993006993e-07,
"loss_func": "stage2",
"step": 36,
"total_loss": 0.2298603653907776
},
{
"epoch": 0.09,
"importance_ratio": 0.9997518062591553,
"kl_div_avg": 0.004270514938980341,
"learning_rate": 4.927738927738927e-07,
"loss_func": "stage2",
"step": 37,
"total_loss": -0.20016932487487793
},
{
"epoch": 0.09,
"importance_ratio": 1.000083088874817,
"kl_div_avg": 0.004711843561381102,
"learning_rate": 4.925407925407925e-07,
"loss_func": "stage2",
"step": 38,
"total_loss": 0.25743457674980164
},
{
"epoch": 0.09,
"importance_ratio": 0.9999402761459351,
"kl_div_avg": 0.004922826308757067,
"learning_rate": 4.923076923076923e-07,
"loss_func": "stage2",
"step": 39,
"total_loss": -0.15881219506263733
},
{
"epoch": 0.09,
"importance_ratio": 0.999858021736145,
"kl_div_avg": 0.0039229318499565125,
"learning_rate": 4.92074592074592e-07,
"loss_func": "stage2",
"step": 40,
"total_loss": -0.23230578005313873
},
{
"epoch": 0.1,
"importance_ratio": 0.999944806098938,
"kl_div_avg": 0.00766429677605629,
"learning_rate": 4.918414918414918e-07,
"loss_func": "stage2",
"step": 41,
"total_loss": -0.03111131489276886
},
{
"epoch": 0.1,
"importance_ratio": 1.000047206878662,
"kl_div_avg": 0.005274048075079918,
"learning_rate": 4.916083916083916e-07,
"loss_func": "stage2",
"step": 42,
"total_loss": -0.033877044916152954
},
{
"epoch": 0.1,
"importance_ratio": 1.0002098083496094,
"kl_div_avg": 0.006382378749549389,
"learning_rate": 4.913752913752914e-07,
"loss_func": "stage2",
"step": 43,
"total_loss": 0.10409477353096008
},
{
"epoch": 0.1,
"importance_ratio": 0.9998437166213989,
"kl_div_avg": 0.006663881242275238,
"learning_rate": 4.911421911421911e-07,
"loss_func": "stage2",
"step": 44,
"total_loss": -0.9234535694122314
},
{
"epoch": 0.11,
"importance_ratio": 0.9999840259552002,
"kl_div_avg": 0.004546988755464554,
"learning_rate": 4.909090909090909e-07,
"loss_func": "stage2",
"step": 45,
"total_loss": 0.022589027881622314
},
{
"epoch": 0.11,
"importance_ratio": 1.0002349615097046,
"kl_div_avg": 0.0048853568732738495,
"learning_rate": 4.906759906759906e-07,
"loss_func": "stage2",
"step": 46,
"total_loss": 0.7868871688842773
},
{
"epoch": 0.11,
"importance_ratio": 1.0003743171691895,
"kl_div_avg": 0.005253675393760204,
"learning_rate": 4.904428904428905e-07,
"loss_func": "stage2",
"step": 47,
"total_loss": 0.7918493747711182
},
{
"epoch": 0.11,
"importance_ratio": 1.0001533031463623,
"kl_div_avg": 0.005680109839886427,
"learning_rate": 4.902097902097902e-07,
"loss_func": "stage2",
"step": 48,
"total_loss": -0.10262584686279297
},
{
"epoch": 0.12,
"importance_ratio": 0.999789834022522,
"kl_div_avg": 0.006105936132371426,
"learning_rate": 4.8997668997669e-07,
"loss_func": "stage2",
"step": 49,
"total_loss": -0.8303477168083191
},
{
"epoch": 0.12,
"importance_ratio": 0.9999826550483704,
"kl_div_avg": 0.005722599104046822,
"learning_rate": 4.897435897435897e-07,
"loss_func": "stage2",
"step": 50,
"total_loss": -0.8298835754394531
},
{
"epoch": 0.12,
"importance_ratio": 1.0003968477249146,
"kl_div_avg": 0.005780387669801712,
"learning_rate": 4.895104895104895e-07,
"loss_func": "stage2",
"step": 51,
"total_loss": -0.8298872709274292
},
{
"epoch": 0.12,
"importance_ratio": 1.001389741897583,
"kl_div_avg": 0.00528342742472887,
"learning_rate": 4.892773892773893e-07,
"loss_func": "stage2",
"step": 52,
"total_loss": -0.8320926427841187
},
{
"epoch": 0.13,
"importance_ratio": 1.0000613927841187,
"kl_div_avg": 0.005511891096830368,
"learning_rate": 4.890442890442891e-07,
"loss_func": "stage2",
"step": 53,
"total_loss": -0.8287703990936279
},
{
"epoch": 0.13,
"importance_ratio": 1.000309705734253,
"kl_div_avg": 0.00582331046462059,
"learning_rate": 4.888111888111888e-07,
"loss_func": "stage2",
"step": 54,
"total_loss": -0.007962286472320557
},
{
"epoch": 0.13,
"importance_ratio": 1.000248670578003,
"kl_div_avg": 0.005605565384030342,
"learning_rate": 4.885780885780885e-07,
"loss_func": "stage2",
"step": 55,
"total_loss": 0.30858537554740906
},
{
"epoch": 0.13,
"importance_ratio": 1.0001593828201294,
"kl_div_avg": 0.006694035604596138,
"learning_rate": 4.883449883449883e-07,
"loss_func": "stage2",
"step": 56,
"total_loss": 0.2841358482837677
},
{
"epoch": 0.14,
"importance_ratio": 0.9998378753662109,
"kl_div_avg": 0.0054409438744187355,
"learning_rate": 4.88111888111888e-07,
"loss_func": "stage2",
"step": 57,
"total_loss": 0.4940628409385681
},
{
"epoch": 0.14,
"importance_ratio": 1.0000338554382324,
"kl_div_avg": 0.00452791154384613,
"learning_rate": 4.878787878787878e-07,
"loss_func": "stage2",
"step": 58,
"total_loss": 0.25203195214271545
},
{
"epoch": 0.14,
"importance_ratio": 1.0000851154327393,
"kl_div_avg": 0.008046677336096764,
"learning_rate": 4.876456876456876e-07,
"loss_func": "stage2",
"step": 59,
"total_loss": 0.8340111970901489
},
{
"epoch": 0.14,
"importance_ratio": 1.0000518560409546,
"kl_div_avg": 0.0053115119226276875,
"learning_rate": 4.874125874125874e-07,
"loss_func": "stage2",
"step": 60,
"total_loss": -0.4959676265716553
},
{
"epoch": 0.15,
"importance_ratio": 1.0000547170639038,
"kl_div_avg": 0.009003904648125172,
"learning_rate": 4.871794871794871e-07,
"loss_func": "stage2",
"step": 61,
"total_loss": -0.7978946566581726
},
{
"epoch": 0.15,
"importance_ratio": 1.000227689743042,
"kl_div_avg": 0.007788301911205053,
"learning_rate": 4.869463869463869e-07,
"loss_func": "stage2",
"step": 62,
"total_loss": -0.801626443862915
},
{
"epoch": 0.15,
"importance_ratio": 1.0001925230026245,
"kl_div_avg": 0.009508013725280762,
"learning_rate": 4.867132867132867e-07,
"loss_func": "stage2",
"step": 63,
"total_loss": -0.8087908029556274
},
{
"epoch": 0.15,
"importance_ratio": 1.0004205703735352,
"kl_div_avg": 0.00948132760822773,
"learning_rate": 4.864801864801865e-07,
"loss_func": "stage2",
"step": 64,
"total_loss": -0.7977355718612671
},
{
"epoch": 0.16,
"importance_ratio": 0.9995088577270508,
"kl_div_avg": 0.013071361929178238,
"learning_rate": 4.862470862470862e-07,
"loss_func": "stage2",
"step": 65,
"total_loss": -0.7926455736160278
},
{
"epoch": 0.16,
"importance_ratio": 1.0000696182250977,
"kl_div_avg": 0.008261503651738167,
"learning_rate": 4.86013986013986e-07,
"loss_func": "stage2",
"step": 66,
"total_loss": 0.8929388523101807
},
{
"epoch": 0.16,
"importance_ratio": 0.9999169111251831,
"kl_div_avg": 0.0072638243436813354,
"learning_rate": 4.857808857808858e-07,
"loss_func": "stage2",
"step": 67,
"total_loss": 0.7651723623275757
},
{
"epoch": 0.16,
"importance_ratio": 1.0006506443023682,
"kl_div_avg": 0.014501883648335934,
"learning_rate": 4.855477855477855e-07,
"loss_func": "stage2",
"step": 68,
"total_loss": -0.7893998622894287
},
{
"epoch": 0.17,
"importance_ratio": 1.0001804828643799,
"kl_div_avg": 0.014889956451952457,
"learning_rate": 4.853146853146853e-07,
"loss_func": "stage2",
"step": 69,
"total_loss": -0.28294438123703003
},
{
"epoch": 0.17,
"importance_ratio": 1.0003015995025635,
"kl_div_avg": 0.012897053733468056,
"learning_rate": 4.850815850815851e-07,
"loss_func": "stage2",
"step": 70,
"total_loss": -0.29949530959129333
},
{
"epoch": 0.17,
"importance_ratio": 0.9999880194664001,
"kl_div_avg": 0.011545092798769474,
"learning_rate": 4.848484848484849e-07,
"loss_func": "stage2",
"step": 71,
"total_loss": -0.07207685708999634
},
{
"epoch": 0.17,
"importance_ratio": 1.0000548362731934,
"kl_div_avg": 0.011798446998000145,
"learning_rate": 4.846153846153846e-07,
"loss_func": "stage2",
"step": 72,
"total_loss": -0.030968129634857178
},
{
"epoch": 0.18,
"importance_ratio": 0.9998947381973267,
"kl_div_avg": 0.014826457016170025,
"learning_rate": 4.843822843822844e-07,
"loss_func": "stage2",
"step": 73,
"total_loss": -0.04581499099731445
},
{
"epoch": 0.18,
"importance_ratio": 0.9999833106994629,
"kl_div_avg": 0.01308258343487978,
"learning_rate": 4.841491841491842e-07,
"loss_func": "stage2",
"step": 74,
"total_loss": -0.2057284414768219
},
{
"epoch": 0.18,
"importance_ratio": 1.000425100326538,
"kl_div_avg": 0.014183840714395046,
"learning_rate": 4.839160839160839e-07,
"loss_func": "stage2",
"step": 75,
"total_loss": 0.6338366866111755
},
{
"epoch": 0.18,
"importance_ratio": 1.0001928806304932,
"kl_div_avg": 0.013401240110397339,
"learning_rate": 4.836829836829837e-07,
"loss_func": "stage2",
"step": 76,
"total_loss": 0.2874578833580017
},
{
"epoch": 0.19,
"importance_ratio": 0.9999673366546631,
"kl_div_avg": 0.012778308242559433,
"learning_rate": 4.834498834498834e-07,
"loss_func": "stage2",
"step": 77,
"total_loss": 0.8181835412979126
},
{
"epoch": 0.19,
"importance_ratio": 0.9999823570251465,
"kl_div_avg": 0.014732200652360916,
"learning_rate": 4.832167832167832e-07,
"loss_func": "stage2",
"step": 78,
"total_loss": 0.818813681602478
},
{
"epoch": 0.19,
"importance_ratio": 1.0003037452697754,
"kl_div_avg": 0.014829716645181179,
"learning_rate": 4.829836829836829e-07,
"loss_func": "stage2",
"step": 79,
"total_loss": 0.0298653244972229
},
{
"epoch": 0.19,
"importance_ratio": 1.0003316402435303,
"kl_div_avg": 0.01861473172903061,
"learning_rate": 4.827505827505827e-07,
"loss_func": "stage2",
"step": 80,
"total_loss": 0.47803181409835815
},
{
"epoch": 0.2,
"importance_ratio": 1.0000956058502197,
"kl_div_avg": 0.013490747660398483,
"learning_rate": 4.825174825174824e-07,
"loss_func": "stage2",
"step": 81,
"total_loss": 0.43567734956741333
},
{
"epoch": 0.2,
"importance_ratio": 1.0001178979873657,
"kl_div_avg": 0.014975698664784431,
"learning_rate": 4.822843822843823e-07,
"loss_func": "stage2",
"step": 82,
"total_loss": -0.27731338143348694
},
{
"epoch": 0.2,
"importance_ratio": 0.9999943971633911,
"kl_div_avg": 0.012739075347781181,
"learning_rate": 4.82051282051282e-07,
"loss_func": "stage2",
"step": 83,
"total_loss": 0.14717233180999756
},
{
"epoch": 0.2,
"importance_ratio": 1.000013828277588,
"kl_div_avg": 0.015260843560099602,
"learning_rate": 4.818181818181818e-07,
"loss_func": "stage2",
"step": 84,
"total_loss": -0.876125693321228
},
{
"epoch": 0.2,
"importance_ratio": 0.9999637603759766,
"kl_div_avg": 0.009859403595328331,
"learning_rate": 4.815850815850815e-07,
"loss_func": "stage2",
"step": 85,
"total_loss": -0.007574997842311859
},
{
"epoch": 0.2,
"importance_ratio": 0.9998883008956909,
"kl_div_avg": 0.012243506498634815,
"learning_rate": 4.813519813519813e-07,
"loss_func": "stage2",
"step": 86,
"total_loss": -0.8214981555938721
},
{
"epoch": 0.2,
"importance_ratio": 0.9997843503952026,
"kl_div_avg": 0.009431181475520134,
"learning_rate": 4.811188811188811e-07,
"loss_func": "stage2",
"step": 87,
"total_loss": 0.09674309194087982
},
{
"epoch": 0.2,
"importance_ratio": 0.9998980760574341,
"kl_div_avg": 0.012359343469142914,
"learning_rate": 4.808857808857809e-07,
"loss_func": "stage2",
"step": 88,
"total_loss": 0.13037657737731934
},
{
"epoch": 0.21,
"importance_ratio": 0.9999622702598572,
"kl_div_avg": 0.019689181819558144,
"learning_rate": 4.806526806526806e-07,
"loss_func": "stage2",
"step": 89,
"total_loss": -0.053394585847854614
},
{
"epoch": 0.21,
"importance_ratio": 1.0000618696212769,
"kl_div_avg": 0.013591418042778969,
"learning_rate": 4.804195804195804e-07,
"loss_func": "stage2",
"step": 90,
"total_loss": 0.7699258327484131
},
{
"epoch": 0.21,
"importance_ratio": 1.000166893005371,
"kl_div_avg": 0.016464080661535263,
"learning_rate": 4.801864801864802e-07,
"loss_func": "stage2",
"step": 91,
"total_loss": 0.16640010476112366
},
{
"epoch": 0.21,
"importance_ratio": 0.9997192621231079,
"kl_div_avg": 0.02152731642127037,
"learning_rate": 4.799533799533799e-07,
"loss_func": "stage2",
"step": 92,
"total_loss": -0.17338214814662933
},
{
"epoch": 0.22,
"importance_ratio": 1.000048279762268,
"kl_div_avg": 0.01632719114422798,
"learning_rate": 4.797202797202797e-07,
"loss_func": "stage2",
"step": 93,
"total_loss": -0.311355322599411
},
{
"epoch": 0.22,
"importance_ratio": 1.000006914138794,
"kl_div_avg": 0.02427072264254093,
"learning_rate": 4.794871794871795e-07,
"loss_func": "stage2",
"step": 94,
"total_loss": 0.6689386963844299
},
{
"epoch": 0.22,
"importance_ratio": 1.0001646280288696,
"kl_div_avg": 0.020599162206053734,
"learning_rate": 4.792540792540793e-07,
"loss_func": "stage2",
"step": 95,
"total_loss": -0.27341556549072266
},
{
"epoch": 0.22,
"importance_ratio": 1.0005509853363037,
"kl_div_avg": 0.01824137195944786,
"learning_rate": 4.79020979020979e-07,
"loss_func": "stage2",
"step": 96,
"total_loss": -0.7693477869033813
},
{
"epoch": 0.23,
"importance_ratio": 1.0000276565551758,
"kl_div_avg": 0.019494881853461266,
"learning_rate": 4.787878787878788e-07,
"loss_func": "stage2",
"step": 97,
"total_loss": 0.5362880229949951
},
{
"epoch": 0.23,
"importance_ratio": 1.0001310110092163,
"kl_div_avg": 0.018007826060056686,
"learning_rate": 4.785547785547786e-07,
"loss_func": "stage2",
"step": 98,
"total_loss": 0.13775774836540222
},
{
"epoch": 0.23,
"importance_ratio": 1.0001639127731323,
"kl_div_avg": 0.018012849614024162,
"learning_rate": 4.783216783216783e-07,
"loss_func": "stage2",
"step": 99,
"total_loss": 0.43274223804473877
},
{
"epoch": 0.23,
"importance_ratio": 1.0001168251037598,
"kl_div_avg": 0.016401609405875206,
"learning_rate": 4.78088578088578e-07,
"loss_func": "stage2",
"step": 100,
"total_loss": 0.024271167814731598
},
{
"epoch": 0.24,
"importance_ratio": 0.9998679161071777,
"kl_div_avg": 0.018232179805636406,
"learning_rate": 4.778554778554778e-07,
"loss_func": "stage2",
"step": 101,
"total_loss": -0.13439278304576874
},
{
"epoch": 0.24,
"importance_ratio": 0.9999688267707825,
"kl_div_avg": 0.01988252066075802,
"learning_rate": 4.776223776223776e-07,
"loss_func": "stage2",
"step": 102,
"total_loss": -0.5875260233879089
},
{
"epoch": 0.24,
"importance_ratio": 1.000270128250122,
"kl_div_avg": 0.024290431290864944,
"learning_rate": 4.773892773892773e-07,
"loss_func": "stage2",
"step": 103,
"total_loss": 0.43479496240615845
},
{
"epoch": 0.24,
"importance_ratio": 1.0000674724578857,
"kl_div_avg": 0.017674673348665237,
"learning_rate": 4.771561771561771e-07,
"loss_func": "stage2",
"step": 104,
"total_loss": -0.30525317788124084
},
{
"epoch": 0.25,
"importance_ratio": 0.9998775720596313,
"kl_div_avg": 0.015626681968569756,
"learning_rate": 4.769230769230769e-07,
"loss_func": "stage2",
"step": 105,
"total_loss": -0.4420226514339447
},
{
"epoch": 0.25,
"importance_ratio": 1.000077247619629,
"kl_div_avg": 0.011648900806903839,
"learning_rate": 4.7668997668997666e-07,
"loss_func": "stage2",
"step": 106,
"total_loss": -0.7798103094100952
},
{
"epoch": 0.25,
"importance_ratio": 0.9998931884765625,
"kl_div_avg": 0.02424587681889534,
"learning_rate": 4.7645687645687646e-07,
"loss_func": "stage2",
"step": 107,
"total_loss": 0.3463074564933777
},
{
"epoch": 0.25,
"importance_ratio": 1.0001239776611328,
"kl_div_avg": 0.019622065126895905,
"learning_rate": 4.762237762237762e-07,
"loss_func": "stage2",
"step": 108,
"total_loss": -0.7446590662002563
},
{
"epoch": 0.26,
"importance_ratio": 1.0000584125518799,
"kl_div_avg": 0.02028917521238327,
"learning_rate": 4.75990675990676e-07,
"loss_func": "stage2",
"step": 109,
"total_loss": -0.269249826669693
},
{
"epoch": 0.26,
"importance_ratio": 0.9998422861099243,
"kl_div_avg": 0.022249866276979446,
"learning_rate": 4.7575757575757574e-07,
"loss_func": "stage2",
"step": 110,
"total_loss": -0.6871204376220703
},
{
"epoch": 0.26,
"importance_ratio": 1.0001263618469238,
"kl_div_avg": 0.01973864436149597,
"learning_rate": 4.755244755244755e-07,
"loss_func": "stage2",
"step": 111,
"total_loss": -0.6550001502037048
},
{
"epoch": 0.26,
"importance_ratio": 0.9996439218521118,
"kl_div_avg": 0.02077101171016693,
"learning_rate": 4.7529137529137523e-07,
"loss_func": "stage2",
"step": 112,
"total_loss": 0.042905211448669434
},
{
"epoch": 0.27,
"importance_ratio": 1.000232219696045,
"kl_div_avg": 0.036871857941150665,
"learning_rate": 4.750582750582751e-07,
"loss_func": "stage2",
"step": 113,
"total_loss": -0.6672766804695129
},
{
"epoch": 0.27,
"importance_ratio": 0.9999071359634399,
"kl_div_avg": 0.023731358349323273,
"learning_rate": 4.748251748251748e-07,
"loss_func": "stage2",
"step": 114,
"total_loss": 0.22221048176288605
},
{
"epoch": 0.27,
"importance_ratio": 0.9998910427093506,
"kl_div_avg": 0.02947179228067398,
"learning_rate": 4.7459207459207457e-07,
"loss_func": "stage2",
"step": 115,
"total_loss": 0.05969160795211792
},
{
"epoch": 0.27,
"importance_ratio": 0.9999151825904846,
"kl_div_avg": 0.028714872896671295,
"learning_rate": 4.743589743589743e-07,
"loss_func": "stage2",
"step": 116,
"total_loss": 0.023228317499160767
},
{
"epoch": 0.28,
"importance_ratio": 0.9999186992645264,
"kl_div_avg": 0.022106900811195374,
"learning_rate": 4.741258741258741e-07,
"loss_func": "stage2",
"step": 117,
"total_loss": -0.4819675087928772
},
{
"epoch": 0.28,
"importance_ratio": 0.9995689392089844,
"kl_div_avg": 0.03649330139160156,
"learning_rate": 4.7389277389277386e-07,
"loss_func": "stage2",
"step": 118,
"total_loss": 0.3108961582183838
},
{
"epoch": 0.28,
"importance_ratio": 0.9996166825294495,
"kl_div_avg": 0.03707721084356308,
"learning_rate": 4.7365967365967365e-07,
"loss_func": "stage2",
"step": 119,
"total_loss": 0.3411310315132141
},
{
"epoch": 0.28,
"importance_ratio": 0.9993818998336792,
"kl_div_avg": 0.02660643495619297,
"learning_rate": 4.734265734265734e-07,
"loss_func": "stage2",
"step": 120,
"total_loss": -0.1782078891992569
},
{
"epoch": 0.29,
"importance_ratio": 1.0000483989715576,
"kl_div_avg": 0.020809054374694824,
"learning_rate": 4.731934731934732e-07,
"loss_func": "stage2",
"step": 121,
"total_loss": 0.2691134810447693
},
{
"epoch": 0.29,
"importance_ratio": 0.9999549388885498,
"kl_div_avg": 0.018363434821367264,
"learning_rate": 4.7296037296037294e-07,
"loss_func": "stage2",
"step": 122,
"total_loss": -0.11637084186077118
},
{
"epoch": 0.29,
"importance_ratio": 1.0000113248825073,
"kl_div_avg": 0.01843072474002838,
"learning_rate": 4.727272727272727e-07,
"loss_func": "stage2",
"step": 123,
"total_loss": 0.41335129737854004
},
{
"epoch": 0.29,
"importance_ratio": 0.9993541836738586,
"kl_div_avg": 0.023193594068288803,
"learning_rate": 4.724941724941724e-07,
"loss_func": "stage2",
"step": 124,
"total_loss": 0.3763417601585388
},
{
"epoch": 0.3,
"importance_ratio": 1.0000863075256348,
"kl_div_avg": 0.02424781210720539,
"learning_rate": 4.722610722610723e-07,
"loss_func": "stage2",
"step": 125,
"total_loss": 0.8012444972991943
},
{
"epoch": 0.3,
"importance_ratio": 0.9997611045837402,
"kl_div_avg": 0.019796304404735565,
"learning_rate": 4.72027972027972e-07,
"loss_func": "stage2",
"step": 126,
"total_loss": 0.2471800446510315
},
{
"epoch": 0.3,
"importance_ratio": 0.9994201064109802,
"kl_div_avg": 0.03692193701863289,
"learning_rate": 4.7179487179487176e-07,
"loss_func": "stage2",
"step": 127,
"total_loss": 0.6575199961662292
},
{
"epoch": 0.3,
"importance_ratio": 0.9993878602981567,
"kl_div_avg": 0.024065542966127396,
"learning_rate": 4.715617715617715e-07,
"loss_func": "stage2",
"step": 128,
"total_loss": 0.39468204975128174
},
{
"epoch": 0.31,
"importance_ratio": 1.0001311302185059,
"kl_div_avg": 0.024539019912481308,
"learning_rate": 4.713286713286713e-07,
"loss_func": "stage2",
"step": 129,
"total_loss": 0.1832764893770218
},
{
"epoch": 0.31,
"importance_ratio": 0.9998582005500793,
"kl_div_avg": 0.024634480476379395,
"learning_rate": 4.710955710955711e-07,
"loss_func": "stage2",
"step": 130,
"total_loss": 0.1833437830209732
},
{
"epoch": 0.31,
"importance_ratio": 0.9997899532318115,
"kl_div_avg": 0.024798087775707245,
"learning_rate": 4.7086247086247085e-07,
"loss_func": "stage2",
"step": 131,
"total_loss": -0.029144808650016785
},
{
"epoch": 0.31,
"importance_ratio": 0.9991902112960815,
"kl_div_avg": 0.022448930889368057,
"learning_rate": 4.706293706293706e-07,
"loss_func": "stage2",
"step": 132,
"total_loss": 0.5746316909790039
},
{
"epoch": 0.32,
"importance_ratio": 0.9999872446060181,
"kl_div_avg": 0.030649660155177116,
"learning_rate": 4.703962703962704e-07,
"loss_func": "stage2",
"step": 133,
"total_loss": -0.27921533584594727
},
{
"epoch": 0.32,
"importance_ratio": 0.9999322891235352,
"kl_div_avg": 0.028502434492111206,
"learning_rate": 4.7016317016317013e-07,
"loss_func": "stage2",
"step": 134,
"total_loss": -0.18240980803966522
},
{
"epoch": 0.32,
"importance_ratio": 0.9991711378097534,
"kl_div_avg": 0.034727346152067184,
"learning_rate": 4.699300699300699e-07,
"loss_func": "stage2",
"step": 135,
"total_loss": 0.20225152373313904
},
{
"epoch": 0.32,
"importance_ratio": 1.000333547592163,
"kl_div_avg": 0.02375342883169651,
"learning_rate": 4.696969696969697e-07,
"loss_func": "stage2",
"step": 136,
"total_loss": 0.19642743468284607
},
{
"epoch": 0.33,
"importance_ratio": 1.0000425577163696,
"kl_div_avg": 0.02941157855093479,
"learning_rate": 4.6946386946386947e-07,
"loss_func": "stage2",
"step": 137,
"total_loss": -0.2832520604133606
},
{
"epoch": 0.33,
"importance_ratio": 0.9998887777328491,
"kl_div_avg": 0.0380094014108181,
"learning_rate": 4.692307692307692e-07,
"loss_func": "stage2",
"step": 138,
"total_loss": 0.07638365030288696
},
{
"epoch": 0.33,
"importance_ratio": 1.0000771284103394,
"kl_div_avg": 0.031544946134090424,
"learning_rate": 4.6899766899766896e-07,
"loss_func": "stage2",
"step": 139,
"total_loss": -0.29178526997566223
},
{
"epoch": 0.33,
"importance_ratio": 0.9999626278877258,
"kl_div_avg": 0.03620228171348572,
"learning_rate": 4.6876456876456875e-07,
"loss_func": "stage2",
"step": 140,
"total_loss": 0.12343016266822815
},
{
"epoch": 0.34,
"importance_ratio": 0.9999535083770752,
"kl_div_avg": 0.026813074946403503,
"learning_rate": 4.685314685314685e-07,
"loss_func": "stage2",
"step": 141,
"total_loss": 0.024522602558135986
},
{
"epoch": 0.34,
"importance_ratio": 0.9998844265937805,
"kl_div_avg": 0.031967416405677795,
"learning_rate": 4.682983682983683e-07,
"loss_func": "stage2",
"step": 142,
"total_loss": 0.6268632411956787
},
{
"epoch": 0.34,
"importance_ratio": 0.9997915029525757,
"kl_div_avg": 0.024857094511389732,
"learning_rate": 4.6806526806526804e-07,
"loss_func": "stage2",
"step": 143,
"total_loss": 0.13797396421432495
},
{
"epoch": 0.34,
"importance_ratio": 0.9998888969421387,
"kl_div_avg": 0.04350988566875458,
"learning_rate": 4.6783216783216784e-07,
"loss_func": "stage2",
"step": 144,
"total_loss": 0.5312750935554504
},
{
"epoch": 0.34,
"importance_ratio": 0.9998694658279419,
"kl_div_avg": 0.03627926483750343,
"learning_rate": 4.675990675990676e-07,
"loss_func": "stage2",
"step": 145,
"total_loss": 0.09028466045856476
},
{
"epoch": 0.34,
"importance_ratio": 0.9999563694000244,
"kl_div_avg": 0.02497359737753868,
"learning_rate": 4.673659673659673e-07,
"loss_func": "stage2",
"step": 146,
"total_loss": 0.5303145051002502
},
{
"epoch": 0.34,
"importance_ratio": 0.9999274015426636,
"kl_div_avg": 0.026778005063533783,
"learning_rate": 4.6713286713286707e-07,
"loss_func": "stage2",
"step": 147,
"total_loss": 0.1259535402059555
},
{
"epoch": 0.34,
"importance_ratio": 0.9997484683990479,
"kl_div_avg": 0.02970227226614952,
"learning_rate": 4.668997668997669e-07,
"loss_func": "stage2",
"step": 148,
"total_loss": 0.5203793048858643
},
{
"epoch": 0.35,
"importance_ratio": 1.0000306367874146,
"kl_div_avg": 0.03883244842290878,
"learning_rate": 4.6666666666666666e-07,
"loss_func": "stage2",
"step": 149,
"total_loss": 0.5459209680557251
},
{
"epoch": 0.35,
"importance_ratio": 0.9999977946281433,
"kl_div_avg": 0.010376233607530594,
"learning_rate": 4.664335664335664e-07,
"loss_func": "stage2",
"step": 150,
"total_loss": 0.5408558249473572
},
{
"epoch": 0.35,
"importance_ratio": 1.0000672340393066,
"kl_div_avg": 0.020404186099767685,
"learning_rate": 4.6620046620046615e-07,
"loss_func": "stage2",
"step": 151,
"total_loss": 0.5100921392440796
},
{
"epoch": 0.35,
"importance_ratio": 1.0013837814331055,
"kl_div_avg": 0.032543182373046875,
"learning_rate": 4.6596736596736595e-07,
"loss_func": "stage2",
"step": 152,
"total_loss": -0.4812799394130707
}
],
"logging_steps": 1.0,
"max_steps": 2150,
"num_input_tokens_seen": 0,
"num_train_epochs": 20.0,
"save_steps": 50,
"total_flos": 0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}