Qwen2-7B-S2R-PRL / trainer_state.json

Upload folder using huggingface_hub

97c357b verified 4 months ago

39.2 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.3536425767049586,
	"eval_steps": 500,
	"global_step": 152,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.01,
	"importance_ratio": 1.0001533031463623,
	"kl_div_avg": 0.0022537275217473507,
	"learning_rate": 0.0,
	"loss_func": "stage2",
	"step": 1,
	"total_loss": 0.20475485920906067
	},
	{
	"epoch": 0.01,
	"importance_ratio": 1.0000330209732056,
	"kl_div_avg": 0.0011816158657893538,
	"learning_rate": 2.153382790366965e-07,
	"loss_func": "stage2",
	"step": 2,
	"total_loss": 0.17304854094982147
	},
	{
	"epoch": 0.01,
	"importance_ratio": 0.9998020529747009,
	"kl_div_avg": 0.0012814232613891363,
	"learning_rate": 3.4130309724299266e-07,
	"loss_func": "stage2",
	"step": 3,
	"total_loss": 0.39294394850730896
	},
	{
	"epoch": 0.01,
	"importance_ratio": 1.0002059936523438,
	"kl_div_avg": 0.001280196476727724,
	"learning_rate": 4.30676558073393e-07,
	"loss_func": "stage2",
	"step": 4,
	"total_loss": 0.2844714820384979
	},
	{
	"epoch": 0.02,
	"importance_ratio": 1.0000617504119873,
	"kl_div_avg": 0.0033944130409508944,
	"learning_rate": 5e-07,
	"loss_func": "stage2",
	"step": 5,
	"total_loss": 0.38719698786735535
	},
	{
	"epoch": 0.02,
	"importance_ratio": 1.0002291202545166,
	"kl_div_avg": 0.0006711427122354507,
	"learning_rate": 5e-07,
	"loss_func": "stage2",
	"step": 6,
	"total_loss": -0.7354744672775269
	},
	{
	"epoch": 0.02,
	"importance_ratio": 1.0001697540283203,
	"kl_div_avg": 0.0009293262264691293,
	"learning_rate": 4.997668997668998e-07,
	"loss_func": "stage2",
	"step": 7,
	"total_loss": 0.10072920471429825
	},
	{
	"epoch": 0.02,
	"importance_ratio": 1.0003812313079834,
	"kl_div_avg": 0.0006424246821552515,
	"learning_rate": 4.995337995337996e-07,
	"loss_func": "stage2",
	"step": 8,
	"total_loss": -0.7356305122375488
	},
	{
	"epoch": 0.03,
	"importance_ratio": 1.0002334117889404,
	"kl_div_avg": 0.001079258043318987,
	"learning_rate": 4.993006993006993e-07,
	"loss_func": "stage2",
	"step": 9,
	"total_loss": 0.1572389006614685
	},
	{
	"epoch": 0.03,
	"importance_ratio": 0.9999626874923706,
	"kl_div_avg": 0.0012779454700648785,
	"learning_rate": 4.990675990675991e-07,
	"loss_func": "stage2",
	"step": 10,
	"total_loss": -0.2823958396911621
	},
	{
	"epoch": 0.03,
	"importance_ratio": 0.9999264478683472,
	"kl_div_avg": 0.0010424605570733547,
	"learning_rate": 4.988344988344988e-07,
	"loss_func": "stage2",
	"step": 11,
	"total_loss": -0.4896008372306824
	},
	{
	"epoch": 0.03,
	"importance_ratio": 0.9999059438705444,
	"kl_div_avg": 0.0011010458692908287,
	"learning_rate": 4.986013986013987e-07,
	"loss_func": "stage2",
	"step": 12,
	"total_loss": 0.4140966832637787
	},
	{
	"epoch": 0.04,
	"importance_ratio": 0.9999853372573853,
	"kl_div_avg": 0.0014636358246207237,
	"learning_rate": 4.983682983682983e-07,
	"loss_func": "stage2",
	"step": 13,
	"total_loss": 0.4479982852935791
	},
	{
	"epoch": 0.04,
	"importance_ratio": 1.0001029968261719,
	"kl_div_avg": 0.0010808318620547652,
	"learning_rate": 4.981351981351981e-07,
	"loss_func": "stage2",
	"step": 14,
	"total_loss": -0.8298860192298889
	},
	{
	"epoch": 0.04,
	"importance_ratio": 1.0000591278076172,
	"kl_div_avg": 0.0012179139303043485,
	"learning_rate": 4.979020979020978e-07,
	"loss_func": "stage2",
	"step": 15,
	"total_loss": -0.8044635057449341
	},
	{
	"epoch": 0.04,
	"importance_ratio": 1.0000298023223877,
	"kl_div_avg": 0.004863352049142122,
	"learning_rate": 4.976689976689976e-07,
	"loss_func": "stage2",
	"step": 16,
	"total_loss": 0.24484601616859436
	},
	{
	"epoch": 0.05,
	"importance_ratio": 1.0002245903015137,
	"kl_div_avg": 0.0017589405179023743,
	"learning_rate": 4.974358974358974e-07,
	"loss_func": "stage2",
	"step": 17,
	"total_loss": -0.0013702064752578735
	},
	{
	"epoch": 0.05,
	"importance_ratio": 0.9999561309814453,
	"kl_div_avg": 0.0018663634546101093,
	"learning_rate": 4.972027972027972e-07,
	"loss_func": "stage2",
	"step": 18,
	"total_loss": -0.10264579951763153
	},
	{
	"epoch": 0.05,
	"importance_ratio": 0.9998416900634766,
	"kl_div_avg": 0.0018756331410259008,
	"learning_rate": 4.969696969696969e-07,
	"loss_func": "stage2",
	"step": 19,
	"total_loss": 0.48283857107162476
	},
	{
	"epoch": 0.05,
	"importance_ratio": 0.999815821647644,
	"kl_div_avg": 0.0019035658333450556,
	"learning_rate": 4.967365967365967e-07,
	"loss_func": "stage2",
	"step": 20,
	"total_loss": -0.1848379671573639
	},
	{
	"epoch": 0.06,
	"importance_ratio": 0.999942421913147,
	"kl_div_avg": 0.009937961585819721,
	"learning_rate": 4.965034965034965e-07,
	"loss_func": "stage2",
	"step": 21,
	"total_loss": 0.14985397458076477
	},
	{
	"epoch": 0.06,
	"importance_ratio": 0.9997897148132324,
	"kl_div_avg": 0.00241913297213614,
	"learning_rate": 4.962703962703962e-07,
	"loss_func": "stage2",
	"step": 22,
	"total_loss": -0.29083502292633057
	},
	{
	"epoch": 0.06,
	"importance_ratio": 0.9998656511306763,
	"kl_div_avg": 0.00263982149772346,
	"learning_rate": 4.96037296037296e-07,
	"loss_func": "stage2",
	"step": 23,
	"total_loss": -0.02688920497894287
	},
	{
	"epoch": 0.06,
	"importance_ratio": 1.0000394582748413,
	"kl_div_avg": 0.0032202559523284435,
	"learning_rate": 4.958041958041958e-07,
	"loss_func": "stage2",
	"step": 24,
	"total_loss": 0.5130484104156494
	},
	{
	"epoch": 0.07,
	"importance_ratio": 1.0000278949737549,
	"kl_div_avg": 0.00243174796923995,
	"learning_rate": 4.955710955710956e-07,
	"loss_func": "stage2",
	"step": 25,
	"total_loss": 0.309948205947876
	},
	{
	"epoch": 0.07,
	"importance_ratio": 0.9999406337738037,
	"kl_div_avg": 0.003059545997530222,
	"learning_rate": 4.953379953379953e-07,
	"loss_func": "stage2",
	"step": 26,
	"total_loss": 0.11305176466703415
	},
	{
	"epoch": 0.07,
	"importance_ratio": 0.9999201893806458,
	"kl_div_avg": 0.003822761122137308,
	"learning_rate": 4.951048951048951e-07,
	"loss_func": "stage2",
	"step": 27,
	"total_loss": 0.38959354162216187
	},
	{
	"epoch": 0.07,
	"importance_ratio": 0.9994360208511353,
	"kl_div_avg": 0.0033927513286471367,
	"learning_rate": 4.948717948717949e-07,
	"loss_func": "stage2",
	"step": 28,
	"total_loss": 0.4653158485889435
	},
	{
	"epoch": 0.07,
	"importance_ratio": 0.9999792575836182,
	"kl_div_avg": 0.0032504587434232235,
	"learning_rate": 4.946386946386946e-07,
	"loss_func": "stage2",
	"step": 29,
	"total_loss": 0.4534304141998291
	},
	{
	"epoch": 0.07,
	"importance_ratio": 1.0000840425491333,
	"kl_div_avg": 0.002915366552770138,
	"learning_rate": 4.944055944055944e-07,
	"loss_func": "stage2",
	"step": 30,
	"total_loss": 0.46045419573783875
	},
	{
	"epoch": 0.07,
	"importance_ratio": 0.9996820688247681,
	"kl_div_avg": 0.0030386601574718952,
	"learning_rate": 4.941724941724942e-07,
	"loss_func": "stage2",
	"step": 31,
	"total_loss": -0.8699095845222473
	},
	{
	"epoch": 0.07,
	"importance_ratio": 0.9997460842132568,
	"kl_div_avg": 0.0037004691548645496,
	"learning_rate": 4.93939393939394e-07,
	"loss_func": "stage2",
	"step": 32,
	"total_loss": -0.23050172626972198
	},
	{
	"epoch": 0.08,
	"importance_ratio": 0.9999768137931824,
	"kl_div_avg": 0.0046552978456020355,
	"learning_rate": 4.937062937062936e-07,
	"loss_func": "stage2",
	"step": 33,
	"total_loss": 0.3776797950267792
	},
	{
	"epoch": 0.08,
	"importance_ratio": 0.9999284148216248,
	"kl_div_avg": 0.004839582834392786,
	"learning_rate": 4.934731934731934e-07,
	"loss_func": "stage2",
	"step": 34,
	"total_loss": 0.5804722309112549
	},
	{
	"epoch": 0.08,
	"importance_ratio": 0.999695897102356,
	"kl_div_avg": 0.004378842655569315,
	"learning_rate": 4.932400932400932e-07,
	"loss_func": "stage2",
	"step": 35,
	"total_loss": 0.5690972805023193
	},
	{
	"epoch": 0.08,
	"importance_ratio": 0.9998815059661865,
	"kl_div_avg": 0.0047516971826553345,
	"learning_rate": 4.93006993006993e-07,
	"loss_func": "stage2",
	"step": 36,
	"total_loss": 0.2298603653907776
	},
	{
	"epoch": 0.09,
	"importance_ratio": 0.9997518062591553,
	"kl_div_avg": 0.004270514938980341,
	"learning_rate": 4.927738927738927e-07,
	"loss_func": "stage2",
	"step": 37,
	"total_loss": -0.20016932487487793
	},
	{
	"epoch": 0.09,
	"importance_ratio": 1.000083088874817,
	"kl_div_avg": 0.004711843561381102,
	"learning_rate": 4.925407925407925e-07,
	"loss_func": "stage2",
	"step": 38,
	"total_loss": 0.25743457674980164
	},
	{
	"epoch": 0.09,
	"importance_ratio": 0.9999402761459351,
	"kl_div_avg": 0.004922826308757067,
	"learning_rate": 4.923076923076923e-07,
	"loss_func": "stage2",
	"step": 39,
	"total_loss": -0.15881219506263733
	},
	{
	"epoch": 0.09,
	"importance_ratio": 0.999858021736145,
	"kl_div_avg": 0.0039229318499565125,
	"learning_rate": 4.92074592074592e-07,
	"loss_func": "stage2",
	"step": 40,
	"total_loss": -0.23230578005313873
	},
	{
	"epoch": 0.1,
	"importance_ratio": 0.999944806098938,
	"kl_div_avg": 0.00766429677605629,
	"learning_rate": 4.918414918414918e-07,
	"loss_func": "stage2",
	"step": 41,
	"total_loss": -0.03111131489276886
	},
	{
	"epoch": 0.1,
	"importance_ratio": 1.000047206878662,
	"kl_div_avg": 0.005274048075079918,
	"learning_rate": 4.916083916083916e-07,
	"loss_func": "stage2",
	"step": 42,
	"total_loss": -0.033877044916152954
	},
	{
	"epoch": 0.1,
	"importance_ratio": 1.0002098083496094,
	"kl_div_avg": 0.006382378749549389,
	"learning_rate": 4.913752913752914e-07,
	"loss_func": "stage2",
	"step": 43,
	"total_loss": 0.10409477353096008
	},
	{
	"epoch": 0.1,
	"importance_ratio": 0.9998437166213989,
	"kl_div_avg": 0.006663881242275238,
	"learning_rate": 4.911421911421911e-07,
	"loss_func": "stage2",
	"step": 44,
	"total_loss": -0.9234535694122314
	},
	{
	"epoch": 0.11,
	"importance_ratio": 0.9999840259552002,
	"kl_div_avg": 0.004546988755464554,
	"learning_rate": 4.909090909090909e-07,
	"loss_func": "stage2",
	"step": 45,
	"total_loss": 0.022589027881622314
	},
	{
	"epoch": 0.11,
	"importance_ratio": 1.0002349615097046,
	"kl_div_avg": 0.0048853568732738495,
	"learning_rate": 4.906759906759906e-07,
	"loss_func": "stage2",
	"step": 46,
	"total_loss": 0.7868871688842773
	},
	{
	"epoch": 0.11,
	"importance_ratio": 1.0003743171691895,
	"kl_div_avg": 0.005253675393760204,
	"learning_rate": 4.904428904428905e-07,
	"loss_func": "stage2",
	"step": 47,
	"total_loss": 0.7918493747711182
	},
	{
	"epoch": 0.11,
	"importance_ratio": 1.0001533031463623,
	"kl_div_avg": 0.005680109839886427,
	"learning_rate": 4.902097902097902e-07,
	"loss_func": "stage2",
	"step": 48,
	"total_loss": -0.10262584686279297
	},
	{
	"epoch": 0.12,
	"importance_ratio": 0.999789834022522,
	"kl_div_avg": 0.006105936132371426,
	"learning_rate": 4.8997668997669e-07,
	"loss_func": "stage2",
	"step": 49,
	"total_loss": -0.8303477168083191
	},
	{
	"epoch": 0.12,
	"importance_ratio": 0.9999826550483704,
	"kl_div_avg": 0.005722599104046822,
	"learning_rate": 4.897435897435897e-07,
	"loss_func": "stage2",
	"step": 50,
	"total_loss": -0.8298835754394531
	},
	{
	"epoch": 0.12,
	"importance_ratio": 1.0003968477249146,
	"kl_div_avg": 0.005780387669801712,
	"learning_rate": 4.895104895104895e-07,
	"loss_func": "stage2",
	"step": 51,
	"total_loss": -0.8298872709274292
	},
	{
	"epoch": 0.12,
	"importance_ratio": 1.001389741897583,
	"kl_div_avg": 0.00528342742472887,
	"learning_rate": 4.892773892773893e-07,
	"loss_func": "stage2",
	"step": 52,
	"total_loss": -0.8320926427841187
	},
	{
	"epoch": 0.13,
	"importance_ratio": 1.0000613927841187,
	"kl_div_avg": 0.005511891096830368,
	"learning_rate": 4.890442890442891e-07,
	"loss_func": "stage2",
	"step": 53,
	"total_loss": -0.8287703990936279
	},
	{
	"epoch": 0.13,
	"importance_ratio": 1.000309705734253,
	"kl_div_avg": 0.00582331046462059,
	"learning_rate": 4.888111888111888e-07,
	"loss_func": "stage2",
	"step": 54,
	"total_loss": -0.007962286472320557
	},
	{
	"epoch": 0.13,
	"importance_ratio": 1.000248670578003,
	"kl_div_avg": 0.005605565384030342,
	"learning_rate": 4.885780885780885e-07,
	"loss_func": "stage2",
	"step": 55,
	"total_loss": 0.30858537554740906
	},
	{
	"epoch": 0.13,
	"importance_ratio": 1.0001593828201294,
	"kl_div_avg": 0.006694035604596138,
	"learning_rate": 4.883449883449883e-07,
	"loss_func": "stage2",
	"step": 56,
	"total_loss": 0.2841358482837677
	},
	{
	"epoch": 0.14,
	"importance_ratio": 0.9998378753662109,
	"kl_div_avg": 0.0054409438744187355,
	"learning_rate": 4.88111888111888e-07,
	"loss_func": "stage2",
	"step": 57,
	"total_loss": 0.4940628409385681
	},
	{
	"epoch": 0.14,
	"importance_ratio": 1.0000338554382324,
	"kl_div_avg": 0.00452791154384613,
	"learning_rate": 4.878787878787878e-07,
	"loss_func": "stage2",
	"step": 58,
	"total_loss": 0.25203195214271545
	},
	{
	"epoch": 0.14,
	"importance_ratio": 1.0000851154327393,
	"kl_div_avg": 0.008046677336096764,
	"learning_rate": 4.876456876456876e-07,
	"loss_func": "stage2",
	"step": 59,
	"total_loss": 0.8340111970901489
	},
	{
	"epoch": 0.14,
	"importance_ratio": 1.0000518560409546,
	"kl_div_avg": 0.0053115119226276875,
	"learning_rate": 4.874125874125874e-07,
	"loss_func": "stage2",
	"step": 60,
	"total_loss": -0.4959676265716553
	},
	{
	"epoch": 0.15,
	"importance_ratio": 1.0000547170639038,
	"kl_div_avg": 0.009003904648125172,
	"learning_rate": 4.871794871794871e-07,
	"loss_func": "stage2",
	"step": 61,
	"total_loss": -0.7978946566581726
	},
	{
	"epoch": 0.15,
	"importance_ratio": 1.000227689743042,
	"kl_div_avg": 0.007788301911205053,
	"learning_rate": 4.869463869463869e-07,
	"loss_func": "stage2",
	"step": 62,
	"total_loss": -0.801626443862915
	},
	{
	"epoch": 0.15,
	"importance_ratio": 1.0001925230026245,
	"kl_div_avg": 0.009508013725280762,
	"learning_rate": 4.867132867132867e-07,
	"loss_func": "stage2",
	"step": 63,
	"total_loss": -0.8087908029556274
	},
	{
	"epoch": 0.15,
	"importance_ratio": 1.0004205703735352,
	"kl_div_avg": 0.00948132760822773,
	"learning_rate": 4.864801864801865e-07,
	"loss_func": "stage2",
	"step": 64,
	"total_loss": -0.7977355718612671
	},
	{
	"epoch": 0.16,
	"importance_ratio": 0.9995088577270508,
	"kl_div_avg": 0.013071361929178238,
	"learning_rate": 4.862470862470862e-07,
	"loss_func": "stage2",
	"step": 65,
	"total_loss": -0.7926455736160278
	},
	{
	"epoch": 0.16,
	"importance_ratio": 1.0000696182250977,
	"kl_div_avg": 0.008261503651738167,
	"learning_rate": 4.86013986013986e-07,
	"loss_func": "stage2",
	"step": 66,
	"total_loss": 0.8929388523101807
	},
	{
	"epoch": 0.16,
	"importance_ratio": 0.9999169111251831,
	"kl_div_avg": 0.0072638243436813354,
	"learning_rate": 4.857808857808858e-07,
	"loss_func": "stage2",
	"step": 67,
	"total_loss": 0.7651723623275757
	},
	{
	"epoch": 0.16,
	"importance_ratio": 1.0006506443023682,
	"kl_div_avg": 0.014501883648335934,
	"learning_rate": 4.855477855477855e-07,
	"loss_func": "stage2",
	"step": 68,
	"total_loss": -0.7893998622894287
	},
	{
	"epoch": 0.17,
	"importance_ratio": 1.0001804828643799,
	"kl_div_avg": 0.014889956451952457,
	"learning_rate": 4.853146853146853e-07,
	"loss_func": "stage2",
	"step": 69,
	"total_loss": -0.28294438123703003
	},
	{
	"epoch": 0.17,
	"importance_ratio": 1.0003015995025635,
	"kl_div_avg": 0.012897053733468056,
	"learning_rate": 4.850815850815851e-07,
	"loss_func": "stage2",
	"step": 70,
	"total_loss": -0.29949530959129333
	},
	{
	"epoch": 0.17,
	"importance_ratio": 0.9999880194664001,
	"kl_div_avg": 0.011545092798769474,
	"learning_rate": 4.848484848484849e-07,
	"loss_func": "stage2",
	"step": 71,
	"total_loss": -0.07207685708999634
	},
	{
	"epoch": 0.17,
	"importance_ratio": 1.0000548362731934,
	"kl_div_avg": 0.011798446998000145,
	"learning_rate": 4.846153846153846e-07,
	"loss_func": "stage2",
	"step": 72,
	"total_loss": -0.030968129634857178
	},
	{
	"epoch": 0.18,
	"importance_ratio": 0.9998947381973267,
	"kl_div_avg": 0.014826457016170025,
	"learning_rate": 4.843822843822844e-07,
	"loss_func": "stage2",
	"step": 73,
	"total_loss": -0.04581499099731445
	},
	{
	"epoch": 0.18,
	"importance_ratio": 0.9999833106994629,
	"kl_div_avg": 0.01308258343487978,
	"learning_rate": 4.841491841491842e-07,
	"loss_func": "stage2",
	"step": 74,
	"total_loss": -0.2057284414768219
	},
	{
	"epoch": 0.18,
	"importance_ratio": 1.000425100326538,
	"kl_div_avg": 0.014183840714395046,
	"learning_rate": 4.839160839160839e-07,
	"loss_func": "stage2",
	"step": 75,
	"total_loss": 0.6338366866111755
	},
	{
	"epoch": 0.18,
	"importance_ratio": 1.0001928806304932,
	"kl_div_avg": 0.013401240110397339,
	"learning_rate": 4.836829836829837e-07,
	"loss_func": "stage2",
	"step": 76,
	"total_loss": 0.2874578833580017
	},
	{
	"epoch": 0.19,
	"importance_ratio": 0.9999673366546631,
	"kl_div_avg": 0.012778308242559433,
	"learning_rate": 4.834498834498834e-07,
	"loss_func": "stage2",
	"step": 77,
	"total_loss": 0.8181835412979126
	},
	{
	"epoch": 0.19,
	"importance_ratio": 0.9999823570251465,
	"kl_div_avg": 0.014732200652360916,
	"learning_rate": 4.832167832167832e-07,
	"loss_func": "stage2",
	"step": 78,
	"total_loss": 0.818813681602478
	},
	{
	"epoch": 0.19,
	"importance_ratio": 1.0003037452697754,
	"kl_div_avg": 0.014829716645181179,
	"learning_rate": 4.829836829836829e-07,
	"loss_func": "stage2",
	"step": 79,
	"total_loss": 0.0298653244972229
	},
	{
	"epoch": 0.19,
	"importance_ratio": 1.0003316402435303,
	"kl_div_avg": 0.01861473172903061,
	"learning_rate": 4.827505827505827e-07,
	"loss_func": "stage2",
	"step": 80,
	"total_loss": 0.47803181409835815
	},
	{
	"epoch": 0.2,
	"importance_ratio": 1.0000956058502197,
	"kl_div_avg": 0.013490747660398483,
	"learning_rate": 4.825174825174824e-07,
	"loss_func": "stage2",
	"step": 81,
	"total_loss": 0.43567734956741333
	},
	{
	"epoch": 0.2,
	"importance_ratio": 1.0001178979873657,
	"kl_div_avg": 0.014975698664784431,
	"learning_rate": 4.822843822843823e-07,
	"loss_func": "stage2",
	"step": 82,
	"total_loss": -0.27731338143348694
	},
	{
	"epoch": 0.2,
	"importance_ratio": 0.9999943971633911,
	"kl_div_avg": 0.012739075347781181,
	"learning_rate": 4.82051282051282e-07,
	"loss_func": "stage2",
	"step": 83,
	"total_loss": 0.14717233180999756
	},
	{
	"epoch": 0.2,
	"importance_ratio": 1.000013828277588,
	"kl_div_avg": 0.015260843560099602,
	"learning_rate": 4.818181818181818e-07,
	"loss_func": "stage2",
	"step": 84,
	"total_loss": -0.876125693321228
	},
	{
	"epoch": 0.2,
	"importance_ratio": 0.9999637603759766,
	"kl_div_avg": 0.009859403595328331,
	"learning_rate": 4.815850815850815e-07,
	"loss_func": "stage2",
	"step": 85,
	"total_loss": -0.007574997842311859
	},
	{
	"epoch": 0.2,
	"importance_ratio": 0.9998883008956909,
	"kl_div_avg": 0.012243506498634815,
	"learning_rate": 4.813519813519813e-07,
	"loss_func": "stage2",
	"step": 86,
	"total_loss": -0.8214981555938721
	},
	{
	"epoch": 0.2,
	"importance_ratio": 0.9997843503952026,
	"kl_div_avg": 0.009431181475520134,
	"learning_rate": 4.811188811188811e-07,
	"loss_func": "stage2",
	"step": 87,
	"total_loss": 0.09674309194087982
	},
	{
	"epoch": 0.2,
	"importance_ratio": 0.9998980760574341,
	"kl_div_avg": 0.012359343469142914,
	"learning_rate": 4.808857808857809e-07,
	"loss_func": "stage2",
	"step": 88,
	"total_loss": 0.13037657737731934
	},
	{
	"epoch": 0.21,
	"importance_ratio": 0.9999622702598572,
	"kl_div_avg": 0.019689181819558144,
	"learning_rate": 4.806526806526806e-07,
	"loss_func": "stage2",
	"step": 89,
	"total_loss": -0.053394585847854614
	},
	{
	"epoch": 0.21,
	"importance_ratio": 1.0000618696212769,
	"kl_div_avg": 0.013591418042778969,
	"learning_rate": 4.804195804195804e-07,
	"loss_func": "stage2",
	"step": 90,
	"total_loss": 0.7699258327484131
	},
	{
	"epoch": 0.21,
	"importance_ratio": 1.000166893005371,
	"kl_div_avg": 0.016464080661535263,
	"learning_rate": 4.801864801864802e-07,
	"loss_func": "stage2",
	"step": 91,
	"total_loss": 0.16640010476112366
	},
	{
	"epoch": 0.21,
	"importance_ratio": 0.9997192621231079,
	"kl_div_avg": 0.02152731642127037,
	"learning_rate": 4.799533799533799e-07,
	"loss_func": "stage2",
	"step": 92,
	"total_loss": -0.17338214814662933
	},
	{
	"epoch": 0.22,
	"importance_ratio": 1.000048279762268,
	"kl_div_avg": 0.01632719114422798,
	"learning_rate": 4.797202797202797e-07,
	"loss_func": "stage2",
	"step": 93,
	"total_loss": -0.311355322599411
	},
	{
	"epoch": 0.22,
	"importance_ratio": 1.000006914138794,
	"kl_div_avg": 0.02427072264254093,
	"learning_rate": 4.794871794871795e-07,
	"loss_func": "stage2",
	"step": 94,
	"total_loss": 0.6689386963844299
	},
	{
	"epoch": 0.22,
	"importance_ratio": 1.0001646280288696,
	"kl_div_avg": 0.020599162206053734,
	"learning_rate": 4.792540792540793e-07,
	"loss_func": "stage2",
	"step": 95,
	"total_loss": -0.27341556549072266
	},
	{
	"epoch": 0.22,
	"importance_ratio": 1.0005509853363037,
	"kl_div_avg": 0.01824137195944786,
	"learning_rate": 4.79020979020979e-07,
	"loss_func": "stage2",
	"step": 96,
	"total_loss": -0.7693477869033813
	},
	{
	"epoch": 0.23,
	"importance_ratio": 1.0000276565551758,
	"kl_div_avg": 0.019494881853461266,
	"learning_rate": 4.787878787878788e-07,
	"loss_func": "stage2",
	"step": 97,
	"total_loss": 0.5362880229949951
	},
	{
	"epoch": 0.23,
	"importance_ratio": 1.0001310110092163,
	"kl_div_avg": 0.018007826060056686,
	"learning_rate": 4.785547785547786e-07,
	"loss_func": "stage2",
	"step": 98,
	"total_loss": 0.13775774836540222
	},
	{
	"epoch": 0.23,
	"importance_ratio": 1.0001639127731323,
	"kl_div_avg": 0.018012849614024162,
	"learning_rate": 4.783216783216783e-07,
	"loss_func": "stage2",
	"step": 99,
	"total_loss": 0.43274223804473877
	},
	{
	"epoch": 0.23,
	"importance_ratio": 1.0001168251037598,
	"kl_div_avg": 0.016401609405875206,
	"learning_rate": 4.78088578088578e-07,
	"loss_func": "stage2",
	"step": 100,
	"total_loss": 0.024271167814731598
	},
	{
	"epoch": 0.24,
	"importance_ratio": 0.9998679161071777,
	"kl_div_avg": 0.018232179805636406,
	"learning_rate": 4.778554778554778e-07,
	"loss_func": "stage2",
	"step": 101,
	"total_loss": -0.13439278304576874
	},
	{
	"epoch": 0.24,
	"importance_ratio": 0.9999688267707825,
	"kl_div_avg": 0.01988252066075802,
	"learning_rate": 4.776223776223776e-07,
	"loss_func": "stage2",
	"step": 102,
	"total_loss": -0.5875260233879089
	},
	{
	"epoch": 0.24,
	"importance_ratio": 1.000270128250122,
	"kl_div_avg": 0.024290431290864944,
	"learning_rate": 4.773892773892773e-07,
	"loss_func": "stage2",
	"step": 103,
	"total_loss": 0.43479496240615845
	},
	{
	"epoch": 0.24,
	"importance_ratio": 1.0000674724578857,
	"kl_div_avg": 0.017674673348665237,
	"learning_rate": 4.771561771561771e-07,
	"loss_func": "stage2",
	"step": 104,
	"total_loss": -0.30525317788124084
	},
	{
	"epoch": 0.25,
	"importance_ratio": 0.9998775720596313,
	"kl_div_avg": 0.015626681968569756,
	"learning_rate": 4.769230769230769e-07,
	"loss_func": "stage2",
	"step": 105,
	"total_loss": -0.4420226514339447
	},
	{
	"epoch": 0.25,
	"importance_ratio": 1.000077247619629,
	"kl_div_avg": 0.011648900806903839,
	"learning_rate": 4.7668997668997666e-07,
	"loss_func": "stage2",
	"step": 106,
	"total_loss": -0.7798103094100952
	},
	{
	"epoch": 0.25,
	"importance_ratio": 0.9998931884765625,
	"kl_div_avg": 0.02424587681889534,
	"learning_rate": 4.7645687645687646e-07,
	"loss_func": "stage2",
	"step": 107,
	"total_loss": 0.3463074564933777
	},
	{
	"epoch": 0.25,
	"importance_ratio": 1.0001239776611328,
	"kl_div_avg": 0.019622065126895905,
	"learning_rate": 4.762237762237762e-07,
	"loss_func": "stage2",
	"step": 108,
	"total_loss": -0.7446590662002563
	},
	{
	"epoch": 0.26,
	"importance_ratio": 1.0000584125518799,
	"kl_div_avg": 0.02028917521238327,
	"learning_rate": 4.75990675990676e-07,
	"loss_func": "stage2",
	"step": 109,
	"total_loss": -0.269249826669693
	},
	{
	"epoch": 0.26,
	"importance_ratio": 0.9998422861099243,
	"kl_div_avg": 0.022249866276979446,
	"learning_rate": 4.7575757575757574e-07,
	"loss_func": "stage2",
	"step": 110,
	"total_loss": -0.6871204376220703
	},
	{
	"epoch": 0.26,
	"importance_ratio": 1.0001263618469238,
	"kl_div_avg": 0.01973864436149597,
	"learning_rate": 4.755244755244755e-07,
	"loss_func": "stage2",
	"step": 111,
	"total_loss": -0.6550001502037048
	},
	{
	"epoch": 0.26,
	"importance_ratio": 0.9996439218521118,
	"kl_div_avg": 0.02077101171016693,
	"learning_rate": 4.7529137529137523e-07,
	"loss_func": "stage2",
	"step": 112,
	"total_loss": 0.042905211448669434
	},
	{
	"epoch": 0.27,
	"importance_ratio": 1.000232219696045,
	"kl_div_avg": 0.036871857941150665,
	"learning_rate": 4.750582750582751e-07,
	"loss_func": "stage2",
	"step": 113,
	"total_loss": -0.6672766804695129
	},
	{
	"epoch": 0.27,
	"importance_ratio": 0.9999071359634399,
	"kl_div_avg": 0.023731358349323273,
	"learning_rate": 4.748251748251748e-07,
	"loss_func": "stage2",
	"step": 114,
	"total_loss": 0.22221048176288605
	},
	{
	"epoch": 0.27,
	"importance_ratio": 0.9998910427093506,
	"kl_div_avg": 0.02947179228067398,
	"learning_rate": 4.7459207459207457e-07,
	"loss_func": "stage2",
	"step": 115,
	"total_loss": 0.05969160795211792
	},
	{
	"epoch": 0.27,
	"importance_ratio": 0.9999151825904846,
	"kl_div_avg": 0.028714872896671295,
	"learning_rate": 4.743589743589743e-07,
	"loss_func": "stage2",
	"step": 116,
	"total_loss": 0.023228317499160767
	},
	{
	"epoch": 0.28,
	"importance_ratio": 0.9999186992645264,
	"kl_div_avg": 0.022106900811195374,
	"learning_rate": 4.741258741258741e-07,
	"loss_func": "stage2",
	"step": 117,
	"total_loss": -0.4819675087928772
	},
	{
	"epoch": 0.28,
	"importance_ratio": 0.9995689392089844,
	"kl_div_avg": 0.03649330139160156,
	"learning_rate": 4.7389277389277386e-07,
	"loss_func": "stage2",
	"step": 118,
	"total_loss": 0.3108961582183838
	},
	{
	"epoch": 0.28,
	"importance_ratio": 0.9996166825294495,
	"kl_div_avg": 0.03707721084356308,
	"learning_rate": 4.7365967365967365e-07,
	"loss_func": "stage2",
	"step": 119,
	"total_loss": 0.3411310315132141
	},
	{
	"epoch": 0.28,
	"importance_ratio": 0.9993818998336792,
	"kl_div_avg": 0.02660643495619297,
	"learning_rate": 4.734265734265734e-07,
	"loss_func": "stage2",
	"step": 120,
	"total_loss": -0.1782078891992569
	},
	{
	"epoch": 0.29,
	"importance_ratio": 1.0000483989715576,
	"kl_div_avg": 0.020809054374694824,
	"learning_rate": 4.731934731934732e-07,
	"loss_func": "stage2",
	"step": 121,
	"total_loss": 0.2691134810447693
	},
	{
	"epoch": 0.29,
	"importance_ratio": 0.9999549388885498,
	"kl_div_avg": 0.018363434821367264,
	"learning_rate": 4.7296037296037294e-07,
	"loss_func": "stage2",
	"step": 122,
	"total_loss": -0.11637084186077118
	},
	{
	"epoch": 0.29,
	"importance_ratio": 1.0000113248825073,
	"kl_div_avg": 0.01843072474002838,
	"learning_rate": 4.727272727272727e-07,
	"loss_func": "stage2",
	"step": 123,
	"total_loss": 0.41335129737854004
	},
	{
	"epoch": 0.29,
	"importance_ratio": 0.9993541836738586,
	"kl_div_avg": 0.023193594068288803,
	"learning_rate": 4.724941724941724e-07,
	"loss_func": "stage2",
	"step": 124,
	"total_loss": 0.3763417601585388
	},
	{
	"epoch": 0.3,
	"importance_ratio": 1.0000863075256348,
	"kl_div_avg": 0.02424781210720539,
	"learning_rate": 4.722610722610723e-07,
	"loss_func": "stage2",
	"step": 125,
	"total_loss": 0.8012444972991943
	},
	{
	"epoch": 0.3,
	"importance_ratio": 0.9997611045837402,
	"kl_div_avg": 0.019796304404735565,
	"learning_rate": 4.72027972027972e-07,
	"loss_func": "stage2",
	"step": 126,
	"total_loss": 0.2471800446510315
	},
	{
	"epoch": 0.3,
	"importance_ratio": 0.9994201064109802,
	"kl_div_avg": 0.03692193701863289,
	"learning_rate": 4.7179487179487176e-07,
	"loss_func": "stage2",
	"step": 127,
	"total_loss": 0.6575199961662292
	},
	{
	"epoch": 0.3,
	"importance_ratio": 0.9993878602981567,
	"kl_div_avg": 0.024065542966127396,
	"learning_rate": 4.715617715617715e-07,
	"loss_func": "stage2",
	"step": 128,
	"total_loss": 0.39468204975128174
	},
	{
	"epoch": 0.31,
	"importance_ratio": 1.0001311302185059,
	"kl_div_avg": 0.024539019912481308,
	"learning_rate": 4.713286713286713e-07,
	"loss_func": "stage2",
	"step": 129,
	"total_loss": 0.1832764893770218
	},
	{
	"epoch": 0.31,
	"importance_ratio": 0.9998582005500793,
	"kl_div_avg": 0.024634480476379395,
	"learning_rate": 4.710955710955711e-07,
	"loss_func": "stage2",
	"step": 130,
	"total_loss": 0.1833437830209732
	},
	{
	"epoch": 0.31,
	"importance_ratio": 0.9997899532318115,
	"kl_div_avg": 0.024798087775707245,
	"learning_rate": 4.7086247086247085e-07,
	"loss_func": "stage2",
	"step": 131,
	"total_loss": -0.029144808650016785
	},
	{
	"epoch": 0.31,
	"importance_ratio": 0.9991902112960815,
	"kl_div_avg": 0.022448930889368057,
	"learning_rate": 4.706293706293706e-07,
	"loss_func": "stage2",
	"step": 132,
	"total_loss": 0.5746316909790039
	},
	{
	"epoch": 0.32,
	"importance_ratio": 0.9999872446060181,
	"kl_div_avg": 0.030649660155177116,
	"learning_rate": 4.703962703962704e-07,
	"loss_func": "stage2",
	"step": 133,
	"total_loss": -0.27921533584594727
	},
	{
	"epoch": 0.32,
	"importance_ratio": 0.9999322891235352,
	"kl_div_avg": 0.028502434492111206,
	"learning_rate": 4.7016317016317013e-07,
	"loss_func": "stage2",
	"step": 134,
	"total_loss": -0.18240980803966522
	},
	{
	"epoch": 0.32,
	"importance_ratio": 0.9991711378097534,
	"kl_div_avg": 0.034727346152067184,
	"learning_rate": 4.699300699300699e-07,
	"loss_func": "stage2",
	"step": 135,
	"total_loss": 0.20225152373313904
	},
	{
	"epoch": 0.32,
	"importance_ratio": 1.000333547592163,
	"kl_div_avg": 0.02375342883169651,
	"learning_rate": 4.696969696969697e-07,
	"loss_func": "stage2",
	"step": 136,
	"total_loss": 0.19642743468284607
	},
	{
	"epoch": 0.33,
	"importance_ratio": 1.0000425577163696,
	"kl_div_avg": 0.02941157855093479,
	"learning_rate": 4.6946386946386947e-07,
	"loss_func": "stage2",
	"step": 137,
	"total_loss": -0.2832520604133606
	},
	{
	"epoch": 0.33,
	"importance_ratio": 0.9998887777328491,
	"kl_div_avg": 0.0380094014108181,
	"learning_rate": 4.692307692307692e-07,
	"loss_func": "stage2",
	"step": 138,
	"total_loss": 0.07638365030288696
	},
	{
	"epoch": 0.33,
	"importance_ratio": 1.0000771284103394,
	"kl_div_avg": 0.031544946134090424,
	"learning_rate": 4.6899766899766896e-07,
	"loss_func": "stage2",
	"step": 139,
	"total_loss": -0.29178526997566223
	},
	{
	"epoch": 0.33,
	"importance_ratio": 0.9999626278877258,
	"kl_div_avg": 0.03620228171348572,
	"learning_rate": 4.6876456876456875e-07,
	"loss_func": "stage2",
	"step": 140,
	"total_loss": 0.12343016266822815
	},
	{
	"epoch": 0.34,
	"importance_ratio": 0.9999535083770752,
	"kl_div_avg": 0.026813074946403503,
	"learning_rate": 4.685314685314685e-07,
	"loss_func": "stage2",
	"step": 141,
	"total_loss": 0.024522602558135986
	},
	{
	"epoch": 0.34,
	"importance_ratio": 0.9998844265937805,
	"kl_div_avg": 0.031967416405677795,
	"learning_rate": 4.682983682983683e-07,
	"loss_func": "stage2",
	"step": 142,
	"total_loss": 0.6268632411956787
	},
	{
	"epoch": 0.34,
	"importance_ratio": 0.9997915029525757,
	"kl_div_avg": 0.024857094511389732,
	"learning_rate": 4.6806526806526804e-07,
	"loss_func": "stage2",
	"step": 143,
	"total_loss": 0.13797396421432495
	},
	{
	"epoch": 0.34,
	"importance_ratio": 0.9998888969421387,
	"kl_div_avg": 0.04350988566875458,
	"learning_rate": 4.6783216783216784e-07,
	"loss_func": "stage2",
	"step": 144,
	"total_loss": 0.5312750935554504
	},
	{
	"epoch": 0.34,
	"importance_ratio": 0.9998694658279419,
	"kl_div_avg": 0.03627926483750343,
	"learning_rate": 4.675990675990676e-07,
	"loss_func": "stage2",
	"step": 145,
	"total_loss": 0.09028466045856476
	},
	{
	"epoch": 0.34,
	"importance_ratio": 0.9999563694000244,
	"kl_div_avg": 0.02497359737753868,
	"learning_rate": 4.673659673659673e-07,
	"loss_func": "stage2",
	"step": 146,
	"total_loss": 0.5303145051002502
	},
	{
	"epoch": 0.34,
	"importance_ratio": 0.9999274015426636,
	"kl_div_avg": 0.026778005063533783,
	"learning_rate": 4.6713286713286707e-07,
	"loss_func": "stage2",
	"step": 147,
	"total_loss": 0.1259535402059555
	},
	{
	"epoch": 0.34,
	"importance_ratio": 0.9997484683990479,
	"kl_div_avg": 0.02970227226614952,
	"learning_rate": 4.668997668997669e-07,
	"loss_func": "stage2",
	"step": 148,
	"total_loss": 0.5203793048858643
	},
	{
	"epoch": 0.35,
	"importance_ratio": 1.0000306367874146,
	"kl_div_avg": 0.03883244842290878,
	"learning_rate": 4.6666666666666666e-07,
	"loss_func": "stage2",
	"step": 149,
	"total_loss": 0.5459209680557251
	},
	{
	"epoch": 0.35,
	"importance_ratio": 0.9999977946281433,
	"kl_div_avg": 0.010376233607530594,
	"learning_rate": 4.664335664335664e-07,
	"loss_func": "stage2",
	"step": 150,
	"total_loss": 0.5408558249473572
	},
	{
	"epoch": 0.35,
	"importance_ratio": 1.0000672340393066,
	"kl_div_avg": 0.020404186099767685,
	"learning_rate": 4.6620046620046615e-07,
	"loss_func": "stage2",
	"step": 151,
	"total_loss": 0.5100921392440796
	},
	{
	"epoch": 0.35,
	"importance_ratio": 1.0013837814331055,
	"kl_div_avg": 0.032543182373046875,
	"learning_rate": 4.6596736596736595e-07,
	"loss_func": "stage2",
	"step": 152,
	"total_loss": -0.4812799394130707
	}
	],
	"logging_steps": 1.0,
	"max_steps": 2150,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 20.0,
	"save_steps": 50,
	"total_flos": 0,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}