{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3536425767049586, "eval_steps": 500, "global_step": 152, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "importance_ratio": 1.0001533031463623, "kl_div_avg": 0.0022537275217473507, "learning_rate": 0.0, "loss_func": "stage2", "step": 1, "total_loss": 0.20475485920906067 }, { "epoch": 0.01, "importance_ratio": 1.0000330209732056, "kl_div_avg": 0.0011816158657893538, "learning_rate": 2.153382790366965e-07, "loss_func": "stage2", "step": 2, "total_loss": 0.17304854094982147 }, { "epoch": 0.01, "importance_ratio": 0.9998020529747009, "kl_div_avg": 0.0012814232613891363, "learning_rate": 3.4130309724299266e-07, "loss_func": "stage2", "step": 3, "total_loss": 0.39294394850730896 }, { "epoch": 0.01, "importance_ratio": 1.0002059936523438, "kl_div_avg": 0.001280196476727724, "learning_rate": 4.30676558073393e-07, "loss_func": "stage2", "step": 4, "total_loss": 0.2844714820384979 }, { "epoch": 0.02, "importance_ratio": 1.0000617504119873, "kl_div_avg": 0.0033944130409508944, "learning_rate": 5e-07, "loss_func": "stage2", "step": 5, "total_loss": 0.38719698786735535 }, { "epoch": 0.02, "importance_ratio": 1.0002291202545166, "kl_div_avg": 0.0006711427122354507, "learning_rate": 5e-07, "loss_func": "stage2", "step": 6, "total_loss": -0.7354744672775269 }, { "epoch": 0.02, "importance_ratio": 1.0001697540283203, "kl_div_avg": 0.0009293262264691293, "learning_rate": 4.997668997668998e-07, "loss_func": "stage2", "step": 7, "total_loss": 0.10072920471429825 }, { "epoch": 0.02, "importance_ratio": 1.0003812313079834, "kl_div_avg": 0.0006424246821552515, "learning_rate": 4.995337995337996e-07, "loss_func": "stage2", "step": 8, "total_loss": -0.7356305122375488 }, { "epoch": 0.03, "importance_ratio": 1.0002334117889404, "kl_div_avg": 0.001079258043318987, "learning_rate": 4.993006993006993e-07, "loss_func": "stage2", "step": 9, "total_loss": 0.1572389006614685 }, { "epoch": 0.03, "importance_ratio": 0.9999626874923706, "kl_div_avg": 0.0012779454700648785, "learning_rate": 4.990675990675991e-07, "loss_func": "stage2", "step": 10, "total_loss": -0.2823958396911621 }, { "epoch": 0.03, "importance_ratio": 0.9999264478683472, "kl_div_avg": 0.0010424605570733547, "learning_rate": 4.988344988344988e-07, "loss_func": "stage2", "step": 11, "total_loss": -0.4896008372306824 }, { "epoch": 0.03, "importance_ratio": 0.9999059438705444, "kl_div_avg": 0.0011010458692908287, "learning_rate": 4.986013986013987e-07, "loss_func": "stage2", "step": 12, "total_loss": 0.4140966832637787 }, { "epoch": 0.04, "importance_ratio": 0.9999853372573853, "kl_div_avg": 0.0014636358246207237, "learning_rate": 4.983682983682983e-07, "loss_func": "stage2", "step": 13, "total_loss": 0.4479982852935791 }, { "epoch": 0.04, "importance_ratio": 1.0001029968261719, "kl_div_avg": 0.0010808318620547652, "learning_rate": 4.981351981351981e-07, "loss_func": "stage2", "step": 14, "total_loss": -0.8298860192298889 }, { "epoch": 0.04, "importance_ratio": 1.0000591278076172, "kl_div_avg": 0.0012179139303043485, "learning_rate": 4.979020979020978e-07, "loss_func": "stage2", "step": 15, "total_loss": -0.8044635057449341 }, { "epoch": 0.04, "importance_ratio": 1.0000298023223877, "kl_div_avg": 0.004863352049142122, "learning_rate": 4.976689976689976e-07, "loss_func": "stage2", "step": 16, "total_loss": 0.24484601616859436 }, { "epoch": 0.05, "importance_ratio": 1.0002245903015137, "kl_div_avg": 0.0017589405179023743, "learning_rate": 4.974358974358974e-07, "loss_func": "stage2", "step": 17, "total_loss": -0.0013702064752578735 }, { "epoch": 0.05, "importance_ratio": 0.9999561309814453, "kl_div_avg": 0.0018663634546101093, "learning_rate": 4.972027972027972e-07, "loss_func": "stage2", "step": 18, "total_loss": -0.10264579951763153 }, { "epoch": 0.05, "importance_ratio": 0.9998416900634766, "kl_div_avg": 0.0018756331410259008, "learning_rate": 4.969696969696969e-07, "loss_func": "stage2", "step": 19, "total_loss": 0.48283857107162476 }, { "epoch": 0.05, "importance_ratio": 0.999815821647644, "kl_div_avg": 0.0019035658333450556, "learning_rate": 4.967365967365967e-07, "loss_func": "stage2", "step": 20, "total_loss": -0.1848379671573639 }, { "epoch": 0.06, "importance_ratio": 0.999942421913147, "kl_div_avg": 0.009937961585819721, "learning_rate": 4.965034965034965e-07, "loss_func": "stage2", "step": 21, "total_loss": 0.14985397458076477 }, { "epoch": 0.06, "importance_ratio": 0.9997897148132324, "kl_div_avg": 0.00241913297213614, "learning_rate": 4.962703962703962e-07, "loss_func": "stage2", "step": 22, "total_loss": -0.29083502292633057 }, { "epoch": 0.06, "importance_ratio": 0.9998656511306763, "kl_div_avg": 0.00263982149772346, "learning_rate": 4.96037296037296e-07, "loss_func": "stage2", "step": 23, "total_loss": -0.02688920497894287 }, { "epoch": 0.06, "importance_ratio": 1.0000394582748413, "kl_div_avg": 0.0032202559523284435, "learning_rate": 4.958041958041958e-07, "loss_func": "stage2", "step": 24, "total_loss": 0.5130484104156494 }, { "epoch": 0.07, "importance_ratio": 1.0000278949737549, "kl_div_avg": 0.00243174796923995, "learning_rate": 4.955710955710956e-07, "loss_func": "stage2", "step": 25, "total_loss": 0.309948205947876 }, { "epoch": 0.07, "importance_ratio": 0.9999406337738037, "kl_div_avg": 0.003059545997530222, "learning_rate": 4.953379953379953e-07, "loss_func": "stage2", "step": 26, "total_loss": 0.11305176466703415 }, { "epoch": 0.07, "importance_ratio": 0.9999201893806458, "kl_div_avg": 0.003822761122137308, "learning_rate": 4.951048951048951e-07, "loss_func": "stage2", "step": 27, "total_loss": 0.38959354162216187 }, { "epoch": 0.07, "importance_ratio": 0.9994360208511353, "kl_div_avg": 0.0033927513286471367, "learning_rate": 4.948717948717949e-07, "loss_func": "stage2", "step": 28, "total_loss": 0.4653158485889435 }, { "epoch": 0.07, "importance_ratio": 0.9999792575836182, "kl_div_avg": 0.0032504587434232235, "learning_rate": 4.946386946386946e-07, "loss_func": "stage2", "step": 29, "total_loss": 0.4534304141998291 }, { "epoch": 0.07, "importance_ratio": 1.0000840425491333, "kl_div_avg": 0.002915366552770138, "learning_rate": 4.944055944055944e-07, "loss_func": "stage2", "step": 30, "total_loss": 0.46045419573783875 }, { "epoch": 0.07, "importance_ratio": 0.9996820688247681, "kl_div_avg": 0.0030386601574718952, "learning_rate": 4.941724941724942e-07, "loss_func": "stage2", "step": 31, "total_loss": -0.8699095845222473 }, { "epoch": 0.07, "importance_ratio": 0.9997460842132568, "kl_div_avg": 0.0037004691548645496, "learning_rate": 4.93939393939394e-07, "loss_func": "stage2", "step": 32, "total_loss": -0.23050172626972198 }, { "epoch": 0.08, "importance_ratio": 0.9999768137931824, "kl_div_avg": 0.0046552978456020355, "learning_rate": 4.937062937062936e-07, "loss_func": "stage2", "step": 33, "total_loss": 0.3776797950267792 }, { "epoch": 0.08, "importance_ratio": 0.9999284148216248, "kl_div_avg": 0.004839582834392786, "learning_rate": 4.934731934731934e-07, "loss_func": "stage2", "step": 34, "total_loss": 0.5804722309112549 }, { "epoch": 0.08, "importance_ratio": 0.999695897102356, "kl_div_avg": 0.004378842655569315, "learning_rate": 4.932400932400932e-07, "loss_func": "stage2", "step": 35, "total_loss": 0.5690972805023193 }, { "epoch": 0.08, "importance_ratio": 0.9998815059661865, "kl_div_avg": 0.0047516971826553345, "learning_rate": 4.93006993006993e-07, "loss_func": "stage2", "step": 36, "total_loss": 0.2298603653907776 }, { "epoch": 0.09, "importance_ratio": 0.9997518062591553, "kl_div_avg": 0.004270514938980341, "learning_rate": 4.927738927738927e-07, "loss_func": "stage2", "step": 37, "total_loss": -0.20016932487487793 }, { "epoch": 0.09, "importance_ratio": 1.000083088874817, "kl_div_avg": 0.004711843561381102, "learning_rate": 4.925407925407925e-07, "loss_func": "stage2", "step": 38, "total_loss": 0.25743457674980164 }, { "epoch": 0.09, "importance_ratio": 0.9999402761459351, "kl_div_avg": 0.004922826308757067, "learning_rate": 4.923076923076923e-07, "loss_func": "stage2", "step": 39, "total_loss": -0.15881219506263733 }, { "epoch": 0.09, "importance_ratio": 0.999858021736145, "kl_div_avg": 0.0039229318499565125, "learning_rate": 4.92074592074592e-07, "loss_func": "stage2", "step": 40, "total_loss": -0.23230578005313873 }, { "epoch": 0.1, "importance_ratio": 0.999944806098938, "kl_div_avg": 0.00766429677605629, "learning_rate": 4.918414918414918e-07, "loss_func": "stage2", "step": 41, "total_loss": -0.03111131489276886 }, { "epoch": 0.1, "importance_ratio": 1.000047206878662, "kl_div_avg": 0.005274048075079918, "learning_rate": 4.916083916083916e-07, "loss_func": "stage2", "step": 42, "total_loss": -0.033877044916152954 }, { "epoch": 0.1, "importance_ratio": 1.0002098083496094, "kl_div_avg": 0.006382378749549389, "learning_rate": 4.913752913752914e-07, "loss_func": "stage2", "step": 43, "total_loss": 0.10409477353096008 }, { "epoch": 0.1, "importance_ratio": 0.9998437166213989, "kl_div_avg": 0.006663881242275238, "learning_rate": 4.911421911421911e-07, "loss_func": "stage2", "step": 44, "total_loss": -0.9234535694122314 }, { "epoch": 0.11, "importance_ratio": 0.9999840259552002, "kl_div_avg": 0.004546988755464554, "learning_rate": 4.909090909090909e-07, "loss_func": "stage2", "step": 45, "total_loss": 0.022589027881622314 }, { "epoch": 0.11, "importance_ratio": 1.0002349615097046, "kl_div_avg": 0.0048853568732738495, "learning_rate": 4.906759906759906e-07, "loss_func": "stage2", "step": 46, "total_loss": 0.7868871688842773 }, { "epoch": 0.11, "importance_ratio": 1.0003743171691895, "kl_div_avg": 0.005253675393760204, "learning_rate": 4.904428904428905e-07, "loss_func": "stage2", "step": 47, "total_loss": 0.7918493747711182 }, { "epoch": 0.11, "importance_ratio": 1.0001533031463623, "kl_div_avg": 0.005680109839886427, "learning_rate": 4.902097902097902e-07, "loss_func": "stage2", "step": 48, "total_loss": -0.10262584686279297 }, { "epoch": 0.12, "importance_ratio": 0.999789834022522, "kl_div_avg": 0.006105936132371426, "learning_rate": 4.8997668997669e-07, "loss_func": "stage2", "step": 49, "total_loss": -0.8303477168083191 }, { "epoch": 0.12, "importance_ratio": 0.9999826550483704, "kl_div_avg": 0.005722599104046822, "learning_rate": 4.897435897435897e-07, "loss_func": "stage2", "step": 50, "total_loss": -0.8298835754394531 }, { "epoch": 0.12, "importance_ratio": 1.0003968477249146, "kl_div_avg": 0.005780387669801712, "learning_rate": 4.895104895104895e-07, "loss_func": "stage2", "step": 51, "total_loss": -0.8298872709274292 }, { "epoch": 0.12, "importance_ratio": 1.001389741897583, "kl_div_avg": 0.00528342742472887, "learning_rate": 4.892773892773893e-07, "loss_func": "stage2", "step": 52, "total_loss": -0.8320926427841187 }, { "epoch": 0.13, "importance_ratio": 1.0000613927841187, "kl_div_avg": 0.005511891096830368, "learning_rate": 4.890442890442891e-07, "loss_func": "stage2", "step": 53, "total_loss": -0.8287703990936279 }, { "epoch": 0.13, "importance_ratio": 1.000309705734253, "kl_div_avg": 0.00582331046462059, "learning_rate": 4.888111888111888e-07, "loss_func": "stage2", "step": 54, "total_loss": -0.007962286472320557 }, { "epoch": 0.13, "importance_ratio": 1.000248670578003, "kl_div_avg": 0.005605565384030342, "learning_rate": 4.885780885780885e-07, "loss_func": "stage2", "step": 55, "total_loss": 0.30858537554740906 }, { "epoch": 0.13, "importance_ratio": 1.0001593828201294, "kl_div_avg": 0.006694035604596138, "learning_rate": 4.883449883449883e-07, "loss_func": "stage2", "step": 56, "total_loss": 0.2841358482837677 }, { "epoch": 0.14, "importance_ratio": 0.9998378753662109, "kl_div_avg": 0.0054409438744187355, "learning_rate": 4.88111888111888e-07, "loss_func": "stage2", "step": 57, "total_loss": 0.4940628409385681 }, { "epoch": 0.14, "importance_ratio": 1.0000338554382324, "kl_div_avg": 0.00452791154384613, "learning_rate": 4.878787878787878e-07, "loss_func": "stage2", "step": 58, "total_loss": 0.25203195214271545 }, { "epoch": 0.14, "importance_ratio": 1.0000851154327393, "kl_div_avg": 0.008046677336096764, "learning_rate": 4.876456876456876e-07, "loss_func": "stage2", "step": 59, "total_loss": 0.8340111970901489 }, { "epoch": 0.14, "importance_ratio": 1.0000518560409546, "kl_div_avg": 0.0053115119226276875, "learning_rate": 4.874125874125874e-07, "loss_func": "stage2", "step": 60, "total_loss": -0.4959676265716553 }, { "epoch": 0.15, "importance_ratio": 1.0000547170639038, "kl_div_avg": 0.009003904648125172, "learning_rate": 4.871794871794871e-07, "loss_func": "stage2", "step": 61, "total_loss": -0.7978946566581726 }, { "epoch": 0.15, "importance_ratio": 1.000227689743042, "kl_div_avg": 0.007788301911205053, "learning_rate": 4.869463869463869e-07, "loss_func": "stage2", "step": 62, "total_loss": -0.801626443862915 }, { "epoch": 0.15, "importance_ratio": 1.0001925230026245, "kl_div_avg": 0.009508013725280762, "learning_rate": 4.867132867132867e-07, "loss_func": "stage2", "step": 63, "total_loss": -0.8087908029556274 }, { "epoch": 0.15, "importance_ratio": 1.0004205703735352, "kl_div_avg": 0.00948132760822773, "learning_rate": 4.864801864801865e-07, "loss_func": "stage2", "step": 64, "total_loss": -0.7977355718612671 }, { "epoch": 0.16, "importance_ratio": 0.9995088577270508, "kl_div_avg": 0.013071361929178238, "learning_rate": 4.862470862470862e-07, "loss_func": "stage2", "step": 65, "total_loss": -0.7926455736160278 }, { "epoch": 0.16, "importance_ratio": 1.0000696182250977, "kl_div_avg": 0.008261503651738167, "learning_rate": 4.86013986013986e-07, "loss_func": "stage2", "step": 66, "total_loss": 0.8929388523101807 }, { "epoch": 0.16, "importance_ratio": 0.9999169111251831, "kl_div_avg": 0.0072638243436813354, "learning_rate": 4.857808857808858e-07, "loss_func": "stage2", "step": 67, "total_loss": 0.7651723623275757 }, { "epoch": 0.16, "importance_ratio": 1.0006506443023682, "kl_div_avg": 0.014501883648335934, "learning_rate": 4.855477855477855e-07, "loss_func": "stage2", "step": 68, "total_loss": -0.7893998622894287 }, { "epoch": 0.17, "importance_ratio": 1.0001804828643799, "kl_div_avg": 0.014889956451952457, "learning_rate": 4.853146853146853e-07, "loss_func": "stage2", "step": 69, "total_loss": -0.28294438123703003 }, { "epoch": 0.17, "importance_ratio": 1.0003015995025635, "kl_div_avg": 0.012897053733468056, "learning_rate": 4.850815850815851e-07, "loss_func": "stage2", "step": 70, "total_loss": -0.29949530959129333 }, { "epoch": 0.17, "importance_ratio": 0.9999880194664001, "kl_div_avg": 0.011545092798769474, "learning_rate": 4.848484848484849e-07, "loss_func": "stage2", "step": 71, "total_loss": -0.07207685708999634 }, { "epoch": 0.17, "importance_ratio": 1.0000548362731934, "kl_div_avg": 0.011798446998000145, "learning_rate": 4.846153846153846e-07, "loss_func": "stage2", "step": 72, "total_loss": -0.030968129634857178 }, { "epoch": 0.18, "importance_ratio": 0.9998947381973267, "kl_div_avg": 0.014826457016170025, "learning_rate": 4.843822843822844e-07, "loss_func": "stage2", "step": 73, "total_loss": -0.04581499099731445 }, { "epoch": 0.18, "importance_ratio": 0.9999833106994629, "kl_div_avg": 0.01308258343487978, "learning_rate": 4.841491841491842e-07, "loss_func": "stage2", "step": 74, "total_loss": -0.2057284414768219 }, { "epoch": 0.18, "importance_ratio": 1.000425100326538, "kl_div_avg": 0.014183840714395046, "learning_rate": 4.839160839160839e-07, "loss_func": "stage2", "step": 75, "total_loss": 0.6338366866111755 }, { "epoch": 0.18, "importance_ratio": 1.0001928806304932, "kl_div_avg": 0.013401240110397339, "learning_rate": 4.836829836829837e-07, "loss_func": "stage2", "step": 76, "total_loss": 0.2874578833580017 }, { "epoch": 0.19, "importance_ratio": 0.9999673366546631, "kl_div_avg": 0.012778308242559433, "learning_rate": 4.834498834498834e-07, "loss_func": "stage2", "step": 77, "total_loss": 0.8181835412979126 }, { "epoch": 0.19, "importance_ratio": 0.9999823570251465, "kl_div_avg": 0.014732200652360916, "learning_rate": 4.832167832167832e-07, "loss_func": "stage2", "step": 78, "total_loss": 0.818813681602478 }, { "epoch": 0.19, "importance_ratio": 1.0003037452697754, "kl_div_avg": 0.014829716645181179, "learning_rate": 4.829836829836829e-07, "loss_func": "stage2", "step": 79, "total_loss": 0.0298653244972229 }, { "epoch": 0.19, "importance_ratio": 1.0003316402435303, "kl_div_avg": 0.01861473172903061, "learning_rate": 4.827505827505827e-07, "loss_func": "stage2", "step": 80, "total_loss": 0.47803181409835815 }, { "epoch": 0.2, "importance_ratio": 1.0000956058502197, "kl_div_avg": 0.013490747660398483, "learning_rate": 4.825174825174824e-07, "loss_func": "stage2", "step": 81, "total_loss": 0.43567734956741333 }, { "epoch": 0.2, "importance_ratio": 1.0001178979873657, "kl_div_avg": 0.014975698664784431, "learning_rate": 4.822843822843823e-07, "loss_func": "stage2", "step": 82, "total_loss": -0.27731338143348694 }, { "epoch": 0.2, "importance_ratio": 0.9999943971633911, "kl_div_avg": 0.012739075347781181, "learning_rate": 4.82051282051282e-07, "loss_func": "stage2", "step": 83, "total_loss": 0.14717233180999756 }, { "epoch": 0.2, "importance_ratio": 1.000013828277588, "kl_div_avg": 0.015260843560099602, "learning_rate": 4.818181818181818e-07, "loss_func": "stage2", "step": 84, "total_loss": -0.876125693321228 }, { "epoch": 0.2, "importance_ratio": 0.9999637603759766, "kl_div_avg": 0.009859403595328331, "learning_rate": 4.815850815850815e-07, "loss_func": "stage2", "step": 85, "total_loss": -0.007574997842311859 }, { "epoch": 0.2, "importance_ratio": 0.9998883008956909, "kl_div_avg": 0.012243506498634815, "learning_rate": 4.813519813519813e-07, "loss_func": "stage2", "step": 86, "total_loss": -0.8214981555938721 }, { "epoch": 0.2, "importance_ratio": 0.9997843503952026, "kl_div_avg": 0.009431181475520134, "learning_rate": 4.811188811188811e-07, "loss_func": "stage2", "step": 87, "total_loss": 0.09674309194087982 }, { "epoch": 0.2, "importance_ratio": 0.9998980760574341, "kl_div_avg": 0.012359343469142914, "learning_rate": 4.808857808857809e-07, "loss_func": "stage2", "step": 88, "total_loss": 0.13037657737731934 }, { "epoch": 0.21, "importance_ratio": 0.9999622702598572, "kl_div_avg": 0.019689181819558144, "learning_rate": 4.806526806526806e-07, "loss_func": "stage2", "step": 89, "total_loss": -0.053394585847854614 }, { "epoch": 0.21, "importance_ratio": 1.0000618696212769, "kl_div_avg": 0.013591418042778969, "learning_rate": 4.804195804195804e-07, "loss_func": "stage2", "step": 90, "total_loss": 0.7699258327484131 }, { "epoch": 0.21, "importance_ratio": 1.000166893005371, "kl_div_avg": 0.016464080661535263, "learning_rate": 4.801864801864802e-07, "loss_func": "stage2", "step": 91, "total_loss": 0.16640010476112366 }, { "epoch": 0.21, "importance_ratio": 0.9997192621231079, "kl_div_avg": 0.02152731642127037, "learning_rate": 4.799533799533799e-07, "loss_func": "stage2", "step": 92, "total_loss": -0.17338214814662933 }, { "epoch": 0.22, "importance_ratio": 1.000048279762268, "kl_div_avg": 0.01632719114422798, "learning_rate": 4.797202797202797e-07, "loss_func": "stage2", "step": 93, "total_loss": -0.311355322599411 }, { "epoch": 0.22, "importance_ratio": 1.000006914138794, "kl_div_avg": 0.02427072264254093, "learning_rate": 4.794871794871795e-07, "loss_func": "stage2", "step": 94, "total_loss": 0.6689386963844299 }, { "epoch": 0.22, "importance_ratio": 1.0001646280288696, "kl_div_avg": 0.020599162206053734, "learning_rate": 4.792540792540793e-07, "loss_func": "stage2", "step": 95, "total_loss": -0.27341556549072266 }, { "epoch": 0.22, "importance_ratio": 1.0005509853363037, "kl_div_avg": 0.01824137195944786, "learning_rate": 4.79020979020979e-07, "loss_func": "stage2", "step": 96, "total_loss": -0.7693477869033813 }, { "epoch": 0.23, "importance_ratio": 1.0000276565551758, "kl_div_avg": 0.019494881853461266, "learning_rate": 4.787878787878788e-07, "loss_func": "stage2", "step": 97, "total_loss": 0.5362880229949951 }, { "epoch": 0.23, "importance_ratio": 1.0001310110092163, "kl_div_avg": 0.018007826060056686, "learning_rate": 4.785547785547786e-07, "loss_func": "stage2", "step": 98, "total_loss": 0.13775774836540222 }, { "epoch": 0.23, "importance_ratio": 1.0001639127731323, "kl_div_avg": 0.018012849614024162, "learning_rate": 4.783216783216783e-07, "loss_func": "stage2", "step": 99, "total_loss": 0.43274223804473877 }, { "epoch": 0.23, "importance_ratio": 1.0001168251037598, "kl_div_avg": 0.016401609405875206, "learning_rate": 4.78088578088578e-07, "loss_func": "stage2", "step": 100, "total_loss": 0.024271167814731598 }, { "epoch": 0.24, "importance_ratio": 0.9998679161071777, "kl_div_avg": 0.018232179805636406, "learning_rate": 4.778554778554778e-07, "loss_func": "stage2", "step": 101, "total_loss": -0.13439278304576874 }, { "epoch": 0.24, "importance_ratio": 0.9999688267707825, "kl_div_avg": 0.01988252066075802, "learning_rate": 4.776223776223776e-07, "loss_func": "stage2", "step": 102, "total_loss": -0.5875260233879089 }, { "epoch": 0.24, "importance_ratio": 1.000270128250122, "kl_div_avg": 0.024290431290864944, "learning_rate": 4.773892773892773e-07, "loss_func": "stage2", "step": 103, "total_loss": 0.43479496240615845 }, { "epoch": 0.24, "importance_ratio": 1.0000674724578857, "kl_div_avg": 0.017674673348665237, "learning_rate": 4.771561771561771e-07, "loss_func": "stage2", "step": 104, "total_loss": -0.30525317788124084 }, { "epoch": 0.25, "importance_ratio": 0.9998775720596313, "kl_div_avg": 0.015626681968569756, "learning_rate": 4.769230769230769e-07, "loss_func": "stage2", "step": 105, "total_loss": -0.4420226514339447 }, { "epoch": 0.25, "importance_ratio": 1.000077247619629, "kl_div_avg": 0.011648900806903839, "learning_rate": 4.7668997668997666e-07, "loss_func": "stage2", "step": 106, "total_loss": -0.7798103094100952 }, { "epoch": 0.25, "importance_ratio": 0.9998931884765625, "kl_div_avg": 0.02424587681889534, "learning_rate": 4.7645687645687646e-07, "loss_func": "stage2", "step": 107, "total_loss": 0.3463074564933777 }, { "epoch": 0.25, "importance_ratio": 1.0001239776611328, "kl_div_avg": 0.019622065126895905, "learning_rate": 4.762237762237762e-07, "loss_func": "stage2", "step": 108, "total_loss": -0.7446590662002563 }, { "epoch": 0.26, "importance_ratio": 1.0000584125518799, "kl_div_avg": 0.02028917521238327, "learning_rate": 4.75990675990676e-07, "loss_func": "stage2", "step": 109, "total_loss": -0.269249826669693 }, { "epoch": 0.26, "importance_ratio": 0.9998422861099243, "kl_div_avg": 0.022249866276979446, "learning_rate": 4.7575757575757574e-07, "loss_func": "stage2", "step": 110, "total_loss": -0.6871204376220703 }, { "epoch": 0.26, "importance_ratio": 1.0001263618469238, "kl_div_avg": 0.01973864436149597, "learning_rate": 4.755244755244755e-07, "loss_func": "stage2", "step": 111, "total_loss": -0.6550001502037048 }, { "epoch": 0.26, "importance_ratio": 0.9996439218521118, "kl_div_avg": 0.02077101171016693, "learning_rate": 4.7529137529137523e-07, "loss_func": "stage2", "step": 112, "total_loss": 0.042905211448669434 }, { "epoch": 0.27, "importance_ratio": 1.000232219696045, "kl_div_avg": 0.036871857941150665, "learning_rate": 4.750582750582751e-07, "loss_func": "stage2", "step": 113, "total_loss": -0.6672766804695129 }, { "epoch": 0.27, "importance_ratio": 0.9999071359634399, "kl_div_avg": 0.023731358349323273, "learning_rate": 4.748251748251748e-07, "loss_func": "stage2", "step": 114, "total_loss": 0.22221048176288605 }, { "epoch": 0.27, "importance_ratio": 0.9998910427093506, "kl_div_avg": 0.02947179228067398, "learning_rate": 4.7459207459207457e-07, "loss_func": "stage2", "step": 115, "total_loss": 0.05969160795211792 }, { "epoch": 0.27, "importance_ratio": 0.9999151825904846, "kl_div_avg": 0.028714872896671295, "learning_rate": 4.743589743589743e-07, "loss_func": "stage2", "step": 116, "total_loss": 0.023228317499160767 }, { "epoch": 0.28, "importance_ratio": 0.9999186992645264, "kl_div_avg": 0.022106900811195374, "learning_rate": 4.741258741258741e-07, "loss_func": "stage2", "step": 117, "total_loss": -0.4819675087928772 }, { "epoch": 0.28, "importance_ratio": 0.9995689392089844, "kl_div_avg": 0.03649330139160156, "learning_rate": 4.7389277389277386e-07, "loss_func": "stage2", "step": 118, "total_loss": 0.3108961582183838 }, { "epoch": 0.28, "importance_ratio": 0.9996166825294495, "kl_div_avg": 0.03707721084356308, "learning_rate": 4.7365967365967365e-07, "loss_func": "stage2", "step": 119, "total_loss": 0.3411310315132141 }, { "epoch": 0.28, "importance_ratio": 0.9993818998336792, "kl_div_avg": 0.02660643495619297, "learning_rate": 4.734265734265734e-07, "loss_func": "stage2", "step": 120, "total_loss": -0.1782078891992569 }, { "epoch": 0.29, "importance_ratio": 1.0000483989715576, "kl_div_avg": 0.020809054374694824, "learning_rate": 4.731934731934732e-07, "loss_func": "stage2", "step": 121, "total_loss": 0.2691134810447693 }, { "epoch": 0.29, "importance_ratio": 0.9999549388885498, "kl_div_avg": 0.018363434821367264, "learning_rate": 4.7296037296037294e-07, "loss_func": "stage2", "step": 122, "total_loss": -0.11637084186077118 }, { "epoch": 0.29, "importance_ratio": 1.0000113248825073, "kl_div_avg": 0.01843072474002838, "learning_rate": 4.727272727272727e-07, "loss_func": "stage2", "step": 123, "total_loss": 0.41335129737854004 }, { "epoch": 0.29, "importance_ratio": 0.9993541836738586, "kl_div_avg": 0.023193594068288803, "learning_rate": 4.724941724941724e-07, "loss_func": "stage2", "step": 124, "total_loss": 0.3763417601585388 }, { "epoch": 0.3, "importance_ratio": 1.0000863075256348, "kl_div_avg": 0.02424781210720539, "learning_rate": 4.722610722610723e-07, "loss_func": "stage2", "step": 125, "total_loss": 0.8012444972991943 }, { "epoch": 0.3, "importance_ratio": 0.9997611045837402, "kl_div_avg": 0.019796304404735565, "learning_rate": 4.72027972027972e-07, "loss_func": "stage2", "step": 126, "total_loss": 0.2471800446510315 }, { "epoch": 0.3, "importance_ratio": 0.9994201064109802, "kl_div_avg": 0.03692193701863289, "learning_rate": 4.7179487179487176e-07, "loss_func": "stage2", "step": 127, "total_loss": 0.6575199961662292 }, { "epoch": 0.3, "importance_ratio": 0.9993878602981567, "kl_div_avg": 0.024065542966127396, "learning_rate": 4.715617715617715e-07, "loss_func": "stage2", "step": 128, "total_loss": 0.39468204975128174 }, { "epoch": 0.31, "importance_ratio": 1.0001311302185059, "kl_div_avg": 0.024539019912481308, "learning_rate": 4.713286713286713e-07, "loss_func": "stage2", "step": 129, "total_loss": 0.1832764893770218 }, { "epoch": 0.31, "importance_ratio": 0.9998582005500793, "kl_div_avg": 0.024634480476379395, "learning_rate": 4.710955710955711e-07, "loss_func": "stage2", "step": 130, "total_loss": 0.1833437830209732 }, { "epoch": 0.31, "importance_ratio": 0.9997899532318115, "kl_div_avg": 0.024798087775707245, "learning_rate": 4.7086247086247085e-07, "loss_func": "stage2", "step": 131, "total_loss": -0.029144808650016785 }, { "epoch": 0.31, "importance_ratio": 0.9991902112960815, "kl_div_avg": 0.022448930889368057, "learning_rate": 4.706293706293706e-07, "loss_func": "stage2", "step": 132, "total_loss": 0.5746316909790039 }, { "epoch": 0.32, "importance_ratio": 0.9999872446060181, "kl_div_avg": 0.030649660155177116, "learning_rate": 4.703962703962704e-07, "loss_func": "stage2", "step": 133, "total_loss": -0.27921533584594727 }, { "epoch": 0.32, "importance_ratio": 0.9999322891235352, "kl_div_avg": 0.028502434492111206, "learning_rate": 4.7016317016317013e-07, "loss_func": "stage2", "step": 134, "total_loss": -0.18240980803966522 }, { "epoch": 0.32, "importance_ratio": 0.9991711378097534, "kl_div_avg": 0.034727346152067184, "learning_rate": 4.699300699300699e-07, "loss_func": "stage2", "step": 135, "total_loss": 0.20225152373313904 }, { "epoch": 0.32, "importance_ratio": 1.000333547592163, "kl_div_avg": 0.02375342883169651, "learning_rate": 4.696969696969697e-07, "loss_func": "stage2", "step": 136, "total_loss": 0.19642743468284607 }, { "epoch": 0.33, "importance_ratio": 1.0000425577163696, "kl_div_avg": 0.02941157855093479, "learning_rate": 4.6946386946386947e-07, "loss_func": "stage2", "step": 137, "total_loss": -0.2832520604133606 }, { "epoch": 0.33, "importance_ratio": 0.9998887777328491, "kl_div_avg": 0.0380094014108181, "learning_rate": 4.692307692307692e-07, "loss_func": "stage2", "step": 138, "total_loss": 0.07638365030288696 }, { "epoch": 0.33, "importance_ratio": 1.0000771284103394, "kl_div_avg": 0.031544946134090424, "learning_rate": 4.6899766899766896e-07, "loss_func": "stage2", "step": 139, "total_loss": -0.29178526997566223 }, { "epoch": 0.33, "importance_ratio": 0.9999626278877258, "kl_div_avg": 0.03620228171348572, "learning_rate": 4.6876456876456875e-07, "loss_func": "stage2", "step": 140, "total_loss": 0.12343016266822815 }, { "epoch": 0.34, "importance_ratio": 0.9999535083770752, "kl_div_avg": 0.026813074946403503, "learning_rate": 4.685314685314685e-07, "loss_func": "stage2", "step": 141, "total_loss": 0.024522602558135986 }, { "epoch": 0.34, "importance_ratio": 0.9998844265937805, "kl_div_avg": 0.031967416405677795, "learning_rate": 4.682983682983683e-07, "loss_func": "stage2", "step": 142, "total_loss": 0.6268632411956787 }, { "epoch": 0.34, "importance_ratio": 0.9997915029525757, "kl_div_avg": 0.024857094511389732, "learning_rate": 4.6806526806526804e-07, "loss_func": "stage2", "step": 143, "total_loss": 0.13797396421432495 }, { "epoch": 0.34, "importance_ratio": 0.9998888969421387, "kl_div_avg": 0.04350988566875458, "learning_rate": 4.6783216783216784e-07, "loss_func": "stage2", "step": 144, "total_loss": 0.5312750935554504 }, { "epoch": 0.34, "importance_ratio": 0.9998694658279419, "kl_div_avg": 0.03627926483750343, "learning_rate": 4.675990675990676e-07, "loss_func": "stage2", "step": 145, "total_loss": 0.09028466045856476 }, { "epoch": 0.34, "importance_ratio": 0.9999563694000244, "kl_div_avg": 0.02497359737753868, "learning_rate": 4.673659673659673e-07, "loss_func": "stage2", "step": 146, "total_loss": 0.5303145051002502 }, { "epoch": 0.34, "importance_ratio": 0.9999274015426636, "kl_div_avg": 0.026778005063533783, "learning_rate": 4.6713286713286707e-07, "loss_func": "stage2", "step": 147, "total_loss": 0.1259535402059555 }, { "epoch": 0.34, "importance_ratio": 0.9997484683990479, "kl_div_avg": 0.02970227226614952, "learning_rate": 4.668997668997669e-07, "loss_func": "stage2", "step": 148, "total_loss": 0.5203793048858643 }, { "epoch": 0.35, "importance_ratio": 1.0000306367874146, "kl_div_avg": 0.03883244842290878, "learning_rate": 4.6666666666666666e-07, "loss_func": "stage2", "step": 149, "total_loss": 0.5459209680557251 }, { "epoch": 0.35, "importance_ratio": 0.9999977946281433, "kl_div_avg": 0.010376233607530594, "learning_rate": 4.664335664335664e-07, "loss_func": "stage2", "step": 150, "total_loss": 0.5408558249473572 }, { "epoch": 0.35, "importance_ratio": 1.0000672340393066, "kl_div_avg": 0.020404186099767685, "learning_rate": 4.6620046620046615e-07, "loss_func": "stage2", "step": 151, "total_loss": 0.5100921392440796 }, { "epoch": 0.35, "importance_ratio": 1.0013837814331055, "kl_div_avg": 0.032543182373046875, "learning_rate": 4.6596736596736595e-07, "loss_func": "stage2", "step": 152, "total_loss": -0.4812799394130707 } ], "logging_steps": 1.0, "max_steps": 2150, "num_input_tokens_seen": 0, "num_train_epochs": 20.0, "save_steps": 50, "total_flos": 0, "train_batch_size": 1, "trial_name": null, "trial_params": null }