{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.3536425767049586,
  "eval_steps": 500,
  "global_step": 152,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "importance_ratio": 1.0001533031463623,
      "kl_div_avg": 0.0022537275217473507,
      "learning_rate": 0.0,
      "loss_func": "stage2",
      "step": 1,
      "total_loss": 0.20475485920906067
    },
    {
      "epoch": 0.01,
      "importance_ratio": 1.0000330209732056,
      "kl_div_avg": 0.0011816158657893538,
      "learning_rate": 2.153382790366965e-07,
      "loss_func": "stage2",
      "step": 2,
      "total_loss": 0.17304854094982147
    },
    {
      "epoch": 0.01,
      "importance_ratio": 0.9998020529747009,
      "kl_div_avg": 0.0012814232613891363,
      "learning_rate": 3.4130309724299266e-07,
      "loss_func": "stage2",
      "step": 3,
      "total_loss": 0.39294394850730896
    },
    {
      "epoch": 0.01,
      "importance_ratio": 1.0002059936523438,
      "kl_div_avg": 0.001280196476727724,
      "learning_rate": 4.30676558073393e-07,
      "loss_func": "stage2",
      "step": 4,
      "total_loss": 0.2844714820384979
    },
    {
      "epoch": 0.02,
      "importance_ratio": 1.0000617504119873,
      "kl_div_avg": 0.0033944130409508944,
      "learning_rate": 5e-07,
      "loss_func": "stage2",
      "step": 5,
      "total_loss": 0.38719698786735535
    },
    {
      "epoch": 0.02,
      "importance_ratio": 1.0002291202545166,
      "kl_div_avg": 0.0006711427122354507,
      "learning_rate": 5e-07,
      "loss_func": "stage2",
      "step": 6,
      "total_loss": -0.7354744672775269
    },
    {
      "epoch": 0.02,
      "importance_ratio": 1.0001697540283203,
      "kl_div_avg": 0.0009293262264691293,
      "learning_rate": 4.997668997668998e-07,
      "loss_func": "stage2",
      "step": 7,
      "total_loss": 0.10072920471429825
    },
    {
      "epoch": 0.02,
      "importance_ratio": 1.0003812313079834,
      "kl_div_avg": 0.0006424246821552515,
      "learning_rate": 4.995337995337996e-07,
      "loss_func": "stage2",
      "step": 8,
      "total_loss": -0.7356305122375488
    },
    {
      "epoch": 0.03,
      "importance_ratio": 1.0002334117889404,
      "kl_div_avg": 0.001079258043318987,
      "learning_rate": 4.993006993006993e-07,
      "loss_func": "stage2",
      "step": 9,
      "total_loss": 0.1572389006614685
    },
    {
      "epoch": 0.03,
      "importance_ratio": 0.9999626874923706,
      "kl_div_avg": 0.0012779454700648785,
      "learning_rate": 4.990675990675991e-07,
      "loss_func": "stage2",
      "step": 10,
      "total_loss": -0.2823958396911621
    },
    {
      "epoch": 0.03,
      "importance_ratio": 0.9999264478683472,
      "kl_div_avg": 0.0010424605570733547,
      "learning_rate": 4.988344988344988e-07,
      "loss_func": "stage2",
      "step": 11,
      "total_loss": -0.4896008372306824
    },
    {
      "epoch": 0.03,
      "importance_ratio": 0.9999059438705444,
      "kl_div_avg": 0.0011010458692908287,
      "learning_rate": 4.986013986013987e-07,
      "loss_func": "stage2",
      "step": 12,
      "total_loss": 0.4140966832637787
    },
    {
      "epoch": 0.04,
      "importance_ratio": 0.9999853372573853,
      "kl_div_avg": 0.0014636358246207237,
      "learning_rate": 4.983682983682983e-07,
      "loss_func": "stage2",
      "step": 13,
      "total_loss": 0.4479982852935791
    },
    {
      "epoch": 0.04,
      "importance_ratio": 1.0001029968261719,
      "kl_div_avg": 0.0010808318620547652,
      "learning_rate": 4.981351981351981e-07,
      "loss_func": "stage2",
      "step": 14,
      "total_loss": -0.8298860192298889
    },
    {
      "epoch": 0.04,
      "importance_ratio": 1.0000591278076172,
      "kl_div_avg": 0.0012179139303043485,
      "learning_rate": 4.979020979020978e-07,
      "loss_func": "stage2",
      "step": 15,
      "total_loss": -0.8044635057449341
    },
    {
      "epoch": 0.04,
      "importance_ratio": 1.0000298023223877,
      "kl_div_avg": 0.004863352049142122,
      "learning_rate": 4.976689976689976e-07,
      "loss_func": "stage2",
      "step": 16,
      "total_loss": 0.24484601616859436
    },
    {
      "epoch": 0.05,
      "importance_ratio": 1.0002245903015137,
      "kl_div_avg": 0.0017589405179023743,
      "learning_rate": 4.974358974358974e-07,
      "loss_func": "stage2",
      "step": 17,
      "total_loss": -0.0013702064752578735
    },
    {
      "epoch": 0.05,
      "importance_ratio": 0.9999561309814453,
      "kl_div_avg": 0.0018663634546101093,
      "learning_rate": 4.972027972027972e-07,
      "loss_func": "stage2",
      "step": 18,
      "total_loss": -0.10264579951763153
    },
    {
      "epoch": 0.05,
      "importance_ratio": 0.9998416900634766,
      "kl_div_avg": 0.0018756331410259008,
      "learning_rate": 4.969696969696969e-07,
      "loss_func": "stage2",
      "step": 19,
      "total_loss": 0.48283857107162476
    },
    {
      "epoch": 0.05,
      "importance_ratio": 0.999815821647644,
      "kl_div_avg": 0.0019035658333450556,
      "learning_rate": 4.967365967365967e-07,
      "loss_func": "stage2",
      "step": 20,
      "total_loss": -0.1848379671573639
    },
    {
      "epoch": 0.06,
      "importance_ratio": 0.999942421913147,
      "kl_div_avg": 0.009937961585819721,
      "learning_rate": 4.965034965034965e-07,
      "loss_func": "stage2",
      "step": 21,
      "total_loss": 0.14985397458076477
    },
    {
      "epoch": 0.06,
      "importance_ratio": 0.9997897148132324,
      "kl_div_avg": 0.00241913297213614,
      "learning_rate": 4.962703962703962e-07,
      "loss_func": "stage2",
      "step": 22,
      "total_loss": -0.29083502292633057
    },
    {
      "epoch": 0.06,
      "importance_ratio": 0.9998656511306763,
      "kl_div_avg": 0.00263982149772346,
      "learning_rate": 4.96037296037296e-07,
      "loss_func": "stage2",
      "step": 23,
      "total_loss": -0.02688920497894287
    },
    {
      "epoch": 0.06,
      "importance_ratio": 1.0000394582748413,
      "kl_div_avg": 0.0032202559523284435,
      "learning_rate": 4.958041958041958e-07,
      "loss_func": "stage2",
      "step": 24,
      "total_loss": 0.5130484104156494
    },
    {
      "epoch": 0.07,
      "importance_ratio": 1.0000278949737549,
      "kl_div_avg": 0.00243174796923995,
      "learning_rate": 4.955710955710956e-07,
      "loss_func": "stage2",
      "step": 25,
      "total_loss": 0.309948205947876
    },
    {
      "epoch": 0.07,
      "importance_ratio": 0.9999406337738037,
      "kl_div_avg": 0.003059545997530222,
      "learning_rate": 4.953379953379953e-07,
      "loss_func": "stage2",
      "step": 26,
      "total_loss": 0.11305176466703415
    },
    {
      "epoch": 0.07,
      "importance_ratio": 0.9999201893806458,
      "kl_div_avg": 0.003822761122137308,
      "learning_rate": 4.951048951048951e-07,
      "loss_func": "stage2",
      "step": 27,
      "total_loss": 0.38959354162216187
    },
    {
      "epoch": 0.07,
      "importance_ratio": 0.9994360208511353,
      "kl_div_avg": 0.0033927513286471367,
      "learning_rate": 4.948717948717949e-07,
      "loss_func": "stage2",
      "step": 28,
      "total_loss": 0.4653158485889435
    },
    {
      "epoch": 0.07,
      "importance_ratio": 0.9999792575836182,
      "kl_div_avg": 0.0032504587434232235,
      "learning_rate": 4.946386946386946e-07,
      "loss_func": "stage2",
      "step": 29,
      "total_loss": 0.4534304141998291
    },
    {
      "epoch": 0.07,
      "importance_ratio": 1.0000840425491333,
      "kl_div_avg": 0.002915366552770138,
      "learning_rate": 4.944055944055944e-07,
      "loss_func": "stage2",
      "step": 30,
      "total_loss": 0.46045419573783875
    },
    {
      "epoch": 0.07,
      "importance_ratio": 0.9996820688247681,
      "kl_div_avg": 0.0030386601574718952,
      "learning_rate": 4.941724941724942e-07,
      "loss_func": "stage2",
      "step": 31,
      "total_loss": -0.8699095845222473
    },
    {
      "epoch": 0.07,
      "importance_ratio": 0.9997460842132568,
      "kl_div_avg": 0.0037004691548645496,
      "learning_rate": 4.93939393939394e-07,
      "loss_func": "stage2",
      "step": 32,
      "total_loss": -0.23050172626972198
    },
    {
      "epoch": 0.08,
      "importance_ratio": 0.9999768137931824,
      "kl_div_avg": 0.0046552978456020355,
      "learning_rate": 4.937062937062936e-07,
      "loss_func": "stage2",
      "step": 33,
      "total_loss": 0.3776797950267792
    },
    {
      "epoch": 0.08,
      "importance_ratio": 0.9999284148216248,
      "kl_div_avg": 0.004839582834392786,
      "learning_rate": 4.934731934731934e-07,
      "loss_func": "stage2",
      "step": 34,
      "total_loss": 0.5804722309112549
    },
    {
      "epoch": 0.08,
      "importance_ratio": 0.999695897102356,
      "kl_div_avg": 0.004378842655569315,
      "learning_rate": 4.932400932400932e-07,
      "loss_func": "stage2",
      "step": 35,
      "total_loss": 0.5690972805023193
    },
    {
      "epoch": 0.08,
      "importance_ratio": 0.9998815059661865,
      "kl_div_avg": 0.0047516971826553345,
      "learning_rate": 4.93006993006993e-07,
      "loss_func": "stage2",
      "step": 36,
      "total_loss": 0.2298603653907776
    },
    {
      "epoch": 0.09,
      "importance_ratio": 0.9997518062591553,
      "kl_div_avg": 0.004270514938980341,
      "learning_rate": 4.927738927738927e-07,
      "loss_func": "stage2",
      "step": 37,
      "total_loss": -0.20016932487487793
    },
    {
      "epoch": 0.09,
      "importance_ratio": 1.000083088874817,
      "kl_div_avg": 0.004711843561381102,
      "learning_rate": 4.925407925407925e-07,
      "loss_func": "stage2",
      "step": 38,
      "total_loss": 0.25743457674980164
    },
    {
      "epoch": 0.09,
      "importance_ratio": 0.9999402761459351,
      "kl_div_avg": 0.004922826308757067,
      "learning_rate": 4.923076923076923e-07,
      "loss_func": "stage2",
      "step": 39,
      "total_loss": -0.15881219506263733
    },
    {
      "epoch": 0.09,
      "importance_ratio": 0.999858021736145,
      "kl_div_avg": 0.0039229318499565125,
      "learning_rate": 4.92074592074592e-07,
      "loss_func": "stage2",
      "step": 40,
      "total_loss": -0.23230578005313873
    },
    {
      "epoch": 0.1,
      "importance_ratio": 0.999944806098938,
      "kl_div_avg": 0.00766429677605629,
      "learning_rate": 4.918414918414918e-07,
      "loss_func": "stage2",
      "step": 41,
      "total_loss": -0.03111131489276886
    },
    {
      "epoch": 0.1,
      "importance_ratio": 1.000047206878662,
      "kl_div_avg": 0.005274048075079918,
      "learning_rate": 4.916083916083916e-07,
      "loss_func": "stage2",
      "step": 42,
      "total_loss": -0.033877044916152954
    },
    {
      "epoch": 0.1,
      "importance_ratio": 1.0002098083496094,
      "kl_div_avg": 0.006382378749549389,
      "learning_rate": 4.913752913752914e-07,
      "loss_func": "stage2",
      "step": 43,
      "total_loss": 0.10409477353096008
    },
    {
      "epoch": 0.1,
      "importance_ratio": 0.9998437166213989,
      "kl_div_avg": 0.006663881242275238,
      "learning_rate": 4.911421911421911e-07,
      "loss_func": "stage2",
      "step": 44,
      "total_loss": -0.9234535694122314
    },
    {
      "epoch": 0.11,
      "importance_ratio": 0.9999840259552002,
      "kl_div_avg": 0.004546988755464554,
      "learning_rate": 4.909090909090909e-07,
      "loss_func": "stage2",
      "step": 45,
      "total_loss": 0.022589027881622314
    },
    {
      "epoch": 0.11,
      "importance_ratio": 1.0002349615097046,
      "kl_div_avg": 0.0048853568732738495,
      "learning_rate": 4.906759906759906e-07,
      "loss_func": "stage2",
      "step": 46,
      "total_loss": 0.7868871688842773
    },
    {
      "epoch": 0.11,
      "importance_ratio": 1.0003743171691895,
      "kl_div_avg": 0.005253675393760204,
      "learning_rate": 4.904428904428905e-07,
      "loss_func": "stage2",
      "step": 47,
      "total_loss": 0.7918493747711182
    },
    {
      "epoch": 0.11,
      "importance_ratio": 1.0001533031463623,
      "kl_div_avg": 0.005680109839886427,
      "learning_rate": 4.902097902097902e-07,
      "loss_func": "stage2",
      "step": 48,
      "total_loss": -0.10262584686279297
    },
    {
      "epoch": 0.12,
      "importance_ratio": 0.999789834022522,
      "kl_div_avg": 0.006105936132371426,
      "learning_rate": 4.8997668997669e-07,
      "loss_func": "stage2",
      "step": 49,
      "total_loss": -0.8303477168083191
    },
    {
      "epoch": 0.12,
      "importance_ratio": 0.9999826550483704,
      "kl_div_avg": 0.005722599104046822,
      "learning_rate": 4.897435897435897e-07,
      "loss_func": "stage2",
      "step": 50,
      "total_loss": -0.8298835754394531
    },
    {
      "epoch": 0.12,
      "importance_ratio": 1.0003968477249146,
      "kl_div_avg": 0.005780387669801712,
      "learning_rate": 4.895104895104895e-07,
      "loss_func": "stage2",
      "step": 51,
      "total_loss": -0.8298872709274292
    },
    {
      "epoch": 0.12,
      "importance_ratio": 1.001389741897583,
      "kl_div_avg": 0.00528342742472887,
      "learning_rate": 4.892773892773893e-07,
      "loss_func": "stage2",
      "step": 52,
      "total_loss": -0.8320926427841187
    },
    {
      "epoch": 0.13,
      "importance_ratio": 1.0000613927841187,
      "kl_div_avg": 0.005511891096830368,
      "learning_rate": 4.890442890442891e-07,
      "loss_func": "stage2",
      "step": 53,
      "total_loss": -0.8287703990936279
    },
    {
      "epoch": 0.13,
      "importance_ratio": 1.000309705734253,
      "kl_div_avg": 0.00582331046462059,
      "learning_rate": 4.888111888111888e-07,
      "loss_func": "stage2",
      "step": 54,
      "total_loss": -0.007962286472320557
    },
    {
      "epoch": 0.13,
      "importance_ratio": 1.000248670578003,
      "kl_div_avg": 0.005605565384030342,
      "learning_rate": 4.885780885780885e-07,
      "loss_func": "stage2",
      "step": 55,
      "total_loss": 0.30858537554740906
    },
    {
      "epoch": 0.13,
      "importance_ratio": 1.0001593828201294,
      "kl_div_avg": 0.006694035604596138,
      "learning_rate": 4.883449883449883e-07,
      "loss_func": "stage2",
      "step": 56,
      "total_loss": 0.2841358482837677
    },
    {
      "epoch": 0.14,
      "importance_ratio": 0.9998378753662109,
      "kl_div_avg": 0.0054409438744187355,
      "learning_rate": 4.88111888111888e-07,
      "loss_func": "stage2",
      "step": 57,
      "total_loss": 0.4940628409385681
    },
    {
      "epoch": 0.14,
      "importance_ratio": 1.0000338554382324,
      "kl_div_avg": 0.00452791154384613,
      "learning_rate": 4.878787878787878e-07,
      "loss_func": "stage2",
      "step": 58,
      "total_loss": 0.25203195214271545
    },
    {
      "epoch": 0.14,
      "importance_ratio": 1.0000851154327393,
      "kl_div_avg": 0.008046677336096764,
      "learning_rate": 4.876456876456876e-07,
      "loss_func": "stage2",
      "step": 59,
      "total_loss": 0.8340111970901489
    },
    {
      "epoch": 0.14,
      "importance_ratio": 1.0000518560409546,
      "kl_div_avg": 0.0053115119226276875,
      "learning_rate": 4.874125874125874e-07,
      "loss_func": "stage2",
      "step": 60,
      "total_loss": -0.4959676265716553
    },
    {
      "epoch": 0.15,
      "importance_ratio": 1.0000547170639038,
      "kl_div_avg": 0.009003904648125172,
      "learning_rate": 4.871794871794871e-07,
      "loss_func": "stage2",
      "step": 61,
      "total_loss": -0.7978946566581726
    },
    {
      "epoch": 0.15,
      "importance_ratio": 1.000227689743042,
      "kl_div_avg": 0.007788301911205053,
      "learning_rate": 4.869463869463869e-07,
      "loss_func": "stage2",
      "step": 62,
      "total_loss": -0.801626443862915
    },
    {
      "epoch": 0.15,
      "importance_ratio": 1.0001925230026245,
      "kl_div_avg": 0.009508013725280762,
      "learning_rate": 4.867132867132867e-07,
      "loss_func": "stage2",
      "step": 63,
      "total_loss": -0.8087908029556274
    },
    {
      "epoch": 0.15,
      "importance_ratio": 1.0004205703735352,
      "kl_div_avg": 0.00948132760822773,
      "learning_rate": 4.864801864801865e-07,
      "loss_func": "stage2",
      "step": 64,
      "total_loss": -0.7977355718612671
    },
    {
      "epoch": 0.16,
      "importance_ratio": 0.9995088577270508,
      "kl_div_avg": 0.013071361929178238,
      "learning_rate": 4.862470862470862e-07,
      "loss_func": "stage2",
      "step": 65,
      "total_loss": -0.7926455736160278
    },
    {
      "epoch": 0.16,
      "importance_ratio": 1.0000696182250977,
      "kl_div_avg": 0.008261503651738167,
      "learning_rate": 4.86013986013986e-07,
      "loss_func": "stage2",
      "step": 66,
      "total_loss": 0.8929388523101807
    },
    {
      "epoch": 0.16,
      "importance_ratio": 0.9999169111251831,
      "kl_div_avg": 0.0072638243436813354,
      "learning_rate": 4.857808857808858e-07,
      "loss_func": "stage2",
      "step": 67,
      "total_loss": 0.7651723623275757
    },
    {
      "epoch": 0.16,
      "importance_ratio": 1.0006506443023682,
      "kl_div_avg": 0.014501883648335934,
      "learning_rate": 4.855477855477855e-07,
      "loss_func": "stage2",
      "step": 68,
      "total_loss": -0.7893998622894287
    },
    {
      "epoch": 0.17,
      "importance_ratio": 1.0001804828643799,
      "kl_div_avg": 0.014889956451952457,
      "learning_rate": 4.853146853146853e-07,
      "loss_func": "stage2",
      "step": 69,
      "total_loss": -0.28294438123703003
    },
    {
      "epoch": 0.17,
      "importance_ratio": 1.0003015995025635,
      "kl_div_avg": 0.012897053733468056,
      "learning_rate": 4.850815850815851e-07,
      "loss_func": "stage2",
      "step": 70,
      "total_loss": -0.29949530959129333
    },
    {
      "epoch": 0.17,
      "importance_ratio": 0.9999880194664001,
      "kl_div_avg": 0.011545092798769474,
      "learning_rate": 4.848484848484849e-07,
      "loss_func": "stage2",
      "step": 71,
      "total_loss": -0.07207685708999634
    },
    {
      "epoch": 0.17,
      "importance_ratio": 1.0000548362731934,
      "kl_div_avg": 0.011798446998000145,
      "learning_rate": 4.846153846153846e-07,
      "loss_func": "stage2",
      "step": 72,
      "total_loss": -0.030968129634857178
    },
    {
      "epoch": 0.18,
      "importance_ratio": 0.9998947381973267,
      "kl_div_avg": 0.014826457016170025,
      "learning_rate": 4.843822843822844e-07,
      "loss_func": "stage2",
      "step": 73,
      "total_loss": -0.04581499099731445
    },
    {
      "epoch": 0.18,
      "importance_ratio": 0.9999833106994629,
      "kl_div_avg": 0.01308258343487978,
      "learning_rate": 4.841491841491842e-07,
      "loss_func": "stage2",
      "step": 74,
      "total_loss": -0.2057284414768219
    },
    {
      "epoch": 0.18,
      "importance_ratio": 1.000425100326538,
      "kl_div_avg": 0.014183840714395046,
      "learning_rate": 4.839160839160839e-07,
      "loss_func": "stage2",
      "step": 75,
      "total_loss": 0.6338366866111755
    },
    {
      "epoch": 0.18,
      "importance_ratio": 1.0001928806304932,
      "kl_div_avg": 0.013401240110397339,
      "learning_rate": 4.836829836829837e-07,
      "loss_func": "stage2",
      "step": 76,
      "total_loss": 0.2874578833580017
    },
    {
      "epoch": 0.19,
      "importance_ratio": 0.9999673366546631,
      "kl_div_avg": 0.012778308242559433,
      "learning_rate": 4.834498834498834e-07,
      "loss_func": "stage2",
      "step": 77,
      "total_loss": 0.8181835412979126
    },
    {
      "epoch": 0.19,
      "importance_ratio": 0.9999823570251465,
      "kl_div_avg": 0.014732200652360916,
      "learning_rate": 4.832167832167832e-07,
      "loss_func": "stage2",
      "step": 78,
      "total_loss": 0.818813681602478
    },
    {
      "epoch": 0.19,
      "importance_ratio": 1.0003037452697754,
      "kl_div_avg": 0.014829716645181179,
      "learning_rate": 4.829836829836829e-07,
      "loss_func": "stage2",
      "step": 79,
      "total_loss": 0.0298653244972229
    },
    {
      "epoch": 0.19,
      "importance_ratio": 1.0003316402435303,
      "kl_div_avg": 0.01861473172903061,
      "learning_rate": 4.827505827505827e-07,
      "loss_func": "stage2",
      "step": 80,
      "total_loss": 0.47803181409835815
    },
    {
      "epoch": 0.2,
      "importance_ratio": 1.0000956058502197,
      "kl_div_avg": 0.013490747660398483,
      "learning_rate": 4.825174825174824e-07,
      "loss_func": "stage2",
      "step": 81,
      "total_loss": 0.43567734956741333
    },
    {
      "epoch": 0.2,
      "importance_ratio": 1.0001178979873657,
      "kl_div_avg": 0.014975698664784431,
      "learning_rate": 4.822843822843823e-07,
      "loss_func": "stage2",
      "step": 82,
      "total_loss": -0.27731338143348694
    },
    {
      "epoch": 0.2,
      "importance_ratio": 0.9999943971633911,
      "kl_div_avg": 0.012739075347781181,
      "learning_rate": 4.82051282051282e-07,
      "loss_func": "stage2",
      "step": 83,
      "total_loss": 0.14717233180999756
    },
    {
      "epoch": 0.2,
      "importance_ratio": 1.000013828277588,
      "kl_div_avg": 0.015260843560099602,
      "learning_rate": 4.818181818181818e-07,
      "loss_func": "stage2",
      "step": 84,
      "total_loss": -0.876125693321228
    },
    {
      "epoch": 0.2,
      "importance_ratio": 0.9999637603759766,
      "kl_div_avg": 0.009859403595328331,
      "learning_rate": 4.815850815850815e-07,
      "loss_func": "stage2",
      "step": 85,
      "total_loss": -0.007574997842311859
    },
    {
      "epoch": 0.2,
      "importance_ratio": 0.9998883008956909,
      "kl_div_avg": 0.012243506498634815,
      "learning_rate": 4.813519813519813e-07,
      "loss_func": "stage2",
      "step": 86,
      "total_loss": -0.8214981555938721
    },
    {
      "epoch": 0.2,
      "importance_ratio": 0.9997843503952026,
      "kl_div_avg": 0.009431181475520134,
      "learning_rate": 4.811188811188811e-07,
      "loss_func": "stage2",
      "step": 87,
      "total_loss": 0.09674309194087982
    },
    {
      "epoch": 0.2,
      "importance_ratio": 0.9998980760574341,
      "kl_div_avg": 0.012359343469142914,
      "learning_rate": 4.808857808857809e-07,
      "loss_func": "stage2",
      "step": 88,
      "total_loss": 0.13037657737731934
    },
    {
      "epoch": 0.21,
      "importance_ratio": 0.9999622702598572,
      "kl_div_avg": 0.019689181819558144,
      "learning_rate": 4.806526806526806e-07,
      "loss_func": "stage2",
      "step": 89,
      "total_loss": -0.053394585847854614
    },
    {
      "epoch": 0.21,
      "importance_ratio": 1.0000618696212769,
      "kl_div_avg": 0.013591418042778969,
      "learning_rate": 4.804195804195804e-07,
      "loss_func": "stage2",
      "step": 90,
      "total_loss": 0.7699258327484131
    },
    {
      "epoch": 0.21,
      "importance_ratio": 1.000166893005371,
      "kl_div_avg": 0.016464080661535263,
      "learning_rate": 4.801864801864802e-07,
      "loss_func": "stage2",
      "step": 91,
      "total_loss": 0.16640010476112366
    },
    {
      "epoch": 0.21,
      "importance_ratio": 0.9997192621231079,
      "kl_div_avg": 0.02152731642127037,
      "learning_rate": 4.799533799533799e-07,
      "loss_func": "stage2",
      "step": 92,
      "total_loss": -0.17338214814662933
    },
    {
      "epoch": 0.22,
      "importance_ratio": 1.000048279762268,
      "kl_div_avg": 0.01632719114422798,
      "learning_rate": 4.797202797202797e-07,
      "loss_func": "stage2",
      "step": 93,
      "total_loss": -0.311355322599411
    },
    {
      "epoch": 0.22,
      "importance_ratio": 1.000006914138794,
      "kl_div_avg": 0.02427072264254093,
      "learning_rate": 4.794871794871795e-07,
      "loss_func": "stage2",
      "step": 94,
      "total_loss": 0.6689386963844299
    },
    {
      "epoch": 0.22,
      "importance_ratio": 1.0001646280288696,
      "kl_div_avg": 0.020599162206053734,
      "learning_rate": 4.792540792540793e-07,
      "loss_func": "stage2",
      "step": 95,
      "total_loss": -0.27341556549072266
    },
    {
      "epoch": 0.22,
      "importance_ratio": 1.0005509853363037,
      "kl_div_avg": 0.01824137195944786,
      "learning_rate": 4.79020979020979e-07,
      "loss_func": "stage2",
      "step": 96,
      "total_loss": -0.7693477869033813
    },
    {
      "epoch": 0.23,
      "importance_ratio": 1.0000276565551758,
      "kl_div_avg": 0.019494881853461266,
      "learning_rate": 4.787878787878788e-07,
      "loss_func": "stage2",
      "step": 97,
      "total_loss": 0.5362880229949951
    },
    {
      "epoch": 0.23,
      "importance_ratio": 1.0001310110092163,
      "kl_div_avg": 0.018007826060056686,
      "learning_rate": 4.785547785547786e-07,
      "loss_func": "stage2",
      "step": 98,
      "total_loss": 0.13775774836540222
    },
    {
      "epoch": 0.23,
      "importance_ratio": 1.0001639127731323,
      "kl_div_avg": 0.018012849614024162,
      "learning_rate": 4.783216783216783e-07,
      "loss_func": "stage2",
      "step": 99,
      "total_loss": 0.43274223804473877
    },
    {
      "epoch": 0.23,
      "importance_ratio": 1.0001168251037598,
      "kl_div_avg": 0.016401609405875206,
      "learning_rate": 4.78088578088578e-07,
      "loss_func": "stage2",
      "step": 100,
      "total_loss": 0.024271167814731598
    },
    {
      "epoch": 0.24,
      "importance_ratio": 0.9998679161071777,
      "kl_div_avg": 0.018232179805636406,
      "learning_rate": 4.778554778554778e-07,
      "loss_func": "stage2",
      "step": 101,
      "total_loss": -0.13439278304576874
    },
    {
      "epoch": 0.24,
      "importance_ratio": 0.9999688267707825,
      "kl_div_avg": 0.01988252066075802,
      "learning_rate": 4.776223776223776e-07,
      "loss_func": "stage2",
      "step": 102,
      "total_loss": -0.5875260233879089
    },
    {
      "epoch": 0.24,
      "importance_ratio": 1.000270128250122,
      "kl_div_avg": 0.024290431290864944,
      "learning_rate": 4.773892773892773e-07,
      "loss_func": "stage2",
      "step": 103,
      "total_loss": 0.43479496240615845
    },
    {
      "epoch": 0.24,
      "importance_ratio": 1.0000674724578857,
      "kl_div_avg": 0.017674673348665237,
      "learning_rate": 4.771561771561771e-07,
      "loss_func": "stage2",
      "step": 104,
      "total_loss": -0.30525317788124084
    },
    {
      "epoch": 0.25,
      "importance_ratio": 0.9998775720596313,
      "kl_div_avg": 0.015626681968569756,
      "learning_rate": 4.769230769230769e-07,
      "loss_func": "stage2",
      "step": 105,
      "total_loss": -0.4420226514339447
    },
    {
      "epoch": 0.25,
      "importance_ratio": 1.000077247619629,
      "kl_div_avg": 0.011648900806903839,
      "learning_rate": 4.7668997668997666e-07,
      "loss_func": "stage2",
      "step": 106,
      "total_loss": -0.7798103094100952
    },
    {
      "epoch": 0.25,
      "importance_ratio": 0.9998931884765625,
      "kl_div_avg": 0.02424587681889534,
      "learning_rate": 4.7645687645687646e-07,
      "loss_func": "stage2",
      "step": 107,
      "total_loss": 0.3463074564933777
    },
    {
      "epoch": 0.25,
      "importance_ratio": 1.0001239776611328,
      "kl_div_avg": 0.019622065126895905,
      "learning_rate": 4.762237762237762e-07,
      "loss_func": "stage2",
      "step": 108,
      "total_loss": -0.7446590662002563
    },
    {
      "epoch": 0.26,
      "importance_ratio": 1.0000584125518799,
      "kl_div_avg": 0.02028917521238327,
      "learning_rate": 4.75990675990676e-07,
      "loss_func": "stage2",
      "step": 109,
      "total_loss": -0.269249826669693
    },
    {
      "epoch": 0.26,
      "importance_ratio": 0.9998422861099243,
      "kl_div_avg": 0.022249866276979446,
      "learning_rate": 4.7575757575757574e-07,
      "loss_func": "stage2",
      "step": 110,
      "total_loss": -0.6871204376220703
    },
    {
      "epoch": 0.26,
      "importance_ratio": 1.0001263618469238,
      "kl_div_avg": 0.01973864436149597,
      "learning_rate": 4.755244755244755e-07,
      "loss_func": "stage2",
      "step": 111,
      "total_loss": -0.6550001502037048
    },
    {
      "epoch": 0.26,
      "importance_ratio": 0.9996439218521118,
      "kl_div_avg": 0.02077101171016693,
      "learning_rate": 4.7529137529137523e-07,
      "loss_func": "stage2",
      "step": 112,
      "total_loss": 0.042905211448669434
    },
    {
      "epoch": 0.27,
      "importance_ratio": 1.000232219696045,
      "kl_div_avg": 0.036871857941150665,
      "learning_rate": 4.750582750582751e-07,
      "loss_func": "stage2",
      "step": 113,
      "total_loss": -0.6672766804695129
    },
    {
      "epoch": 0.27,
      "importance_ratio": 0.9999071359634399,
      "kl_div_avg": 0.023731358349323273,
      "learning_rate": 4.748251748251748e-07,
      "loss_func": "stage2",
      "step": 114,
      "total_loss": 0.22221048176288605
    },
    {
      "epoch": 0.27,
      "importance_ratio": 0.9998910427093506,
      "kl_div_avg": 0.02947179228067398,
      "learning_rate": 4.7459207459207457e-07,
      "loss_func": "stage2",
      "step": 115,
      "total_loss": 0.05969160795211792
    },
    {
      "epoch": 0.27,
      "importance_ratio": 0.9999151825904846,
      "kl_div_avg": 0.028714872896671295,
      "learning_rate": 4.743589743589743e-07,
      "loss_func": "stage2",
      "step": 116,
      "total_loss": 0.023228317499160767
    },
    {
      "epoch": 0.28,
      "importance_ratio": 0.9999186992645264,
      "kl_div_avg": 0.022106900811195374,
      "learning_rate": 4.741258741258741e-07,
      "loss_func": "stage2",
      "step": 117,
      "total_loss": -0.4819675087928772
    },
    {
      "epoch": 0.28,
      "importance_ratio": 0.9995689392089844,
      "kl_div_avg": 0.03649330139160156,
      "learning_rate": 4.7389277389277386e-07,
      "loss_func": "stage2",
      "step": 118,
      "total_loss": 0.3108961582183838
    },
    {
      "epoch": 0.28,
      "importance_ratio": 0.9996166825294495,
      "kl_div_avg": 0.03707721084356308,
      "learning_rate": 4.7365967365967365e-07,
      "loss_func": "stage2",
      "step": 119,
      "total_loss": 0.3411310315132141
    },
    {
      "epoch": 0.28,
      "importance_ratio": 0.9993818998336792,
      "kl_div_avg": 0.02660643495619297,
      "learning_rate": 4.734265734265734e-07,
      "loss_func": "stage2",
      "step": 120,
      "total_loss": -0.1782078891992569
    },
    {
      "epoch": 0.29,
      "importance_ratio": 1.0000483989715576,
      "kl_div_avg": 0.020809054374694824,
      "learning_rate": 4.731934731934732e-07,
      "loss_func": "stage2",
      "step": 121,
      "total_loss": 0.2691134810447693
    },
    {
      "epoch": 0.29,
      "importance_ratio": 0.9999549388885498,
      "kl_div_avg": 0.018363434821367264,
      "learning_rate": 4.7296037296037294e-07,
      "loss_func": "stage2",
      "step": 122,
      "total_loss": -0.11637084186077118
    },
    {
      "epoch": 0.29,
      "importance_ratio": 1.0000113248825073,
      "kl_div_avg": 0.01843072474002838,
      "learning_rate": 4.727272727272727e-07,
      "loss_func": "stage2",
      "step": 123,
      "total_loss": 0.41335129737854004
    },
    {
      "epoch": 0.29,
      "importance_ratio": 0.9993541836738586,
      "kl_div_avg": 0.023193594068288803,
      "learning_rate": 4.724941724941724e-07,
      "loss_func": "stage2",
      "step": 124,
      "total_loss": 0.3763417601585388
    },
    {
      "epoch": 0.3,
      "importance_ratio": 1.0000863075256348,
      "kl_div_avg": 0.02424781210720539,
      "learning_rate": 4.722610722610723e-07,
      "loss_func": "stage2",
      "step": 125,
      "total_loss": 0.8012444972991943
    },
    {
      "epoch": 0.3,
      "importance_ratio": 0.9997611045837402,
      "kl_div_avg": 0.019796304404735565,
      "learning_rate": 4.72027972027972e-07,
      "loss_func": "stage2",
      "step": 126,
      "total_loss": 0.2471800446510315
    },
    {
      "epoch": 0.3,
      "importance_ratio": 0.9994201064109802,
      "kl_div_avg": 0.03692193701863289,
      "learning_rate": 4.7179487179487176e-07,
      "loss_func": "stage2",
      "step": 127,
      "total_loss": 0.6575199961662292
    },
    {
      "epoch": 0.3,
      "importance_ratio": 0.9993878602981567,
      "kl_div_avg": 0.024065542966127396,
      "learning_rate": 4.715617715617715e-07,
      "loss_func": "stage2",
      "step": 128,
      "total_loss": 0.39468204975128174
    },
    {
      "epoch": 0.31,
      "importance_ratio": 1.0001311302185059,
      "kl_div_avg": 0.024539019912481308,
      "learning_rate": 4.713286713286713e-07,
      "loss_func": "stage2",
      "step": 129,
      "total_loss": 0.1832764893770218
    },
    {
      "epoch": 0.31,
      "importance_ratio": 0.9998582005500793,
      "kl_div_avg": 0.024634480476379395,
      "learning_rate": 4.710955710955711e-07,
      "loss_func": "stage2",
      "step": 130,
      "total_loss": 0.1833437830209732
    },
    {
      "epoch": 0.31,
      "importance_ratio": 0.9997899532318115,
      "kl_div_avg": 0.024798087775707245,
      "learning_rate": 4.7086247086247085e-07,
      "loss_func": "stage2",
      "step": 131,
      "total_loss": -0.029144808650016785
    },
    {
      "epoch": 0.31,
      "importance_ratio": 0.9991902112960815,
      "kl_div_avg": 0.022448930889368057,
      "learning_rate": 4.706293706293706e-07,
      "loss_func": "stage2",
      "step": 132,
      "total_loss": 0.5746316909790039
    },
    {
      "epoch": 0.32,
      "importance_ratio": 0.9999872446060181,
      "kl_div_avg": 0.030649660155177116,
      "learning_rate": 4.703962703962704e-07,
      "loss_func": "stage2",
      "step": 133,
      "total_loss": -0.27921533584594727
    },
    {
      "epoch": 0.32,
      "importance_ratio": 0.9999322891235352,
      "kl_div_avg": 0.028502434492111206,
      "learning_rate": 4.7016317016317013e-07,
      "loss_func": "stage2",
      "step": 134,
      "total_loss": -0.18240980803966522
    },
    {
      "epoch": 0.32,
      "importance_ratio": 0.9991711378097534,
      "kl_div_avg": 0.034727346152067184,
      "learning_rate": 4.699300699300699e-07,
      "loss_func": "stage2",
      "step": 135,
      "total_loss": 0.20225152373313904
    },
    {
      "epoch": 0.32,
      "importance_ratio": 1.000333547592163,
      "kl_div_avg": 0.02375342883169651,
      "learning_rate": 4.696969696969697e-07,
      "loss_func": "stage2",
      "step": 136,
      "total_loss": 0.19642743468284607
    },
    {
      "epoch": 0.33,
      "importance_ratio": 1.0000425577163696,
      "kl_div_avg": 0.02941157855093479,
      "learning_rate": 4.6946386946386947e-07,
      "loss_func": "stage2",
      "step": 137,
      "total_loss": -0.2832520604133606
    },
    {
      "epoch": 0.33,
      "importance_ratio": 0.9998887777328491,
      "kl_div_avg": 0.0380094014108181,
      "learning_rate": 4.692307692307692e-07,
      "loss_func": "stage2",
      "step": 138,
      "total_loss": 0.07638365030288696
    },
    {
      "epoch": 0.33,
      "importance_ratio": 1.0000771284103394,
      "kl_div_avg": 0.031544946134090424,
      "learning_rate": 4.6899766899766896e-07,
      "loss_func": "stage2",
      "step": 139,
      "total_loss": -0.29178526997566223
    },
    {
      "epoch": 0.33,
      "importance_ratio": 0.9999626278877258,
      "kl_div_avg": 0.03620228171348572,
      "learning_rate": 4.6876456876456875e-07,
      "loss_func": "stage2",
      "step": 140,
      "total_loss": 0.12343016266822815
    },
    {
      "epoch": 0.34,
      "importance_ratio": 0.9999535083770752,
      "kl_div_avg": 0.026813074946403503,
      "learning_rate": 4.685314685314685e-07,
      "loss_func": "stage2",
      "step": 141,
      "total_loss": 0.024522602558135986
    },
    {
      "epoch": 0.34,
      "importance_ratio": 0.9998844265937805,
      "kl_div_avg": 0.031967416405677795,
      "learning_rate": 4.682983682983683e-07,
      "loss_func": "stage2",
      "step": 142,
      "total_loss": 0.6268632411956787
    },
    {
      "epoch": 0.34,
      "importance_ratio": 0.9997915029525757,
      "kl_div_avg": 0.024857094511389732,
      "learning_rate": 4.6806526806526804e-07,
      "loss_func": "stage2",
      "step": 143,
      "total_loss": 0.13797396421432495
    },
    {
      "epoch": 0.34,
      "importance_ratio": 0.9998888969421387,
      "kl_div_avg": 0.04350988566875458,
      "learning_rate": 4.6783216783216784e-07,
      "loss_func": "stage2",
      "step": 144,
      "total_loss": 0.5312750935554504
    },
    {
      "epoch": 0.34,
      "importance_ratio": 0.9998694658279419,
      "kl_div_avg": 0.03627926483750343,
      "learning_rate": 4.675990675990676e-07,
      "loss_func": "stage2",
      "step": 145,
      "total_loss": 0.09028466045856476
    },
    {
      "epoch": 0.34,
      "importance_ratio": 0.9999563694000244,
      "kl_div_avg": 0.02497359737753868,
      "learning_rate": 4.673659673659673e-07,
      "loss_func": "stage2",
      "step": 146,
      "total_loss": 0.5303145051002502
    },
    {
      "epoch": 0.34,
      "importance_ratio": 0.9999274015426636,
      "kl_div_avg": 0.026778005063533783,
      "learning_rate": 4.6713286713286707e-07,
      "loss_func": "stage2",
      "step": 147,
      "total_loss": 0.1259535402059555
    },
    {
      "epoch": 0.34,
      "importance_ratio": 0.9997484683990479,
      "kl_div_avg": 0.02970227226614952,
      "learning_rate": 4.668997668997669e-07,
      "loss_func": "stage2",
      "step": 148,
      "total_loss": 0.5203793048858643
    },
    {
      "epoch": 0.35,
      "importance_ratio": 1.0000306367874146,
      "kl_div_avg": 0.03883244842290878,
      "learning_rate": 4.6666666666666666e-07,
      "loss_func": "stage2",
      "step": 149,
      "total_loss": 0.5459209680557251
    },
    {
      "epoch": 0.35,
      "importance_ratio": 0.9999977946281433,
      "kl_div_avg": 0.010376233607530594,
      "learning_rate": 4.664335664335664e-07,
      "loss_func": "stage2",
      "step": 150,
      "total_loss": 0.5408558249473572
    },
    {
      "epoch": 0.35,
      "importance_ratio": 1.0000672340393066,
      "kl_div_avg": 0.020404186099767685,
      "learning_rate": 4.6620046620046615e-07,
      "loss_func": "stage2",
      "step": 151,
      "total_loss": 0.5100921392440796
    },
    {
      "epoch": 0.35,
      "importance_ratio": 1.0013837814331055,
      "kl_div_avg": 0.032543182373046875,
      "learning_rate": 4.6596736596736595e-07,
      "loss_func": "stage2",
      "step": 152,
      "total_loss": -0.4812799394130707
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 2150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20.0,
  "save_steps": 50,
  "total_flos": 0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}