| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 1088, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01838235294117647, | |
| "grad_norm": 4.980741500854492, | |
| "learning_rate": 8.256880733944956e-07, | |
| "loss": 0.4373, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03676470588235294, | |
| "grad_norm": 1.1804918050765991, | |
| "learning_rate": 1.743119266055046e-06, | |
| "loss": 0.3921, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05514705882352941, | |
| "grad_norm": 0.9609614610671997, | |
| "learning_rate": 2.6605504587155968e-06, | |
| "loss": 0.3541, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07352941176470588, | |
| "grad_norm": 0.8202292323112488, | |
| "learning_rate": 3.5779816513761473e-06, | |
| "loss": 0.3062, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09191176470588236, | |
| "grad_norm": 0.839361846446991, | |
| "learning_rate": 4.4954128440366975e-06, | |
| "loss": 0.2999, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11029411764705882, | |
| "grad_norm": 0.6450493335723877, | |
| "learning_rate": 5.412844036697248e-06, | |
| "loss": 0.2592, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12867647058823528, | |
| "grad_norm": 0.8965937495231628, | |
| "learning_rate": 6.330275229357799e-06, | |
| "loss": 0.3104, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14705882352941177, | |
| "grad_norm": 0.7927365899085999, | |
| "learning_rate": 7.247706422018349e-06, | |
| "loss": 0.2719, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.16544117647058823, | |
| "grad_norm": 1.1923980712890625, | |
| "learning_rate": 8.1651376146789e-06, | |
| "loss": 0.2705, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.18382352941176472, | |
| "grad_norm": 0.7955174446105957, | |
| "learning_rate": 9.08256880733945e-06, | |
| "loss": 0.276, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.20220588235294118, | |
| "grad_norm": 0.9915638566017151, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2757, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.22058823529411764, | |
| "grad_norm": 1.160922646522522, | |
| "learning_rate": 9.99742583072674e-06, | |
| "loss": 0.2652, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.23897058823529413, | |
| "grad_norm": 0.896353542804718, | |
| "learning_rate": 9.98970597344593e-06, | |
| "loss": 0.2949, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.25735294117647056, | |
| "grad_norm": 0.6837694048881531, | |
| "learning_rate": 9.976848377045343e-06, | |
| "loss": 0.247, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2757352941176471, | |
| "grad_norm": 0.7046304941177368, | |
| "learning_rate": 9.958866280576803e-06, | |
| "loss": 0.2403, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 0.8753547072410583, | |
| "learning_rate": 9.935778199624394e-06, | |
| "loss": 0.2597, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3125, | |
| "grad_norm": 0.8226246237754822, | |
| "learning_rate": 9.90760790723954e-06, | |
| "loss": 0.2574, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.33088235294117646, | |
| "grad_norm": 0.820383608341217, | |
| "learning_rate": 9.874384409462673e-06, | |
| "loss": 0.2652, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3492647058823529, | |
| "grad_norm": 0.7547650337219238, | |
| "learning_rate": 9.836141915456646e-06, | |
| "loss": 0.2522, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.36764705882352944, | |
| "grad_norm": 0.72496497631073, | |
| "learning_rate": 9.792919802282656e-06, | |
| "loss": 0.2536, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3860294117647059, | |
| "grad_norm": 0.8042762279510498, | |
| "learning_rate": 9.744762574354967e-06, | |
| "loss": 0.2482, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.40441176470588236, | |
| "grad_norm": 0.7786635756492615, | |
| "learning_rate": 9.691719817616148e-06, | |
| "loss": 0.2167, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4227941176470588, | |
| "grad_norm": 0.8994752764701843, | |
| "learning_rate": 9.633846148480024e-06, | |
| "loss": 0.2359, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4411764705882353, | |
| "grad_norm": 0.7100334763526917, | |
| "learning_rate": 9.571201157594925e-06, | |
| "loss": 0.231, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.45955882352941174, | |
| "grad_norm": 0.6736875176429749, | |
| "learning_rate": 9.503849348485112e-06, | |
| "loss": 0.2596, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.47794117647058826, | |
| "grad_norm": 0.8927339315414429, | |
| "learning_rate": 9.431860071133592e-06, | |
| "loss": 0.2653, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4963235294117647, | |
| "grad_norm": 0.7796552181243896, | |
| "learning_rate": 9.355307450574666e-06, | |
| "loss": 0.2423, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5147058823529411, | |
| "grad_norm": 0.8039178252220154, | |
| "learning_rate": 9.27427031056979e-06, | |
| "loss": 0.2497, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5330882352941176, | |
| "grad_norm": 0.8636899590492249, | |
| "learning_rate": 9.188832092445281e-06, | |
| "loss": 0.249, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5514705882352942, | |
| "grad_norm": 0.6733593940734863, | |
| "learning_rate": 9.09908076917548e-06, | |
| "loss": 0.2415, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5698529411764706, | |
| "grad_norm": 0.6637346148490906, | |
| "learning_rate": 9.00510875479983e-06, | |
| "loss": 0.2207, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 0.6064783930778503, | |
| "learning_rate": 8.907012809267107e-06, | |
| "loss": 0.2559, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6066176470588235, | |
| "grad_norm": 0.8090744018554688, | |
| "learning_rate": 8.804893938804839e-06, | |
| "loss": 0.2446, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 0.639586865901947, | |
| "learning_rate": 8.698857291916456e-06, | |
| "loss": 0.2277, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6433823529411765, | |
| "grad_norm": 0.695817768573761, | |
| "learning_rate": 8.58901205111326e-06, | |
| "loss": 0.2669, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6617647058823529, | |
| "grad_norm": 0.8129636645317078, | |
| "learning_rate": 8.475471320492728e-06, | |
| "loss": 0.2233, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6801470588235294, | |
| "grad_norm": 0.6946842074394226, | |
| "learning_rate": 8.35835200927887e-06, | |
| "loss": 0.2359, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6985294117647058, | |
| "grad_norm": 0.4986434280872345, | |
| "learning_rate": 8.237774711444575e-06, | |
| "loss": 0.2144, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7169117647058824, | |
| "grad_norm": 0.7395103573799133, | |
| "learning_rate": 8.113863581539905e-06, | |
| "loss": 0.231, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7352941176470589, | |
| "grad_norm": 0.643147349357605, | |
| "learning_rate": 7.986746206854143e-06, | |
| "loss": 0.2324, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7536764705882353, | |
| "grad_norm": 0.6714458465576172, | |
| "learning_rate": 7.856553476043294e-06, | |
| "loss": 0.2401, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7720588235294118, | |
| "grad_norm": 0.8807610273361206, | |
| "learning_rate": 7.723419444358261e-06, | |
| "loss": 0.2445, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7904411764705882, | |
| "grad_norm": 0.8745494484901428, | |
| "learning_rate": 7.5874811956124805e-06, | |
| "loss": 0.2574, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8088235294117647, | |
| "grad_norm": 0.6509207487106323, | |
| "learning_rate": 7.4488787010311425e-06, | |
| "loss": 0.2179, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8272058823529411, | |
| "grad_norm": 0.680579423904419, | |
| "learning_rate": 7.3077546751273494e-06, | |
| "loss": 0.2246, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8455882352941176, | |
| "grad_norm": 0.6059340834617615, | |
| "learning_rate": 7.164254428753581e-06, | |
| "loss": 0.2352, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8639705882352942, | |
| "grad_norm": 0.7696027755737305, | |
| "learning_rate": 7.018525719479805e-06, | |
| "loss": 0.2517, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 0.7263879179954529, | |
| "learning_rate": 6.870718599452279e-06, | |
| "loss": 0.2282, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9007352941176471, | |
| "grad_norm": 0.7986669540405273, | |
| "learning_rate": 6.7209852608897005e-06, | |
| "loss": 0.238, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9191176470588235, | |
| "grad_norm": 0.7858214974403381, | |
| "learning_rate": 6.569479879375795e-06, | |
| "loss": 0.2461, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9375, | |
| "grad_norm": 0.5920738577842712, | |
| "learning_rate": 6.416358455109695e-06, | |
| "loss": 0.2452, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9558823529411765, | |
| "grad_norm": 0.6721693277359009, | |
| "learning_rate": 6.261778652277565e-06, | |
| "loss": 0.2225, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9742647058823529, | |
| "grad_norm": 0.8532955646514893, | |
| "learning_rate": 6.105899636710895e-06, | |
| "loss": 0.2449, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9926470588235294, | |
| "grad_norm": 0.7130278944969177, | |
| "learning_rate": 5.948881911998572e-06, | |
| "loss": 0.2111, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.0110294117647058, | |
| "grad_norm": 0.6958662271499634, | |
| "learning_rate": 5.790887154221521e-06, | |
| "loss": 0.2243, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.0294117647058822, | |
| "grad_norm": 0.7343012094497681, | |
| "learning_rate": 5.632078045480065e-06, | |
| "loss": 0.2013, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.0477941176470589, | |
| "grad_norm": 0.7366646528244019, | |
| "learning_rate": 5.472618106385415e-06, | |
| "loss": 0.2045, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.0661764705882353, | |
| "grad_norm": 0.500787615776062, | |
| "learning_rate": 5.31267152768779e-06, | |
| "loss": 0.2004, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.0845588235294117, | |
| "grad_norm": 0.6535488367080688, | |
| "learning_rate": 5.152403001214483e-06, | |
| "loss": 0.203, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.1029411764705883, | |
| "grad_norm": 0.8450496196746826, | |
| "learning_rate": 4.991977550292028e-06, | |
| "loss": 0.2002, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1213235294117647, | |
| "grad_norm": 0.8866769671440125, | |
| "learning_rate": 4.831560359826985e-06, | |
| "loss": 0.2179, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.1397058823529411, | |
| "grad_norm": 0.8293766975402832, | |
| "learning_rate": 4.671316606220394e-06, | |
| "loss": 0.2025, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.1580882352941178, | |
| "grad_norm": 0.7488539218902588, | |
| "learning_rate": 4.511411287290964e-06, | |
| "loss": 0.2119, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 0.6722701191902161, | |
| "learning_rate": 4.35200905238214e-06, | |
| "loss": 0.2063, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.1948529411764706, | |
| "grad_norm": 0.7874387502670288, | |
| "learning_rate": 4.193274032828e-06, | |
| "loss": 0.1826, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.213235294117647, | |
| "grad_norm": 0.6973450779914856, | |
| "learning_rate": 4.035369672952516e-06, | |
| "loss": 0.1919, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.2316176470588236, | |
| "grad_norm": 0.5885277390480042, | |
| "learning_rate": 3.8784585617762084e-06, | |
| "loss": 0.2193, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.8693950772285461, | |
| "learning_rate": 3.7227022656034873e-06, | |
| "loss": 0.2069, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.2683823529411764, | |
| "grad_norm": 0.747329592704773, | |
| "learning_rate": 3.568261161663042e-06, | |
| "loss": 0.2188, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.2867647058823528, | |
| "grad_norm": 0.7728601098060608, | |
| "learning_rate": 3.4152942729725896e-06, | |
| "loss": 0.2019, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.3051470588235294, | |
| "grad_norm": 0.676393449306488, | |
| "learning_rate": 3.263959104598009e-06, | |
| "loss": 0.2036, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.3235294117647058, | |
| "grad_norm": 0.6831977963447571, | |
| "learning_rate": 3.114411481475455e-06, | |
| "loss": 0.2052, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.3419117647058822, | |
| "grad_norm": 0.5936601161956787, | |
| "learning_rate": 2.966805387963463e-06, | |
| "loss": 0.2003, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.3602941176470589, | |
| "grad_norm": 0.6197609901428223, | |
| "learning_rate": 2.821292809290217e-06, | |
| "loss": 0.1894, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.3786764705882353, | |
| "grad_norm": 0.4961455464363098, | |
| "learning_rate": 2.678023575059274e-06, | |
| "loss": 0.1873, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.3970588235294117, | |
| "grad_norm": 0.8226666450500488, | |
| "learning_rate": 2.5371452049748603e-06, | |
| "loss": 0.2247, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.4154411764705883, | |
| "grad_norm": 0.623539388179779, | |
| "learning_rate": 2.3988027569455895e-06, | |
| "loss": 0.1846, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.4338235294117647, | |
| "grad_norm": 0.6834192276000977, | |
| "learning_rate": 2.2631386777230248e-06, | |
| "loss": 0.1829, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.4522058823529411, | |
| "grad_norm": 0.9627503156661987, | |
| "learning_rate": 2.130292656228856e-06, | |
| "loss": 0.1813, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": 0.5110943913459778, | |
| "learning_rate": 2.0004014797217207e-06, | |
| "loss": 0.193, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.4889705882352942, | |
| "grad_norm": 0.6010143756866455, | |
| "learning_rate": 1.873598892951795e-06, | |
| "loss": 0.208, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.5073529411764706, | |
| "grad_norm": 0.8185718655586243, | |
| "learning_rate": 1.7500154604481312e-06, | |
| "loss": 0.2006, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.5257352941176472, | |
| "grad_norm": 0.6126958131790161, | |
| "learning_rate": 1.629778432080586e-06, | |
| "loss": 0.1746, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.5441176470588234, | |
| "grad_norm": 0.5845675468444824, | |
| "learning_rate": 1.513011612034726e-06, | |
| "loss": 0.2013, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.5625, | |
| "grad_norm": 0.8226555585861206, | |
| "learning_rate": 1.3998352313346768e-06, | |
| "loss": 0.2159, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.5808823529411766, | |
| "grad_norm": 0.8147032856941223, | |
| "learning_rate": 1.2903658240450989e-06, | |
| "loss": 0.2234, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.5992647058823528, | |
| "grad_norm": 0.7199397683143616, | |
| "learning_rate": 1.184716107279837e-06, | |
| "loss": 0.1798, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.6176470588235294, | |
| "grad_norm": 0.6966183185577393, | |
| "learning_rate": 1.0829948651407374e-06, | |
| "loss": 0.1886, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.6360294117647058, | |
| "grad_norm": 0.614043116569519, | |
| "learning_rate": 9.85306836706184e-07, | |
| "loss": 0.1908, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.6544117647058822, | |
| "grad_norm": 0.8714206218719482, | |
| "learning_rate": 8.917526081846411e-07, | |
| "loss": 0.1956, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.6727941176470589, | |
| "grad_norm": 0.7226178050041199, | |
| "learning_rate": 8.024285093442874e-07, | |
| "loss": 0.1845, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.6911764705882353, | |
| "grad_norm": 0.6284250020980835, | |
| "learning_rate": 7.17426514325359e-07, | |
| "loss": 0.204, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.7095588235294117, | |
| "grad_norm": 0.6917927265167236, | |
| "learning_rate": 6.36834146937354e-07, | |
| "loss": 0.1878, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.7279411764705883, | |
| "grad_norm": 0.5628108382225037, | |
| "learning_rate": 5.607343905385898e-07, | |
| "loss": 0.1668, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.7463235294117647, | |
| "grad_norm": 0.8957297801971436, | |
| "learning_rate": 4.892056025909148e-07, | |
| "loss": 0.2103, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": 0.6452038288116455, | |
| "learning_rate": 4.2232143397756607e-07, | |
| "loss": 0.1933, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.7830882352941178, | |
| "grad_norm": 1.0203959941864014, | |
| "learning_rate": 3.6015075316722605e-07, | |
| "loss": 0.21, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.8014705882352942, | |
| "grad_norm": 0.696893572807312, | |
| "learning_rate": 3.02757575302392e-07, | |
| "loss": 0.1955, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.8198529411764706, | |
| "grad_norm": 0.7462745904922485, | |
| "learning_rate": 2.5020099628504603e-07, | |
| "loss": 0.1765, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.8382352941176472, | |
| "grad_norm": 0.6422279477119446, | |
| "learning_rate": 2.0253513192751374e-07, | |
| "loss": 0.1964, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.8566176470588234, | |
| "grad_norm": 0.6333200931549072, | |
| "learning_rate": 1.5980906223115933e-07, | |
| "loss": 0.1917, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 0.6655058264732361, | |
| "learning_rate": 1.220667808502951e-07, | |
| "loss": 0.197, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.8933823529411766, | |
| "grad_norm": 0.4924130141735077, | |
| "learning_rate": 8.934714979333403e-08, | |
| "loss": 0.2033, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.9117647058823528, | |
| "grad_norm": 0.6515531539916992, | |
| "learning_rate": 6.168385940783727e-08, | |
| "loss": 0.1886, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.9301470588235294, | |
| "grad_norm": 0.5237254500389099, | |
| "learning_rate": 3.910539369064603e-08, | |
| "loss": 0.1924, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.9485294117647058, | |
| "grad_norm": 0.8416038751602173, | |
| "learning_rate": 2.1635000958836748e-08, | |
| "loss": 0.1904, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.9669117647058822, | |
| "grad_norm": 0.604259729385376, | |
| "learning_rate": 9.290669911672934e-09, | |
| "loss": 0.194, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.9852941176470589, | |
| "grad_norm": 0.6746427416801453, | |
| "learning_rate": 2.085111108227067e-09, | |
| "loss": 0.1885, | |
| "step": 1080 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1088, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1729409604948328e+19, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |