{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.999613003095975, "eval_steps": 500, "global_step": 20664, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001934984520123839, "grad_norm": 0.8183349967002869, "learning_rate": 0.0, "loss": 0.1882, "step": 1 }, { "epoch": 0.0003869969040247678, "grad_norm": 0.7742494940757751, "learning_rate": 6.41025641025641e-07, "loss": 0.1854, "step": 2 }, { "epoch": 0.0005804953560371517, "grad_norm": 0.8177714347839355, "learning_rate": 1.282051282051282e-06, "loss": 0.1984, "step": 3 }, { "epoch": 0.0007739938080495357, "grad_norm": 0.8438796997070312, "learning_rate": 1.9230769230769234e-06, "loss": 0.1845, "step": 4 }, { "epoch": 0.0009674922600619195, "grad_norm": 0.7959060072898865, "learning_rate": 2.564102564102564e-06, "loss": 0.1852, "step": 5 }, { "epoch": 0.0011609907120743034, "grad_norm": 0.7296947240829468, "learning_rate": 3.205128205128205e-06, "loss": 0.1884, "step": 6 }, { "epoch": 0.0013544891640866873, "grad_norm": 0.7730377912521362, "learning_rate": 3.846153846153847e-06, "loss": 0.1992, "step": 7 }, { "epoch": 0.0015479876160990713, "grad_norm": 0.8084168434143066, "learning_rate": 4.487179487179488e-06, "loss": 0.1839, "step": 8 }, { "epoch": 0.001741486068111455, "grad_norm": 0.746585488319397, "learning_rate": 5.128205128205128e-06, "loss": 0.1888, "step": 9 }, { "epoch": 0.001934984520123839, "grad_norm": 0.7145912647247314, "learning_rate": 5.76923076923077e-06, "loss": 0.1968, "step": 10 }, { "epoch": 0.0021284829721362228, "grad_norm": 0.663398265838623, "learning_rate": 6.41025641025641e-06, "loss": 0.19, "step": 11 }, { "epoch": 0.0023219814241486067, "grad_norm": 0.5043975710868835, "learning_rate": 7.051282051282052e-06, "loss": 0.1656, "step": 12 }, { "epoch": 0.0025154798761609907, "grad_norm": 0.5118488669395447, "learning_rate": 7.692307692307694e-06, "loss": 0.1664, "step": 13 }, { "epoch": 0.0027089783281733747, "grad_norm": 0.48946407437324524, "learning_rate": 8.333333333333334e-06, "loss": 0.1799, "step": 14 }, { "epoch": 0.0029024767801857586, "grad_norm": 0.48404568433761597, "learning_rate": 8.974358974358976e-06, "loss": 0.1826, "step": 15 }, { "epoch": 0.0030959752321981426, "grad_norm": 0.5684599280357361, "learning_rate": 9.615384615384616e-06, "loss": 0.1671, "step": 16 }, { "epoch": 0.003289473684210526, "grad_norm": 0.7415030002593994, "learning_rate": 1.0256410256410256e-05, "loss": 0.1456, "step": 17 }, { "epoch": 0.00348297213622291, "grad_norm": 0.7989171743392944, "learning_rate": 1.0897435897435898e-05, "loss": 0.1571, "step": 18 }, { "epoch": 0.003676470588235294, "grad_norm": 0.707086980342865, "learning_rate": 1.153846153846154e-05, "loss": 0.1494, "step": 19 }, { "epoch": 0.003869969040247678, "grad_norm": 0.7556892037391663, "learning_rate": 1.217948717948718e-05, "loss": 0.1799, "step": 20 }, { "epoch": 0.004063467492260062, "grad_norm": 0.6621124744415283, "learning_rate": 1.282051282051282e-05, "loss": 0.149, "step": 21 }, { "epoch": 0.0042569659442724455, "grad_norm": 0.5024965405464172, "learning_rate": 1.3461538461538462e-05, "loss": 0.1565, "step": 22 }, { "epoch": 0.00445046439628483, "grad_norm": 0.464155375957489, "learning_rate": 1.4102564102564104e-05, "loss": 0.1412, "step": 23 }, { "epoch": 0.0046439628482972135, "grad_norm": 0.4555774927139282, "learning_rate": 1.4743589743589745e-05, "loss": 0.133, "step": 24 }, { "epoch": 0.004837461300309598, "grad_norm": 
0.5358014106750488, "learning_rate": 1.5384615384615387e-05, "loss": 0.129, "step": 25 }, { "epoch": 0.005030959752321981, "grad_norm": 0.546299397945404, "learning_rate": 1.602564102564103e-05, "loss": 0.13, "step": 26 }, { "epoch": 0.005224458204334365, "grad_norm": 0.5411003232002258, "learning_rate": 1.6666666666666667e-05, "loss": 0.1216, "step": 27 }, { "epoch": 0.005417956656346749, "grad_norm": 0.5039526224136353, "learning_rate": 1.730769230769231e-05, "loss": 0.125, "step": 28 }, { "epoch": 0.005611455108359133, "grad_norm": 0.44415056705474854, "learning_rate": 1.794871794871795e-05, "loss": 0.1044, "step": 29 }, { "epoch": 0.005804953560371517, "grad_norm": 0.43691906332969666, "learning_rate": 1.858974358974359e-05, "loss": 0.1114, "step": 30 }, { "epoch": 0.005998452012383901, "grad_norm": 0.45508405566215515, "learning_rate": 1.923076923076923e-05, "loss": 0.1137, "step": 31 }, { "epoch": 0.006191950464396285, "grad_norm": 0.43553102016448975, "learning_rate": 1.987179487179487e-05, "loss": 0.1042, "step": 32 }, { "epoch": 0.006385448916408669, "grad_norm": 0.4308745861053467, "learning_rate": 2.0512820512820512e-05, "loss": 0.0971, "step": 33 }, { "epoch": 0.006578947368421052, "grad_norm": 0.4405583143234253, "learning_rate": 2.1153846153846154e-05, "loss": 0.1001, "step": 34 }, { "epoch": 0.006772445820433437, "grad_norm": 0.43282443284988403, "learning_rate": 2.1794871794871795e-05, "loss": 0.0902, "step": 35 }, { "epoch": 0.00696594427244582, "grad_norm": 0.4534406363964081, "learning_rate": 2.2435897435897437e-05, "loss": 0.093, "step": 36 }, { "epoch": 0.007159442724458205, "grad_norm": 0.4205000400543213, "learning_rate": 2.307692307692308e-05, "loss": 0.0834, "step": 37 }, { "epoch": 0.007352941176470588, "grad_norm": 0.40933045744895935, "learning_rate": 2.3717948717948718e-05, "loss": 0.0776, "step": 38 }, { "epoch": 0.0075464396284829725, "grad_norm": 0.3861753046512604, "learning_rate": 2.435897435897436e-05, "loss": 0.0746, "step": 39 }, { "epoch": 0.007739938080495356, "grad_norm": 0.3801393508911133, "learning_rate": 2.5e-05, "loss": 0.0661, "step": 40 }, { "epoch": 0.00793343653250774, "grad_norm": 0.3467535972595215, "learning_rate": 2.564102564102564e-05, "loss": 0.0598, "step": 41 }, { "epoch": 0.008126934984520124, "grad_norm": 0.26516029238700867, "learning_rate": 2.6282051282051285e-05, "loss": 0.0633, "step": 42 }, { "epoch": 0.008320433436532508, "grad_norm": 0.26151126623153687, "learning_rate": 2.6923076923076923e-05, "loss": 0.0536, "step": 43 }, { "epoch": 0.008513931888544891, "grad_norm": 0.21448847651481628, "learning_rate": 2.756410256410257e-05, "loss": 0.0577, "step": 44 }, { "epoch": 0.008707430340557275, "grad_norm": 0.1568843424320221, "learning_rate": 2.8205128205128207e-05, "loss": 0.0469, "step": 45 }, { "epoch": 0.00890092879256966, "grad_norm": 0.11201606690883636, "learning_rate": 2.8846153846153845e-05, "loss": 0.0462, "step": 46 }, { "epoch": 0.009094427244582043, "grad_norm": 0.16873207688331604, "learning_rate": 2.948717948717949e-05, "loss": 0.0491, "step": 47 }, { "epoch": 0.009287925696594427, "grad_norm": 0.27713286876678467, "learning_rate": 3.012820512820513e-05, "loss": 0.0468, "step": 48 }, { "epoch": 0.009481424148606811, "grad_norm": 0.3300670385360718, "learning_rate": 3.0769230769230774e-05, "loss": 0.0522, "step": 49 }, { "epoch": 0.009674922600619196, "grad_norm": 0.3742794692516327, "learning_rate": 3.141025641025641e-05, "loss": 0.0474, "step": 50 }, { "epoch": 0.009868421052631578, "grad_norm": 
0.45741474628448486, "learning_rate": 3.205128205128206e-05, "loss": 0.0557, "step": 51 }, { "epoch": 0.010061919504643963, "grad_norm": 0.43496593832969666, "learning_rate": 3.269230769230769e-05, "loss": 0.0517, "step": 52 }, { "epoch": 0.010255417956656347, "grad_norm": 0.42359137535095215, "learning_rate": 3.3333333333333335e-05, "loss": 0.0558, "step": 53 }, { "epoch": 0.01044891640866873, "grad_norm": 0.28517210483551025, "learning_rate": 3.397435897435898e-05, "loss": 0.0522, "step": 54 }, { "epoch": 0.010642414860681114, "grad_norm": 0.16828086972236633, "learning_rate": 3.461538461538462e-05, "loss": 0.0433, "step": 55 }, { "epoch": 0.010835913312693499, "grad_norm": 0.15381716191768646, "learning_rate": 3.525641025641026e-05, "loss": 0.0449, "step": 56 }, { "epoch": 0.011029411764705883, "grad_norm": 0.20227043330669403, "learning_rate": 3.58974358974359e-05, "loss": 0.0396, "step": 57 }, { "epoch": 0.011222910216718266, "grad_norm": 0.20381596684455872, "learning_rate": 3.653846153846154e-05, "loss": 0.0416, "step": 58 }, { "epoch": 0.01141640866873065, "grad_norm": 0.17562046647071838, "learning_rate": 3.717948717948718e-05, "loss": 0.0496, "step": 59 }, { "epoch": 0.011609907120743035, "grad_norm": 0.21138831973075867, "learning_rate": 3.782051282051282e-05, "loss": 0.043, "step": 60 }, { "epoch": 0.011803405572755417, "grad_norm": 0.23214790225028992, "learning_rate": 3.846153846153846e-05, "loss": 0.0542, "step": 61 }, { "epoch": 0.011996904024767802, "grad_norm": 0.199771910905838, "learning_rate": 3.9102564102564105e-05, "loss": 0.0399, "step": 62 }, { "epoch": 0.012190402476780186, "grad_norm": 0.20005881786346436, "learning_rate": 3.974358974358974e-05, "loss": 0.0434, "step": 63 }, { "epoch": 0.01238390092879257, "grad_norm": 0.287185400724411, "learning_rate": 4.038461538461539e-05, "loss": 0.0484, "step": 64 }, { "epoch": 0.012577399380804953, "grad_norm": 0.2539328634738922, "learning_rate": 4.1025641025641023e-05, "loss": 0.0503, "step": 65 }, { "epoch": 0.012770897832817337, "grad_norm": 0.16829143464565277, "learning_rate": 4.166666666666667e-05, "loss": 0.0361, "step": 66 }, { "epoch": 0.012964396284829722, "grad_norm": 0.16546620428562164, "learning_rate": 4.230769230769231e-05, "loss": 0.0401, "step": 67 }, { "epoch": 0.013157894736842105, "grad_norm": 0.17139483988285065, "learning_rate": 4.294871794871795e-05, "loss": 0.0457, "step": 68 }, { "epoch": 0.013351393188854489, "grad_norm": 0.1976938247680664, "learning_rate": 4.358974358974359e-05, "loss": 0.0487, "step": 69 }, { "epoch": 0.013544891640866873, "grad_norm": 0.12015397846698761, "learning_rate": 4.423076923076923e-05, "loss": 0.0475, "step": 70 }, { "epoch": 0.013738390092879258, "grad_norm": 0.19430142641067505, "learning_rate": 4.4871794871794874e-05, "loss": 0.0382, "step": 71 }, { "epoch": 0.01393188854489164, "grad_norm": 0.1377352923154831, "learning_rate": 4.5512820512820516e-05, "loss": 0.0364, "step": 72 }, { "epoch": 0.014125386996904025, "grad_norm": 0.14669062197208405, "learning_rate": 4.615384615384616e-05, "loss": 0.0388, "step": 73 }, { "epoch": 0.01431888544891641, "grad_norm": 0.16174161434173584, "learning_rate": 4.67948717948718e-05, "loss": 0.0337, "step": 74 }, { "epoch": 0.014512383900928792, "grad_norm": 0.1960008442401886, "learning_rate": 4.7435897435897435e-05, "loss": 0.0341, "step": 75 }, { "epoch": 0.014705882352941176, "grad_norm": 0.10589081048965454, "learning_rate": 4.8076923076923084e-05, "loss": 0.0349, "step": 76 }, { "epoch": 0.01489938080495356, "grad_norm": 
0.12014921754598618, "learning_rate": 4.871794871794872e-05, "loss": 0.0289, "step": 77 }, { "epoch": 0.015092879256965945, "grad_norm": 0.24903666973114014, "learning_rate": 4.935897435897436e-05, "loss": 0.0391, "step": 78 }, { "epoch": 0.015286377708978328, "grad_norm": 0.22562551498413086, "learning_rate": 5e-05, "loss": 0.0446, "step": 79 }, { "epoch": 0.015479876160990712, "grad_norm": 0.11937607824802399, "learning_rate": 5.0641025641025644e-05, "loss": 0.0326, "step": 80 }, { "epoch": 0.015673374613003097, "grad_norm": 0.14980317652225494, "learning_rate": 5.128205128205128e-05, "loss": 0.0345, "step": 81 }, { "epoch": 0.01586687306501548, "grad_norm": 0.13236387073993683, "learning_rate": 5.192307692307693e-05, "loss": 0.0362, "step": 82 }, { "epoch": 0.016060371517027865, "grad_norm": 0.13055358827114105, "learning_rate": 5.256410256410257e-05, "loss": 0.0327, "step": 83 }, { "epoch": 0.016253869969040248, "grad_norm": 0.12051844596862793, "learning_rate": 5.3205128205128205e-05, "loss": 0.0339, "step": 84 }, { "epoch": 0.01644736842105263, "grad_norm": 0.10130244493484497, "learning_rate": 5.384615384615385e-05, "loss": 0.0309, "step": 85 }, { "epoch": 0.016640866873065017, "grad_norm": 0.08976416289806366, "learning_rate": 5.448717948717948e-05, "loss": 0.0336, "step": 86 }, { "epoch": 0.0168343653250774, "grad_norm": 0.11117298156023026, "learning_rate": 5.512820512820514e-05, "loss": 0.0285, "step": 87 }, { "epoch": 0.017027863777089782, "grad_norm": 0.10062684118747711, "learning_rate": 5.576923076923077e-05, "loss": 0.0302, "step": 88 }, { "epoch": 0.01722136222910217, "grad_norm": 0.1313498169183731, "learning_rate": 5.6410256410256414e-05, "loss": 0.0316, "step": 89 }, { "epoch": 0.01741486068111455, "grad_norm": 0.09688585996627808, "learning_rate": 5.705128205128205e-05, "loss": 0.0334, "step": 90 }, { "epoch": 0.017608359133126934, "grad_norm": 0.11499274522066116, "learning_rate": 5.769230769230769e-05, "loss": 0.0302, "step": 91 }, { "epoch": 0.01780185758513932, "grad_norm": 0.08817349374294281, "learning_rate": 5.833333333333334e-05, "loss": 0.0303, "step": 92 }, { "epoch": 0.017995356037151702, "grad_norm": 0.07077126950025558, "learning_rate": 5.897435897435898e-05, "loss": 0.0279, "step": 93 }, { "epoch": 0.018188854489164085, "grad_norm": 0.11623702943325043, "learning_rate": 5.9615384615384616e-05, "loss": 0.0357, "step": 94 }, { "epoch": 0.01838235294117647, "grad_norm": 0.07012739777565002, "learning_rate": 6.025641025641026e-05, "loss": 0.0262, "step": 95 }, { "epoch": 0.018575851393188854, "grad_norm": 0.20371699333190918, "learning_rate": 6.089743589743589e-05, "loss": 0.0292, "step": 96 }, { "epoch": 0.01876934984520124, "grad_norm": 0.09902871400117874, "learning_rate": 6.153846153846155e-05, "loss": 0.0326, "step": 97 }, { "epoch": 0.018962848297213623, "grad_norm": 0.15896765887737274, "learning_rate": 6.217948717948718e-05, "loss": 0.036, "step": 98 }, { "epoch": 0.019156346749226005, "grad_norm": 0.26603397727012634, "learning_rate": 6.282051282051282e-05, "loss": 0.0297, "step": 99 }, { "epoch": 0.01934984520123839, "grad_norm": 0.2015334963798523, "learning_rate": 6.346153846153847e-05, "loss": 0.0317, "step": 100 }, { "epoch": 0.019543343653250774, "grad_norm": 0.1830584555864334, "learning_rate": 6.410256410256412e-05, "loss": 0.0299, "step": 101 }, { "epoch": 0.019736842105263157, "grad_norm": 0.147419273853302, "learning_rate": 6.474358974358975e-05, "loss": 0.0283, "step": 102 }, { "epoch": 0.019930340557275543, "grad_norm": 
0.11239234358072281, "learning_rate": 6.538461538461539e-05, "loss": 0.0285, "step": 103 }, { "epoch": 0.020123839009287926, "grad_norm": 0.13886557519435883, "learning_rate": 6.602564102564102e-05, "loss": 0.0272, "step": 104 }, { "epoch": 0.02031733746130031, "grad_norm": 0.30493736267089844, "learning_rate": 6.666666666666667e-05, "loss": 0.0266, "step": 105 }, { "epoch": 0.020510835913312694, "grad_norm": 0.12020603567361832, "learning_rate": 6.730769230769232e-05, "loss": 0.0244, "step": 106 }, { "epoch": 0.020704334365325077, "grad_norm": 0.1808277815580368, "learning_rate": 6.794871794871795e-05, "loss": 0.0252, "step": 107 }, { "epoch": 0.02089783281733746, "grad_norm": 0.166920468211174, "learning_rate": 6.858974358974359e-05, "loss": 0.0277, "step": 108 }, { "epoch": 0.021091331269349846, "grad_norm": 0.15229041874408722, "learning_rate": 6.923076923076924e-05, "loss": 0.0266, "step": 109 }, { "epoch": 0.02128482972136223, "grad_norm": 0.14977368712425232, "learning_rate": 6.987179487179487e-05, "loss": 0.0277, "step": 110 }, { "epoch": 0.021478328173374615, "grad_norm": 0.1519925594329834, "learning_rate": 7.051282051282052e-05, "loss": 0.0232, "step": 111 }, { "epoch": 0.021671826625386997, "grad_norm": 0.1823054403066635, "learning_rate": 7.115384615384616e-05, "loss": 0.0265, "step": 112 }, { "epoch": 0.02186532507739938, "grad_norm": 0.16931822896003723, "learning_rate": 7.17948717948718e-05, "loss": 0.0252, "step": 113 }, { "epoch": 0.022058823529411766, "grad_norm": 0.1092604398727417, "learning_rate": 7.243589743589744e-05, "loss": 0.0221, "step": 114 }, { "epoch": 0.02225232198142415, "grad_norm": 0.09190355986356735, "learning_rate": 7.307692307692307e-05, "loss": 0.0233, "step": 115 }, { "epoch": 0.02244582043343653, "grad_norm": 0.22106137871742249, "learning_rate": 7.371794871794872e-05, "loss": 0.0274, "step": 116 }, { "epoch": 0.022639318885448918, "grad_norm": 0.18441008031368256, "learning_rate": 7.435897435897436e-05, "loss": 0.0274, "step": 117 }, { "epoch": 0.0228328173374613, "grad_norm": 0.3231974244117737, "learning_rate": 7.500000000000001e-05, "loss": 0.0347, "step": 118 }, { "epoch": 0.023026315789473683, "grad_norm": 0.16101565957069397, "learning_rate": 7.564102564102564e-05, "loss": 0.0257, "step": 119 }, { "epoch": 0.02321981424148607, "grad_norm": 0.17983663082122803, "learning_rate": 7.628205128205128e-05, "loss": 0.0275, "step": 120 }, { "epoch": 0.02341331269349845, "grad_norm": 0.23781165480613708, "learning_rate": 7.692307692307693e-05, "loss": 0.0261, "step": 121 }, { "epoch": 0.023606811145510834, "grad_norm": 0.329618364572525, "learning_rate": 7.756410256410257e-05, "loss": 0.0283, "step": 122 }, { "epoch": 0.02380030959752322, "grad_norm": 0.33367475867271423, "learning_rate": 7.820512820512821e-05, "loss": 0.0263, "step": 123 }, { "epoch": 0.023993808049535603, "grad_norm": 0.12840025126934052, "learning_rate": 7.884615384615384e-05, "loss": 0.0221, "step": 124 }, { "epoch": 0.02418730650154799, "grad_norm": 0.31646615266799927, "learning_rate": 7.948717948717948e-05, "loss": 0.0219, "step": 125 }, { "epoch": 0.024380804953560372, "grad_norm": 0.2454126924276352, "learning_rate": 8.012820512820514e-05, "loss": 0.0249, "step": 126 }, { "epoch": 0.024574303405572755, "grad_norm": 0.17925415933132172, "learning_rate": 8.076923076923078e-05, "loss": 0.0232, "step": 127 }, { "epoch": 0.02476780185758514, "grad_norm": 0.35357165336608887, "learning_rate": 8.141025641025641e-05, "loss": 0.0204, "step": 128 }, { "epoch": 0.024961300309597523, 
"grad_norm": 0.16300369799137115, "learning_rate": 8.205128205128205e-05, "loss": 0.0214, "step": 129 }, { "epoch": 0.025154798761609906, "grad_norm": 0.3139899969100952, "learning_rate": 8.26923076923077e-05, "loss": 0.0251, "step": 130 }, { "epoch": 0.025348297213622292, "grad_norm": 0.15925544500350952, "learning_rate": 8.333333333333334e-05, "loss": 0.0203, "step": 131 }, { "epoch": 0.025541795665634675, "grad_norm": 0.19722296297550201, "learning_rate": 8.397435897435898e-05, "loss": 0.0239, "step": 132 }, { "epoch": 0.025735294117647058, "grad_norm": 0.3536292314529419, "learning_rate": 8.461538461538461e-05, "loss": 0.0261, "step": 133 }, { "epoch": 0.025928792569659444, "grad_norm": 0.22256964445114136, "learning_rate": 8.525641025641026e-05, "loss": 0.0277, "step": 134 }, { "epoch": 0.026122291021671826, "grad_norm": 0.2683435082435608, "learning_rate": 8.58974358974359e-05, "loss": 0.0244, "step": 135 }, { "epoch": 0.02631578947368421, "grad_norm": 0.19496773183345795, "learning_rate": 8.653846153846155e-05, "loss": 0.0213, "step": 136 }, { "epoch": 0.026509287925696595, "grad_norm": 0.23761513829231262, "learning_rate": 8.717948717948718e-05, "loss": 0.0231, "step": 137 }, { "epoch": 0.026702786377708978, "grad_norm": 0.2586689293384552, "learning_rate": 8.782051282051283e-05, "loss": 0.0242, "step": 138 }, { "epoch": 0.02689628482972136, "grad_norm": 0.21141645312309265, "learning_rate": 8.846153846153847e-05, "loss": 0.0251, "step": 139 }, { "epoch": 0.027089783281733747, "grad_norm": 0.31300607323646545, "learning_rate": 8.910256410256411e-05, "loss": 0.0235, "step": 140 }, { "epoch": 0.02728328173374613, "grad_norm": 0.1064833402633667, "learning_rate": 8.974358974358975e-05, "loss": 0.0256, "step": 141 }, { "epoch": 0.027476780185758515, "grad_norm": 0.2576051950454712, "learning_rate": 9.038461538461538e-05, "loss": 0.021, "step": 142 }, { "epoch": 0.027670278637770898, "grad_norm": 0.20473606884479523, "learning_rate": 9.102564102564103e-05, "loss": 0.0275, "step": 143 }, { "epoch": 0.02786377708978328, "grad_norm": 0.13783115148544312, "learning_rate": 9.166666666666667e-05, "loss": 0.0262, "step": 144 }, { "epoch": 0.028057275541795667, "grad_norm": 0.2589997947216034, "learning_rate": 9.230769230769232e-05, "loss": 0.026, "step": 145 }, { "epoch": 0.02825077399380805, "grad_norm": 0.16393916308879852, "learning_rate": 9.294871794871795e-05, "loss": 0.0243, "step": 146 }, { "epoch": 0.028444272445820432, "grad_norm": 0.26873132586479187, "learning_rate": 9.35897435897436e-05, "loss": 0.025, "step": 147 }, { "epoch": 0.02863777089783282, "grad_norm": 0.19376811385154724, "learning_rate": 9.423076923076924e-05, "loss": 0.024, "step": 148 }, { "epoch": 0.0288312693498452, "grad_norm": 0.33840295672416687, "learning_rate": 9.487179487179487e-05, "loss": 0.0229, "step": 149 }, { "epoch": 0.029024767801857584, "grad_norm": 0.19426432251930237, "learning_rate": 9.551282051282052e-05, "loss": 0.0242, "step": 150 }, { "epoch": 0.02921826625386997, "grad_norm": 0.24482667446136475, "learning_rate": 9.615384615384617e-05, "loss": 0.0212, "step": 151 }, { "epoch": 0.029411764705882353, "grad_norm": 0.2675112783908844, "learning_rate": 9.67948717948718e-05, "loss": 0.0239, "step": 152 }, { "epoch": 0.029605263157894735, "grad_norm": 0.2703535556793213, "learning_rate": 9.743589743589744e-05, "loss": 0.0244, "step": 153 }, { "epoch": 0.02979876160990712, "grad_norm": 0.20602045953273773, "learning_rate": 9.807692307692307e-05, "loss": 0.0219, "step": 154 }, { "epoch": 
0.029992260061919504, "grad_norm": 0.19870364665985107, "learning_rate": 9.871794871794872e-05, "loss": 0.0212, "step": 155 }, { "epoch": 0.03018575851393189, "grad_norm": 0.2229195386171341, "learning_rate": 9.935897435897437e-05, "loss": 0.0245, "step": 156 }, { "epoch": 0.030379256965944273, "grad_norm": 0.12221026420593262, "learning_rate": 0.0001, "loss": 0.0237, "step": 157 }, { "epoch": 0.030572755417956655, "grad_norm": 0.3316829204559326, "learning_rate": 9.999999991635055e-05, "loss": 0.0246, "step": 158 }, { "epoch": 0.03076625386996904, "grad_norm": 0.1483875960111618, "learning_rate": 9.999999966540218e-05, "loss": 0.0232, "step": 159 }, { "epoch": 0.030959752321981424, "grad_norm": 0.25672823190689087, "learning_rate": 9.99999992471549e-05, "loss": 0.0243, "step": 160 }, { "epoch": 0.031153250773993807, "grad_norm": 0.19750350713729858, "learning_rate": 9.999999866160872e-05, "loss": 0.0226, "step": 161 }, { "epoch": 0.03134674922600619, "grad_norm": 0.180254265666008, "learning_rate": 9.999999790876363e-05, "loss": 0.0241, "step": 162 }, { "epoch": 0.03154024767801858, "grad_norm": 0.14186570048332214, "learning_rate": 9.999999698861962e-05, "loss": 0.0197, "step": 163 }, { "epoch": 0.03173374613003096, "grad_norm": 0.23751454055309296, "learning_rate": 9.999999590117673e-05, "loss": 0.0248, "step": 164 }, { "epoch": 0.031927244582043345, "grad_norm": 0.18784686923027039, "learning_rate": 9.999999464643493e-05, "loss": 0.0216, "step": 165 }, { "epoch": 0.03212074303405573, "grad_norm": 0.1422816812992096, "learning_rate": 9.999999322439425e-05, "loss": 0.0239, "step": 166 }, { "epoch": 0.03231424148606811, "grad_norm": 0.1311737596988678, "learning_rate": 9.999999163505467e-05, "loss": 0.0226, "step": 167 }, { "epoch": 0.032507739938080496, "grad_norm": 0.12001163512468338, "learning_rate": 9.999998987841622e-05, "loss": 0.0188, "step": 168 }, { "epoch": 0.03270123839009288, "grad_norm": 0.12929031252861023, "learning_rate": 9.999998795447889e-05, "loss": 0.0227, "step": 169 }, { "epoch": 0.03289473684210526, "grad_norm": 0.18261918425559998, "learning_rate": 9.99999858632427e-05, "loss": 0.0208, "step": 170 }, { "epoch": 0.03308823529411765, "grad_norm": 0.09928259998559952, "learning_rate": 9.999998360470764e-05, "loss": 0.0184, "step": 171 }, { "epoch": 0.033281733746130034, "grad_norm": 0.15650048851966858, "learning_rate": 9.999998117887375e-05, "loss": 0.0232, "step": 172 }, { "epoch": 0.03347523219814241, "grad_norm": 0.19204051792621613, "learning_rate": 9.9999978585741e-05, "loss": 0.0195, "step": 173 }, { "epoch": 0.0336687306501548, "grad_norm": 0.15069417655467987, "learning_rate": 9.999997582530941e-05, "loss": 0.0229, "step": 174 }, { "epoch": 0.033862229102167185, "grad_norm": 0.1807214468717575, "learning_rate": 9.999997289757902e-05, "loss": 0.0159, "step": 175 }, { "epoch": 0.034055727554179564, "grad_norm": 0.12691578269004822, "learning_rate": 9.99999698025498e-05, "loss": 0.0158, "step": 176 }, { "epoch": 0.03424922600619195, "grad_norm": 0.13076402246952057, "learning_rate": 9.99999665402218e-05, "loss": 0.0191, "step": 177 }, { "epoch": 0.03444272445820434, "grad_norm": 0.20631548762321472, "learning_rate": 9.9999963110595e-05, "loss": 0.0216, "step": 178 }, { "epoch": 0.034636222910216716, "grad_norm": 0.1650087535381317, "learning_rate": 9.999995951366942e-05, "loss": 0.02, "step": 179 }, { "epoch": 0.0348297213622291, "grad_norm": 0.18794631958007812, "learning_rate": 9.999995574944508e-05, "loss": 0.0223, "step": 180 }, { "epoch": 
0.03502321981424149, "grad_norm": 0.0963270366191864, "learning_rate": 9.999995181792201e-05, "loss": 0.022, "step": 181 }, { "epoch": 0.03521671826625387, "grad_norm": 0.16872307658195496, "learning_rate": 9.99999477191002e-05, "loss": 0.0232, "step": 182 }, { "epoch": 0.03541021671826625, "grad_norm": 0.11275999248027802, "learning_rate": 9.999994345297966e-05, "loss": 0.0245, "step": 183 }, { "epoch": 0.03560371517027864, "grad_norm": 0.0911858007311821, "learning_rate": 9.999993901956043e-05, "loss": 0.0207, "step": 184 }, { "epoch": 0.03579721362229102, "grad_norm": 0.11704543232917786, "learning_rate": 9.999993441884252e-05, "loss": 0.0221, "step": 185 }, { "epoch": 0.035990712074303405, "grad_norm": 0.05968195199966431, "learning_rate": 9.999992965082593e-05, "loss": 0.0214, "step": 186 }, { "epoch": 0.03618421052631579, "grad_norm": 0.10899179428815842, "learning_rate": 9.99999247155107e-05, "loss": 0.023, "step": 187 }, { "epoch": 0.03637770897832817, "grad_norm": 0.1377984583377838, "learning_rate": 9.999991961289684e-05, "loss": 0.0209, "step": 188 }, { "epoch": 0.036571207430340556, "grad_norm": 0.10770270973443985, "learning_rate": 9.999991434298434e-05, "loss": 0.0199, "step": 189 }, { "epoch": 0.03676470588235294, "grad_norm": 0.08594047278165817, "learning_rate": 9.999990890577328e-05, "loss": 0.0209, "step": 190 }, { "epoch": 0.03695820433436533, "grad_norm": 0.1528596729040146, "learning_rate": 9.999990330126362e-05, "loss": 0.0201, "step": 191 }, { "epoch": 0.03715170278637771, "grad_norm": 0.1300247758626938, "learning_rate": 9.999989752945544e-05, "loss": 0.0211, "step": 192 }, { "epoch": 0.037345201238390094, "grad_norm": 0.10507983714342117, "learning_rate": 9.99998915903487e-05, "loss": 0.0196, "step": 193 }, { "epoch": 0.03753869969040248, "grad_norm": 0.16060388088226318, "learning_rate": 9.999988548394347e-05, "loss": 0.0187, "step": 194 }, { "epoch": 0.03773219814241486, "grad_norm": 0.10991954058408737, "learning_rate": 9.999987921023973e-05, "loss": 0.0215, "step": 195 }, { "epoch": 0.037925696594427245, "grad_norm": 0.14021772146224976, "learning_rate": 9.999987276923754e-05, "loss": 0.0205, "step": 196 }, { "epoch": 0.03811919504643963, "grad_norm": 0.15346795320510864, "learning_rate": 9.999986616093692e-05, "loss": 0.0194, "step": 197 }, { "epoch": 0.03831269349845201, "grad_norm": 0.20555925369262695, "learning_rate": 9.999985938533788e-05, "loss": 0.0219, "step": 198 }, { "epoch": 0.0385061919504644, "grad_norm": 0.07007822394371033, "learning_rate": 9.999985244244045e-05, "loss": 0.0199, "step": 199 }, { "epoch": 0.03869969040247678, "grad_norm": 0.11274555325508118, "learning_rate": 9.999984533224466e-05, "loss": 0.0171, "step": 200 }, { "epoch": 0.03889318885448916, "grad_norm": 0.10898193717002869, "learning_rate": 9.999983805475054e-05, "loss": 0.0243, "step": 201 }, { "epoch": 0.03908668730650155, "grad_norm": 0.12214785814285278, "learning_rate": 9.99998306099581e-05, "loss": 0.0194, "step": 202 }, { "epoch": 0.039280185758513934, "grad_norm": 0.16709306836128235, "learning_rate": 9.999982299786738e-05, "loss": 0.0191, "step": 203 }, { "epoch": 0.039473684210526314, "grad_norm": 0.2136012464761734, "learning_rate": 9.99998152184784e-05, "loss": 0.0218, "step": 204 }, { "epoch": 0.0396671826625387, "grad_norm": 0.1682896912097931, "learning_rate": 9.99998072717912e-05, "loss": 0.0196, "step": 205 }, { "epoch": 0.039860681114551086, "grad_norm": 0.11917117983102798, "learning_rate": 9.999979915780581e-05, "loss": 0.021, "step": 206 }, { "epoch": 
0.040054179566563465, "grad_norm": 0.2289707213640213, "learning_rate": 9.999979087652225e-05, "loss": 0.0195, "step": 207 }, { "epoch": 0.04024767801857585, "grad_norm": 0.13538353145122528, "learning_rate": 9.999978242794056e-05, "loss": 0.0209, "step": 208 }, { "epoch": 0.04044117647058824, "grad_norm": 0.154699444770813, "learning_rate": 9.999977381206078e-05, "loss": 0.0173, "step": 209 }, { "epoch": 0.04063467492260062, "grad_norm": 0.2712431848049164, "learning_rate": 9.999976502888288e-05, "loss": 0.0205, "step": 210 }, { "epoch": 0.040828173374613, "grad_norm": 0.21103635430335999, "learning_rate": 9.999975607840698e-05, "loss": 0.0213, "step": 211 }, { "epoch": 0.04102167182662539, "grad_norm": 0.2531260848045349, "learning_rate": 9.999974696063309e-05, "loss": 0.0184, "step": 212 }, { "epoch": 0.04121517027863777, "grad_norm": 0.13093134760856628, "learning_rate": 9.99997376755612e-05, "loss": 0.0194, "step": 213 }, { "epoch": 0.041408668730650154, "grad_norm": 0.21336086094379425, "learning_rate": 9.999972822319139e-05, "loss": 0.0231, "step": 214 }, { "epoch": 0.04160216718266254, "grad_norm": 0.1295040100812912, "learning_rate": 9.999971860352367e-05, "loss": 0.0161, "step": 215 }, { "epoch": 0.04179566563467492, "grad_norm": 0.18227821588516235, "learning_rate": 9.999970881655808e-05, "loss": 0.022, "step": 216 }, { "epoch": 0.041989164086687306, "grad_norm": 0.2231021672487259, "learning_rate": 9.999969886229467e-05, "loss": 0.0195, "step": 217 }, { "epoch": 0.04218266253869969, "grad_norm": 0.0781494602560997, "learning_rate": 9.999968874073346e-05, "loss": 0.0212, "step": 218 }, { "epoch": 0.04237616099071207, "grad_norm": 0.2531234920024872, "learning_rate": 9.99996784518745e-05, "loss": 0.018, "step": 219 }, { "epoch": 0.04256965944272446, "grad_norm": 0.12329356372356415, "learning_rate": 9.999966799571781e-05, "loss": 0.0197, "step": 220 }, { "epoch": 0.04276315789473684, "grad_norm": 0.20246906578540802, "learning_rate": 9.999965737226347e-05, "loss": 0.0202, "step": 221 }, { "epoch": 0.04295665634674923, "grad_norm": 0.23997709155082703, "learning_rate": 9.999964658151148e-05, "loss": 0.0202, "step": 222 }, { "epoch": 0.04315015479876161, "grad_norm": 0.12832611799240112, "learning_rate": 9.999963562346188e-05, "loss": 0.0191, "step": 223 }, { "epoch": 0.043343653250773995, "grad_norm": 0.2855100929737091, "learning_rate": 9.999962449811474e-05, "loss": 0.0209, "step": 224 }, { "epoch": 0.04353715170278638, "grad_norm": 0.12032892554998398, "learning_rate": 9.999961320547007e-05, "loss": 0.0213, "step": 225 }, { "epoch": 0.04373065015479876, "grad_norm": 0.24164651334285736, "learning_rate": 9.999960174552795e-05, "loss": 0.0186, "step": 226 }, { "epoch": 0.043924148606811146, "grad_norm": 0.13403773307800293, "learning_rate": 9.999959011828837e-05, "loss": 0.0218, "step": 227 }, { "epoch": 0.04411764705882353, "grad_norm": 0.15854805707931519, "learning_rate": 9.999957832375143e-05, "loss": 0.0187, "step": 228 }, { "epoch": 0.04431114551083591, "grad_norm": 0.23376494646072388, "learning_rate": 9.999956636191712e-05, "loss": 0.02, "step": 229 }, { "epoch": 0.0445046439628483, "grad_norm": 0.11891715973615646, "learning_rate": 9.999955423278552e-05, "loss": 0.0201, "step": 230 }, { "epoch": 0.044698142414860684, "grad_norm": 0.11526286602020264, "learning_rate": 9.999954193635666e-05, "loss": 0.0184, "step": 231 }, { "epoch": 0.04489164086687306, "grad_norm": 0.09993506222963333, "learning_rate": 9.999952947263059e-05, "loss": 0.0212, "step": 232 }, { "epoch": 
0.04508513931888545, "grad_norm": 0.20855610072612762, "learning_rate": 9.999951684160738e-05, "loss": 0.0206, "step": 233 }, { "epoch": 0.045278637770897835, "grad_norm": 0.08664413541555405, "learning_rate": 9.999950404328705e-05, "loss": 0.0174, "step": 234 }, { "epoch": 0.045472136222910214, "grad_norm": 0.18439531326293945, "learning_rate": 9.999949107766962e-05, "loss": 0.0174, "step": 235 }, { "epoch": 0.0456656346749226, "grad_norm": 0.20406702160835266, "learning_rate": 9.999947794475521e-05, "loss": 0.0188, "step": 236 }, { "epoch": 0.04585913312693499, "grad_norm": 0.12732094526290894, "learning_rate": 9.999946464454379e-05, "loss": 0.018, "step": 237 }, { "epoch": 0.046052631578947366, "grad_norm": 0.13164538145065308, "learning_rate": 9.999945117703549e-05, "loss": 0.0246, "step": 238 }, { "epoch": 0.04624613003095975, "grad_norm": 0.18088209629058838, "learning_rate": 9.999943754223029e-05, "loss": 0.0149, "step": 239 }, { "epoch": 0.04643962848297214, "grad_norm": 0.10136450827121735, "learning_rate": 9.999942374012829e-05, "loss": 0.0182, "step": 240 }, { "epoch": 0.04663312693498452, "grad_norm": 0.1904931366443634, "learning_rate": 9.99994097707295e-05, "loss": 0.0213, "step": 241 }, { "epoch": 0.0468266253869969, "grad_norm": 0.13400712609291077, "learning_rate": 9.9999395634034e-05, "loss": 0.0166, "step": 242 }, { "epoch": 0.04702012383900929, "grad_norm": 0.14795778691768646, "learning_rate": 9.999938133004185e-05, "loss": 0.0176, "step": 243 }, { "epoch": 0.04721362229102167, "grad_norm": 0.15623292326927185, "learning_rate": 9.999936685875307e-05, "loss": 0.018, "step": 244 }, { "epoch": 0.047407120743034055, "grad_norm": 0.13897624611854553, "learning_rate": 9.999935222016773e-05, "loss": 0.0219, "step": 245 }, { "epoch": 0.04760061919504644, "grad_norm": 0.1810641586780548, "learning_rate": 9.999933741428589e-05, "loss": 0.0197, "step": 246 }, { "epoch": 0.04779411764705882, "grad_norm": 0.20145584642887115, "learning_rate": 9.999932244110762e-05, "loss": 0.0194, "step": 247 }, { "epoch": 0.047987616099071206, "grad_norm": 0.3018135726451874, "learning_rate": 9.999930730063294e-05, "loss": 0.0195, "step": 248 }, { "epoch": 0.04818111455108359, "grad_norm": 0.13875623047351837, "learning_rate": 9.999929199286193e-05, "loss": 0.0191, "step": 249 }, { "epoch": 0.04837461300309598, "grad_norm": 0.34689363837242126, "learning_rate": 9.999927651779461e-05, "loss": 0.0165, "step": 250 }, { "epoch": 0.04856811145510836, "grad_norm": 0.170144185423851, "learning_rate": 9.999926087543111e-05, "loss": 0.0186, "step": 251 }, { "epoch": 0.048761609907120744, "grad_norm": 0.24389487504959106, "learning_rate": 9.999924506577143e-05, "loss": 0.0222, "step": 252 }, { "epoch": 0.04895510835913313, "grad_norm": 0.2194085270166397, "learning_rate": 9.999922908881566e-05, "loss": 0.0181, "step": 253 }, { "epoch": 0.04914860681114551, "grad_norm": 0.219267338514328, "learning_rate": 9.999921294456383e-05, "loss": 0.0192, "step": 254 }, { "epoch": 0.049342105263157895, "grad_norm": 0.212637796998024, "learning_rate": 9.999919663301601e-05, "loss": 0.0202, "step": 255 }, { "epoch": 0.04953560371517028, "grad_norm": 0.2412109225988388, "learning_rate": 9.999918015417225e-05, "loss": 0.0171, "step": 256 }, { "epoch": 0.04972910216718266, "grad_norm": 0.1868411749601364, "learning_rate": 9.999916350803265e-05, "loss": 0.0198, "step": 257 }, { "epoch": 0.04992260061919505, "grad_norm": 0.28613561391830444, "learning_rate": 9.999914669459724e-05, "loss": 0.0184, "step": 258 }, { "epoch": 
0.05011609907120743, "grad_norm": 0.1899171769618988, "learning_rate": 9.99991297138661e-05, "loss": 0.0181, "step": 259 }, { "epoch": 0.05030959752321981, "grad_norm": 0.21525295078754425, "learning_rate": 9.999911256583929e-05, "loss": 0.0198, "step": 260 }, { "epoch": 0.0505030959752322, "grad_norm": 0.2069910615682602, "learning_rate": 9.999909525051685e-05, "loss": 0.0187, "step": 261 }, { "epoch": 0.050696594427244585, "grad_norm": 0.20525093376636505, "learning_rate": 9.999907776789887e-05, "loss": 0.0193, "step": 262 }, { "epoch": 0.050890092879256964, "grad_norm": 0.16290025413036346, "learning_rate": 9.99990601179854e-05, "loss": 0.0192, "step": 263 }, { "epoch": 0.05108359133126935, "grad_norm": 0.18302415311336517, "learning_rate": 9.999904230077652e-05, "loss": 0.0181, "step": 264 }, { "epoch": 0.051277089783281736, "grad_norm": 0.12674574553966522, "learning_rate": 9.999902431627228e-05, "loss": 0.0185, "step": 265 }, { "epoch": 0.051470588235294115, "grad_norm": 0.20000706613063812, "learning_rate": 9.999900616447277e-05, "loss": 0.0184, "step": 266 }, { "epoch": 0.0516640866873065, "grad_norm": 0.16876403987407684, "learning_rate": 9.999898784537803e-05, "loss": 0.0207, "step": 267 }, { "epoch": 0.05185758513931889, "grad_norm": 0.12450139224529266, "learning_rate": 9.999896935898815e-05, "loss": 0.0184, "step": 268 }, { "epoch": 0.05205108359133127, "grad_norm": 0.19110137224197388, "learning_rate": 9.999895070530318e-05, "loss": 0.0178, "step": 269 }, { "epoch": 0.05224458204334365, "grad_norm": 0.14042600989341736, "learning_rate": 9.99989318843232e-05, "loss": 0.0203, "step": 270 }, { "epoch": 0.05243808049535604, "grad_norm": 0.23919932544231415, "learning_rate": 9.999891289604829e-05, "loss": 0.0169, "step": 271 }, { "epoch": 0.05263157894736842, "grad_norm": 0.12957645952701569, "learning_rate": 9.999889374047849e-05, "loss": 0.0231, "step": 272 }, { "epoch": 0.052825077399380804, "grad_norm": 0.1830633282661438, "learning_rate": 9.999887441761392e-05, "loss": 0.0192, "step": 273 }, { "epoch": 0.05301857585139319, "grad_norm": 0.15940819680690765, "learning_rate": 9.99988549274546e-05, "loss": 0.0182, "step": 274 }, { "epoch": 0.05321207430340557, "grad_norm": 0.14069533348083496, "learning_rate": 9.999883527000064e-05, "loss": 0.0196, "step": 275 }, { "epoch": 0.053405572755417956, "grad_norm": 0.21708036959171295, "learning_rate": 9.99988154452521e-05, "loss": 0.0174, "step": 276 }, { "epoch": 0.05359907120743034, "grad_norm": 0.11652477085590363, "learning_rate": 9.999879545320904e-05, "loss": 0.0193, "step": 277 }, { "epoch": 0.05379256965944272, "grad_norm": 0.13382992148399353, "learning_rate": 9.999877529387154e-05, "loss": 0.0207, "step": 278 }, { "epoch": 0.05398606811145511, "grad_norm": 0.1052941381931305, "learning_rate": 9.99987549672397e-05, "loss": 0.0177, "step": 279 }, { "epoch": 0.05417956656346749, "grad_norm": 0.09551995247602463, "learning_rate": 9.999873447331356e-05, "loss": 0.0183, "step": 280 }, { "epoch": 0.05437306501547988, "grad_norm": 0.09334518760442734, "learning_rate": 9.999871381209323e-05, "loss": 0.0186, "step": 281 }, { "epoch": 0.05456656346749226, "grad_norm": 0.0777226909995079, "learning_rate": 9.999869298357875e-05, "loss": 0.0149, "step": 282 }, { "epoch": 0.054760061919504645, "grad_norm": 0.11641152948141098, "learning_rate": 9.999867198777024e-05, "loss": 0.0186, "step": 283 }, { "epoch": 0.05495356037151703, "grad_norm": 0.11753205209970474, "learning_rate": 9.999865082466775e-05, "loss": 0.021, "step": 284 }, { "epoch": 
0.05514705882352941, "grad_norm": 0.13240313529968262, "learning_rate": 9.999862949427135e-05, "loss": 0.0214, "step": 285 }, { "epoch": 0.055340557275541796, "grad_norm": 0.13855411112308502, "learning_rate": 9.999860799658116e-05, "loss": 0.0209, "step": 286 }, { "epoch": 0.05553405572755418, "grad_norm": 0.1563778966665268, "learning_rate": 9.999858633159721e-05, "loss": 0.0151, "step": 287 }, { "epoch": 0.05572755417956656, "grad_norm": 0.10610263794660568, "learning_rate": 9.99985644993196e-05, "loss": 0.0189, "step": 288 }, { "epoch": 0.05592105263157895, "grad_norm": 0.21304042637348175, "learning_rate": 9.999854249974843e-05, "loss": 0.0195, "step": 289 }, { "epoch": 0.056114551083591334, "grad_norm": 0.1943940818309784, "learning_rate": 9.999852033288375e-05, "loss": 0.0192, "step": 290 }, { "epoch": 0.05630804953560371, "grad_norm": 0.17978905141353607, "learning_rate": 9.999849799872569e-05, "loss": 0.0185, "step": 291 }, { "epoch": 0.0565015479876161, "grad_norm": 0.12858256697654724, "learning_rate": 9.999847549727429e-05, "loss": 0.018, "step": 292 }, { "epoch": 0.056695046439628485, "grad_norm": 0.2385629117488861, "learning_rate": 9.999845282852962e-05, "loss": 0.0209, "step": 293 }, { "epoch": 0.056888544891640865, "grad_norm": 0.15830324590206146, "learning_rate": 9.999842999249182e-05, "loss": 0.019, "step": 294 }, { "epoch": 0.05708204334365325, "grad_norm": 0.2585143446922302, "learning_rate": 9.999840698916092e-05, "loss": 0.0178, "step": 295 }, { "epoch": 0.05727554179566564, "grad_norm": 0.1213347464799881, "learning_rate": 9.999838381853705e-05, "loss": 0.0152, "step": 296 }, { "epoch": 0.057469040247678016, "grad_norm": 0.291469931602478, "learning_rate": 9.999836048062028e-05, "loss": 0.0169, "step": 297 }, { "epoch": 0.0576625386996904, "grad_norm": 0.07338448613882065, "learning_rate": 9.999833697541068e-05, "loss": 0.0185, "step": 298 }, { "epoch": 0.05785603715170279, "grad_norm": 0.24372941255569458, "learning_rate": 9.999831330290836e-05, "loss": 0.0202, "step": 299 }, { "epoch": 0.05804953560371517, "grad_norm": 0.1301235407590866, "learning_rate": 9.999828946311339e-05, "loss": 0.0169, "step": 300 }, { "epoch": 0.058243034055727554, "grad_norm": 0.20373553037643433, "learning_rate": 9.999826545602588e-05, "loss": 0.0182, "step": 301 }, { "epoch": 0.05843653250773994, "grad_norm": 0.19994613528251648, "learning_rate": 9.99982412816459e-05, "loss": 0.0179, "step": 302 }, { "epoch": 0.05863003095975232, "grad_norm": 0.28446292877197266, "learning_rate": 9.999821693997354e-05, "loss": 0.0198, "step": 303 }, { "epoch": 0.058823529411764705, "grad_norm": 0.14555899798870087, "learning_rate": 9.999819243100891e-05, "loss": 0.015, "step": 304 }, { "epoch": 0.05901702786377709, "grad_norm": 0.24956640601158142, "learning_rate": 9.999816775475208e-05, "loss": 0.019, "step": 305 }, { "epoch": 0.05921052631578947, "grad_norm": 0.16954632103443146, "learning_rate": 9.999814291120316e-05, "loss": 0.0181, "step": 306 }, { "epoch": 0.05940402476780186, "grad_norm": 0.1596585214138031, "learning_rate": 9.999811790036222e-05, "loss": 0.0168, "step": 307 }, { "epoch": 0.05959752321981424, "grad_norm": 0.24126406013965607, "learning_rate": 9.999809272222938e-05, "loss": 0.0166, "step": 308 }, { "epoch": 0.05979102167182663, "grad_norm": 0.21257640421390533, "learning_rate": 9.999806737680471e-05, "loss": 0.0224, "step": 309 }, { "epoch": 0.05998452012383901, "grad_norm": 0.16736526787281036, "learning_rate": 9.999804186408831e-05, "loss": 0.0158, "step": 310 }, { "epoch": 
0.060178018575851394, "grad_norm": 0.19477541744709015, "learning_rate": 9.99980161840803e-05, "loss": 0.0143, "step": 311 }, { "epoch": 0.06037151702786378, "grad_norm": 0.2323702573776245, "learning_rate": 9.99979903367807e-05, "loss": 0.0185, "step": 312 }, { "epoch": 0.06056501547987616, "grad_norm": 0.26910340785980225, "learning_rate": 9.999796432218971e-05, "loss": 0.0146, "step": 313 }, { "epoch": 0.060758513931888546, "grad_norm": 0.19505834579467773, "learning_rate": 9.999793814030737e-05, "loss": 0.018, "step": 314 }, { "epoch": 0.06095201238390093, "grad_norm": 0.17679254710674286, "learning_rate": 9.999791179113376e-05, "loss": 0.0194, "step": 315 }, { "epoch": 0.06114551083591331, "grad_norm": 0.2338397353887558, "learning_rate": 9.999788527466901e-05, "loss": 0.0164, "step": 316 }, { "epoch": 0.0613390092879257, "grad_norm": 0.18339554965496063, "learning_rate": 9.999785859091324e-05, "loss": 0.0179, "step": 317 }, { "epoch": 0.06153250773993808, "grad_norm": 0.1294165849685669, "learning_rate": 9.999783173986649e-05, "loss": 0.0159, "step": 318 }, { "epoch": 0.06172600619195046, "grad_norm": 0.1289224475622177, "learning_rate": 9.99978047215289e-05, "loss": 0.0177, "step": 319 }, { "epoch": 0.06191950464396285, "grad_norm": 0.15721319615840912, "learning_rate": 9.999777753590054e-05, "loss": 0.0144, "step": 320 }, { "epoch": 0.062113003095975235, "grad_norm": 0.08955816179513931, "learning_rate": 9.999775018298157e-05, "loss": 0.0184, "step": 321 }, { "epoch": 0.062306501547987614, "grad_norm": 0.18241989612579346, "learning_rate": 9.999772266277202e-05, "loss": 0.0189, "step": 322 }, { "epoch": 0.0625, "grad_norm": 0.05381360650062561, "learning_rate": 9.999769497527203e-05, "loss": 0.0177, "step": 323 }, { "epoch": 0.06269349845201239, "grad_norm": 0.1588289588689804, "learning_rate": 9.99976671204817e-05, "loss": 0.0157, "step": 324 }, { "epoch": 0.06288699690402477, "grad_norm": 0.19597506523132324, "learning_rate": 9.999763909840112e-05, "loss": 0.0181, "step": 325 }, { "epoch": 0.06308049535603716, "grad_norm": 0.13203534483909607, "learning_rate": 9.999761090903042e-05, "loss": 0.0168, "step": 326 }, { "epoch": 0.06327399380804953, "grad_norm": 0.14854972064495087, "learning_rate": 9.999758255236969e-05, "loss": 0.0211, "step": 327 }, { "epoch": 0.06346749226006192, "grad_norm": 0.11782171577215195, "learning_rate": 9.999755402841905e-05, "loss": 0.0186, "step": 328 }, { "epoch": 0.0636609907120743, "grad_norm": 0.14867739379405975, "learning_rate": 9.999752533717857e-05, "loss": 0.0158, "step": 329 }, { "epoch": 0.06385448916408669, "grad_norm": 0.10568628460168839, "learning_rate": 9.999749647864838e-05, "loss": 0.0231, "step": 330 }, { "epoch": 0.06404798761609908, "grad_norm": 0.2914767861366272, "learning_rate": 9.999746745282859e-05, "loss": 0.0187, "step": 331 }, { "epoch": 0.06424148606811146, "grad_norm": 0.15207989513874054, "learning_rate": 9.999743825971931e-05, "loss": 0.0185, "step": 332 }, { "epoch": 0.06443498452012383, "grad_norm": 0.21644236147403717, "learning_rate": 9.999740889932064e-05, "loss": 0.0149, "step": 333 }, { "epoch": 0.06462848297213622, "grad_norm": 0.11210273206233978, "learning_rate": 9.999737937163269e-05, "loss": 0.0193, "step": 334 }, { "epoch": 0.0648219814241486, "grad_norm": 0.29633867740631104, "learning_rate": 9.999734967665556e-05, "loss": 0.0204, "step": 335 }, { "epoch": 0.06501547987616099, "grad_norm": 0.1305873692035675, "learning_rate": 9.999731981438938e-05, "loss": 0.018, "step": 336 }, { "epoch": 
0.06520897832817338, "grad_norm": 0.25009727478027344, "learning_rate": 9.999728978483426e-05, "loss": 0.0199, "step": 337 }, { "epoch": 0.06540247678018576, "grad_norm": 0.14306782186031342, "learning_rate": 9.999725958799029e-05, "loss": 0.0147, "step": 338 }, { "epoch": 0.06559597523219814, "grad_norm": 0.1687147170305252, "learning_rate": 9.99972292238576e-05, "loss": 0.0176, "step": 339 }, { "epoch": 0.06578947368421052, "grad_norm": 0.08891718089580536, "learning_rate": 9.999719869243631e-05, "loss": 0.022, "step": 340 }, { "epoch": 0.06598297213622291, "grad_norm": 0.2735013961791992, "learning_rate": 9.999716799372651e-05, "loss": 0.02, "step": 341 }, { "epoch": 0.0661764705882353, "grad_norm": 0.13118457794189453, "learning_rate": 9.999713712772833e-05, "loss": 0.0182, "step": 342 }, { "epoch": 0.06636996904024768, "grad_norm": 0.22743211686611176, "learning_rate": 9.999710609444189e-05, "loss": 0.0176, "step": 343 }, { "epoch": 0.06656346749226007, "grad_norm": 0.16619059443473816, "learning_rate": 9.99970748938673e-05, "loss": 0.0193, "step": 344 }, { "epoch": 0.06675696594427244, "grad_norm": 0.1653999388217926, "learning_rate": 9.999704352600466e-05, "loss": 0.0164, "step": 345 }, { "epoch": 0.06695046439628483, "grad_norm": 0.14908091723918915, "learning_rate": 9.999701199085411e-05, "loss": 0.0147, "step": 346 }, { "epoch": 0.06714396284829721, "grad_norm": 0.15729385614395142, "learning_rate": 9.999698028841575e-05, "loss": 0.0204, "step": 347 }, { "epoch": 0.0673374613003096, "grad_norm": 0.14060816168785095, "learning_rate": 9.99969484186897e-05, "loss": 0.0151, "step": 348 }, { "epoch": 0.06753095975232198, "grad_norm": 0.12679168581962585, "learning_rate": 9.999691638167609e-05, "loss": 0.0205, "step": 349 }, { "epoch": 0.06772445820433437, "grad_norm": 0.1487703025341034, "learning_rate": 9.999688417737504e-05, "loss": 0.0168, "step": 350 }, { "epoch": 0.06791795665634674, "grad_norm": 0.12773659825325012, "learning_rate": 9.999685180578664e-05, "loss": 0.019, "step": 351 }, { "epoch": 0.06811145510835913, "grad_norm": 0.133652463555336, "learning_rate": 9.999681926691105e-05, "loss": 0.0208, "step": 352 }, { "epoch": 0.06830495356037151, "grad_norm": 0.18717637658119202, "learning_rate": 9.999678656074837e-05, "loss": 0.0136, "step": 353 }, { "epoch": 0.0684984520123839, "grad_norm": 0.11603961139917374, "learning_rate": 9.999675368729872e-05, "loss": 0.0191, "step": 354 }, { "epoch": 0.06869195046439629, "grad_norm": 0.12211036682128906, "learning_rate": 9.999672064656225e-05, "loss": 0.0171, "step": 355 }, { "epoch": 0.06888544891640867, "grad_norm": 0.14042022824287415, "learning_rate": 9.999668743853903e-05, "loss": 0.0154, "step": 356 }, { "epoch": 0.06907894736842106, "grad_norm": 0.09849569201469421, "learning_rate": 9.999665406322922e-05, "loss": 0.0162, "step": 357 }, { "epoch": 0.06927244582043343, "grad_norm": 0.13808362185955048, "learning_rate": 9.999662052063296e-05, "loss": 0.017, "step": 358 }, { "epoch": 0.06946594427244582, "grad_norm": 0.10548018664121628, "learning_rate": 9.999658681075032e-05, "loss": 0.0154, "step": 359 }, { "epoch": 0.0696594427244582, "grad_norm": 0.13213030993938446, "learning_rate": 9.999655293358147e-05, "loss": 0.0155, "step": 360 }, { "epoch": 0.06985294117647059, "grad_norm": 0.06992532312870026, "learning_rate": 9.999651888912653e-05, "loss": 0.0178, "step": 361 }, { "epoch": 0.07004643962848298, "grad_norm": 0.11093372106552124, "learning_rate": 9.999648467738562e-05, "loss": 0.0195, "step": 362 }, { "epoch": 
0.07023993808049536, "grad_norm": 0.11895785480737686, "learning_rate": 9.999645029835887e-05, "loss": 0.0152, "step": 363 }, { "epoch": 0.07043343653250773, "grad_norm": 0.09909258037805557, "learning_rate": 9.99964157520464e-05, "loss": 0.018, "step": 364 }, { "epoch": 0.07062693498452012, "grad_norm": 0.16342279314994812, "learning_rate": 9.999638103844833e-05, "loss": 0.0211, "step": 365 }, { "epoch": 0.0708204334365325, "grad_norm": 0.10796412825584412, "learning_rate": 9.999634615756483e-05, "loss": 0.0158, "step": 366 }, { "epoch": 0.07101393188854489, "grad_norm": 0.07601839303970337, "learning_rate": 9.999631110939599e-05, "loss": 0.0183, "step": 367 }, { "epoch": 0.07120743034055728, "grad_norm": 0.12430508434772491, "learning_rate": 9.999627589394195e-05, "loss": 0.0165, "step": 368 }, { "epoch": 0.07140092879256967, "grad_norm": 0.14925925433635712, "learning_rate": 9.999624051120284e-05, "loss": 0.0163, "step": 369 }, { "epoch": 0.07159442724458204, "grad_norm": 0.11293239891529083, "learning_rate": 9.99962049611788e-05, "loss": 0.0132, "step": 370 }, { "epoch": 0.07178792569659442, "grad_norm": 0.1593189686536789, "learning_rate": 9.999616924386996e-05, "loss": 0.0152, "step": 371 }, { "epoch": 0.07198142414860681, "grad_norm": 0.1024324893951416, "learning_rate": 9.999613335927644e-05, "loss": 0.0154, "step": 372 }, { "epoch": 0.0721749226006192, "grad_norm": 0.21038775146007538, "learning_rate": 9.99960973073984e-05, "loss": 0.018, "step": 373 }, { "epoch": 0.07236842105263158, "grad_norm": 0.2684749364852905, "learning_rate": 9.999606108823595e-05, "loss": 0.0238, "step": 374 }, { "epoch": 0.07256191950464397, "grad_norm": 0.15663844347000122, "learning_rate": 9.999602470178922e-05, "loss": 0.0172, "step": 375 }, { "epoch": 0.07275541795665634, "grad_norm": 0.21711647510528564, "learning_rate": 9.999598814805837e-05, "loss": 0.0163, "step": 376 }, { "epoch": 0.07294891640866873, "grad_norm": 0.12928436696529388, "learning_rate": 9.999595142704352e-05, "loss": 0.0161, "step": 377 }, { "epoch": 0.07314241486068111, "grad_norm": 0.2799195349216461, "learning_rate": 9.999591453874482e-05, "loss": 0.0173, "step": 378 }, { "epoch": 0.0733359133126935, "grad_norm": 0.08800293505191803, "learning_rate": 9.999587748316238e-05, "loss": 0.0172, "step": 379 }, { "epoch": 0.07352941176470588, "grad_norm": 0.29338347911834717, "learning_rate": 9.999584026029635e-05, "loss": 0.0175, "step": 380 }, { "epoch": 0.07372291021671827, "grad_norm": 0.13192032277584076, "learning_rate": 9.999580287014689e-05, "loss": 0.0156, "step": 381 }, { "epoch": 0.07391640866873066, "grad_norm": 0.3073115348815918, "learning_rate": 9.999576531271413e-05, "loss": 0.0179, "step": 382 }, { "epoch": 0.07410990712074303, "grad_norm": 0.1418263465166092, "learning_rate": 9.999572758799817e-05, "loss": 0.0175, "step": 383 }, { "epoch": 0.07430340557275542, "grad_norm": 0.17882676422595978, "learning_rate": 9.999568969599923e-05, "loss": 0.0172, "step": 384 }, { "epoch": 0.0744969040247678, "grad_norm": 0.20903100073337555, "learning_rate": 9.999565163671736e-05, "loss": 0.0192, "step": 385 }, { "epoch": 0.07469040247678019, "grad_norm": 0.13578279316425323, "learning_rate": 9.999561341015277e-05, "loss": 0.0155, "step": 386 }, { "epoch": 0.07488390092879257, "grad_norm": 0.2972484827041626, "learning_rate": 9.999557501630556e-05, "loss": 0.0163, "step": 387 }, { "epoch": 0.07507739938080496, "grad_norm": 0.1990124136209488, "learning_rate": 9.99955364551759e-05, "loss": 0.0183, "step": 388 }, { "epoch": 
0.07527089783281733, "grad_norm": 0.23251120746135712, "learning_rate": 9.999549772676392e-05, "loss": 0.0159, "step": 389 }, { "epoch": 0.07546439628482972, "grad_norm": 0.25533294677734375, "learning_rate": 9.999545883106976e-05, "loss": 0.0248, "step": 390 }, { "epoch": 0.0756578947368421, "grad_norm": 0.1107964962720871, "learning_rate": 9.999541976809358e-05, "loss": 0.0166, "step": 391 }, { "epoch": 0.07585139318885449, "grad_norm": 0.19158011674880981, "learning_rate": 9.99953805378355e-05, "loss": 0.0171, "step": 392 }, { "epoch": 0.07604489164086688, "grad_norm": 0.1792931854724884, "learning_rate": 9.999534114029571e-05, "loss": 0.0159, "step": 393 }, { "epoch": 0.07623839009287926, "grad_norm": 0.152277871966362, "learning_rate": 9.99953015754743e-05, "loss": 0.0173, "step": 394 }, { "epoch": 0.07643188854489164, "grad_norm": 0.1875959187746048, "learning_rate": 9.999526184337146e-05, "loss": 0.0182, "step": 395 }, { "epoch": 0.07662538699690402, "grad_norm": 0.26157769560813904, "learning_rate": 9.999522194398731e-05, "loss": 0.0154, "step": 396 }, { "epoch": 0.07681888544891641, "grad_norm": 0.187631294131279, "learning_rate": 9.999518187732203e-05, "loss": 0.0165, "step": 397 }, { "epoch": 0.0770123839009288, "grad_norm": 0.2583997845649719, "learning_rate": 9.999514164337575e-05, "loss": 0.0192, "step": 398 }, { "epoch": 0.07720588235294118, "grad_norm": 0.12862659990787506, "learning_rate": 9.999510124214861e-05, "loss": 0.0147, "step": 399 }, { "epoch": 0.07739938080495357, "grad_norm": 0.30284714698791504, "learning_rate": 9.999506067364079e-05, "loss": 0.0177, "step": 400 }, { "epoch": 0.07759287925696594, "grad_norm": 0.09621581435203552, "learning_rate": 9.99950199378524e-05, "loss": 0.0176, "step": 401 }, { "epoch": 0.07778637770897832, "grad_norm": 0.3690032362937927, "learning_rate": 9.999497903478363e-05, "loss": 0.0148, "step": 402 }, { "epoch": 0.07797987616099071, "grad_norm": 0.12337011843919754, "learning_rate": 9.99949379644346e-05, "loss": 0.0197, "step": 403 }, { "epoch": 0.0781733746130031, "grad_norm": 0.43229344487190247, "learning_rate": 9.999489672680547e-05, "loss": 0.0166, "step": 404 }, { "epoch": 0.07836687306501548, "grad_norm": 0.19276019930839539, "learning_rate": 9.999485532189642e-05, "loss": 0.0157, "step": 405 }, { "epoch": 0.07856037151702787, "grad_norm": 0.26093941926956177, "learning_rate": 9.999481374970758e-05, "loss": 0.0192, "step": 406 }, { "epoch": 0.07875386996904024, "grad_norm": 0.24185600876808167, "learning_rate": 9.999477201023912e-05, "loss": 0.0169, "step": 407 }, { "epoch": 0.07894736842105263, "grad_norm": 0.17059092223644257, "learning_rate": 9.999473010349118e-05, "loss": 0.0182, "step": 408 }, { "epoch": 0.07914086687306501, "grad_norm": 0.16172827780246735, "learning_rate": 9.99946880294639e-05, "loss": 0.0166, "step": 409 }, { "epoch": 0.0793343653250774, "grad_norm": 0.13262046873569489, "learning_rate": 9.999464578815747e-05, "loss": 0.02, "step": 410 }, { "epoch": 0.07952786377708979, "grad_norm": 0.11593908816576004, "learning_rate": 9.999460337957203e-05, "loss": 0.0182, "step": 411 }, { "epoch": 0.07972136222910217, "grad_norm": 0.09971686452627182, "learning_rate": 9.999456080370776e-05, "loss": 0.0191, "step": 412 }, { "epoch": 0.07991486068111456, "grad_norm": 0.14786119759082794, "learning_rate": 9.999451806056476e-05, "loss": 0.0158, "step": 413 }, { "epoch": 0.08010835913312693, "grad_norm": 0.19680997729301453, "learning_rate": 9.999447515014328e-05, "loss": 0.0183, "step": 414 }, { "epoch": 
0.08030185758513932, "grad_norm": 0.16427728533744812, "learning_rate": 9.99944320724434e-05, "loss": 0.018, "step": 415 }, { "epoch": 0.0804953560371517, "grad_norm": 0.14785726368427277, "learning_rate": 9.999438882746531e-05, "loss": 0.0166, "step": 416 }, { "epoch": 0.08068885448916409, "grad_norm": 0.2852758467197418, "learning_rate": 9.999434541520916e-05, "loss": 0.0214, "step": 417 }, { "epoch": 0.08088235294117647, "grad_norm": 0.18202421069145203, "learning_rate": 9.999430183567514e-05, "loss": 0.0181, "step": 418 }, { "epoch": 0.08107585139318886, "grad_norm": 0.21477365493774414, "learning_rate": 9.99942580888634e-05, "loss": 0.0173, "step": 419 }, { "epoch": 0.08126934984520123, "grad_norm": 0.15545323491096497, "learning_rate": 9.999421417477406e-05, "loss": 0.0204, "step": 420 }, { "epoch": 0.08146284829721362, "grad_norm": 0.28516334295272827, "learning_rate": 9.999417009340734e-05, "loss": 0.0165, "step": 421 }, { "epoch": 0.081656346749226, "grad_norm": 0.12827329337596893, "learning_rate": 9.999412584476338e-05, "loss": 0.0194, "step": 422 }, { "epoch": 0.08184984520123839, "grad_norm": 0.18132413923740387, "learning_rate": 9.999408142884235e-05, "loss": 0.0155, "step": 423 }, { "epoch": 0.08204334365325078, "grad_norm": 0.31498289108276367, "learning_rate": 9.99940368456444e-05, "loss": 0.0171, "step": 424 }, { "epoch": 0.08223684210526316, "grad_norm": 0.12646615505218506, "learning_rate": 9.999399209516972e-05, "loss": 0.0186, "step": 425 }, { "epoch": 0.08243034055727554, "grad_norm": 0.3251175284385681, "learning_rate": 9.999394717741846e-05, "loss": 0.0161, "step": 426 }, { "epoch": 0.08262383900928792, "grad_norm": 0.19532400369644165, "learning_rate": 9.999390209239081e-05, "loss": 0.0154, "step": 427 }, { "epoch": 0.08281733746130031, "grad_norm": 0.20381125807762146, "learning_rate": 9.999385684008688e-05, "loss": 0.0187, "step": 428 }, { "epoch": 0.0830108359133127, "grad_norm": 0.09880512207746506, "learning_rate": 9.99938114205069e-05, "loss": 0.0161, "step": 429 }, { "epoch": 0.08320433436532508, "grad_norm": 0.1549815684556961, "learning_rate": 9.999376583365101e-05, "loss": 0.0144, "step": 430 }, { "epoch": 0.08339783281733747, "grad_norm": 0.08464109152555466, "learning_rate": 9.99937200795194e-05, "loss": 0.0153, "step": 431 }, { "epoch": 0.08359133126934984, "grad_norm": 0.26531898975372314, "learning_rate": 9.999367415811222e-05, "loss": 0.0153, "step": 432 }, { "epoch": 0.08378482972136223, "grad_norm": 0.19301243126392365, "learning_rate": 9.999362806942965e-05, "loss": 0.0187, "step": 433 }, { "epoch": 0.08397832817337461, "grad_norm": 0.09757360070943832, "learning_rate": 9.999358181347183e-05, "loss": 0.0143, "step": 434 }, { "epoch": 0.084171826625387, "grad_norm": 0.11330573260784149, "learning_rate": 9.999353539023898e-05, "loss": 0.0186, "step": 435 }, { "epoch": 0.08436532507739938, "grad_norm": 0.28610652685165405, "learning_rate": 9.999348879973124e-05, "loss": 0.0176, "step": 436 }, { "epoch": 0.08455882352941177, "grad_norm": 0.1383676379919052, "learning_rate": 9.999344204194882e-05, "loss": 0.0201, "step": 437 }, { "epoch": 0.08475232198142414, "grad_norm": 0.2701675295829773, "learning_rate": 9.999339511689186e-05, "loss": 0.0149, "step": 438 }, { "epoch": 0.08494582043343653, "grad_norm": 0.2806735634803772, "learning_rate": 9.999334802456054e-05, "loss": 0.0188, "step": 439 }, { "epoch": 0.08513931888544891, "grad_norm": 0.15507377684116364, "learning_rate": 9.999330076495503e-05, "loss": 0.0217, "step": 440 }, { "epoch": 
0.0853328173374613, "grad_norm": 0.29834994673728943, "learning_rate": 9.999325333807552e-05, "loss": 0.0162, "step": 441 }, { "epoch": 0.08552631578947369, "grad_norm": 0.157026007771492, "learning_rate": 9.999320574392219e-05, "loss": 0.02, "step": 442 }, { "epoch": 0.08571981424148607, "grad_norm": 0.18395492434501648, "learning_rate": 9.999315798249519e-05, "loss": 0.019, "step": 443 }, { "epoch": 0.08591331269349846, "grad_norm": 0.15149059891700745, "learning_rate": 9.999311005379472e-05, "loss": 0.0158, "step": 444 }, { "epoch": 0.08610681114551083, "grad_norm": 0.17323243618011475, "learning_rate": 9.999306195782097e-05, "loss": 0.0149, "step": 445 }, { "epoch": 0.08630030959752322, "grad_norm": 0.1384853571653366, "learning_rate": 9.999301369457408e-05, "loss": 0.0181, "step": 446 }, { "epoch": 0.0864938080495356, "grad_norm": 0.36614495515823364, "learning_rate": 9.999296526405428e-05, "loss": 0.015, "step": 447 }, { "epoch": 0.08668730650154799, "grad_norm": 0.15678030252456665, "learning_rate": 9.99929166662617e-05, "loss": 0.0169, "step": 448 }, { "epoch": 0.08688080495356038, "grad_norm": 0.5277025103569031, "learning_rate": 9.999286790119654e-05, "loss": 0.0177, "step": 449 }, { "epoch": 0.08707430340557276, "grad_norm": 0.14745762944221497, "learning_rate": 9.999281896885898e-05, "loss": 0.0154, "step": 450 }, { "epoch": 0.08726780185758513, "grad_norm": 0.36256441473960876, "learning_rate": 9.999276986924923e-05, "loss": 0.0172, "step": 451 }, { "epoch": 0.08746130030959752, "grad_norm": 0.18055447936058044, "learning_rate": 9.999272060236742e-05, "loss": 0.0158, "step": 452 }, { "epoch": 0.0876547987616099, "grad_norm": 0.12653088569641113, "learning_rate": 9.999267116821378e-05, "loss": 0.0175, "step": 453 }, { "epoch": 0.08784829721362229, "grad_norm": 0.19966235756874084, "learning_rate": 9.999262156678846e-05, "loss": 0.0188, "step": 454 }, { "epoch": 0.08804179566563468, "grad_norm": 0.14697134494781494, "learning_rate": 9.999257179809168e-05, "loss": 0.0189, "step": 455 }, { "epoch": 0.08823529411764706, "grad_norm": 0.14531004428863525, "learning_rate": 9.99925218621236e-05, "loss": 0.0154, "step": 456 }, { "epoch": 0.08842879256965944, "grad_norm": 0.17053507268428802, "learning_rate": 9.99924717588844e-05, "loss": 0.0182, "step": 457 }, { "epoch": 0.08862229102167182, "grad_norm": 0.06724616885185242, "learning_rate": 9.999242148837427e-05, "loss": 0.0177, "step": 458 }, { "epoch": 0.08881578947368421, "grad_norm": 0.2526479959487915, "learning_rate": 9.999237105059342e-05, "loss": 0.0204, "step": 459 }, { "epoch": 0.0890092879256966, "grad_norm": 0.11192723363637924, "learning_rate": 9.999232044554201e-05, "loss": 0.0191, "step": 460 }, { "epoch": 0.08920278637770898, "grad_norm": 0.22082936763763428, "learning_rate": 9.999226967322025e-05, "loss": 0.0141, "step": 461 }, { "epoch": 0.08939628482972137, "grad_norm": 0.15095211565494537, "learning_rate": 9.999221873362831e-05, "loss": 0.0176, "step": 462 }, { "epoch": 0.08958978328173374, "grad_norm": 0.15428072214126587, "learning_rate": 9.999216762676639e-05, "loss": 0.0165, "step": 463 }, { "epoch": 0.08978328173374613, "grad_norm": 0.16174998879432678, "learning_rate": 9.999211635263468e-05, "loss": 0.019, "step": 464 }, { "epoch": 0.08997678018575851, "grad_norm": 0.10592792928218842, "learning_rate": 9.999206491123336e-05, "loss": 0.0161, "step": 465 }, { "epoch": 0.0901702786377709, "grad_norm": 0.10997148603200912, "learning_rate": 9.999201330256262e-05, "loss": 0.0199, "step": 466 }, { "epoch": 
0.09036377708978328, "grad_norm": 0.09463375806808472, "learning_rate": 9.999196152662268e-05, "loss": 0.0152, "step": 467 }, { "epoch": 0.09055727554179567, "grad_norm": 0.10522163659334183, "learning_rate": 9.999190958341371e-05, "loss": 0.0178, "step": 468 }, { "epoch": 0.09075077399380804, "grad_norm": 0.1298103779554367, "learning_rate": 9.99918574729359e-05, "loss": 0.017, "step": 469 }, { "epoch": 0.09094427244582043, "grad_norm": 0.1338934600353241, "learning_rate": 9.999180519518945e-05, "loss": 0.0161, "step": 470 }, { "epoch": 0.09113777089783281, "grad_norm": 0.11998597532510757, "learning_rate": 9.999175275017456e-05, "loss": 0.0172, "step": 471 }, { "epoch": 0.0913312693498452, "grad_norm": 0.35282987356185913, "learning_rate": 9.999170013789141e-05, "loss": 0.0154, "step": 472 }, { "epoch": 0.09152476780185759, "grad_norm": 0.17812129855155945, "learning_rate": 9.999164735834022e-05, "loss": 0.0194, "step": 473 }, { "epoch": 0.09171826625386997, "grad_norm": 0.3230014443397522, "learning_rate": 9.999159441152117e-05, "loss": 0.0197, "step": 474 }, { "epoch": 0.09191176470588236, "grad_norm": 0.22882667183876038, "learning_rate": 9.999154129743445e-05, "loss": 0.0159, "step": 475 }, { "epoch": 0.09210526315789473, "grad_norm": 0.17357482016086578, "learning_rate": 9.999148801608026e-05, "loss": 0.0158, "step": 476 }, { "epoch": 0.09229876160990712, "grad_norm": 0.24337083101272583, "learning_rate": 9.999143456745881e-05, "loss": 0.0167, "step": 477 }, { "epoch": 0.0924922600619195, "grad_norm": 0.10081472992897034, "learning_rate": 9.99913809515703e-05, "loss": 0.0194, "step": 478 }, { "epoch": 0.09268575851393189, "grad_norm": 0.20211538672447205, "learning_rate": 9.99913271684149e-05, "loss": 0.0199, "step": 479 }, { "epoch": 0.09287925696594428, "grad_norm": 0.10287286341190338, "learning_rate": 9.999127321799285e-05, "loss": 0.0139, "step": 480 }, { "epoch": 0.09307275541795666, "grad_norm": 0.19341214001178741, "learning_rate": 9.999121910030435e-05, "loss": 0.0192, "step": 481 }, { "epoch": 0.09326625386996903, "grad_norm": 0.14559108018875122, "learning_rate": 9.999116481534956e-05, "loss": 0.0156, "step": 482 }, { "epoch": 0.09345975232198142, "grad_norm": 0.14027486741542816, "learning_rate": 9.999111036312871e-05, "loss": 0.014, "step": 483 }, { "epoch": 0.0936532507739938, "grad_norm": 0.10762495547533035, "learning_rate": 9.999105574364199e-05, "loss": 0.0175, "step": 484 }, { "epoch": 0.0938467492260062, "grad_norm": 0.1764158010482788, "learning_rate": 9.999100095688961e-05, "loss": 0.0144, "step": 485 }, { "epoch": 0.09404024767801858, "grad_norm": 0.06441190838813782, "learning_rate": 9.999094600287178e-05, "loss": 0.0148, "step": 486 }, { "epoch": 0.09423374613003097, "grad_norm": 0.13735876977443695, "learning_rate": 9.99908908815887e-05, "loss": 0.016, "step": 487 }, { "epoch": 0.09442724458204334, "grad_norm": 0.10030899196863174, "learning_rate": 9.999083559304057e-05, "loss": 0.017, "step": 488 }, { "epoch": 0.09462074303405572, "grad_norm": 0.13160952925682068, "learning_rate": 9.99907801372276e-05, "loss": 0.019, "step": 489 }, { "epoch": 0.09481424148606811, "grad_norm": 0.16086092591285706, "learning_rate": 9.999072451415002e-05, "loss": 0.0148, "step": 490 }, { "epoch": 0.0950077399380805, "grad_norm": 0.17667798697948456, "learning_rate": 9.999066872380797e-05, "loss": 0.018, "step": 491 }, { "epoch": 0.09520123839009288, "grad_norm": 0.24705053865909576, "learning_rate": 9.999061276620174e-05, "loss": 0.0162, "step": 492 }, { "epoch": 
0.09539473684210527, "grad_norm": 0.14371618628501892, "learning_rate": 9.999055664133147e-05, "loss": 0.0202, "step": 493 }, { "epoch": 0.09558823529411764, "grad_norm": 0.3342151939868927, "learning_rate": 9.999050034919739e-05, "loss": 0.0137, "step": 494 }, { "epoch": 0.09578173374613003, "grad_norm": 0.1373172104358673, "learning_rate": 9.999044388979974e-05, "loss": 0.0152, "step": 495 }, { "epoch": 0.09597523219814241, "grad_norm": 0.2892400622367859, "learning_rate": 9.99903872631387e-05, "loss": 0.0157, "step": 496 }, { "epoch": 0.0961687306501548, "grad_norm": 0.09748225659132004, "learning_rate": 9.999033046921448e-05, "loss": 0.0136, "step": 497 }, { "epoch": 0.09636222910216719, "grad_norm": 0.26005229353904724, "learning_rate": 9.99902735080273e-05, "loss": 0.019, "step": 498 }, { "epoch": 0.09655572755417957, "grad_norm": 0.10944041609764099, "learning_rate": 9.999021637957735e-05, "loss": 0.0125, "step": 499 }, { "epoch": 0.09674922600619196, "grad_norm": 0.1453152894973755, "learning_rate": 9.999015908386489e-05, "loss": 0.0146, "step": 500 }, { "epoch": 0.09694272445820433, "grad_norm": 0.16985486447811127, "learning_rate": 9.999010162089008e-05, "loss": 0.0154, "step": 501 }, { "epoch": 0.09713622291021672, "grad_norm": 0.166620671749115, "learning_rate": 9.999004399065318e-05, "loss": 0.0166, "step": 502 }, { "epoch": 0.0973297213622291, "grad_norm": 0.1961694359779358, "learning_rate": 9.998998619315437e-05, "loss": 0.0166, "step": 503 }, { "epoch": 0.09752321981424149, "grad_norm": 0.14538492262363434, "learning_rate": 9.99899282283939e-05, "loss": 0.0161, "step": 504 }, { "epoch": 0.09771671826625387, "grad_norm": 0.1934945285320282, "learning_rate": 9.998987009637193e-05, "loss": 0.014, "step": 505 }, { "epoch": 0.09791021671826626, "grad_norm": 0.14384324848651886, "learning_rate": 9.998981179708873e-05, "loss": 0.017, "step": 506 }, { "epoch": 0.09810371517027863, "grad_norm": 0.17322778701782227, "learning_rate": 9.998975333054448e-05, "loss": 0.0164, "step": 507 }, { "epoch": 0.09829721362229102, "grad_norm": 0.14356845617294312, "learning_rate": 9.998969469673942e-05, "loss": 0.0174, "step": 508 }, { "epoch": 0.0984907120743034, "grad_norm": 0.21861106157302856, "learning_rate": 9.998963589567377e-05, "loss": 0.0174, "step": 509 }, { "epoch": 0.09868421052631579, "grad_norm": 0.20869240164756775, "learning_rate": 9.998957692734773e-05, "loss": 0.0139, "step": 510 }, { "epoch": 0.09887770897832818, "grad_norm": 0.26006779074668884, "learning_rate": 9.998951779176153e-05, "loss": 0.0163, "step": 511 }, { "epoch": 0.09907120743034056, "grad_norm": 0.16245970129966736, "learning_rate": 9.99894584889154e-05, "loss": 0.0177, "step": 512 }, { "epoch": 0.09926470588235294, "grad_norm": 0.29872947931289673, "learning_rate": 9.998939901880952e-05, "loss": 0.0155, "step": 513 }, { "epoch": 0.09945820433436532, "grad_norm": 0.15481626987457275, "learning_rate": 9.998933938144415e-05, "loss": 0.0167, "step": 514 }, { "epoch": 0.09965170278637771, "grad_norm": 0.22238686680793762, "learning_rate": 9.998927957681951e-05, "loss": 0.0149, "step": 515 }, { "epoch": 0.0998452012383901, "grad_norm": 0.2011832743883133, "learning_rate": 9.998921960493582e-05, "loss": 0.017, "step": 516 }, { "epoch": 0.10003869969040248, "grad_norm": 0.13800233602523804, "learning_rate": 9.998915946579328e-05, "loss": 0.0166, "step": 517 }, { "epoch": 0.10023219814241487, "grad_norm": 0.1648498773574829, "learning_rate": 9.998909915939216e-05, "loss": 0.0134, "step": 518 }, { "epoch": 
0.10042569659442724, "grad_norm": 0.1570141762495041, "learning_rate": 9.998903868573263e-05, "loss": 0.0143, "step": 519 }, { "epoch": 0.10061919504643962, "grad_norm": 0.20000600814819336, "learning_rate": 9.998897804481494e-05, "loss": 0.018, "step": 520 }, { "epoch": 0.10081269349845201, "grad_norm": 0.16949254274368286, "learning_rate": 9.998891723663932e-05, "loss": 0.0187, "step": 521 }, { "epoch": 0.1010061919504644, "grad_norm": 0.30460432171821594, "learning_rate": 9.9988856261206e-05, "loss": 0.0171, "step": 522 }, { "epoch": 0.10119969040247678, "grad_norm": 0.3381250202655792, "learning_rate": 9.998879511851518e-05, "loss": 0.017, "step": 523 }, { "epoch": 0.10139318885448917, "grad_norm": 0.2737542986869812, "learning_rate": 9.998873380856711e-05, "loss": 0.0187, "step": 524 }, { "epoch": 0.10158668730650154, "grad_norm": 0.2908898591995239, "learning_rate": 9.998867233136203e-05, "loss": 0.0166, "step": 525 }, { "epoch": 0.10178018575851393, "grad_norm": 0.26302629709243774, "learning_rate": 9.998861068690013e-05, "loss": 0.0197, "step": 526 }, { "epoch": 0.10197368421052631, "grad_norm": 0.2402387261390686, "learning_rate": 9.998854887518168e-05, "loss": 0.0146, "step": 527 }, { "epoch": 0.1021671826625387, "grad_norm": 0.2721598148345947, "learning_rate": 9.998848689620688e-05, "loss": 0.0155, "step": 528 }, { "epoch": 0.10236068111455109, "grad_norm": 0.14214999973773956, "learning_rate": 9.998842474997597e-05, "loss": 0.0155, "step": 529 }, { "epoch": 0.10255417956656347, "grad_norm": 0.34426259994506836, "learning_rate": 9.998836243648919e-05, "loss": 0.0168, "step": 530 }, { "epoch": 0.10274767801857586, "grad_norm": 0.2342548966407776, "learning_rate": 9.998829995574676e-05, "loss": 0.0139, "step": 531 }, { "epoch": 0.10294117647058823, "grad_norm": 0.19918593764305115, "learning_rate": 9.998823730774892e-05, "loss": 0.0163, "step": 532 }, { "epoch": 0.10313467492260062, "grad_norm": 0.37017500400543213, "learning_rate": 9.998817449249588e-05, "loss": 0.0204, "step": 533 }, { "epoch": 0.103328173374613, "grad_norm": 0.10173092782497406, "learning_rate": 9.998811150998791e-05, "loss": 0.0166, "step": 534 }, { "epoch": 0.10352167182662539, "grad_norm": 0.33041390776634216, "learning_rate": 9.998804836022523e-05, "loss": 0.0165, "step": 535 }, { "epoch": 0.10371517027863777, "grad_norm": 0.12260551750659943, "learning_rate": 9.998798504320807e-05, "loss": 0.0144, "step": 536 }, { "epoch": 0.10390866873065016, "grad_norm": 0.12858159840106964, "learning_rate": 9.998792155893664e-05, "loss": 0.0155, "step": 537 }, { "epoch": 0.10410216718266253, "grad_norm": 0.1826135814189911, "learning_rate": 9.998785790741123e-05, "loss": 0.0136, "step": 538 }, { "epoch": 0.10429566563467492, "grad_norm": 0.15158629417419434, "learning_rate": 9.998779408863205e-05, "loss": 0.0191, "step": 539 }, { "epoch": 0.1044891640866873, "grad_norm": 0.16623467206954956, "learning_rate": 9.998773010259934e-05, "loss": 0.0154, "step": 540 }, { "epoch": 0.10468266253869969, "grad_norm": 0.162513867020607, "learning_rate": 9.998766594931332e-05, "loss": 0.0141, "step": 541 }, { "epoch": 0.10487616099071208, "grad_norm": 0.12621526420116425, "learning_rate": 9.998760162877426e-05, "loss": 0.0153, "step": 542 }, { "epoch": 0.10506965944272446, "grad_norm": 0.06010030210018158, "learning_rate": 9.998753714098236e-05, "loss": 0.0142, "step": 543 }, { "epoch": 0.10526315789473684, "grad_norm": 0.12535838782787323, "learning_rate": 9.99874724859379e-05, "loss": 0.0175, "step": 544 }, { "epoch": 
0.10545665634674922, "grad_norm": 0.08964352309703827, "learning_rate": 9.998740766364109e-05, "loss": 0.0146, "step": 545 }, { "epoch": 0.10565015479876161, "grad_norm": 0.07647857069969177, "learning_rate": 9.998734267409219e-05, "loss": 0.0141, "step": 546 }, { "epoch": 0.105843653250774, "grad_norm": 0.11711270362138748, "learning_rate": 9.998727751729143e-05, "loss": 0.0176, "step": 547 }, { "epoch": 0.10603715170278638, "grad_norm": 0.10664697736501694, "learning_rate": 9.998721219323907e-05, "loss": 0.014, "step": 548 }, { "epoch": 0.10623065015479877, "grad_norm": 0.1445661187171936, "learning_rate": 9.998714670193533e-05, "loss": 0.017, "step": 549 }, { "epoch": 0.10642414860681114, "grad_norm": 0.1638614535331726, "learning_rate": 9.998708104338047e-05, "loss": 0.0149, "step": 550 }, { "epoch": 0.10661764705882353, "grad_norm": 0.14410540461540222, "learning_rate": 9.998701521757472e-05, "loss": 0.0154, "step": 551 }, { "epoch": 0.10681114551083591, "grad_norm": 0.12261345982551575, "learning_rate": 9.998694922451834e-05, "loss": 0.0185, "step": 552 }, { "epoch": 0.1070046439628483, "grad_norm": 0.19562691450119019, "learning_rate": 9.998688306421156e-05, "loss": 0.0139, "step": 553 }, { "epoch": 0.10719814241486068, "grad_norm": 0.12469644844532013, "learning_rate": 9.998681673665464e-05, "loss": 0.0197, "step": 554 }, { "epoch": 0.10739164086687307, "grad_norm": 0.13324326276779175, "learning_rate": 9.998675024184781e-05, "loss": 0.0154, "step": 555 }, { "epoch": 0.10758513931888544, "grad_norm": 0.10913301259279251, "learning_rate": 9.998668357979134e-05, "loss": 0.0125, "step": 556 }, { "epoch": 0.10777863777089783, "grad_norm": 0.11818792670965195, "learning_rate": 9.998661675048545e-05, "loss": 0.017, "step": 557 }, { "epoch": 0.10797213622291021, "grad_norm": 0.15055303275585175, "learning_rate": 9.998654975393044e-05, "loss": 0.0174, "step": 558 }, { "epoch": 0.1081656346749226, "grad_norm": 0.1604711413383484, "learning_rate": 9.99864825901265e-05, "loss": 0.0151, "step": 559 }, { "epoch": 0.10835913312693499, "grad_norm": 0.12810221314430237, "learning_rate": 9.998641525907389e-05, "loss": 0.0139, "step": 560 }, { "epoch": 0.10855263157894737, "grad_norm": 0.1959846317768097, "learning_rate": 9.998634776077288e-05, "loss": 0.0193, "step": 561 }, { "epoch": 0.10874613003095976, "grad_norm": 0.12713240087032318, "learning_rate": 9.998628009522372e-05, "loss": 0.015, "step": 562 }, { "epoch": 0.10893962848297213, "grad_norm": 0.18000543117523193, "learning_rate": 9.998621226242664e-05, "loss": 0.0157, "step": 563 }, { "epoch": 0.10913312693498452, "grad_norm": 0.19103512167930603, "learning_rate": 9.998614426238194e-05, "loss": 0.0177, "step": 564 }, { "epoch": 0.1093266253869969, "grad_norm": 0.09686203300952911, "learning_rate": 9.998607609508981e-05, "loss": 0.0139, "step": 565 }, { "epoch": 0.10952012383900929, "grad_norm": 0.19410526752471924, "learning_rate": 9.998600776055056e-05, "loss": 0.0152, "step": 566 }, { "epoch": 0.10971362229102168, "grad_norm": 0.16006101667881012, "learning_rate": 9.998593925876439e-05, "loss": 0.0153, "step": 567 }, { "epoch": 0.10990712074303406, "grad_norm": 0.12919403612613678, "learning_rate": 9.998587058973158e-05, "loss": 0.0137, "step": 568 }, { "epoch": 0.11010061919504643, "grad_norm": 0.18229806423187256, "learning_rate": 9.998580175345242e-05, "loss": 0.0156, "step": 569 }, { "epoch": 0.11029411764705882, "grad_norm": 0.11208629608154297, "learning_rate": 9.998573274992711e-05, "loss": 0.0139, "step": 570 }, { "epoch": 
0.1104876160990712, "grad_norm": 0.21862676739692688, "learning_rate": 9.998566357915595e-05, "loss": 0.0121, "step": 571 }, { "epoch": 0.11068111455108359, "grad_norm": 0.17535775899887085, "learning_rate": 9.998559424113916e-05, "loss": 0.0141, "step": 572 }, { "epoch": 0.11087461300309598, "grad_norm": 0.1638898104429245, "learning_rate": 9.998552473587702e-05, "loss": 0.0163, "step": 573 }, { "epoch": 0.11106811145510836, "grad_norm": 0.17864654958248138, "learning_rate": 9.99854550633698e-05, "loss": 0.0165, "step": 574 }, { "epoch": 0.11126160990712074, "grad_norm": 0.13348756730556488, "learning_rate": 9.99853852236177e-05, "loss": 0.0154, "step": 575 }, { "epoch": 0.11145510835913312, "grad_norm": 0.17177163064479828, "learning_rate": 9.998531521662106e-05, "loss": 0.0152, "step": 576 }, { "epoch": 0.11164860681114551, "grad_norm": 0.17894381284713745, "learning_rate": 9.998524504238009e-05, "loss": 0.0153, "step": 577 }, { "epoch": 0.1118421052631579, "grad_norm": 0.23988372087478638, "learning_rate": 9.998517470089508e-05, "loss": 0.0156, "step": 578 }, { "epoch": 0.11203560371517028, "grad_norm": 0.1936187893152237, "learning_rate": 9.998510419216626e-05, "loss": 0.0178, "step": 579 }, { "epoch": 0.11222910216718267, "grad_norm": 0.10353164374828339, "learning_rate": 9.99850335161939e-05, "loss": 0.0157, "step": 580 }, { "epoch": 0.11242260061919504, "grad_norm": 0.2845863401889801, "learning_rate": 9.998496267297828e-05, "loss": 0.0177, "step": 581 }, { "epoch": 0.11261609907120743, "grad_norm": 0.17142993211746216, "learning_rate": 9.998489166251967e-05, "loss": 0.0153, "step": 582 }, { "epoch": 0.11280959752321981, "grad_norm": 0.2268482893705368, "learning_rate": 9.998482048481828e-05, "loss": 0.0187, "step": 583 }, { "epoch": 0.1130030959752322, "grad_norm": 0.18636220693588257, "learning_rate": 9.998474913987444e-05, "loss": 0.0167, "step": 584 }, { "epoch": 0.11319659442724458, "grad_norm": 0.1283341497182846, "learning_rate": 9.998467762768839e-05, "loss": 0.0158, "step": 585 }, { "epoch": 0.11339009287925697, "grad_norm": 0.19159427285194397, "learning_rate": 9.998460594826037e-05, "loss": 0.0187, "step": 586 }, { "epoch": 0.11358359133126934, "grad_norm": 0.1241028681397438, "learning_rate": 9.998453410159068e-05, "loss": 0.0158, "step": 587 }, { "epoch": 0.11377708978328173, "grad_norm": 0.16838383674621582, "learning_rate": 9.998446208767957e-05, "loss": 0.0159, "step": 588 }, { "epoch": 0.11397058823529412, "grad_norm": 0.09692959487438202, "learning_rate": 9.998438990652732e-05, "loss": 0.0153, "step": 589 }, { "epoch": 0.1141640866873065, "grad_norm": 0.17475369572639465, "learning_rate": 9.99843175581342e-05, "loss": 0.0153, "step": 590 }, { "epoch": 0.11435758513931889, "grad_norm": 0.06387436389923096, "learning_rate": 9.998424504250047e-05, "loss": 0.014, "step": 591 }, { "epoch": 0.11455108359133127, "grad_norm": 0.20406734943389893, "learning_rate": 9.998417235962639e-05, "loss": 0.0169, "step": 592 }, { "epoch": 0.11474458204334366, "grad_norm": 0.06303288042545319, "learning_rate": 9.998409950951225e-05, "loss": 0.0165, "step": 593 }, { "epoch": 0.11493808049535603, "grad_norm": 0.18696415424346924, "learning_rate": 9.998402649215832e-05, "loss": 0.0184, "step": 594 }, { "epoch": 0.11513157894736842, "grad_norm": 0.2163047343492508, "learning_rate": 9.998395330756485e-05, "loss": 0.0146, "step": 595 }, { "epoch": 0.1153250773993808, "grad_norm": 0.22201722860336304, "learning_rate": 9.998387995573214e-05, "loss": 0.0196, "step": 596 }, { "epoch": 
0.11551857585139319, "grad_norm": 0.2044106125831604, "learning_rate": 9.998380643666043e-05, "loss": 0.0179, "step": 597 }, { "epoch": 0.11571207430340558, "grad_norm": 0.1646067202091217, "learning_rate": 9.998373275035003e-05, "loss": 0.016, "step": 598 }, { "epoch": 0.11590557275541796, "grad_norm": 0.18838076293468475, "learning_rate": 9.998365889680118e-05, "loss": 0.0173, "step": 599 }, { "epoch": 0.11609907120743033, "grad_norm": 0.2645617127418518, "learning_rate": 9.998358487601416e-05, "loss": 0.0154, "step": 600 }, { "epoch": 0.11629256965944272, "grad_norm": 0.13073532283306122, "learning_rate": 9.998351068798929e-05, "loss": 0.0152, "step": 601 }, { "epoch": 0.11648606811145511, "grad_norm": 0.23354952037334442, "learning_rate": 9.99834363327268e-05, "loss": 0.0181, "step": 602 }, { "epoch": 0.1166795665634675, "grad_norm": 0.25897935032844543, "learning_rate": 9.998336181022697e-05, "loss": 0.0166, "step": 603 }, { "epoch": 0.11687306501547988, "grad_norm": 0.17864486575126648, "learning_rate": 9.99832871204901e-05, "loss": 0.015, "step": 604 }, { "epoch": 0.11706656346749227, "grad_norm": 0.25040486454963684, "learning_rate": 9.998321226351644e-05, "loss": 0.019, "step": 605 }, { "epoch": 0.11726006191950464, "grad_norm": 0.14509621262550354, "learning_rate": 9.998313723930627e-05, "loss": 0.0183, "step": 606 }, { "epoch": 0.11745356037151702, "grad_norm": 0.13668470084667206, "learning_rate": 9.99830620478599e-05, "loss": 0.0155, "step": 607 }, { "epoch": 0.11764705882352941, "grad_norm": 0.12887568771839142, "learning_rate": 9.998298668917758e-05, "loss": 0.0136, "step": 608 }, { "epoch": 0.1178405572755418, "grad_norm": 0.12194279581308365, "learning_rate": 9.99829111632596e-05, "loss": 0.016, "step": 609 }, { "epoch": 0.11803405572755418, "grad_norm": 0.13247911632061005, "learning_rate": 9.998283547010624e-05, "loss": 0.0226, "step": 610 }, { "epoch": 0.11822755417956657, "grad_norm": 0.09823466837406158, "learning_rate": 9.998275960971778e-05, "loss": 0.0162, "step": 611 }, { "epoch": 0.11842105263157894, "grad_norm": 0.2533249855041504, "learning_rate": 9.998268358209451e-05, "loss": 0.0142, "step": 612 }, { "epoch": 0.11861455108359133, "grad_norm": 0.3223123550415039, "learning_rate": 9.99826073872367e-05, "loss": 0.015, "step": 613 }, { "epoch": 0.11880804953560371, "grad_norm": 0.13202299177646637, "learning_rate": 9.998253102514464e-05, "loss": 0.018, "step": 614 }, { "epoch": 0.1190015479876161, "grad_norm": 0.17719624936580658, "learning_rate": 9.998245449581862e-05, "loss": 0.0148, "step": 615 }, { "epoch": 0.11919504643962849, "grad_norm": 0.10681665688753128, "learning_rate": 9.998237779925891e-05, "loss": 0.0151, "step": 616 }, { "epoch": 0.11938854489164087, "grad_norm": 0.18468183279037476, "learning_rate": 9.998230093546582e-05, "loss": 0.0178, "step": 617 }, { "epoch": 0.11958204334365326, "grad_norm": 0.06731060892343521, "learning_rate": 9.998222390443959e-05, "loss": 0.0166, "step": 618 }, { "epoch": 0.11977554179566563, "grad_norm": 0.19424590468406677, "learning_rate": 9.998214670618056e-05, "loss": 0.0164, "step": 619 }, { "epoch": 0.11996904024767802, "grad_norm": 0.08380955457687378, "learning_rate": 9.998206934068899e-05, "loss": 0.0129, "step": 620 }, { "epoch": 0.1201625386996904, "grad_norm": 0.2286527305841446, "learning_rate": 9.998199180796516e-05, "loss": 0.0147, "step": 621 }, { "epoch": 0.12035603715170279, "grad_norm": 0.12668083608150482, "learning_rate": 9.998191410800938e-05, "loss": 0.0163, "step": 622 }, { "epoch": 
0.12054953560371517, "grad_norm": 0.16421356797218323, "learning_rate": 9.998183624082191e-05, "loss": 0.012, "step": 623 }, { "epoch": 0.12074303405572756, "grad_norm": 0.09529675543308258, "learning_rate": 9.998175820640307e-05, "loss": 0.0153, "step": 624 }, { "epoch": 0.12093653250773993, "grad_norm": 0.12983748316764832, "learning_rate": 9.998168000475313e-05, "loss": 0.0142, "step": 625 }, { "epoch": 0.12113003095975232, "grad_norm": 0.11985009908676147, "learning_rate": 9.99816016358724e-05, "loss": 0.016, "step": 626 }, { "epoch": 0.1213235294117647, "grad_norm": 0.20430438220500946, "learning_rate": 9.998152309976114e-05, "loss": 0.0162, "step": 627 }, { "epoch": 0.12151702786377709, "grad_norm": 0.15459071099758148, "learning_rate": 9.998144439641969e-05, "loss": 0.018, "step": 628 }, { "epoch": 0.12171052631578948, "grad_norm": 0.23596379160881042, "learning_rate": 9.998136552584828e-05, "loss": 0.013, "step": 629 }, { "epoch": 0.12190402476780186, "grad_norm": 0.21394267678260803, "learning_rate": 9.998128648804724e-05, "loss": 0.0168, "step": 630 }, { "epoch": 0.12209752321981424, "grad_norm": 0.1425265371799469, "learning_rate": 9.998120728301689e-05, "loss": 0.0148, "step": 631 }, { "epoch": 0.12229102167182662, "grad_norm": 0.20041723549365997, "learning_rate": 9.998112791075747e-05, "loss": 0.0166, "step": 632 }, { "epoch": 0.12248452012383901, "grad_norm": 0.09276635199785233, "learning_rate": 9.998104837126931e-05, "loss": 0.0135, "step": 633 }, { "epoch": 0.1226780185758514, "grad_norm": 0.1827368289232254, "learning_rate": 9.998096866455269e-05, "loss": 0.0154, "step": 634 }, { "epoch": 0.12287151702786378, "grad_norm": 0.11411753296852112, "learning_rate": 9.998088879060791e-05, "loss": 0.0134, "step": 635 }, { "epoch": 0.12306501547987617, "grad_norm": 0.12606804072856903, "learning_rate": 9.998080874943528e-05, "loss": 0.0137, "step": 636 }, { "epoch": 0.12325851393188854, "grad_norm": 0.12572485208511353, "learning_rate": 9.998072854103508e-05, "loss": 0.0152, "step": 637 }, { "epoch": 0.12345201238390092, "grad_norm": 0.18822698295116425, "learning_rate": 9.99806481654076e-05, "loss": 0.0166, "step": 638 }, { "epoch": 0.12364551083591331, "grad_norm": 0.051687102764844894, "learning_rate": 9.998056762255318e-05, "loss": 0.0154, "step": 639 }, { "epoch": 0.1238390092879257, "grad_norm": 0.12700212001800537, "learning_rate": 9.998048691247207e-05, "loss": 0.0173, "step": 640 }, { "epoch": 0.12403250773993808, "grad_norm": 0.10711104422807693, "learning_rate": 9.998040603516461e-05, "loss": 0.0176, "step": 641 }, { "epoch": 0.12422600619195047, "grad_norm": 0.14571908116340637, "learning_rate": 9.998032499063107e-05, "loss": 0.0146, "step": 642 }, { "epoch": 0.12441950464396284, "grad_norm": 0.13485382497310638, "learning_rate": 9.998024377887178e-05, "loss": 0.0174, "step": 643 }, { "epoch": 0.12461300309597523, "grad_norm": 0.1596008837223053, "learning_rate": 9.998016239988704e-05, "loss": 0.0178, "step": 644 }, { "epoch": 0.12480650154798761, "grad_norm": 0.11844892054796219, "learning_rate": 9.998008085367711e-05, "loss": 0.0147, "step": 645 }, { "epoch": 0.125, "grad_norm": 0.131742462515831, "learning_rate": 9.997999914024233e-05, "loss": 0.0144, "step": 646 }, { "epoch": 0.1251934984520124, "grad_norm": 0.1250230073928833, "learning_rate": 9.9979917259583e-05, "loss": 0.0156, "step": 647 }, { "epoch": 0.12538699690402477, "grad_norm": 0.09437748044729233, "learning_rate": 9.997983521169942e-05, "loss": 0.0164, "step": 648 }, { "epoch": 0.12558049535603716, 
"grad_norm": 0.2338043749332428, "learning_rate": 9.997975299659189e-05, "loss": 0.018, "step": 649 }, { "epoch": 0.12577399380804954, "grad_norm": 0.09364930540323257, "learning_rate": 9.997967061426074e-05, "loss": 0.0158, "step": 650 }, { "epoch": 0.12596749226006193, "grad_norm": 0.17874343693256378, "learning_rate": 9.997958806470624e-05, "loss": 0.0176, "step": 651 }, { "epoch": 0.12616099071207432, "grad_norm": 0.15790553390979767, "learning_rate": 9.997950534792872e-05, "loss": 0.0131, "step": 652 }, { "epoch": 0.12635448916408668, "grad_norm": 0.1671922355890274, "learning_rate": 9.997942246392849e-05, "loss": 0.0157, "step": 653 }, { "epoch": 0.12654798761609906, "grad_norm": 0.16015441715717316, "learning_rate": 9.997933941270583e-05, "loss": 0.0148, "step": 654 }, { "epoch": 0.12674148606811145, "grad_norm": 0.14003457129001617, "learning_rate": 9.997925619426109e-05, "loss": 0.0154, "step": 655 }, { "epoch": 0.12693498452012383, "grad_norm": 0.071893110871315, "learning_rate": 9.997917280859454e-05, "loss": 0.0175, "step": 656 }, { "epoch": 0.12712848297213622, "grad_norm": 0.08053063601255417, "learning_rate": 9.997908925570653e-05, "loss": 0.0144, "step": 657 }, { "epoch": 0.1273219814241486, "grad_norm": 0.050996679812669754, "learning_rate": 9.997900553559733e-05, "loss": 0.0117, "step": 658 }, { "epoch": 0.127515479876161, "grad_norm": 0.10648167133331299, "learning_rate": 9.997892164826729e-05, "loss": 0.0152, "step": 659 }, { "epoch": 0.12770897832817338, "grad_norm": 0.09036972373723984, "learning_rate": 9.997883759371669e-05, "loss": 0.0155, "step": 660 }, { "epoch": 0.12790247678018576, "grad_norm": 0.09348076581954956, "learning_rate": 9.997875337194585e-05, "loss": 0.0152, "step": 661 }, { "epoch": 0.12809597523219815, "grad_norm": 0.11368770897388458, "learning_rate": 9.99786689829551e-05, "loss": 0.0139, "step": 662 }, { "epoch": 0.12828947368421054, "grad_norm": 0.23048613965511322, "learning_rate": 9.997858442674473e-05, "loss": 0.0157, "step": 663 }, { "epoch": 0.12848297213622292, "grad_norm": 0.13578467071056366, "learning_rate": 9.997849970331509e-05, "loss": 0.0148, "step": 664 }, { "epoch": 0.12867647058823528, "grad_norm": 0.23192745447158813, "learning_rate": 9.997841481266645e-05, "loss": 0.0175, "step": 665 }, { "epoch": 0.12886996904024767, "grad_norm": 0.11708995699882507, "learning_rate": 9.997832975479915e-05, "loss": 0.0168, "step": 666 }, { "epoch": 0.12906346749226005, "grad_norm": 0.29786911606788635, "learning_rate": 9.997824452971349e-05, "loss": 0.014, "step": 667 }, { "epoch": 0.12925696594427244, "grad_norm": 0.0724741593003273, "learning_rate": 9.997815913740982e-05, "loss": 0.0151, "step": 668 }, { "epoch": 0.12945046439628483, "grad_norm": 0.2567076086997986, "learning_rate": 9.997807357788844e-05, "loss": 0.0156, "step": 669 }, { "epoch": 0.1296439628482972, "grad_norm": 0.08424907177686691, "learning_rate": 9.997798785114966e-05, "loss": 0.012, "step": 670 }, { "epoch": 0.1298374613003096, "grad_norm": 0.19569481909275055, "learning_rate": 9.997790195719378e-05, "loss": 0.0164, "step": 671 }, { "epoch": 0.13003095975232198, "grad_norm": 0.10988965630531311, "learning_rate": 9.997781589602116e-05, "loss": 0.0142, "step": 672 }, { "epoch": 0.13022445820433437, "grad_norm": 0.2068198174238205, "learning_rate": 9.997772966763211e-05, "loss": 0.0151, "step": 673 }, { "epoch": 0.13041795665634676, "grad_norm": 0.087258480489254, "learning_rate": 9.997764327202695e-05, "loss": 0.0134, "step": 674 }, { "epoch": 0.13061145510835914, 
"grad_norm": 0.19325792789459229, "learning_rate": 9.997755670920599e-05, "loss": 0.0165, "step": 675 }, { "epoch": 0.13080495356037153, "grad_norm": 0.07151731848716736, "learning_rate": 9.997746997916955e-05, "loss": 0.0171, "step": 676 }, { "epoch": 0.13099845201238391, "grad_norm": 0.10160386562347412, "learning_rate": 9.997738308191796e-05, "loss": 0.0165, "step": 677 }, { "epoch": 0.13119195046439627, "grad_norm": 0.11514932662248611, "learning_rate": 9.997729601745155e-05, "loss": 0.0149, "step": 678 }, { "epoch": 0.13138544891640866, "grad_norm": 0.07088000327348709, "learning_rate": 9.997720878577062e-05, "loss": 0.0156, "step": 679 }, { "epoch": 0.13157894736842105, "grad_norm": 0.18691743910312653, "learning_rate": 9.997712138687552e-05, "loss": 0.0165, "step": 680 }, { "epoch": 0.13177244582043343, "grad_norm": 0.08300840109586716, "learning_rate": 9.997703382076656e-05, "loss": 0.0142, "step": 681 }, { "epoch": 0.13196594427244582, "grad_norm": 0.1032569631934166, "learning_rate": 9.997694608744408e-05, "loss": 0.0153, "step": 682 }, { "epoch": 0.1321594427244582, "grad_norm": 0.08492562919855118, "learning_rate": 9.99768581869084e-05, "loss": 0.0132, "step": 683 }, { "epoch": 0.1323529411764706, "grad_norm": 0.09973819553852081, "learning_rate": 9.997677011915983e-05, "loss": 0.012, "step": 684 }, { "epoch": 0.13254643962848298, "grad_norm": 0.0883437842130661, "learning_rate": 9.997668188419872e-05, "loss": 0.0161, "step": 685 }, { "epoch": 0.13273993808049536, "grad_norm": 0.061680085957050323, "learning_rate": 9.99765934820254e-05, "loss": 0.0124, "step": 686 }, { "epoch": 0.13293343653250775, "grad_norm": 0.11142054945230484, "learning_rate": 9.997650491264017e-05, "loss": 0.0182, "step": 687 }, { "epoch": 0.13312693498452013, "grad_norm": 0.1862499713897705, "learning_rate": 9.997641617604338e-05, "loss": 0.0145, "step": 688 }, { "epoch": 0.13332043343653252, "grad_norm": 0.20776398479938507, "learning_rate": 9.997632727223536e-05, "loss": 0.015, "step": 689 }, { "epoch": 0.13351393188854488, "grad_norm": 0.09987493604421616, "learning_rate": 9.997623820121645e-05, "loss": 0.0139, "step": 690 }, { "epoch": 0.13370743034055727, "grad_norm": 0.18685755133628845, "learning_rate": 9.997614896298693e-05, "loss": 0.0134, "step": 691 }, { "epoch": 0.13390092879256965, "grad_norm": 0.08515207469463348, "learning_rate": 9.997605955754721e-05, "loss": 0.0162, "step": 692 }, { "epoch": 0.13409442724458204, "grad_norm": 0.11308068782091141, "learning_rate": 9.997596998489758e-05, "loss": 0.0185, "step": 693 }, { "epoch": 0.13428792569659442, "grad_norm": 0.22210834920406342, "learning_rate": 9.997588024503836e-05, "loss": 0.0154, "step": 694 }, { "epoch": 0.1344814241486068, "grad_norm": 0.11497639864683151, "learning_rate": 9.99757903379699e-05, "loss": 0.0144, "step": 695 }, { "epoch": 0.1346749226006192, "grad_norm": 0.2125556766986847, "learning_rate": 9.997570026369254e-05, "loss": 0.0176, "step": 696 }, { "epoch": 0.13486842105263158, "grad_norm": 0.1716739982366562, "learning_rate": 9.997561002220661e-05, "loss": 0.0174, "step": 697 }, { "epoch": 0.13506191950464397, "grad_norm": 0.16275672614574432, "learning_rate": 9.997551961351243e-05, "loss": 0.0135, "step": 698 }, { "epoch": 0.13525541795665635, "grad_norm": 0.20716483891010284, "learning_rate": 9.997542903761037e-05, "loss": 0.0147, "step": 699 }, { "epoch": 0.13544891640866874, "grad_norm": 0.2876508831977844, "learning_rate": 9.997533829450075e-05, "loss": 0.0152, "step": 700 }, { "epoch": 0.13564241486068113, 
"grad_norm": 0.10584639757871628, "learning_rate": 9.997524738418389e-05, "loss": 0.0135, "step": 701 }, { "epoch": 0.13583591331269348, "grad_norm": 0.17386682331562042, "learning_rate": 9.997515630666015e-05, "loss": 0.0188, "step": 702 }, { "epoch": 0.13602941176470587, "grad_norm": 0.09306498616933823, "learning_rate": 9.997506506192985e-05, "loss": 0.0158, "step": 703 }, { "epoch": 0.13622291021671826, "grad_norm": 0.2294795662164688, "learning_rate": 9.997497364999334e-05, "loss": 0.0161, "step": 704 }, { "epoch": 0.13641640866873064, "grad_norm": 0.1625499576330185, "learning_rate": 9.997488207085097e-05, "loss": 0.0175, "step": 705 }, { "epoch": 0.13660990712074303, "grad_norm": 0.2061169296503067, "learning_rate": 9.997479032450308e-05, "loss": 0.0175, "step": 706 }, { "epoch": 0.13680340557275542, "grad_norm": 0.18968093395233154, "learning_rate": 9.997469841094998e-05, "loss": 0.0162, "step": 707 }, { "epoch": 0.1369969040247678, "grad_norm": 0.21248947083950043, "learning_rate": 9.997460633019204e-05, "loss": 0.0151, "step": 708 }, { "epoch": 0.1371904024767802, "grad_norm": 0.22977840900421143, "learning_rate": 9.997451408222961e-05, "loss": 0.0163, "step": 709 }, { "epoch": 0.13738390092879257, "grad_norm": 0.19691069424152374, "learning_rate": 9.9974421667063e-05, "loss": 0.0149, "step": 710 }, { "epoch": 0.13757739938080496, "grad_norm": 0.26918795704841614, "learning_rate": 9.997432908469259e-05, "loss": 0.0175, "step": 711 }, { "epoch": 0.13777089783281735, "grad_norm": 0.15634995698928833, "learning_rate": 9.997423633511869e-05, "loss": 0.0146, "step": 712 }, { "epoch": 0.13796439628482973, "grad_norm": 0.26605984568595886, "learning_rate": 9.997414341834168e-05, "loss": 0.0161, "step": 713 }, { "epoch": 0.13815789473684212, "grad_norm": 0.1703415811061859, "learning_rate": 9.997405033436187e-05, "loss": 0.0165, "step": 714 }, { "epoch": 0.13835139318885448, "grad_norm": 0.23611602187156677, "learning_rate": 9.997395708317961e-05, "loss": 0.0131, "step": 715 }, { "epoch": 0.13854489164086686, "grad_norm": 0.21918901801109314, "learning_rate": 9.997386366479529e-05, "loss": 0.0164, "step": 716 }, { "epoch": 0.13873839009287925, "grad_norm": 0.30985185503959656, "learning_rate": 9.997377007920921e-05, "loss": 0.0163, "step": 717 }, { "epoch": 0.13893188854489164, "grad_norm": 0.12042386084794998, "learning_rate": 9.997367632642173e-05, "loss": 0.0149, "step": 718 }, { "epoch": 0.13912538699690402, "grad_norm": 0.1273852437734604, "learning_rate": 9.997358240643322e-05, "loss": 0.0161, "step": 719 }, { "epoch": 0.1393188854489164, "grad_norm": 0.17168159782886505, "learning_rate": 9.997348831924401e-05, "loss": 0.016, "step": 720 }, { "epoch": 0.1395123839009288, "grad_norm": 0.11250752210617065, "learning_rate": 9.997339406485444e-05, "loss": 0.0148, "step": 721 }, { "epoch": 0.13970588235294118, "grad_norm": 0.18912795186042786, "learning_rate": 9.997329964326487e-05, "loss": 0.0157, "step": 722 }, { "epoch": 0.13989938080495357, "grad_norm": 0.10272803157567978, "learning_rate": 9.997320505447568e-05, "loss": 0.0171, "step": 723 }, { "epoch": 0.14009287925696595, "grad_norm": 0.2337987869977951, "learning_rate": 9.997311029848717e-05, "loss": 0.0116, "step": 724 }, { "epoch": 0.14028637770897834, "grad_norm": 0.09886428713798523, "learning_rate": 9.997301537529972e-05, "loss": 0.0155, "step": 725 }, { "epoch": 0.14047987616099072, "grad_norm": 0.2378496676683426, "learning_rate": 9.997292028491369e-05, "loss": 0.0156, "step": 726 }, { "epoch": 0.14067337461300308, 
"grad_norm": 0.1320827752351761, "learning_rate": 9.997282502732942e-05, "loss": 0.0184, "step": 727 }, { "epoch": 0.14086687306501547, "grad_norm": 0.08791425079107285, "learning_rate": 9.997272960254725e-05, "loss": 0.0161, "step": 728 }, { "epoch": 0.14106037151702785, "grad_norm": 0.27593544125556946, "learning_rate": 9.99726340105676e-05, "loss": 0.0148, "step": 729 }, { "epoch": 0.14125386996904024, "grad_norm": 0.08548282831907272, "learning_rate": 9.997253825139072e-05, "loss": 0.014, "step": 730 }, { "epoch": 0.14144736842105263, "grad_norm": 0.2457757443189621, "learning_rate": 9.997244232501705e-05, "loss": 0.017, "step": 731 }, { "epoch": 0.141640866873065, "grad_norm": 0.10322213172912598, "learning_rate": 9.997234623144694e-05, "loss": 0.0158, "step": 732 }, { "epoch": 0.1418343653250774, "grad_norm": 0.16134801506996155, "learning_rate": 9.997224997068071e-05, "loss": 0.0114, "step": 733 }, { "epoch": 0.14202786377708979, "grad_norm": 0.27248701453208923, "learning_rate": 9.997215354271874e-05, "loss": 0.014, "step": 734 }, { "epoch": 0.14222136222910217, "grad_norm": 0.2359810173511505, "learning_rate": 9.997205694756138e-05, "loss": 0.0162, "step": 735 }, { "epoch": 0.14241486068111456, "grad_norm": 0.34962454438209534, "learning_rate": 9.997196018520901e-05, "loss": 0.014, "step": 736 }, { "epoch": 0.14260835913312694, "grad_norm": 0.2846287190914154, "learning_rate": 9.997186325566195e-05, "loss": 0.0167, "step": 737 }, { "epoch": 0.14280185758513933, "grad_norm": 0.22854816913604736, "learning_rate": 9.997176615892058e-05, "loss": 0.0173, "step": 738 }, { "epoch": 0.14299535603715172, "grad_norm": 0.14334002137184143, "learning_rate": 9.997166889498527e-05, "loss": 0.0145, "step": 739 }, { "epoch": 0.14318885448916407, "grad_norm": 0.24609757959842682, "learning_rate": 9.99715714638564e-05, "loss": 0.0165, "step": 740 }, { "epoch": 0.14338235294117646, "grad_norm": 0.16444715857505798, "learning_rate": 9.997147386553428e-05, "loss": 0.0145, "step": 741 }, { "epoch": 0.14357585139318885, "grad_norm": 0.20545263588428497, "learning_rate": 9.997137610001931e-05, "loss": 0.0157, "step": 742 }, { "epoch": 0.14376934984520123, "grad_norm": 0.13248521089553833, "learning_rate": 9.997127816731185e-05, "loss": 0.0195, "step": 743 }, { "epoch": 0.14396284829721362, "grad_norm": 0.27761849761009216, "learning_rate": 9.997118006741227e-05, "loss": 0.0187, "step": 744 }, { "epoch": 0.144156346749226, "grad_norm": 0.11521649360656738, "learning_rate": 9.997108180032091e-05, "loss": 0.0145, "step": 745 }, { "epoch": 0.1443498452012384, "grad_norm": 0.24654416739940643, "learning_rate": 9.997098336603816e-05, "loss": 0.0161, "step": 746 }, { "epoch": 0.14454334365325078, "grad_norm": 0.08247005939483643, "learning_rate": 9.997088476456437e-05, "loss": 0.0148, "step": 747 }, { "epoch": 0.14473684210526316, "grad_norm": 0.15498130023479462, "learning_rate": 9.99707859958999e-05, "loss": 0.0124, "step": 748 }, { "epoch": 0.14493034055727555, "grad_norm": 0.19367820024490356, "learning_rate": 9.997068706004512e-05, "loss": 0.0172, "step": 749 }, { "epoch": 0.14512383900928794, "grad_norm": 0.0930752232670784, "learning_rate": 9.997058795700044e-05, "loss": 0.0173, "step": 750 }, { "epoch": 0.14531733746130032, "grad_norm": 0.20045693218708038, "learning_rate": 9.997048868676616e-05, "loss": 0.0179, "step": 751 }, { "epoch": 0.14551083591331268, "grad_norm": 0.1179644986987114, "learning_rate": 9.997038924934271e-05, "loss": 0.0157, "step": 752 }, { "epoch": 0.14570433436532507, 
"grad_norm": 0.06462740898132324, "learning_rate": 9.997028964473043e-05, "loss": 0.0145, "step": 753 }, { "epoch": 0.14589783281733745, "grad_norm": 0.06943650543689728, "learning_rate": 9.997018987292968e-05, "loss": 0.0152, "step": 754 }, { "epoch": 0.14609133126934984, "grad_norm": 0.12493183463811874, "learning_rate": 9.997008993394086e-05, "loss": 0.0173, "step": 755 }, { "epoch": 0.14628482972136223, "grad_norm": 0.1331477016210556, "learning_rate": 9.996998982776431e-05, "loss": 0.0132, "step": 756 }, { "epoch": 0.1464783281733746, "grad_norm": 0.12436734884977341, "learning_rate": 9.996988955440044e-05, "loss": 0.0164, "step": 757 }, { "epoch": 0.146671826625387, "grad_norm": 0.15036210417747498, "learning_rate": 9.996978911384959e-05, "loss": 0.0175, "step": 758 }, { "epoch": 0.14686532507739938, "grad_norm": 0.07312178611755371, "learning_rate": 9.996968850611214e-05, "loss": 0.0156, "step": 759 }, { "epoch": 0.14705882352941177, "grad_norm": 0.1713397204875946, "learning_rate": 9.996958773118847e-05, "loss": 0.0135, "step": 760 }, { "epoch": 0.14725232198142416, "grad_norm": 0.08206896483898163, "learning_rate": 9.996948678907897e-05, "loss": 0.0151, "step": 761 }, { "epoch": 0.14744582043343654, "grad_norm": 0.13457146286964417, "learning_rate": 9.996938567978399e-05, "loss": 0.0135, "step": 762 }, { "epoch": 0.14763931888544893, "grad_norm": 0.07590144872665405, "learning_rate": 9.99692844033039e-05, "loss": 0.0141, "step": 763 }, { "epoch": 0.14783281733746131, "grad_norm": 0.12825866043567657, "learning_rate": 9.99691829596391e-05, "loss": 0.0162, "step": 764 }, { "epoch": 0.14802631578947367, "grad_norm": 0.09500410407781601, "learning_rate": 9.996908134878996e-05, "loss": 0.017, "step": 765 }, { "epoch": 0.14821981424148606, "grad_norm": 0.1264011263847351, "learning_rate": 9.996897957075685e-05, "loss": 0.0158, "step": 766 }, { "epoch": 0.14841331269349844, "grad_norm": 0.0677277073264122, "learning_rate": 9.996887762554018e-05, "loss": 0.0112, "step": 767 }, { "epoch": 0.14860681114551083, "grad_norm": 0.10702094435691833, "learning_rate": 9.996877551314028e-05, "loss": 0.0144, "step": 768 }, { "epoch": 0.14880030959752322, "grad_norm": 0.24827954173088074, "learning_rate": 9.996867323355757e-05, "loss": 0.0147, "step": 769 }, { "epoch": 0.1489938080495356, "grad_norm": 0.2853389382362366, "learning_rate": 9.99685707867924e-05, "loss": 0.0155, "step": 770 }, { "epoch": 0.149187306501548, "grad_norm": 0.15682353079319, "learning_rate": 9.996846817284517e-05, "loss": 0.0174, "step": 771 }, { "epoch": 0.14938080495356038, "grad_norm": 0.30391210317611694, "learning_rate": 9.996836539171625e-05, "loss": 0.0183, "step": 772 }, { "epoch": 0.14957430340557276, "grad_norm": 0.2457723766565323, "learning_rate": 9.996826244340604e-05, "loss": 0.0152, "step": 773 }, { "epoch": 0.14976780185758515, "grad_norm": 0.2668212950229645, "learning_rate": 9.99681593279149e-05, "loss": 0.0121, "step": 774 }, { "epoch": 0.14996130030959753, "grad_norm": 0.2823275029659271, "learning_rate": 9.996805604524322e-05, "loss": 0.018, "step": 775 }, { "epoch": 0.15015479876160992, "grad_norm": 0.12087113410234451, "learning_rate": 9.99679525953914e-05, "loss": 0.0173, "step": 776 }, { "epoch": 0.15034829721362228, "grad_norm": 0.23093579709529877, "learning_rate": 9.99678489783598e-05, "loss": 0.0139, "step": 777 }, { "epoch": 0.15054179566563466, "grad_norm": 0.06345119327306747, "learning_rate": 9.996774519414882e-05, "loss": 0.0153, "step": 778 }, { "epoch": 0.15073529411764705, "grad_norm": 
0.21227020025253296, "learning_rate": 9.996764124275884e-05, "loss": 0.0156, "step": 779 }, { "epoch": 0.15092879256965944, "grad_norm": 0.07057752460241318, "learning_rate": 9.996753712419027e-05, "loss": 0.0134, "step": 780 }, { "epoch": 0.15112229102167182, "grad_norm": 0.3170605003833771, "learning_rate": 9.996743283844346e-05, "loss": 0.0172, "step": 781 }, { "epoch": 0.1513157894736842, "grad_norm": 0.09203161299228668, "learning_rate": 9.996732838551882e-05, "loss": 0.0151, "step": 782 }, { "epoch": 0.1515092879256966, "grad_norm": 0.3002665042877197, "learning_rate": 9.996722376541673e-05, "loss": 0.0177, "step": 783 }, { "epoch": 0.15170278637770898, "grad_norm": 0.18334819376468658, "learning_rate": 9.996711897813759e-05, "loss": 0.0177, "step": 784 }, { "epoch": 0.15189628482972137, "grad_norm": 0.21412798762321472, "learning_rate": 9.996701402368178e-05, "loss": 0.0154, "step": 785 }, { "epoch": 0.15208978328173375, "grad_norm": 0.26026439666748047, "learning_rate": 9.996690890204968e-05, "loss": 0.0146, "step": 786 }, { "epoch": 0.15228328173374614, "grad_norm": 0.17728310823440552, "learning_rate": 9.99668036132417e-05, "loss": 0.018, "step": 787 }, { "epoch": 0.15247678018575853, "grad_norm": 0.2330128252506256, "learning_rate": 9.996669815725823e-05, "loss": 0.0161, "step": 788 }, { "epoch": 0.15267027863777088, "grad_norm": 0.11112486571073532, "learning_rate": 9.996659253409965e-05, "loss": 0.0164, "step": 789 }, { "epoch": 0.15286377708978327, "grad_norm": 0.2003282606601715, "learning_rate": 9.996648674376637e-05, "loss": 0.015, "step": 790 }, { "epoch": 0.15305727554179566, "grad_norm": 0.12119761109352112, "learning_rate": 9.996638078625876e-05, "loss": 0.0146, "step": 791 }, { "epoch": 0.15325077399380804, "grad_norm": 0.2389514297246933, "learning_rate": 9.996627466157723e-05, "loss": 0.0166, "step": 792 }, { "epoch": 0.15344427244582043, "grad_norm": 0.15317970514297485, "learning_rate": 9.996616836972217e-05, "loss": 0.0132, "step": 793 }, { "epoch": 0.15363777089783281, "grad_norm": 0.32066720724105835, "learning_rate": 9.996606191069399e-05, "loss": 0.016, "step": 794 }, { "epoch": 0.1538312693498452, "grad_norm": 0.21782317757606506, "learning_rate": 9.996595528449304e-05, "loss": 0.0135, "step": 795 }, { "epoch": 0.1540247678018576, "grad_norm": 0.2551986873149872, "learning_rate": 9.996584849111978e-05, "loss": 0.0168, "step": 796 }, { "epoch": 0.15421826625386997, "grad_norm": 0.17944276332855225, "learning_rate": 9.996574153057457e-05, "loss": 0.0178, "step": 797 }, { "epoch": 0.15441176470588236, "grad_norm": 0.3106604814529419, "learning_rate": 9.99656344028578e-05, "loss": 0.0157, "step": 798 }, { "epoch": 0.15460526315789475, "grad_norm": 0.14170843362808228, "learning_rate": 9.99655271079699e-05, "loss": 0.0173, "step": 799 }, { "epoch": 0.15479876160990713, "grad_norm": 0.23644410073757172, "learning_rate": 9.996541964591122e-05, "loss": 0.0161, "step": 800 }, { "epoch": 0.15499226006191952, "grad_norm": 0.1684119999408722, "learning_rate": 9.996531201668221e-05, "loss": 0.0137, "step": 801 }, { "epoch": 0.15518575851393188, "grad_norm": 0.23186103999614716, "learning_rate": 9.996520422028323e-05, "loss": 0.0168, "step": 802 }, { "epoch": 0.15537925696594426, "grad_norm": 0.24819540977478027, "learning_rate": 9.996509625671471e-05, "loss": 0.0156, "step": 803 }, { "epoch": 0.15557275541795665, "grad_norm": 0.23335996270179749, "learning_rate": 9.996498812597706e-05, "loss": 0.0137, "step": 804 }, { "epoch": 0.15576625386996903, "grad_norm": 
0.2558073103427887, "learning_rate": 9.996487982807065e-05, "loss": 0.0163, "step": 805 }, { "epoch": 0.15595975232198142, "grad_norm": 0.19061976671218872, "learning_rate": 9.996477136299589e-05, "loss": 0.0148, "step": 806 }, { "epoch": 0.1561532507739938, "grad_norm": 0.30846092104911804, "learning_rate": 9.99646627307532e-05, "loss": 0.0163, "step": 807 }, { "epoch": 0.1563467492260062, "grad_norm": 0.1418217122554779, "learning_rate": 9.996455393134298e-05, "loss": 0.0129, "step": 808 }, { "epoch": 0.15654024767801858, "grad_norm": 0.2904703617095947, "learning_rate": 9.996444496476562e-05, "loss": 0.0167, "step": 809 }, { "epoch": 0.15673374613003097, "grad_norm": 0.16072022914886475, "learning_rate": 9.996433583102151e-05, "loss": 0.0159, "step": 810 }, { "epoch": 0.15692724458204335, "grad_norm": 0.26649200916290283, "learning_rate": 9.996422653011111e-05, "loss": 0.0168, "step": 811 }, { "epoch": 0.15712074303405574, "grad_norm": 0.17643994092941284, "learning_rate": 9.99641170620348e-05, "loss": 0.0142, "step": 812 }, { "epoch": 0.15731424148606812, "grad_norm": 0.20737814903259277, "learning_rate": 9.996400742679297e-05, "loss": 0.0175, "step": 813 }, { "epoch": 0.15750773993808048, "grad_norm": 0.2313859462738037, "learning_rate": 9.996389762438603e-05, "loss": 0.0167, "step": 814 }, { "epoch": 0.15770123839009287, "grad_norm": 0.17030666768550873, "learning_rate": 9.99637876548144e-05, "loss": 0.0149, "step": 815 }, { "epoch": 0.15789473684210525, "grad_norm": 0.17369192838668823, "learning_rate": 9.99636775180785e-05, "loss": 0.0127, "step": 816 }, { "epoch": 0.15808823529411764, "grad_norm": 0.14454440772533417, "learning_rate": 9.996356721417872e-05, "loss": 0.0133, "step": 817 }, { "epoch": 0.15828173374613003, "grad_norm": 0.2045048624277115, "learning_rate": 9.996345674311547e-05, "loss": 0.014, "step": 818 }, { "epoch": 0.1584752321981424, "grad_norm": 0.07797345519065857, "learning_rate": 9.996334610488917e-05, "loss": 0.0134, "step": 819 }, { "epoch": 0.1586687306501548, "grad_norm": 0.21821469068527222, "learning_rate": 9.996323529950024e-05, "loss": 0.0165, "step": 820 }, { "epoch": 0.15886222910216719, "grad_norm": 0.09241624921560287, "learning_rate": 9.996312432694906e-05, "loss": 0.0132, "step": 821 }, { "epoch": 0.15905572755417957, "grad_norm": 0.15968479216098785, "learning_rate": 9.996301318723608e-05, "loss": 0.0124, "step": 822 }, { "epoch": 0.15924922600619196, "grad_norm": 0.16038180887699127, "learning_rate": 9.99629018803617e-05, "loss": 0.0124, "step": 823 }, { "epoch": 0.15944272445820434, "grad_norm": 0.1281355768442154, "learning_rate": 9.99627904063263e-05, "loss": 0.0139, "step": 824 }, { "epoch": 0.15963622291021673, "grad_norm": 0.1567426472902298, "learning_rate": 9.996267876513035e-05, "loss": 0.0147, "step": 825 }, { "epoch": 0.15982972136222912, "grad_norm": 0.11077495664358139, "learning_rate": 9.996256695677424e-05, "loss": 0.0156, "step": 826 }, { "epoch": 0.16002321981424147, "grad_norm": 0.1153041198849678, "learning_rate": 9.996245498125837e-05, "loss": 0.0154, "step": 827 }, { "epoch": 0.16021671826625386, "grad_norm": 0.09467215836048126, "learning_rate": 9.99623428385832e-05, "loss": 0.0149, "step": 828 }, { "epoch": 0.16041021671826625, "grad_norm": 0.07105046510696411, "learning_rate": 9.996223052874909e-05, "loss": 0.0184, "step": 829 }, { "epoch": 0.16060371517027863, "grad_norm": 0.09177067130804062, "learning_rate": 9.996211805175648e-05, "loss": 0.0159, "step": 830 }, { "epoch": 0.16079721362229102, "grad_norm": 
0.048452120274305344, "learning_rate": 9.996200540760582e-05, "loss": 0.0114, "step": 831 }, { "epoch": 0.1609907120743034, "grad_norm": 0.1310005486011505, "learning_rate": 9.99618925962975e-05, "loss": 0.0167, "step": 832 }, { "epoch": 0.1611842105263158, "grad_norm": 0.05164783075451851, "learning_rate": 9.996177961783192e-05, "loss": 0.0157, "step": 833 }, { "epoch": 0.16137770897832818, "grad_norm": 0.15976876020431519, "learning_rate": 9.996166647220955e-05, "loss": 0.0133, "step": 834 }, { "epoch": 0.16157120743034056, "grad_norm": 0.07471873611211777, "learning_rate": 9.996155315943075e-05, "loss": 0.0136, "step": 835 }, { "epoch": 0.16176470588235295, "grad_norm": 0.052317071706056595, "learning_rate": 9.9961439679496e-05, "loss": 0.0173, "step": 836 }, { "epoch": 0.16195820433436534, "grad_norm": 0.07450465857982635, "learning_rate": 9.996132603240569e-05, "loss": 0.0154, "step": 837 }, { "epoch": 0.16215170278637772, "grad_norm": 0.11989328265190125, "learning_rate": 9.996121221816025e-05, "loss": 0.0144, "step": 838 }, { "epoch": 0.16234520123839008, "grad_norm": 0.07022504508495331, "learning_rate": 9.99610982367601e-05, "loss": 0.0144, "step": 839 }, { "epoch": 0.16253869969040247, "grad_norm": 0.161363422870636, "learning_rate": 9.996098408820567e-05, "loss": 0.0156, "step": 840 }, { "epoch": 0.16273219814241485, "grad_norm": 0.13028232753276825, "learning_rate": 9.996086977249736e-05, "loss": 0.0125, "step": 841 }, { "epoch": 0.16292569659442724, "grad_norm": 0.10386782139539719, "learning_rate": 9.996075528963563e-05, "loss": 0.0141, "step": 842 }, { "epoch": 0.16311919504643962, "grad_norm": 0.0690714418888092, "learning_rate": 9.99606406396209e-05, "loss": 0.0134, "step": 843 }, { "epoch": 0.163312693498452, "grad_norm": 0.10674726963043213, "learning_rate": 9.996052582245357e-05, "loss": 0.0155, "step": 844 }, { "epoch": 0.1635061919504644, "grad_norm": 0.06811432540416718, "learning_rate": 9.99604108381341e-05, "loss": 0.0145, "step": 845 }, { "epoch": 0.16369969040247678, "grad_norm": 0.11209653317928314, "learning_rate": 9.996029568666288e-05, "loss": 0.0137, "step": 846 }, { "epoch": 0.16389318885448917, "grad_norm": 0.1054922342300415, "learning_rate": 9.996018036804038e-05, "loss": 0.0159, "step": 847 }, { "epoch": 0.16408668730650156, "grad_norm": 0.05941816419363022, "learning_rate": 9.9960064882267e-05, "loss": 0.0135, "step": 848 }, { "epoch": 0.16428018575851394, "grad_norm": 0.0904889702796936, "learning_rate": 9.995994922934317e-05, "loss": 0.0154, "step": 849 }, { "epoch": 0.16447368421052633, "grad_norm": 0.08001329004764557, "learning_rate": 9.995983340926933e-05, "loss": 0.013, "step": 850 }, { "epoch": 0.16466718266253869, "grad_norm": 0.10885697603225708, "learning_rate": 9.995971742204591e-05, "loss": 0.0139, "step": 851 }, { "epoch": 0.16486068111455107, "grad_norm": 0.046753089874982834, "learning_rate": 9.995960126767334e-05, "loss": 0.0122, "step": 852 }, { "epoch": 0.16505417956656346, "grad_norm": 0.12245172262191772, "learning_rate": 9.995948494615205e-05, "loss": 0.0127, "step": 853 }, { "epoch": 0.16524767801857584, "grad_norm": 0.051878586411476135, "learning_rate": 9.995936845748248e-05, "loss": 0.0156, "step": 854 }, { "epoch": 0.16544117647058823, "grad_norm": 0.14304594695568085, "learning_rate": 9.995925180166505e-05, "loss": 0.0125, "step": 855 }, { "epoch": 0.16563467492260062, "grad_norm": 0.06913010030984879, "learning_rate": 9.99591349787002e-05, "loss": 0.0124, "step": 856 }, { "epoch": 0.165828173374613, "grad_norm": 
0.12092205882072449, "learning_rate": 9.995901798858836e-05, "loss": 0.0156, "step": 857 }, { "epoch": 0.1660216718266254, "grad_norm": 0.11567211151123047, "learning_rate": 9.995890083132998e-05, "loss": 0.0166, "step": 858 }, { "epoch": 0.16621517027863777, "grad_norm": 0.14095647633075714, "learning_rate": 9.995878350692547e-05, "loss": 0.0118, "step": 859 }, { "epoch": 0.16640866873065016, "grad_norm": 0.10371950268745422, "learning_rate": 9.995866601537529e-05, "loss": 0.0169, "step": 860 }, { "epoch": 0.16660216718266255, "grad_norm": 0.08726176619529724, "learning_rate": 9.995854835667985e-05, "loss": 0.0152, "step": 861 }, { "epoch": 0.16679566563467493, "grad_norm": 0.06432624906301498, "learning_rate": 9.995843053083964e-05, "loss": 0.0146, "step": 862 }, { "epoch": 0.16698916408668732, "grad_norm": 0.08359110355377197, "learning_rate": 9.995831253785505e-05, "loss": 0.014, "step": 863 }, { "epoch": 0.16718266253869968, "grad_norm": 0.11596274375915527, "learning_rate": 9.995819437772653e-05, "loss": 0.0148, "step": 864 }, { "epoch": 0.16737616099071206, "grad_norm": 0.08730116486549377, "learning_rate": 9.99580760504545e-05, "loss": 0.0156, "step": 865 }, { "epoch": 0.16756965944272445, "grad_norm": 0.09570646286010742, "learning_rate": 9.995795755603944e-05, "loss": 0.0155, "step": 866 }, { "epoch": 0.16776315789473684, "grad_norm": 0.11647776514291763, "learning_rate": 9.995783889448179e-05, "loss": 0.0148, "step": 867 }, { "epoch": 0.16795665634674922, "grad_norm": 0.08392111957073212, "learning_rate": 9.995772006578195e-05, "loss": 0.0157, "step": 868 }, { "epoch": 0.1681501547987616, "grad_norm": 0.09148628264665604, "learning_rate": 9.99576010699404e-05, "loss": 0.0139, "step": 869 }, { "epoch": 0.168343653250774, "grad_norm": 0.08618632704019547, "learning_rate": 9.995748190695754e-05, "loss": 0.0143, "step": 870 }, { "epoch": 0.16853715170278638, "grad_norm": 0.13412725925445557, "learning_rate": 9.995736257683385e-05, "loss": 0.0132, "step": 871 }, { "epoch": 0.16873065015479877, "grad_norm": 0.13481652736663818, "learning_rate": 9.995724307956977e-05, "loss": 0.0129, "step": 872 }, { "epoch": 0.16892414860681115, "grad_norm": 0.15556971728801727, "learning_rate": 9.995712341516576e-05, "loss": 0.0152, "step": 873 }, { "epoch": 0.16911764705882354, "grad_norm": 0.06583016365766525, "learning_rate": 9.99570035836222e-05, "loss": 0.0141, "step": 874 }, { "epoch": 0.16931114551083593, "grad_norm": 0.1953500211238861, "learning_rate": 9.995688358493959e-05, "loss": 0.0164, "step": 875 }, { "epoch": 0.16950464396284828, "grad_norm": 0.09373877197504044, "learning_rate": 9.995676341911838e-05, "loss": 0.0125, "step": 876 }, { "epoch": 0.16969814241486067, "grad_norm": 0.19092567265033722, "learning_rate": 9.9956643086159e-05, "loss": 0.0141, "step": 877 }, { "epoch": 0.16989164086687306, "grad_norm": 0.1891077607870102, "learning_rate": 9.99565225860619e-05, "loss": 0.0165, "step": 878 }, { "epoch": 0.17008513931888544, "grad_norm": 0.13657894730567932, "learning_rate": 9.995640191882751e-05, "loss": 0.0105, "step": 879 }, { "epoch": 0.17027863777089783, "grad_norm": 0.15732398629188538, "learning_rate": 9.99562810844563e-05, "loss": 0.0141, "step": 880 }, { "epoch": 0.17047213622291021, "grad_norm": 0.22003071010112762, "learning_rate": 9.995616008294873e-05, "loss": 0.0156, "step": 881 }, { "epoch": 0.1706656346749226, "grad_norm": 0.14848513901233673, "learning_rate": 9.995603891430523e-05, "loss": 0.0171, "step": 882 }, { "epoch": 0.170859133126935, "grad_norm": 
0.20559151470661163, "learning_rate": 9.995591757852624e-05, "loss": 0.0138, "step": 883 }, { "epoch": 0.17105263157894737, "grad_norm": 0.15850573778152466, "learning_rate": 9.995579607561224e-05, "loss": 0.0148, "step": 884 }, { "epoch": 0.17124613003095976, "grad_norm": 0.0941869243979454, "learning_rate": 9.995567440556366e-05, "loss": 0.0138, "step": 885 }, { "epoch": 0.17143962848297215, "grad_norm": 0.06564158201217651, "learning_rate": 9.995555256838096e-05, "loss": 0.0172, "step": 886 }, { "epoch": 0.17163312693498453, "grad_norm": 0.14244553446769714, "learning_rate": 9.995543056406459e-05, "loss": 0.0142, "step": 887 }, { "epoch": 0.17182662538699692, "grad_norm": 0.08016452193260193, "learning_rate": 9.995530839261501e-05, "loss": 0.0166, "step": 888 }, { "epoch": 0.17202012383900928, "grad_norm": 0.10767979919910431, "learning_rate": 9.995518605403268e-05, "loss": 0.0114, "step": 889 }, { "epoch": 0.17221362229102166, "grad_norm": 0.0923546627163887, "learning_rate": 9.995506354831804e-05, "loss": 0.0188, "step": 890 }, { "epoch": 0.17240712074303405, "grad_norm": 0.1688215434551239, "learning_rate": 9.995494087547154e-05, "loss": 0.0139, "step": 891 }, { "epoch": 0.17260061919504643, "grad_norm": 0.0717136487364769, "learning_rate": 9.995481803549366e-05, "loss": 0.0139, "step": 892 }, { "epoch": 0.17279411764705882, "grad_norm": 0.2039027065038681, "learning_rate": 9.995469502838483e-05, "loss": 0.0126, "step": 893 }, { "epoch": 0.1729876160990712, "grad_norm": 0.09008289873600006, "learning_rate": 9.995457185414554e-05, "loss": 0.0138, "step": 894 }, { "epoch": 0.1731811145510836, "grad_norm": 0.1811145395040512, "learning_rate": 9.995444851277622e-05, "loss": 0.0141, "step": 895 }, { "epoch": 0.17337461300309598, "grad_norm": 0.10111704468727112, "learning_rate": 9.995432500427734e-05, "loss": 0.0128, "step": 896 }, { "epoch": 0.17356811145510836, "grad_norm": 0.14617401361465454, "learning_rate": 9.995420132864935e-05, "loss": 0.0169, "step": 897 }, { "epoch": 0.17376160990712075, "grad_norm": 0.0956917405128479, "learning_rate": 9.995407748589271e-05, "loss": 0.0165, "step": 898 }, { "epoch": 0.17395510835913314, "grad_norm": 0.13311873376369476, "learning_rate": 9.99539534760079e-05, "loss": 0.0107, "step": 899 }, { "epoch": 0.17414860681114552, "grad_norm": 0.07492826133966446, "learning_rate": 9.995382929899537e-05, "loss": 0.0156, "step": 900 }, { "epoch": 0.17434210526315788, "grad_norm": 0.22453930974006653, "learning_rate": 9.995370495485557e-05, "loss": 0.0147, "step": 901 }, { "epoch": 0.17453560371517027, "grad_norm": 0.07888969033956528, "learning_rate": 9.995358044358897e-05, "loss": 0.0146, "step": 902 }, { "epoch": 0.17472910216718265, "grad_norm": 0.15055051445960999, "learning_rate": 9.995345576519603e-05, "loss": 0.015, "step": 903 }, { "epoch": 0.17492260061919504, "grad_norm": 0.10272381454706192, "learning_rate": 9.995333091967724e-05, "loss": 0.012, "step": 904 }, { "epoch": 0.17511609907120743, "grad_norm": 0.14271490275859833, "learning_rate": 9.995320590703304e-05, "loss": 0.0142, "step": 905 }, { "epoch": 0.1753095975232198, "grad_norm": 0.12876282632350922, "learning_rate": 9.995308072726389e-05, "loss": 0.0156, "step": 906 }, { "epoch": 0.1755030959752322, "grad_norm": 0.1810142993927002, "learning_rate": 9.995295538037026e-05, "loss": 0.0164, "step": 907 }, { "epoch": 0.17569659442724458, "grad_norm": 0.12179873883724213, "learning_rate": 9.99528298663526e-05, "loss": 0.0129, "step": 908 }, { "epoch": 0.17589009287925697, "grad_norm": 
0.1552296280860901, "learning_rate": 9.995270418521144e-05, "loss": 0.0164, "step": 909 }, { "epoch": 0.17608359133126936, "grad_norm": 0.12421425431966782, "learning_rate": 9.995257833694716e-05, "loss": 0.0139, "step": 910 }, { "epoch": 0.17627708978328174, "grad_norm": 0.10692549496889114, "learning_rate": 9.99524523215603e-05, "loss": 0.0149, "step": 911 }, { "epoch": 0.17647058823529413, "grad_norm": 0.14940690994262695, "learning_rate": 9.995232613905128e-05, "loss": 0.0149, "step": 912 }, { "epoch": 0.17666408668730652, "grad_norm": 0.20148774981498718, "learning_rate": 9.99521997894206e-05, "loss": 0.0145, "step": 913 }, { "epoch": 0.17685758513931887, "grad_norm": 0.10380459576845169, "learning_rate": 9.99520732726687e-05, "loss": 0.0148, "step": 914 }, { "epoch": 0.17705108359133126, "grad_norm": 0.20994523167610168, "learning_rate": 9.995194658879608e-05, "loss": 0.0156, "step": 915 }, { "epoch": 0.17724458204334365, "grad_norm": 0.11686275154352188, "learning_rate": 9.99518197378032e-05, "loss": 0.0152, "step": 916 }, { "epoch": 0.17743808049535603, "grad_norm": 0.21227183938026428, "learning_rate": 9.995169271969053e-05, "loss": 0.0143, "step": 917 }, { "epoch": 0.17763157894736842, "grad_norm": 0.1502663493156433, "learning_rate": 9.995156553445854e-05, "loss": 0.0153, "step": 918 }, { "epoch": 0.1778250773993808, "grad_norm": 0.1928689181804657, "learning_rate": 9.995143818210771e-05, "loss": 0.0131, "step": 919 }, { "epoch": 0.1780185758513932, "grad_norm": 0.13029786944389343, "learning_rate": 9.995131066263851e-05, "loss": 0.0137, "step": 920 }, { "epoch": 0.17821207430340558, "grad_norm": 0.18330229818820953, "learning_rate": 9.995118297605142e-05, "loss": 0.0145, "step": 921 }, { "epoch": 0.17840557275541796, "grad_norm": 0.09332665055990219, "learning_rate": 9.995105512234689e-05, "loss": 0.0142, "step": 922 }, { "epoch": 0.17859907120743035, "grad_norm": 0.15206150710582733, "learning_rate": 9.995092710152542e-05, "loss": 0.0128, "step": 923 }, { "epoch": 0.17879256965944273, "grad_norm": 0.08465169370174408, "learning_rate": 9.995079891358748e-05, "loss": 0.0139, "step": 924 }, { "epoch": 0.17898606811145512, "grad_norm": 0.14744612574577332, "learning_rate": 9.995067055853355e-05, "loss": 0.0168, "step": 925 }, { "epoch": 0.17917956656346748, "grad_norm": 0.13983377814292908, "learning_rate": 9.995054203636409e-05, "loss": 0.0155, "step": 926 }, { "epoch": 0.17937306501547987, "grad_norm": 0.19626376032829285, "learning_rate": 9.995041334707959e-05, "loss": 0.0176, "step": 927 }, { "epoch": 0.17956656346749225, "grad_norm": 0.2815062999725342, "learning_rate": 9.995028449068054e-05, "loss": 0.0131, "step": 928 }, { "epoch": 0.17976006191950464, "grad_norm": 0.11140445619821548, "learning_rate": 9.995015546716742e-05, "loss": 0.0195, "step": 929 }, { "epoch": 0.17995356037151702, "grad_norm": 0.23514355719089508, "learning_rate": 9.99500262765407e-05, "loss": 0.0132, "step": 930 }, { "epoch": 0.1801470588235294, "grad_norm": 0.07176005095243454, "learning_rate": 9.994989691880084e-05, "loss": 0.0128, "step": 931 }, { "epoch": 0.1803405572755418, "grad_norm": 0.1286063939332962, "learning_rate": 9.994976739394833e-05, "loss": 0.0175, "step": 932 }, { "epoch": 0.18053405572755418, "grad_norm": 0.13057857751846313, "learning_rate": 9.994963770198368e-05, "loss": 0.0163, "step": 933 }, { "epoch": 0.18072755417956657, "grad_norm": 0.12913960218429565, "learning_rate": 9.994950784290734e-05, "loss": 0.0111, "step": 934 }, { "epoch": 0.18092105263157895, "grad_norm": 
0.13981062173843384, "learning_rate": 9.994937781671982e-05, "loss": 0.0152, "step": 935 }, { "epoch": 0.18111455108359134, "grad_norm": 0.12560999393463135, "learning_rate": 9.994924762342158e-05, "loss": 0.0128, "step": 936 }, { "epoch": 0.18130804953560373, "grad_norm": 0.1655731350183487, "learning_rate": 9.994911726301312e-05, "loss": 0.0142, "step": 937 }, { "epoch": 0.18150154798761609, "grad_norm": 0.11598310619592667, "learning_rate": 9.994898673549492e-05, "loss": 0.0149, "step": 938 }, { "epoch": 0.18169504643962847, "grad_norm": 0.17283515632152557, "learning_rate": 9.994885604086745e-05, "loss": 0.0141, "step": 939 }, { "epoch": 0.18188854489164086, "grad_norm": 0.11439178884029388, "learning_rate": 9.994872517913121e-05, "loss": 0.0148, "step": 940 }, { "epoch": 0.18208204334365324, "grad_norm": 0.15692812204360962, "learning_rate": 9.99485941502867e-05, "loss": 0.0136, "step": 941 }, { "epoch": 0.18227554179566563, "grad_norm": 0.11873666942119598, "learning_rate": 9.994846295433437e-05, "loss": 0.0128, "step": 942 }, { "epoch": 0.18246904024767802, "grad_norm": 0.07950963824987411, "learning_rate": 9.994833159127474e-05, "loss": 0.012, "step": 943 }, { "epoch": 0.1826625386996904, "grad_norm": 0.12978845834732056, "learning_rate": 9.994820006110829e-05, "loss": 0.0169, "step": 944 }, { "epoch": 0.1828560371517028, "grad_norm": 0.17858731746673584, "learning_rate": 9.99480683638355e-05, "loss": 0.0136, "step": 945 }, { "epoch": 0.18304953560371517, "grad_norm": 0.1122288852930069, "learning_rate": 9.994793649945688e-05, "loss": 0.0147, "step": 946 }, { "epoch": 0.18324303405572756, "grad_norm": 0.18190860748291016, "learning_rate": 9.994780446797292e-05, "loss": 0.0118, "step": 947 }, { "epoch": 0.18343653250773995, "grad_norm": 0.12472666800022125, "learning_rate": 9.994767226938408e-05, "loss": 0.0135, "step": 948 }, { "epoch": 0.18363003095975233, "grad_norm": 0.12263232469558716, "learning_rate": 9.994753990369087e-05, "loss": 0.0172, "step": 949 }, { "epoch": 0.18382352941176472, "grad_norm": 0.1292906403541565, "learning_rate": 9.994740737089377e-05, "loss": 0.0139, "step": 950 }, { "epoch": 0.18401702786377708, "grad_norm": 0.06642648577690125, "learning_rate": 9.99472746709933e-05, "loss": 0.0122, "step": 951 }, { "epoch": 0.18421052631578946, "grad_norm": 0.14258302748203278, "learning_rate": 9.994714180398995e-05, "loss": 0.0161, "step": 952 }, { "epoch": 0.18440402476780185, "grad_norm": 0.047475665807724, "learning_rate": 9.994700876988419e-05, "loss": 0.015, "step": 953 }, { "epoch": 0.18459752321981424, "grad_norm": 0.13934631645679474, "learning_rate": 9.994687556867653e-05, "loss": 0.0148, "step": 954 }, { "epoch": 0.18479102167182662, "grad_norm": 0.06882733106613159, "learning_rate": 9.994674220036744e-05, "loss": 0.0154, "step": 955 }, { "epoch": 0.184984520123839, "grad_norm": 0.04174954444169998, "learning_rate": 9.994660866495746e-05, "loss": 0.0141, "step": 956 }, { "epoch": 0.1851780185758514, "grad_norm": 0.07708290964365005, "learning_rate": 9.994647496244707e-05, "loss": 0.015, "step": 957 }, { "epoch": 0.18537151702786378, "grad_norm": 0.07084149122238159, "learning_rate": 9.994634109283675e-05, "loss": 0.0149, "step": 958 }, { "epoch": 0.18556501547987617, "grad_norm": 0.11826010048389435, "learning_rate": 9.9946207056127e-05, "loss": 0.0136, "step": 959 }, { "epoch": 0.18575851393188855, "grad_norm": 0.13736271858215332, "learning_rate": 9.994607285231834e-05, "loss": 0.0149, "step": 960 }, { "epoch": 0.18595201238390094, "grad_norm": 
0.061414025723934174, "learning_rate": 9.994593848141126e-05, "loss": 0.0135, "step": 961 }, { "epoch": 0.18614551083591332, "grad_norm": 0.10206583142280579, "learning_rate": 9.994580394340624e-05, "loss": 0.0164, "step": 962 }, { "epoch": 0.18633900928792568, "grad_norm": 0.13770852982997894, "learning_rate": 9.994566923830379e-05, "loss": 0.0119, "step": 963 }, { "epoch": 0.18653250773993807, "grad_norm": 0.14436224102973938, "learning_rate": 9.994553436610445e-05, "loss": 0.0178, "step": 964 }, { "epoch": 0.18672600619195046, "grad_norm": 0.07836562395095825, "learning_rate": 9.994539932680866e-05, "loss": 0.0131, "step": 965 }, { "epoch": 0.18691950464396284, "grad_norm": 0.10433219373226166, "learning_rate": 9.994526412041696e-05, "loss": 0.0152, "step": 966 }, { "epoch": 0.18711300309597523, "grad_norm": 0.15095460414886475, "learning_rate": 9.994512874692984e-05, "loss": 0.0148, "step": 967 }, { "epoch": 0.1873065015479876, "grad_norm": 0.13946354389190674, "learning_rate": 9.99449932063478e-05, "loss": 0.0121, "step": 968 }, { "epoch": 0.1875, "grad_norm": 0.14908818900585175, "learning_rate": 9.994485749867136e-05, "loss": 0.0141, "step": 969 }, { "epoch": 0.1876934984520124, "grad_norm": 0.1483326107263565, "learning_rate": 9.994472162390102e-05, "loss": 0.0156, "step": 970 }, { "epoch": 0.18788699690402477, "grad_norm": 0.16584201157093048, "learning_rate": 9.994458558203726e-05, "loss": 0.0133, "step": 971 }, { "epoch": 0.18808049535603716, "grad_norm": 0.25405603647232056, "learning_rate": 9.994444937308063e-05, "loss": 0.0126, "step": 972 }, { "epoch": 0.18827399380804954, "grad_norm": 0.09686595946550369, "learning_rate": 9.994431299703159e-05, "loss": 0.0136, "step": 973 }, { "epoch": 0.18846749226006193, "grad_norm": 0.1270056515932083, "learning_rate": 9.994417645389069e-05, "loss": 0.0154, "step": 974 }, { "epoch": 0.18866099071207432, "grad_norm": 0.14294305443763733, "learning_rate": 9.99440397436584e-05, "loss": 0.0153, "step": 975 }, { "epoch": 0.18885448916408668, "grad_norm": 0.08836259692907333, "learning_rate": 9.994390286633525e-05, "loss": 0.0133, "step": 976 }, { "epoch": 0.18904798761609906, "grad_norm": 0.16021065413951874, "learning_rate": 9.994376582192175e-05, "loss": 0.0136, "step": 977 }, { "epoch": 0.18924148606811145, "grad_norm": 0.10446682572364807, "learning_rate": 9.99436286104184e-05, "loss": 0.0134, "step": 978 }, { "epoch": 0.18943498452012383, "grad_norm": 0.1565922647714615, "learning_rate": 9.994349123182569e-05, "loss": 0.0136, "step": 979 }, { "epoch": 0.18962848297213622, "grad_norm": 0.1418229341506958, "learning_rate": 9.994335368614418e-05, "loss": 0.0128, "step": 980 }, { "epoch": 0.1898219814241486, "grad_norm": 0.17461596429347992, "learning_rate": 9.994321597337435e-05, "loss": 0.0168, "step": 981 }, { "epoch": 0.190015479876161, "grad_norm": 0.14905036985874176, "learning_rate": 9.994307809351671e-05, "loss": 0.011, "step": 982 }, { "epoch": 0.19020897832817338, "grad_norm": 0.07911402732133865, "learning_rate": 9.99429400465718e-05, "loss": 0.0159, "step": 983 }, { "epoch": 0.19040247678018576, "grad_norm": 0.22697311639785767, "learning_rate": 9.994280183254009e-05, "loss": 0.0138, "step": 984 }, { "epoch": 0.19059597523219815, "grad_norm": 0.11531927436590195, "learning_rate": 9.994266345142211e-05, "loss": 0.0174, "step": 985 }, { "epoch": 0.19078947368421054, "grad_norm": 0.16109582781791687, "learning_rate": 9.99425249032184e-05, "loss": 0.013, "step": 986 }, { "epoch": 0.19098297213622292, "grad_norm": 0.10983595252037048, 
"learning_rate": 9.994238618792945e-05, "loss": 0.013, "step": 987 }, { "epoch": 0.19117647058823528, "grad_norm": 0.0650850385427475, "learning_rate": 9.994224730555581e-05, "loss": 0.0138, "step": 988 }, { "epoch": 0.19136996904024767, "grad_norm": 0.06763597577810287, "learning_rate": 9.994210825609793e-05, "loss": 0.0132, "step": 989 }, { "epoch": 0.19156346749226005, "grad_norm": 0.07593245059251785, "learning_rate": 9.994196903955637e-05, "loss": 0.0139, "step": 990 }, { "epoch": 0.19175696594427244, "grad_norm": 0.07612422853708267, "learning_rate": 9.994182965593165e-05, "loss": 0.0143, "step": 991 }, { "epoch": 0.19195046439628483, "grad_norm": 0.050772566348314285, "learning_rate": 9.994169010522429e-05, "loss": 0.0129, "step": 992 }, { "epoch": 0.1921439628482972, "grad_norm": 0.08863548189401627, "learning_rate": 9.994155038743479e-05, "loss": 0.0166, "step": 993 }, { "epoch": 0.1923374613003096, "grad_norm": 0.1217384859919548, "learning_rate": 9.994141050256369e-05, "loss": 0.0148, "step": 994 }, { "epoch": 0.19253095975232198, "grad_norm": 0.13496603071689606, "learning_rate": 9.994127045061149e-05, "loss": 0.015, "step": 995 }, { "epoch": 0.19272445820433437, "grad_norm": 0.15198874473571777, "learning_rate": 9.994113023157872e-05, "loss": 0.0133, "step": 996 }, { "epoch": 0.19291795665634676, "grad_norm": 0.1456543654203415, "learning_rate": 9.994098984546591e-05, "loss": 0.0156, "step": 997 }, { "epoch": 0.19311145510835914, "grad_norm": 0.1733216494321823, "learning_rate": 9.994084929227356e-05, "loss": 0.0117, "step": 998 }, { "epoch": 0.19330495356037153, "grad_norm": 0.08493223786354065, "learning_rate": 9.994070857200223e-05, "loss": 0.0143, "step": 999 }, { "epoch": 0.19349845201238391, "grad_norm": 0.15733304619789124, "learning_rate": 9.99405676846524e-05, "loss": 0.0149, "step": 1000 }, { "epoch": 0.19369195046439627, "grad_norm": 0.08426505327224731, "learning_rate": 9.994042663022462e-05, "loss": 0.0132, "step": 1001 }, { "epoch": 0.19388544891640866, "grad_norm": 0.10863766819238663, "learning_rate": 9.99402854087194e-05, "loss": 0.0118, "step": 1002 }, { "epoch": 0.19407894736842105, "grad_norm": 0.08696933090686798, "learning_rate": 9.994014402013727e-05, "loss": 0.0165, "step": 1003 }, { "epoch": 0.19427244582043343, "grad_norm": 0.088398776948452, "learning_rate": 9.994000246447877e-05, "loss": 0.0153, "step": 1004 }, { "epoch": 0.19446594427244582, "grad_norm": 0.07750371843576431, "learning_rate": 9.99398607417444e-05, "loss": 0.0156, "step": 1005 }, { "epoch": 0.1946594427244582, "grad_norm": 0.1823616623878479, "learning_rate": 9.993971885193473e-05, "loss": 0.0138, "step": 1006 }, { "epoch": 0.1948529411764706, "grad_norm": 0.15676745772361755, "learning_rate": 9.993957679505023e-05, "loss": 0.016, "step": 1007 }, { "epoch": 0.19504643962848298, "grad_norm": 0.06604135781526566, "learning_rate": 9.993943457109147e-05, "loss": 0.015, "step": 1008 }, { "epoch": 0.19523993808049536, "grad_norm": 0.07541079074144363, "learning_rate": 9.993929218005895e-05, "loss": 0.0126, "step": 1009 }, { "epoch": 0.19543343653250775, "grad_norm": 0.08362367749214172, "learning_rate": 9.993914962195323e-05, "loss": 0.0147, "step": 1010 }, { "epoch": 0.19562693498452013, "grad_norm": 0.046456776559352875, "learning_rate": 9.993900689677481e-05, "loss": 0.0129, "step": 1011 }, { "epoch": 0.19582043343653252, "grad_norm": 0.06646149605512619, "learning_rate": 9.993886400452428e-05, "loss": 0.0192, "step": 1012 }, { "epoch": 0.19601393188854488, "grad_norm": 
0.06218772754073143, "learning_rate": 9.993872094520207e-05, "loss": 0.0111, "step": 1013 }, { "epoch": 0.19620743034055727, "grad_norm": 0.12822140753269196, "learning_rate": 9.993857771880879e-05, "loss": 0.0125, "step": 1014 }, { "epoch": 0.19640092879256965, "grad_norm": 0.09120473265647888, "learning_rate": 9.993843432534495e-05, "loss": 0.0164, "step": 1015 }, { "epoch": 0.19659442724458204, "grad_norm": 0.1172621101140976, "learning_rate": 9.993829076481108e-05, "loss": 0.012, "step": 1016 }, { "epoch": 0.19678792569659442, "grad_norm": 0.09039350599050522, "learning_rate": 9.993814703720774e-05, "loss": 0.0149, "step": 1017 }, { "epoch": 0.1969814241486068, "grad_norm": 0.10177267342805862, "learning_rate": 9.993800314253543e-05, "loss": 0.0123, "step": 1018 }, { "epoch": 0.1971749226006192, "grad_norm": 0.11933190375566483, "learning_rate": 9.993785908079469e-05, "loss": 0.0152, "step": 1019 }, { "epoch": 0.19736842105263158, "grad_norm": 0.08448562771081924, "learning_rate": 9.993771485198608e-05, "loss": 0.0124, "step": 1020 }, { "epoch": 0.19756191950464397, "grad_norm": 0.19793134927749634, "learning_rate": 9.99375704561101e-05, "loss": 0.0156, "step": 1021 }, { "epoch": 0.19775541795665635, "grad_norm": 0.11932411789894104, "learning_rate": 9.99374258931673e-05, "loss": 0.013, "step": 1022 }, { "epoch": 0.19794891640866874, "grad_norm": 0.220861554145813, "learning_rate": 9.993728116315824e-05, "loss": 0.0116, "step": 1023 }, { "epoch": 0.19814241486068113, "grad_norm": 0.20527604222297668, "learning_rate": 9.993713626608345e-05, "loss": 0.0142, "step": 1024 }, { "epoch": 0.19833591331269348, "grad_norm": 0.17470353841781616, "learning_rate": 9.993699120194345e-05, "loss": 0.0129, "step": 1025 }, { "epoch": 0.19852941176470587, "grad_norm": 0.20764298737049103, "learning_rate": 9.993684597073879e-05, "loss": 0.0152, "step": 1026 }, { "epoch": 0.19872291021671826, "grad_norm": 0.15581294894218445, "learning_rate": 9.993670057247e-05, "loss": 0.0148, "step": 1027 }, { "epoch": 0.19891640866873064, "grad_norm": 0.1694687306880951, "learning_rate": 9.993655500713763e-05, "loss": 0.0166, "step": 1028 }, { "epoch": 0.19910990712074303, "grad_norm": 0.15352441370487213, "learning_rate": 9.993640927474223e-05, "loss": 0.0152, "step": 1029 }, { "epoch": 0.19930340557275542, "grad_norm": 0.18163737654685974, "learning_rate": 9.993626337528434e-05, "loss": 0.0143, "step": 1030 }, { "epoch": 0.1994969040247678, "grad_norm": 0.14126740396022797, "learning_rate": 9.993611730876448e-05, "loss": 0.0136, "step": 1031 }, { "epoch": 0.1996904024767802, "grad_norm": 0.18031451106071472, "learning_rate": 9.993597107518322e-05, "loss": 0.0123, "step": 1032 }, { "epoch": 0.19988390092879257, "grad_norm": 0.16426542401313782, "learning_rate": 9.993582467454108e-05, "loss": 0.0146, "step": 1033 }, { "epoch": 0.20007739938080496, "grad_norm": 0.15322907269001007, "learning_rate": 9.993567810683861e-05, "loss": 0.0172, "step": 1034 }, { "epoch": 0.20027089783281735, "grad_norm": 0.14700952172279358, "learning_rate": 9.993553137207638e-05, "loss": 0.0151, "step": 1035 }, { "epoch": 0.20046439628482973, "grad_norm": 0.11907035112380981, "learning_rate": 9.993538447025492e-05, "loss": 0.0125, "step": 1036 }, { "epoch": 0.20065789473684212, "grad_norm": 0.10936133563518524, "learning_rate": 9.993523740137477e-05, "loss": 0.0138, "step": 1037 }, { "epoch": 0.20085139318885448, "grad_norm": 0.16832131147384644, "learning_rate": 9.993509016543648e-05, "loss": 0.0109, "step": 1038 }, { "epoch": 
0.20104489164086686, "grad_norm": 0.16046710312366486, "learning_rate": 9.993494276244059e-05, "loss": 0.0175, "step": 1039 }, { "epoch": 0.20123839009287925, "grad_norm": 0.15197399258613586, "learning_rate": 9.993479519238765e-05, "loss": 0.0113, "step": 1040 }, { "epoch": 0.20143188854489164, "grad_norm": 0.12648236751556396, "learning_rate": 9.99346474552782e-05, "loss": 0.015, "step": 1041 }, { "epoch": 0.20162538699690402, "grad_norm": 0.19700688123703003, "learning_rate": 9.993449955111283e-05, "loss": 0.0123, "step": 1042 }, { "epoch": 0.2018188854489164, "grad_norm": 0.13793012499809265, "learning_rate": 9.993435147989206e-05, "loss": 0.0128, "step": 1043 }, { "epoch": 0.2020123839009288, "grad_norm": 0.17124028503894806, "learning_rate": 9.993420324161644e-05, "loss": 0.0161, "step": 1044 }, { "epoch": 0.20220588235294118, "grad_norm": 0.12502184510231018, "learning_rate": 9.993405483628652e-05, "loss": 0.0157, "step": 1045 }, { "epoch": 0.20239938080495357, "grad_norm": 0.17508293688297272, "learning_rate": 9.993390626390286e-05, "loss": 0.0158, "step": 1046 }, { "epoch": 0.20259287925696595, "grad_norm": 0.12283172458410263, "learning_rate": 9.993375752446601e-05, "loss": 0.0146, "step": 1047 }, { "epoch": 0.20278637770897834, "grad_norm": 0.2128782868385315, "learning_rate": 9.993360861797653e-05, "loss": 0.0175, "step": 1048 }, { "epoch": 0.20297987616099072, "grad_norm": 0.2188488394021988, "learning_rate": 9.993345954443494e-05, "loss": 0.0135, "step": 1049 }, { "epoch": 0.20317337461300308, "grad_norm": 0.16455669701099396, "learning_rate": 9.993331030384183e-05, "loss": 0.0153, "step": 1050 }, { "epoch": 0.20336687306501547, "grad_norm": 0.17197157442569733, "learning_rate": 9.993316089619775e-05, "loss": 0.0127, "step": 1051 }, { "epoch": 0.20356037151702785, "grad_norm": 0.2948020398616791, "learning_rate": 9.993301132150324e-05, "loss": 0.0138, "step": 1052 }, { "epoch": 0.20375386996904024, "grad_norm": 0.16576185822486877, "learning_rate": 9.993286157975888e-05, "loss": 0.0136, "step": 1053 }, { "epoch": 0.20394736842105263, "grad_norm": 0.3184609115123749, "learning_rate": 9.99327116709652e-05, "loss": 0.0159, "step": 1054 }, { "epoch": 0.204140866873065, "grad_norm": 0.14403925836086273, "learning_rate": 9.993256159512279e-05, "loss": 0.0159, "step": 1055 }, { "epoch": 0.2043343653250774, "grad_norm": 0.30059221386909485, "learning_rate": 9.993241135223218e-05, "loss": 0.017, "step": 1056 }, { "epoch": 0.20452786377708979, "grad_norm": 0.12440890818834305, "learning_rate": 9.993226094229391e-05, "loss": 0.0144, "step": 1057 }, { "epoch": 0.20472136222910217, "grad_norm": 0.22331316769123077, "learning_rate": 9.99321103653086e-05, "loss": 0.013, "step": 1058 }, { "epoch": 0.20491486068111456, "grad_norm": 0.06699984520673752, "learning_rate": 9.993195962127674e-05, "loss": 0.0157, "step": 1059 }, { "epoch": 0.20510835913312694, "grad_norm": 0.21382570266723633, "learning_rate": 9.993180871019895e-05, "loss": 0.0161, "step": 1060 }, { "epoch": 0.20530185758513933, "grad_norm": 0.059684086591005325, "learning_rate": 9.993165763207576e-05, "loss": 0.0134, "step": 1061 }, { "epoch": 0.20549535603715172, "grad_norm": 0.1382162719964981, "learning_rate": 9.993150638690774e-05, "loss": 0.0131, "step": 1062 }, { "epoch": 0.20568885448916407, "grad_norm": 0.16288228332996368, "learning_rate": 9.993135497469545e-05, "loss": 0.0125, "step": 1063 }, { "epoch": 0.20588235294117646, "grad_norm": 0.08908411115407944, "learning_rate": 9.993120339543946e-05, "loss": 0.0144, "step": 
1064 }, { "epoch": 0.20607585139318885, "grad_norm": 0.09995443373918533, "learning_rate": 9.993105164914032e-05, "loss": 0.0127, "step": 1065 }, { "epoch": 0.20626934984520123, "grad_norm": 0.11076422780752182, "learning_rate": 9.99308997357986e-05, "loss": 0.0164, "step": 1066 }, { "epoch": 0.20646284829721362, "grad_norm": 0.06756841391324997, "learning_rate": 9.993074765541487e-05, "loss": 0.0166, "step": 1067 }, { "epoch": 0.206656346749226, "grad_norm": 0.1216847151517868, "learning_rate": 9.993059540798969e-05, "loss": 0.0157, "step": 1068 }, { "epoch": 0.2068498452012384, "grad_norm": 0.06285707652568817, "learning_rate": 9.993044299352363e-05, "loss": 0.0107, "step": 1069 }, { "epoch": 0.20704334365325078, "grad_norm": 0.14948037266731262, "learning_rate": 9.993029041201724e-05, "loss": 0.0129, "step": 1070 }, { "epoch": 0.20723684210526316, "grad_norm": 0.0710487887263298, "learning_rate": 9.993013766347112e-05, "loss": 0.0135, "step": 1071 }, { "epoch": 0.20743034055727555, "grad_norm": 0.1474865823984146, "learning_rate": 9.992998474788581e-05, "loss": 0.0133, "step": 1072 }, { "epoch": 0.20762383900928794, "grad_norm": 0.09347362071275711, "learning_rate": 9.99298316652619e-05, "loss": 0.0117, "step": 1073 }, { "epoch": 0.20781733746130032, "grad_norm": 0.13915550708770752, "learning_rate": 9.992967841559993e-05, "loss": 0.0125, "step": 1074 }, { "epoch": 0.20801083591331268, "grad_norm": 0.1287284940481186, "learning_rate": 9.99295249989005e-05, "loss": 0.0126, "step": 1075 }, { "epoch": 0.20820433436532507, "grad_norm": 0.13672757148742676, "learning_rate": 9.992937141516415e-05, "loss": 0.0123, "step": 1076 }, { "epoch": 0.20839783281733745, "grad_norm": 0.14003634452819824, "learning_rate": 9.992921766439149e-05, "loss": 0.0157, "step": 1077 }, { "epoch": 0.20859133126934984, "grad_norm": 0.1321680098772049, "learning_rate": 9.992906374658306e-05, "loss": 0.014, "step": 1078 }, { "epoch": 0.20878482972136223, "grad_norm": 0.06626471877098083, "learning_rate": 9.992890966173942e-05, "loss": 0.0136, "step": 1079 }, { "epoch": 0.2089783281733746, "grad_norm": 0.1523239016532898, "learning_rate": 9.992875540986119e-05, "loss": 0.0159, "step": 1080 }, { "epoch": 0.209171826625387, "grad_norm": 0.11639585345983505, "learning_rate": 9.992860099094891e-05, "loss": 0.0146, "step": 1081 }, { "epoch": 0.20936532507739938, "grad_norm": 0.07641036808490753, "learning_rate": 9.992844640500317e-05, "loss": 0.0135, "step": 1082 }, { "epoch": 0.20955882352941177, "grad_norm": 0.16610975563526154, "learning_rate": 9.992829165202452e-05, "loss": 0.0127, "step": 1083 }, { "epoch": 0.20975232198142416, "grad_norm": 0.10671953856945038, "learning_rate": 9.992813673201357e-05, "loss": 0.0126, "step": 1084 }, { "epoch": 0.20994582043343654, "grad_norm": 0.14507216215133667, "learning_rate": 9.992798164497088e-05, "loss": 0.0145, "step": 1085 }, { "epoch": 0.21013931888544893, "grad_norm": 0.12434206902980804, "learning_rate": 9.9927826390897e-05, "loss": 0.0153, "step": 1086 }, { "epoch": 0.21033281733746131, "grad_norm": 0.14414608478546143, "learning_rate": 9.992767096979255e-05, "loss": 0.0127, "step": 1087 }, { "epoch": 0.21052631578947367, "grad_norm": 0.10141997784376144, "learning_rate": 9.99275153816581e-05, "loss": 0.0115, "step": 1088 }, { "epoch": 0.21071981424148606, "grad_norm": 0.1621636152267456, "learning_rate": 9.99273596264942e-05, "loss": 0.0151, "step": 1089 }, { "epoch": 0.21091331269349844, "grad_norm": 0.06210942566394806, "learning_rate": 9.992720370430145e-05, "loss": 
0.0116, "step": 1090 }, { "epoch": 0.21110681114551083, "grad_norm": 0.12452758103609085, "learning_rate": 9.992704761508044e-05, "loss": 0.0154, "step": 1091 }, { "epoch": 0.21130030959752322, "grad_norm": 0.2880903482437134, "learning_rate": 9.992689135883172e-05, "loss": 0.0164, "step": 1092 }, { "epoch": 0.2114938080495356, "grad_norm": 0.1831246167421341, "learning_rate": 9.992673493555589e-05, "loss": 0.0152, "step": 1093 }, { "epoch": 0.211687306501548, "grad_norm": 0.3915548026561737, "learning_rate": 9.992657834525356e-05, "loss": 0.0149, "step": 1094 }, { "epoch": 0.21188080495356038, "grad_norm": 0.2222842127084732, "learning_rate": 9.992642158792525e-05, "loss": 0.0163, "step": 1095 }, { "epoch": 0.21207430340557276, "grad_norm": 0.4058367908000946, "learning_rate": 9.992626466357158e-05, "loss": 0.0127, "step": 1096 }, { "epoch": 0.21226780185758515, "grad_norm": 0.15253165364265442, "learning_rate": 9.992610757219312e-05, "loss": 0.0148, "step": 1097 }, { "epoch": 0.21246130030959753, "grad_norm": 0.32212743163108826, "learning_rate": 9.992595031379047e-05, "loss": 0.0145, "step": 1098 }, { "epoch": 0.21265479876160992, "grad_norm": 0.1075211688876152, "learning_rate": 9.992579288836422e-05, "loss": 0.0144, "step": 1099 }, { "epoch": 0.21284829721362228, "grad_norm": 0.19688990712165833, "learning_rate": 9.992563529591493e-05, "loss": 0.014, "step": 1100 }, { "epoch": 0.21304179566563466, "grad_norm": 0.14862345159053802, "learning_rate": 9.99254775364432e-05, "loss": 0.0158, "step": 1101 }, { "epoch": 0.21323529411764705, "grad_norm": 0.1385173499584198, "learning_rate": 9.99253196099496e-05, "loss": 0.0142, "step": 1102 }, { "epoch": 0.21342879256965944, "grad_norm": 0.09452048689126968, "learning_rate": 9.992516151643476e-05, "loss": 0.0143, "step": 1103 }, { "epoch": 0.21362229102167182, "grad_norm": 0.11313559859991074, "learning_rate": 9.992500325589922e-05, "loss": 0.0121, "step": 1104 }, { "epoch": 0.2138157894736842, "grad_norm": 0.06870365142822266, "learning_rate": 9.99248448283436e-05, "loss": 0.0158, "step": 1105 }, { "epoch": 0.2140092879256966, "grad_norm": 0.12884850800037384, "learning_rate": 9.992468623376846e-05, "loss": 0.0144, "step": 1106 }, { "epoch": 0.21420278637770898, "grad_norm": 0.059020984917879105, "learning_rate": 9.99245274721744e-05, "loss": 0.013, "step": 1107 }, { "epoch": 0.21439628482972137, "grad_norm": 0.12993934750556946, "learning_rate": 9.992436854356206e-05, "loss": 0.0141, "step": 1108 }, { "epoch": 0.21458978328173375, "grad_norm": 0.18736080825328827, "learning_rate": 9.992420944793195e-05, "loss": 0.0147, "step": 1109 }, { "epoch": 0.21478328173374614, "grad_norm": 0.24258673191070557, "learning_rate": 9.992405018528471e-05, "loss": 0.0117, "step": 1110 }, { "epoch": 0.21497678018575853, "grad_norm": 0.08833150565624237, "learning_rate": 9.992389075562091e-05, "loss": 0.0143, "step": 1111 }, { "epoch": 0.21517027863777088, "grad_norm": 0.1765373796224594, "learning_rate": 9.992373115894117e-05, "loss": 0.0151, "step": 1112 }, { "epoch": 0.21536377708978327, "grad_norm": 0.08677516132593155, "learning_rate": 9.992357139524606e-05, "loss": 0.0167, "step": 1113 }, { "epoch": 0.21555727554179566, "grad_norm": 0.1615583300590515, "learning_rate": 9.992341146453619e-05, "loss": 0.0132, "step": 1114 }, { "epoch": 0.21575077399380804, "grad_norm": 0.11306799948215485, "learning_rate": 9.992325136681212e-05, "loss": 0.0135, "step": 1115 }, { "epoch": 0.21594427244582043, "grad_norm": 0.1461450606584549, "learning_rate": 
9.992309110207448e-05, "loss": 0.0133, "step": 1116 }, { "epoch": 0.21613777089783281, "grad_norm": 0.12887993454933167, "learning_rate": 9.992293067032388e-05, "loss": 0.013, "step": 1117 }, { "epoch": 0.2163312693498452, "grad_norm": 0.14490413665771484, "learning_rate": 9.992277007156086e-05, "loss": 0.0157, "step": 1118 }, { "epoch": 0.2165247678018576, "grad_norm": 0.1763673573732376, "learning_rate": 9.992260930578608e-05, "loss": 0.0115, "step": 1119 }, { "epoch": 0.21671826625386997, "grad_norm": 0.19876937568187714, "learning_rate": 9.992244837300009e-05, "loss": 0.0147, "step": 1120 }, { "epoch": 0.21691176470588236, "grad_norm": 0.16314758360385895, "learning_rate": 9.992228727320351e-05, "loss": 0.0172, "step": 1121 }, { "epoch": 0.21710526315789475, "grad_norm": 0.10652532428503036, "learning_rate": 9.992212600639694e-05, "loss": 0.0117, "step": 1122 }, { "epoch": 0.21729876160990713, "grad_norm": 0.16404318809509277, "learning_rate": 9.992196457258095e-05, "loss": 0.0148, "step": 1123 }, { "epoch": 0.21749226006191952, "grad_norm": 0.14019708335399628, "learning_rate": 9.99218029717562e-05, "loss": 0.0149, "step": 1124 }, { "epoch": 0.21768575851393188, "grad_norm": 0.10615821182727814, "learning_rate": 9.992164120392324e-05, "loss": 0.0134, "step": 1125 }, { "epoch": 0.21787925696594426, "grad_norm": 0.11908544600009918, "learning_rate": 9.992147926908268e-05, "loss": 0.015, "step": 1126 }, { "epoch": 0.21807275541795665, "grad_norm": 0.10306712239980698, "learning_rate": 9.992131716723514e-05, "loss": 0.0177, "step": 1127 }, { "epoch": 0.21826625386996903, "grad_norm": 0.14471831917762756, "learning_rate": 9.992115489838121e-05, "loss": 0.0126, "step": 1128 }, { "epoch": 0.21845975232198142, "grad_norm": 0.1122494637966156, "learning_rate": 9.992099246252149e-05, "loss": 0.0151, "step": 1129 }, { "epoch": 0.2186532507739938, "grad_norm": 0.10484328120946884, "learning_rate": 9.99208298596566e-05, "loss": 0.0125, "step": 1130 }, { "epoch": 0.2188467492260062, "grad_norm": 0.13407884538173676, "learning_rate": 9.992066708978713e-05, "loss": 0.0125, "step": 1131 }, { "epoch": 0.21904024767801858, "grad_norm": 0.09899730235338211, "learning_rate": 9.992050415291368e-05, "loss": 0.0127, "step": 1132 }, { "epoch": 0.21923374613003097, "grad_norm": 0.14302971959114075, "learning_rate": 9.992034104903686e-05, "loss": 0.0128, "step": 1133 }, { "epoch": 0.21942724458204335, "grad_norm": 0.07950276881456375, "learning_rate": 9.99201777781573e-05, "loss": 0.0151, "step": 1134 }, { "epoch": 0.21962074303405574, "grad_norm": 0.16285809874534607, "learning_rate": 9.992001434027557e-05, "loss": 0.0128, "step": 1135 }, { "epoch": 0.21981424148606812, "grad_norm": 0.15438903868198395, "learning_rate": 9.99198507353923e-05, "loss": 0.0148, "step": 1136 }, { "epoch": 0.22000773993808048, "grad_norm": 0.07568935304880142, "learning_rate": 9.99196869635081e-05, "loss": 0.0131, "step": 1137 }, { "epoch": 0.22020123839009287, "grad_norm": 0.10054972767829895, "learning_rate": 9.991952302462356e-05, "loss": 0.0158, "step": 1138 }, { "epoch": 0.22039473684210525, "grad_norm": 0.1603708118200302, "learning_rate": 9.991935891873931e-05, "loss": 0.0135, "step": 1139 }, { "epoch": 0.22058823529411764, "grad_norm": 0.14318005740642548, "learning_rate": 9.991919464585596e-05, "loss": 0.015, "step": 1140 }, { "epoch": 0.22078173374613003, "grad_norm": 0.13743524253368378, "learning_rate": 9.991903020597409e-05, "loss": 0.0128, "step": 1141 }, { "epoch": 0.2209752321981424, "grad_norm": 0.17502105236053467, 
"learning_rate": 9.991886559909436e-05, "loss": 0.0147, "step": 1142 }, { "epoch": 0.2211687306501548, "grad_norm": 0.11117829382419586, "learning_rate": 9.991870082521735e-05, "loss": 0.0132, "step": 1143 }, { "epoch": 0.22136222910216719, "grad_norm": 0.20161907374858856, "learning_rate": 9.991853588434367e-05, "loss": 0.0136, "step": 1144 }, { "epoch": 0.22155572755417957, "grad_norm": 0.0981411412358284, "learning_rate": 9.991837077647394e-05, "loss": 0.0164, "step": 1145 }, { "epoch": 0.22174922600619196, "grad_norm": 0.1419457644224167, "learning_rate": 9.991820550160877e-05, "loss": 0.0164, "step": 1146 }, { "epoch": 0.22194272445820434, "grad_norm": 0.18120743334293365, "learning_rate": 9.99180400597488e-05, "loss": 0.0185, "step": 1147 }, { "epoch": 0.22213622291021673, "grad_norm": 0.21331404149532318, "learning_rate": 9.991787445089461e-05, "loss": 0.016, "step": 1148 }, { "epoch": 0.22232972136222912, "grad_norm": 0.1486945003271103, "learning_rate": 9.991770867504684e-05, "loss": 0.013, "step": 1149 }, { "epoch": 0.22252321981424147, "grad_norm": 0.1868649423122406, "learning_rate": 9.991754273220609e-05, "loss": 0.0121, "step": 1150 }, { "epoch": 0.22271671826625386, "grad_norm": 0.15815575420856476, "learning_rate": 9.991737662237299e-05, "loss": 0.017, "step": 1151 }, { "epoch": 0.22291021671826625, "grad_norm": 0.2088010013103485, "learning_rate": 9.991721034554813e-05, "loss": 0.0128, "step": 1152 }, { "epoch": 0.22310371517027863, "grad_norm": 0.17470315098762512, "learning_rate": 9.991704390173218e-05, "loss": 0.0117, "step": 1153 }, { "epoch": 0.22329721362229102, "grad_norm": 0.2900124192237854, "learning_rate": 9.991687729092572e-05, "loss": 0.0124, "step": 1154 }, { "epoch": 0.2234907120743034, "grad_norm": 0.10690262913703918, "learning_rate": 9.991671051312938e-05, "loss": 0.0151, "step": 1155 }, { "epoch": 0.2236842105263158, "grad_norm": 0.1301293522119522, "learning_rate": 9.991654356834376e-05, "loss": 0.0117, "step": 1156 }, { "epoch": 0.22387770897832818, "grad_norm": 0.09983327984809875, "learning_rate": 9.991637645656951e-05, "loss": 0.0141, "step": 1157 }, { "epoch": 0.22407120743034056, "grad_norm": 0.08703908324241638, "learning_rate": 9.991620917780725e-05, "loss": 0.0147, "step": 1158 }, { "epoch": 0.22426470588235295, "grad_norm": 0.11334415525197983, "learning_rate": 9.991604173205759e-05, "loss": 0.0145, "step": 1159 }, { "epoch": 0.22445820433436534, "grad_norm": 0.10336212068796158, "learning_rate": 9.991587411932114e-05, "loss": 0.0129, "step": 1160 }, { "epoch": 0.22465170278637772, "grad_norm": 0.11701468378305435, "learning_rate": 9.991570633959854e-05, "loss": 0.0098, "step": 1161 }, { "epoch": 0.22484520123839008, "grad_norm": 0.0521756075322628, "learning_rate": 9.991553839289043e-05, "loss": 0.013, "step": 1162 }, { "epoch": 0.22503869969040247, "grad_norm": 0.12932807207107544, "learning_rate": 9.99153702791974e-05, "loss": 0.0143, "step": 1163 }, { "epoch": 0.22523219814241485, "grad_norm": 0.11610401421785355, "learning_rate": 9.99152019985201e-05, "loss": 0.0147, "step": 1164 }, { "epoch": 0.22542569659442724, "grad_norm": 0.12103550136089325, "learning_rate": 9.991503355085913e-05, "loss": 0.012, "step": 1165 }, { "epoch": 0.22561919504643962, "grad_norm": 0.14915432035923004, "learning_rate": 9.991486493621514e-05, "loss": 0.0123, "step": 1166 }, { "epoch": 0.225812693498452, "grad_norm": 0.0954701155424118, "learning_rate": 9.991469615458876e-05, "loss": 0.0111, "step": 1167 }, { "epoch": 0.2260061919504644, "grad_norm": 
0.19642554223537445, "learning_rate": 9.991452720598059e-05, "loss": 0.0111, "step": 1168 }, { "epoch": 0.22619969040247678, "grad_norm": 0.18218523263931274, "learning_rate": 9.991435809039129e-05, "loss": 0.016, "step": 1169 }, { "epoch": 0.22639318885448917, "grad_norm": 0.2884807884693146, "learning_rate": 9.991418880782146e-05, "loss": 0.0143, "step": 1170 }, { "epoch": 0.22658668730650156, "grad_norm": 0.13346153497695923, "learning_rate": 9.991401935827176e-05, "loss": 0.015, "step": 1171 }, { "epoch": 0.22678018575851394, "grad_norm": 0.3025968074798584, "learning_rate": 9.991384974174278e-05, "loss": 0.0119, "step": 1172 }, { "epoch": 0.22697368421052633, "grad_norm": 0.20222336053848267, "learning_rate": 9.99136799582352e-05, "loss": 0.0169, "step": 1173 }, { "epoch": 0.22716718266253869, "grad_norm": 0.4485871195793152, "learning_rate": 9.99135100077496e-05, "loss": 0.0147, "step": 1174 }, { "epoch": 0.22736068111455107, "grad_norm": 0.19645582139492035, "learning_rate": 9.991333989028666e-05, "loss": 0.014, "step": 1175 }, { "epoch": 0.22755417956656346, "grad_norm": 0.31249120831489563, "learning_rate": 9.991316960584698e-05, "loss": 0.0142, "step": 1176 }, { "epoch": 0.22774767801857584, "grad_norm": 0.3690617084503174, "learning_rate": 9.99129991544312e-05, "loss": 0.0146, "step": 1177 }, { "epoch": 0.22794117647058823, "grad_norm": 0.13857759535312653, "learning_rate": 9.991282853603995e-05, "loss": 0.0165, "step": 1178 }, { "epoch": 0.22813467492260062, "grad_norm": 0.4432319104671478, "learning_rate": 9.991265775067387e-05, "loss": 0.0143, "step": 1179 }, { "epoch": 0.228328173374613, "grad_norm": 0.22242823243141174, "learning_rate": 9.991248679833362e-05, "loss": 0.0122, "step": 1180 }, { "epoch": 0.2285216718266254, "grad_norm": 0.3002127408981323, "learning_rate": 9.991231567901979e-05, "loss": 0.0107, "step": 1181 }, { "epoch": 0.22871517027863777, "grad_norm": 0.47160276770591736, "learning_rate": 9.991214439273304e-05, "loss": 0.0108, "step": 1182 }, { "epoch": 0.22890866873065016, "grad_norm": 0.26244837045669556, "learning_rate": 9.991197293947401e-05, "loss": 0.0138, "step": 1183 }, { "epoch": 0.22910216718266255, "grad_norm": 0.48315224051475525, "learning_rate": 9.991180131924333e-05, "loss": 0.0125, "step": 1184 }, { "epoch": 0.22929566563467493, "grad_norm": 0.164605051279068, "learning_rate": 9.991162953204162e-05, "loss": 0.0129, "step": 1185 }, { "epoch": 0.22948916408668732, "grad_norm": 0.36324527859687805, "learning_rate": 9.991145757786956e-05, "loss": 0.0133, "step": 1186 }, { "epoch": 0.22968266253869968, "grad_norm": 0.16113488376140594, "learning_rate": 9.991128545672775e-05, "loss": 0.0128, "step": 1187 }, { "epoch": 0.22987616099071206, "grad_norm": 0.2410571128129959, "learning_rate": 9.991111316861685e-05, "loss": 0.0165, "step": 1188 }, { "epoch": 0.23006965944272445, "grad_norm": 0.1871313750743866, "learning_rate": 9.991094071353751e-05, "loss": 0.0164, "step": 1189 }, { "epoch": 0.23026315789473684, "grad_norm": 0.20030297338962555, "learning_rate": 9.991076809149036e-05, "loss": 0.0146, "step": 1190 }, { "epoch": 0.23045665634674922, "grad_norm": 0.18465293943881989, "learning_rate": 9.991059530247603e-05, "loss": 0.0137, "step": 1191 }, { "epoch": 0.2306501547987616, "grad_norm": 0.09496738016605377, "learning_rate": 9.991042234649516e-05, "loss": 0.0155, "step": 1192 }, { "epoch": 0.230843653250774, "grad_norm": 0.18745273351669312, "learning_rate": 9.991024922354844e-05, "loss": 0.0166, "step": 1193 }, { "epoch": 0.23103715170278638, 
"grad_norm": 0.11399532854557037, "learning_rate": 9.991007593363645e-05, "loss": 0.0132, "step": 1194 }, { "epoch": 0.23123065015479877, "grad_norm": 0.13128040730953217, "learning_rate": 9.990990247675988e-05, "loss": 0.0129, "step": 1195 }, { "epoch": 0.23142414860681115, "grad_norm": 0.13444268703460693, "learning_rate": 9.990972885291937e-05, "loss": 0.013, "step": 1196 }, { "epoch": 0.23161764705882354, "grad_norm": 0.16028156876564026, "learning_rate": 9.990955506211553e-05, "loss": 0.0151, "step": 1197 }, { "epoch": 0.23181114551083593, "grad_norm": 0.17052070796489716, "learning_rate": 9.990938110434903e-05, "loss": 0.0165, "step": 1198 }, { "epoch": 0.23200464396284828, "grad_norm": 0.16102737188339233, "learning_rate": 9.990920697962053e-05, "loss": 0.0137, "step": 1199 }, { "epoch": 0.23219814241486067, "grad_norm": 0.14628027379512787, "learning_rate": 9.990903268793065e-05, "loss": 0.017, "step": 1200 }, { "epoch": 0.23239164086687306, "grad_norm": 0.16447103023529053, "learning_rate": 9.990885822928006e-05, "loss": 0.0124, "step": 1201 }, { "epoch": 0.23258513931888544, "grad_norm": 0.1293811798095703, "learning_rate": 9.990868360366942e-05, "loss": 0.0145, "step": 1202 }, { "epoch": 0.23277863777089783, "grad_norm": 0.0939779207110405, "learning_rate": 9.990850881109934e-05, "loss": 0.0134, "step": 1203 }, { "epoch": 0.23297213622291021, "grad_norm": 0.17927850782871246, "learning_rate": 9.990833385157049e-05, "loss": 0.0139, "step": 1204 }, { "epoch": 0.2331656346749226, "grad_norm": 0.1233980655670166, "learning_rate": 9.990815872508353e-05, "loss": 0.0152, "step": 1205 }, { "epoch": 0.233359133126935, "grad_norm": 0.16544994711875916, "learning_rate": 9.990798343163908e-05, "loss": 0.0124, "step": 1206 }, { "epoch": 0.23355263157894737, "grad_norm": 0.17247414588928223, "learning_rate": 9.990780797123784e-05, "loss": 0.0139, "step": 1207 }, { "epoch": 0.23374613003095976, "grad_norm": 0.21814695000648499, "learning_rate": 9.990763234388041e-05, "loss": 0.0155, "step": 1208 }, { "epoch": 0.23393962848297215, "grad_norm": 0.11185569316148758, "learning_rate": 9.99074565495675e-05, "loss": 0.0102, "step": 1209 }, { "epoch": 0.23413312693498453, "grad_norm": 0.0854959711432457, "learning_rate": 9.990728058829971e-05, "loss": 0.0153, "step": 1210 }, { "epoch": 0.23432662538699692, "grad_norm": 0.12371832877397537, "learning_rate": 9.990710446007772e-05, "loss": 0.0132, "step": 1211 }, { "epoch": 0.23452012383900928, "grad_norm": 0.2106633484363556, "learning_rate": 9.990692816490217e-05, "loss": 0.0165, "step": 1212 }, { "epoch": 0.23471362229102166, "grad_norm": 0.17068932950496674, "learning_rate": 9.990675170277375e-05, "loss": 0.0128, "step": 1213 }, { "epoch": 0.23490712074303405, "grad_norm": 0.16474339365959167, "learning_rate": 9.990657507369308e-05, "loss": 0.0148, "step": 1214 }, { "epoch": 0.23510061919504643, "grad_norm": 0.2098543643951416, "learning_rate": 9.990639827766081e-05, "loss": 0.0139, "step": 1215 }, { "epoch": 0.23529411764705882, "grad_norm": 0.12310770153999329, "learning_rate": 9.990622131467765e-05, "loss": 0.0143, "step": 1216 }, { "epoch": 0.2354876160990712, "grad_norm": 0.15454772114753723, "learning_rate": 9.99060441847442e-05, "loss": 0.0112, "step": 1217 }, { "epoch": 0.2356811145510836, "grad_norm": 0.18792730569839478, "learning_rate": 9.990586688786115e-05, "loss": 0.013, "step": 1218 }, { "epoch": 0.23587461300309598, "grad_norm": 0.10843822360038757, "learning_rate": 9.990568942402916e-05, "loss": 0.0143, "step": 1219 }, { "epoch": 
0.23606811145510836, "grad_norm": 0.17170602083206177, "learning_rate": 9.990551179324888e-05, "loss": 0.0135, "step": 1220 }, { "epoch": 0.23626160990712075, "grad_norm": 0.13218864798545837, "learning_rate": 9.990533399552096e-05, "loss": 0.0128, "step": 1221 }, { "epoch": 0.23645510835913314, "grad_norm": 0.14229007065296173, "learning_rate": 9.99051560308461e-05, "loss": 0.0134, "step": 1222 }, { "epoch": 0.23664860681114552, "grad_norm": 0.10289449244737625, "learning_rate": 9.990497789922492e-05, "loss": 0.0131, "step": 1223 }, { "epoch": 0.23684210526315788, "grad_norm": 0.08755715191364288, "learning_rate": 9.990479960065809e-05, "loss": 0.0122, "step": 1224 }, { "epoch": 0.23703560371517027, "grad_norm": 0.14442078769207, "learning_rate": 9.990462113514627e-05, "loss": 0.015, "step": 1225 }, { "epoch": 0.23722910216718265, "grad_norm": 0.2022145688533783, "learning_rate": 9.990444250269015e-05, "loss": 0.0145, "step": 1226 }, { "epoch": 0.23742260061919504, "grad_norm": 0.202696293592453, "learning_rate": 9.990426370329038e-05, "loss": 0.0131, "step": 1227 }, { "epoch": 0.23761609907120743, "grad_norm": 0.25997358560562134, "learning_rate": 9.990408473694761e-05, "loss": 0.0136, "step": 1228 }, { "epoch": 0.2378095975232198, "grad_norm": 0.2741006314754486, "learning_rate": 9.990390560366253e-05, "loss": 0.0156, "step": 1229 }, { "epoch": 0.2380030959752322, "grad_norm": 0.3104366958141327, "learning_rate": 9.990372630343578e-05, "loss": 0.0154, "step": 1230 }, { "epoch": 0.23819659442724458, "grad_norm": 0.24650435149669647, "learning_rate": 9.990354683626806e-05, "loss": 0.0149, "step": 1231 }, { "epoch": 0.23839009287925697, "grad_norm": 0.17886163294315338, "learning_rate": 9.990336720216e-05, "loss": 0.0148, "step": 1232 }, { "epoch": 0.23858359133126936, "grad_norm": 0.37174519896507263, "learning_rate": 9.99031874011123e-05, "loss": 0.016, "step": 1233 }, { "epoch": 0.23877708978328174, "grad_norm": 0.09924076497554779, "learning_rate": 9.99030074331256e-05, "loss": 0.011, "step": 1234 }, { "epoch": 0.23897058823529413, "grad_norm": 0.43289411067962646, "learning_rate": 9.990282729820059e-05, "loss": 0.0142, "step": 1235 }, { "epoch": 0.23916408668730652, "grad_norm": 0.18251527845859528, "learning_rate": 9.990264699633793e-05, "loss": 0.0107, "step": 1236 }, { "epoch": 0.23935758513931887, "grad_norm": 0.23986905813217163, "learning_rate": 9.990246652753829e-05, "loss": 0.0115, "step": 1237 }, { "epoch": 0.23955108359133126, "grad_norm": 0.386489599943161, "learning_rate": 9.990228589180234e-05, "loss": 0.0172, "step": 1238 }, { "epoch": 0.23974458204334365, "grad_norm": 0.1463913917541504, "learning_rate": 9.990210508913076e-05, "loss": 0.0144, "step": 1239 }, { "epoch": 0.23993808049535603, "grad_norm": 0.2760433554649353, "learning_rate": 9.990192411952422e-05, "loss": 0.014, "step": 1240 }, { "epoch": 0.24013157894736842, "grad_norm": 0.20455490052700043, "learning_rate": 9.99017429829834e-05, "loss": 0.0136, "step": 1241 }, { "epoch": 0.2403250773993808, "grad_norm": 0.2153535783290863, "learning_rate": 9.990156167950894e-05, "loss": 0.0154, "step": 1242 }, { "epoch": 0.2405185758513932, "grad_norm": 0.22118042409420013, "learning_rate": 9.990138020910155e-05, "loss": 0.0154, "step": 1243 }, { "epoch": 0.24071207430340558, "grad_norm": 0.1214895024895668, "learning_rate": 9.990119857176189e-05, "loss": 0.0118, "step": 1244 }, { "epoch": 0.24090557275541796, "grad_norm": 0.1936112344264984, "learning_rate": 9.990101676749065e-05, "loss": 0.0136, "step": 1245 }, { 
"epoch": 0.24109907120743035, "grad_norm": 0.08236906677484512, "learning_rate": 9.99008347962885e-05, "loss": 0.0145, "step": 1246 }, { "epoch": 0.24129256965944273, "grad_norm": 0.19288069009780884, "learning_rate": 9.990065265815606e-05, "loss": 0.0117, "step": 1247 }, { "epoch": 0.24148606811145512, "grad_norm": 0.13925474882125854, "learning_rate": 9.99004703530941e-05, "loss": 0.0141, "step": 1248 }, { "epoch": 0.24167956656346748, "grad_norm": 0.16871292889118195, "learning_rate": 9.990028788110325e-05, "loss": 0.014, "step": 1249 }, { "epoch": 0.24187306501547987, "grad_norm": 0.20095954835414886, "learning_rate": 9.990010524218418e-05, "loss": 0.0156, "step": 1250 }, { "epoch": 0.24206656346749225, "grad_norm": 0.09629127383232117, "learning_rate": 9.98999224363376e-05, "loss": 0.0113, "step": 1251 }, { "epoch": 0.24226006191950464, "grad_norm": 0.19336570799350739, "learning_rate": 9.989973946356417e-05, "loss": 0.0138, "step": 1252 }, { "epoch": 0.24245356037151702, "grad_norm": 0.10208187252283096, "learning_rate": 9.989955632386457e-05, "loss": 0.0121, "step": 1253 }, { "epoch": 0.2426470588235294, "grad_norm": 0.06105076149106026, "learning_rate": 9.989937301723948e-05, "loss": 0.0117, "step": 1254 }, { "epoch": 0.2428405572755418, "grad_norm": 0.08282541483640671, "learning_rate": 9.989918954368957e-05, "loss": 0.0151, "step": 1255 }, { "epoch": 0.24303405572755418, "grad_norm": 0.1701306402683258, "learning_rate": 9.989900590321555e-05, "loss": 0.015, "step": 1256 }, { "epoch": 0.24322755417956657, "grad_norm": 0.1584170013666153, "learning_rate": 9.98988220958181e-05, "loss": 0.0141, "step": 1257 }, { "epoch": 0.24342105263157895, "grad_norm": 0.20187987387180328, "learning_rate": 9.989863812149787e-05, "loss": 0.0106, "step": 1258 }, { "epoch": 0.24361455108359134, "grad_norm": 0.13863879442214966, "learning_rate": 9.989845398025558e-05, "loss": 0.0126, "step": 1259 }, { "epoch": 0.24380804953560373, "grad_norm": 0.07298299670219421, "learning_rate": 9.98982696720919e-05, "loss": 0.0132, "step": 1260 }, { "epoch": 0.24400154798761609, "grad_norm": 0.1423168033361435, "learning_rate": 9.989808519700753e-05, "loss": 0.0133, "step": 1261 }, { "epoch": 0.24419504643962847, "grad_norm": 0.08086596429347992, "learning_rate": 9.989790055500312e-05, "loss": 0.0139, "step": 1262 }, { "epoch": 0.24438854489164086, "grad_norm": 0.1296520233154297, "learning_rate": 9.989771574607938e-05, "loss": 0.0145, "step": 1263 }, { "epoch": 0.24458204334365324, "grad_norm": 0.06900708377361298, "learning_rate": 9.989753077023702e-05, "loss": 0.0156, "step": 1264 }, { "epoch": 0.24477554179566563, "grad_norm": 0.11103757470846176, "learning_rate": 9.989734562747668e-05, "loss": 0.0126, "step": 1265 }, { "epoch": 0.24496904024767802, "grad_norm": 0.08397048711776733, "learning_rate": 9.989716031779908e-05, "loss": 0.0134, "step": 1266 }, { "epoch": 0.2451625386996904, "grad_norm": 0.08225642144680023, "learning_rate": 9.98969748412049e-05, "loss": 0.011, "step": 1267 }, { "epoch": 0.2453560371517028, "grad_norm": 0.0922863557934761, "learning_rate": 9.989678919769485e-05, "loss": 0.0152, "step": 1268 }, { "epoch": 0.24554953560371517, "grad_norm": 0.062451522797346115, "learning_rate": 9.989660338726956e-05, "loss": 0.0148, "step": 1269 }, { "epoch": 0.24574303405572756, "grad_norm": 0.10207223147153854, "learning_rate": 9.989641740992979e-05, "loss": 0.0129, "step": 1270 }, { "epoch": 0.24593653250773995, "grad_norm": 0.07172605395317078, "learning_rate": 9.989623126567619e-05, "loss": 0.0116, 
"step": 1271 }, { "epoch": 0.24613003095975233, "grad_norm": 0.07215768843889236, "learning_rate": 9.989604495450948e-05, "loss": 0.014, "step": 1272 }, { "epoch": 0.24632352941176472, "grad_norm": 0.06950497627258301, "learning_rate": 9.989585847643032e-05, "loss": 0.0116, "step": 1273 }, { "epoch": 0.24651702786377708, "grad_norm": 0.11675609648227692, "learning_rate": 9.989567183143944e-05, "loss": 0.0149, "step": 1274 }, { "epoch": 0.24671052631578946, "grad_norm": 0.10257434099912643, "learning_rate": 9.989548501953749e-05, "loss": 0.0128, "step": 1275 }, { "epoch": 0.24690402476780185, "grad_norm": 0.15288783609867096, "learning_rate": 9.98952980407252e-05, "loss": 0.0129, "step": 1276 }, { "epoch": 0.24709752321981424, "grad_norm": 0.05741138011217117, "learning_rate": 9.989511089500327e-05, "loss": 0.0134, "step": 1277 }, { "epoch": 0.24729102167182662, "grad_norm": 0.1398114114999771, "learning_rate": 9.989492358237235e-05, "loss": 0.0136, "step": 1278 }, { "epoch": 0.247484520123839, "grad_norm": 0.09218219667673111, "learning_rate": 9.989473610283319e-05, "loss": 0.0138, "step": 1279 }, { "epoch": 0.2476780185758514, "grad_norm": 0.09962920099496841, "learning_rate": 9.989454845638646e-05, "loss": 0.0122, "step": 1280 }, { "epoch": 0.24787151702786378, "grad_norm": 0.09841171652078629, "learning_rate": 9.989436064303287e-05, "loss": 0.0161, "step": 1281 }, { "epoch": 0.24806501547987617, "grad_norm": 0.09361279755830765, "learning_rate": 9.989417266277309e-05, "loss": 0.0109, "step": 1282 }, { "epoch": 0.24825851393188855, "grad_norm": 0.16277898848056793, "learning_rate": 9.989398451560785e-05, "loss": 0.0169, "step": 1283 }, { "epoch": 0.24845201238390094, "grad_norm": 0.1403643637895584, "learning_rate": 9.989379620153782e-05, "loss": 0.0167, "step": 1284 }, { "epoch": 0.24864551083591332, "grad_norm": 0.1710033118724823, "learning_rate": 9.989360772056373e-05, "loss": 0.0143, "step": 1285 }, { "epoch": 0.24883900928792568, "grad_norm": 0.12199639528989792, "learning_rate": 9.989341907268628e-05, "loss": 0.014, "step": 1286 }, { "epoch": 0.24903250773993807, "grad_norm": 0.2135307490825653, "learning_rate": 9.989323025790614e-05, "loss": 0.0113, "step": 1287 }, { "epoch": 0.24922600619195046, "grad_norm": 0.08859959244728088, "learning_rate": 9.989304127622406e-05, "loss": 0.0126, "step": 1288 }, { "epoch": 0.24941950464396284, "grad_norm": 0.21493089199066162, "learning_rate": 9.989285212764069e-05, "loss": 0.0131, "step": 1289 }, { "epoch": 0.24961300309597523, "grad_norm": 0.11360014230012894, "learning_rate": 9.989266281215676e-05, "loss": 0.0173, "step": 1290 }, { "epoch": 0.2498065015479876, "grad_norm": 0.1553160697221756, "learning_rate": 9.989247332977297e-05, "loss": 0.0146, "step": 1291 }, { "epoch": 0.25, "grad_norm": 0.15542246401309967, "learning_rate": 9.989228368049004e-05, "loss": 0.0137, "step": 1292 }, { "epoch": 0.2501934984520124, "grad_norm": 0.11846387386322021, "learning_rate": 9.989209386430866e-05, "loss": 0.0136, "step": 1293 }, { "epoch": 0.2503869969040248, "grad_norm": 0.07401223480701447, "learning_rate": 9.989190388122952e-05, "loss": 0.0121, "step": 1294 }, { "epoch": 0.25058049535603716, "grad_norm": 0.11058290302753448, "learning_rate": 9.989171373125335e-05, "loss": 0.0116, "step": 1295 }, { "epoch": 0.25077399380804954, "grad_norm": 0.05490979924798012, "learning_rate": 9.989152341438086e-05, "loss": 0.0123, "step": 1296 }, { "epoch": 0.25096749226006193, "grad_norm": 0.128203883767128, "learning_rate": 9.989133293061274e-05, "loss": 
0.0154, "step": 1297 }, { "epoch": 0.2511609907120743, "grad_norm": 0.1439015418291092, "learning_rate": 9.98911422799497e-05, "loss": 0.012, "step": 1298 }, { "epoch": 0.2513544891640867, "grad_norm": 0.11025826632976532, "learning_rate": 9.989095146239246e-05, "loss": 0.0136, "step": 1299 }, { "epoch": 0.2515479876160991, "grad_norm": 0.12905842065811157, "learning_rate": 9.989076047794172e-05, "loss": 0.0113, "step": 1300 }, { "epoch": 0.2517414860681115, "grad_norm": 0.11202433705329895, "learning_rate": 9.98905693265982e-05, "loss": 0.0137, "step": 1301 }, { "epoch": 0.25193498452012386, "grad_norm": 0.0929252952337265, "learning_rate": 9.98903780083626e-05, "loss": 0.0147, "step": 1302 }, { "epoch": 0.25212848297213625, "grad_norm": 0.18595503270626068, "learning_rate": 9.989018652323564e-05, "loss": 0.0137, "step": 1303 }, { "epoch": 0.25232198142414863, "grad_norm": 0.11897996068000793, "learning_rate": 9.988999487121803e-05, "loss": 0.0126, "step": 1304 }, { "epoch": 0.25251547987616096, "grad_norm": 0.16667552292346954, "learning_rate": 9.988980305231047e-05, "loss": 0.0117, "step": 1305 }, { "epoch": 0.25270897832817335, "grad_norm": 0.20958088338375092, "learning_rate": 9.98896110665137e-05, "loss": 0.0134, "step": 1306 }, { "epoch": 0.25290247678018574, "grad_norm": 0.16909773647785187, "learning_rate": 9.988941891382841e-05, "loss": 0.0125, "step": 1307 }, { "epoch": 0.2530959752321981, "grad_norm": 0.23166553676128387, "learning_rate": 9.98892265942553e-05, "loss": 0.0127, "step": 1308 }, { "epoch": 0.2532894736842105, "grad_norm": 0.20831595361232758, "learning_rate": 9.988903410779513e-05, "loss": 0.0118, "step": 1309 }, { "epoch": 0.2534829721362229, "grad_norm": 0.4197518825531006, "learning_rate": 9.988884145444859e-05, "loss": 0.0138, "step": 1310 }, { "epoch": 0.2536764705882353, "grad_norm": 0.14486601948738098, "learning_rate": 9.988864863421638e-05, "loss": 0.0153, "step": 1311 }, { "epoch": 0.25386996904024767, "grad_norm": 0.4153054356575012, "learning_rate": 9.988845564709926e-05, "loss": 0.0134, "step": 1312 }, { "epoch": 0.25406346749226005, "grad_norm": 0.1930963695049286, "learning_rate": 9.988826249309791e-05, "loss": 0.0139, "step": 1313 }, { "epoch": 0.25425696594427244, "grad_norm": 0.33146554231643677, "learning_rate": 9.988806917221307e-05, "loss": 0.0144, "step": 1314 }, { "epoch": 0.2544504643962848, "grad_norm": 0.2589700222015381, "learning_rate": 9.988787568444545e-05, "loss": 0.0162, "step": 1315 }, { "epoch": 0.2546439628482972, "grad_norm": 0.22194960713386536, "learning_rate": 9.988768202979575e-05, "loss": 0.0133, "step": 1316 }, { "epoch": 0.2548374613003096, "grad_norm": 0.26222607493400574, "learning_rate": 9.988748820826472e-05, "loss": 0.016, "step": 1317 }, { "epoch": 0.255030959752322, "grad_norm": 0.1252402663230896, "learning_rate": 9.988729421985307e-05, "loss": 0.0152, "step": 1318 }, { "epoch": 0.25522445820433437, "grad_norm": 0.20480628311634064, "learning_rate": 9.988710006456152e-05, "loss": 0.0134, "step": 1319 }, { "epoch": 0.25541795665634676, "grad_norm": 0.10233883559703827, "learning_rate": 9.98869057423908e-05, "loss": 0.0137, "step": 1320 }, { "epoch": 0.25561145510835914, "grad_norm": 0.22008462250232697, "learning_rate": 9.988671125334162e-05, "loss": 0.0137, "step": 1321 }, { "epoch": 0.25580495356037153, "grad_norm": 0.13868995010852814, "learning_rate": 9.98865165974147e-05, "loss": 0.0125, "step": 1322 }, { "epoch": 0.2559984520123839, "grad_norm": 0.1799471378326416, "learning_rate": 9.988632177461077e-05, 
"loss": 0.0153, "step": 1323 }, { "epoch": 0.2561919504643963, "grad_norm": 0.21033498644828796, "learning_rate": 9.988612678493056e-05, "loss": 0.0139, "step": 1324 }, { "epoch": 0.2563854489164087, "grad_norm": 0.09117583185434341, "learning_rate": 9.988593162837481e-05, "loss": 0.014, "step": 1325 }, { "epoch": 0.2565789473684211, "grad_norm": 0.1974211186170578, "learning_rate": 9.988573630494422e-05, "loss": 0.0103, "step": 1326 }, { "epoch": 0.25677244582043346, "grad_norm": 0.06086663156747818, "learning_rate": 9.98855408146395e-05, "loss": 0.0145, "step": 1327 }, { "epoch": 0.25696594427244585, "grad_norm": 0.1473408192396164, "learning_rate": 9.98853451574614e-05, "loss": 0.0132, "step": 1328 }, { "epoch": 0.25715944272445823, "grad_norm": 0.06521157175302505, "learning_rate": 9.988514933341066e-05, "loss": 0.0118, "step": 1329 }, { "epoch": 0.25735294117647056, "grad_norm": 0.08313033729791641, "learning_rate": 9.988495334248801e-05, "loss": 0.0116, "step": 1330 }, { "epoch": 0.25754643962848295, "grad_norm": 0.05202213674783707, "learning_rate": 9.988475718469414e-05, "loss": 0.013, "step": 1331 }, { "epoch": 0.25773993808049533, "grad_norm": 0.09626540541648865, "learning_rate": 9.988456086002981e-05, "loss": 0.0112, "step": 1332 }, { "epoch": 0.2579334365325077, "grad_norm": 0.08923303335905075, "learning_rate": 9.988436436849574e-05, "loss": 0.0134, "step": 1333 }, { "epoch": 0.2581269349845201, "grad_norm": 0.0955887958407402, "learning_rate": 9.988416771009267e-05, "loss": 0.0097, "step": 1334 }, { "epoch": 0.2583204334365325, "grad_norm": 0.08872464299201965, "learning_rate": 9.988397088482132e-05, "loss": 0.016, "step": 1335 }, { "epoch": 0.2585139318885449, "grad_norm": 0.07278778403997421, "learning_rate": 9.988377389268242e-05, "loss": 0.0137, "step": 1336 }, { "epoch": 0.25870743034055727, "grad_norm": 0.06390061229467392, "learning_rate": 9.98835767336767e-05, "loss": 0.015, "step": 1337 }, { "epoch": 0.25890092879256965, "grad_norm": 0.08376254886388779, "learning_rate": 9.988337940780493e-05, "loss": 0.0126, "step": 1338 }, { "epoch": 0.25909442724458204, "grad_norm": 0.06566376239061356, "learning_rate": 9.98831819150678e-05, "loss": 0.0137, "step": 1339 }, { "epoch": 0.2592879256965944, "grad_norm": 0.08113279938697815, "learning_rate": 9.988298425546606e-05, "loss": 0.0129, "step": 1340 }, { "epoch": 0.2594814241486068, "grad_norm": 0.10275324434041977, "learning_rate": 9.988278642900046e-05, "loss": 0.0127, "step": 1341 }, { "epoch": 0.2596749226006192, "grad_norm": 0.0562610886991024, "learning_rate": 9.988258843567169e-05, "loss": 0.0129, "step": 1342 }, { "epoch": 0.2598684210526316, "grad_norm": 0.08131901174783707, "learning_rate": 9.988239027548052e-05, "loss": 0.011, "step": 1343 }, { "epoch": 0.26006191950464397, "grad_norm": 0.05511404201388359, "learning_rate": 9.988219194842771e-05, "loss": 0.014, "step": 1344 }, { "epoch": 0.26025541795665635, "grad_norm": 0.08628266304731369, "learning_rate": 9.988199345451394e-05, "loss": 0.0159, "step": 1345 }, { "epoch": 0.26044891640866874, "grad_norm": 0.07695169001817703, "learning_rate": 9.988179479373999e-05, "loss": 0.0123, "step": 1346 }, { "epoch": 0.2606424148606811, "grad_norm": 0.05966389551758766, "learning_rate": 9.988159596610658e-05, "loss": 0.0095, "step": 1347 }, { "epoch": 0.2608359133126935, "grad_norm": 0.18046791851520538, "learning_rate": 9.988139697161446e-05, "loss": 0.0146, "step": 1348 }, { "epoch": 0.2610294117647059, "grad_norm": 0.07434944808483124, "learning_rate": 
9.988119781026436e-05, "loss": 0.012, "step": 1349 }, { "epoch": 0.2612229102167183, "grad_norm": 0.2552446126937866, "learning_rate": 9.988099848205703e-05, "loss": 0.012, "step": 1350 }, { "epoch": 0.26141640866873067, "grad_norm": 0.0847959890961647, "learning_rate": 9.988079898699321e-05, "loss": 0.016, "step": 1351 }, { "epoch": 0.26160990712074306, "grad_norm": 0.309317946434021, "learning_rate": 9.988059932507363e-05, "loss": 0.0126, "step": 1352 }, { "epoch": 0.26180340557275544, "grad_norm": 0.12590478360652924, "learning_rate": 9.988039949629905e-05, "loss": 0.0167, "step": 1353 }, { "epoch": 0.26199690402476783, "grad_norm": 0.2630947530269623, "learning_rate": 9.98801995006702e-05, "loss": 0.0112, "step": 1354 }, { "epoch": 0.26219040247678016, "grad_norm": 0.1868101954460144, "learning_rate": 9.987999933818783e-05, "loss": 0.0156, "step": 1355 }, { "epoch": 0.26238390092879255, "grad_norm": 0.20300151407718658, "learning_rate": 9.987979900885268e-05, "loss": 0.0139, "step": 1356 }, { "epoch": 0.26257739938080493, "grad_norm": 0.23837895691394806, "learning_rate": 9.987959851266548e-05, "loss": 0.014, "step": 1357 }, { "epoch": 0.2627708978328173, "grad_norm": 0.20559583604335785, "learning_rate": 9.987939784962699e-05, "loss": 0.0171, "step": 1358 }, { "epoch": 0.2629643962848297, "grad_norm": 0.16599395871162415, "learning_rate": 9.987919701973798e-05, "loss": 0.0126, "step": 1359 }, { "epoch": 0.2631578947368421, "grad_norm": 0.1529269963502884, "learning_rate": 9.987899602299917e-05, "loss": 0.0144, "step": 1360 }, { "epoch": 0.2633513931888545, "grad_norm": 0.19032660126686096, "learning_rate": 9.987879485941132e-05, "loss": 0.014, "step": 1361 }, { "epoch": 0.26354489164086686, "grad_norm": 0.21960893273353577, "learning_rate": 9.987859352897514e-05, "loss": 0.0128, "step": 1362 }, { "epoch": 0.26373839009287925, "grad_norm": 0.2174445390701294, "learning_rate": 9.987839203169142e-05, "loss": 0.0142, "step": 1363 }, { "epoch": 0.26393188854489164, "grad_norm": 0.24683311581611633, "learning_rate": 9.98781903675609e-05, "loss": 0.0142, "step": 1364 }, { "epoch": 0.264125386996904, "grad_norm": 0.1749783158302307, "learning_rate": 9.987798853658433e-05, "loss": 0.0144, "step": 1365 }, { "epoch": 0.2643188854489164, "grad_norm": 0.22081513702869415, "learning_rate": 9.987778653876246e-05, "loss": 0.0123, "step": 1366 }, { "epoch": 0.2645123839009288, "grad_norm": 0.26824620366096497, "learning_rate": 9.987758437409604e-05, "loss": 0.011, "step": 1367 }, { "epoch": 0.2647058823529412, "grad_norm": 0.12246990948915482, "learning_rate": 9.987738204258579e-05, "loss": 0.0127, "step": 1368 }, { "epoch": 0.26489938080495357, "grad_norm": 0.2936118245124817, "learning_rate": 9.987717954423253e-05, "loss": 0.0148, "step": 1369 }, { "epoch": 0.26509287925696595, "grad_norm": 0.20577210187911987, "learning_rate": 9.987697687903695e-05, "loss": 0.016, "step": 1370 }, { "epoch": 0.26528637770897834, "grad_norm": 0.18605373799800873, "learning_rate": 9.987677404699983e-05, "loss": 0.0171, "step": 1371 }, { "epoch": 0.2654798761609907, "grad_norm": 0.2971231937408447, "learning_rate": 9.987657104812192e-05, "loss": 0.0151, "step": 1372 }, { "epoch": 0.2656733746130031, "grad_norm": 0.113067127764225, "learning_rate": 9.987636788240399e-05, "loss": 0.0129, "step": 1373 }, { "epoch": 0.2658668730650155, "grad_norm": 0.2424742430448532, "learning_rate": 9.987616454984679e-05, "loss": 0.0129, "step": 1374 }, { "epoch": 0.2660603715170279, "grad_norm": 0.18579216301441193, "learning_rate": 
9.987596105045104e-05, "loss": 0.0141, "step": 1375 }, { "epoch": 0.26625386996904027, "grad_norm": 0.14937859773635864, "learning_rate": 9.987575738421753e-05, "loss": 0.016, "step": 1376 }, { "epoch": 0.26644736842105265, "grad_norm": 0.1479075402021408, "learning_rate": 9.987555355114703e-05, "loss": 0.0107, "step": 1377 }, { "epoch": 0.26664086687306504, "grad_norm": 0.14033140242099762, "learning_rate": 9.987534955124026e-05, "loss": 0.0171, "step": 1378 }, { "epoch": 0.26683436532507737, "grad_norm": 0.07573121786117554, "learning_rate": 9.987514538449801e-05, "loss": 0.0106, "step": 1379 }, { "epoch": 0.26702786377708976, "grad_norm": 0.11465469002723694, "learning_rate": 9.987494105092103e-05, "loss": 0.0162, "step": 1380 }, { "epoch": 0.26722136222910214, "grad_norm": 0.08194807171821594, "learning_rate": 9.987473655051006e-05, "loss": 0.0127, "step": 1381 }, { "epoch": 0.26741486068111453, "grad_norm": 0.07296275347471237, "learning_rate": 9.987453188326589e-05, "loss": 0.0127, "step": 1382 }, { "epoch": 0.2676083591331269, "grad_norm": 0.12426229566335678, "learning_rate": 9.987432704918927e-05, "loss": 0.0143, "step": 1383 }, { "epoch": 0.2678018575851393, "grad_norm": 0.07443466037511826, "learning_rate": 9.987412204828095e-05, "loss": 0.013, "step": 1384 }, { "epoch": 0.2679953560371517, "grad_norm": 0.11415376514196396, "learning_rate": 9.98739168805417e-05, "loss": 0.0111, "step": 1385 }, { "epoch": 0.2681888544891641, "grad_norm": 0.08460409194231033, "learning_rate": 9.987371154597229e-05, "loss": 0.014, "step": 1386 }, { "epoch": 0.26838235294117646, "grad_norm": 0.10812225937843323, "learning_rate": 9.987350604457347e-05, "loss": 0.0143, "step": 1387 }, { "epoch": 0.26857585139318885, "grad_norm": 0.12424807995557785, "learning_rate": 9.987330037634603e-05, "loss": 0.014, "step": 1388 }, { "epoch": 0.26876934984520123, "grad_norm": 0.0624486580491066, "learning_rate": 9.987309454129071e-05, "loss": 0.0113, "step": 1389 }, { "epoch": 0.2689628482972136, "grad_norm": 0.07767600566148758, "learning_rate": 9.987288853940828e-05, "loss": 0.0131, "step": 1390 }, { "epoch": 0.269156346749226, "grad_norm": 0.18039654195308685, "learning_rate": 9.98726823706995e-05, "loss": 0.0128, "step": 1391 }, { "epoch": 0.2693498452012384, "grad_norm": 0.10516270250082016, "learning_rate": 9.987247603516515e-05, "loss": 0.0094, "step": 1392 }, { "epoch": 0.2695433436532508, "grad_norm": 0.13016833364963531, "learning_rate": 9.987226953280598e-05, "loss": 0.0137, "step": 1393 }, { "epoch": 0.26973684210526316, "grad_norm": 0.07650524377822876, "learning_rate": 9.987206286362279e-05, "loss": 0.0115, "step": 1394 }, { "epoch": 0.26993034055727555, "grad_norm": 0.1162012368440628, "learning_rate": 9.987185602761631e-05, "loss": 0.0125, "step": 1395 }, { "epoch": 0.27012383900928794, "grad_norm": 0.10833857953548431, "learning_rate": 9.987164902478733e-05, "loss": 0.013, "step": 1396 }, { "epoch": 0.2703173374613003, "grad_norm": 0.15546981990337372, "learning_rate": 9.987144185513662e-05, "loss": 0.012, "step": 1397 }, { "epoch": 0.2705108359133127, "grad_norm": 0.10120183974504471, "learning_rate": 9.987123451866495e-05, "loss": 0.0129, "step": 1398 }, { "epoch": 0.2707043343653251, "grad_norm": 0.17898112535476685, "learning_rate": 9.987102701537306e-05, "loss": 0.014, "step": 1399 }, { "epoch": 0.2708978328173375, "grad_norm": 0.11543359607458115, "learning_rate": 9.987081934526177e-05, "loss": 0.0141, "step": 1400 }, { "epoch": 0.27109133126934987, "grad_norm": 0.18035322427749634, 
"learning_rate": 9.987061150833184e-05, "loss": 0.0126, "step": 1401 }, { "epoch": 0.27128482972136225, "grad_norm": 0.1766473948955536, "learning_rate": 9.987040350458403e-05, "loss": 0.0136, "step": 1402 }, { "epoch": 0.27147832817337464, "grad_norm": 0.14572496712207794, "learning_rate": 9.987019533401909e-05, "loss": 0.0128, "step": 1403 }, { "epoch": 0.27167182662538697, "grad_norm": 0.18050120770931244, "learning_rate": 9.986998699663784e-05, "loss": 0.0122, "step": 1404 }, { "epoch": 0.27186532507739936, "grad_norm": 0.1314445286989212, "learning_rate": 9.986977849244102e-05, "loss": 0.0135, "step": 1405 }, { "epoch": 0.27205882352941174, "grad_norm": 0.08606015145778656, "learning_rate": 9.986956982142944e-05, "loss": 0.0161, "step": 1406 }, { "epoch": 0.27225232198142413, "grad_norm": 0.1239871010184288, "learning_rate": 9.986936098360384e-05, "loss": 0.0106, "step": 1407 }, { "epoch": 0.2724458204334365, "grad_norm": 0.04710520803928375, "learning_rate": 9.986915197896502e-05, "loss": 0.0091, "step": 1408 }, { "epoch": 0.2726393188854489, "grad_norm": 0.10686378926038742, "learning_rate": 9.986894280751374e-05, "loss": 0.0122, "step": 1409 }, { "epoch": 0.2728328173374613, "grad_norm": 0.10982570797204971, "learning_rate": 9.98687334692508e-05, "loss": 0.0148, "step": 1410 }, { "epoch": 0.2730263157894737, "grad_norm": 0.11645462363958359, "learning_rate": 9.986852396417695e-05, "loss": 0.0138, "step": 1411 }, { "epoch": 0.27321981424148606, "grad_norm": 0.09461385011672974, "learning_rate": 9.9868314292293e-05, "loss": 0.0114, "step": 1412 }, { "epoch": 0.27341331269349844, "grad_norm": 0.12157543003559113, "learning_rate": 9.986810445359971e-05, "loss": 0.0119, "step": 1413 }, { "epoch": 0.27360681114551083, "grad_norm": 0.10575959831476212, "learning_rate": 9.986789444809785e-05, "loss": 0.0125, "step": 1414 }, { "epoch": 0.2738003095975232, "grad_norm": 0.10443951934576035, "learning_rate": 9.986768427578822e-05, "loss": 0.0119, "step": 1415 }, { "epoch": 0.2739938080495356, "grad_norm": 0.0546681173145771, "learning_rate": 9.986747393667159e-05, "loss": 0.0144, "step": 1416 }, { "epoch": 0.274187306501548, "grad_norm": 0.13293923437595367, "learning_rate": 9.986726343074877e-05, "loss": 0.0132, "step": 1417 }, { "epoch": 0.2743808049535604, "grad_norm": 0.06302051991224289, "learning_rate": 9.986705275802051e-05, "loss": 0.0121, "step": 1418 }, { "epoch": 0.27457430340557276, "grad_norm": 0.14156095683574677, "learning_rate": 9.98668419184876e-05, "loss": 0.0125, "step": 1419 }, { "epoch": 0.27476780185758515, "grad_norm": 0.10712119191884995, "learning_rate": 9.986663091215083e-05, "loss": 0.0126, "step": 1420 }, { "epoch": 0.27496130030959753, "grad_norm": 0.14298492670059204, "learning_rate": 9.986641973901097e-05, "loss": 0.0131, "step": 1421 }, { "epoch": 0.2751547987616099, "grad_norm": 0.060943931341171265, "learning_rate": 9.986620839906883e-05, "loss": 0.0136, "step": 1422 }, { "epoch": 0.2753482972136223, "grad_norm": 0.17699237167835236, "learning_rate": 9.986599689232517e-05, "loss": 0.0121, "step": 1423 }, { "epoch": 0.2755417956656347, "grad_norm": 0.06718025356531143, "learning_rate": 9.986578521878079e-05, "loss": 0.0127, "step": 1424 }, { "epoch": 0.2757352941176471, "grad_norm": 0.17978127300739288, "learning_rate": 9.98655733784365e-05, "loss": 0.0123, "step": 1425 }, { "epoch": 0.27592879256965946, "grad_norm": 0.13084451854228973, "learning_rate": 9.986536137129303e-05, "loss": 0.0095, "step": 1426 }, { "epoch": 0.27612229102167185, "grad_norm": 
0.17275328934192657, "learning_rate": 9.986514919735123e-05, "loss": 0.0141, "step": 1427 }, { "epoch": 0.27631578947368424, "grad_norm": 0.21458318829536438, "learning_rate": 9.986493685661185e-05, "loss": 0.0143, "step": 1428 }, { "epoch": 0.27650928792569657, "grad_norm": 0.15761707723140717, "learning_rate": 9.986472434907568e-05, "loss": 0.0149, "step": 1429 }, { "epoch": 0.27670278637770895, "grad_norm": 0.18758173286914825, "learning_rate": 9.986451167474353e-05, "loss": 0.012, "step": 1430 }, { "epoch": 0.27689628482972134, "grad_norm": 0.11501946300268173, "learning_rate": 9.986429883361617e-05, "loss": 0.0136, "step": 1431 }, { "epoch": 0.2770897832817337, "grad_norm": 0.1738690733909607, "learning_rate": 9.98640858256944e-05, "loss": 0.0113, "step": 1432 }, { "epoch": 0.2772832817337461, "grad_norm": 0.10649122297763824, "learning_rate": 9.986387265097903e-05, "loss": 0.0125, "step": 1433 }, { "epoch": 0.2774767801857585, "grad_norm": 0.11208340525627136, "learning_rate": 9.986365930947083e-05, "loss": 0.0111, "step": 1434 }, { "epoch": 0.2776702786377709, "grad_norm": 0.1953885704278946, "learning_rate": 9.986344580117061e-05, "loss": 0.013, "step": 1435 }, { "epoch": 0.27786377708978327, "grad_norm": 0.19624385237693787, "learning_rate": 9.986323212607912e-05, "loss": 0.0151, "step": 1436 }, { "epoch": 0.27805727554179566, "grad_norm": 0.09401648491621017, "learning_rate": 9.986301828419722e-05, "loss": 0.0136, "step": 1437 }, { "epoch": 0.27825077399380804, "grad_norm": 0.07570108771324158, "learning_rate": 9.986280427552566e-05, "loss": 0.0129, "step": 1438 }, { "epoch": 0.27844427244582043, "grad_norm": 0.07545025646686554, "learning_rate": 9.986259010006526e-05, "loss": 0.0131, "step": 1439 }, { "epoch": 0.2786377708978328, "grad_norm": 0.04751734435558319, "learning_rate": 9.986237575781679e-05, "loss": 0.0107, "step": 1440 }, { "epoch": 0.2788312693498452, "grad_norm": 0.09054313600063324, "learning_rate": 9.986216124878108e-05, "loss": 0.0138, "step": 1441 }, { "epoch": 0.2790247678018576, "grad_norm": 0.06378984451293945, "learning_rate": 9.986194657295886e-05, "loss": 0.0137, "step": 1442 }, { "epoch": 0.27921826625387, "grad_norm": 0.0696721076965332, "learning_rate": 9.986173173035103e-05, "loss": 0.0164, "step": 1443 }, { "epoch": 0.27941176470588236, "grad_norm": 0.08606787025928497, "learning_rate": 9.986151672095831e-05, "loss": 0.0153, "step": 1444 }, { "epoch": 0.27960526315789475, "grad_norm": 0.0673237293958664, "learning_rate": 9.986130154478153e-05, "loss": 0.0146, "step": 1445 }, { "epoch": 0.27979876160990713, "grad_norm": 0.06419012695550919, "learning_rate": 9.986108620182149e-05, "loss": 0.012, "step": 1446 }, { "epoch": 0.2799922600619195, "grad_norm": 0.11538435518741608, "learning_rate": 9.986087069207898e-05, "loss": 0.0137, "step": 1447 }, { "epoch": 0.2801857585139319, "grad_norm": 0.06896347552537918, "learning_rate": 9.986065501555481e-05, "loss": 0.0142, "step": 1448 }, { "epoch": 0.2803792569659443, "grad_norm": 0.1421159952878952, "learning_rate": 9.986043917224978e-05, "loss": 0.0148, "step": 1449 }, { "epoch": 0.2805727554179567, "grad_norm": 0.09629291296005249, "learning_rate": 9.986022316216469e-05, "loss": 0.0123, "step": 1450 }, { "epoch": 0.28076625386996906, "grad_norm": 0.1338699460029602, "learning_rate": 9.986000698530034e-05, "loss": 0.0139, "step": 1451 }, { "epoch": 0.28095975232198145, "grad_norm": 0.08737238496541977, "learning_rate": 9.985979064165755e-05, "loss": 0.0106, "step": 1452 }, { "epoch": 0.28115325077399383, 
"grad_norm": 0.15774190425872803, "learning_rate": 9.98595741312371e-05, "loss": 0.0142, "step": 1453 }, { "epoch": 0.28134674922600617, "grad_norm": 0.06473331153392792, "learning_rate": 9.98593574540398e-05, "loss": 0.0121, "step": 1454 }, { "epoch": 0.28154024767801855, "grad_norm": 0.14291676878929138, "learning_rate": 9.985914061006647e-05, "loss": 0.0108, "step": 1455 }, { "epoch": 0.28173374613003094, "grad_norm": 0.039018455892801285, "learning_rate": 9.985892359931792e-05, "loss": 0.0134, "step": 1456 }, { "epoch": 0.2819272445820433, "grad_norm": 0.1089872196316719, "learning_rate": 9.985870642179494e-05, "loss": 0.0121, "step": 1457 }, { "epoch": 0.2821207430340557, "grad_norm": 0.057626403868198395, "learning_rate": 9.985848907749833e-05, "loss": 0.0127, "step": 1458 }, { "epoch": 0.2823142414860681, "grad_norm": 0.060804277658462524, "learning_rate": 9.985827156642892e-05, "loss": 0.0123, "step": 1459 }, { "epoch": 0.2825077399380805, "grad_norm": 0.050205525010824203, "learning_rate": 9.985805388858751e-05, "loss": 0.0145, "step": 1460 }, { "epoch": 0.28270123839009287, "grad_norm": 0.08638343214988708, "learning_rate": 9.985783604397491e-05, "loss": 0.0112, "step": 1461 }, { "epoch": 0.28289473684210525, "grad_norm": 0.09696147590875626, "learning_rate": 9.985761803259193e-05, "loss": 0.0114, "step": 1462 }, { "epoch": 0.28308823529411764, "grad_norm": 0.11433330923318863, "learning_rate": 9.985739985443937e-05, "loss": 0.0129, "step": 1463 }, { "epoch": 0.28328173374613, "grad_norm": 0.06079969182610512, "learning_rate": 9.985718150951806e-05, "loss": 0.0145, "step": 1464 }, { "epoch": 0.2834752321981424, "grad_norm": 0.12951384484767914, "learning_rate": 9.985696299782879e-05, "loss": 0.0145, "step": 1465 }, { "epoch": 0.2836687306501548, "grad_norm": 0.08822016417980194, "learning_rate": 9.985674431937241e-05, "loss": 0.0114, "step": 1466 }, { "epoch": 0.2838622291021672, "grad_norm": 0.12473125755786896, "learning_rate": 9.98565254741497e-05, "loss": 0.0124, "step": 1467 }, { "epoch": 0.28405572755417957, "grad_norm": 0.06492988765239716, "learning_rate": 9.985630646216146e-05, "loss": 0.0122, "step": 1468 }, { "epoch": 0.28424922600619196, "grad_norm": 0.1508677452802658, "learning_rate": 9.985608728340854e-05, "loss": 0.0125, "step": 1469 }, { "epoch": 0.28444272445820434, "grad_norm": 0.08515216410160065, "learning_rate": 9.985586793789175e-05, "loss": 0.0129, "step": 1470 }, { "epoch": 0.28463622291021673, "grad_norm": 0.12948819994926453, "learning_rate": 9.985564842561188e-05, "loss": 0.0129, "step": 1471 }, { "epoch": 0.2848297213622291, "grad_norm": 0.19712761044502258, "learning_rate": 9.985542874656975e-05, "loss": 0.0122, "step": 1472 }, { "epoch": 0.2850232198142415, "grad_norm": 0.07053713500499725, "learning_rate": 9.985520890076622e-05, "loss": 0.0115, "step": 1473 }, { "epoch": 0.2852167182662539, "grad_norm": 0.19883789122104645, "learning_rate": 9.985498888820206e-05, "loss": 0.0148, "step": 1474 }, { "epoch": 0.2854102167182663, "grad_norm": 0.0522150918841362, "learning_rate": 9.98547687088781e-05, "loss": 0.0126, "step": 1475 }, { "epoch": 0.28560371517027866, "grad_norm": 0.15589874982833862, "learning_rate": 9.985454836279518e-05, "loss": 0.0131, "step": 1476 }, { "epoch": 0.28579721362229105, "grad_norm": 0.0579456128180027, "learning_rate": 9.985432784995408e-05, "loss": 0.0151, "step": 1477 }, { "epoch": 0.28599071207430343, "grad_norm": 0.11887189000844955, "learning_rate": 9.985410717035566e-05, "loss": 0.0139, "step": 1478 }, { "epoch": 
0.28618421052631576, "grad_norm": 0.07611209899187088, "learning_rate": 9.98538863240007e-05, "loss": 0.0119, "step": 1479 }, { "epoch": 0.28637770897832815, "grad_norm": 0.12107839435338974, "learning_rate": 9.985366531089006e-05, "loss": 0.0115, "step": 1480 }, { "epoch": 0.28657120743034054, "grad_norm": 0.101437509059906, "learning_rate": 9.985344413102454e-05, "loss": 0.0114, "step": 1481 }, { "epoch": 0.2867647058823529, "grad_norm": 0.1042843759059906, "learning_rate": 9.985322278440496e-05, "loss": 0.0125, "step": 1482 }, { "epoch": 0.2869582043343653, "grad_norm": 0.06698393821716309, "learning_rate": 9.985300127103217e-05, "loss": 0.012, "step": 1483 }, { "epoch": 0.2871517027863777, "grad_norm": 0.07061345130205154, "learning_rate": 9.985277959090696e-05, "loss": 0.0128, "step": 1484 }, { "epoch": 0.2873452012383901, "grad_norm": 0.10715988278388977, "learning_rate": 9.985255774403015e-05, "loss": 0.0134, "step": 1485 }, { "epoch": 0.28753869969040247, "grad_norm": 0.077539823949337, "learning_rate": 9.985233573040262e-05, "loss": 0.0157, "step": 1486 }, { "epoch": 0.28773219814241485, "grad_norm": 0.12994325160980225, "learning_rate": 9.985211355002513e-05, "loss": 0.0116, "step": 1487 }, { "epoch": 0.28792569659442724, "grad_norm": 0.150228351354599, "learning_rate": 9.985189120289854e-05, "loss": 0.0146, "step": 1488 }, { "epoch": 0.2881191950464396, "grad_norm": 0.11316511034965515, "learning_rate": 9.985166868902367e-05, "loss": 0.0128, "step": 1489 }, { "epoch": 0.288312693498452, "grad_norm": 0.11299389600753784, "learning_rate": 9.985144600840134e-05, "loss": 0.014, "step": 1490 }, { "epoch": 0.2885061919504644, "grad_norm": 0.10660313814878464, "learning_rate": 9.985122316103241e-05, "loss": 0.0123, "step": 1491 }, { "epoch": 0.2886996904024768, "grad_norm": 0.12985092401504517, "learning_rate": 9.985100014691766e-05, "loss": 0.0111, "step": 1492 }, { "epoch": 0.28889318885448917, "grad_norm": 0.05750546604394913, "learning_rate": 9.985077696605795e-05, "loss": 0.0138, "step": 1493 }, { "epoch": 0.28908668730650156, "grad_norm": 0.09903056174516678, "learning_rate": 9.98505536184541e-05, "loss": 0.0137, "step": 1494 }, { "epoch": 0.28928018575851394, "grad_norm": 0.09461762011051178, "learning_rate": 9.985033010410695e-05, "loss": 0.0136, "step": 1495 }, { "epoch": 0.2894736842105263, "grad_norm": 0.10333674401044846, "learning_rate": 9.985010642301732e-05, "loss": 0.013, "step": 1496 }, { "epoch": 0.2896671826625387, "grad_norm": 0.15052855014801025, "learning_rate": 9.984988257518604e-05, "loss": 0.0141, "step": 1497 }, { "epoch": 0.2898606811145511, "grad_norm": 0.08923124521970749, "learning_rate": 9.984965856061396e-05, "loss": 0.0138, "step": 1498 }, { "epoch": 0.2900541795665635, "grad_norm": 0.07922772318124771, "learning_rate": 9.98494343793019e-05, "loss": 0.0109, "step": 1499 }, { "epoch": 0.29024767801857587, "grad_norm": 0.06821250170469284, "learning_rate": 9.984921003125068e-05, "loss": 0.0134, "step": 1500 }, { "epoch": 0.29044117647058826, "grad_norm": 0.14068257808685303, "learning_rate": 9.984898551646117e-05, "loss": 0.0151, "step": 1501 }, { "epoch": 0.29063467492260064, "grad_norm": 0.06508544087409973, "learning_rate": 9.984876083493417e-05, "loss": 0.0151, "step": 1502 }, { "epoch": 0.29082817337461303, "grad_norm": 0.14747340977191925, "learning_rate": 9.984853598667052e-05, "loss": 0.0149, "step": 1503 }, { "epoch": 0.29102167182662536, "grad_norm": 0.08126181364059448, "learning_rate": 9.984831097167107e-05, "loss": 0.0117, "step": 1504 }, { 
"epoch": 0.29121517027863775, "grad_norm": 0.11027940362691879, "learning_rate": 9.984808578993666e-05, "loss": 0.012, "step": 1505 }, { "epoch": 0.29140866873065013, "grad_norm": 0.06483864784240723, "learning_rate": 9.984786044146812e-05, "loss": 0.0128, "step": 1506 }, { "epoch": 0.2916021671826625, "grad_norm": 0.09017638862133026, "learning_rate": 9.984763492626626e-05, "loss": 0.0134, "step": 1507 }, { "epoch": 0.2917956656346749, "grad_norm": 0.06458459049463272, "learning_rate": 9.984740924433197e-05, "loss": 0.0118, "step": 1508 }, { "epoch": 0.2919891640866873, "grad_norm": 0.15133289992809296, "learning_rate": 9.984718339566605e-05, "loss": 0.0134, "step": 1509 }, { "epoch": 0.2921826625386997, "grad_norm": 0.10907676070928574, "learning_rate": 9.984695738026936e-05, "loss": 0.0149, "step": 1510 }, { "epoch": 0.29237616099071206, "grad_norm": 0.13494838774204254, "learning_rate": 9.984673119814273e-05, "loss": 0.0136, "step": 1511 }, { "epoch": 0.29256965944272445, "grad_norm": 0.03926781564950943, "learning_rate": 9.9846504849287e-05, "loss": 0.0128, "step": 1512 }, { "epoch": 0.29276315789473684, "grad_norm": 0.11637963354587555, "learning_rate": 9.984627833370303e-05, "loss": 0.012, "step": 1513 }, { "epoch": 0.2929566563467492, "grad_norm": 0.07374364137649536, "learning_rate": 9.984605165139163e-05, "loss": 0.0135, "step": 1514 }, { "epoch": 0.2931501547987616, "grad_norm": 0.11789841204881668, "learning_rate": 9.984582480235366e-05, "loss": 0.0131, "step": 1515 }, { "epoch": 0.293343653250774, "grad_norm": 0.059878814965486526, "learning_rate": 9.984559778658997e-05, "loss": 0.0127, "step": 1516 }, { "epoch": 0.2935371517027864, "grad_norm": 0.11545500159263611, "learning_rate": 9.984537060410141e-05, "loss": 0.014, "step": 1517 }, { "epoch": 0.29373065015479877, "grad_norm": 0.06555278599262238, "learning_rate": 9.984514325488879e-05, "loss": 0.0116, "step": 1518 }, { "epoch": 0.29392414860681115, "grad_norm": 0.11741475760936737, "learning_rate": 9.9844915738953e-05, "loss": 0.0134, "step": 1519 }, { "epoch": 0.29411764705882354, "grad_norm": 0.08173494040966034, "learning_rate": 9.984468805629484e-05, "loss": 0.0133, "step": 1520 }, { "epoch": 0.2943111455108359, "grad_norm": 0.09283512830734253, "learning_rate": 9.984446020691521e-05, "loss": 0.0135, "step": 1521 }, { "epoch": 0.2945046439628483, "grad_norm": 0.12026666849851608, "learning_rate": 9.98442321908149e-05, "loss": 0.013, "step": 1522 }, { "epoch": 0.2946981424148607, "grad_norm": 0.05414456129074097, "learning_rate": 9.984400400799478e-05, "loss": 0.0117, "step": 1523 }, { "epoch": 0.2948916408668731, "grad_norm": 0.1245589405298233, "learning_rate": 9.984377565845572e-05, "loss": 0.0128, "step": 1524 }, { "epoch": 0.29508513931888547, "grad_norm": 0.06790738552808762, "learning_rate": 9.984354714219855e-05, "loss": 0.0117, "step": 1525 }, { "epoch": 0.29527863777089786, "grad_norm": 0.10997394472360611, "learning_rate": 9.984331845922412e-05, "loss": 0.0147, "step": 1526 }, { "epoch": 0.29547213622291024, "grad_norm": 0.17779479920864105, "learning_rate": 9.984308960953328e-05, "loss": 0.0119, "step": 1527 }, { "epoch": 0.29566563467492263, "grad_norm": 0.08529994636774063, "learning_rate": 9.984286059312688e-05, "loss": 0.0098, "step": 1528 }, { "epoch": 0.29585913312693496, "grad_norm": 0.12133757025003433, "learning_rate": 9.984263141000578e-05, "loss": 0.0117, "step": 1529 }, { "epoch": 0.29605263157894735, "grad_norm": 0.08000415563583374, "learning_rate": 9.984240206017083e-05, "loss": 0.0138, 
"step": 1530 }, { "epoch": 0.29624613003095973, "grad_norm": 0.14491398632526398, "learning_rate": 9.984217254362286e-05, "loss": 0.0129, "step": 1531 }, { "epoch": 0.2964396284829721, "grad_norm": 0.09083855152130127, "learning_rate": 9.984194286036276e-05, "loss": 0.0122, "step": 1532 }, { "epoch": 0.2966331269349845, "grad_norm": 0.09608635306358337, "learning_rate": 9.984171301039135e-05, "loss": 0.0149, "step": 1533 }, { "epoch": 0.2968266253869969, "grad_norm": 0.062444575130939484, "learning_rate": 9.984148299370952e-05, "loss": 0.012, "step": 1534 }, { "epoch": 0.2970201238390093, "grad_norm": 0.07559388875961304, "learning_rate": 9.984125281031809e-05, "loss": 0.0131, "step": 1535 }, { "epoch": 0.29721362229102166, "grad_norm": 0.07060492783784866, "learning_rate": 9.984102246021794e-05, "loss": 0.0112, "step": 1536 }, { "epoch": 0.29740712074303405, "grad_norm": 0.11528400331735611, "learning_rate": 9.98407919434099e-05, "loss": 0.0155, "step": 1537 }, { "epoch": 0.29760061919504643, "grad_norm": 0.1512472778558731, "learning_rate": 9.984056125989486e-05, "loss": 0.0116, "step": 1538 }, { "epoch": 0.2977941176470588, "grad_norm": 0.09428410232067108, "learning_rate": 9.984033040967367e-05, "loss": 0.0128, "step": 1539 }, { "epoch": 0.2979876160990712, "grad_norm": 0.17445889115333557, "learning_rate": 9.984009939274717e-05, "loss": 0.0112, "step": 1540 }, { "epoch": 0.2981811145510836, "grad_norm": 0.09292645007371902, "learning_rate": 9.983986820911623e-05, "loss": 0.0146, "step": 1541 }, { "epoch": 0.298374613003096, "grad_norm": 0.15541215240955353, "learning_rate": 9.98396368587817e-05, "loss": 0.0119, "step": 1542 }, { "epoch": 0.29856811145510836, "grad_norm": 0.13295689225196838, "learning_rate": 9.983940534174447e-05, "loss": 0.0134, "step": 1543 }, { "epoch": 0.29876160990712075, "grad_norm": 0.16326195001602173, "learning_rate": 9.983917365800537e-05, "loss": 0.0121, "step": 1544 }, { "epoch": 0.29895510835913314, "grad_norm": 0.1687326431274414, "learning_rate": 9.983894180756526e-05, "loss": 0.0132, "step": 1545 }, { "epoch": 0.2991486068111455, "grad_norm": 0.2104375660419464, "learning_rate": 9.983870979042502e-05, "loss": 0.0122, "step": 1546 }, { "epoch": 0.2993421052631579, "grad_norm": 0.06706080585718155, "learning_rate": 9.983847760658551e-05, "loss": 0.0136, "step": 1547 }, { "epoch": 0.2995356037151703, "grad_norm": 0.1037582978606224, "learning_rate": 9.98382452560476e-05, "loss": 0.0137, "step": 1548 }, { "epoch": 0.2997291021671827, "grad_norm": 0.09524036943912506, "learning_rate": 9.983801273881211e-05, "loss": 0.0131, "step": 1549 }, { "epoch": 0.29992260061919507, "grad_norm": 0.12417102605104446, "learning_rate": 9.983778005487997e-05, "loss": 0.0161, "step": 1550 }, { "epoch": 0.30011609907120745, "grad_norm": 0.09199677407741547, "learning_rate": 9.983754720425199e-05, "loss": 0.0127, "step": 1551 }, { "epoch": 0.30030959752321984, "grad_norm": 0.11629690229892731, "learning_rate": 9.983731418692906e-05, "loss": 0.0125, "step": 1552 }, { "epoch": 0.30050309597523217, "grad_norm": 0.11640997231006622, "learning_rate": 9.983708100291206e-05, "loss": 0.0111, "step": 1553 }, { "epoch": 0.30069659442724456, "grad_norm": 0.21497589349746704, "learning_rate": 9.983684765220182e-05, "loss": 0.0134, "step": 1554 }, { "epoch": 0.30089009287925694, "grad_norm": 0.07629744708538055, "learning_rate": 9.983661413479925e-05, "loss": 0.0127, "step": 1555 }, { "epoch": 0.30108359133126933, "grad_norm": 0.28883177042007446, "learning_rate": 9.983638045070517e-05, 
"loss": 0.0157, "step": 1556 }, { "epoch": 0.3012770897832817, "grad_norm": 0.10638315975666046, "learning_rate": 9.983614659992049e-05, "loss": 0.0143, "step": 1557 }, { "epoch": 0.3014705882352941, "grad_norm": 0.24680083990097046, "learning_rate": 9.983591258244608e-05, "loss": 0.0123, "step": 1558 }, { "epoch": 0.3016640866873065, "grad_norm": 0.298024982213974, "learning_rate": 9.983567839828278e-05, "loss": 0.016, "step": 1559 }, { "epoch": 0.3018575851393189, "grad_norm": 0.24669422209262848, "learning_rate": 9.983544404743147e-05, "loss": 0.0156, "step": 1560 }, { "epoch": 0.30205108359133126, "grad_norm": 0.3372911512851715, "learning_rate": 9.983520952989302e-05, "loss": 0.0138, "step": 1561 }, { "epoch": 0.30224458204334365, "grad_norm": 0.11443817615509033, "learning_rate": 9.983497484566833e-05, "loss": 0.0136, "step": 1562 }, { "epoch": 0.30243808049535603, "grad_norm": 0.37213194370269775, "learning_rate": 9.983473999475824e-05, "loss": 0.0126, "step": 1563 }, { "epoch": 0.3026315789473684, "grad_norm": 0.11752510070800781, "learning_rate": 9.983450497716365e-05, "loss": 0.013, "step": 1564 }, { "epoch": 0.3028250773993808, "grad_norm": 0.31175926327705383, "learning_rate": 9.98342697928854e-05, "loss": 0.015, "step": 1565 }, { "epoch": 0.3030185758513932, "grad_norm": 0.12938901782035828, "learning_rate": 9.983403444192439e-05, "loss": 0.0144, "step": 1566 }, { "epoch": 0.3032120743034056, "grad_norm": 0.1488756239414215, "learning_rate": 9.983379892428149e-05, "loss": 0.0116, "step": 1567 }, { "epoch": 0.30340557275541796, "grad_norm": 0.16208040714263916, "learning_rate": 9.983356323995755e-05, "loss": 0.0118, "step": 1568 }, { "epoch": 0.30359907120743035, "grad_norm": 0.11022542417049408, "learning_rate": 9.98333273889535e-05, "loss": 0.0133, "step": 1569 }, { "epoch": 0.30379256965944273, "grad_norm": 0.25861477851867676, "learning_rate": 9.983309137127016e-05, "loss": 0.0145, "step": 1570 }, { "epoch": 0.3039860681114551, "grad_norm": 0.2569458782672882, "learning_rate": 9.983285518690845e-05, "loss": 0.0119, "step": 1571 }, { "epoch": 0.3041795665634675, "grad_norm": 0.12324687093496323, "learning_rate": 9.983261883586923e-05, "loss": 0.013, "step": 1572 }, { "epoch": 0.3043730650154799, "grad_norm": 0.17832162976264954, "learning_rate": 9.983238231815339e-05, "loss": 0.0147, "step": 1573 }, { "epoch": 0.3045665634674923, "grad_norm": 0.2001314014196396, "learning_rate": 9.983214563376178e-05, "loss": 0.012, "step": 1574 }, { "epoch": 0.30476006191950467, "grad_norm": 0.15886057913303375, "learning_rate": 9.98319087826953e-05, "loss": 0.0144, "step": 1575 }, { "epoch": 0.30495356037151705, "grad_norm": 0.22994160652160645, "learning_rate": 9.983167176495482e-05, "loss": 0.0164, "step": 1576 }, { "epoch": 0.30514705882352944, "grad_norm": 0.13457316160202026, "learning_rate": 9.983143458054126e-05, "loss": 0.0147, "step": 1577 }, { "epoch": 0.30534055727554177, "grad_norm": 0.20522892475128174, "learning_rate": 9.983119722945546e-05, "loss": 0.0129, "step": 1578 }, { "epoch": 0.30553405572755415, "grad_norm": 0.10072201490402222, "learning_rate": 9.983095971169832e-05, "loss": 0.0143, "step": 1579 }, { "epoch": 0.30572755417956654, "grad_norm": 0.17335864901542664, "learning_rate": 9.98307220272707e-05, "loss": 0.0135, "step": 1580 }, { "epoch": 0.3059210526315789, "grad_norm": 0.07509196549654007, "learning_rate": 9.983048417617352e-05, "loss": 0.0133, "step": 1581 }, { "epoch": 0.3061145510835913, "grad_norm": 0.12805961072444916, "learning_rate": 
9.983024615840765e-05, "loss": 0.0146, "step": 1582 }, { "epoch": 0.3063080495356037, "grad_norm": 0.09075120091438293, "learning_rate": 9.983000797397396e-05, "loss": 0.011, "step": 1583 }, { "epoch": 0.3065015479876161, "grad_norm": 0.09963192790746689, "learning_rate": 9.982976962287336e-05, "loss": 0.0122, "step": 1584 }, { "epoch": 0.30669504643962847, "grad_norm": 0.059837836772203445, "learning_rate": 9.982953110510671e-05, "loss": 0.0127, "step": 1585 }, { "epoch": 0.30688854489164086, "grad_norm": 0.08382851630449295, "learning_rate": 9.98292924206749e-05, "loss": 0.0138, "step": 1586 }, { "epoch": 0.30708204334365324, "grad_norm": 0.15795129537582397, "learning_rate": 9.982905356957886e-05, "loss": 0.0152, "step": 1587 }, { "epoch": 0.30727554179566563, "grad_norm": 0.14842553436756134, "learning_rate": 9.982881455181942e-05, "loss": 0.0124, "step": 1588 }, { "epoch": 0.307469040247678, "grad_norm": 0.1530265361070633, "learning_rate": 9.982857536739749e-05, "loss": 0.0128, "step": 1589 }, { "epoch": 0.3076625386996904, "grad_norm": 0.12834493815898895, "learning_rate": 9.982833601631397e-05, "loss": 0.0118, "step": 1590 }, { "epoch": 0.3078560371517028, "grad_norm": 0.20391890406608582, "learning_rate": 9.982809649856976e-05, "loss": 0.0142, "step": 1591 }, { "epoch": 0.3080495356037152, "grad_norm": 0.10104414075613022, "learning_rate": 9.982785681416571e-05, "loss": 0.014, "step": 1592 }, { "epoch": 0.30824303405572756, "grad_norm": 0.21709540486335754, "learning_rate": 9.982761696310274e-05, "loss": 0.0139, "step": 1593 }, { "epoch": 0.30843653250773995, "grad_norm": 0.13836683332920074, "learning_rate": 9.982737694538173e-05, "loss": 0.0142, "step": 1594 }, { "epoch": 0.30863003095975233, "grad_norm": 0.14315013587474823, "learning_rate": 9.98271367610036e-05, "loss": 0.0123, "step": 1595 }, { "epoch": 0.3088235294117647, "grad_norm": 0.21418888866901398, "learning_rate": 9.982689640996919e-05, "loss": 0.0129, "step": 1596 }, { "epoch": 0.3090170278637771, "grad_norm": 0.1261289119720459, "learning_rate": 9.982665589227946e-05, "loss": 0.0111, "step": 1597 }, { "epoch": 0.3092105263157895, "grad_norm": 0.1786692589521408, "learning_rate": 9.982641520793524e-05, "loss": 0.0109, "step": 1598 }, { "epoch": 0.3094040247678019, "grad_norm": 0.07122573256492615, "learning_rate": 9.982617435693746e-05, "loss": 0.0128, "step": 1599 }, { "epoch": 0.30959752321981426, "grad_norm": 0.15740367770195007, "learning_rate": 9.982593333928702e-05, "loss": 0.0108, "step": 1600 }, { "epoch": 0.30979102167182665, "grad_norm": 0.1654926985502243, "learning_rate": 9.982569215498479e-05, "loss": 0.012, "step": 1601 }, { "epoch": 0.30998452012383904, "grad_norm": 0.09653368592262268, "learning_rate": 9.98254508040317e-05, "loss": 0.0123, "step": 1602 }, { "epoch": 0.31017801857585137, "grad_norm": 0.15776287019252777, "learning_rate": 9.982520928642861e-05, "loss": 0.0122, "step": 1603 }, { "epoch": 0.31037151702786375, "grad_norm": 0.15279218554496765, "learning_rate": 9.982496760217645e-05, "loss": 0.0139, "step": 1604 }, { "epoch": 0.31056501547987614, "grad_norm": 0.08533375710248947, "learning_rate": 9.98247257512761e-05, "loss": 0.014, "step": 1605 }, { "epoch": 0.3107585139318885, "grad_norm": 0.2423248291015625, "learning_rate": 9.982448373372847e-05, "loss": 0.0139, "step": 1606 }, { "epoch": 0.3109520123839009, "grad_norm": 0.1704263985157013, "learning_rate": 9.982424154953446e-05, "loss": 0.0144, "step": 1607 }, { "epoch": 0.3111455108359133, "grad_norm": 0.231465682387352, 
"learning_rate": 9.982399919869494e-05, "loss": 0.0117, "step": 1608 }, { "epoch": 0.3113390092879257, "grad_norm": 0.27168184518814087, "learning_rate": 9.982375668121086e-05, "loss": 0.0111, "step": 1609 }, { "epoch": 0.31153250773993807, "grad_norm": 0.12391309440135956, "learning_rate": 9.982351399708308e-05, "loss": 0.013, "step": 1610 }, { "epoch": 0.31172600619195046, "grad_norm": 0.25225162506103516, "learning_rate": 9.982327114631255e-05, "loss": 0.0123, "step": 1611 }, { "epoch": 0.31191950464396284, "grad_norm": 0.06224728375673294, "learning_rate": 9.982302812890011e-05, "loss": 0.0147, "step": 1612 }, { "epoch": 0.3121130030959752, "grad_norm": 0.1494426131248474, "learning_rate": 9.982278494484672e-05, "loss": 0.0129, "step": 1613 }, { "epoch": 0.3123065015479876, "grad_norm": 0.08421433717012405, "learning_rate": 9.982254159415324e-05, "loss": 0.0148, "step": 1614 }, { "epoch": 0.3125, "grad_norm": 0.10306815803050995, "learning_rate": 9.982229807682061e-05, "loss": 0.0118, "step": 1615 }, { "epoch": 0.3126934984520124, "grad_norm": 0.15016567707061768, "learning_rate": 9.98220543928497e-05, "loss": 0.0129, "step": 1616 }, { "epoch": 0.3128869969040248, "grad_norm": 0.10684554278850555, "learning_rate": 9.982181054224146e-05, "loss": 0.0136, "step": 1617 }, { "epoch": 0.31308049535603716, "grad_norm": 0.13705287873744965, "learning_rate": 9.982156652499677e-05, "loss": 0.0117, "step": 1618 }, { "epoch": 0.31327399380804954, "grad_norm": 0.13834907114505768, "learning_rate": 9.982132234111653e-05, "loss": 0.0137, "step": 1619 }, { "epoch": 0.31346749226006193, "grad_norm": 0.13509981334209442, "learning_rate": 9.982107799060165e-05, "loss": 0.0116, "step": 1620 }, { "epoch": 0.3136609907120743, "grad_norm": 0.12342581152915955, "learning_rate": 9.982083347345306e-05, "loss": 0.0115, "step": 1621 }, { "epoch": 0.3138544891640867, "grad_norm": 0.16231539845466614, "learning_rate": 9.982058878967166e-05, "loss": 0.0136, "step": 1622 }, { "epoch": 0.3140479876160991, "grad_norm": 0.11721988022327423, "learning_rate": 9.982034393925834e-05, "loss": 0.015, "step": 1623 }, { "epoch": 0.3142414860681115, "grad_norm": 0.19203795492649078, "learning_rate": 9.982009892221402e-05, "loss": 0.0105, "step": 1624 }, { "epoch": 0.31443498452012386, "grad_norm": 0.12568697333335876, "learning_rate": 9.981985373853964e-05, "loss": 0.0145, "step": 1625 }, { "epoch": 0.31462848297213625, "grad_norm": 0.24129581451416016, "learning_rate": 9.981960838823606e-05, "loss": 0.0142, "step": 1626 }, { "epoch": 0.31482198142414863, "grad_norm": 0.11910700798034668, "learning_rate": 9.981936287130424e-05, "loss": 0.0126, "step": 1627 }, { "epoch": 0.31501547987616096, "grad_norm": 0.25425708293914795, "learning_rate": 9.981911718774506e-05, "loss": 0.0132, "step": 1628 }, { "epoch": 0.31520897832817335, "grad_norm": 0.03881537541747093, "learning_rate": 9.981887133755948e-05, "loss": 0.0128, "step": 1629 }, { "epoch": 0.31540247678018574, "grad_norm": 0.3245409429073334, "learning_rate": 9.981862532074835e-05, "loss": 0.0121, "step": 1630 }, { "epoch": 0.3155959752321981, "grad_norm": 0.0886220932006836, "learning_rate": 9.981837913731261e-05, "loss": 0.0127, "step": 1631 }, { "epoch": 0.3157894736842105, "grad_norm": 0.3060154914855957, "learning_rate": 9.98181327872532e-05, "loss": 0.0158, "step": 1632 }, { "epoch": 0.3159829721362229, "grad_norm": 0.1542864590883255, "learning_rate": 9.9817886270571e-05, "loss": 0.0125, "step": 1633 }, { "epoch": 0.3161764705882353, "grad_norm": 0.24073345959186554, 
"learning_rate": 9.981763958726695e-05, "loss": 0.0117, "step": 1634 }, { "epoch": 0.31636996904024767, "grad_norm": 0.19921183586120605, "learning_rate": 9.981739273734195e-05, "loss": 0.0117, "step": 1635 }, { "epoch": 0.31656346749226005, "grad_norm": 0.15820322930812836, "learning_rate": 9.981714572079693e-05, "loss": 0.0104, "step": 1636 }, { "epoch": 0.31675696594427244, "grad_norm": 0.21265080571174622, "learning_rate": 9.981689853763282e-05, "loss": 0.013, "step": 1637 }, { "epoch": 0.3169504643962848, "grad_norm": 0.09818902611732483, "learning_rate": 9.981665118785049e-05, "loss": 0.0162, "step": 1638 }, { "epoch": 0.3171439628482972, "grad_norm": 0.19596394896507263, "learning_rate": 9.981640367145093e-05, "loss": 0.0141, "step": 1639 }, { "epoch": 0.3173374613003096, "grad_norm": 0.06328592449426651, "learning_rate": 9.981615598843501e-05, "loss": 0.0113, "step": 1640 }, { "epoch": 0.317530959752322, "grad_norm": 0.1743742823600769, "learning_rate": 9.981590813880366e-05, "loss": 0.0148, "step": 1641 }, { "epoch": 0.31772445820433437, "grad_norm": 0.11699637770652771, "learning_rate": 9.981566012255783e-05, "loss": 0.0129, "step": 1642 }, { "epoch": 0.31791795665634676, "grad_norm": 0.10608460009098053, "learning_rate": 9.98154119396984e-05, "loss": 0.0118, "step": 1643 }, { "epoch": 0.31811145510835914, "grad_norm": 0.16128015518188477, "learning_rate": 9.98151635902263e-05, "loss": 0.0116, "step": 1644 }, { "epoch": 0.31830495356037153, "grad_norm": 0.0755249485373497, "learning_rate": 9.98149150741425e-05, "loss": 0.0141, "step": 1645 }, { "epoch": 0.3184984520123839, "grad_norm": 0.1761290580034256, "learning_rate": 9.981466639144786e-05, "loss": 0.0136, "step": 1646 }, { "epoch": 0.3186919504643963, "grad_norm": 0.08725931495428085, "learning_rate": 9.981441754214332e-05, "loss": 0.0139, "step": 1647 }, { "epoch": 0.3188854489164087, "grad_norm": 0.147026926279068, "learning_rate": 9.981416852622984e-05, "loss": 0.0133, "step": 1648 }, { "epoch": 0.3190789473684211, "grad_norm": 0.09999815374612808, "learning_rate": 9.981391934370831e-05, "loss": 0.0119, "step": 1649 }, { "epoch": 0.31927244582043346, "grad_norm": 0.1741681694984436, "learning_rate": 9.98136699945797e-05, "loss": 0.0129, "step": 1650 }, { "epoch": 0.31946594427244585, "grad_norm": 0.07006775587797165, "learning_rate": 9.981342047884487e-05, "loss": 0.0106, "step": 1651 }, { "epoch": 0.31965944272445823, "grad_norm": 0.14232811331748962, "learning_rate": 9.981317079650482e-05, "loss": 0.0131, "step": 1652 }, { "epoch": 0.31985294117647056, "grad_norm": 0.06709760427474976, "learning_rate": 9.981292094756043e-05, "loss": 0.0137, "step": 1653 }, { "epoch": 0.32004643962848295, "grad_norm": 0.11456294357776642, "learning_rate": 9.981267093201262e-05, "loss": 0.0143, "step": 1654 }, { "epoch": 0.32023993808049533, "grad_norm": 0.08971510082483292, "learning_rate": 9.981242074986237e-05, "loss": 0.0108, "step": 1655 }, { "epoch": 0.3204334365325077, "grad_norm": 0.1961657553911209, "learning_rate": 9.981217040111057e-05, "loss": 0.0131, "step": 1656 }, { "epoch": 0.3206269349845201, "grad_norm": 0.0902334451675415, "learning_rate": 9.981191988575817e-05, "loss": 0.0143, "step": 1657 }, { "epoch": 0.3208204334365325, "grad_norm": 0.20100733637809753, "learning_rate": 9.981166920380608e-05, "loss": 0.0131, "step": 1658 }, { "epoch": 0.3210139318885449, "grad_norm": 0.15682902932167053, "learning_rate": 9.981141835525525e-05, "loss": 0.0099, "step": 1659 }, { "epoch": 0.32120743034055727, "grad_norm": 
0.3426731824874878, "learning_rate": 9.981116734010661e-05, "loss": 0.0182, "step": 1660 }, { "epoch": 0.32140092879256965, "grad_norm": 0.12986581027507782, "learning_rate": 9.981091615836108e-05, "loss": 0.0122, "step": 1661 }, { "epoch": 0.32159442724458204, "grad_norm": 0.243559792637825, "learning_rate": 9.981066481001964e-05, "loss": 0.0131, "step": 1662 }, { "epoch": 0.3217879256965944, "grad_norm": 0.133949413895607, "learning_rate": 9.981041329508316e-05, "loss": 0.0138, "step": 1663 }, { "epoch": 0.3219814241486068, "grad_norm": 0.18389850854873657, "learning_rate": 9.981016161355263e-05, "loss": 0.0118, "step": 1664 }, { "epoch": 0.3221749226006192, "grad_norm": 0.24690882861614227, "learning_rate": 9.980990976542893e-05, "loss": 0.0135, "step": 1665 }, { "epoch": 0.3223684210526316, "grad_norm": 0.22719962894916534, "learning_rate": 9.980965775071305e-05, "loss": 0.0143, "step": 1666 }, { "epoch": 0.32256191950464397, "grad_norm": 0.3316818177700043, "learning_rate": 9.980940556940589e-05, "loss": 0.0136, "step": 1667 }, { "epoch": 0.32275541795665635, "grad_norm": 0.0693027675151825, "learning_rate": 9.98091532215084e-05, "loss": 0.0129, "step": 1668 }, { "epoch": 0.32294891640866874, "grad_norm": 0.4203914403915405, "learning_rate": 9.980890070702153e-05, "loss": 0.0161, "step": 1669 }, { "epoch": 0.3231424148606811, "grad_norm": 0.19182825088500977, "learning_rate": 9.980864802594621e-05, "loss": 0.0119, "step": 1670 }, { "epoch": 0.3233359133126935, "grad_norm": 0.3202020525932312, "learning_rate": 9.980839517828337e-05, "loss": 0.0148, "step": 1671 }, { "epoch": 0.3235294117647059, "grad_norm": 0.23954428732395172, "learning_rate": 9.980814216403394e-05, "loss": 0.0118, "step": 1672 }, { "epoch": 0.3237229102167183, "grad_norm": 0.20958317816257477, "learning_rate": 9.980788898319892e-05, "loss": 0.0125, "step": 1673 }, { "epoch": 0.32391640866873067, "grad_norm": 0.17355038225650787, "learning_rate": 9.98076356357792e-05, "loss": 0.0141, "step": 1674 }, { "epoch": 0.32410990712074306, "grad_norm": 0.11532709002494812, "learning_rate": 9.980738212177571e-05, "loss": 0.0113, "step": 1675 }, { "epoch": 0.32430340557275544, "grad_norm": 0.2516854405403137, "learning_rate": 9.980712844118942e-05, "loss": 0.0118, "step": 1676 }, { "epoch": 0.32449690402476783, "grad_norm": 0.10452290624380112, "learning_rate": 9.980687459402127e-05, "loss": 0.0138, "step": 1677 }, { "epoch": 0.32469040247678016, "grad_norm": 0.32578542828559875, "learning_rate": 9.98066205802722e-05, "loss": 0.0137, "step": 1678 }, { "epoch": 0.32488390092879255, "grad_norm": 0.16265659034252167, "learning_rate": 9.980636639994317e-05, "loss": 0.0147, "step": 1679 }, { "epoch": 0.32507739938080493, "grad_norm": 0.255021333694458, "learning_rate": 9.980611205303509e-05, "loss": 0.0116, "step": 1680 }, { "epoch": 0.3252708978328173, "grad_norm": 0.30883675813674927, "learning_rate": 9.980585753954893e-05, "loss": 0.0132, "step": 1681 }, { "epoch": 0.3254643962848297, "grad_norm": 0.07707850635051727, "learning_rate": 9.980560285948562e-05, "loss": 0.0142, "step": 1682 }, { "epoch": 0.3256578947368421, "grad_norm": 0.27577662467956543, "learning_rate": 9.980534801284615e-05, "loss": 0.0123, "step": 1683 }, { "epoch": 0.3258513931888545, "grad_norm": 0.11777565628290176, "learning_rate": 9.980509299963142e-05, "loss": 0.0168, "step": 1684 }, { "epoch": 0.32604489164086686, "grad_norm": 0.18660493195056915, "learning_rate": 9.980483781984239e-05, "loss": 0.0152, "step": 1685 }, { "epoch": 0.32623839009287925, 
"grad_norm": 0.07806065678596497, "learning_rate": 9.980458247348001e-05, "loss": 0.0147, "step": 1686 }, { "epoch": 0.32643188854489164, "grad_norm": 0.11556188762187958, "learning_rate": 9.980432696054525e-05, "loss": 0.0086, "step": 1687 }, { "epoch": 0.326625386996904, "grad_norm": 0.10175032913684845, "learning_rate": 9.980407128103904e-05, "loss": 0.0119, "step": 1688 }, { "epoch": 0.3268188854489164, "grad_norm": 0.10024570673704147, "learning_rate": 9.980381543496232e-05, "loss": 0.0122, "step": 1689 }, { "epoch": 0.3270123839009288, "grad_norm": 0.16782152652740479, "learning_rate": 9.980355942231607e-05, "loss": 0.0124, "step": 1690 }, { "epoch": 0.3272058823529412, "grad_norm": 0.10788904130458832, "learning_rate": 9.98033032431012e-05, "loss": 0.0116, "step": 1691 }, { "epoch": 0.32739938080495357, "grad_norm": 0.32644951343536377, "learning_rate": 9.98030468973187e-05, "loss": 0.0149, "step": 1692 }, { "epoch": 0.32759287925696595, "grad_norm": 0.12322191894054413, "learning_rate": 9.980279038496953e-05, "loss": 0.0132, "step": 1693 }, { "epoch": 0.32778637770897834, "grad_norm": 0.22333800792694092, "learning_rate": 9.98025337060546e-05, "loss": 0.0118, "step": 1694 }, { "epoch": 0.3279798761609907, "grad_norm": 0.175804004073143, "learning_rate": 9.980227686057492e-05, "loss": 0.0125, "step": 1695 }, { "epoch": 0.3281733746130031, "grad_norm": 0.1723267138004303, "learning_rate": 9.980201984853138e-05, "loss": 0.0141, "step": 1696 }, { "epoch": 0.3283668730650155, "grad_norm": 0.17082956433296204, "learning_rate": 9.980176266992498e-05, "loss": 0.0113, "step": 1697 }, { "epoch": 0.3285603715170279, "grad_norm": 0.14324739575386047, "learning_rate": 9.980150532475668e-05, "loss": 0.0135, "step": 1698 }, { "epoch": 0.32875386996904027, "grad_norm": 0.3196532428264618, "learning_rate": 9.98012478130274e-05, "loss": 0.0159, "step": 1699 }, { "epoch": 0.32894736842105265, "grad_norm": 0.17627987265586853, "learning_rate": 9.980099013473814e-05, "loss": 0.0137, "step": 1700 }, { "epoch": 0.32914086687306504, "grad_norm": 0.22364479303359985, "learning_rate": 9.980073228988983e-05, "loss": 0.0114, "step": 1701 }, { "epoch": 0.32933436532507737, "grad_norm": 0.24349819123744965, "learning_rate": 9.980047427848343e-05, "loss": 0.0134, "step": 1702 }, { "epoch": 0.32952786377708976, "grad_norm": 0.09551291912794113, "learning_rate": 9.980021610051991e-05, "loss": 0.0097, "step": 1703 }, { "epoch": 0.32972136222910214, "grad_norm": 0.3417440354824066, "learning_rate": 9.979995775600024e-05, "loss": 0.0152, "step": 1704 }, { "epoch": 0.32991486068111453, "grad_norm": 0.18064472079277039, "learning_rate": 9.979969924492535e-05, "loss": 0.013, "step": 1705 }, { "epoch": 0.3301083591331269, "grad_norm": 0.21379351615905762, "learning_rate": 9.979944056729622e-05, "loss": 0.0135, "step": 1706 }, { "epoch": 0.3303018575851393, "grad_norm": 0.27888286113739014, "learning_rate": 9.97991817231138e-05, "loss": 0.0145, "step": 1707 }, { "epoch": 0.3304953560371517, "grad_norm": 0.0962265133857727, "learning_rate": 9.979892271237909e-05, "loss": 0.0109, "step": 1708 }, { "epoch": 0.3306888544891641, "grad_norm": 0.31659120321273804, "learning_rate": 9.9798663535093e-05, "loss": 0.0156, "step": 1709 }, { "epoch": 0.33088235294117646, "grad_norm": 0.161092609167099, "learning_rate": 9.979840419125652e-05, "loss": 0.0145, "step": 1710 }, { "epoch": 0.33107585139318885, "grad_norm": 0.29246607422828674, "learning_rate": 9.97981446808706e-05, "loss": 0.0148, "step": 1711 }, { "epoch": 
0.33126934984520123, "grad_norm": 0.1690840721130371, "learning_rate": 9.979788500393626e-05, "loss": 0.0168, "step": 1712 }, { "epoch": 0.3314628482972136, "grad_norm": 0.2306041419506073, "learning_rate": 9.979762516045438e-05, "loss": 0.0162, "step": 1713 }, { "epoch": 0.331656346749226, "grad_norm": 0.15838010609149933, "learning_rate": 9.979736515042598e-05, "loss": 0.0131, "step": 1714 }, { "epoch": 0.3318498452012384, "grad_norm": 0.1854519248008728, "learning_rate": 9.979710497385202e-05, "loss": 0.0165, "step": 1715 }, { "epoch": 0.3320433436532508, "grad_norm": 0.21075782179832458, "learning_rate": 9.979684463073347e-05, "loss": 0.0135, "step": 1716 }, { "epoch": 0.33223684210526316, "grad_norm": 0.1274898797273636, "learning_rate": 9.979658412107126e-05, "loss": 0.0111, "step": 1717 }, { "epoch": 0.33243034055727555, "grad_norm": 0.23158541321754456, "learning_rate": 9.979632344486641e-05, "loss": 0.0158, "step": 1718 }, { "epoch": 0.33262383900928794, "grad_norm": 0.17205113172531128, "learning_rate": 9.979606260211986e-05, "loss": 0.014, "step": 1719 }, { "epoch": 0.3328173374613003, "grad_norm": 0.1710013747215271, "learning_rate": 9.97958015928326e-05, "loss": 0.012, "step": 1720 }, { "epoch": 0.3330108359133127, "grad_norm": 0.1300080120563507, "learning_rate": 9.979554041700557e-05, "loss": 0.0123, "step": 1721 }, { "epoch": 0.3332043343653251, "grad_norm": 0.11051678657531738, "learning_rate": 9.979527907463977e-05, "loss": 0.0118, "step": 1722 }, { "epoch": 0.3333978328173375, "grad_norm": 0.14245761930942535, "learning_rate": 9.979501756573615e-05, "loss": 0.0155, "step": 1723 }, { "epoch": 0.33359133126934987, "grad_norm": 0.09408383071422577, "learning_rate": 9.979475589029571e-05, "loss": 0.0153, "step": 1724 }, { "epoch": 0.33378482972136225, "grad_norm": 0.1378316432237625, "learning_rate": 9.979449404831939e-05, "loss": 0.0135, "step": 1725 }, { "epoch": 0.33397832817337464, "grad_norm": 0.08578129857778549, "learning_rate": 9.979423203980817e-05, "loss": 0.0142, "step": 1726 }, { "epoch": 0.33417182662538697, "grad_norm": 0.11588750034570694, "learning_rate": 9.979396986476307e-05, "loss": 0.0109, "step": 1727 }, { "epoch": 0.33436532507739936, "grad_norm": 0.13732150197029114, "learning_rate": 9.9793707523185e-05, "loss": 0.0136, "step": 1728 }, { "epoch": 0.33455882352941174, "grad_norm": 0.11542196571826935, "learning_rate": 9.979344501507497e-05, "loss": 0.0145, "step": 1729 }, { "epoch": 0.33475232198142413, "grad_norm": 0.12521855533123016, "learning_rate": 9.979318234043394e-05, "loss": 0.0136, "step": 1730 }, { "epoch": 0.3349458204334365, "grad_norm": 0.056097276508808136, "learning_rate": 9.97929194992629e-05, "loss": 0.0113, "step": 1731 }, { "epoch": 0.3351393188854489, "grad_norm": 0.16159524023532867, "learning_rate": 9.979265649156284e-05, "loss": 0.0131, "step": 1732 }, { "epoch": 0.3353328173374613, "grad_norm": 0.08716951310634613, "learning_rate": 9.979239331733468e-05, "loss": 0.0136, "step": 1733 }, { "epoch": 0.3355263157894737, "grad_norm": 0.13862675428390503, "learning_rate": 9.979212997657949e-05, "loss": 0.0126, "step": 1734 }, { "epoch": 0.33571981424148606, "grad_norm": 0.19687926769256592, "learning_rate": 9.979186646929816e-05, "loss": 0.0115, "step": 1735 }, { "epoch": 0.33591331269349844, "grad_norm": 0.11754707247018814, "learning_rate": 9.979160279549171e-05, "loss": 0.0119, "step": 1736 }, { "epoch": 0.33610681114551083, "grad_norm": 0.2247573286294937, "learning_rate": 9.979133895516113e-05, "loss": 0.0124, "step": 1737 }, { 
"epoch": 0.3363003095975232, "grad_norm": 0.136222705245018, "learning_rate": 9.979107494830739e-05, "loss": 0.0118, "step": 1738 }, { "epoch": 0.3364938080495356, "grad_norm": 0.1537436693906784, "learning_rate": 9.979081077493146e-05, "loss": 0.0138, "step": 1739 }, { "epoch": 0.336687306501548, "grad_norm": 0.1829412430524826, "learning_rate": 9.979054643503434e-05, "loss": 0.0122, "step": 1740 }, { "epoch": 0.3368808049535604, "grad_norm": 0.11141204088926315, "learning_rate": 9.9790281928617e-05, "loss": 0.0135, "step": 1741 }, { "epoch": 0.33707430340557276, "grad_norm": 0.15873666107654572, "learning_rate": 9.979001725568044e-05, "loss": 0.0112, "step": 1742 }, { "epoch": 0.33726780185758515, "grad_norm": 0.10534970462322235, "learning_rate": 9.97897524162256e-05, "loss": 0.0131, "step": 1743 }, { "epoch": 0.33746130030959753, "grad_norm": 0.1416589617729187, "learning_rate": 9.978948741025353e-05, "loss": 0.0117, "step": 1744 }, { "epoch": 0.3376547987616099, "grad_norm": 0.1712937355041504, "learning_rate": 9.978922223776517e-05, "loss": 0.0134, "step": 1745 }, { "epoch": 0.3378482972136223, "grad_norm": 0.09455063939094543, "learning_rate": 9.978895689876152e-05, "loss": 0.0134, "step": 1746 }, { "epoch": 0.3380417956656347, "grad_norm": 0.2546294033527374, "learning_rate": 9.978869139324357e-05, "loss": 0.0143, "step": 1747 }, { "epoch": 0.3382352941176471, "grad_norm": 0.10047745704650879, "learning_rate": 9.97884257212123e-05, "loss": 0.0112, "step": 1748 }, { "epoch": 0.33842879256965946, "grad_norm": 0.22922459244728088, "learning_rate": 9.97881598826687e-05, "loss": 0.0147, "step": 1749 }, { "epoch": 0.33862229102167185, "grad_norm": 0.08952988684177399, "learning_rate": 9.978789387761375e-05, "loss": 0.0121, "step": 1750 }, { "epoch": 0.33881578947368424, "grad_norm": 0.1460699439048767, "learning_rate": 9.978762770604845e-05, "loss": 0.0117, "step": 1751 }, { "epoch": 0.33900928792569657, "grad_norm": 0.10877901315689087, "learning_rate": 9.978736136797379e-05, "loss": 0.0106, "step": 1752 }, { "epoch": 0.33920278637770895, "grad_norm": 0.15860289335250854, "learning_rate": 9.978709486339074e-05, "loss": 0.0143, "step": 1753 }, { "epoch": 0.33939628482972134, "grad_norm": 0.13686296343803406, "learning_rate": 9.978682819230031e-05, "loss": 0.0122, "step": 1754 }, { "epoch": 0.3395897832817337, "grad_norm": 0.18730312585830688, "learning_rate": 9.97865613547035e-05, "loss": 0.0131, "step": 1755 }, { "epoch": 0.3397832817337461, "grad_norm": 0.13906531035900116, "learning_rate": 9.978629435060129e-05, "loss": 0.0131, "step": 1756 }, { "epoch": 0.3399767801857585, "grad_norm": 0.18043680489063263, "learning_rate": 9.978602717999467e-05, "loss": 0.0138, "step": 1757 }, { "epoch": 0.3401702786377709, "grad_norm": 0.16173896193504333, "learning_rate": 9.978575984288465e-05, "loss": 0.0134, "step": 1758 }, { "epoch": 0.34036377708978327, "grad_norm": 0.0976288691163063, "learning_rate": 9.97854923392722e-05, "loss": 0.0145, "step": 1759 }, { "epoch": 0.34055727554179566, "grad_norm": 0.15887558460235596, "learning_rate": 9.978522466915831e-05, "loss": 0.0121, "step": 1760 }, { "epoch": 0.34075077399380804, "grad_norm": 0.053234364837408066, "learning_rate": 9.978495683254402e-05, "loss": 0.0102, "step": 1761 }, { "epoch": 0.34094427244582043, "grad_norm": 0.2391861230134964, "learning_rate": 9.978468882943027e-05, "loss": 0.0093, "step": 1762 }, { "epoch": 0.3411377708978328, "grad_norm": 0.050806038081645966, "learning_rate": 9.978442065981809e-05, "loss": 0.0143, "step": 1763 
}, { "epoch": 0.3413312693498452, "grad_norm": 0.18390588462352753, "learning_rate": 9.978415232370848e-05, "loss": 0.0132, "step": 1764 }, { "epoch": 0.3415247678018576, "grad_norm": 0.11996489018201828, "learning_rate": 9.978388382110241e-05, "loss": 0.0152, "step": 1765 }, { "epoch": 0.34171826625387, "grad_norm": 0.2080775797367096, "learning_rate": 9.97836151520009e-05, "loss": 0.0141, "step": 1766 }, { "epoch": 0.34191176470588236, "grad_norm": 0.08962317556142807, "learning_rate": 9.978334631640496e-05, "loss": 0.0097, "step": 1767 }, { "epoch": 0.34210526315789475, "grad_norm": 0.11369433254003525, "learning_rate": 9.978307731431556e-05, "loss": 0.0127, "step": 1768 }, { "epoch": 0.34229876160990713, "grad_norm": 0.09028492867946625, "learning_rate": 9.978280814573371e-05, "loss": 0.0116, "step": 1769 }, { "epoch": 0.3424922600619195, "grad_norm": 0.14553028345108032, "learning_rate": 9.978253881066041e-05, "loss": 0.0138, "step": 1770 }, { "epoch": 0.3426857585139319, "grad_norm": 0.08474720269441605, "learning_rate": 9.978226930909667e-05, "loss": 0.0119, "step": 1771 }, { "epoch": 0.3428792569659443, "grad_norm": 0.052595239132642746, "learning_rate": 9.978199964104348e-05, "loss": 0.0131, "step": 1772 }, { "epoch": 0.3430727554179567, "grad_norm": 0.19925057888031006, "learning_rate": 9.978172980650186e-05, "loss": 0.0131, "step": 1773 }, { "epoch": 0.34326625386996906, "grad_norm": 0.07251691818237305, "learning_rate": 9.978145980547282e-05, "loss": 0.0111, "step": 1774 }, { "epoch": 0.34345975232198145, "grad_norm": 0.1939103901386261, "learning_rate": 9.978118963795733e-05, "loss": 0.014, "step": 1775 }, { "epoch": 0.34365325077399383, "grad_norm": 0.09972553700208664, "learning_rate": 9.97809193039564e-05, "loss": 0.0112, "step": 1776 }, { "epoch": 0.34384674922600617, "grad_norm": 0.13092663884162903, "learning_rate": 9.978064880347106e-05, "loss": 0.0121, "step": 1777 }, { "epoch": 0.34404024767801855, "grad_norm": 0.10241831839084625, "learning_rate": 9.978037813650229e-05, "loss": 0.0153, "step": 1778 }, { "epoch": 0.34423374613003094, "grad_norm": 0.1307242065668106, "learning_rate": 9.978010730305113e-05, "loss": 0.0119, "step": 1779 }, { "epoch": 0.3444272445820433, "grad_norm": 0.09374504536390305, "learning_rate": 9.977983630311856e-05, "loss": 0.014, "step": 1780 }, { "epoch": 0.3446207430340557, "grad_norm": 0.15159985423088074, "learning_rate": 9.977956513670556e-05, "loss": 0.0132, "step": 1781 }, { "epoch": 0.3448142414860681, "grad_norm": 0.1037457287311554, "learning_rate": 9.977929380381321e-05, "loss": 0.0127, "step": 1782 }, { "epoch": 0.3450077399380805, "grad_norm": 0.2500961422920227, "learning_rate": 9.977902230444247e-05, "loss": 0.0116, "step": 1783 }, { "epoch": 0.34520123839009287, "grad_norm": 0.11786907911300659, "learning_rate": 9.977875063859435e-05, "loss": 0.0138, "step": 1784 }, { "epoch": 0.34539473684210525, "grad_norm": 0.1396908313035965, "learning_rate": 9.977847880626987e-05, "loss": 0.0114, "step": 1785 }, { "epoch": 0.34558823529411764, "grad_norm": 0.136434406042099, "learning_rate": 9.977820680747004e-05, "loss": 0.012, "step": 1786 }, { "epoch": 0.34578173374613, "grad_norm": 0.09329982101917267, "learning_rate": 9.977793464219588e-05, "loss": 0.0112, "step": 1787 }, { "epoch": 0.3459752321981424, "grad_norm": 0.115172378718853, "learning_rate": 9.977766231044838e-05, "loss": 0.0126, "step": 1788 }, { "epoch": 0.3461687306501548, "grad_norm": 0.10550236701965332, "learning_rate": 9.977738981222857e-05, "loss": 0.0117, "step": 
1789 }, { "epoch": 0.3463622291021672, "grad_norm": 0.10121453553438187, "learning_rate": 9.977711714753746e-05, "loss": 0.0128, "step": 1790 }, { "epoch": 0.34655572755417957, "grad_norm": 0.12935379147529602, "learning_rate": 9.977684431637607e-05, "loss": 0.0122, "step": 1791 }, { "epoch": 0.34674922600619196, "grad_norm": 0.11799792945384979, "learning_rate": 9.977657131874539e-05, "loss": 0.0109, "step": 1792 }, { "epoch": 0.34694272445820434, "grad_norm": 0.107821024954319, "learning_rate": 9.977629815464645e-05, "loss": 0.0131, "step": 1793 }, { "epoch": 0.34713622291021673, "grad_norm": 0.11546541005373001, "learning_rate": 9.977602482408027e-05, "loss": 0.014, "step": 1794 }, { "epoch": 0.3473297213622291, "grad_norm": 0.12871061265468597, "learning_rate": 9.977575132704787e-05, "loss": 0.0148, "step": 1795 }, { "epoch": 0.3475232198142415, "grad_norm": 0.0961475819349289, "learning_rate": 9.977547766355026e-05, "loss": 0.0119, "step": 1796 }, { "epoch": 0.3477167182662539, "grad_norm": 0.1288011223077774, "learning_rate": 9.977520383358846e-05, "loss": 0.0152, "step": 1797 }, { "epoch": 0.3479102167182663, "grad_norm": 0.09132088720798492, "learning_rate": 9.977492983716347e-05, "loss": 0.0115, "step": 1798 }, { "epoch": 0.34810371517027866, "grad_norm": 0.1117282435297966, "learning_rate": 9.977465567427634e-05, "loss": 0.0132, "step": 1799 }, { "epoch": 0.34829721362229105, "grad_norm": 0.08315646648406982, "learning_rate": 9.977438134492806e-05, "loss": 0.0134, "step": 1800 }, { "epoch": 0.34849071207430343, "grad_norm": 0.09107047319412231, "learning_rate": 9.977410684911968e-05, "loss": 0.0137, "step": 1801 }, { "epoch": 0.34868421052631576, "grad_norm": 0.1118122786283493, "learning_rate": 9.977383218685219e-05, "loss": 0.0097, "step": 1802 }, { "epoch": 0.34887770897832815, "grad_norm": 0.14895004034042358, "learning_rate": 9.97735573581266e-05, "loss": 0.0134, "step": 1803 }, { "epoch": 0.34907120743034054, "grad_norm": 0.08048015832901001, "learning_rate": 9.9773282362944e-05, "loss": 0.0123, "step": 1804 }, { "epoch": 0.3492647058823529, "grad_norm": 0.1686326116323471, "learning_rate": 9.977300720130534e-05, "loss": 0.0157, "step": 1805 }, { "epoch": 0.3494582043343653, "grad_norm": 0.07850610464811325, "learning_rate": 9.977273187321169e-05, "loss": 0.0138, "step": 1806 }, { "epoch": 0.3496517027863777, "grad_norm": 0.15204575657844543, "learning_rate": 9.977245637866405e-05, "loss": 0.0116, "step": 1807 }, { "epoch": 0.3498452012383901, "grad_norm": 0.12490460276603699, "learning_rate": 9.977218071766344e-05, "loss": 0.0123, "step": 1808 }, { "epoch": 0.35003869969040247, "grad_norm": 0.17917238175868988, "learning_rate": 9.977190489021092e-05, "loss": 0.0124, "step": 1809 }, { "epoch": 0.35023219814241485, "grad_norm": 0.10767637938261032, "learning_rate": 9.977162889630746e-05, "loss": 0.0128, "step": 1810 }, { "epoch": 0.35042569659442724, "grad_norm": 0.12281858921051025, "learning_rate": 9.977135273595412e-05, "loss": 0.0118, "step": 1811 }, { "epoch": 0.3506191950464396, "grad_norm": 0.1179090142250061, "learning_rate": 9.977107640915193e-05, "loss": 0.0139, "step": 1812 }, { "epoch": 0.350812693498452, "grad_norm": 0.08168457448482513, "learning_rate": 9.977079991590191e-05, "loss": 0.0104, "step": 1813 }, { "epoch": 0.3510061919504644, "grad_norm": 0.12374882400035858, "learning_rate": 9.977052325620509e-05, "loss": 0.0116, "step": 1814 }, { "epoch": 0.3511996904024768, "grad_norm": 0.07836401462554932, "learning_rate": 9.97702464300625e-05, "loss": 0.0126, 
"step": 1815 }, { "epoch": 0.35139318885448917, "grad_norm": 0.07324644178152084, "learning_rate": 9.976996943747515e-05, "loss": 0.0133, "step": 1816 }, { "epoch": 0.35158668730650156, "grad_norm": 0.06731598824262619, "learning_rate": 9.976969227844411e-05, "loss": 0.0119, "step": 1817 }, { "epoch": 0.35178018575851394, "grad_norm": 0.07311508804559708, "learning_rate": 9.976941495297035e-05, "loss": 0.0115, "step": 1818 }, { "epoch": 0.3519736842105263, "grad_norm": 0.08189289271831512, "learning_rate": 9.976913746105496e-05, "loss": 0.0101, "step": 1819 }, { "epoch": 0.3521671826625387, "grad_norm": 0.12200301140546799, "learning_rate": 9.976885980269894e-05, "loss": 0.0131, "step": 1820 }, { "epoch": 0.3523606811145511, "grad_norm": 0.1801607310771942, "learning_rate": 9.976858197790333e-05, "loss": 0.0097, "step": 1821 }, { "epoch": 0.3525541795665635, "grad_norm": 0.13362224400043488, "learning_rate": 9.976830398666918e-05, "loss": 0.0124, "step": 1822 }, { "epoch": 0.35274767801857587, "grad_norm": 0.18772336840629578, "learning_rate": 9.976802582899748e-05, "loss": 0.0124, "step": 1823 }, { "epoch": 0.35294117647058826, "grad_norm": 0.241125226020813, "learning_rate": 9.976774750488932e-05, "loss": 0.0137, "step": 1824 }, { "epoch": 0.35313467492260064, "grad_norm": 0.16243010759353638, "learning_rate": 9.976746901434568e-05, "loss": 0.0113, "step": 1825 }, { "epoch": 0.35332817337461303, "grad_norm": 0.20738856494426727, "learning_rate": 9.976719035736764e-05, "loss": 0.0115, "step": 1826 }, { "epoch": 0.35352167182662536, "grad_norm": 0.09427265077829361, "learning_rate": 9.97669115339562e-05, "loss": 0.0113, "step": 1827 }, { "epoch": 0.35371517027863775, "grad_norm": 0.25575554370880127, "learning_rate": 9.976663254411243e-05, "loss": 0.014, "step": 1828 }, { "epoch": 0.35390866873065013, "grad_norm": 0.11017905175685883, "learning_rate": 9.976635338783734e-05, "loss": 0.0152, "step": 1829 }, { "epoch": 0.3541021671826625, "grad_norm": 0.26043686270713806, "learning_rate": 9.9766074065132e-05, "loss": 0.0133, "step": 1830 }, { "epoch": 0.3542956656346749, "grad_norm": 0.11417850106954575, "learning_rate": 9.976579457599741e-05, "loss": 0.0122, "step": 1831 }, { "epoch": 0.3544891640866873, "grad_norm": 0.20825336873531342, "learning_rate": 9.976551492043461e-05, "loss": 0.0134, "step": 1832 }, { "epoch": 0.3546826625386997, "grad_norm": 0.22329308092594147, "learning_rate": 9.976523509844469e-05, "loss": 0.0153, "step": 1833 }, { "epoch": 0.35487616099071206, "grad_norm": 0.2260550856590271, "learning_rate": 9.976495511002864e-05, "loss": 0.0112, "step": 1834 }, { "epoch": 0.35506965944272445, "grad_norm": 0.26626893877983093, "learning_rate": 9.976467495518751e-05, "loss": 0.0159, "step": 1835 }, { "epoch": 0.35526315789473684, "grad_norm": 0.19404900074005127, "learning_rate": 9.976439463392236e-05, "loss": 0.0141, "step": 1836 }, { "epoch": 0.3554566563467492, "grad_norm": 0.2756352424621582, "learning_rate": 9.976411414623422e-05, "loss": 0.0138, "step": 1837 }, { "epoch": 0.3556501547987616, "grad_norm": 0.11895760148763657, "learning_rate": 9.976383349212413e-05, "loss": 0.0151, "step": 1838 }, { "epoch": 0.355843653250774, "grad_norm": 0.27223503589630127, "learning_rate": 9.976355267159313e-05, "loss": 0.0149, "step": 1839 }, { "epoch": 0.3560371517027864, "grad_norm": 0.10971100628376007, "learning_rate": 9.976327168464227e-05, "loss": 0.012, "step": 1840 }, { "epoch": 0.35623065015479877, "grad_norm": 0.20889680087566376, "learning_rate": 9.97629905312726e-05, 
"loss": 0.0161, "step": 1841 }, { "epoch": 0.35642414860681115, "grad_norm": 0.14252761006355286, "learning_rate": 9.976270921148518e-05, "loss": 0.0128, "step": 1842 }, { "epoch": 0.35661764705882354, "grad_norm": 0.09030670672655106, "learning_rate": 9.976242772528101e-05, "loss": 0.0116, "step": 1843 }, { "epoch": 0.3568111455108359, "grad_norm": 0.18289291858673096, "learning_rate": 9.97621460726612e-05, "loss": 0.0126, "step": 1844 }, { "epoch": 0.3570046439628483, "grad_norm": 0.08689071238040924, "learning_rate": 9.976186425362672e-05, "loss": 0.0143, "step": 1845 }, { "epoch": 0.3571981424148607, "grad_norm": 0.22622239589691162, "learning_rate": 9.976158226817868e-05, "loss": 0.0107, "step": 1846 }, { "epoch": 0.3573916408668731, "grad_norm": 0.10276836156845093, "learning_rate": 9.97613001163181e-05, "loss": 0.0092, "step": 1847 }, { "epoch": 0.35758513931888547, "grad_norm": 0.21320569515228271, "learning_rate": 9.976101779804603e-05, "loss": 0.0152, "step": 1848 }, { "epoch": 0.35777863777089786, "grad_norm": 0.07537335157394409, "learning_rate": 9.976073531336355e-05, "loss": 0.0124, "step": 1849 }, { "epoch": 0.35797213622291024, "grad_norm": 0.13280099630355835, "learning_rate": 9.976045266227165e-05, "loss": 0.0103, "step": 1850 }, { "epoch": 0.35816563467492263, "grad_norm": 0.07124726474285126, "learning_rate": 9.976016984477143e-05, "loss": 0.0137, "step": 1851 }, { "epoch": 0.35835913312693496, "grad_norm": 0.10747521370649338, "learning_rate": 9.975988686086395e-05, "loss": 0.0142, "step": 1852 }, { "epoch": 0.35855263157894735, "grad_norm": 0.11215991526842117, "learning_rate": 9.975960371055022e-05, "loss": 0.0117, "step": 1853 }, { "epoch": 0.35874613003095973, "grad_norm": 0.08831890672445297, "learning_rate": 9.975932039383132e-05, "loss": 0.0115, "step": 1854 }, { "epoch": 0.3589396284829721, "grad_norm": 0.119183249771595, "learning_rate": 9.975903691070827e-05, "loss": 0.0114, "step": 1855 }, { "epoch": 0.3591331269349845, "grad_norm": 0.04710770398378372, "learning_rate": 9.975875326118219e-05, "loss": 0.0113, "step": 1856 }, { "epoch": 0.3593266253869969, "grad_norm": 0.11522042751312256, "learning_rate": 9.975846944525406e-05, "loss": 0.0126, "step": 1857 }, { "epoch": 0.3595201238390093, "grad_norm": 0.13674509525299072, "learning_rate": 9.975818546292498e-05, "loss": 0.0116, "step": 1858 }, { "epoch": 0.35971362229102166, "grad_norm": 0.08206430822610855, "learning_rate": 9.975790131419601e-05, "loss": 0.0114, "step": 1859 }, { "epoch": 0.35990712074303405, "grad_norm": 0.22576725482940674, "learning_rate": 9.975761699906817e-05, "loss": 0.0131, "step": 1860 }, { "epoch": 0.36010061919504643, "grad_norm": 0.06613728404045105, "learning_rate": 9.975733251754255e-05, "loss": 0.014, "step": 1861 }, { "epoch": 0.3602941176470588, "grad_norm": 0.20346051454544067, "learning_rate": 9.97570478696202e-05, "loss": 0.0136, "step": 1862 }, { "epoch": 0.3604876160990712, "grad_norm": 0.06747918576002121, "learning_rate": 9.975676305530217e-05, "loss": 0.0098, "step": 1863 }, { "epoch": 0.3606811145510836, "grad_norm": 0.12793217599391937, "learning_rate": 9.975647807458952e-05, "loss": 0.0137, "step": 1864 }, { "epoch": 0.360874613003096, "grad_norm": 0.09186074882745743, "learning_rate": 9.975619292748331e-05, "loss": 0.0109, "step": 1865 }, { "epoch": 0.36106811145510836, "grad_norm": 0.04272010549902916, "learning_rate": 9.975590761398462e-05, "loss": 0.0116, "step": 1866 }, { "epoch": 0.36126160990712075, "grad_norm": 0.10025916993618011, "learning_rate": 
9.975562213409447e-05, "loss": 0.0115, "step": 1867 }, { "epoch": 0.36145510835913314, "grad_norm": 0.0653059110045433, "learning_rate": 9.975533648781395e-05, "loss": 0.0132, "step": 1868 }, { "epoch": 0.3616486068111455, "grad_norm": 0.13441622257232666, "learning_rate": 9.975505067514412e-05, "loss": 0.0124, "step": 1869 }, { "epoch": 0.3618421052631579, "grad_norm": 0.04762616753578186, "learning_rate": 9.975476469608606e-05, "loss": 0.0124, "step": 1870 }, { "epoch": 0.3620356037151703, "grad_norm": 0.06862585991621017, "learning_rate": 9.975447855064079e-05, "loss": 0.0132, "step": 1871 }, { "epoch": 0.3622291021671827, "grad_norm": 0.0591282844543457, "learning_rate": 9.97541922388094e-05, "loss": 0.0126, "step": 1872 }, { "epoch": 0.36242260061919507, "grad_norm": 0.17698509991168976, "learning_rate": 9.975390576059296e-05, "loss": 0.0138, "step": 1873 }, { "epoch": 0.36261609907120745, "grad_norm": 0.1392662227153778, "learning_rate": 9.975361911599251e-05, "loss": 0.0127, "step": 1874 }, { "epoch": 0.36280959752321984, "grad_norm": 0.11574394255876541, "learning_rate": 9.975333230500914e-05, "loss": 0.0094, "step": 1875 }, { "epoch": 0.36300309597523217, "grad_norm": 0.17407575249671936, "learning_rate": 9.97530453276439e-05, "loss": 0.0154, "step": 1876 }, { "epoch": 0.36319659442724456, "grad_norm": 0.15721024572849274, "learning_rate": 9.975275818389788e-05, "loss": 0.0122, "step": 1877 }, { "epoch": 0.36339009287925694, "grad_norm": 0.24523288011550903, "learning_rate": 9.975247087377213e-05, "loss": 0.0125, "step": 1878 }, { "epoch": 0.36358359133126933, "grad_norm": 0.15044555068016052, "learning_rate": 9.975218339726771e-05, "loss": 0.013, "step": 1879 }, { "epoch": 0.3637770897832817, "grad_norm": 0.21300843358039856, "learning_rate": 9.975189575438572e-05, "loss": 0.0094, "step": 1880 }, { "epoch": 0.3639705882352941, "grad_norm": 0.06661459803581238, "learning_rate": 9.975160794512718e-05, "loss": 0.0123, "step": 1881 }, { "epoch": 0.3641640866873065, "grad_norm": 0.3692232072353363, "learning_rate": 9.975131996949322e-05, "loss": 0.013, "step": 1882 }, { "epoch": 0.3643575851393189, "grad_norm": 0.0779041051864624, "learning_rate": 9.975103182748485e-05, "loss": 0.0141, "step": 1883 }, { "epoch": 0.36455108359133126, "grad_norm": 0.31598377227783203, "learning_rate": 9.975074351910319e-05, "loss": 0.0139, "step": 1884 }, { "epoch": 0.36474458204334365, "grad_norm": 0.18056510388851166, "learning_rate": 9.975045504434929e-05, "loss": 0.0096, "step": 1885 }, { "epoch": 0.36493808049535603, "grad_norm": 0.2753310799598694, "learning_rate": 9.97501664032242e-05, "loss": 0.0135, "step": 1886 }, { "epoch": 0.3651315789473684, "grad_norm": 0.2800087034702301, "learning_rate": 9.974987759572905e-05, "loss": 0.0144, "step": 1887 }, { "epoch": 0.3653250773993808, "grad_norm": 0.15733473002910614, "learning_rate": 9.974958862186487e-05, "loss": 0.0147, "step": 1888 }, { "epoch": 0.3655185758513932, "grad_norm": 0.31540894508361816, "learning_rate": 9.974929948163275e-05, "loss": 0.0122, "step": 1889 }, { "epoch": 0.3657120743034056, "grad_norm": 0.08301012217998505, "learning_rate": 9.974901017503376e-05, "loss": 0.0116, "step": 1890 }, { "epoch": 0.36590557275541796, "grad_norm": 0.27287280559539795, "learning_rate": 9.974872070206897e-05, "loss": 0.0126, "step": 1891 }, { "epoch": 0.36609907120743035, "grad_norm": 0.12464970350265503, "learning_rate": 9.974843106273945e-05, "loss": 0.0126, "step": 1892 }, { "epoch": 0.36629256965944273, "grad_norm": 0.19445988535881042, 
"learning_rate": 9.974814125704631e-05, "loss": 0.0147, "step": 1893 }, { "epoch": 0.3664860681114551, "grad_norm": 0.20250976085662842, "learning_rate": 9.97478512849906e-05, "loss": 0.0127, "step": 1894 }, { "epoch": 0.3666795665634675, "grad_norm": 0.16487891972064972, "learning_rate": 9.974756114657341e-05, "loss": 0.013, "step": 1895 }, { "epoch": 0.3668730650154799, "grad_norm": 0.13452041149139404, "learning_rate": 9.974727084179581e-05, "loss": 0.0129, "step": 1896 }, { "epoch": 0.3670665634674923, "grad_norm": 0.08038463443517685, "learning_rate": 9.97469803706589e-05, "loss": 0.014, "step": 1897 }, { "epoch": 0.36726006191950467, "grad_norm": 0.12070489674806595, "learning_rate": 9.974668973316372e-05, "loss": 0.0118, "step": 1898 }, { "epoch": 0.36745356037151705, "grad_norm": 0.08729046583175659, "learning_rate": 9.974639892931138e-05, "loss": 0.0124, "step": 1899 }, { "epoch": 0.36764705882352944, "grad_norm": 0.12859511375427246, "learning_rate": 9.974610795910295e-05, "loss": 0.0127, "step": 1900 }, { "epoch": 0.36784055727554177, "grad_norm": 0.10214626044034958, "learning_rate": 9.974581682253954e-05, "loss": 0.0128, "step": 1901 }, { "epoch": 0.36803405572755415, "grad_norm": 0.0912294089794159, "learning_rate": 9.974552551962219e-05, "loss": 0.0122, "step": 1902 }, { "epoch": 0.36822755417956654, "grad_norm": 0.10738855600357056, "learning_rate": 9.974523405035199e-05, "loss": 0.0098, "step": 1903 }, { "epoch": 0.3684210526315789, "grad_norm": 0.1033361479640007, "learning_rate": 9.974494241473006e-05, "loss": 0.0113, "step": 1904 }, { "epoch": 0.3686145510835913, "grad_norm": 0.11160436272621155, "learning_rate": 9.974465061275743e-05, "loss": 0.0117, "step": 1905 }, { "epoch": 0.3688080495356037, "grad_norm": 0.1597677767276764, "learning_rate": 9.974435864443525e-05, "loss": 0.0129, "step": 1906 }, { "epoch": 0.3690015479876161, "grad_norm": 0.07542756199836731, "learning_rate": 9.974406650976455e-05, "loss": 0.0135, "step": 1907 }, { "epoch": 0.36919504643962847, "grad_norm": 0.15295523405075073, "learning_rate": 9.974377420874643e-05, "loss": 0.0106, "step": 1908 }, { "epoch": 0.36938854489164086, "grad_norm": 0.16603276133537292, "learning_rate": 9.9743481741382e-05, "loss": 0.0128, "step": 1909 }, { "epoch": 0.36958204334365324, "grad_norm": 0.07618916779756546, "learning_rate": 9.974318910767232e-05, "loss": 0.0131, "step": 1910 }, { "epoch": 0.36977554179566563, "grad_norm": 0.1366998702287674, "learning_rate": 9.974289630761848e-05, "loss": 0.0131, "step": 1911 }, { "epoch": 0.369969040247678, "grad_norm": 0.09841887652873993, "learning_rate": 9.974260334122158e-05, "loss": 0.0165, "step": 1912 }, { "epoch": 0.3701625386996904, "grad_norm": 0.06628848612308502, "learning_rate": 9.974231020848272e-05, "loss": 0.0101, "step": 1913 }, { "epoch": 0.3703560371517028, "grad_norm": 0.13053742051124573, "learning_rate": 9.974201690940296e-05, "loss": 0.0133, "step": 1914 }, { "epoch": 0.3705495356037152, "grad_norm": 0.0762704387307167, "learning_rate": 9.974172344398341e-05, "loss": 0.0134, "step": 1915 }, { "epoch": 0.37074303405572756, "grad_norm": 0.12164950370788574, "learning_rate": 9.974142981222516e-05, "loss": 0.0124, "step": 1916 }, { "epoch": 0.37093653250773995, "grad_norm": 0.10399528592824936, "learning_rate": 9.974113601412931e-05, "loss": 0.0127, "step": 1917 }, { "epoch": 0.37113003095975233, "grad_norm": 0.09781540185213089, "learning_rate": 9.974084204969691e-05, "loss": 0.0134, "step": 1918 }, { "epoch": 0.3713235294117647, "grad_norm": 
0.09729351103305817, "learning_rate": 9.97405479189291e-05, "loss": 0.0136, "step": 1919 }, { "epoch": 0.3715170278637771, "grad_norm": 0.12210115045309067, "learning_rate": 9.974025362182697e-05, "loss": 0.0128, "step": 1920 }, { "epoch": 0.3717105263157895, "grad_norm": 0.08026482164859772, "learning_rate": 9.973995915839159e-05, "loss": 0.0117, "step": 1921 }, { "epoch": 0.3719040247678019, "grad_norm": 0.10512084513902664, "learning_rate": 9.973966452862406e-05, "loss": 0.0132, "step": 1922 }, { "epoch": 0.37209752321981426, "grad_norm": 0.0894240066409111, "learning_rate": 9.97393697325255e-05, "loss": 0.0145, "step": 1923 }, { "epoch": 0.37229102167182665, "grad_norm": 0.13804826140403748, "learning_rate": 9.973907477009696e-05, "loss": 0.0123, "step": 1924 }, { "epoch": 0.37248452012383904, "grad_norm": 0.08127070963382721, "learning_rate": 9.973877964133959e-05, "loss": 0.0132, "step": 1925 }, { "epoch": 0.37267801857585137, "grad_norm": 0.13892318308353424, "learning_rate": 9.973848434625444e-05, "loss": 0.0117, "step": 1926 }, { "epoch": 0.37287151702786375, "grad_norm": 0.05586814507842064, "learning_rate": 9.973818888484265e-05, "loss": 0.0114, "step": 1927 }, { "epoch": 0.37306501547987614, "grad_norm": 0.14499960839748383, "learning_rate": 9.973789325710528e-05, "loss": 0.0123, "step": 1928 }, { "epoch": 0.3732585139318885, "grad_norm": 0.07833931595087051, "learning_rate": 9.973759746304345e-05, "loss": 0.0138, "step": 1929 }, { "epoch": 0.3734520123839009, "grad_norm": 0.12335377186536789, "learning_rate": 9.973730150265826e-05, "loss": 0.0127, "step": 1930 }, { "epoch": 0.3736455108359133, "grad_norm": 0.21510721743106842, "learning_rate": 9.973700537595081e-05, "loss": 0.0122, "step": 1931 }, { "epoch": 0.3738390092879257, "grad_norm": 0.20869329571723938, "learning_rate": 9.97367090829222e-05, "loss": 0.014, "step": 1932 }, { "epoch": 0.37403250773993807, "grad_norm": 0.2757473587989807, "learning_rate": 9.97364126235735e-05, "loss": 0.0139, "step": 1933 }, { "epoch": 0.37422600619195046, "grad_norm": 0.19746440649032593, "learning_rate": 9.973611599790588e-05, "loss": 0.0128, "step": 1934 }, { "epoch": 0.37441950464396284, "grad_norm": 0.16707487404346466, "learning_rate": 9.973581920592039e-05, "loss": 0.0123, "step": 1935 }, { "epoch": 0.3746130030959752, "grad_norm": 0.057528894394636154, "learning_rate": 9.973552224761814e-05, "loss": 0.0112, "step": 1936 }, { "epoch": 0.3748065015479876, "grad_norm": 0.121848925948143, "learning_rate": 9.973522512300026e-05, "loss": 0.0128, "step": 1937 }, { "epoch": 0.375, "grad_norm": 0.11312764883041382, "learning_rate": 9.973492783206779e-05, "loss": 0.0129, "step": 1938 }, { "epoch": 0.3751934984520124, "grad_norm": 0.09063243120908737, "learning_rate": 9.973463037482193e-05, "loss": 0.0114, "step": 1939 }, { "epoch": 0.3753869969040248, "grad_norm": 0.1307225078344345, "learning_rate": 9.973433275126372e-05, "loss": 0.012, "step": 1940 }, { "epoch": 0.37558049535603716, "grad_norm": 0.06559816002845764, "learning_rate": 9.973403496139428e-05, "loss": 0.0137, "step": 1941 }, { "epoch": 0.37577399380804954, "grad_norm": 0.13794434070587158, "learning_rate": 9.973373700521474e-05, "loss": 0.0128, "step": 1942 }, { "epoch": 0.37596749226006193, "grad_norm": 0.11357332020998001, "learning_rate": 9.973343888272616e-05, "loss": 0.0131, "step": 1943 }, { "epoch": 0.3761609907120743, "grad_norm": 0.1340329945087433, "learning_rate": 9.97331405939297e-05, "loss": 0.011, "step": 1944 }, { "epoch": 0.3763544891640867, "grad_norm": 
0.08923814445734024, "learning_rate": 9.973284213882642e-05, "loss": 0.0086, "step": 1945 }, { "epoch": 0.3765479876160991, "grad_norm": 0.11135523021221161, "learning_rate": 9.973254351741747e-05, "loss": 0.0105, "step": 1946 }, { "epoch": 0.3767414860681115, "grad_norm": 0.10027367621660233, "learning_rate": 9.973224472970395e-05, "loss": 0.0115, "step": 1947 }, { "epoch": 0.37693498452012386, "grad_norm": 0.10031553357839584, "learning_rate": 9.973194577568696e-05, "loss": 0.0143, "step": 1948 }, { "epoch": 0.37712848297213625, "grad_norm": 0.13845868408679962, "learning_rate": 9.973164665536764e-05, "loss": 0.0159, "step": 1949 }, { "epoch": 0.37732198142414863, "grad_norm": 0.11786256730556488, "learning_rate": 9.973134736874705e-05, "loss": 0.0099, "step": 1950 }, { "epoch": 0.37751547987616096, "grad_norm": 0.0953511968255043, "learning_rate": 9.973104791582635e-05, "loss": 0.0139, "step": 1951 }, { "epoch": 0.37770897832817335, "grad_norm": 0.1410224586725235, "learning_rate": 9.973074829660663e-05, "loss": 0.0121, "step": 1952 }, { "epoch": 0.37790247678018574, "grad_norm": 0.08897356688976288, "learning_rate": 9.973044851108901e-05, "loss": 0.0135, "step": 1953 }, { "epoch": 0.3780959752321981, "grad_norm": 0.12175490707159042, "learning_rate": 9.973014855927459e-05, "loss": 0.011, "step": 1954 }, { "epoch": 0.3782894736842105, "grad_norm": 0.1008831039071083, "learning_rate": 9.972984844116451e-05, "loss": 0.0127, "step": 1955 }, { "epoch": 0.3784829721362229, "grad_norm": 0.046509869396686554, "learning_rate": 9.972954815675989e-05, "loss": 0.014, "step": 1956 }, { "epoch": 0.3786764705882353, "grad_norm": 0.1618010252714157, "learning_rate": 9.972924770606182e-05, "loss": 0.0122, "step": 1957 }, { "epoch": 0.37886996904024767, "grad_norm": 0.08142004907131195, "learning_rate": 9.972894708907144e-05, "loss": 0.0175, "step": 1958 }, { "epoch": 0.37906346749226005, "grad_norm": 0.13669602572917938, "learning_rate": 9.972864630578983e-05, "loss": 0.0148, "step": 1959 }, { "epoch": 0.37925696594427244, "grad_norm": 0.09175140410661697, "learning_rate": 9.972834535621814e-05, "loss": 0.0148, "step": 1960 }, { "epoch": 0.3794504643962848, "grad_norm": 0.13418029248714447, "learning_rate": 9.97280442403575e-05, "loss": 0.0116, "step": 1961 }, { "epoch": 0.3796439628482972, "grad_norm": 0.09254588931798935, "learning_rate": 9.972774295820901e-05, "loss": 0.0148, "step": 1962 }, { "epoch": 0.3798374613003096, "grad_norm": 0.1671774536371231, "learning_rate": 9.972744150977378e-05, "loss": 0.0132, "step": 1963 }, { "epoch": 0.380030959752322, "grad_norm": 0.14481613039970398, "learning_rate": 9.972713989505296e-05, "loss": 0.0111, "step": 1964 }, { "epoch": 0.38022445820433437, "grad_norm": 0.11737358570098877, "learning_rate": 9.972683811404763e-05, "loss": 0.0149, "step": 1965 }, { "epoch": 0.38041795665634676, "grad_norm": 0.11997726559638977, "learning_rate": 9.972653616675895e-05, "loss": 0.014, "step": 1966 }, { "epoch": 0.38061145510835914, "grad_norm": 0.08772414922714233, "learning_rate": 9.972623405318805e-05, "loss": 0.0124, "step": 1967 }, { "epoch": 0.38080495356037153, "grad_norm": 0.1026032567024231, "learning_rate": 9.972593177333602e-05, "loss": 0.0101, "step": 1968 }, { "epoch": 0.3809984520123839, "grad_norm": 0.1306181102991104, "learning_rate": 9.972562932720398e-05, "loss": 0.0124, "step": 1969 }, { "epoch": 0.3811919504643963, "grad_norm": 0.1230425164103508, "learning_rate": 9.972532671479308e-05, "loss": 0.0116, "step": 1970 }, { "epoch": 0.3813854489164087, 
"grad_norm": 0.10244838893413544, "learning_rate": 9.972502393610444e-05, "loss": 0.0104, "step": 1971 }, { "epoch": 0.3815789473684211, "grad_norm": 0.14537735283374786, "learning_rate": 9.972472099113918e-05, "loss": 0.0138, "step": 1972 }, { "epoch": 0.38177244582043346, "grad_norm": 0.0676647424697876, "learning_rate": 9.972441787989842e-05, "loss": 0.0122, "step": 1973 }, { "epoch": 0.38196594427244585, "grad_norm": 0.13474422693252563, "learning_rate": 9.972411460238331e-05, "loss": 0.0139, "step": 1974 }, { "epoch": 0.38215944272445823, "grad_norm": 0.09847729653120041, "learning_rate": 9.972381115859494e-05, "loss": 0.0116, "step": 1975 }, { "epoch": 0.38235294117647056, "grad_norm": 0.12420990318059921, "learning_rate": 9.972350754853449e-05, "loss": 0.015, "step": 1976 }, { "epoch": 0.38254643962848295, "grad_norm": 0.13168734312057495, "learning_rate": 9.972320377220304e-05, "loss": 0.0138, "step": 1977 }, { "epoch": 0.38273993808049533, "grad_norm": 0.06695844233036041, "learning_rate": 9.972289982960175e-05, "loss": 0.01, "step": 1978 }, { "epoch": 0.3829334365325077, "grad_norm": 0.13520051538944244, "learning_rate": 9.972259572073173e-05, "loss": 0.0139, "step": 1979 }, { "epoch": 0.3831269349845201, "grad_norm": 0.09612817317247391, "learning_rate": 9.972229144559412e-05, "loss": 0.0115, "step": 1980 }, { "epoch": 0.3833204334365325, "grad_norm": 0.16932369768619537, "learning_rate": 9.972198700419004e-05, "loss": 0.0124, "step": 1981 }, { "epoch": 0.3835139318885449, "grad_norm": 0.1318742334842682, "learning_rate": 9.972168239652064e-05, "loss": 0.0147, "step": 1982 }, { "epoch": 0.38370743034055727, "grad_norm": 0.1486351490020752, "learning_rate": 9.972137762258705e-05, "loss": 0.0095, "step": 1983 }, { "epoch": 0.38390092879256965, "grad_norm": 0.07223190367221832, "learning_rate": 9.972107268239041e-05, "loss": 0.0114, "step": 1984 }, { "epoch": 0.38409442724458204, "grad_norm": 0.1642937809228897, "learning_rate": 9.972076757593181e-05, "loss": 0.0157, "step": 1985 }, { "epoch": 0.3842879256965944, "grad_norm": 0.08845067769289017, "learning_rate": 9.972046230321244e-05, "loss": 0.0116, "step": 1986 }, { "epoch": 0.3844814241486068, "grad_norm": 0.11957579106092453, "learning_rate": 9.97201568642334e-05, "loss": 0.0106, "step": 1987 }, { "epoch": 0.3846749226006192, "grad_norm": 0.12293440103530884, "learning_rate": 9.971985125899583e-05, "loss": 0.0103, "step": 1988 }, { "epoch": 0.3848684210526316, "grad_norm": 0.09887723624706268, "learning_rate": 9.971954548750087e-05, "loss": 0.0087, "step": 1989 }, { "epoch": 0.38506191950464397, "grad_norm": 0.12121758610010147, "learning_rate": 9.971923954974968e-05, "loss": 0.0119, "step": 1990 }, { "epoch": 0.38525541795665635, "grad_norm": 0.08439388126134872, "learning_rate": 9.971893344574337e-05, "loss": 0.0137, "step": 1991 }, { "epoch": 0.38544891640866874, "grad_norm": 0.04846806824207306, "learning_rate": 9.971862717548308e-05, "loss": 0.0115, "step": 1992 }, { "epoch": 0.3856424148606811, "grad_norm": 0.12035994976758957, "learning_rate": 9.971832073896995e-05, "loss": 0.0139, "step": 1993 }, { "epoch": 0.3858359133126935, "grad_norm": 0.058425240218639374, "learning_rate": 9.971801413620512e-05, "loss": 0.0124, "step": 1994 }, { "epoch": 0.3860294117647059, "grad_norm": 0.13592462241649628, "learning_rate": 9.971770736718973e-05, "loss": 0.011, "step": 1995 }, { "epoch": 0.3862229102167183, "grad_norm": 0.18005704879760742, "learning_rate": 9.971740043192494e-05, "loss": 0.0125, "step": 1996 }, { "epoch": 
0.38641640866873067, "grad_norm": 0.1063423901796341, "learning_rate": 9.971709333041187e-05, "loss": 0.0137, "step": 1997 }, { "epoch": 0.38660990712074306, "grad_norm": 0.15672434866428375, "learning_rate": 9.971678606265166e-05, "loss": 0.0122, "step": 1998 }, { "epoch": 0.38680340557275544, "grad_norm": 0.06011052429676056, "learning_rate": 9.971647862864546e-05, "loss": 0.0117, "step": 1999 }, { "epoch": 0.38699690402476783, "grad_norm": 0.12152373045682907, "learning_rate": 9.971617102839441e-05, "loss": 0.0096, "step": 2000 }, { "epoch": 0.38719040247678016, "grad_norm": 0.09670767933130264, "learning_rate": 9.971586326189964e-05, "loss": 0.0122, "step": 2001 }, { "epoch": 0.38738390092879255, "grad_norm": 0.09385755658149719, "learning_rate": 9.971555532916234e-05, "loss": 0.0119, "step": 2002 }, { "epoch": 0.38757739938080493, "grad_norm": 0.09150703996419907, "learning_rate": 9.97152472301836e-05, "loss": 0.0131, "step": 2003 }, { "epoch": 0.3877708978328173, "grad_norm": 0.09370113909244537, "learning_rate": 9.971493896496459e-05, "loss": 0.012, "step": 2004 }, { "epoch": 0.3879643962848297, "grad_norm": 0.06287793070077896, "learning_rate": 9.971463053350646e-05, "loss": 0.012, "step": 2005 }, { "epoch": 0.3881578947368421, "grad_norm": 0.13559791445732117, "learning_rate": 9.971432193581034e-05, "loss": 0.0138, "step": 2006 }, { "epoch": 0.3883513931888545, "grad_norm": 0.0709252879023552, "learning_rate": 9.97140131718774e-05, "loss": 0.011, "step": 2007 }, { "epoch": 0.38854489164086686, "grad_norm": 0.1272907257080078, "learning_rate": 9.971370424170878e-05, "loss": 0.0118, "step": 2008 }, { "epoch": 0.38873839009287925, "grad_norm": 0.11172366887331009, "learning_rate": 9.971339514530562e-05, "loss": 0.0133, "step": 2009 }, { "epoch": 0.38893188854489164, "grad_norm": 0.12350016832351685, "learning_rate": 9.971308588266907e-05, "loss": 0.0123, "step": 2010 }, { "epoch": 0.389125386996904, "grad_norm": 0.13761340081691742, "learning_rate": 9.97127764538003e-05, "loss": 0.013, "step": 2011 }, { "epoch": 0.3893188854489164, "grad_norm": 0.09894204884767532, "learning_rate": 9.971246685870043e-05, "loss": 0.0102, "step": 2012 }, { "epoch": 0.3895123839009288, "grad_norm": 0.11451055109500885, "learning_rate": 9.971215709737063e-05, "loss": 0.0118, "step": 2013 }, { "epoch": 0.3897058823529412, "grad_norm": 0.1107846200466156, "learning_rate": 9.971184716981205e-05, "loss": 0.0109, "step": 2014 }, { "epoch": 0.38989938080495357, "grad_norm": 0.16758625209331512, "learning_rate": 9.971153707602585e-05, "loss": 0.0129, "step": 2015 }, { "epoch": 0.39009287925696595, "grad_norm": 0.041788775473833084, "learning_rate": 9.971122681601315e-05, "loss": 0.012, "step": 2016 }, { "epoch": 0.39028637770897834, "grad_norm": 0.11652247607707977, "learning_rate": 9.971091638977513e-05, "loss": 0.0116, "step": 2017 }, { "epoch": 0.3904798761609907, "grad_norm": 0.08042038232088089, "learning_rate": 9.971060579731297e-05, "loss": 0.0124, "step": 2018 }, { "epoch": 0.3906733746130031, "grad_norm": 0.14578664302825928, "learning_rate": 9.971029503862776e-05, "loss": 0.0116, "step": 2019 }, { "epoch": 0.3908668730650155, "grad_norm": 0.07683295756578445, "learning_rate": 9.97099841137207e-05, "loss": 0.0109, "step": 2020 }, { "epoch": 0.3910603715170279, "grad_norm": 0.1507319211959839, "learning_rate": 9.970967302259293e-05, "loss": 0.0125, "step": 2021 }, { "epoch": 0.39125386996904027, "grad_norm": 0.08700759708881378, "learning_rate": 9.970936176524561e-05, "loss": 0.0117, "step": 2022 }, { 
"epoch": 0.39144736842105265, "grad_norm": 0.12208200246095657, "learning_rate": 9.970905034167991e-05, "loss": 0.012, "step": 2023 }, { "epoch": 0.39164086687306504, "grad_norm": 0.062208838760852814, "learning_rate": 9.970873875189699e-05, "loss": 0.0123, "step": 2024 }, { "epoch": 0.39183436532507737, "grad_norm": 0.13379226624965668, "learning_rate": 9.970842699589798e-05, "loss": 0.0135, "step": 2025 }, { "epoch": 0.39202786377708976, "grad_norm": 0.042583856731653214, "learning_rate": 9.970811507368405e-05, "loss": 0.0103, "step": 2026 }, { "epoch": 0.39222136222910214, "grad_norm": 0.12271606177091599, "learning_rate": 9.970780298525637e-05, "loss": 0.0135, "step": 2027 }, { "epoch": 0.39241486068111453, "grad_norm": 0.06823946535587311, "learning_rate": 9.970749073061608e-05, "loss": 0.0133, "step": 2028 }, { "epoch": 0.3926083591331269, "grad_norm": 0.07251641154289246, "learning_rate": 9.970717830976439e-05, "loss": 0.0103, "step": 2029 }, { "epoch": 0.3928018575851393, "grad_norm": 0.08641615509986877, "learning_rate": 9.97068657227024e-05, "loss": 0.0115, "step": 2030 }, { "epoch": 0.3929953560371517, "grad_norm": 0.06318818777799606, "learning_rate": 9.97065529694313e-05, "loss": 0.0123, "step": 2031 }, { "epoch": 0.3931888544891641, "grad_norm": 0.15512655675411224, "learning_rate": 9.970624004995227e-05, "loss": 0.0119, "step": 2032 }, { "epoch": 0.39338235294117646, "grad_norm": 0.07948901504278183, "learning_rate": 9.970592696426644e-05, "loss": 0.0114, "step": 2033 }, { "epoch": 0.39357585139318885, "grad_norm": 0.15666386485099792, "learning_rate": 9.9705613712375e-05, "loss": 0.0099, "step": 2034 }, { "epoch": 0.39376934984520123, "grad_norm": 0.08066043257713318, "learning_rate": 9.970530029427909e-05, "loss": 0.0123, "step": 2035 }, { "epoch": 0.3939628482972136, "grad_norm": 0.2309161126613617, "learning_rate": 9.970498670997991e-05, "loss": 0.0141, "step": 2036 }, { "epoch": 0.394156346749226, "grad_norm": 0.09876692295074463, "learning_rate": 9.970467295947858e-05, "loss": 0.0117, "step": 2037 }, { "epoch": 0.3943498452012384, "grad_norm": 0.25793740153312683, "learning_rate": 9.970435904277632e-05, "loss": 0.011, "step": 2038 }, { "epoch": 0.3945433436532508, "grad_norm": 0.12808972597122192, "learning_rate": 9.970404495987424e-05, "loss": 0.0127, "step": 2039 }, { "epoch": 0.39473684210526316, "grad_norm": 0.17772144079208374, "learning_rate": 9.970373071077355e-05, "loss": 0.0114, "step": 2040 }, { "epoch": 0.39493034055727555, "grad_norm": 0.1925363540649414, "learning_rate": 9.970341629547539e-05, "loss": 0.0125, "step": 2041 }, { "epoch": 0.39512383900928794, "grad_norm": 0.13154590129852295, "learning_rate": 9.970310171398096e-05, "loss": 0.0116, "step": 2042 }, { "epoch": 0.3953173374613003, "grad_norm": 0.29967397451400757, "learning_rate": 9.970278696629142e-05, "loss": 0.0123, "step": 2043 }, { "epoch": 0.3955108359133127, "grad_norm": 0.08069558441638947, "learning_rate": 9.970247205240792e-05, "loss": 0.0113, "step": 2044 }, { "epoch": 0.3957043343653251, "grad_norm": 0.26372361183166504, "learning_rate": 9.970215697233163e-05, "loss": 0.014, "step": 2045 }, { "epoch": 0.3958978328173375, "grad_norm": 0.09354196488857269, "learning_rate": 9.970184172606375e-05, "loss": 0.0131, "step": 2046 }, { "epoch": 0.39609133126934987, "grad_norm": 0.20533475279808044, "learning_rate": 9.970152631360546e-05, "loss": 0.0107, "step": 2047 }, { "epoch": 0.39628482972136225, "grad_norm": 0.10804440826177597, "learning_rate": 9.970121073495789e-05, "loss": 0.0121, 
"step": 2048 }, { "epoch": 0.39647832817337464, "grad_norm": 0.11206699907779694, "learning_rate": 9.970089499012223e-05, "loss": 0.0109, "step": 2049 }, { "epoch": 0.39667182662538697, "grad_norm": 0.09309230744838715, "learning_rate": 9.970057907909965e-05, "loss": 0.0108, "step": 2050 }, { "epoch": 0.39686532507739936, "grad_norm": 0.07266286760568619, "learning_rate": 9.970026300189135e-05, "loss": 0.0133, "step": 2051 }, { "epoch": 0.39705882352941174, "grad_norm": 0.1263664811849594, "learning_rate": 9.969994675849847e-05, "loss": 0.0116, "step": 2052 }, { "epoch": 0.39725232198142413, "grad_norm": 0.08381784707307816, "learning_rate": 9.969963034892221e-05, "loss": 0.0133, "step": 2053 }, { "epoch": 0.3974458204334365, "grad_norm": 0.09987753629684448, "learning_rate": 9.969931377316374e-05, "loss": 0.0117, "step": 2054 }, { "epoch": 0.3976393188854489, "grad_norm": 0.06670907139778137, "learning_rate": 9.969899703122423e-05, "loss": 0.0118, "step": 2055 }, { "epoch": 0.3978328173374613, "grad_norm": 0.12272512912750244, "learning_rate": 9.969868012310485e-05, "loss": 0.0121, "step": 2056 }, { "epoch": 0.3980263157894737, "grad_norm": 0.04851328209042549, "learning_rate": 9.969836304880682e-05, "loss": 0.0107, "step": 2057 }, { "epoch": 0.39821981424148606, "grad_norm": 0.10990210622549057, "learning_rate": 9.969804580833128e-05, "loss": 0.0117, "step": 2058 }, { "epoch": 0.39841331269349844, "grad_norm": 0.08385566622018814, "learning_rate": 9.969772840167942e-05, "loss": 0.0115, "step": 2059 }, { "epoch": 0.39860681114551083, "grad_norm": 0.12155935913324356, "learning_rate": 9.969741082885241e-05, "loss": 0.0131, "step": 2060 }, { "epoch": 0.3988003095975232, "grad_norm": 0.09363085776567459, "learning_rate": 9.969709308985145e-05, "loss": 0.0122, "step": 2061 }, { "epoch": 0.3989938080495356, "grad_norm": 0.04845526069402695, "learning_rate": 9.969677518467771e-05, "loss": 0.0105, "step": 2062 }, { "epoch": 0.399187306501548, "grad_norm": 0.1026758924126625, "learning_rate": 9.969645711333238e-05, "loss": 0.0138, "step": 2063 }, { "epoch": 0.3993808049535604, "grad_norm": 0.06369167566299438, "learning_rate": 9.969613887581661e-05, "loss": 0.0112, "step": 2064 }, { "epoch": 0.39957430340557276, "grad_norm": 0.1152399480342865, "learning_rate": 9.969582047213164e-05, "loss": 0.0118, "step": 2065 }, { "epoch": 0.39976780185758515, "grad_norm": 0.11791329830884933, "learning_rate": 9.96955019022786e-05, "loss": 0.0127, "step": 2066 }, { "epoch": 0.39996130030959753, "grad_norm": 0.1033533588051796, "learning_rate": 9.96951831662587e-05, "loss": 0.0134, "step": 2067 }, { "epoch": 0.4001547987616099, "grad_norm": 0.13675811886787415, "learning_rate": 9.969486426407315e-05, "loss": 0.0128, "step": 2068 }, { "epoch": 0.4003482972136223, "grad_norm": 0.0806540846824646, "learning_rate": 9.969454519572309e-05, "loss": 0.0117, "step": 2069 }, { "epoch": 0.4005417956656347, "grad_norm": 0.1314551681280136, "learning_rate": 9.96942259612097e-05, "loss": 0.0126, "step": 2070 }, { "epoch": 0.4007352941176471, "grad_norm": 0.06867225468158722, "learning_rate": 9.969390656053422e-05, "loss": 0.0163, "step": 2071 }, { "epoch": 0.40092879256965946, "grad_norm": 0.2801772356033325, "learning_rate": 9.96935869936978e-05, "loss": 0.0125, "step": 2072 }, { "epoch": 0.40112229102167185, "grad_norm": 0.1595776081085205, "learning_rate": 9.969326726070164e-05, "loss": 0.0114, "step": 2073 }, { "epoch": 0.40131578947368424, "grad_norm": 0.15438221395015717, "learning_rate": 9.969294736154692e-05, "loss": 
0.0135, "step": 2074 }, { "epoch": 0.40150928792569657, "grad_norm": 0.1444838047027588, "learning_rate": 9.969262729623485e-05, "loss": 0.0149, "step": 2075 }, { "epoch": 0.40170278637770895, "grad_norm": 0.13326823711395264, "learning_rate": 9.969230706476658e-05, "loss": 0.0126, "step": 2076 }, { "epoch": 0.40189628482972134, "grad_norm": 0.05935155600309372, "learning_rate": 9.969198666714332e-05, "loss": 0.0115, "step": 2077 }, { "epoch": 0.4020897832817337, "grad_norm": 0.16702312231063843, "learning_rate": 9.969166610336629e-05, "loss": 0.0135, "step": 2078 }, { "epoch": 0.4022832817337461, "grad_norm": 0.06047232076525688, "learning_rate": 9.969134537343666e-05, "loss": 0.0141, "step": 2079 }, { "epoch": 0.4024767801857585, "grad_norm": 0.13783033192157745, "learning_rate": 9.96910244773556e-05, "loss": 0.012, "step": 2080 }, { "epoch": 0.4026702786377709, "grad_norm": 0.09643878787755966, "learning_rate": 9.969070341512433e-05, "loss": 0.012, "step": 2081 }, { "epoch": 0.40286377708978327, "grad_norm": 0.12903690338134766, "learning_rate": 9.969038218674405e-05, "loss": 0.0114, "step": 2082 }, { "epoch": 0.40305727554179566, "grad_norm": 0.0746171697974205, "learning_rate": 9.969006079221591e-05, "loss": 0.0111, "step": 2083 }, { "epoch": 0.40325077399380804, "grad_norm": 0.10764744132757187, "learning_rate": 9.968973923154117e-05, "loss": 0.0104, "step": 2084 }, { "epoch": 0.40344427244582043, "grad_norm": 0.07445418834686279, "learning_rate": 9.968941750472097e-05, "loss": 0.0117, "step": 2085 }, { "epoch": 0.4036377708978328, "grad_norm": 0.09368970990180969, "learning_rate": 9.968909561175654e-05, "loss": 0.0112, "step": 2086 }, { "epoch": 0.4038312693498452, "grad_norm": 0.07737617939710617, "learning_rate": 9.968877355264904e-05, "loss": 0.0112, "step": 2087 }, { "epoch": 0.4040247678018576, "grad_norm": 0.09297838807106018, "learning_rate": 9.968845132739973e-05, "loss": 0.0116, "step": 2088 }, { "epoch": 0.40421826625387, "grad_norm": 0.05587533861398697, "learning_rate": 9.968812893600975e-05, "loss": 0.01, "step": 2089 }, { "epoch": 0.40441176470588236, "grad_norm": 0.07925523072481155, "learning_rate": 9.96878063784803e-05, "loss": 0.0109, "step": 2090 }, { "epoch": 0.40460526315789475, "grad_norm": 0.05425853654742241, "learning_rate": 9.968748365481262e-05, "loss": 0.0108, "step": 2091 }, { "epoch": 0.40479876160990713, "grad_norm": 0.07245011627674103, "learning_rate": 9.968716076500788e-05, "loss": 0.0107, "step": 2092 }, { "epoch": 0.4049922600619195, "grad_norm": 0.08624107390642166, "learning_rate": 9.968683770906731e-05, "loss": 0.0118, "step": 2093 }, { "epoch": 0.4051857585139319, "grad_norm": 0.047367870807647705, "learning_rate": 9.968651448699205e-05, "loss": 0.0127, "step": 2094 }, { "epoch": 0.4053792569659443, "grad_norm": 0.12314682453870773, "learning_rate": 9.968619109878337e-05, "loss": 0.0132, "step": 2095 }, { "epoch": 0.4055727554179567, "grad_norm": 0.05442296713590622, "learning_rate": 9.968586754444242e-05, "loss": 0.0103, "step": 2096 }, { "epoch": 0.40576625386996906, "grad_norm": 0.18322888016700745, "learning_rate": 9.968554382397044e-05, "loss": 0.012, "step": 2097 }, { "epoch": 0.40595975232198145, "grad_norm": 0.06888652592897415, "learning_rate": 9.96852199373686e-05, "loss": 0.0122, "step": 2098 }, { "epoch": 0.40615325077399383, "grad_norm": 0.22195005416870117, "learning_rate": 9.968489588463815e-05, "loss": 0.0151, "step": 2099 }, { "epoch": 0.40634674922600617, "grad_norm": 0.08313322812318802, "learning_rate": 
9.968457166578024e-05, "loss": 0.0132, "step": 2100 }, { "epoch": 0.40654024767801855, "grad_norm": 0.18462280929088593, "learning_rate": 9.968424728079613e-05, "loss": 0.0127, "step": 2101 }, { "epoch": 0.40673374613003094, "grad_norm": 0.1302013099193573, "learning_rate": 9.968392272968698e-05, "loss": 0.011, "step": 2102 }, { "epoch": 0.4069272445820433, "grad_norm": 0.14025382697582245, "learning_rate": 9.968359801245402e-05, "loss": 0.011, "step": 2103 }, { "epoch": 0.4071207430340557, "grad_norm": 0.1390603631734848, "learning_rate": 9.968327312909847e-05, "loss": 0.0115, "step": 2104 }, { "epoch": 0.4073142414860681, "grad_norm": 0.22732901573181152, "learning_rate": 9.96829480796215e-05, "loss": 0.014, "step": 2105 }, { "epoch": 0.4075077399380805, "grad_norm": 0.11389683187007904, "learning_rate": 9.968262286402434e-05, "loss": 0.0103, "step": 2106 }, { "epoch": 0.40770123839009287, "grad_norm": 0.18351079523563385, "learning_rate": 9.968229748230821e-05, "loss": 0.0119, "step": 2107 }, { "epoch": 0.40789473684210525, "grad_norm": 0.14330531656742096, "learning_rate": 9.968197193447429e-05, "loss": 0.0125, "step": 2108 }, { "epoch": 0.40808823529411764, "grad_norm": 0.14108747243881226, "learning_rate": 9.968164622052383e-05, "loss": 0.0112, "step": 2109 }, { "epoch": 0.40828173374613, "grad_norm": 0.1954091191291809, "learning_rate": 9.968132034045801e-05, "loss": 0.0116, "step": 2110 }, { "epoch": 0.4084752321981424, "grad_norm": 0.17855069041252136, "learning_rate": 9.968099429427806e-05, "loss": 0.0126, "step": 2111 }, { "epoch": 0.4086687306501548, "grad_norm": 0.2203456163406372, "learning_rate": 9.968066808198517e-05, "loss": 0.0107, "step": 2112 }, { "epoch": 0.4088622291021672, "grad_norm": 0.13251326978206635, "learning_rate": 9.968034170358056e-05, "loss": 0.0119, "step": 2113 }, { "epoch": 0.40905572755417957, "grad_norm": 0.2511588931083679, "learning_rate": 9.968001515906546e-05, "loss": 0.0114, "step": 2114 }, { "epoch": 0.40924922600619196, "grad_norm": 0.09716958552598953, "learning_rate": 9.967968844844108e-05, "loss": 0.0136, "step": 2115 }, { "epoch": 0.40944272445820434, "grad_norm": 0.22282642126083374, "learning_rate": 9.967936157170861e-05, "loss": 0.0125, "step": 2116 }, { "epoch": 0.40963622291021673, "grad_norm": 0.07145579159259796, "learning_rate": 9.967903452886928e-05, "loss": 0.0115, "step": 2117 }, { "epoch": 0.4098297213622291, "grad_norm": 0.20813478529453278, "learning_rate": 9.967870731992433e-05, "loss": 0.013, "step": 2118 }, { "epoch": 0.4100232198142415, "grad_norm": 0.04960613697767258, "learning_rate": 9.967837994487495e-05, "loss": 0.0122, "step": 2119 }, { "epoch": 0.4102167182662539, "grad_norm": 0.14520631730556488, "learning_rate": 9.967805240372236e-05, "loss": 0.0128, "step": 2120 }, { "epoch": 0.4104102167182663, "grad_norm": 0.061706945300102234, "learning_rate": 9.967772469646777e-05, "loss": 0.0122, "step": 2121 }, { "epoch": 0.41060371517027866, "grad_norm": 0.14995446801185608, "learning_rate": 9.967739682311242e-05, "loss": 0.0101, "step": 2122 }, { "epoch": 0.41079721362229105, "grad_norm": 0.09997021406888962, "learning_rate": 9.967706878365751e-05, "loss": 0.0133, "step": 2123 }, { "epoch": 0.41099071207430343, "grad_norm": 0.15807859599590302, "learning_rate": 9.967674057810427e-05, "loss": 0.0134, "step": 2124 }, { "epoch": 0.41118421052631576, "grad_norm": 0.1011505275964737, "learning_rate": 9.967641220645392e-05, "loss": 0.0135, "step": 2125 }, { "epoch": 0.41137770897832815, "grad_norm": 0.1553584486246109, 
"learning_rate": 9.967608366870769e-05, "loss": 0.011, "step": 2126 }, { "epoch": 0.41157120743034054, "grad_norm": 0.06272876262664795, "learning_rate": 9.967575496486676e-05, "loss": 0.012, "step": 2127 }, { "epoch": 0.4117647058823529, "grad_norm": 0.08257956057786942, "learning_rate": 9.967542609493239e-05, "loss": 0.0098, "step": 2128 }, { "epoch": 0.4119582043343653, "grad_norm": 0.06695422530174255, "learning_rate": 9.96750970589058e-05, "loss": 0.0129, "step": 2129 }, { "epoch": 0.4121517027863777, "grad_norm": 0.07825231552124023, "learning_rate": 9.967476785678823e-05, "loss": 0.0131, "step": 2130 }, { "epoch": 0.4123452012383901, "grad_norm": 0.049820855259895325, "learning_rate": 9.967443848858085e-05, "loss": 0.0124, "step": 2131 }, { "epoch": 0.41253869969040247, "grad_norm": 0.12936441600322723, "learning_rate": 9.967410895428492e-05, "loss": 0.0119, "step": 2132 }, { "epoch": 0.41273219814241485, "grad_norm": 0.10373374819755554, "learning_rate": 9.967377925390167e-05, "loss": 0.0126, "step": 2133 }, { "epoch": 0.41292569659442724, "grad_norm": 0.10187545418739319, "learning_rate": 9.96734493874323e-05, "loss": 0.0125, "step": 2134 }, { "epoch": 0.4131191950464396, "grad_norm": 0.09225388616323471, "learning_rate": 9.967311935487808e-05, "loss": 0.0137, "step": 2135 }, { "epoch": 0.413312693498452, "grad_norm": 0.1314781904220581, "learning_rate": 9.967278915624018e-05, "loss": 0.0127, "step": 2136 }, { "epoch": 0.4135061919504644, "grad_norm": 0.11941248178482056, "learning_rate": 9.967245879151987e-05, "loss": 0.0122, "step": 2137 }, { "epoch": 0.4136996904024768, "grad_norm": 0.12371671944856644, "learning_rate": 9.967212826071837e-05, "loss": 0.013, "step": 2138 }, { "epoch": 0.41389318885448917, "grad_norm": 0.1055801510810852, "learning_rate": 9.967179756383692e-05, "loss": 0.0125, "step": 2139 }, { "epoch": 0.41408668730650156, "grad_norm": 0.1317983716726303, "learning_rate": 9.967146670087672e-05, "loss": 0.0123, "step": 2140 }, { "epoch": 0.41428018575851394, "grad_norm": 0.09620138257741928, "learning_rate": 9.9671135671839e-05, "loss": 0.0135, "step": 2141 }, { "epoch": 0.4144736842105263, "grad_norm": 0.1342541128396988, "learning_rate": 9.967080447672502e-05, "loss": 0.0139, "step": 2142 }, { "epoch": 0.4146671826625387, "grad_norm": 0.0771954283118248, "learning_rate": 9.967047311553599e-05, "loss": 0.0139, "step": 2143 }, { "epoch": 0.4148606811145511, "grad_norm": 0.09194451570510864, "learning_rate": 9.967014158827314e-05, "loss": 0.0139, "step": 2144 }, { "epoch": 0.4150541795665635, "grad_norm": 0.08211933821439743, "learning_rate": 9.966980989493773e-05, "loss": 0.0126, "step": 2145 }, { "epoch": 0.41524767801857587, "grad_norm": 0.10093057155609131, "learning_rate": 9.966947803553096e-05, "loss": 0.0122, "step": 2146 }, { "epoch": 0.41544117647058826, "grad_norm": 0.06561557203531265, "learning_rate": 9.966914601005409e-05, "loss": 0.0121, "step": 2147 }, { "epoch": 0.41563467492260064, "grad_norm": 0.057562991976737976, "learning_rate": 9.966881381850833e-05, "loss": 0.0094, "step": 2148 }, { "epoch": 0.41582817337461303, "grad_norm": 0.11922137439250946, "learning_rate": 9.966848146089494e-05, "loss": 0.0107, "step": 2149 }, { "epoch": 0.41602167182662536, "grad_norm": 0.153501495718956, "learning_rate": 9.966814893721514e-05, "loss": 0.0116, "step": 2150 }, { "epoch": 0.41621517027863775, "grad_norm": 0.07447219640016556, "learning_rate": 9.966781624747016e-05, "loss": 0.0112, "step": 2151 }, { "epoch": 0.41640866873065013, "grad_norm": 
0.10614662617444992, "learning_rate": 9.966748339166125e-05, "loss": 0.0123, "step": 2152 }, { "epoch": 0.4166021671826625, "grad_norm": 0.13264526426792145, "learning_rate": 9.966715036978962e-05, "loss": 0.0116, "step": 2153 }, { "epoch": 0.4167956656346749, "grad_norm": 0.10855519771575928, "learning_rate": 9.966681718185656e-05, "loss": 0.0128, "step": 2154 }, { "epoch": 0.4169891640866873, "grad_norm": 0.10636991262435913, "learning_rate": 9.966648382786326e-05, "loss": 0.0115, "step": 2155 }, { "epoch": 0.4171826625386997, "grad_norm": 0.05826040729880333, "learning_rate": 9.9666150307811e-05, "loss": 0.0097, "step": 2156 }, { "epoch": 0.41737616099071206, "grad_norm": 0.1464260220527649, "learning_rate": 9.9665816621701e-05, "loss": 0.0107, "step": 2157 }, { "epoch": 0.41756965944272445, "grad_norm": 0.04917038977146149, "learning_rate": 9.966548276953447e-05, "loss": 0.0121, "step": 2158 }, { "epoch": 0.41776315789473684, "grad_norm": 0.13641129434108734, "learning_rate": 9.966514875131272e-05, "loss": 0.0129, "step": 2159 }, { "epoch": 0.4179566563467492, "grad_norm": 0.07200634479522705, "learning_rate": 9.966481456703692e-05, "loss": 0.0108, "step": 2160 }, { "epoch": 0.4181501547987616, "grad_norm": 0.144388809800148, "learning_rate": 9.966448021670836e-05, "loss": 0.0126, "step": 2161 }, { "epoch": 0.418343653250774, "grad_norm": 0.05014742910861969, "learning_rate": 9.966414570032827e-05, "loss": 0.0118, "step": 2162 }, { "epoch": 0.4185371517027864, "grad_norm": 0.16605976223945618, "learning_rate": 9.966381101789788e-05, "loss": 0.0124, "step": 2163 }, { "epoch": 0.41873065015479877, "grad_norm": 0.12340700626373291, "learning_rate": 9.966347616941847e-05, "loss": 0.0144, "step": 2164 }, { "epoch": 0.41892414860681115, "grad_norm": 0.2131829559803009, "learning_rate": 9.966314115489124e-05, "loss": 0.0112, "step": 2165 }, { "epoch": 0.41911764705882354, "grad_norm": 0.07171622663736343, "learning_rate": 9.966280597431746e-05, "loss": 0.0119, "step": 2166 }, { "epoch": 0.4193111455108359, "grad_norm": 0.12112367153167725, "learning_rate": 9.966247062769837e-05, "loss": 0.0118, "step": 2167 }, { "epoch": 0.4195046439628483, "grad_norm": 0.10068540275096893, "learning_rate": 9.966213511503523e-05, "loss": 0.012, "step": 2168 }, { "epoch": 0.4196981424148607, "grad_norm": 0.0995708778500557, "learning_rate": 9.966179943632928e-05, "loss": 0.0122, "step": 2169 }, { "epoch": 0.4198916408668731, "grad_norm": 0.07349894940853119, "learning_rate": 9.966146359158176e-05, "loss": 0.0116, "step": 2170 }, { "epoch": 0.42008513931888547, "grad_norm": 0.043236516416072845, "learning_rate": 9.966112758079392e-05, "loss": 0.0137, "step": 2171 }, { "epoch": 0.42027863777089786, "grad_norm": 0.06270784884691238, "learning_rate": 9.9660791403967e-05, "loss": 0.0096, "step": 2172 }, { "epoch": 0.42047213622291024, "grad_norm": 0.04396429657936096, "learning_rate": 9.966045506110228e-05, "loss": 0.0116, "step": 2173 }, { "epoch": 0.42066563467492263, "grad_norm": 0.06599914282560349, "learning_rate": 9.966011855220098e-05, "loss": 0.0128, "step": 2174 }, { "epoch": 0.42085913312693496, "grad_norm": 0.07863711565732956, "learning_rate": 9.965978187726435e-05, "loss": 0.0109, "step": 2175 }, { "epoch": 0.42105263157894735, "grad_norm": 0.05228324979543686, "learning_rate": 9.965944503629368e-05, "loss": 0.0113, "step": 2176 }, { "epoch": 0.42124613003095973, "grad_norm": 0.10974341630935669, "learning_rate": 9.96591080292902e-05, "loss": 0.0133, "step": 2177 }, { "epoch": 0.4214396284829721, 
"grad_norm": 0.06394258886575699, "learning_rate": 9.965877085625516e-05, "loss": 0.0112, "step": 2178 }, { "epoch": 0.4216331269349845, "grad_norm": 0.10413360595703125, "learning_rate": 9.965843351718981e-05, "loss": 0.0128, "step": 2179 }, { "epoch": 0.4218266253869969, "grad_norm": 0.05732932686805725, "learning_rate": 9.96580960120954e-05, "loss": 0.0092, "step": 2180 }, { "epoch": 0.4220201238390093, "grad_norm": 0.10741722583770752, "learning_rate": 9.965775834097321e-05, "loss": 0.0118, "step": 2181 }, { "epoch": 0.42221362229102166, "grad_norm": 0.04081190004944801, "learning_rate": 9.965742050382447e-05, "loss": 0.0111, "step": 2182 }, { "epoch": 0.42240712074303405, "grad_norm": 0.14744070172309875, "learning_rate": 9.965708250065042e-05, "loss": 0.0091, "step": 2183 }, { "epoch": 0.42260061919504643, "grad_norm": 0.048116765916347504, "learning_rate": 9.965674433145237e-05, "loss": 0.0089, "step": 2184 }, { "epoch": 0.4227941176470588, "grad_norm": 0.10300002992153168, "learning_rate": 9.965640599623154e-05, "loss": 0.0111, "step": 2185 }, { "epoch": 0.4229876160990712, "grad_norm": 0.07611729204654694, "learning_rate": 9.965606749498921e-05, "loss": 0.011, "step": 2186 }, { "epoch": 0.4231811145510836, "grad_norm": 0.08763132989406586, "learning_rate": 9.96557288277266e-05, "loss": 0.0118, "step": 2187 }, { "epoch": 0.423374613003096, "grad_norm": 0.12308446317911148, "learning_rate": 9.965538999444502e-05, "loss": 0.0103, "step": 2188 }, { "epoch": 0.42356811145510836, "grad_norm": 0.12222924083471298, "learning_rate": 9.965505099514568e-05, "loss": 0.013, "step": 2189 }, { "epoch": 0.42376160990712075, "grad_norm": 0.06594037264585495, "learning_rate": 9.965471182982987e-05, "loss": 0.0122, "step": 2190 }, { "epoch": 0.42395510835913314, "grad_norm": 0.0728192999958992, "learning_rate": 9.965437249849883e-05, "loss": 0.0094, "step": 2191 }, { "epoch": 0.4241486068111455, "grad_norm": 0.0668802261352539, "learning_rate": 9.965403300115387e-05, "loss": 0.0123, "step": 2192 }, { "epoch": 0.4243421052631579, "grad_norm": 0.06751009076833725, "learning_rate": 9.965369333779621e-05, "loss": 0.0117, "step": 2193 }, { "epoch": 0.4245356037151703, "grad_norm": 0.10518073290586472, "learning_rate": 9.965335350842711e-05, "loss": 0.0141, "step": 2194 }, { "epoch": 0.4247291021671827, "grad_norm": 0.039966803044080734, "learning_rate": 9.965301351304785e-05, "loss": 0.0101, "step": 2195 }, { "epoch": 0.42492260061919507, "grad_norm": 0.11815690994262695, "learning_rate": 9.965267335165968e-05, "loss": 0.0119, "step": 2196 }, { "epoch": 0.42511609907120745, "grad_norm": 0.04186254367232323, "learning_rate": 9.965233302426387e-05, "loss": 0.0107, "step": 2197 }, { "epoch": 0.42530959752321984, "grad_norm": 0.12624242901802063, "learning_rate": 9.965199253086171e-05, "loss": 0.0122, "step": 2198 }, { "epoch": 0.42550309597523217, "grad_norm": 0.053549181669950485, "learning_rate": 9.965165187145444e-05, "loss": 0.0149, "step": 2199 }, { "epoch": 0.42569659442724456, "grad_norm": 0.08855362236499786, "learning_rate": 9.96513110460433e-05, "loss": 0.0125, "step": 2200 }, { "epoch": 0.42589009287925694, "grad_norm": 0.09533292800188065, "learning_rate": 9.965097005462961e-05, "loss": 0.0095, "step": 2201 }, { "epoch": 0.42608359133126933, "grad_norm": 0.0627984032034874, "learning_rate": 9.965062889721463e-05, "loss": 0.0108, "step": 2202 }, { "epoch": 0.4262770897832817, "grad_norm": 0.13569939136505127, "learning_rate": 9.96502875737996e-05, "loss": 0.0118, "step": 2203 }, { "epoch": 
0.4264705882352941, "grad_norm": 0.0865352675318718, "learning_rate": 9.96499460843858e-05, "loss": 0.0108, "step": 2204 }, { "epoch": 0.4266640866873065, "grad_norm": 0.12148880213499069, "learning_rate": 9.96496044289745e-05, "loss": 0.0121, "step": 2205 }, { "epoch": 0.4268575851393189, "grad_norm": 0.06256219744682312, "learning_rate": 9.964926260756698e-05, "loss": 0.0115, "step": 2206 }, { "epoch": 0.42705108359133126, "grad_norm": 0.10600662231445312, "learning_rate": 9.96489206201645e-05, "loss": 0.0116, "step": 2207 }, { "epoch": 0.42724458204334365, "grad_norm": 0.07650887221097946, "learning_rate": 9.964857846676833e-05, "loss": 0.0124, "step": 2208 }, { "epoch": 0.42743808049535603, "grad_norm": 0.03688763454556465, "learning_rate": 9.964823614737976e-05, "loss": 0.0117, "step": 2209 }, { "epoch": 0.4276315789473684, "grad_norm": 0.06278447061777115, "learning_rate": 9.964789366200004e-05, "loss": 0.0117, "step": 2210 }, { "epoch": 0.4278250773993808, "grad_norm": 0.08701730519533157, "learning_rate": 9.964755101063046e-05, "loss": 0.0114, "step": 2211 }, { "epoch": 0.4280185758513932, "grad_norm": 0.07200013846158981, "learning_rate": 9.964720819327226e-05, "loss": 0.012, "step": 2212 }, { "epoch": 0.4282120743034056, "grad_norm": 0.10320968180894852, "learning_rate": 9.964686520992679e-05, "loss": 0.0108, "step": 2213 }, { "epoch": 0.42840557275541796, "grad_norm": 0.08102025091648102, "learning_rate": 9.964652206059524e-05, "loss": 0.0138, "step": 2214 }, { "epoch": 0.42859907120743035, "grad_norm": 0.06749430298805237, "learning_rate": 9.964617874527894e-05, "loss": 0.0168, "step": 2215 }, { "epoch": 0.42879256965944273, "grad_norm": 0.09902665764093399, "learning_rate": 9.964583526397914e-05, "loss": 0.013, "step": 2216 }, { "epoch": 0.4289860681114551, "grad_norm": 0.10300391167402267, "learning_rate": 9.964549161669713e-05, "loss": 0.0118, "step": 2217 }, { "epoch": 0.4291795665634675, "grad_norm": 0.10704667121171951, "learning_rate": 9.964514780343416e-05, "loss": 0.0105, "step": 2218 }, { "epoch": 0.4293730650154799, "grad_norm": 0.10520897060632706, "learning_rate": 9.964480382419156e-05, "loss": 0.0111, "step": 2219 }, { "epoch": 0.4295665634674923, "grad_norm": 0.08458569645881653, "learning_rate": 9.964445967897058e-05, "loss": 0.0118, "step": 2220 }, { "epoch": 0.42976006191950467, "grad_norm": 0.055929120630025864, "learning_rate": 9.964411536777249e-05, "loss": 0.0126, "step": 2221 }, { "epoch": 0.42995356037151705, "grad_norm": 0.040929824113845825, "learning_rate": 9.964377089059857e-05, "loss": 0.0097, "step": 2222 }, { "epoch": 0.43014705882352944, "grad_norm": 0.06825193017721176, "learning_rate": 9.964342624745012e-05, "loss": 0.0117, "step": 2223 }, { "epoch": 0.43034055727554177, "grad_norm": 0.07888229936361313, "learning_rate": 9.96430814383284e-05, "loss": 0.0132, "step": 2224 }, { "epoch": 0.43053405572755415, "grad_norm": 0.09995580464601517, "learning_rate": 9.964273646323474e-05, "loss": 0.0134, "step": 2225 }, { "epoch": 0.43072755417956654, "grad_norm": 0.08388746529817581, "learning_rate": 9.964239132217035e-05, "loss": 0.0131, "step": 2226 }, { "epoch": 0.4309210526315789, "grad_norm": 0.0771464854478836, "learning_rate": 9.964204601513656e-05, "loss": 0.0113, "step": 2227 }, { "epoch": 0.4311145510835913, "grad_norm": 0.10768334567546844, "learning_rate": 9.964170054213465e-05, "loss": 0.0137, "step": 2228 }, { "epoch": 0.4313080495356037, "grad_norm": 0.08199785649776459, "learning_rate": 9.964135490316589e-05, "loss": 0.012, "step": 2229 
}, { "epoch": 0.4315015479876161, "grad_norm": 0.08221236616373062, "learning_rate": 9.964100909823156e-05, "loss": 0.0095, "step": 2230 }, { "epoch": 0.43169504643962847, "grad_norm": 0.08812808990478516, "learning_rate": 9.964066312733298e-05, "loss": 0.0127, "step": 2231 }, { "epoch": 0.43188854489164086, "grad_norm": 0.09663010388612747, "learning_rate": 9.96403169904714e-05, "loss": 0.0117, "step": 2232 }, { "epoch": 0.43208204334365324, "grad_norm": 0.05528079718351364, "learning_rate": 9.963997068764814e-05, "loss": 0.0136, "step": 2233 }, { "epoch": 0.43227554179566563, "grad_norm": 0.0909237340092659, "learning_rate": 9.963962421886446e-05, "loss": 0.016, "step": 2234 }, { "epoch": 0.432469040247678, "grad_norm": 0.05251909792423248, "learning_rate": 9.963927758412165e-05, "loss": 0.0121, "step": 2235 }, { "epoch": 0.4326625386996904, "grad_norm": 0.07415010035037994, "learning_rate": 9.9638930783421e-05, "loss": 0.0138, "step": 2236 }, { "epoch": 0.4328560371517028, "grad_norm": 0.15973302721977234, "learning_rate": 9.963858381676381e-05, "loss": 0.0095, "step": 2237 }, { "epoch": 0.4330495356037152, "grad_norm": 0.10227546095848083, "learning_rate": 9.963823668415136e-05, "loss": 0.0125, "step": 2238 }, { "epoch": 0.43324303405572756, "grad_norm": 0.15554042160511017, "learning_rate": 9.963788938558496e-05, "loss": 0.013, "step": 2239 }, { "epoch": 0.43343653250773995, "grad_norm": 0.10641065984964371, "learning_rate": 9.963754192106588e-05, "loss": 0.0127, "step": 2240 }, { "epoch": 0.43363003095975233, "grad_norm": 0.1357923448085785, "learning_rate": 9.963719429059542e-05, "loss": 0.0124, "step": 2241 }, { "epoch": 0.4338235294117647, "grad_norm": 0.19185996055603027, "learning_rate": 9.963684649417485e-05, "loss": 0.0116, "step": 2242 }, { "epoch": 0.4340170278637771, "grad_norm": 0.1145913302898407, "learning_rate": 9.96364985318055e-05, "loss": 0.0096, "step": 2243 }, { "epoch": 0.4342105263157895, "grad_norm": 0.13344821333885193, "learning_rate": 9.963615040348865e-05, "loss": 0.0122, "step": 2244 }, { "epoch": 0.4344040247678019, "grad_norm": 0.09390295296907425, "learning_rate": 9.96358021092256e-05, "loss": 0.0124, "step": 2245 }, { "epoch": 0.43459752321981426, "grad_norm": 0.10634049028158188, "learning_rate": 9.963545364901762e-05, "loss": 0.015, "step": 2246 }, { "epoch": 0.43479102167182665, "grad_norm": 0.1044473722577095, "learning_rate": 9.963510502286603e-05, "loss": 0.0105, "step": 2247 }, { "epoch": 0.43498452012383904, "grad_norm": 0.04911574348807335, "learning_rate": 9.963475623077211e-05, "loss": 0.011, "step": 2248 }, { "epoch": 0.43517801857585137, "grad_norm": 0.11714940518140793, "learning_rate": 9.963440727273716e-05, "loss": 0.0107, "step": 2249 }, { "epoch": 0.43537151702786375, "grad_norm": 0.05657166987657547, "learning_rate": 9.96340581487625e-05, "loss": 0.0095, "step": 2250 }, { "epoch": 0.43556501547987614, "grad_norm": 0.09115251898765564, "learning_rate": 9.96337088588494e-05, "loss": 0.0118, "step": 2251 }, { "epoch": 0.4357585139318885, "grad_norm": 0.18086694180965424, "learning_rate": 9.963335940299917e-05, "loss": 0.0109, "step": 2252 }, { "epoch": 0.4359520123839009, "grad_norm": 0.1241464763879776, "learning_rate": 9.96330097812131e-05, "loss": 0.0103, "step": 2253 }, { "epoch": 0.4361455108359133, "grad_norm": 0.2452206164598465, "learning_rate": 9.96326599934925e-05, "loss": 0.011, "step": 2254 }, { "epoch": 0.4363390092879257, "grad_norm": 0.18206381797790527, "learning_rate": 9.963231003983869e-05, "loss": 0.0114, "step": 2255 
}, { "epoch": 0.43653250773993807, "grad_norm": 0.18023820221424103, "learning_rate": 9.963195992025293e-05, "loss": 0.0124, "step": 2256 }, { "epoch": 0.43672600619195046, "grad_norm": 0.08778563141822815, "learning_rate": 9.963160963473653e-05, "loss": 0.0127, "step": 2257 }, { "epoch": 0.43691950464396284, "grad_norm": 0.09586875140666962, "learning_rate": 9.963125918329082e-05, "loss": 0.0129, "step": 2258 }, { "epoch": 0.4371130030959752, "grad_norm": 0.12556131184101105, "learning_rate": 9.963090856591707e-05, "loss": 0.015, "step": 2259 }, { "epoch": 0.4373065015479876, "grad_norm": 0.08544368296861649, "learning_rate": 9.963055778261662e-05, "loss": 0.0132, "step": 2260 }, { "epoch": 0.4375, "grad_norm": 0.19949693977832794, "learning_rate": 9.963020683339074e-05, "loss": 0.0125, "step": 2261 }, { "epoch": 0.4376934984520124, "grad_norm": 0.09095504134893417, "learning_rate": 9.962985571824076e-05, "loss": 0.0115, "step": 2262 }, { "epoch": 0.4378869969040248, "grad_norm": 0.3369191288948059, "learning_rate": 9.962950443716796e-05, "loss": 0.0114, "step": 2263 }, { "epoch": 0.43808049535603716, "grad_norm": 0.13159602880477905, "learning_rate": 9.962915299017366e-05, "loss": 0.0129, "step": 2264 }, { "epoch": 0.43827399380804954, "grad_norm": 0.3652958273887634, "learning_rate": 9.962880137725918e-05, "loss": 0.0133, "step": 2265 }, { "epoch": 0.43846749226006193, "grad_norm": 0.23706123232841492, "learning_rate": 9.962844959842579e-05, "loss": 0.0133, "step": 2266 }, { "epoch": 0.4386609907120743, "grad_norm": 0.3422769606113434, "learning_rate": 9.962809765367485e-05, "loss": 0.0156, "step": 2267 }, { "epoch": 0.4388544891640867, "grad_norm": 0.2887646555900574, "learning_rate": 9.962774554300762e-05, "loss": 0.0104, "step": 2268 }, { "epoch": 0.4390479876160991, "grad_norm": 0.30588099360466003, "learning_rate": 9.962739326642543e-05, "loss": 0.0155, "step": 2269 }, { "epoch": 0.4392414860681115, "grad_norm": 0.23551614582538605, "learning_rate": 9.962704082392959e-05, "loss": 0.0133, "step": 2270 }, { "epoch": 0.43943498452012386, "grad_norm": 0.21001701056957245, "learning_rate": 9.962668821552141e-05, "loss": 0.0121, "step": 2271 }, { "epoch": 0.43962848297213625, "grad_norm": 0.23689502477645874, "learning_rate": 9.962633544120221e-05, "loss": 0.0128, "step": 2272 }, { "epoch": 0.43982198142414863, "grad_norm": 0.1140151396393776, "learning_rate": 9.962598250097327e-05, "loss": 0.0109, "step": 2273 }, { "epoch": 0.44001547987616096, "grad_norm": 0.4514857530593872, "learning_rate": 9.962562939483594e-05, "loss": 0.0137, "step": 2274 }, { "epoch": 0.44020897832817335, "grad_norm": 0.08271913230419159, "learning_rate": 9.96252761227915e-05, "loss": 0.0121, "step": 2275 }, { "epoch": 0.44040247678018574, "grad_norm": 0.6216798424720764, "learning_rate": 9.96249226848413e-05, "loss": 0.0113, "step": 2276 }, { "epoch": 0.4405959752321981, "grad_norm": 0.07010726630687714, "learning_rate": 9.962456908098662e-05, "loss": 0.0118, "step": 2277 }, { "epoch": 0.4407894736842105, "grad_norm": 0.39332255721092224, "learning_rate": 9.962421531122879e-05, "loss": 0.0143, "step": 2278 }, { "epoch": 0.4409829721362229, "grad_norm": 0.33129921555519104, "learning_rate": 9.962386137556913e-05, "loss": 0.012, "step": 2279 }, { "epoch": 0.4411764705882353, "grad_norm": 0.1787526160478592, "learning_rate": 9.962350727400894e-05, "loss": 0.0136, "step": 2280 }, { "epoch": 0.44136996904024767, "grad_norm": 0.31230053305625916, "learning_rate": 9.962315300654955e-05, "loss": 0.0142, "step": 2281 }, 
{ "epoch": 0.44156346749226005, "grad_norm": 0.09787870198488235, "learning_rate": 9.962279857319225e-05, "loss": 0.013, "step": 2282 }, { "epoch": 0.44175696594427244, "grad_norm": 0.18851903080940247, "learning_rate": 9.962244397393841e-05, "loss": 0.0121, "step": 2283 }, { "epoch": 0.4419504643962848, "grad_norm": 0.084090955555439, "learning_rate": 9.96220892087893e-05, "loss": 0.0122, "step": 2284 }, { "epoch": 0.4421439628482972, "grad_norm": 0.12046699970960617, "learning_rate": 9.962173427774627e-05, "loss": 0.0114, "step": 2285 }, { "epoch": 0.4423374613003096, "grad_norm": 0.061641741544008255, "learning_rate": 9.962137918081062e-05, "loss": 0.0105, "step": 2286 }, { "epoch": 0.442530959752322, "grad_norm": 0.15680904686450958, "learning_rate": 9.96210239179837e-05, "loss": 0.0124, "step": 2287 }, { "epoch": 0.44272445820433437, "grad_norm": 0.10818386822938919, "learning_rate": 9.962066848926677e-05, "loss": 0.0137, "step": 2288 }, { "epoch": 0.44291795665634676, "grad_norm": 0.12814830243587494, "learning_rate": 9.962031289466122e-05, "loss": 0.0119, "step": 2289 }, { "epoch": 0.44311145510835914, "grad_norm": 0.2344737946987152, "learning_rate": 9.961995713416833e-05, "loss": 0.0102, "step": 2290 }, { "epoch": 0.44330495356037153, "grad_norm": 0.13080723583698273, "learning_rate": 9.961960120778943e-05, "loss": 0.0117, "step": 2291 }, { "epoch": 0.4434984520123839, "grad_norm": 0.20549188554286957, "learning_rate": 9.961924511552584e-05, "loss": 0.0121, "step": 2292 }, { "epoch": 0.4436919504643963, "grad_norm": 0.06702469289302826, "learning_rate": 9.961888885737892e-05, "loss": 0.0103, "step": 2293 }, { "epoch": 0.4438854489164087, "grad_norm": 0.22627294063568115, "learning_rate": 9.961853243334994e-05, "loss": 0.0122, "step": 2294 }, { "epoch": 0.4440789473684211, "grad_norm": 0.09711122512817383, "learning_rate": 9.961817584344025e-05, "loss": 0.015, "step": 2295 }, { "epoch": 0.44427244582043346, "grad_norm": 0.15793873369693756, "learning_rate": 9.961781908765119e-05, "loss": 0.0144, "step": 2296 }, { "epoch": 0.44446594427244585, "grad_norm": 0.11144180595874786, "learning_rate": 9.961746216598406e-05, "loss": 0.012, "step": 2297 }, { "epoch": 0.44465944272445823, "grad_norm": 0.11080120503902435, "learning_rate": 9.96171050784402e-05, "loss": 0.0114, "step": 2298 }, { "epoch": 0.44485294117647056, "grad_norm": 0.17625094950199127, "learning_rate": 9.961674782502095e-05, "loss": 0.0123, "step": 2299 }, { "epoch": 0.44504643962848295, "grad_norm": 0.10053074359893799, "learning_rate": 9.961639040572763e-05, "loss": 0.0103, "step": 2300 }, { "epoch": 0.44523993808049533, "grad_norm": 0.145124152302742, "learning_rate": 9.961603282056155e-05, "loss": 0.0107, "step": 2301 }, { "epoch": 0.4454334365325077, "grad_norm": 0.06295774132013321, "learning_rate": 9.961567506952406e-05, "loss": 0.0124, "step": 2302 }, { "epoch": 0.4456269349845201, "grad_norm": 0.08116191625595093, "learning_rate": 9.961531715261648e-05, "loss": 0.0108, "step": 2303 }, { "epoch": 0.4458204334365325, "grad_norm": 0.11839618533849716, "learning_rate": 9.961495906984015e-05, "loss": 0.0116, "step": 2304 }, { "epoch": 0.4460139318885449, "grad_norm": 0.14030970633029938, "learning_rate": 9.96146008211964e-05, "loss": 0.0134, "step": 2305 }, { "epoch": 0.44620743034055727, "grad_norm": 0.12832967936992645, "learning_rate": 9.961424240668655e-05, "loss": 0.0121, "step": 2306 }, { "epoch": 0.44640092879256965, "grad_norm": 0.20974130928516388, "learning_rate": 9.961388382631192e-05, "loss": 0.0127, 
"step": 2307 }, { "epoch": 0.44659442724458204, "grad_norm": 0.15759794414043427, "learning_rate": 9.961352508007389e-05, "loss": 0.0146, "step": 2308 }, { "epoch": 0.4467879256965944, "grad_norm": 0.22074399888515472, "learning_rate": 9.961316616797375e-05, "loss": 0.0136, "step": 2309 }, { "epoch": 0.4469814241486068, "grad_norm": 0.22506171464920044, "learning_rate": 9.961280709001286e-05, "loss": 0.0125, "step": 2310 }, { "epoch": 0.4471749226006192, "grad_norm": 0.16544274985790253, "learning_rate": 9.961244784619255e-05, "loss": 0.0129, "step": 2311 }, { "epoch": 0.4473684210526316, "grad_norm": 0.3114984333515167, "learning_rate": 9.961208843651414e-05, "loss": 0.0111, "step": 2312 }, { "epoch": 0.44756191950464397, "grad_norm": 0.08858996629714966, "learning_rate": 9.961172886097898e-05, "loss": 0.0115, "step": 2313 }, { "epoch": 0.44775541795665635, "grad_norm": 0.2877676486968994, "learning_rate": 9.96113691195884e-05, "loss": 0.0139, "step": 2314 }, { "epoch": 0.44794891640866874, "grad_norm": 0.17780381441116333, "learning_rate": 9.961100921234377e-05, "loss": 0.012, "step": 2315 }, { "epoch": 0.4481424148606811, "grad_norm": 0.24033594131469727, "learning_rate": 9.961064913924639e-05, "loss": 0.0108, "step": 2316 }, { "epoch": 0.4483359133126935, "grad_norm": 0.25513193011283875, "learning_rate": 9.961028890029758e-05, "loss": 0.0113, "step": 2317 }, { "epoch": 0.4485294117647059, "grad_norm": 0.17286227643489838, "learning_rate": 9.960992849549874e-05, "loss": 0.012, "step": 2318 }, { "epoch": 0.4487229102167183, "grad_norm": 0.22128596901893616, "learning_rate": 9.960956792485116e-05, "loss": 0.0127, "step": 2319 }, { "epoch": 0.44891640866873067, "grad_norm": 0.0737045407295227, "learning_rate": 9.960920718835621e-05, "loss": 0.0142, "step": 2320 }, { "epoch": 0.44910990712074306, "grad_norm": 0.24291378259658813, "learning_rate": 9.960884628601522e-05, "loss": 0.014, "step": 2321 }, { "epoch": 0.44930340557275544, "grad_norm": 0.08821136504411697, "learning_rate": 9.960848521782951e-05, "loss": 0.0128, "step": 2322 }, { "epoch": 0.44949690402476783, "grad_norm": 0.1903783529996872, "learning_rate": 9.960812398380047e-05, "loss": 0.0129, "step": 2323 }, { "epoch": 0.44969040247678016, "grad_norm": 0.06139199808239937, "learning_rate": 9.96077625839294e-05, "loss": 0.0096, "step": 2324 }, { "epoch": 0.44988390092879255, "grad_norm": 0.1648493856191635, "learning_rate": 9.960740101821768e-05, "loss": 0.0112, "step": 2325 }, { "epoch": 0.45007739938080493, "grad_norm": 0.09622551500797272, "learning_rate": 9.960703928666661e-05, "loss": 0.0115, "step": 2326 }, { "epoch": 0.4502708978328173, "grad_norm": 0.1979985237121582, "learning_rate": 9.960667738927757e-05, "loss": 0.0099, "step": 2327 }, { "epoch": 0.4504643962848297, "grad_norm": 0.10891932994127274, "learning_rate": 9.960631532605191e-05, "loss": 0.0086, "step": 2328 }, { "epoch": 0.4506578947368421, "grad_norm": 0.1670670211315155, "learning_rate": 9.960595309699094e-05, "loss": 0.0113, "step": 2329 }, { "epoch": 0.4508513931888545, "grad_norm": 0.17468835413455963, "learning_rate": 9.960559070209603e-05, "loss": 0.0119, "step": 2330 }, { "epoch": 0.45104489164086686, "grad_norm": 0.07464418560266495, "learning_rate": 9.960522814136853e-05, "loss": 0.0128, "step": 2331 }, { "epoch": 0.45123839009287925, "grad_norm": 0.19414551556110382, "learning_rate": 9.960486541480978e-05, "loss": 0.0119, "step": 2332 }, { "epoch": 0.45143188854489164, "grad_norm": 0.11407545953989029, "learning_rate": 9.960450252242114e-05, 
"loss": 0.0102, "step": 2333 }, { "epoch": 0.451625386996904, "grad_norm": 0.16081172227859497, "learning_rate": 9.960413946420394e-05, "loss": 0.0101, "step": 2334 }, { "epoch": 0.4518188854489164, "grad_norm": 0.10437676310539246, "learning_rate": 9.960377624015955e-05, "loss": 0.0103, "step": 2335 }, { "epoch": 0.4520123839009288, "grad_norm": 0.09663363546133041, "learning_rate": 9.96034128502893e-05, "loss": 0.0124, "step": 2336 }, { "epoch": 0.4522058823529412, "grad_norm": 0.08783163130283356, "learning_rate": 9.960304929459456e-05, "loss": 0.0127, "step": 2337 }, { "epoch": 0.45239938080495357, "grad_norm": 0.06386353820562363, "learning_rate": 9.960268557307666e-05, "loss": 0.0103, "step": 2338 }, { "epoch": 0.45259287925696595, "grad_norm": 0.09296710044145584, "learning_rate": 9.9602321685737e-05, "loss": 0.0111, "step": 2339 }, { "epoch": 0.45278637770897834, "grad_norm": 0.07470618933439255, "learning_rate": 9.960195763257686e-05, "loss": 0.0119, "step": 2340 }, { "epoch": 0.4529798761609907, "grad_norm": 0.11212745308876038, "learning_rate": 9.960159341359764e-05, "loss": 0.0129, "step": 2341 }, { "epoch": 0.4531733746130031, "grad_norm": 0.09163174778223038, "learning_rate": 9.96012290288007e-05, "loss": 0.0119, "step": 2342 }, { "epoch": 0.4533668730650155, "grad_norm": 0.09536664932966232, "learning_rate": 9.960086447818737e-05, "loss": 0.0104, "step": 2343 }, { "epoch": 0.4535603715170279, "grad_norm": 0.13869601488113403, "learning_rate": 9.9600499761759e-05, "loss": 0.0079, "step": 2344 }, { "epoch": 0.45375386996904027, "grad_norm": 0.08428574353456497, "learning_rate": 9.960013487951699e-05, "loss": 0.0118, "step": 2345 }, { "epoch": 0.45394736842105265, "grad_norm": 0.05915733799338341, "learning_rate": 9.959976983146265e-05, "loss": 0.0104, "step": 2346 }, { "epoch": 0.45414086687306504, "grad_norm": 0.09802743047475815, "learning_rate": 9.959940461759735e-05, "loss": 0.0118, "step": 2347 }, { "epoch": 0.45433436532507737, "grad_norm": 0.08835373818874359, "learning_rate": 9.959903923792249e-05, "loss": 0.0117, "step": 2348 }, { "epoch": 0.45452786377708976, "grad_norm": 0.08758627623319626, "learning_rate": 9.959867369243936e-05, "loss": 0.0103, "step": 2349 }, { "epoch": 0.45472136222910214, "grad_norm": 0.1264006644487381, "learning_rate": 9.959830798114935e-05, "loss": 0.0132, "step": 2350 }, { "epoch": 0.45491486068111453, "grad_norm": 0.11156158149242401, "learning_rate": 9.959794210405383e-05, "loss": 0.0146, "step": 2351 }, { "epoch": 0.4551083591331269, "grad_norm": 0.13943754136562347, "learning_rate": 9.959757606115416e-05, "loss": 0.0132, "step": 2352 }, { "epoch": 0.4553018575851393, "grad_norm": 0.09099696576595306, "learning_rate": 9.959720985245166e-05, "loss": 0.0127, "step": 2353 }, { "epoch": 0.4554953560371517, "grad_norm": 0.10414217412471771, "learning_rate": 9.959684347794775e-05, "loss": 0.0103, "step": 2354 }, { "epoch": 0.4556888544891641, "grad_norm": 0.17904305458068848, "learning_rate": 9.959647693764376e-05, "loss": 0.0121, "step": 2355 }, { "epoch": 0.45588235294117646, "grad_norm": 0.10088086873292923, "learning_rate": 9.959611023154104e-05, "loss": 0.0118, "step": 2356 }, { "epoch": 0.45607585139318885, "grad_norm": 0.2251967489719391, "learning_rate": 9.9595743359641e-05, "loss": 0.0146, "step": 2357 }, { "epoch": 0.45626934984520123, "grad_norm": 0.08166122436523438, "learning_rate": 9.959537632194496e-05, "loss": 0.012, "step": 2358 }, { "epoch": 0.4564628482972136, "grad_norm": 0.19452041387557983, "learning_rate": 
9.95950091184543e-05, "loss": 0.0131, "step": 2359 }, { "epoch": 0.456656346749226, "grad_norm": 0.0809381827712059, "learning_rate": 9.959464174917039e-05, "loss": 0.013, "step": 2360 }, { "epoch": 0.4568498452012384, "grad_norm": 0.16417358815670013, "learning_rate": 9.959427421409457e-05, "loss": 0.0132, "step": 2361 }, { "epoch": 0.4570433436532508, "grad_norm": 0.05035915598273277, "learning_rate": 9.959390651322826e-05, "loss": 0.0131, "step": 2362 }, { "epoch": 0.45723684210526316, "grad_norm": 0.09806675463914871, "learning_rate": 9.959353864657276e-05, "loss": 0.0105, "step": 2363 }, { "epoch": 0.45743034055727555, "grad_norm": 0.1038389727473259, "learning_rate": 9.95931706141295e-05, "loss": 0.0114, "step": 2364 }, { "epoch": 0.45762383900928794, "grad_norm": 0.1313261240720749, "learning_rate": 9.95928024158998e-05, "loss": 0.0136, "step": 2365 }, { "epoch": 0.4578173374613003, "grad_norm": 0.0715220645070076, "learning_rate": 9.959243405188506e-05, "loss": 0.0101, "step": 2366 }, { "epoch": 0.4580108359133127, "grad_norm": 0.12178105115890503, "learning_rate": 9.959206552208664e-05, "loss": 0.014, "step": 2367 }, { "epoch": 0.4582043343653251, "grad_norm": 0.09321141242980957, "learning_rate": 9.959169682650587e-05, "loss": 0.0119, "step": 2368 }, { "epoch": 0.4583978328173375, "grad_norm": 0.13944290578365326, "learning_rate": 9.959132796514421e-05, "loss": 0.0124, "step": 2369 }, { "epoch": 0.45859133126934987, "grad_norm": 0.11588766425848007, "learning_rate": 9.959095893800297e-05, "loss": 0.0108, "step": 2370 }, { "epoch": 0.45878482972136225, "grad_norm": 0.1913560926914215, "learning_rate": 9.959058974508351e-05, "loss": 0.0131, "step": 2371 }, { "epoch": 0.45897832817337464, "grad_norm": 0.08695844560861588, "learning_rate": 9.959022038638724e-05, "loss": 0.012, "step": 2372 }, { "epoch": 0.45917182662538697, "grad_norm": 0.16446244716644287, "learning_rate": 9.958985086191551e-05, "loss": 0.0099, "step": 2373 }, { "epoch": 0.45936532507739936, "grad_norm": 0.13496756553649902, "learning_rate": 9.958948117166971e-05, "loss": 0.0095, "step": 2374 }, { "epoch": 0.45955882352941174, "grad_norm": 0.1711568981409073, "learning_rate": 9.958911131565121e-05, "loss": 0.0104, "step": 2375 }, { "epoch": 0.45975232198142413, "grad_norm": 0.12870077788829803, "learning_rate": 9.958874129386136e-05, "loss": 0.0142, "step": 2376 }, { "epoch": 0.4599458204334365, "grad_norm": 0.11202190816402435, "learning_rate": 9.958837110630157e-05, "loss": 0.0116, "step": 2377 }, { "epoch": 0.4601393188854489, "grad_norm": 0.17766176164150238, "learning_rate": 9.958800075297321e-05, "loss": 0.0108, "step": 2378 }, { "epoch": 0.4603328173374613, "grad_norm": 0.07119185477495193, "learning_rate": 9.958763023387764e-05, "loss": 0.0113, "step": 2379 }, { "epoch": 0.4605263157894737, "grad_norm": 0.31916067004203796, "learning_rate": 9.958725954901625e-05, "loss": 0.0124, "step": 2380 }, { "epoch": 0.46071981424148606, "grad_norm": 0.06923111528158188, "learning_rate": 9.958688869839042e-05, "loss": 0.0121, "step": 2381 }, { "epoch": 0.46091331269349844, "grad_norm": 0.24351155757904053, "learning_rate": 9.95865176820015e-05, "loss": 0.0114, "step": 2382 }, { "epoch": 0.46110681114551083, "grad_norm": 0.11048676818609238, "learning_rate": 9.958614649985094e-05, "loss": 0.0126, "step": 2383 }, { "epoch": 0.4613003095975232, "grad_norm": 0.1589275598526001, "learning_rate": 9.958577515194005e-05, "loss": 0.0108, "step": 2384 }, { "epoch": 0.4614938080495356, "grad_norm": 0.10104209929704666, 
"learning_rate": 9.958540363827023e-05, "loss": 0.0131, "step": 2385 }, { "epoch": 0.461687306501548, "grad_norm": 0.18050381541252136, "learning_rate": 9.958503195884287e-05, "loss": 0.0123, "step": 2386 }, { "epoch": 0.4618808049535604, "grad_norm": 0.07039568573236465, "learning_rate": 9.958466011365935e-05, "loss": 0.0122, "step": 2387 }, { "epoch": 0.46207430340557276, "grad_norm": 0.1368512362241745, "learning_rate": 9.958428810272103e-05, "loss": 0.0121, "step": 2388 }, { "epoch": 0.46226780185758515, "grad_norm": 0.06788787990808487, "learning_rate": 9.958391592602934e-05, "loss": 0.0123, "step": 2389 }, { "epoch": 0.46246130030959753, "grad_norm": 0.13173893094062805, "learning_rate": 9.958354358358561e-05, "loss": 0.0116, "step": 2390 }, { "epoch": 0.4626547987616099, "grad_norm": 0.05810985714197159, "learning_rate": 9.958317107539126e-05, "loss": 0.0142, "step": 2391 }, { "epoch": 0.4628482972136223, "grad_norm": 0.09260295331478119, "learning_rate": 9.958279840144768e-05, "loss": 0.0115, "step": 2392 }, { "epoch": 0.4630417956656347, "grad_norm": 0.06104697659611702, "learning_rate": 9.958242556175624e-05, "loss": 0.012, "step": 2393 }, { "epoch": 0.4632352941176471, "grad_norm": 0.12242823839187622, "learning_rate": 9.95820525563183e-05, "loss": 0.012, "step": 2394 }, { "epoch": 0.46342879256965946, "grad_norm": 0.13231243193149567, "learning_rate": 9.95816793851353e-05, "loss": 0.0119, "step": 2395 }, { "epoch": 0.46362229102167185, "grad_norm": 0.13919572532176971, "learning_rate": 9.95813060482086e-05, "loss": 0.0122, "step": 2396 }, { "epoch": 0.46381578947368424, "grad_norm": 0.2118324190378189, "learning_rate": 9.958093254553959e-05, "loss": 0.0124, "step": 2397 }, { "epoch": 0.46400928792569657, "grad_norm": 0.09487602114677429, "learning_rate": 9.958055887712964e-05, "loss": 0.0121, "step": 2398 }, { "epoch": 0.46420278637770895, "grad_norm": 0.31297194957733154, "learning_rate": 9.958018504298017e-05, "loss": 0.0145, "step": 2399 }, { "epoch": 0.46439628482972134, "grad_norm": 0.09637273102998734, "learning_rate": 9.957981104309255e-05, "loss": 0.0142, "step": 2400 }, { "epoch": 0.4645897832817337, "grad_norm": 0.2610211968421936, "learning_rate": 9.957943687746819e-05, "loss": 0.0143, "step": 2401 }, { "epoch": 0.4647832817337461, "grad_norm": 0.06892063468694687, "learning_rate": 9.957906254610845e-05, "loss": 0.0128, "step": 2402 }, { "epoch": 0.4649767801857585, "grad_norm": 0.19836218655109406, "learning_rate": 9.957868804901475e-05, "loss": 0.0116, "step": 2403 }, { "epoch": 0.4651702786377709, "grad_norm": 0.1283164620399475, "learning_rate": 9.957831338618849e-05, "loss": 0.0123, "step": 2404 }, { "epoch": 0.46536377708978327, "grad_norm": 0.12690958380699158, "learning_rate": 9.9577938557631e-05, "loss": 0.0105, "step": 2405 }, { "epoch": 0.46555727554179566, "grad_norm": 0.16911645233631134, "learning_rate": 9.957756356334375e-05, "loss": 0.0133, "step": 2406 }, { "epoch": 0.46575077399380804, "grad_norm": 0.049652017652988434, "learning_rate": 9.957718840332811e-05, "loss": 0.0106, "step": 2407 }, { "epoch": 0.46594427244582043, "grad_norm": 0.15817047655582428, "learning_rate": 9.957681307758544e-05, "loss": 0.0106, "step": 2408 }, { "epoch": 0.4661377708978328, "grad_norm": 0.04939301684498787, "learning_rate": 9.957643758611719e-05, "loss": 0.0112, "step": 2409 }, { "epoch": 0.4663312693498452, "grad_norm": 0.16764745116233826, "learning_rate": 9.957606192892471e-05, "loss": 0.0134, "step": 2410 }, { "epoch": 0.4665247678018576, "grad_norm": 
0.058939378708601, "learning_rate": 9.957568610600943e-05, "loss": 0.0117, "step": 2411 }, { "epoch": 0.46671826625387, "grad_norm": 0.07929743081331253, "learning_rate": 9.957531011737272e-05, "loss": 0.0132, "step": 2412 }, { "epoch": 0.46691176470588236, "grad_norm": 0.06264480203390121, "learning_rate": 9.9574933963016e-05, "loss": 0.0118, "step": 2413 }, { "epoch": 0.46710526315789475, "grad_norm": 0.1377285122871399, "learning_rate": 9.957455764294066e-05, "loss": 0.0119, "step": 2414 }, { "epoch": 0.46729876160990713, "grad_norm": 0.07232289761304855, "learning_rate": 9.95741811571481e-05, "loss": 0.0127, "step": 2415 }, { "epoch": 0.4674922600619195, "grad_norm": 0.16336914896965027, "learning_rate": 9.957380450563971e-05, "loss": 0.0111, "step": 2416 }, { "epoch": 0.4676857585139319, "grad_norm": 0.09606737643480301, "learning_rate": 9.95734276884169e-05, "loss": 0.0106, "step": 2417 }, { "epoch": 0.4678792569659443, "grad_norm": 0.11110451072454453, "learning_rate": 9.957305070548106e-05, "loss": 0.0129, "step": 2418 }, { "epoch": 0.4680727554179567, "grad_norm": 0.1669985055923462, "learning_rate": 9.95726735568336e-05, "loss": 0.0121, "step": 2419 }, { "epoch": 0.46826625386996906, "grad_norm": 0.05375632271170616, "learning_rate": 9.957229624247595e-05, "loss": 0.0119, "step": 2420 }, { "epoch": 0.46845975232198145, "grad_norm": 0.11868774145841599, "learning_rate": 9.957191876240947e-05, "loss": 0.0114, "step": 2421 }, { "epoch": 0.46865325077399383, "grad_norm": 0.08535285294055939, "learning_rate": 9.957154111663558e-05, "loss": 0.0112, "step": 2422 }, { "epoch": 0.46884674922600617, "grad_norm": 0.10091312229633331, "learning_rate": 9.957116330515567e-05, "loss": 0.0111, "step": 2423 }, { "epoch": 0.46904024767801855, "grad_norm": 0.06618925929069519, "learning_rate": 9.957078532797118e-05, "loss": 0.0117, "step": 2424 }, { "epoch": 0.46923374613003094, "grad_norm": 0.13196854293346405, "learning_rate": 9.957040718508347e-05, "loss": 0.0114, "step": 2425 }, { "epoch": 0.4694272445820433, "grad_norm": 0.06106061860918999, "learning_rate": 9.957002887649398e-05, "loss": 0.0103, "step": 2426 }, { "epoch": 0.4696207430340557, "grad_norm": 0.08863005042076111, "learning_rate": 9.95696504022041e-05, "loss": 0.0135, "step": 2427 }, { "epoch": 0.4698142414860681, "grad_norm": 0.14307957887649536, "learning_rate": 9.956927176221524e-05, "loss": 0.0135, "step": 2428 }, { "epoch": 0.4700077399380805, "grad_norm": 0.05895422026515007, "learning_rate": 9.956889295652881e-05, "loss": 0.0106, "step": 2429 }, { "epoch": 0.47020123839009287, "grad_norm": 0.1477252095937729, "learning_rate": 9.956851398514624e-05, "loss": 0.0135, "step": 2430 }, { "epoch": 0.47039473684210525, "grad_norm": 0.0957057848572731, "learning_rate": 9.956813484806889e-05, "loss": 0.0125, "step": 2431 }, { "epoch": 0.47058823529411764, "grad_norm": 0.10021769255399704, "learning_rate": 9.956775554529822e-05, "loss": 0.0105, "step": 2432 }, { "epoch": 0.47078173374613, "grad_norm": 0.142643541097641, "learning_rate": 9.956737607683558e-05, "loss": 0.0089, "step": 2433 }, { "epoch": 0.4709752321981424, "grad_norm": 0.05478321760892868, "learning_rate": 9.956699644268245e-05, "loss": 0.0117, "step": 2434 }, { "epoch": 0.4711687306501548, "grad_norm": 0.12531252205371857, "learning_rate": 9.95666166428402e-05, "loss": 0.0101, "step": 2435 }, { "epoch": 0.4713622291021672, "grad_norm": 0.07119978964328766, "learning_rate": 9.956623667731025e-05, "loss": 0.01, "step": 2436 }, { "epoch": 0.47155572755417957, "grad_norm": 
0.039182960987091064, "learning_rate": 9.956585654609401e-05, "loss": 0.0119, "step": 2437 }, { "epoch": 0.47174922600619196, "grad_norm": 0.06226644292473793, "learning_rate": 9.956547624919289e-05, "loss": 0.0094, "step": 2438 }, { "epoch": 0.47194272445820434, "grad_norm": 0.05368490889668465, "learning_rate": 9.956509578660834e-05, "loss": 0.0121, "step": 2439 }, { "epoch": 0.47213622291021673, "grad_norm": 0.049217965453863144, "learning_rate": 9.956471515834171e-05, "loss": 0.0117, "step": 2440 }, { "epoch": 0.4723297213622291, "grad_norm": 0.053302422165870667, "learning_rate": 9.956433436439447e-05, "loss": 0.0121, "step": 2441 }, { "epoch": 0.4725232198142415, "grad_norm": 0.08847897499799728, "learning_rate": 9.956395340476801e-05, "loss": 0.0104, "step": 2442 }, { "epoch": 0.4727167182662539, "grad_norm": 0.07281530648469925, "learning_rate": 9.956357227946376e-05, "loss": 0.0098, "step": 2443 }, { "epoch": 0.4729102167182663, "grad_norm": 0.10925140976905823, "learning_rate": 9.956319098848312e-05, "loss": 0.0117, "step": 2444 }, { "epoch": 0.47310371517027866, "grad_norm": 0.038399044424295425, "learning_rate": 9.956280953182752e-05, "loss": 0.0102, "step": 2445 }, { "epoch": 0.47329721362229105, "grad_norm": 0.07885587215423584, "learning_rate": 9.956242790949837e-05, "loss": 0.0111, "step": 2446 }, { "epoch": 0.47349071207430343, "grad_norm": 0.0982273742556572, "learning_rate": 9.95620461214971e-05, "loss": 0.0092, "step": 2447 }, { "epoch": 0.47368421052631576, "grad_norm": 0.07010240107774734, "learning_rate": 9.956166416782513e-05, "loss": 0.0104, "step": 2448 }, { "epoch": 0.47387770897832815, "grad_norm": 0.09095319360494614, "learning_rate": 9.956128204848386e-05, "loss": 0.0108, "step": 2449 }, { "epoch": 0.47407120743034054, "grad_norm": 0.1620589941740036, "learning_rate": 9.956089976347472e-05, "loss": 0.0117, "step": 2450 }, { "epoch": 0.4742647058823529, "grad_norm": 0.08809414505958557, "learning_rate": 9.956051731279914e-05, "loss": 0.0125, "step": 2451 }, { "epoch": 0.4744582043343653, "grad_norm": 0.1122463122010231, "learning_rate": 9.956013469645854e-05, "loss": 0.0089, "step": 2452 }, { "epoch": 0.4746517027863777, "grad_norm": 0.04700281098484993, "learning_rate": 9.955975191445434e-05, "loss": 0.0101, "step": 2453 }, { "epoch": 0.4748452012383901, "grad_norm": 0.05902579426765442, "learning_rate": 9.955936896678796e-05, "loss": 0.0113, "step": 2454 }, { "epoch": 0.47503869969040247, "grad_norm": 0.05582147464156151, "learning_rate": 9.955898585346082e-05, "loss": 0.0111, "step": 2455 }, { "epoch": 0.47523219814241485, "grad_norm": 0.13220034539699554, "learning_rate": 9.955860257447435e-05, "loss": 0.0137, "step": 2456 }, { "epoch": 0.47542569659442724, "grad_norm": 0.1771487146615982, "learning_rate": 9.955821912982999e-05, "loss": 0.0114, "step": 2457 }, { "epoch": 0.4756191950464396, "grad_norm": 0.07177065312862396, "learning_rate": 9.955783551952913e-05, "loss": 0.0098, "step": 2458 }, { "epoch": 0.475812693498452, "grad_norm": 0.15796016156673431, "learning_rate": 9.955745174357322e-05, "loss": 0.0127, "step": 2459 }, { "epoch": 0.4760061919504644, "grad_norm": 0.08442936837673187, "learning_rate": 9.95570678019637e-05, "loss": 0.0128, "step": 2460 }, { "epoch": 0.4761996904024768, "grad_norm": 0.09578641504049301, "learning_rate": 9.955668369470197e-05, "loss": 0.0125, "step": 2461 }, { "epoch": 0.47639318885448917, "grad_norm": 0.11473030596971512, "learning_rate": 9.955629942178948e-05, "loss": 0.0135, "step": 2462 }, { "epoch": 
0.47658668730650156, "grad_norm": 0.1005827784538269, "learning_rate": 9.955591498322763e-05, "loss": 0.0125, "step": 2463 }, { "epoch": 0.47678018575851394, "grad_norm": 0.1213209480047226, "learning_rate": 9.955553037901788e-05, "loss": 0.0104, "step": 2464 }, { "epoch": 0.4769736842105263, "grad_norm": 0.12822502851486206, "learning_rate": 9.955514560916166e-05, "loss": 0.0124, "step": 2465 }, { "epoch": 0.4771671826625387, "grad_norm": 0.18661050498485565, "learning_rate": 9.955476067366036e-05, "loss": 0.0101, "step": 2466 }, { "epoch": 0.4773606811145511, "grad_norm": 0.10427093505859375, "learning_rate": 9.955437557251545e-05, "loss": 0.011, "step": 2467 }, { "epoch": 0.4775541795665635, "grad_norm": 0.1844853013753891, "learning_rate": 9.955399030572834e-05, "loss": 0.0116, "step": 2468 }, { "epoch": 0.47774767801857587, "grad_norm": 0.15751859545707703, "learning_rate": 9.955360487330048e-05, "loss": 0.0115, "step": 2469 }, { "epoch": 0.47794117647058826, "grad_norm": 0.2519470453262329, "learning_rate": 9.95532192752333e-05, "loss": 0.012, "step": 2470 }, { "epoch": 0.47813467492260064, "grad_norm": 0.2016790509223938, "learning_rate": 9.955283351152821e-05, "loss": 0.0142, "step": 2471 }, { "epoch": 0.47832817337461303, "grad_norm": 0.12916406989097595, "learning_rate": 9.95524475821867e-05, "loss": 0.0113, "step": 2472 }, { "epoch": 0.47852167182662536, "grad_norm": 0.3388877809047699, "learning_rate": 9.955206148721014e-05, "loss": 0.0127, "step": 2473 }, { "epoch": 0.47871517027863775, "grad_norm": 0.06490179151296616, "learning_rate": 9.95516752266e-05, "loss": 0.0098, "step": 2474 }, { "epoch": 0.47890866873065013, "grad_norm": 0.3604065477848053, "learning_rate": 9.955128880035772e-05, "loss": 0.0132, "step": 2475 }, { "epoch": 0.4791021671826625, "grad_norm": 0.046591050922870636, "learning_rate": 9.955090220848471e-05, "loss": 0.0132, "step": 2476 }, { "epoch": 0.4792956656346749, "grad_norm": 0.2795107662677765, "learning_rate": 9.955051545098241e-05, "loss": 0.0114, "step": 2477 }, { "epoch": 0.4794891640866873, "grad_norm": 0.14912478625774384, "learning_rate": 9.95501285278523e-05, "loss": 0.0118, "step": 2478 }, { "epoch": 0.4796826625386997, "grad_norm": 0.19406050443649292, "learning_rate": 9.95497414390958e-05, "loss": 0.0114, "step": 2479 }, { "epoch": 0.47987616099071206, "grad_norm": 0.24809253215789795, "learning_rate": 9.954935418471429e-05, "loss": 0.013, "step": 2480 }, { "epoch": 0.48006965944272445, "grad_norm": 0.102442167699337, "learning_rate": 9.954896676470928e-05, "loss": 0.0106, "step": 2481 }, { "epoch": 0.48026315789473684, "grad_norm": 0.21955829858779907, "learning_rate": 9.95485791790822e-05, "loss": 0.0152, "step": 2482 }, { "epoch": 0.4804566563467492, "grad_norm": 0.12141044437885284, "learning_rate": 9.954819142783448e-05, "loss": 0.0128, "step": 2483 }, { "epoch": 0.4806501547987616, "grad_norm": 0.2074262499809265, "learning_rate": 9.954780351096755e-05, "loss": 0.0129, "step": 2484 }, { "epoch": 0.480843653250774, "grad_norm": 0.12135609984397888, "learning_rate": 9.954741542848287e-05, "loss": 0.0115, "step": 2485 }, { "epoch": 0.4810371517027864, "grad_norm": 0.15339064598083496, "learning_rate": 9.954702718038187e-05, "loss": 0.0113, "step": 2486 }, { "epoch": 0.48123065015479877, "grad_norm": 0.08557132631540298, "learning_rate": 9.954663876666601e-05, "loss": 0.0098, "step": 2487 }, { "epoch": 0.48142414860681115, "grad_norm": 0.1434018611907959, "learning_rate": 9.954625018733672e-05, "loss": 0.0114, "step": 2488 }, { "epoch": 
0.48161764705882354, "grad_norm": 0.0726713240146637, "learning_rate": 9.954586144239545e-05, "loss": 0.0092, "step": 2489 }, { "epoch": 0.4818111455108359, "grad_norm": 0.10720477253198624, "learning_rate": 9.954547253184363e-05, "loss": 0.0144, "step": 2490 }, { "epoch": 0.4820046439628483, "grad_norm": 0.16637326776981354, "learning_rate": 9.954508345568274e-05, "loss": 0.0136, "step": 2491 }, { "epoch": 0.4821981424148607, "grad_norm": 0.08905752003192902, "learning_rate": 9.954469421391419e-05, "loss": 0.0118, "step": 2492 }, { "epoch": 0.4823916408668731, "grad_norm": 0.17323145270347595, "learning_rate": 9.954430480653946e-05, "loss": 0.0115, "step": 2493 }, { "epoch": 0.48258513931888547, "grad_norm": 0.10440659523010254, "learning_rate": 9.954391523355997e-05, "loss": 0.0133, "step": 2494 }, { "epoch": 0.48277863777089786, "grad_norm": 0.06213502213358879, "learning_rate": 9.954352549497717e-05, "loss": 0.0107, "step": 2495 }, { "epoch": 0.48297213622291024, "grad_norm": 0.13625164330005646, "learning_rate": 9.954313559079252e-05, "loss": 0.0119, "step": 2496 }, { "epoch": 0.48316563467492263, "grad_norm": 0.07933514565229416, "learning_rate": 9.954274552100748e-05, "loss": 0.0123, "step": 2497 }, { "epoch": 0.48335913312693496, "grad_norm": 0.07252306491136551, "learning_rate": 9.954235528562348e-05, "loss": 0.0124, "step": 2498 }, { "epoch": 0.48355263157894735, "grad_norm": 0.06549757719039917, "learning_rate": 9.954196488464198e-05, "loss": 0.0109, "step": 2499 }, { "epoch": 0.48374613003095973, "grad_norm": 0.10938920080661774, "learning_rate": 9.954157431806442e-05, "loss": 0.0133, "step": 2500 }, { "epoch": 0.4839396284829721, "grad_norm": 0.11308979243040085, "learning_rate": 9.954118358589228e-05, "loss": 0.0115, "step": 2501 }, { "epoch": 0.4841331269349845, "grad_norm": 0.09288634359836578, "learning_rate": 9.954079268812698e-05, "loss": 0.0112, "step": 2502 }, { "epoch": 0.4843266253869969, "grad_norm": 0.11598764359951019, "learning_rate": 9.954040162477e-05, "loss": 0.0117, "step": 2503 }, { "epoch": 0.4845201238390093, "grad_norm": 0.09199812263250351, "learning_rate": 9.954001039582277e-05, "loss": 0.0122, "step": 2504 }, { "epoch": 0.48471362229102166, "grad_norm": 0.14085206389427185, "learning_rate": 9.953961900128676e-05, "loss": 0.0106, "step": 2505 }, { "epoch": 0.48490712074303405, "grad_norm": 0.06585526466369629, "learning_rate": 9.953922744116343e-05, "loss": 0.0109, "step": 2506 }, { "epoch": 0.48510061919504643, "grad_norm": 0.1164492815732956, "learning_rate": 9.953883571545422e-05, "loss": 0.0113, "step": 2507 }, { "epoch": 0.4852941176470588, "grad_norm": 0.08277527987957001, "learning_rate": 9.95384438241606e-05, "loss": 0.0134, "step": 2508 }, { "epoch": 0.4854876160990712, "grad_norm": 0.05070037022233009, "learning_rate": 9.953805176728402e-05, "loss": 0.0111, "step": 2509 }, { "epoch": 0.4856811145510836, "grad_norm": 0.0884457603096962, "learning_rate": 9.953765954482592e-05, "loss": 0.0113, "step": 2510 }, { "epoch": 0.485874613003096, "grad_norm": 0.05095917358994484, "learning_rate": 9.95372671567878e-05, "loss": 0.0109, "step": 2511 }, { "epoch": 0.48606811145510836, "grad_norm": 0.11523241549730301, "learning_rate": 9.953687460317109e-05, "loss": 0.0116, "step": 2512 }, { "epoch": 0.48626160990712075, "grad_norm": 0.05649209022521973, "learning_rate": 9.953648188397722e-05, "loss": 0.0124, "step": 2513 }, { "epoch": 0.48645510835913314, "grad_norm": 0.12731388211250305, "learning_rate": 9.953608899920772e-05, "loss": 0.0121, "step": 2514 
}, { "epoch": 0.4866486068111455, "grad_norm": 0.08110866695642471, "learning_rate": 9.953569594886401e-05, "loss": 0.011, "step": 2515 }, { "epoch": 0.4868421052631579, "grad_norm": 0.13327138125896454, "learning_rate": 9.953530273294756e-05, "loss": 0.011, "step": 2516 }, { "epoch": 0.4870356037151703, "grad_norm": 0.0968869999051094, "learning_rate": 9.953490935145983e-05, "loss": 0.013, "step": 2517 }, { "epoch": 0.4872291021671827, "grad_norm": 0.12897169589996338, "learning_rate": 9.953451580440227e-05, "loss": 0.0132, "step": 2518 }, { "epoch": 0.48742260061919507, "grad_norm": 0.09952324628829956, "learning_rate": 9.953412209177634e-05, "loss": 0.0113, "step": 2519 }, { "epoch": 0.48761609907120745, "grad_norm": 0.09116319566965103, "learning_rate": 9.953372821358353e-05, "loss": 0.0111, "step": 2520 }, { "epoch": 0.48780959752321984, "grad_norm": 0.12679718434810638, "learning_rate": 9.953333416982531e-05, "loss": 0.013, "step": 2521 }, { "epoch": 0.48800309597523217, "grad_norm": 0.07550854980945587, "learning_rate": 9.953293996050312e-05, "loss": 0.0101, "step": 2522 }, { "epoch": 0.48819659442724456, "grad_norm": 0.12696439027786255, "learning_rate": 9.953254558561843e-05, "loss": 0.0124, "step": 2523 }, { "epoch": 0.48839009287925694, "grad_norm": 0.07491020113229752, "learning_rate": 9.953215104517269e-05, "loss": 0.0116, "step": 2524 }, { "epoch": 0.48858359133126933, "grad_norm": 0.0787741020321846, "learning_rate": 9.953175633916741e-05, "loss": 0.0115, "step": 2525 }, { "epoch": 0.4887770897832817, "grad_norm": 0.10002878308296204, "learning_rate": 9.953136146760402e-05, "loss": 0.0091, "step": 2526 }, { "epoch": 0.4889705882352941, "grad_norm": 0.07361049950122833, "learning_rate": 9.953096643048401e-05, "loss": 0.0124, "step": 2527 }, { "epoch": 0.4891640866873065, "grad_norm": 0.090936578810215, "learning_rate": 9.953057122780882e-05, "loss": 0.0134, "step": 2528 }, { "epoch": 0.4893575851393189, "grad_norm": 0.05999721586704254, "learning_rate": 9.953017585957995e-05, "loss": 0.0114, "step": 2529 }, { "epoch": 0.48955108359133126, "grad_norm": 0.09945112466812134, "learning_rate": 9.952978032579885e-05, "loss": 0.0105, "step": 2530 }, { "epoch": 0.48974458204334365, "grad_norm": 0.05633888393640518, "learning_rate": 9.952938462646702e-05, "loss": 0.0104, "step": 2531 }, { "epoch": 0.48993808049535603, "grad_norm": 0.09740402549505234, "learning_rate": 9.95289887615859e-05, "loss": 0.0133, "step": 2532 }, { "epoch": 0.4901315789473684, "grad_norm": 0.061950717121362686, "learning_rate": 9.952859273115696e-05, "loss": 0.0117, "step": 2533 }, { "epoch": 0.4903250773993808, "grad_norm": 0.06961210817098618, "learning_rate": 9.95281965351817e-05, "loss": 0.011, "step": 2534 }, { "epoch": 0.4905185758513932, "grad_norm": 0.055468250066041946, "learning_rate": 9.952780017366158e-05, "loss": 0.0122, "step": 2535 }, { "epoch": 0.4907120743034056, "grad_norm": 0.05174418166279793, "learning_rate": 9.952740364659806e-05, "loss": 0.0115, "step": 2536 }, { "epoch": 0.49090557275541796, "grad_norm": 0.05005096271634102, "learning_rate": 9.952700695399264e-05, "loss": 0.0106, "step": 2537 }, { "epoch": 0.49109907120743035, "grad_norm": 0.07968368381261826, "learning_rate": 9.952661009584676e-05, "loss": 0.0123, "step": 2538 }, { "epoch": 0.49129256965944273, "grad_norm": 0.08975943177938461, "learning_rate": 9.952621307216192e-05, "loss": 0.0119, "step": 2539 }, { "epoch": 0.4914860681114551, "grad_norm": 0.08993019908666611, "learning_rate": 9.95258158829396e-05, "loss": 0.0121, 
"step": 2540 }, { "epoch": 0.4916795665634675, "grad_norm": 0.11582330614328384, "learning_rate": 9.952541852818126e-05, "loss": 0.0132, "step": 2541 }, { "epoch": 0.4918730650154799, "grad_norm": 0.08684896677732468, "learning_rate": 9.952502100788839e-05, "loss": 0.0129, "step": 2542 }, { "epoch": 0.4920665634674923, "grad_norm": 0.11464104056358337, "learning_rate": 9.952462332206247e-05, "loss": 0.0111, "step": 2543 }, { "epoch": 0.49226006191950467, "grad_norm": 0.06017908453941345, "learning_rate": 9.952422547070495e-05, "loss": 0.0113, "step": 2544 }, { "epoch": 0.49245356037151705, "grad_norm": 0.09857537597417831, "learning_rate": 9.952382745381734e-05, "loss": 0.0123, "step": 2545 }, { "epoch": 0.49264705882352944, "grad_norm": 0.04770197719335556, "learning_rate": 9.952342927140113e-05, "loss": 0.0132, "step": 2546 }, { "epoch": 0.49284055727554177, "grad_norm": 0.09456153213977814, "learning_rate": 9.952303092345775e-05, "loss": 0.0132, "step": 2547 }, { "epoch": 0.49303405572755415, "grad_norm": 0.041134320199489594, "learning_rate": 9.952263240998873e-05, "loss": 0.0137, "step": 2548 }, { "epoch": 0.49322755417956654, "grad_norm": 0.07190234959125519, "learning_rate": 9.952223373099554e-05, "loss": 0.0119, "step": 2549 }, { "epoch": 0.4934210526315789, "grad_norm": 0.0436394065618515, "learning_rate": 9.952183488647965e-05, "loss": 0.013, "step": 2550 }, { "epoch": 0.4936145510835913, "grad_norm": 0.12242621928453445, "learning_rate": 9.952143587644255e-05, "loss": 0.0106, "step": 2551 }, { "epoch": 0.4938080495356037, "grad_norm": 0.07943511009216309, "learning_rate": 9.952103670088571e-05, "loss": 0.0132, "step": 2552 }, { "epoch": 0.4940015479876161, "grad_norm": 0.08116700500249863, "learning_rate": 9.952063735981064e-05, "loss": 0.0094, "step": 2553 }, { "epoch": 0.49419504643962847, "grad_norm": 0.10148314386606216, "learning_rate": 9.952023785321879e-05, "loss": 0.0112, "step": 2554 }, { "epoch": 0.49438854489164086, "grad_norm": 0.07141365110874176, "learning_rate": 9.951983818111167e-05, "loss": 0.0106, "step": 2555 }, { "epoch": 0.49458204334365324, "grad_norm": 0.11853720992803574, "learning_rate": 9.951943834349078e-05, "loss": 0.0101, "step": 2556 }, { "epoch": 0.49477554179566563, "grad_norm": 0.04351057857275009, "learning_rate": 9.951903834035759e-05, "loss": 0.0132, "step": 2557 }, { "epoch": 0.494969040247678, "grad_norm": 0.1057644709944725, "learning_rate": 9.951863817171356e-05, "loss": 0.0109, "step": 2558 }, { "epoch": 0.4951625386996904, "grad_norm": 0.05527724325656891, "learning_rate": 9.951823783756021e-05, "loss": 0.0107, "step": 2559 }, { "epoch": 0.4953560371517028, "grad_norm": 0.11763669550418854, "learning_rate": 9.951783733789904e-05, "loss": 0.0127, "step": 2560 }, { "epoch": 0.4955495356037152, "grad_norm": 0.07540769875049591, "learning_rate": 9.951743667273149e-05, "loss": 0.0104, "step": 2561 }, { "epoch": 0.49574303405572756, "grad_norm": 0.1344938576221466, "learning_rate": 9.95170358420591e-05, "loss": 0.0115, "step": 2562 }, { "epoch": 0.49593653250773995, "grad_norm": 0.06925982236862183, "learning_rate": 9.951663484588334e-05, "loss": 0.0109, "step": 2563 }, { "epoch": 0.49613003095975233, "grad_norm": 0.20341752469539642, "learning_rate": 9.95162336842057e-05, "loss": 0.0103, "step": 2564 }, { "epoch": 0.4963235294117647, "grad_norm": 0.0989876389503479, "learning_rate": 9.951583235702767e-05, "loss": 0.0139, "step": 2565 }, { "epoch": 0.4965170278637771, "grad_norm": 0.13576938211917877, "learning_rate": 9.951543086435075e-05, 
"loss": 0.0122, "step": 2566 }, { "epoch": 0.4967105263157895, "grad_norm": 0.07993658632040024, "learning_rate": 9.951502920617642e-05, "loss": 0.0134, "step": 2567 }, { "epoch": 0.4969040247678019, "grad_norm": 0.09106718748807907, "learning_rate": 9.951462738250617e-05, "loss": 0.013, "step": 2568 }, { "epoch": 0.49709752321981426, "grad_norm": 0.0672733336687088, "learning_rate": 9.951422539334152e-05, "loss": 0.0114, "step": 2569 }, { "epoch": 0.49729102167182665, "grad_norm": 0.09611280262470245, "learning_rate": 9.951382323868395e-05, "loss": 0.0125, "step": 2570 }, { "epoch": 0.49748452012383904, "grad_norm": 0.0956064909696579, "learning_rate": 9.951342091853494e-05, "loss": 0.0126, "step": 2571 }, { "epoch": 0.49767801857585137, "grad_norm": 0.05007448419928551, "learning_rate": 9.9513018432896e-05, "loss": 0.0115, "step": 2572 }, { "epoch": 0.49787151702786375, "grad_norm": 0.1418042778968811, "learning_rate": 9.951261578176864e-05, "loss": 0.0135, "step": 2573 }, { "epoch": 0.49806501547987614, "grad_norm": 0.14582115411758423, "learning_rate": 9.951221296515433e-05, "loss": 0.0128, "step": 2574 }, { "epoch": 0.4982585139318885, "grad_norm": 0.09737368673086166, "learning_rate": 9.951180998305459e-05, "loss": 0.011, "step": 2575 }, { "epoch": 0.4984520123839009, "grad_norm": 0.14604264497756958, "learning_rate": 9.951140683547091e-05, "loss": 0.0129, "step": 2576 }, { "epoch": 0.4986455108359133, "grad_norm": 0.09166739881038666, "learning_rate": 9.951100352240478e-05, "loss": 0.0132, "step": 2577 }, { "epoch": 0.4988390092879257, "grad_norm": 0.1635289192199707, "learning_rate": 9.951060004385771e-05, "loss": 0.0123, "step": 2578 }, { "epoch": 0.49903250773993807, "grad_norm": 0.10306388139724731, "learning_rate": 9.951019639983119e-05, "loss": 0.0104, "step": 2579 }, { "epoch": 0.49922600619195046, "grad_norm": 0.1339728683233261, "learning_rate": 9.950979259032673e-05, "loss": 0.0129, "step": 2580 }, { "epoch": 0.49941950464396284, "grad_norm": 0.08831906318664551, "learning_rate": 9.950938861534583e-05, "loss": 0.0122, "step": 2581 }, { "epoch": 0.4996130030959752, "grad_norm": 0.10156374424695969, "learning_rate": 9.950898447488999e-05, "loss": 0.0097, "step": 2582 }, { "epoch": 0.4998065015479876, "grad_norm": 0.10687009990215302, "learning_rate": 9.950858016896073e-05, "loss": 0.0108, "step": 2583 }, { "epoch": 0.5, "grad_norm": 0.06511899828910828, "learning_rate": 9.950817569755953e-05, "loss": 0.0119, "step": 2584 }, { "epoch": 0.5001934984520123, "grad_norm": 0.12704350054264069, "learning_rate": 9.950777106068787e-05, "loss": 0.0117, "step": 2585 }, { "epoch": 0.5003869969040248, "grad_norm": 0.042013391852378845, "learning_rate": 9.950736625834731e-05, "loss": 0.0086, "step": 2586 }, { "epoch": 0.5005804953560371, "grad_norm": 0.08817151933908463, "learning_rate": 9.950696129053932e-05, "loss": 0.0094, "step": 2587 }, { "epoch": 0.5007739938080495, "grad_norm": 0.05066579207777977, "learning_rate": 9.950655615726543e-05, "loss": 0.011, "step": 2588 }, { "epoch": 0.5009674922600619, "grad_norm": 0.061973936855793, "learning_rate": 9.950615085852712e-05, "loss": 0.0114, "step": 2589 }, { "epoch": 0.5011609907120743, "grad_norm": 0.06621363759040833, "learning_rate": 9.95057453943259e-05, "loss": 0.0114, "step": 2590 }, { "epoch": 0.5013544891640866, "grad_norm": 0.082718126475811, "learning_rate": 9.95053397646633e-05, "loss": 0.0112, "step": 2591 }, { "epoch": 0.5015479876160991, "grad_norm": 0.07662441581487656, "learning_rate": 9.95049339695408e-05, "loss": 
0.0099, "step": 2592 }, { "epoch": 0.5017414860681114, "grad_norm": 0.09460093080997467, "learning_rate": 9.950452800895993e-05, "loss": 0.0096, "step": 2593 }, { "epoch": 0.5019349845201239, "grad_norm": 0.09871864318847656, "learning_rate": 9.950412188292219e-05, "loss": 0.012, "step": 2594 }, { "epoch": 0.5021284829721362, "grad_norm": 0.07746296375989914, "learning_rate": 9.950371559142907e-05, "loss": 0.011, "step": 2595 }, { "epoch": 0.5023219814241486, "grad_norm": 0.11246896535158157, "learning_rate": 9.950330913448213e-05, "loss": 0.0095, "step": 2596 }, { "epoch": 0.502515479876161, "grad_norm": 0.13845844566822052, "learning_rate": 9.950290251208285e-05, "loss": 0.0149, "step": 2597 }, { "epoch": 0.5027089783281734, "grad_norm": 0.2995738387107849, "learning_rate": 9.950249572423274e-05, "loss": 0.0114, "step": 2598 }, { "epoch": 0.5029024767801857, "grad_norm": 0.06109681725502014, "learning_rate": 9.95020887709333e-05, "loss": 0.0108, "step": 2599 }, { "epoch": 0.5030959752321982, "grad_norm": 0.27554914355278015, "learning_rate": 9.950168165218608e-05, "loss": 0.0129, "step": 2600 }, { "epoch": 0.5032894736842105, "grad_norm": 0.06150941178202629, "learning_rate": 9.950127436799257e-05, "loss": 0.0106, "step": 2601 }, { "epoch": 0.503482972136223, "grad_norm": 0.1587119996547699, "learning_rate": 9.950086691835428e-05, "loss": 0.0122, "step": 2602 }, { "epoch": 0.5036764705882353, "grad_norm": 0.11490695923566818, "learning_rate": 9.950045930327274e-05, "loss": 0.0089, "step": 2603 }, { "epoch": 0.5038699690402477, "grad_norm": 0.19625283777713776, "learning_rate": 9.950005152274945e-05, "loss": 0.0121, "step": 2604 }, { "epoch": 0.50406346749226, "grad_norm": 0.14866319298744202, "learning_rate": 9.949964357678594e-05, "loss": 0.0127, "step": 2605 }, { "epoch": 0.5042569659442725, "grad_norm": 0.31721004843711853, "learning_rate": 9.949923546538371e-05, "loss": 0.0135, "step": 2606 }, { "epoch": 0.5044504643962848, "grad_norm": 0.13154500722885132, "learning_rate": 9.94988271885443e-05, "loss": 0.0136, "step": 2607 }, { "epoch": 0.5046439628482973, "grad_norm": 0.2831875681877136, "learning_rate": 9.949841874626922e-05, "loss": 0.0116, "step": 2608 }, { "epoch": 0.5048374613003096, "grad_norm": 0.17794989049434662, "learning_rate": 9.949801013855996e-05, "loss": 0.0127, "step": 2609 }, { "epoch": 0.5050309597523219, "grad_norm": 0.1814361959695816, "learning_rate": 9.949760136541807e-05, "loss": 0.0103, "step": 2610 }, { "epoch": 0.5052244582043344, "grad_norm": 0.2972167730331421, "learning_rate": 9.949719242684507e-05, "loss": 0.0132, "step": 2611 }, { "epoch": 0.5054179566563467, "grad_norm": 0.10395196080207825, "learning_rate": 9.949678332284247e-05, "loss": 0.0092, "step": 2612 }, { "epoch": 0.5056114551083591, "grad_norm": 0.3022918999195099, "learning_rate": 9.94963740534118e-05, "loss": 0.0136, "step": 2613 }, { "epoch": 0.5058049535603715, "grad_norm": 0.10844133794307709, "learning_rate": 9.949596461855457e-05, "loss": 0.0115, "step": 2614 }, { "epoch": 0.5059984520123839, "grad_norm": 0.24356913566589355, "learning_rate": 9.949555501827232e-05, "loss": 0.0127, "step": 2615 }, { "epoch": 0.5061919504643962, "grad_norm": 0.14202716946601868, "learning_rate": 9.949514525256655e-05, "loss": 0.0132, "step": 2616 }, { "epoch": 0.5063854489164087, "grad_norm": 0.13411834836006165, "learning_rate": 9.949473532143878e-05, "loss": 0.0091, "step": 2617 }, { "epoch": 0.506578947368421, "grad_norm": 0.1780053824186325, "learning_rate": 9.949432522489056e-05, "loss": 0.0116, 
"step": 2618 }, { "epoch": 0.5067724458204335, "grad_norm": 0.15881173312664032, "learning_rate": 9.949391496292341e-05, "loss": 0.0113, "step": 2619 }, { "epoch": 0.5069659442724458, "grad_norm": 0.18767361342906952, "learning_rate": 9.949350453553885e-05, "loss": 0.0117, "step": 2620 }, { "epoch": 0.5071594427244582, "grad_norm": 0.16793546080589294, "learning_rate": 9.949309394273839e-05, "loss": 0.0102, "step": 2621 }, { "epoch": 0.5073529411764706, "grad_norm": 0.17561306059360504, "learning_rate": 9.949268318452358e-05, "loss": 0.0123, "step": 2622 }, { "epoch": 0.507546439628483, "grad_norm": 0.12444586306810379, "learning_rate": 9.949227226089592e-05, "loss": 0.0108, "step": 2623 }, { "epoch": 0.5077399380804953, "grad_norm": 0.14423204958438873, "learning_rate": 9.949186117185697e-05, "loss": 0.0108, "step": 2624 }, { "epoch": 0.5079334365325078, "grad_norm": 0.0905311331152916, "learning_rate": 9.949144991740825e-05, "loss": 0.014, "step": 2625 }, { "epoch": 0.5081269349845201, "grad_norm": 0.07727319002151489, "learning_rate": 9.949103849755126e-05, "loss": 0.0093, "step": 2626 }, { "epoch": 0.5083204334365325, "grad_norm": 0.1239299327135086, "learning_rate": 9.949062691228757e-05, "loss": 0.013, "step": 2627 }, { "epoch": 0.5085139318885449, "grad_norm": 0.15206047892570496, "learning_rate": 9.949021516161868e-05, "loss": 0.0119, "step": 2628 }, { "epoch": 0.5087074303405573, "grad_norm": 0.15079213678836823, "learning_rate": 9.948980324554614e-05, "loss": 0.0137, "step": 2629 }, { "epoch": 0.5089009287925697, "grad_norm": 0.21594776213169098, "learning_rate": 9.948939116407147e-05, "loss": 0.0115, "step": 2630 }, { "epoch": 0.5090944272445821, "grad_norm": 0.13601011037826538, "learning_rate": 9.948897891719621e-05, "loss": 0.0106, "step": 2631 }, { "epoch": 0.5092879256965944, "grad_norm": 0.20191679894924164, "learning_rate": 9.948856650492189e-05, "loss": 0.0124, "step": 2632 }, { "epoch": 0.5094814241486069, "grad_norm": 0.11368270218372345, "learning_rate": 9.948815392725004e-05, "loss": 0.0123, "step": 2633 }, { "epoch": 0.5096749226006192, "grad_norm": 0.15546609461307526, "learning_rate": 9.948774118418219e-05, "loss": 0.013, "step": 2634 }, { "epoch": 0.5098684210526315, "grad_norm": 0.10268471390008926, "learning_rate": 9.94873282757199e-05, "loss": 0.0108, "step": 2635 }, { "epoch": 0.510061919504644, "grad_norm": 0.11028337478637695, "learning_rate": 9.948691520186467e-05, "loss": 0.0112, "step": 2636 }, { "epoch": 0.5102554179566563, "grad_norm": 0.09643534570932388, "learning_rate": 9.948650196261805e-05, "loss": 0.0104, "step": 2637 }, { "epoch": 0.5104489164086687, "grad_norm": 0.08657203614711761, "learning_rate": 9.948608855798159e-05, "loss": 0.0119, "step": 2638 }, { "epoch": 0.5106424148606811, "grad_norm": 0.11394354701042175, "learning_rate": 9.94856749879568e-05, "loss": 0.0131, "step": 2639 }, { "epoch": 0.5108359133126935, "grad_norm": 0.16894809901714325, "learning_rate": 9.948526125254522e-05, "loss": 0.0122, "step": 2640 }, { "epoch": 0.5110294117647058, "grad_norm": 0.1720089465379715, "learning_rate": 9.948484735174843e-05, "loss": 0.0085, "step": 2641 }, { "epoch": 0.5112229102167183, "grad_norm": 0.24153007566928864, "learning_rate": 9.948443328556792e-05, "loss": 0.0136, "step": 2642 }, { "epoch": 0.5114164086687306, "grad_norm": 0.2351265251636505, "learning_rate": 9.948401905400525e-05, "loss": 0.0123, "step": 2643 }, { "epoch": 0.5116099071207431, "grad_norm": 0.1708797961473465, "learning_rate": 9.948360465706195e-05, "loss": 0.0133, 
"step": 2644 }, { "epoch": 0.5118034055727554, "grad_norm": 0.18528705835342407, "learning_rate": 9.948319009473959e-05, "loss": 0.0109, "step": 2645 }, { "epoch": 0.5119969040247678, "grad_norm": 0.16117124259471893, "learning_rate": 9.948277536703968e-05, "loss": 0.0116, "step": 2646 }, { "epoch": 0.5121904024767802, "grad_norm": 0.1539657860994339, "learning_rate": 9.948236047396378e-05, "loss": 0.0115, "step": 2647 }, { "epoch": 0.5123839009287926, "grad_norm": 0.14467795193195343, "learning_rate": 9.948194541551342e-05, "loss": 0.0127, "step": 2648 }, { "epoch": 0.5125773993808049, "grad_norm": 0.13123252987861633, "learning_rate": 9.948153019169013e-05, "loss": 0.0124, "step": 2649 }, { "epoch": 0.5127708978328174, "grad_norm": 0.18113616108894348, "learning_rate": 9.948111480249549e-05, "loss": 0.0097, "step": 2650 }, { "epoch": 0.5129643962848297, "grad_norm": 0.14554762840270996, "learning_rate": 9.948069924793102e-05, "loss": 0.0119, "step": 2651 }, { "epoch": 0.5131578947368421, "grad_norm": 0.11610965430736542, "learning_rate": 9.948028352799827e-05, "loss": 0.0113, "step": 2652 }, { "epoch": 0.5133513931888545, "grad_norm": 0.3571791648864746, "learning_rate": 9.94798676426988e-05, "loss": 0.0136, "step": 2653 }, { "epoch": 0.5135448916408669, "grad_norm": 0.12292932718992233, "learning_rate": 9.947945159203413e-05, "loss": 0.0128, "step": 2654 }, { "epoch": 0.5137383900928792, "grad_norm": 0.41019535064697266, "learning_rate": 9.947903537600581e-05, "loss": 0.0128, "step": 2655 }, { "epoch": 0.5139318885448917, "grad_norm": 0.05580098554491997, "learning_rate": 9.94786189946154e-05, "loss": 0.011, "step": 2656 }, { "epoch": 0.514125386996904, "grad_norm": 0.431874543428421, "learning_rate": 9.947820244786446e-05, "loss": 0.0126, "step": 2657 }, { "epoch": 0.5143188854489165, "grad_norm": 0.11826184391975403, "learning_rate": 9.947778573575452e-05, "loss": 0.0108, "step": 2658 }, { "epoch": 0.5145123839009288, "grad_norm": 0.3486168682575226, "learning_rate": 9.947736885828712e-05, "loss": 0.0124, "step": 2659 }, { "epoch": 0.5147058823529411, "grad_norm": 0.21920596063137054, "learning_rate": 9.947695181546383e-05, "loss": 0.0108, "step": 2660 }, { "epoch": 0.5148993808049536, "grad_norm": 0.22050951421260834, "learning_rate": 9.947653460728618e-05, "loss": 0.0105, "step": 2661 }, { "epoch": 0.5150928792569659, "grad_norm": 0.26888659596443176, "learning_rate": 9.947611723375575e-05, "loss": 0.0116, "step": 2662 }, { "epoch": 0.5152863777089783, "grad_norm": 0.0940987765789032, "learning_rate": 9.947569969487405e-05, "loss": 0.0124, "step": 2663 }, { "epoch": 0.5154798761609907, "grad_norm": 0.2435997873544693, "learning_rate": 9.94752819906427e-05, "loss": 0.0129, "step": 2664 }, { "epoch": 0.5156733746130031, "grad_norm": 0.11612506210803986, "learning_rate": 9.947486412106319e-05, "loss": 0.0116, "step": 2665 }, { "epoch": 0.5158668730650154, "grad_norm": 0.20650026202201843, "learning_rate": 9.947444608613708e-05, "loss": 0.0119, "step": 2666 }, { "epoch": 0.5160603715170279, "grad_norm": 0.18280236423015594, "learning_rate": 9.947402788586596e-05, "loss": 0.0107, "step": 2667 }, { "epoch": 0.5162538699690402, "grad_norm": 0.11195293813943863, "learning_rate": 9.947360952025134e-05, "loss": 0.0105, "step": 2668 }, { "epoch": 0.5164473684210527, "grad_norm": 0.22055256366729736, "learning_rate": 9.94731909892948e-05, "loss": 0.0106, "step": 2669 }, { "epoch": 0.516640866873065, "grad_norm": 0.10608731210231781, "learning_rate": 9.947277229299792e-05, "loss": 0.0097, 
"step": 2670 }, { "epoch": 0.5168343653250774, "grad_norm": 0.16219684481620789, "learning_rate": 9.947235343136222e-05, "loss": 0.0154, "step": 2671 }, { "epoch": 0.5170278637770898, "grad_norm": 0.12859196960926056, "learning_rate": 9.947193440438926e-05, "loss": 0.0132, "step": 2672 }, { "epoch": 0.5172213622291022, "grad_norm": 0.12332623451948166, "learning_rate": 9.947151521208061e-05, "loss": 0.0122, "step": 2673 }, { "epoch": 0.5174148606811145, "grad_norm": 0.1361873298883438, "learning_rate": 9.947109585443783e-05, "loss": 0.0123, "step": 2674 }, { "epoch": 0.517608359133127, "grad_norm": 0.13889241218566895, "learning_rate": 9.947067633146247e-05, "loss": 0.0129, "step": 2675 }, { "epoch": 0.5178018575851393, "grad_norm": 0.1849544644355774, "learning_rate": 9.947025664315608e-05, "loss": 0.0114, "step": 2676 }, { "epoch": 0.5179953560371517, "grad_norm": 0.16745918989181519, "learning_rate": 9.946983678952025e-05, "loss": 0.0122, "step": 2677 }, { "epoch": 0.5181888544891641, "grad_norm": 0.17239147424697876, "learning_rate": 9.946941677055652e-05, "loss": 0.0121, "step": 2678 }, { "epoch": 0.5183823529411765, "grad_norm": 0.14957693219184875, "learning_rate": 9.946899658626645e-05, "loss": 0.0102, "step": 2679 }, { "epoch": 0.5185758513931888, "grad_norm": 0.14488400518894196, "learning_rate": 9.94685762366516e-05, "loss": 0.0104, "step": 2680 }, { "epoch": 0.5187693498452013, "grad_norm": 0.20630823075771332, "learning_rate": 9.946815572171358e-05, "loss": 0.0117, "step": 2681 }, { "epoch": 0.5189628482972136, "grad_norm": 0.09385921061038971, "learning_rate": 9.946773504145387e-05, "loss": 0.0115, "step": 2682 }, { "epoch": 0.5191563467492261, "grad_norm": 0.10462740063667297, "learning_rate": 9.94673141958741e-05, "loss": 0.011, "step": 2683 }, { "epoch": 0.5193498452012384, "grad_norm": 0.14048337936401367, "learning_rate": 9.946689318497582e-05, "loss": 0.0108, "step": 2684 }, { "epoch": 0.5195433436532507, "grad_norm": 0.08992376923561096, "learning_rate": 9.946647200876057e-05, "loss": 0.0129, "step": 2685 }, { "epoch": 0.5197368421052632, "grad_norm": 0.17951783537864685, "learning_rate": 9.946605066722995e-05, "loss": 0.0115, "step": 2686 }, { "epoch": 0.5199303405572755, "grad_norm": 0.11861943453550339, "learning_rate": 9.94656291603855e-05, "loss": 0.009, "step": 2687 }, { "epoch": 0.5201238390092879, "grad_norm": 0.14676351845264435, "learning_rate": 9.94652074882288e-05, "loss": 0.0119, "step": 2688 }, { "epoch": 0.5203173374613003, "grad_norm": 0.08406328409910202, "learning_rate": 9.94647856507614e-05, "loss": 0.0108, "step": 2689 }, { "epoch": 0.5205108359133127, "grad_norm": 0.13159991800785065, "learning_rate": 9.946436364798492e-05, "loss": 0.0122, "step": 2690 }, { "epoch": 0.520704334365325, "grad_norm": 0.0675654411315918, "learning_rate": 9.946394147990087e-05, "loss": 0.0112, "step": 2691 }, { "epoch": 0.5208978328173375, "grad_norm": 0.08382290601730347, "learning_rate": 9.946351914651082e-05, "loss": 0.0094, "step": 2692 }, { "epoch": 0.5210913312693498, "grad_norm": 0.07804922759532928, "learning_rate": 9.946309664781638e-05, "loss": 0.0114, "step": 2693 }, { "epoch": 0.5212848297213623, "grad_norm": 0.13243365287780762, "learning_rate": 9.94626739838191e-05, "loss": 0.0128, "step": 2694 }, { "epoch": 0.5214783281733746, "grad_norm": 0.1350255161523819, "learning_rate": 9.946225115452058e-05, "loss": 0.0109, "step": 2695 }, { "epoch": 0.521671826625387, "grad_norm": 0.15112844109535217, "learning_rate": 9.946182815992236e-05, "loss": 0.0142, "step": 
2696 }, { "epoch": 0.5218653250773994, "grad_norm": 0.15787416696548462, "learning_rate": 9.946140500002599e-05, "loss": 0.0108, "step": 2697 }, { "epoch": 0.5220588235294118, "grad_norm": 0.11448804289102554, "learning_rate": 9.946098167483308e-05, "loss": 0.0118, "step": 2698 }, { "epoch": 0.5222523219814241, "grad_norm": 0.17771513760089874, "learning_rate": 9.946055818434521e-05, "loss": 0.0128, "step": 2699 }, { "epoch": 0.5224458204334366, "grad_norm": 0.04658306762576103, "learning_rate": 9.946013452856393e-05, "loss": 0.0117, "step": 2700 }, { "epoch": 0.5226393188854489, "grad_norm": 0.15983973443508148, "learning_rate": 9.945971070749082e-05, "loss": 0.01, "step": 2701 }, { "epoch": 0.5228328173374613, "grad_norm": 0.05096132680773735, "learning_rate": 9.945928672112747e-05, "loss": 0.0109, "step": 2702 }, { "epoch": 0.5230263157894737, "grad_norm": 0.08348508179187775, "learning_rate": 9.945886256947545e-05, "loss": 0.0102, "step": 2703 }, { "epoch": 0.5232198142414861, "grad_norm": 0.08574442565441132, "learning_rate": 9.945843825253633e-05, "loss": 0.011, "step": 2704 }, { "epoch": 0.5234133126934984, "grad_norm": 0.06919307261705399, "learning_rate": 9.945801377031168e-05, "loss": 0.0122, "step": 2705 }, { "epoch": 0.5236068111455109, "grad_norm": 0.100366972386837, "learning_rate": 9.94575891228031e-05, "loss": 0.0097, "step": 2706 }, { "epoch": 0.5238003095975232, "grad_norm": 0.0356748066842556, "learning_rate": 9.945716431001216e-05, "loss": 0.0098, "step": 2707 }, { "epoch": 0.5239938080495357, "grad_norm": 0.08862139284610748, "learning_rate": 9.945673933194045e-05, "loss": 0.0137, "step": 2708 }, { "epoch": 0.524187306501548, "grad_norm": 0.09276451170444489, "learning_rate": 9.945631418858952e-05, "loss": 0.011, "step": 2709 }, { "epoch": 0.5243808049535603, "grad_norm": 0.11193224042654037, "learning_rate": 9.945588887996097e-05, "loss": 0.0109, "step": 2710 }, { "epoch": 0.5245743034055728, "grad_norm": 0.0862981528043747, "learning_rate": 9.945546340605638e-05, "loss": 0.0097, "step": 2711 }, { "epoch": 0.5247678018575851, "grad_norm": 0.11327943950891495, "learning_rate": 9.945503776687733e-05, "loss": 0.0126, "step": 2712 }, { "epoch": 0.5249613003095975, "grad_norm": 0.08714739978313446, "learning_rate": 9.945461196242541e-05, "loss": 0.0108, "step": 2713 }, { "epoch": 0.5251547987616099, "grad_norm": 0.0838174894452095, "learning_rate": 9.945418599270218e-05, "loss": 0.0121, "step": 2714 }, { "epoch": 0.5253482972136223, "grad_norm": 0.05196498706936836, "learning_rate": 9.945375985770926e-05, "loss": 0.0095, "step": 2715 }, { "epoch": 0.5255417956656346, "grad_norm": 0.11911104619503021, "learning_rate": 9.945333355744821e-05, "loss": 0.0113, "step": 2716 }, { "epoch": 0.5257352941176471, "grad_norm": 0.06876484304666519, "learning_rate": 9.945290709192062e-05, "loss": 0.0106, "step": 2717 }, { "epoch": 0.5259287925696594, "grad_norm": 0.15961769223213196, "learning_rate": 9.945248046112806e-05, "loss": 0.0121, "step": 2718 }, { "epoch": 0.5261222910216719, "grad_norm": 0.08363381028175354, "learning_rate": 9.945205366507214e-05, "loss": 0.0084, "step": 2719 }, { "epoch": 0.5263157894736842, "grad_norm": 0.11799963563680649, "learning_rate": 9.945162670375444e-05, "loss": 0.0116, "step": 2720 }, { "epoch": 0.5265092879256966, "grad_norm": 0.05322336032986641, "learning_rate": 9.945119957717654e-05, "loss": 0.0115, "step": 2721 }, { "epoch": 0.526702786377709, "grad_norm": 0.07776937633752823, "learning_rate": 9.945077228534004e-05, "loss": 0.0111, "step": 2722 
}, { "epoch": 0.5268962848297214, "grad_norm": 0.04547747224569321, "learning_rate": 9.945034482824653e-05, "loss": 0.0107, "step": 2723 }, { "epoch": 0.5270897832817337, "grad_norm": 0.0474831759929657, "learning_rate": 9.944991720589758e-05, "loss": 0.0115, "step": 2724 }, { "epoch": 0.5272832817337462, "grad_norm": 0.03560931980609894, "learning_rate": 9.944948941829478e-05, "loss": 0.0119, "step": 2725 }, { "epoch": 0.5274767801857585, "grad_norm": 0.07679738849401474, "learning_rate": 9.944906146543976e-05, "loss": 0.0109, "step": 2726 }, { "epoch": 0.5276702786377709, "grad_norm": 0.060860525816679, "learning_rate": 9.944863334733405e-05, "loss": 0.0134, "step": 2727 }, { "epoch": 0.5278637770897833, "grad_norm": 0.13701263070106506, "learning_rate": 9.94482050639793e-05, "loss": 0.012, "step": 2728 }, { "epoch": 0.5280572755417957, "grad_norm": 0.14612102508544922, "learning_rate": 9.944777661537705e-05, "loss": 0.0106, "step": 2729 }, { "epoch": 0.528250773993808, "grad_norm": 0.10303723812103271, "learning_rate": 9.944734800152893e-05, "loss": 0.012, "step": 2730 }, { "epoch": 0.5284442724458205, "grad_norm": 0.1781120002269745, "learning_rate": 9.944691922243653e-05, "loss": 0.0111, "step": 2731 }, { "epoch": 0.5286377708978328, "grad_norm": 0.07324860244989395, "learning_rate": 9.944649027810144e-05, "loss": 0.0111, "step": 2732 }, { "epoch": 0.5288312693498453, "grad_norm": 0.1731906682252884, "learning_rate": 9.944606116852524e-05, "loss": 0.0117, "step": 2733 }, { "epoch": 0.5290247678018576, "grad_norm": 0.0962115228176117, "learning_rate": 9.944563189370954e-05, "loss": 0.0109, "step": 2734 }, { "epoch": 0.5292182662538699, "grad_norm": 0.11757197976112366, "learning_rate": 9.944520245365593e-05, "loss": 0.0106, "step": 2735 }, { "epoch": 0.5294117647058824, "grad_norm": 0.09750621765851974, "learning_rate": 9.944477284836601e-05, "loss": 0.0114, "step": 2736 }, { "epoch": 0.5296052631578947, "grad_norm": 0.1624867171049118, "learning_rate": 9.944434307784136e-05, "loss": 0.009, "step": 2737 }, { "epoch": 0.5297987616099071, "grad_norm": 0.09157654643058777, "learning_rate": 9.944391314208362e-05, "loss": 0.0126, "step": 2738 }, { "epoch": 0.5299922600619195, "grad_norm": 0.09968820959329605, "learning_rate": 9.944348304109434e-05, "loss": 0.0111, "step": 2739 }, { "epoch": 0.5301857585139319, "grad_norm": 0.08760730177164078, "learning_rate": 9.944305277487517e-05, "loss": 0.0134, "step": 2740 }, { "epoch": 0.5303792569659442, "grad_norm": 0.08164189755916595, "learning_rate": 9.944262234342765e-05, "loss": 0.0093, "step": 2741 }, { "epoch": 0.5305727554179567, "grad_norm": 0.09387711435556412, "learning_rate": 9.944219174675342e-05, "loss": 0.0103, "step": 2742 }, { "epoch": 0.530766253869969, "grad_norm": 0.0734357088804245, "learning_rate": 9.944176098485408e-05, "loss": 0.0103, "step": 2743 }, { "epoch": 0.5309597523219814, "grad_norm": 0.12028828263282776, "learning_rate": 9.944133005773123e-05, "loss": 0.0137, "step": 2744 }, { "epoch": 0.5311532507739938, "grad_norm": 0.08316602557897568, "learning_rate": 9.944089896538645e-05, "loss": 0.0109, "step": 2745 }, { "epoch": 0.5313467492260062, "grad_norm": 0.13951487839221954, "learning_rate": 9.944046770782135e-05, "loss": 0.0121, "step": 2746 }, { "epoch": 0.5315402476780186, "grad_norm": 0.06876742839813232, "learning_rate": 9.944003628503757e-05, "loss": 0.0112, "step": 2747 }, { "epoch": 0.531733746130031, "grad_norm": 0.14986994862556458, "learning_rate": 9.943960469703666e-05, "loss": 0.0126, "step": 2748 }, { 
"epoch": 0.5319272445820433, "grad_norm": 0.042280349880456924, "learning_rate": 9.943917294382026e-05, "loss": 0.0106, "step": 2749 }, { "epoch": 0.5321207430340558, "grad_norm": 0.19285176694393158, "learning_rate": 9.943874102538997e-05, "loss": 0.0132, "step": 2750 }, { "epoch": 0.5323142414860681, "grad_norm": 0.05385233461856842, "learning_rate": 9.943830894174738e-05, "loss": 0.0125, "step": 2751 }, { "epoch": 0.5325077399380805, "grad_norm": 0.27031031250953674, "learning_rate": 9.943787669289412e-05, "loss": 0.0127, "step": 2752 }, { "epoch": 0.5327012383900929, "grad_norm": 0.139634907245636, "learning_rate": 9.943744427883179e-05, "loss": 0.0125, "step": 2753 }, { "epoch": 0.5328947368421053, "grad_norm": 0.2343328893184662, "learning_rate": 9.943701169956196e-05, "loss": 0.0132, "step": 2754 }, { "epoch": 0.5330882352941176, "grad_norm": 0.13620135188102722, "learning_rate": 9.94365789550863e-05, "loss": 0.0102, "step": 2755 }, { "epoch": 0.5332817337461301, "grad_norm": 0.2366054207086563, "learning_rate": 9.943614604540636e-05, "loss": 0.0119, "step": 2756 }, { "epoch": 0.5334752321981424, "grad_norm": 0.11999662220478058, "learning_rate": 9.943571297052379e-05, "loss": 0.0114, "step": 2757 }, { "epoch": 0.5336687306501547, "grad_norm": 0.2669781446456909, "learning_rate": 9.943527973044021e-05, "loss": 0.0117, "step": 2758 }, { "epoch": 0.5338622291021672, "grad_norm": 0.09244798868894577, "learning_rate": 9.94348463251572e-05, "loss": 0.0105, "step": 2759 }, { "epoch": 0.5340557275541795, "grad_norm": 0.17185157537460327, "learning_rate": 9.943441275467638e-05, "loss": 0.0126, "step": 2760 }, { "epoch": 0.534249226006192, "grad_norm": 0.25297272205352783, "learning_rate": 9.943397901899933e-05, "loss": 0.0092, "step": 2761 }, { "epoch": 0.5344427244582043, "grad_norm": 0.23845504224300385, "learning_rate": 9.943354511812773e-05, "loss": 0.0119, "step": 2762 }, { "epoch": 0.5346362229102167, "grad_norm": 0.23322583734989166, "learning_rate": 9.943311105206316e-05, "loss": 0.0123, "step": 2763 }, { "epoch": 0.5348297213622291, "grad_norm": 0.17646582424640656, "learning_rate": 9.943267682080722e-05, "loss": 0.008, "step": 2764 }, { "epoch": 0.5350232198142415, "grad_norm": 0.3077235817909241, "learning_rate": 9.943224242436152e-05, "loss": 0.0136, "step": 2765 }, { "epoch": 0.5352167182662538, "grad_norm": 0.13422684371471405, "learning_rate": 9.943180786272773e-05, "loss": 0.0106, "step": 2766 }, { "epoch": 0.5354102167182663, "grad_norm": 0.308946818113327, "learning_rate": 9.943137313590739e-05, "loss": 0.0138, "step": 2767 }, { "epoch": 0.5356037151702786, "grad_norm": 0.1288173943758011, "learning_rate": 9.943093824390217e-05, "loss": 0.0104, "step": 2768 }, { "epoch": 0.535797213622291, "grad_norm": 0.33785176277160645, "learning_rate": 9.943050318671367e-05, "loss": 0.0117, "step": 2769 }, { "epoch": 0.5359907120743034, "grad_norm": 0.18707291781902313, "learning_rate": 9.943006796434349e-05, "loss": 0.0122, "step": 2770 }, { "epoch": 0.5361842105263158, "grad_norm": 0.2364872395992279, "learning_rate": 9.942963257679329e-05, "loss": 0.0109, "step": 2771 }, { "epoch": 0.5363777089783281, "grad_norm": 0.26361510157585144, "learning_rate": 9.942919702406465e-05, "loss": 0.0121, "step": 2772 }, { "epoch": 0.5365712074303406, "grad_norm": 0.05550049617886543, "learning_rate": 9.942876130615921e-05, "loss": 0.0097, "step": 2773 }, { "epoch": 0.5367647058823529, "grad_norm": 0.30196046829223633, "learning_rate": 9.942832542307858e-05, "loss": 0.0115, "step": 2774 }, { 
"epoch": 0.5369582043343654, "grad_norm": 0.0788344293832779, "learning_rate": 9.942788937482438e-05, "loss": 0.0102, "step": 2775 }, { "epoch": 0.5371517027863777, "grad_norm": 0.20058248937129974, "learning_rate": 9.942745316139823e-05, "loss": 0.013, "step": 2776 }, { "epoch": 0.5373452012383901, "grad_norm": 0.08538704365491867, "learning_rate": 9.942701678280177e-05, "loss": 0.0104, "step": 2777 }, { "epoch": 0.5375386996904025, "grad_norm": 0.08425185084342957, "learning_rate": 9.942658023903661e-05, "loss": 0.0118, "step": 2778 }, { "epoch": 0.5377321981424149, "grad_norm": 0.08130714297294617, "learning_rate": 9.942614353010437e-05, "loss": 0.01, "step": 2779 }, { "epoch": 0.5379256965944272, "grad_norm": 0.0778437927365303, "learning_rate": 9.942570665600665e-05, "loss": 0.0125, "step": 2780 }, { "epoch": 0.5381191950464397, "grad_norm": 0.07186570018529892, "learning_rate": 9.942526961674513e-05, "loss": 0.0102, "step": 2781 }, { "epoch": 0.538312693498452, "grad_norm": 0.15551571547985077, "learning_rate": 9.942483241232138e-05, "loss": 0.0111, "step": 2782 }, { "epoch": 0.5385061919504643, "grad_norm": 0.06538745015859604, "learning_rate": 9.942439504273708e-05, "loss": 0.0121, "step": 2783 }, { "epoch": 0.5386996904024768, "grad_norm": 0.13631364703178406, "learning_rate": 9.942395750799379e-05, "loss": 0.0103, "step": 2784 }, { "epoch": 0.5388931888544891, "grad_norm": 0.07704600691795349, "learning_rate": 9.942351980809319e-05, "loss": 0.0116, "step": 2785 }, { "epoch": 0.5390866873065016, "grad_norm": 0.09448518604040146, "learning_rate": 9.94230819430369e-05, "loss": 0.0118, "step": 2786 }, { "epoch": 0.5392801857585139, "grad_norm": 0.042906664311885834, "learning_rate": 9.942264391282651e-05, "loss": 0.0123, "step": 2787 }, { "epoch": 0.5394736842105263, "grad_norm": 0.07825732231140137, "learning_rate": 9.942220571746372e-05, "loss": 0.0105, "step": 2788 }, { "epoch": 0.5396671826625387, "grad_norm": 0.05740940198302269, "learning_rate": 9.942176735695007e-05, "loss": 0.0115, "step": 2789 }, { "epoch": 0.5398606811145511, "grad_norm": 0.12322298437356949, "learning_rate": 9.942132883128725e-05, "loss": 0.0115, "step": 2790 }, { "epoch": 0.5400541795665634, "grad_norm": 0.09897135943174362, "learning_rate": 9.942089014047689e-05, "loss": 0.0109, "step": 2791 }, { "epoch": 0.5402476780185759, "grad_norm": 0.12353354692459106, "learning_rate": 9.94204512845206e-05, "loss": 0.0113, "step": 2792 }, { "epoch": 0.5404411764705882, "grad_norm": 0.09827802330255508, "learning_rate": 9.942001226342001e-05, "loss": 0.0111, "step": 2793 }, { "epoch": 0.5406346749226006, "grad_norm": 0.08546458184719086, "learning_rate": 9.941957307717676e-05, "loss": 0.0107, "step": 2794 }, { "epoch": 0.540828173374613, "grad_norm": 0.0911555364727974, "learning_rate": 9.941913372579247e-05, "loss": 0.009, "step": 2795 }, { "epoch": 0.5410216718266254, "grad_norm": 0.09166117757558823, "learning_rate": 9.941869420926881e-05, "loss": 0.0116, "step": 2796 }, { "epoch": 0.5412151702786377, "grad_norm": 0.09817846119403839, "learning_rate": 9.941825452760738e-05, "loss": 0.0116, "step": 2797 }, { "epoch": 0.5414086687306502, "grad_norm": 0.05908725783228874, "learning_rate": 9.941781468080983e-05, "loss": 0.0096, "step": 2798 }, { "epoch": 0.5416021671826625, "grad_norm": 0.14801377058029175, "learning_rate": 9.941737466887778e-05, "loss": 0.0087, "step": 2799 }, { "epoch": 0.541795665634675, "grad_norm": 0.07745197415351868, "learning_rate": 9.941693449181287e-05, "loss": 0.0107, "step": 2800 }, { 
"epoch": 0.5419891640866873, "grad_norm": 0.13427181541919708, "learning_rate": 9.941649414961676e-05, "loss": 0.0123, "step": 2801 }, { "epoch": 0.5421826625386997, "grad_norm": 0.06523841619491577, "learning_rate": 9.941605364229106e-05, "loss": 0.0117, "step": 2802 }, { "epoch": 0.5423761609907121, "grad_norm": 0.08780404180288315, "learning_rate": 9.941561296983741e-05, "loss": 0.0107, "step": 2803 }, { "epoch": 0.5425696594427245, "grad_norm": 0.0657038539648056, "learning_rate": 9.941517213225747e-05, "loss": 0.0106, "step": 2804 }, { "epoch": 0.5427631578947368, "grad_norm": 0.15080195665359497, "learning_rate": 9.941473112955286e-05, "loss": 0.0106, "step": 2805 }, { "epoch": 0.5429566563467493, "grad_norm": 0.09212570637464523, "learning_rate": 9.941428996172522e-05, "loss": 0.0093, "step": 2806 }, { "epoch": 0.5431501547987616, "grad_norm": 0.18503506481647491, "learning_rate": 9.94138486287762e-05, "loss": 0.0125, "step": 2807 }, { "epoch": 0.5433436532507739, "grad_norm": 0.0816507637500763, "learning_rate": 9.941340713070742e-05, "loss": 0.0119, "step": 2808 }, { "epoch": 0.5435371517027864, "grad_norm": 0.1916298121213913, "learning_rate": 9.941296546752056e-05, "loss": 0.0123, "step": 2809 }, { "epoch": 0.5437306501547987, "grad_norm": 0.08711766451597214, "learning_rate": 9.941252363921721e-05, "loss": 0.0106, "step": 2810 }, { "epoch": 0.5439241486068112, "grad_norm": 0.08606486022472382, "learning_rate": 9.941208164579907e-05, "loss": 0.0118, "step": 2811 }, { "epoch": 0.5441176470588235, "grad_norm": 0.17798393964767456, "learning_rate": 9.941163948726773e-05, "loss": 0.0122, "step": 2812 }, { "epoch": 0.5443111455108359, "grad_norm": 0.08232831954956055, "learning_rate": 9.941119716362486e-05, "loss": 0.0139, "step": 2813 }, { "epoch": 0.5445046439628483, "grad_norm": 0.2229941338300705, "learning_rate": 9.941075467487211e-05, "loss": 0.0113, "step": 2814 }, { "epoch": 0.5446981424148607, "grad_norm": 0.11360511183738708, "learning_rate": 9.941031202101112e-05, "loss": 0.0111, "step": 2815 }, { "epoch": 0.544891640866873, "grad_norm": 0.242129847407341, "learning_rate": 9.940986920204352e-05, "loss": 0.0112, "step": 2816 }, { "epoch": 0.5450851393188855, "grad_norm": 0.15647612512111664, "learning_rate": 9.940942621797099e-05, "loss": 0.0117, "step": 2817 }, { "epoch": 0.5452786377708978, "grad_norm": 0.16951675713062286, "learning_rate": 9.940898306879513e-05, "loss": 0.011, "step": 2818 }, { "epoch": 0.5454721362229102, "grad_norm": 0.21849454939365387, "learning_rate": 9.940853975451765e-05, "loss": 0.0138, "step": 2819 }, { "epoch": 0.5456656346749226, "grad_norm": 0.12109905481338501, "learning_rate": 9.940809627514013e-05, "loss": 0.0113, "step": 2820 }, { "epoch": 0.545859133126935, "grad_norm": 0.2419879138469696, "learning_rate": 9.940765263066427e-05, "loss": 0.0133, "step": 2821 }, { "epoch": 0.5460526315789473, "grad_norm": 0.14995630085468292, "learning_rate": 9.940720882109168e-05, "loss": 0.0127, "step": 2822 }, { "epoch": 0.5462461300309598, "grad_norm": 0.20875735580921173, "learning_rate": 9.940676484642403e-05, "loss": 0.0129, "step": 2823 }, { "epoch": 0.5464396284829721, "grad_norm": 0.16983948647975922, "learning_rate": 9.940632070666298e-05, "loss": 0.0109, "step": 2824 }, { "epoch": 0.5466331269349846, "grad_norm": 0.19819682836532593, "learning_rate": 9.940587640181017e-05, "loss": 0.0108, "step": 2825 }, { "epoch": 0.5468266253869969, "grad_norm": 0.15038785338401794, "learning_rate": 9.940543193186725e-05, "loss": 0.0133, "step": 2826 }, { 
"epoch": 0.5470201238390093, "grad_norm": 0.15108506381511688, "learning_rate": 9.940498729683589e-05, "loss": 0.0127, "step": 2827 }, { "epoch": 0.5472136222910217, "grad_norm": 0.04245613142848015, "learning_rate": 9.940454249671773e-05, "loss": 0.0103, "step": 2828 }, { "epoch": 0.5474071207430341, "grad_norm": 0.14410069584846497, "learning_rate": 9.940409753151441e-05, "loss": 0.0129, "step": 2829 }, { "epoch": 0.5476006191950464, "grad_norm": 0.0391450971364975, "learning_rate": 9.940365240122759e-05, "loss": 0.0108, "step": 2830 }, { "epoch": 0.5477941176470589, "grad_norm": 0.12722952663898468, "learning_rate": 9.940320710585895e-05, "loss": 0.0138, "step": 2831 }, { "epoch": 0.5479876160990712, "grad_norm": 0.04146387800574303, "learning_rate": 9.94027616454101e-05, "loss": 0.0109, "step": 2832 }, { "epoch": 0.5481811145510835, "grad_norm": 0.11883807182312012, "learning_rate": 9.940231601988274e-05, "loss": 0.0111, "step": 2833 }, { "epoch": 0.548374613003096, "grad_norm": 0.1157921552658081, "learning_rate": 9.940187022927851e-05, "loss": 0.0124, "step": 2834 }, { "epoch": 0.5485681114551083, "grad_norm": 0.1386784464120865, "learning_rate": 9.940142427359907e-05, "loss": 0.0106, "step": 2835 }, { "epoch": 0.5487616099071208, "grad_norm": 0.1014813631772995, "learning_rate": 9.940097815284608e-05, "loss": 0.0092, "step": 2836 }, { "epoch": 0.5489551083591331, "grad_norm": 0.11433210223913193, "learning_rate": 9.94005318670212e-05, "loss": 0.0099, "step": 2837 }, { "epoch": 0.5491486068111455, "grad_norm": 0.21645118296146393, "learning_rate": 9.940008541612607e-05, "loss": 0.0103, "step": 2838 }, { "epoch": 0.5493421052631579, "grad_norm": 0.10309373587369919, "learning_rate": 9.939963880016236e-05, "loss": 0.0123, "step": 2839 }, { "epoch": 0.5495356037151703, "grad_norm": 0.28271573781967163, "learning_rate": 9.939919201913173e-05, "loss": 0.0116, "step": 2840 }, { "epoch": 0.5497291021671826, "grad_norm": 0.09765134006738663, "learning_rate": 9.939874507303585e-05, "loss": 0.0085, "step": 2841 }, { "epoch": 0.5499226006191951, "grad_norm": 0.25515642762184143, "learning_rate": 9.939829796187638e-05, "loss": 0.0113, "step": 2842 }, { "epoch": 0.5501160990712074, "grad_norm": 0.08417900651693344, "learning_rate": 9.939785068565497e-05, "loss": 0.0104, "step": 2843 }, { "epoch": 0.5503095975232198, "grad_norm": 0.21961157023906708, "learning_rate": 9.93974032443733e-05, "loss": 0.0123, "step": 2844 }, { "epoch": 0.5505030959752322, "grad_norm": 0.10964177548885345, "learning_rate": 9.939695563803302e-05, "loss": 0.014, "step": 2845 }, { "epoch": 0.5506965944272446, "grad_norm": 0.22890692949295044, "learning_rate": 9.939650786663579e-05, "loss": 0.0129, "step": 2846 }, { "epoch": 0.5508900928792569, "grad_norm": 0.10250905156135559, "learning_rate": 9.93960599301833e-05, "loss": 0.0133, "step": 2847 }, { "epoch": 0.5510835913312694, "grad_norm": 0.15871018171310425, "learning_rate": 9.939561182867719e-05, "loss": 0.012, "step": 2848 }, { "epoch": 0.5512770897832817, "grad_norm": 0.18482214212417603, "learning_rate": 9.939516356211914e-05, "loss": 0.011, "step": 2849 }, { "epoch": 0.5514705882352942, "grad_norm": 0.10811100900173187, "learning_rate": 9.93947151305108e-05, "loss": 0.011, "step": 2850 }, { "epoch": 0.5516640866873065, "grad_norm": 0.20943327248096466, "learning_rate": 9.939426653385387e-05, "loss": 0.0115, "step": 2851 }, { "epoch": 0.5518575851393189, "grad_norm": 0.10384723544120789, "learning_rate": 9.939381777214997e-05, "loss": 0.0134, "step": 2852 }, { 
"epoch": 0.5520510835913313, "grad_norm": 0.1618751585483551, "learning_rate": 9.93933688454008e-05, "loss": 0.0123, "step": 2853 }, { "epoch": 0.5522445820433437, "grad_norm": 0.09061173349618912, "learning_rate": 9.939291975360803e-05, "loss": 0.0109, "step": 2854 }, { "epoch": 0.552438080495356, "grad_norm": 0.09876441210508347, "learning_rate": 9.939247049677331e-05, "loss": 0.012, "step": 2855 }, { "epoch": 0.5526315789473685, "grad_norm": 0.10194741189479828, "learning_rate": 9.939202107489835e-05, "loss": 0.0109, "step": 2856 }, { "epoch": 0.5528250773993808, "grad_norm": 0.0797366350889206, "learning_rate": 9.93915714879848e-05, "loss": 0.0108, "step": 2857 }, { "epoch": 0.5530185758513931, "grad_norm": 0.08529163151979446, "learning_rate": 9.939112173603428e-05, "loss": 0.0106, "step": 2858 }, { "epoch": 0.5532120743034056, "grad_norm": 0.09087398648262024, "learning_rate": 9.939067181904855e-05, "loss": 0.011, "step": 2859 }, { "epoch": 0.5534055727554179, "grad_norm": 0.09367895126342773, "learning_rate": 9.93902217370292e-05, "loss": 0.013, "step": 2860 }, { "epoch": 0.5535990712074303, "grad_norm": 0.03572938218712807, "learning_rate": 9.938977148997797e-05, "loss": 0.0109, "step": 2861 }, { "epoch": 0.5537925696594427, "grad_norm": 0.0662446990609169, "learning_rate": 9.938932107789651e-05, "loss": 0.0092, "step": 2862 }, { "epoch": 0.5539860681114551, "grad_norm": 0.04846194386482239, "learning_rate": 9.938887050078649e-05, "loss": 0.0104, "step": 2863 }, { "epoch": 0.5541795665634675, "grad_norm": 0.11181198805570602, "learning_rate": 9.938841975864957e-05, "loss": 0.0125, "step": 2864 }, { "epoch": 0.5543730650154799, "grad_norm": 0.06306562572717667, "learning_rate": 9.938796885148747e-05, "loss": 0.0118, "step": 2865 }, { "epoch": 0.5545665634674922, "grad_norm": 0.12704122066497803, "learning_rate": 9.938751777930182e-05, "loss": 0.011, "step": 2866 }, { "epoch": 0.5547600619195047, "grad_norm": 0.06896433979272842, "learning_rate": 9.938706654209432e-05, "loss": 0.0117, "step": 2867 }, { "epoch": 0.554953560371517, "grad_norm": 0.08587181568145752, "learning_rate": 9.938661513986664e-05, "loss": 0.0124, "step": 2868 }, { "epoch": 0.5551470588235294, "grad_norm": 0.06503856182098389, "learning_rate": 9.938616357262047e-05, "loss": 0.0121, "step": 2869 }, { "epoch": 0.5553405572755418, "grad_norm": 0.0826992467045784, "learning_rate": 9.938571184035749e-05, "loss": 0.0118, "step": 2870 }, { "epoch": 0.5555340557275542, "grad_norm": 0.046906549483537674, "learning_rate": 9.938525994307936e-05, "loss": 0.0126, "step": 2871 }, { "epoch": 0.5557275541795665, "grad_norm": 0.09122532606124878, "learning_rate": 9.938480788078776e-05, "loss": 0.0119, "step": 2872 }, { "epoch": 0.555921052631579, "grad_norm": 0.10592189431190491, "learning_rate": 9.938435565348439e-05, "loss": 0.012, "step": 2873 }, { "epoch": 0.5561145510835913, "grad_norm": 0.10825178772211075, "learning_rate": 9.938390326117093e-05, "loss": 0.0106, "step": 2874 }, { "epoch": 0.5563080495356038, "grad_norm": 0.10115806758403778, "learning_rate": 9.938345070384905e-05, "loss": 0.011, "step": 2875 }, { "epoch": 0.5565015479876161, "grad_norm": 0.11345375329256058, "learning_rate": 9.938299798152043e-05, "loss": 0.0123, "step": 2876 }, { "epoch": 0.5566950464396285, "grad_norm": 0.05598263442516327, "learning_rate": 9.938254509418677e-05, "loss": 0.0084, "step": 2877 }, { "epoch": 0.5568885448916409, "grad_norm": 0.11375392973423004, "learning_rate": 9.938209204184973e-05, "loss": 0.0113, "step": 2878 }, { 
"epoch": 0.5570820433436533, "grad_norm": 0.02973804995417595, "learning_rate": 9.9381638824511e-05, "loss": 0.0104, "step": 2879 }, { "epoch": 0.5572755417956656, "grad_norm": 0.11801068484783173, "learning_rate": 9.93811854421723e-05, "loss": 0.0093, "step": 2880 }, { "epoch": 0.5574690402476781, "grad_norm": 0.09594862908124924, "learning_rate": 9.938073189483528e-05, "loss": 0.0136, "step": 2881 }, { "epoch": 0.5576625386996904, "grad_norm": 0.1538793444633484, "learning_rate": 9.938027818250162e-05, "loss": 0.0132, "step": 2882 }, { "epoch": 0.5578560371517027, "grad_norm": 0.06868337839841843, "learning_rate": 9.937982430517303e-05, "loss": 0.0105, "step": 2883 }, { "epoch": 0.5580495356037152, "grad_norm": 0.08494894951581955, "learning_rate": 9.93793702628512e-05, "loss": 0.0104, "step": 2884 }, { "epoch": 0.5582430340557275, "grad_norm": 0.0767500251531601, "learning_rate": 9.937891605553779e-05, "loss": 0.0106, "step": 2885 }, { "epoch": 0.55843653250774, "grad_norm": 0.050230108201503754, "learning_rate": 9.937846168323451e-05, "loss": 0.0077, "step": 2886 }, { "epoch": 0.5586300309597523, "grad_norm": 0.07781434804201126, "learning_rate": 9.937800714594304e-05, "loss": 0.0108, "step": 2887 }, { "epoch": 0.5588235294117647, "grad_norm": 0.04907870292663574, "learning_rate": 9.937755244366508e-05, "loss": 0.0094, "step": 2888 }, { "epoch": 0.559017027863777, "grad_norm": 0.07086431235074997, "learning_rate": 9.937709757640233e-05, "loss": 0.0107, "step": 2889 }, { "epoch": 0.5592105263157895, "grad_norm": 0.0694570317864418, "learning_rate": 9.937664254415644e-05, "loss": 0.0091, "step": 2890 }, { "epoch": 0.5594040247678018, "grad_norm": 0.055305417627096176, "learning_rate": 9.937618734692914e-05, "loss": 0.0126, "step": 2891 }, { "epoch": 0.5595975232198143, "grad_norm": 0.07806997746229172, "learning_rate": 9.937573198472209e-05, "loss": 0.0121, "step": 2892 }, { "epoch": 0.5597910216718266, "grad_norm": 0.06707112491130829, "learning_rate": 9.937527645753704e-05, "loss": 0.013, "step": 2893 }, { "epoch": 0.559984520123839, "grad_norm": 0.06738582253456116, "learning_rate": 9.937482076537561e-05, "loss": 0.0134, "step": 2894 }, { "epoch": 0.5601780185758514, "grad_norm": 0.08751136809587479, "learning_rate": 9.937436490823957e-05, "loss": 0.0123, "step": 2895 }, { "epoch": 0.5603715170278638, "grad_norm": 0.06400267034769058, "learning_rate": 9.937390888613055e-05, "loss": 0.0105, "step": 2896 }, { "epoch": 0.5605650154798761, "grad_norm": 0.11211452633142471, "learning_rate": 9.937345269905028e-05, "loss": 0.0116, "step": 2897 }, { "epoch": 0.5607585139318886, "grad_norm": 0.10167549550533295, "learning_rate": 9.937299634700044e-05, "loss": 0.0129, "step": 2898 }, { "epoch": 0.5609520123839009, "grad_norm": 0.08421434462070465, "learning_rate": 9.937253982998274e-05, "loss": 0.0092, "step": 2899 }, { "epoch": 0.5611455108359134, "grad_norm": 0.10256408154964447, "learning_rate": 9.937208314799886e-05, "loss": 0.0109, "step": 2900 }, { "epoch": 0.5613390092879257, "grad_norm": 0.09422110766172409, "learning_rate": 9.937162630105053e-05, "loss": 0.0123, "step": 2901 }, { "epoch": 0.5615325077399381, "grad_norm": 0.07586097717285156, "learning_rate": 9.93711692891394e-05, "loss": 0.0105, "step": 2902 }, { "epoch": 0.5617260061919505, "grad_norm": 0.09087392687797546, "learning_rate": 9.93707121122672e-05, "loss": 0.0107, "step": 2903 }, { "epoch": 0.5619195046439629, "grad_norm": 0.06198470667004585, "learning_rate": 9.937025477043564e-05, "loss": 0.0122, "step": 2904 }, { 
"epoch": 0.5621130030959752, "grad_norm": 0.08604104071855545, "learning_rate": 9.93697972636464e-05, "loss": 0.0137, "step": 2905 }, { "epoch": 0.5623065015479877, "grad_norm": 0.10496553033590317, "learning_rate": 9.936933959190119e-05, "loss": 0.0109, "step": 2906 }, { "epoch": 0.5625, "grad_norm": 0.0818762257695198, "learning_rate": 9.936888175520169e-05, "loss": 0.0118, "step": 2907 }, { "epoch": 0.5626934984520123, "grad_norm": 0.05773458257317543, "learning_rate": 9.936842375354964e-05, "loss": 0.0124, "step": 2908 }, { "epoch": 0.5628869969040248, "grad_norm": 0.07587836682796478, "learning_rate": 9.936796558694672e-05, "loss": 0.0125, "step": 2909 }, { "epoch": 0.5630804953560371, "grad_norm": 0.05102359876036644, "learning_rate": 9.936750725539462e-05, "loss": 0.0126, "step": 2910 }, { "epoch": 0.5632739938080495, "grad_norm": 0.06320659071207047, "learning_rate": 9.936704875889509e-05, "loss": 0.013, "step": 2911 }, { "epoch": 0.5634674922600619, "grad_norm": 0.07270125299692154, "learning_rate": 9.936659009744978e-05, "loss": 0.0108, "step": 2912 }, { "epoch": 0.5636609907120743, "grad_norm": 0.06460301578044891, "learning_rate": 9.936613127106042e-05, "loss": 0.012, "step": 2913 }, { "epoch": 0.5638544891640866, "grad_norm": 0.07120968401432037, "learning_rate": 9.936567227972873e-05, "loss": 0.0121, "step": 2914 }, { "epoch": 0.5640479876160991, "grad_norm": 0.08527430891990662, "learning_rate": 9.93652131234564e-05, "loss": 0.0103, "step": 2915 }, { "epoch": 0.5642414860681114, "grad_norm": 0.051692478358745575, "learning_rate": 9.936475380224513e-05, "loss": 0.0102, "step": 2916 }, { "epoch": 0.5644349845201239, "grad_norm": 0.06101006641983986, "learning_rate": 9.936429431609666e-05, "loss": 0.0112, "step": 2917 }, { "epoch": 0.5646284829721362, "grad_norm": 0.060610082000494, "learning_rate": 9.936383466501266e-05, "loss": 0.0109, "step": 2918 }, { "epoch": 0.5648219814241486, "grad_norm": 0.0975998118519783, "learning_rate": 9.936337484899485e-05, "loss": 0.01, "step": 2919 }, { "epoch": 0.565015479876161, "grad_norm": 0.052595749497413635, "learning_rate": 9.936291486804494e-05, "loss": 0.0106, "step": 2920 }, { "epoch": 0.5652089783281734, "grad_norm": 0.05309073626995087, "learning_rate": 9.936245472216467e-05, "loss": 0.0117, "step": 2921 }, { "epoch": 0.5654024767801857, "grad_norm": 0.048073843121528625, "learning_rate": 9.93619944113557e-05, "loss": 0.0122, "step": 2922 }, { "epoch": 0.5655959752321982, "grad_norm": 0.04878853261470795, "learning_rate": 9.936153393561978e-05, "loss": 0.0111, "step": 2923 }, { "epoch": 0.5657894736842105, "grad_norm": 0.05122150108218193, "learning_rate": 9.936107329495859e-05, "loss": 0.0121, "step": 2924 }, { "epoch": 0.565982972136223, "grad_norm": 0.0421641580760479, "learning_rate": 9.936061248937387e-05, "loss": 0.0112, "step": 2925 }, { "epoch": 0.5661764705882353, "grad_norm": 0.06061679869890213, "learning_rate": 9.936015151886732e-05, "loss": 0.012, "step": 2926 }, { "epoch": 0.5663699690402477, "grad_norm": 0.047081030905246735, "learning_rate": 9.935969038344066e-05, "loss": 0.0098, "step": 2927 }, { "epoch": 0.56656346749226, "grad_norm": 0.05119946226477623, "learning_rate": 9.93592290830956e-05, "loss": 0.0104, "step": 2928 }, { "epoch": 0.5667569659442725, "grad_norm": 0.05379585176706314, "learning_rate": 9.935876761783386e-05, "loss": 0.0119, "step": 2929 }, { "epoch": 0.5669504643962848, "grad_norm": 0.08257249742746353, "learning_rate": 9.935830598765715e-05, "loss": 0.0115, "step": 2930 }, { "epoch": 
0.5671439628482973, "grad_norm": 0.10831506550312042, "learning_rate": 9.935784419256719e-05, "loss": 0.012, "step": 2931 }, { "epoch": 0.5673374613003096, "grad_norm": 0.10391847789287567, "learning_rate": 9.93573822325657e-05, "loss": 0.0105, "step": 2932 }, { "epoch": 0.5675309597523219, "grad_norm": 0.0824783518910408, "learning_rate": 9.935692010765439e-05, "loss": 0.0111, "step": 2933 }, { "epoch": 0.5677244582043344, "grad_norm": 0.12137655168771744, "learning_rate": 9.935645781783497e-05, "loss": 0.013, "step": 2934 }, { "epoch": 0.5679179566563467, "grad_norm": 0.09672579169273376, "learning_rate": 9.935599536310918e-05, "loss": 0.0124, "step": 2935 }, { "epoch": 0.5681114551083591, "grad_norm": 0.1005466878414154, "learning_rate": 9.935553274347873e-05, "loss": 0.009, "step": 2936 }, { "epoch": 0.5683049535603715, "grad_norm": 0.087366983294487, "learning_rate": 9.935506995894533e-05, "loss": 0.0112, "step": 2937 }, { "epoch": 0.5684984520123839, "grad_norm": 0.09713785350322723, "learning_rate": 9.935460700951073e-05, "loss": 0.0121, "step": 2938 }, { "epoch": 0.5686919504643962, "grad_norm": 0.1528112143278122, "learning_rate": 9.93541438951766e-05, "loss": 0.0127, "step": 2939 }, { "epoch": 0.5688854489164087, "grad_norm": 0.057473402470350266, "learning_rate": 9.935368061594472e-05, "loss": 0.0118, "step": 2940 }, { "epoch": 0.569078947368421, "grad_norm": 0.10343116521835327, "learning_rate": 9.935321717181675e-05, "loss": 0.0091, "step": 2941 }, { "epoch": 0.5692724458204335, "grad_norm": 0.0768013596534729, "learning_rate": 9.935275356279447e-05, "loss": 0.0096, "step": 2942 }, { "epoch": 0.5694659442724458, "grad_norm": 0.09266365319490433, "learning_rate": 9.93522897888796e-05, "loss": 0.0109, "step": 2943 }, { "epoch": 0.5696594427244582, "grad_norm": 0.04748046025633812, "learning_rate": 9.935182585007382e-05, "loss": 0.0137, "step": 2944 }, { "epoch": 0.5698529411764706, "grad_norm": 0.08828619867563248, "learning_rate": 9.935136174637888e-05, "loss": 0.0121, "step": 2945 }, { "epoch": 0.570046439628483, "grad_norm": 0.05036690831184387, "learning_rate": 9.935089747779652e-05, "loss": 0.0114, "step": 2946 }, { "epoch": 0.5702399380804953, "grad_norm": 0.06942401081323624, "learning_rate": 9.935043304432844e-05, "loss": 0.0121, "step": 2947 }, { "epoch": 0.5704334365325078, "grad_norm": 0.12415352463722229, "learning_rate": 9.934996844597638e-05, "loss": 0.0107, "step": 2948 }, { "epoch": 0.5706269349845201, "grad_norm": 0.06086255609989166, "learning_rate": 9.934950368274207e-05, "loss": 0.0111, "step": 2949 }, { "epoch": 0.5708204334365325, "grad_norm": 0.12017447501420975, "learning_rate": 9.934903875462722e-05, "loss": 0.0106, "step": 2950 }, { "epoch": 0.5710139318885449, "grad_norm": 0.05585174262523651, "learning_rate": 9.934857366163358e-05, "loss": 0.0122, "step": 2951 }, { "epoch": 0.5712074303405573, "grad_norm": 0.28003525733947754, "learning_rate": 9.934810840376288e-05, "loss": 0.013, "step": 2952 }, { "epoch": 0.5714009287925697, "grad_norm": 0.14221476018428802, "learning_rate": 9.934764298101683e-05, "loss": 0.0133, "step": 2953 }, { "epoch": 0.5715944272445821, "grad_norm": 0.22076088190078735, "learning_rate": 9.934717739339717e-05, "loss": 0.0129, "step": 2954 }, { "epoch": 0.5717879256965944, "grad_norm": 0.16497042775154114, "learning_rate": 9.934671164090565e-05, "loss": 0.0113, "step": 2955 }, { "epoch": 0.5719814241486069, "grad_norm": 0.20541226863861084, "learning_rate": 9.934624572354396e-05, "loss": 0.0096, "step": 2956 }, { "epoch": 
0.5721749226006192, "grad_norm": 0.1541370004415512, "learning_rate": 9.934577964131386e-05, "loss": 0.0136, "step": 2957 }, { "epoch": 0.5723684210526315, "grad_norm": 0.15127819776535034, "learning_rate": 9.934531339421709e-05, "loss": 0.0102, "step": 2958 }, { "epoch": 0.572561919504644, "grad_norm": 0.07413261383771896, "learning_rate": 9.934484698225536e-05, "loss": 0.0097, "step": 2959 }, { "epoch": 0.5727554179566563, "grad_norm": 0.1567482054233551, "learning_rate": 9.934438040543041e-05, "loss": 0.0101, "step": 2960 }, { "epoch": 0.5729489164086687, "grad_norm": 0.15210047364234924, "learning_rate": 9.9343913663744e-05, "loss": 0.0141, "step": 2961 }, { "epoch": 0.5731424148606811, "grad_norm": 0.12440221011638641, "learning_rate": 9.934344675719783e-05, "loss": 0.0126, "step": 2962 }, { "epoch": 0.5733359133126935, "grad_norm": 0.1663244068622589, "learning_rate": 9.934297968579367e-05, "loss": 0.012, "step": 2963 }, { "epoch": 0.5735294117647058, "grad_norm": 0.11850181967020035, "learning_rate": 9.934251244953322e-05, "loss": 0.0119, "step": 2964 }, { "epoch": 0.5737229102167183, "grad_norm": 0.13814355432987213, "learning_rate": 9.934204504841824e-05, "loss": 0.0121, "step": 2965 }, { "epoch": 0.5739164086687306, "grad_norm": 0.13351237773895264, "learning_rate": 9.934157748245046e-05, "loss": 0.0092, "step": 2966 }, { "epoch": 0.5741099071207431, "grad_norm": 0.06420611590147018, "learning_rate": 9.934110975163162e-05, "loss": 0.0095, "step": 2967 }, { "epoch": 0.5743034055727554, "grad_norm": 0.10890449583530426, "learning_rate": 9.934064185596347e-05, "loss": 0.0125, "step": 2968 }, { "epoch": 0.5744969040247678, "grad_norm": 0.06058560311794281, "learning_rate": 9.934017379544774e-05, "loss": 0.0119, "step": 2969 }, { "epoch": 0.5746904024767802, "grad_norm": 0.15573027729988098, "learning_rate": 9.933970557008617e-05, "loss": 0.0128, "step": 2970 }, { "epoch": 0.5748839009287926, "grad_norm": 0.06118597462773323, "learning_rate": 9.933923717988049e-05, "loss": 0.0108, "step": 2971 }, { "epoch": 0.5750773993808049, "grad_norm": 0.18092183768749237, "learning_rate": 9.933876862483245e-05, "loss": 0.0129, "step": 2972 }, { "epoch": 0.5752708978328174, "grad_norm": 0.08031002432107925, "learning_rate": 9.93382999049438e-05, "loss": 0.0121, "step": 2973 }, { "epoch": 0.5754643962848297, "grad_norm": 0.08925755321979523, "learning_rate": 9.933783102021628e-05, "loss": 0.0117, "step": 2974 }, { "epoch": 0.5756578947368421, "grad_norm": 0.06549359112977982, "learning_rate": 9.933736197065163e-05, "loss": 0.0115, "step": 2975 }, { "epoch": 0.5758513931888545, "grad_norm": 0.09675495326519012, "learning_rate": 9.933689275625157e-05, "loss": 0.0119, "step": 2976 }, { "epoch": 0.5760448916408669, "grad_norm": 0.08752643316984177, "learning_rate": 9.933642337701789e-05, "loss": 0.0112, "step": 2977 }, { "epoch": 0.5762383900928792, "grad_norm": 0.08343077450990677, "learning_rate": 9.933595383295232e-05, "loss": 0.0127, "step": 2978 }, { "epoch": 0.5764318885448917, "grad_norm": 0.10250457376241684, "learning_rate": 9.933548412405657e-05, "loss": 0.0095, "step": 2979 }, { "epoch": 0.576625386996904, "grad_norm": 0.09498507529497147, "learning_rate": 9.933501425033244e-05, "loss": 0.0096, "step": 2980 }, { "epoch": 0.5768188854489165, "grad_norm": 0.11736419796943665, "learning_rate": 9.933454421178163e-05, "loss": 0.0109, "step": 2981 }, { "epoch": 0.5770123839009288, "grad_norm": 0.05445883795619011, "learning_rate": 9.933407400840592e-05, "loss": 0.0125, "step": 2982 }, { "epoch": 
0.5772058823529411, "grad_norm": 0.09842296689748764, "learning_rate": 9.933360364020705e-05, "loss": 0.0101, "step": 2983 }, { "epoch": 0.5773993808049536, "grad_norm": 0.07077693194150925, "learning_rate": 9.933313310718676e-05, "loss": 0.0116, "step": 2984 }, { "epoch": 0.5775928792569659, "grad_norm": 0.08704178035259247, "learning_rate": 9.933266240934682e-05, "loss": 0.0098, "step": 2985 }, { "epoch": 0.5777863777089783, "grad_norm": 0.10365007072687149, "learning_rate": 9.933219154668895e-05, "loss": 0.0114, "step": 2986 }, { "epoch": 0.5779798761609907, "grad_norm": 0.046263135969638824, "learning_rate": 9.933172051921492e-05, "loss": 0.0122, "step": 2987 }, { "epoch": 0.5781733746130031, "grad_norm": 0.11009042710065842, "learning_rate": 9.933124932692647e-05, "loss": 0.0119, "step": 2988 }, { "epoch": 0.5783668730650154, "grad_norm": 0.13490493595600128, "learning_rate": 9.933077796982537e-05, "loss": 0.0103, "step": 2989 }, { "epoch": 0.5785603715170279, "grad_norm": 0.1111544519662857, "learning_rate": 9.933030644791336e-05, "loss": 0.0097, "step": 2990 }, { "epoch": 0.5787538699690402, "grad_norm": 0.18951812386512756, "learning_rate": 9.932983476119219e-05, "loss": 0.0093, "step": 2991 }, { "epoch": 0.5789473684210527, "grad_norm": 0.06635130941867828, "learning_rate": 9.932936290966361e-05, "loss": 0.0103, "step": 2992 }, { "epoch": 0.579140866873065, "grad_norm": 0.15904323756694794, "learning_rate": 9.932889089332939e-05, "loss": 0.0094, "step": 2993 }, { "epoch": 0.5793343653250774, "grad_norm": 0.0615973062813282, "learning_rate": 9.932841871219127e-05, "loss": 0.0107, "step": 2994 }, { "epoch": 0.5795278637770898, "grad_norm": 0.1764400154352188, "learning_rate": 9.932794636625101e-05, "loss": 0.0105, "step": 2995 }, { "epoch": 0.5797213622291022, "grad_norm": 0.05929720774292946, "learning_rate": 9.932747385551038e-05, "loss": 0.0098, "step": 2996 }, { "epoch": 0.5799148606811145, "grad_norm": 0.13389427959918976, "learning_rate": 9.932700117997113e-05, "loss": 0.0105, "step": 2997 }, { "epoch": 0.580108359133127, "grad_norm": 0.060781531035900116, "learning_rate": 9.9326528339635e-05, "loss": 0.0124, "step": 2998 }, { "epoch": 0.5803018575851393, "grad_norm": 0.1794801503419876, "learning_rate": 9.932605533450376e-05, "loss": 0.0121, "step": 2999 }, { "epoch": 0.5804953560371517, "grad_norm": 0.07491357624530792, "learning_rate": 9.932558216457917e-05, "loss": 0.0125, "step": 3000 }, { "epoch": 0.5806888544891641, "grad_norm": 0.20171160995960236, "learning_rate": 9.932510882986298e-05, "loss": 0.011, "step": 3001 }, { "epoch": 0.5808823529411765, "grad_norm": 0.11654383689165115, "learning_rate": 9.932463533035697e-05, "loss": 0.0082, "step": 3002 }, { "epoch": 0.5810758513931888, "grad_norm": 0.20495298504829407, "learning_rate": 9.932416166606289e-05, "loss": 0.0088, "step": 3003 }, { "epoch": 0.5812693498452013, "grad_norm": 0.13937775790691376, "learning_rate": 9.932368783698248e-05, "loss": 0.0122, "step": 3004 }, { "epoch": 0.5814628482972136, "grad_norm": 0.1363404244184494, "learning_rate": 9.932321384311754e-05, "loss": 0.0119, "step": 3005 }, { "epoch": 0.5816563467492261, "grad_norm": 0.12247683107852936, "learning_rate": 9.932273968446978e-05, "loss": 0.0113, "step": 3006 }, { "epoch": 0.5818498452012384, "grad_norm": 0.08050186932086945, "learning_rate": 9.932226536104104e-05, "loss": 0.0118, "step": 3007 }, { "epoch": 0.5820433436532507, "grad_norm": 0.1247834786772728, "learning_rate": 9.932179087283302e-05, "loss": 0.0097, "step": 3008 }, { "epoch": 
0.5822368421052632, "grad_norm": 0.12234178930521011, "learning_rate": 9.93213162198475e-05, "loss": 0.0112, "step": 3009 }, { "epoch": 0.5824303405572755, "grad_norm": 0.0927995964884758, "learning_rate": 9.932084140208625e-05, "loss": 0.0114, "step": 3010 }, { "epoch": 0.5826238390092879, "grad_norm": 0.08640379458665848, "learning_rate": 9.932036641955102e-05, "loss": 0.0127, "step": 3011 }, { "epoch": 0.5828173374613003, "grad_norm": 0.0744745209813118, "learning_rate": 9.93198912722436e-05, "loss": 0.0108, "step": 3012 }, { "epoch": 0.5830108359133127, "grad_norm": 0.04545846953988075, "learning_rate": 9.931941596016576e-05, "loss": 0.0099, "step": 3013 }, { "epoch": 0.583204334365325, "grad_norm": 0.06056404113769531, "learning_rate": 9.931894048331922e-05, "loss": 0.0112, "step": 3014 }, { "epoch": 0.5833978328173375, "grad_norm": 0.07661699503660202, "learning_rate": 9.93184648417058e-05, "loss": 0.0106, "step": 3015 }, { "epoch": 0.5835913312693498, "grad_norm": 0.06476957350969315, "learning_rate": 9.931798903532724e-05, "loss": 0.0108, "step": 3016 }, { "epoch": 0.5837848297213623, "grad_norm": 0.15768608450889587, "learning_rate": 9.931751306418532e-05, "loss": 0.0116, "step": 3017 }, { "epoch": 0.5839783281733746, "grad_norm": 0.1085205152630806, "learning_rate": 9.931703692828181e-05, "loss": 0.0094, "step": 3018 }, { "epoch": 0.584171826625387, "grad_norm": 0.1516750156879425, "learning_rate": 9.931656062761849e-05, "loss": 0.0103, "step": 3019 }, { "epoch": 0.5843653250773994, "grad_norm": 0.10152104496955872, "learning_rate": 9.931608416219709e-05, "loss": 0.0106, "step": 3020 }, { "epoch": 0.5845588235294118, "grad_norm": 0.16761241853237152, "learning_rate": 9.931560753201942e-05, "loss": 0.0107, "step": 3021 }, { "epoch": 0.5847523219814241, "grad_norm": 0.110040083527565, "learning_rate": 9.931513073708723e-05, "loss": 0.012, "step": 3022 }, { "epoch": 0.5849458204334366, "grad_norm": 0.20105929672718048, "learning_rate": 9.931465377740232e-05, "loss": 0.0108, "step": 3023 }, { "epoch": 0.5851393188854489, "grad_norm": 0.13088664412498474, "learning_rate": 9.931417665296644e-05, "loss": 0.0124, "step": 3024 }, { "epoch": 0.5853328173374613, "grad_norm": 0.15592291951179504, "learning_rate": 9.931369936378138e-05, "loss": 0.0123, "step": 3025 }, { "epoch": 0.5855263157894737, "grad_norm": 0.1027015671133995, "learning_rate": 9.93132219098489e-05, "loss": 0.0108, "step": 3026 }, { "epoch": 0.5857198142414861, "grad_norm": 0.05593137443065643, "learning_rate": 9.931274429117077e-05, "loss": 0.0123, "step": 3027 }, { "epoch": 0.5859133126934984, "grad_norm": 0.09875013679265976, "learning_rate": 9.931226650774879e-05, "loss": 0.0119, "step": 3028 }, { "epoch": 0.5861068111455109, "grad_norm": 0.045197099447250366, "learning_rate": 9.931178855958471e-05, "loss": 0.0096, "step": 3029 }, { "epoch": 0.5863003095975232, "grad_norm": 0.13942652940750122, "learning_rate": 9.931131044668031e-05, "loss": 0.0119, "step": 3030 }, { "epoch": 0.5864938080495357, "grad_norm": 0.08503284305334091, "learning_rate": 9.931083216903741e-05, "loss": 0.0111, "step": 3031 }, { "epoch": 0.586687306501548, "grad_norm": 0.10088331997394562, "learning_rate": 9.931035372665773e-05, "loss": 0.0111, "step": 3032 }, { "epoch": 0.5868808049535603, "grad_norm": 0.07652734220027924, "learning_rate": 9.930987511954306e-05, "loss": 0.0111, "step": 3033 }, { "epoch": 0.5870743034055728, "grad_norm": 0.08938237279653549, "learning_rate": 9.930939634769522e-05, "loss": 0.0099, "step": 3034 }, { "epoch": 
0.5872678018575851, "grad_norm": 0.09120122343301773, "learning_rate": 9.930891741111595e-05, "loss": 0.0114, "step": 3035 }, { "epoch": 0.5874613003095975, "grad_norm": 0.08761235326528549, "learning_rate": 9.930843830980705e-05, "loss": 0.0109, "step": 3036 }, { "epoch": 0.5876547987616099, "grad_norm": 0.11974356323480606, "learning_rate": 9.930795904377029e-05, "loss": 0.0115, "step": 3037 }, { "epoch": 0.5878482972136223, "grad_norm": 0.07704208046197891, "learning_rate": 9.930747961300745e-05, "loss": 0.0104, "step": 3038 }, { "epoch": 0.5880417956656346, "grad_norm": 0.1005702018737793, "learning_rate": 9.930700001752033e-05, "loss": 0.012, "step": 3039 }, { "epoch": 0.5882352941176471, "grad_norm": 0.09288153797388077, "learning_rate": 9.930652025731069e-05, "loss": 0.0136, "step": 3040 }, { "epoch": 0.5884287925696594, "grad_norm": 0.08336054533720016, "learning_rate": 9.930604033238034e-05, "loss": 0.0114, "step": 3041 }, { "epoch": 0.5886222910216719, "grad_norm": 0.06800975650548935, "learning_rate": 9.930556024273103e-05, "loss": 0.0112, "step": 3042 }, { "epoch": 0.5888157894736842, "grad_norm": 0.06730745732784271, "learning_rate": 9.930507998836458e-05, "loss": 0.011, "step": 3043 }, { "epoch": 0.5890092879256966, "grad_norm": 0.09206993132829666, "learning_rate": 9.930459956928274e-05, "loss": 0.0087, "step": 3044 }, { "epoch": 0.589202786377709, "grad_norm": 0.07622124254703522, "learning_rate": 9.930411898548735e-05, "loss": 0.0088, "step": 3045 }, { "epoch": 0.5893962848297214, "grad_norm": 0.08483986556529999, "learning_rate": 9.930363823698013e-05, "loss": 0.0107, "step": 3046 }, { "epoch": 0.5895897832817337, "grad_norm": 0.09564254432916641, "learning_rate": 9.930315732376291e-05, "loss": 0.0105, "step": 3047 }, { "epoch": 0.5897832817337462, "grad_norm": 0.08067016303539276, "learning_rate": 9.930267624583748e-05, "loss": 0.0098, "step": 3048 }, { "epoch": 0.5899767801857585, "grad_norm": 0.07530473172664642, "learning_rate": 9.930219500320559e-05, "loss": 0.0083, "step": 3049 }, { "epoch": 0.5901702786377709, "grad_norm": 0.04415223374962807, "learning_rate": 9.930171359586907e-05, "loss": 0.0125, "step": 3050 }, { "epoch": 0.5903637770897833, "grad_norm": 0.0657026618719101, "learning_rate": 9.93012320238297e-05, "loss": 0.0088, "step": 3051 }, { "epoch": 0.5905572755417957, "grad_norm": 0.05422975867986679, "learning_rate": 9.930075028708927e-05, "loss": 0.0104, "step": 3052 }, { "epoch": 0.590750773993808, "grad_norm": 0.0750315710902214, "learning_rate": 9.930026838564955e-05, "loss": 0.0101, "step": 3053 }, { "epoch": 0.5909442724458205, "grad_norm": 0.04727334901690483, "learning_rate": 9.929978631951235e-05, "loss": 0.0102, "step": 3054 }, { "epoch": 0.5911377708978328, "grad_norm": 0.09591135382652283, "learning_rate": 9.929930408867946e-05, "loss": 0.0108, "step": 3055 }, { "epoch": 0.5913312693498453, "grad_norm": 0.05311734974384308, "learning_rate": 9.929882169315267e-05, "loss": 0.0109, "step": 3056 }, { "epoch": 0.5915247678018576, "grad_norm": 0.08922021836042404, "learning_rate": 9.92983391329338e-05, "loss": 0.0126, "step": 3057 }, { "epoch": 0.5917182662538699, "grad_norm": 0.08756513893604279, "learning_rate": 9.929785640802461e-05, "loss": 0.01, "step": 3058 }, { "epoch": 0.5919117647058824, "grad_norm": 0.10421622544527054, "learning_rate": 9.92973735184269e-05, "loss": 0.0101, "step": 3059 }, { "epoch": 0.5921052631578947, "grad_norm": 0.09598841518163681, "learning_rate": 9.929689046414246e-05, "loss": 0.0106, "step": 3060 }, { "epoch": 
0.5922987616099071, "grad_norm": 0.08944503217935562, "learning_rate": 9.929640724517312e-05, "loss": 0.0104, "step": 3061 }, { "epoch": 0.5924922600619195, "grad_norm": 0.12144934386014938, "learning_rate": 9.929592386152063e-05, "loss": 0.0103, "step": 3062 }, { "epoch": 0.5926857585139319, "grad_norm": 0.09116294234991074, "learning_rate": 9.929544031318682e-05, "loss": 0.0099, "step": 3063 }, { "epoch": 0.5928792569659442, "grad_norm": 0.1398458033800125, "learning_rate": 9.929495660017349e-05, "loss": 0.013, "step": 3064 }, { "epoch": 0.5930727554179567, "grad_norm": 0.11613603681325912, "learning_rate": 9.92944727224824e-05, "loss": 0.0121, "step": 3065 }, { "epoch": 0.593266253869969, "grad_norm": 0.09422899037599564, "learning_rate": 9.92939886801154e-05, "loss": 0.0111, "step": 3066 }, { "epoch": 0.5934597523219814, "grad_norm": 0.07677953690290451, "learning_rate": 9.929350447307424e-05, "loss": 0.0101, "step": 3067 }, { "epoch": 0.5936532507739938, "grad_norm": 0.16161499917507172, "learning_rate": 9.929302010136076e-05, "loss": 0.0111, "step": 3068 }, { "epoch": 0.5938467492260062, "grad_norm": 0.11601573973894119, "learning_rate": 9.929253556497675e-05, "loss": 0.009, "step": 3069 }, { "epoch": 0.5940402476780186, "grad_norm": 0.07170068472623825, "learning_rate": 9.929205086392399e-05, "loss": 0.0127, "step": 3070 }, { "epoch": 0.594233746130031, "grad_norm": 0.08260990679264069, "learning_rate": 9.92915659982043e-05, "loss": 0.012, "step": 3071 }, { "epoch": 0.5944272445820433, "grad_norm": 0.19538943469524384, "learning_rate": 9.929108096781951e-05, "loss": 0.0109, "step": 3072 }, { "epoch": 0.5946207430340558, "grad_norm": 0.050527095794677734, "learning_rate": 9.929059577277136e-05, "loss": 0.0093, "step": 3073 }, { "epoch": 0.5948142414860681, "grad_norm": 0.1992661952972412, "learning_rate": 9.92901104130617e-05, "loss": 0.0123, "step": 3074 }, { "epoch": 0.5950077399380805, "grad_norm": 0.0430762879550457, "learning_rate": 9.928962488869234e-05, "loss": 0.0113, "step": 3075 }, { "epoch": 0.5952012383900929, "grad_norm": 0.10990247130393982, "learning_rate": 9.928913919966505e-05, "loss": 0.0125, "step": 3076 }, { "epoch": 0.5953947368421053, "grad_norm": 0.10068338364362717, "learning_rate": 9.928865334598165e-05, "loss": 0.0095, "step": 3077 }, { "epoch": 0.5955882352941176, "grad_norm": 0.09645310044288635, "learning_rate": 9.928816732764396e-05, "loss": 0.0106, "step": 3078 }, { "epoch": 0.5957817337461301, "grad_norm": 0.12084536254405975, "learning_rate": 9.928768114465377e-05, "loss": 0.0085, "step": 3079 }, { "epoch": 0.5959752321981424, "grad_norm": 0.09994591772556305, "learning_rate": 9.92871947970129e-05, "loss": 0.0128, "step": 3080 }, { "epoch": 0.5961687306501547, "grad_norm": 0.21916675567626953, "learning_rate": 9.928670828472315e-05, "loss": 0.0118, "step": 3081 }, { "epoch": 0.5963622291021672, "grad_norm": 0.0906030535697937, "learning_rate": 9.928622160778632e-05, "loss": 0.0107, "step": 3082 }, { "epoch": 0.5965557275541795, "grad_norm": 0.36194273829460144, "learning_rate": 9.928573476620426e-05, "loss": 0.0121, "step": 3083 }, { "epoch": 0.596749226006192, "grad_norm": 0.13472256064414978, "learning_rate": 9.928524775997872e-05, "loss": 0.0134, "step": 3084 }, { "epoch": 0.5969427244582043, "grad_norm": 0.3683573603630066, "learning_rate": 9.928476058911154e-05, "loss": 0.0126, "step": 3085 }, { "epoch": 0.5971362229102167, "grad_norm": 0.18602178990840912, "learning_rate": 9.928427325360455e-05, "loss": 0.0114, "step": 3086 }, { "epoch": 
0.5973297213622291, "grad_norm": 0.21301518380641937, "learning_rate": 9.928378575345955e-05, "loss": 0.0107, "step": 3087 }, { "epoch": 0.5975232198142415, "grad_norm": 0.3027902841567993, "learning_rate": 9.928329808867834e-05, "loss": 0.0133, "step": 3088 }, { "epoch": 0.5977167182662538, "grad_norm": 0.14609482884407043, "learning_rate": 9.928281025926272e-05, "loss": 0.0094, "step": 3089 }, { "epoch": 0.5979102167182663, "grad_norm": 0.3322398066520691, "learning_rate": 9.928232226521456e-05, "loss": 0.0121, "step": 3090 }, { "epoch": 0.5981037151702786, "grad_norm": 0.07331907004117966, "learning_rate": 9.928183410653559e-05, "loss": 0.0096, "step": 3091 }, { "epoch": 0.598297213622291, "grad_norm": 0.33531641960144043, "learning_rate": 9.92813457832277e-05, "loss": 0.0135, "step": 3092 }, { "epoch": 0.5984907120743034, "grad_norm": 0.10772329568862915, "learning_rate": 9.928085729529268e-05, "loss": 0.0101, "step": 3093 }, { "epoch": 0.5986842105263158, "grad_norm": 0.29146093130111694, "learning_rate": 9.928036864273235e-05, "loss": 0.0127, "step": 3094 }, { "epoch": 0.5988777089783281, "grad_norm": 0.1919802874326706, "learning_rate": 9.92798798255485e-05, "loss": 0.0137, "step": 3095 }, { "epoch": 0.5990712074303406, "grad_norm": 0.23069091141223907, "learning_rate": 9.927939084374297e-05, "loss": 0.0127, "step": 3096 }, { "epoch": 0.5992647058823529, "grad_norm": 0.2526666820049286, "learning_rate": 9.927890169731758e-05, "loss": 0.0121, "step": 3097 }, { "epoch": 0.5994582043343654, "grad_norm": 0.15314511954784393, "learning_rate": 9.927841238627414e-05, "loss": 0.0117, "step": 3098 }, { "epoch": 0.5996517027863777, "grad_norm": 0.25130730867385864, "learning_rate": 9.927792291061449e-05, "loss": 0.0108, "step": 3099 }, { "epoch": 0.5998452012383901, "grad_norm": 0.10678189247846603, "learning_rate": 9.927743327034041e-05, "loss": 0.0105, "step": 3100 }, { "epoch": 0.6000386996904025, "grad_norm": 0.33058324456214905, "learning_rate": 9.927694346545377e-05, "loss": 0.0116, "step": 3101 }, { "epoch": 0.6002321981424149, "grad_norm": 0.11932207643985748, "learning_rate": 9.927645349595634e-05, "loss": 0.0088, "step": 3102 }, { "epoch": 0.6004256965944272, "grad_norm": 0.30467313528060913, "learning_rate": 9.927596336184998e-05, "loss": 0.0108, "step": 3103 }, { "epoch": 0.6006191950464397, "grad_norm": 0.1776636391878128, "learning_rate": 9.927547306313649e-05, "loss": 0.0109, "step": 3104 }, { "epoch": 0.600812693498452, "grad_norm": 0.24453964829444885, "learning_rate": 9.92749825998177e-05, "loss": 0.0115, "step": 3105 }, { "epoch": 0.6010061919504643, "grad_norm": 0.23582705855369568, "learning_rate": 9.927449197189544e-05, "loss": 0.0125, "step": 3106 }, { "epoch": 0.6011996904024768, "grad_norm": 0.20877601206302643, "learning_rate": 9.927400117937153e-05, "loss": 0.0105, "step": 3107 }, { "epoch": 0.6013931888544891, "grad_norm": 0.21922935545444489, "learning_rate": 9.927351022224779e-05, "loss": 0.0097, "step": 3108 }, { "epoch": 0.6015866873065016, "grad_norm": 0.24523845314979553, "learning_rate": 9.927301910052605e-05, "loss": 0.0093, "step": 3109 }, { "epoch": 0.6017801857585139, "grad_norm": 0.20122314989566803, "learning_rate": 9.927252781420813e-05, "loss": 0.0116, "step": 3110 }, { "epoch": 0.6019736842105263, "grad_norm": 0.23119188845157623, "learning_rate": 9.927203636329586e-05, "loss": 0.0111, "step": 3111 }, { "epoch": 0.6021671826625387, "grad_norm": 0.15054024755954742, "learning_rate": 9.927154474779108e-05, "loss": 0.0096, "step": 3112 }, { "epoch": 
0.6023606811145511, "grad_norm": 0.23689185082912445, "learning_rate": 9.92710529676956e-05, "loss": 0.0132, "step": 3113 }, { "epoch": 0.6025541795665634, "grad_norm": 0.1234557181596756, "learning_rate": 9.927056102301124e-05, "loss": 0.0093, "step": 3114 }, { "epoch": 0.6027476780185759, "grad_norm": 0.23407655954360962, "learning_rate": 9.927006891373986e-05, "loss": 0.0101, "step": 3115 }, { "epoch": 0.6029411764705882, "grad_norm": 0.08762647956609726, "learning_rate": 9.926957663988325e-05, "loss": 0.0128, "step": 3116 }, { "epoch": 0.6031346749226006, "grad_norm": 0.24057915806770325, "learning_rate": 9.926908420144329e-05, "loss": 0.011, "step": 3117 }, { "epoch": 0.603328173374613, "grad_norm": 0.09242542088031769, "learning_rate": 9.926859159842177e-05, "loss": 0.0103, "step": 3118 }, { "epoch": 0.6035216718266254, "grad_norm": 0.2561476528644562, "learning_rate": 9.926809883082053e-05, "loss": 0.0086, "step": 3119 }, { "epoch": 0.6037151702786377, "grad_norm": 0.1070299744606018, "learning_rate": 9.92676058986414e-05, "loss": 0.0119, "step": 3120 }, { "epoch": 0.6039086687306502, "grad_norm": 0.25136950612068176, "learning_rate": 9.926711280188624e-05, "loss": 0.0133, "step": 3121 }, { "epoch": 0.6041021671826625, "grad_norm": 0.1406373828649521, "learning_rate": 9.926661954055684e-05, "loss": 0.0129, "step": 3122 }, { "epoch": 0.604295665634675, "grad_norm": 0.26033076643943787, "learning_rate": 9.926612611465508e-05, "loss": 0.0114, "step": 3123 }, { "epoch": 0.6044891640866873, "grad_norm": 0.17073631286621094, "learning_rate": 9.926563252418276e-05, "loss": 0.0105, "step": 3124 }, { "epoch": 0.6046826625386997, "grad_norm": 0.18979279696941376, "learning_rate": 9.926513876914172e-05, "loss": 0.011, "step": 3125 }, { "epoch": 0.6048761609907121, "grad_norm": 0.2043602019548416, "learning_rate": 9.92646448495338e-05, "loss": 0.0108, "step": 3126 }, { "epoch": 0.6050696594427245, "grad_norm": 0.12622115015983582, "learning_rate": 9.926415076536085e-05, "loss": 0.0115, "step": 3127 }, { "epoch": 0.6052631578947368, "grad_norm": 0.1753089725971222, "learning_rate": 9.926365651662469e-05, "loss": 0.0126, "step": 3128 }, { "epoch": 0.6054566563467493, "grad_norm": 0.09921945631504059, "learning_rate": 9.926316210332716e-05, "loss": 0.0098, "step": 3129 }, { "epoch": 0.6056501547987616, "grad_norm": 0.164779931306839, "learning_rate": 9.92626675254701e-05, "loss": 0.0108, "step": 3130 }, { "epoch": 0.6058436532507739, "grad_norm": 0.11641032993793488, "learning_rate": 9.926217278305535e-05, "loss": 0.009, "step": 3131 }, { "epoch": 0.6060371517027864, "grad_norm": 0.1472783386707306, "learning_rate": 9.926167787608474e-05, "loss": 0.0108, "step": 3132 }, { "epoch": 0.6062306501547987, "grad_norm": 0.16589681804180145, "learning_rate": 9.926118280456013e-05, "loss": 0.0128, "step": 3133 }, { "epoch": 0.6064241486068112, "grad_norm": 0.13074380159378052, "learning_rate": 9.926068756848334e-05, "loss": 0.0117, "step": 3134 }, { "epoch": 0.6066176470588235, "grad_norm": 0.15947772562503815, "learning_rate": 9.926019216785622e-05, "loss": 0.011, "step": 3135 }, { "epoch": 0.6068111455108359, "grad_norm": 0.09639069437980652, "learning_rate": 9.925969660268063e-05, "loss": 0.0108, "step": 3136 }, { "epoch": 0.6070046439628483, "grad_norm": 0.12356334179639816, "learning_rate": 9.925920087295836e-05, "loss": 0.0141, "step": 3137 }, { "epoch": 0.6071981424148607, "grad_norm": 0.0837327316403389, "learning_rate": 9.925870497869131e-05, "loss": 0.0112, "step": 3138 }, { "epoch": 
0.607391640866873, "grad_norm": 0.20582067966461182, "learning_rate": 9.925820891988129e-05, "loss": 0.0107, "step": 3139 }, { "epoch": 0.6075851393188855, "grad_norm": 0.136132150888443, "learning_rate": 9.925771269653017e-05, "loss": 0.0092, "step": 3140 }, { "epoch": 0.6077786377708978, "grad_norm": 0.10651200264692307, "learning_rate": 9.925721630863978e-05, "loss": 0.0089, "step": 3141 }, { "epoch": 0.6079721362229102, "grad_norm": 0.06866457313299179, "learning_rate": 9.925671975621193e-05, "loss": 0.0081, "step": 3142 }, { "epoch": 0.6081656346749226, "grad_norm": 0.09233933687210083, "learning_rate": 9.925622303924854e-05, "loss": 0.012, "step": 3143 }, { "epoch": 0.608359133126935, "grad_norm": 0.06176742911338806, "learning_rate": 9.92557261577514e-05, "loss": 0.0094, "step": 3144 }, { "epoch": 0.6085526315789473, "grad_norm": 0.16925521194934845, "learning_rate": 9.925522911172238e-05, "loss": 0.0125, "step": 3145 }, { "epoch": 0.6087461300309598, "grad_norm": 0.08862801641225815, "learning_rate": 9.925473190116333e-05, "loss": 0.0117, "step": 3146 }, { "epoch": 0.6089396284829721, "grad_norm": 0.19723591208457947, "learning_rate": 9.925423452607607e-05, "loss": 0.0105, "step": 3147 }, { "epoch": 0.6091331269349846, "grad_norm": 0.052415695041418076, "learning_rate": 9.925373698646249e-05, "loss": 0.0107, "step": 3148 }, { "epoch": 0.6093266253869969, "grad_norm": 0.1537225991487503, "learning_rate": 9.92532392823244e-05, "loss": 0.0148, "step": 3149 }, { "epoch": 0.6095201238390093, "grad_norm": 0.10331501811742783, "learning_rate": 9.92527414136637e-05, "loss": 0.0098, "step": 3150 }, { "epoch": 0.6097136222910217, "grad_norm": 0.14062751829624176, "learning_rate": 9.92522433804822e-05, "loss": 0.0103, "step": 3151 }, { "epoch": 0.6099071207430341, "grad_norm": 0.11131655424833298, "learning_rate": 9.925174518278175e-05, "loss": 0.0117, "step": 3152 }, { "epoch": 0.6101006191950464, "grad_norm": 0.056459419429302216, "learning_rate": 9.925124682056422e-05, "loss": 0.0108, "step": 3153 }, { "epoch": 0.6102941176470589, "grad_norm": 0.14349302649497986, "learning_rate": 9.925074829383147e-05, "loss": 0.0103, "step": 3154 }, { "epoch": 0.6104876160990712, "grad_norm": 0.07210663706064224, "learning_rate": 9.925024960258533e-05, "loss": 0.0092, "step": 3155 }, { "epoch": 0.6106811145510835, "grad_norm": 0.04834427684545517, "learning_rate": 9.924975074682766e-05, "loss": 0.0115, "step": 3156 }, { "epoch": 0.610874613003096, "grad_norm": 0.08398962765932083, "learning_rate": 9.924925172656033e-05, "loss": 0.013, "step": 3157 }, { "epoch": 0.6110681114551083, "grad_norm": 0.0678742304444313, "learning_rate": 9.924875254178519e-05, "loss": 0.0096, "step": 3158 }, { "epoch": 0.6112616099071208, "grad_norm": 0.06498183310031891, "learning_rate": 9.924825319250408e-05, "loss": 0.0095, "step": 3159 }, { "epoch": 0.6114551083591331, "grad_norm": 0.08196506649255753, "learning_rate": 9.924775367871887e-05, "loss": 0.013, "step": 3160 }, { "epoch": 0.6116486068111455, "grad_norm": 0.049313876777887344, "learning_rate": 9.924725400043144e-05, "loss": 0.0111, "step": 3161 }, { "epoch": 0.6118421052631579, "grad_norm": 0.08381173014640808, "learning_rate": 9.924675415764359e-05, "loss": 0.0113, "step": 3162 }, { "epoch": 0.6120356037151703, "grad_norm": 0.11323915421962738, "learning_rate": 9.924625415035722e-05, "loss": 0.0095, "step": 3163 }, { "epoch": 0.6122291021671826, "grad_norm": 0.05468325689435005, "learning_rate": 9.924575397857418e-05, "loss": 0.009, "step": 3164 }, { "epoch": 
0.6124226006191951, "grad_norm": 0.1301620900630951, "learning_rate": 9.924525364229634e-05, "loss": 0.0112, "step": 3165 }, { "epoch": 0.6126160990712074, "grad_norm": 0.10698502510786057, "learning_rate": 9.924475314152553e-05, "loss": 0.0103, "step": 3166 }, { "epoch": 0.6128095975232198, "grad_norm": 0.08520366996526718, "learning_rate": 9.924425247626364e-05, "loss": 0.0115, "step": 3167 }, { "epoch": 0.6130030959752322, "grad_norm": 0.1442229300737381, "learning_rate": 9.924375164651251e-05, "loss": 0.0101, "step": 3168 }, { "epoch": 0.6131965944272446, "grad_norm": 0.08891437947750092, "learning_rate": 9.924325065227403e-05, "loss": 0.0105, "step": 3169 }, { "epoch": 0.6133900928792569, "grad_norm": 0.11481216549873352, "learning_rate": 9.924274949355005e-05, "loss": 0.0105, "step": 3170 }, { "epoch": 0.6135835913312694, "grad_norm": 0.08707872778177261, "learning_rate": 9.924224817034241e-05, "loss": 0.0118, "step": 3171 }, { "epoch": 0.6137770897832817, "grad_norm": 0.08760254085063934, "learning_rate": 9.924174668265301e-05, "loss": 0.0108, "step": 3172 }, { "epoch": 0.6139705882352942, "grad_norm": 0.16084860265254974, "learning_rate": 9.924124503048369e-05, "loss": 0.0119, "step": 3173 }, { "epoch": 0.6141640866873065, "grad_norm": 0.10809300094842911, "learning_rate": 9.924074321383632e-05, "loss": 0.0091, "step": 3174 }, { "epoch": 0.6143575851393189, "grad_norm": 0.15837377309799194, "learning_rate": 9.924024123271277e-05, "loss": 0.0104, "step": 3175 }, { "epoch": 0.6145510835913313, "grad_norm": 0.12225654721260071, "learning_rate": 9.92397390871149e-05, "loss": 0.0102, "step": 3176 }, { "epoch": 0.6147445820433437, "grad_norm": 0.15872596204280853, "learning_rate": 9.923923677704459e-05, "loss": 0.0119, "step": 3177 }, { "epoch": 0.614938080495356, "grad_norm": 0.11814437061548233, "learning_rate": 9.923873430250369e-05, "loss": 0.0123, "step": 3178 }, { "epoch": 0.6151315789473685, "grad_norm": 0.1043514758348465, "learning_rate": 9.923823166349408e-05, "loss": 0.0102, "step": 3179 }, { "epoch": 0.6153250773993808, "grad_norm": 0.19733759760856628, "learning_rate": 9.923772886001761e-05, "loss": 0.0096, "step": 3180 }, { "epoch": 0.6155185758513931, "grad_norm": 0.07603226602077484, "learning_rate": 9.923722589207619e-05, "loss": 0.0101, "step": 3181 }, { "epoch": 0.6157120743034056, "grad_norm": 0.16696025431156158, "learning_rate": 9.923672275967165e-05, "loss": 0.0118, "step": 3182 }, { "epoch": 0.6159055727554179, "grad_norm": 0.06275875121355057, "learning_rate": 9.923621946280588e-05, "loss": 0.0098, "step": 3183 }, { "epoch": 0.6160990712074303, "grad_norm": 0.14724792540073395, "learning_rate": 9.923571600148076e-05, "loss": 0.0122, "step": 3184 }, { "epoch": 0.6162925696594427, "grad_norm": 0.05018559843301773, "learning_rate": 9.923521237569813e-05, "loss": 0.0116, "step": 3185 }, { "epoch": 0.6164860681114551, "grad_norm": 0.13164514303207397, "learning_rate": 9.923470858545989e-05, "loss": 0.0124, "step": 3186 }, { "epoch": 0.6166795665634675, "grad_norm": 0.0369117446243763, "learning_rate": 9.923420463076789e-05, "loss": 0.011, "step": 3187 }, { "epoch": 0.6168730650154799, "grad_norm": 0.08093107491731644, "learning_rate": 9.923370051162403e-05, "loss": 0.0103, "step": 3188 }, { "epoch": 0.6170665634674922, "grad_norm": 0.13153596222400665, "learning_rate": 9.923319622803014e-05, "loss": 0.0113, "step": 3189 }, { "epoch": 0.6172600619195047, "grad_norm": 0.15655255317687988, "learning_rate": 9.923269177998817e-05, "loss": 0.0118, "step": 3190 }, { "epoch": 
0.617453560371517, "grad_norm": 0.12636089324951172, "learning_rate": 9.923218716749992e-05, "loss": 0.0106, "step": 3191 }, { "epoch": 0.6176470588235294, "grad_norm": 0.07512272149324417, "learning_rate": 9.923168239056731e-05, "loss": 0.0099, "step": 3192 }, { "epoch": 0.6178405572755418, "grad_norm": 0.26122134923934937, "learning_rate": 9.923117744919221e-05, "loss": 0.0103, "step": 3193 }, { "epoch": 0.6180340557275542, "grad_norm": 0.06299740821123123, "learning_rate": 9.923067234337647e-05, "loss": 0.0115, "step": 3194 }, { "epoch": 0.6182275541795665, "grad_norm": 0.22998706996440887, "learning_rate": 9.923016707312202e-05, "loss": 0.0115, "step": 3195 }, { "epoch": 0.618421052631579, "grad_norm": 0.12128964066505432, "learning_rate": 9.922966163843067e-05, "loss": 0.0127, "step": 3196 }, { "epoch": 0.6186145510835913, "grad_norm": 0.23824025690555573, "learning_rate": 9.922915603930437e-05, "loss": 0.013, "step": 3197 }, { "epoch": 0.6188080495356038, "grad_norm": 0.13485518097877502, "learning_rate": 9.922865027574493e-05, "loss": 0.0092, "step": 3198 }, { "epoch": 0.6190015479876161, "grad_norm": 0.21066686511039734, "learning_rate": 9.922814434775431e-05, "loss": 0.0124, "step": 3199 }, { "epoch": 0.6191950464396285, "grad_norm": 0.13439153134822845, "learning_rate": 9.922763825533432e-05, "loss": 0.0123, "step": 3200 }, { "epoch": 0.6193885448916409, "grad_norm": 0.12734214961528778, "learning_rate": 9.922713199848688e-05, "loss": 0.011, "step": 3201 }, { "epoch": 0.6195820433436533, "grad_norm": 0.14972998201847076, "learning_rate": 9.922662557721385e-05, "loss": 0.0109, "step": 3202 }, { "epoch": 0.6197755417956656, "grad_norm": 0.09369192272424698, "learning_rate": 9.922611899151712e-05, "loss": 0.0107, "step": 3203 }, { "epoch": 0.6199690402476781, "grad_norm": 0.13491223752498627, "learning_rate": 9.92256122413986e-05, "loss": 0.0113, "step": 3204 }, { "epoch": 0.6201625386996904, "grad_norm": 0.08653432130813599, "learning_rate": 9.922510532686013e-05, "loss": 0.0109, "step": 3205 }, { "epoch": 0.6203560371517027, "grad_norm": 0.08750659227371216, "learning_rate": 9.922459824790363e-05, "loss": 0.0127, "step": 3206 }, { "epoch": 0.6205495356037152, "grad_norm": 0.06315567344427109, "learning_rate": 9.922409100453098e-05, "loss": 0.0092, "step": 3207 }, { "epoch": 0.6207430340557275, "grad_norm": 0.06561272591352463, "learning_rate": 9.922358359674402e-05, "loss": 0.0107, "step": 3208 }, { "epoch": 0.62093653250774, "grad_norm": 0.07485195249319077, "learning_rate": 9.922307602454469e-05, "loss": 0.0097, "step": 3209 }, { "epoch": 0.6211300309597523, "grad_norm": 0.09791389107704163, "learning_rate": 9.922256828793486e-05, "loss": 0.0106, "step": 3210 }, { "epoch": 0.6213235294117647, "grad_norm": 0.07513880729675293, "learning_rate": 9.922206038691642e-05, "loss": 0.0085, "step": 3211 }, { "epoch": 0.621517027863777, "grad_norm": 0.07325191050767899, "learning_rate": 9.922155232149127e-05, "loss": 0.0111, "step": 3212 }, { "epoch": 0.6217105263157895, "grad_norm": 0.0868382379412651, "learning_rate": 9.922104409166128e-05, "loss": 0.0106, "step": 3213 }, { "epoch": 0.6219040247678018, "grad_norm": 0.12562301754951477, "learning_rate": 9.922053569742833e-05, "loss": 0.0099, "step": 3214 }, { "epoch": 0.6220975232198143, "grad_norm": 0.07273158431053162, "learning_rate": 9.922002713879435e-05, "loss": 0.0098, "step": 3215 }, { "epoch": 0.6222910216718266, "grad_norm": 0.12907420098781586, "learning_rate": 9.921951841576117e-05, "loss": 0.0099, "step": 3216 }, { "epoch": 
0.622484520123839, "grad_norm": 0.12006498128175735, "learning_rate": 9.921900952833075e-05, "loss": 0.012, "step": 3217 }, { "epoch": 0.6226780185758514, "grad_norm": 0.11353948712348938, "learning_rate": 9.921850047650492e-05, "loss": 0.0082, "step": 3218 }, { "epoch": 0.6228715170278638, "grad_norm": 0.1185096800327301, "learning_rate": 9.92179912602856e-05, "loss": 0.0128, "step": 3219 }, { "epoch": 0.6230650154798761, "grad_norm": 0.10276535898447037, "learning_rate": 9.921748187967471e-05, "loss": 0.0111, "step": 3220 }, { "epoch": 0.6232585139318886, "grad_norm": 0.12434506416320801, "learning_rate": 9.92169723346741e-05, "loss": 0.0114, "step": 3221 }, { "epoch": 0.6234520123839009, "grad_norm": 0.14865612983703613, "learning_rate": 9.921646262528569e-05, "loss": 0.0105, "step": 3222 }, { "epoch": 0.6236455108359134, "grad_norm": 0.22114376723766327, "learning_rate": 9.921595275151137e-05, "loss": 0.0115, "step": 3223 }, { "epoch": 0.6238390092879257, "grad_norm": 0.07024889439344406, "learning_rate": 9.921544271335303e-05, "loss": 0.0107, "step": 3224 }, { "epoch": 0.6240325077399381, "grad_norm": 0.2338273674249649, "learning_rate": 9.921493251081256e-05, "loss": 0.0105, "step": 3225 }, { "epoch": 0.6242260061919505, "grad_norm": 0.042892955243587494, "learning_rate": 9.921442214389187e-05, "loss": 0.0093, "step": 3226 }, { "epoch": 0.6244195046439629, "grad_norm": 0.1315494328737259, "learning_rate": 9.921391161259285e-05, "loss": 0.0114, "step": 3227 }, { "epoch": 0.6246130030959752, "grad_norm": 0.0818086490035057, "learning_rate": 9.921340091691741e-05, "loss": 0.0096, "step": 3228 }, { "epoch": 0.6248065015479877, "grad_norm": 0.09994229674339294, "learning_rate": 9.921289005686744e-05, "loss": 0.012, "step": 3229 }, { "epoch": 0.625, "grad_norm": 0.13364815711975098, "learning_rate": 9.921237903244482e-05, "loss": 0.0091, "step": 3230 }, { "epoch": 0.6251934984520123, "grad_norm": 0.07810667157173157, "learning_rate": 9.92118678436515e-05, "loss": 0.0118, "step": 3231 }, { "epoch": 0.6253869969040248, "grad_norm": 0.15371637046337128, "learning_rate": 9.921135649048933e-05, "loss": 0.0115, "step": 3232 }, { "epoch": 0.6255804953560371, "grad_norm": 0.04246395081281662, "learning_rate": 9.921084497296023e-05, "loss": 0.009, "step": 3233 }, { "epoch": 0.6257739938080495, "grad_norm": 0.1438584178686142, "learning_rate": 9.921033329106611e-05, "loss": 0.0099, "step": 3234 }, { "epoch": 0.6259674922600619, "grad_norm": 0.038365140557289124, "learning_rate": 9.920982144480885e-05, "loss": 0.0117, "step": 3235 }, { "epoch": 0.6261609907120743, "grad_norm": 0.10981998592615128, "learning_rate": 9.92093094341904e-05, "loss": 0.0113, "step": 3236 }, { "epoch": 0.6263544891640866, "grad_norm": 0.031161708757281303, "learning_rate": 9.92087972592126e-05, "loss": 0.0116, "step": 3237 }, { "epoch": 0.6265479876160991, "grad_norm": 0.08616811037063599, "learning_rate": 9.92082849198774e-05, "loss": 0.0107, "step": 3238 }, { "epoch": 0.6267414860681114, "grad_norm": 0.047499287873506546, "learning_rate": 9.920777241618668e-05, "loss": 0.0108, "step": 3239 }, { "epoch": 0.6269349845201239, "grad_norm": 0.09615200012922287, "learning_rate": 9.920725974814236e-05, "loss": 0.0114, "step": 3240 }, { "epoch": 0.6271284829721362, "grad_norm": 0.04974391683936119, "learning_rate": 9.920674691574633e-05, "loss": 0.012, "step": 3241 }, { "epoch": 0.6273219814241486, "grad_norm": 0.08623962849378586, "learning_rate": 9.920623391900053e-05, "loss": 0.0102, "step": 3242 }, { "epoch": 
0.627515479876161, "grad_norm": 0.0391472764313221, "learning_rate": 9.920572075790683e-05, "loss": 0.0094, "step": 3243 }, { "epoch": 0.6277089783281734, "grad_norm": 0.10546909272670746, "learning_rate": 9.920520743246715e-05, "loss": 0.0109, "step": 3244 }, { "epoch": 0.6279024767801857, "grad_norm": 0.07779747992753983, "learning_rate": 9.920469394268341e-05, "loss": 0.0085, "step": 3245 }, { "epoch": 0.6280959752321982, "grad_norm": 0.10131919384002686, "learning_rate": 9.920418028855751e-05, "loss": 0.0092, "step": 3246 }, { "epoch": 0.6282894736842105, "grad_norm": 0.0664953887462616, "learning_rate": 9.920366647009135e-05, "loss": 0.0128, "step": 3247 }, { "epoch": 0.628482972136223, "grad_norm": 0.12471406906843185, "learning_rate": 9.920315248728687e-05, "loss": 0.0108, "step": 3248 }, { "epoch": 0.6286764705882353, "grad_norm": 0.11380501836538315, "learning_rate": 9.920263834014595e-05, "loss": 0.0102, "step": 3249 }, { "epoch": 0.6288699690402477, "grad_norm": 0.10245952755212784, "learning_rate": 9.92021240286705e-05, "loss": 0.0093, "step": 3250 }, { "epoch": 0.62906346749226, "grad_norm": 0.09020429104566574, "learning_rate": 9.920160955286246e-05, "loss": 0.011, "step": 3251 }, { "epoch": 0.6292569659442725, "grad_norm": 0.06629546731710434, "learning_rate": 9.920109491272372e-05, "loss": 0.0096, "step": 3252 }, { "epoch": 0.6294504643962848, "grad_norm": 0.060674868524074554, "learning_rate": 9.920058010825622e-05, "loss": 0.0107, "step": 3253 }, { "epoch": 0.6296439628482973, "grad_norm": 0.10030906647443771, "learning_rate": 9.920006513946183e-05, "loss": 0.0103, "step": 3254 }, { "epoch": 0.6298374613003096, "grad_norm": 0.040026336908340454, "learning_rate": 9.919955000634248e-05, "loss": 0.0111, "step": 3255 }, { "epoch": 0.6300309597523219, "grad_norm": 0.10935363173484802, "learning_rate": 9.919903470890013e-05, "loss": 0.0101, "step": 3256 }, { "epoch": 0.6302244582043344, "grad_norm": 0.12426745146512985, "learning_rate": 9.919851924713663e-05, "loss": 0.0123, "step": 3257 }, { "epoch": 0.6304179566563467, "grad_norm": 0.0770738273859024, "learning_rate": 9.919800362105394e-05, "loss": 0.0087, "step": 3258 }, { "epoch": 0.6306114551083591, "grad_norm": 0.1133742406964302, "learning_rate": 9.919748783065396e-05, "loss": 0.011, "step": 3259 }, { "epoch": 0.6308049535603715, "grad_norm": 0.056258294731378555, "learning_rate": 9.91969718759386e-05, "loss": 0.0107, "step": 3260 }, { "epoch": 0.6309984520123839, "grad_norm": 0.12259387969970703, "learning_rate": 9.91964557569098e-05, "loss": 0.0095, "step": 3261 }, { "epoch": 0.6311919504643962, "grad_norm": 0.03762216493487358, "learning_rate": 9.919593947356948e-05, "loss": 0.0089, "step": 3262 }, { "epoch": 0.6313854489164087, "grad_norm": 0.11659050732851028, "learning_rate": 9.919542302591953e-05, "loss": 0.0093, "step": 3263 }, { "epoch": 0.631578947368421, "grad_norm": 0.07267877459526062, "learning_rate": 9.919490641396189e-05, "loss": 0.0107, "step": 3264 }, { "epoch": 0.6317724458204335, "grad_norm": 0.1570848524570465, "learning_rate": 9.919438963769847e-05, "loss": 0.0121, "step": 3265 }, { "epoch": 0.6319659442724458, "grad_norm": 0.06926719844341278, "learning_rate": 9.919387269713122e-05, "loss": 0.0093, "step": 3266 }, { "epoch": 0.6321594427244582, "grad_norm": 0.13591523468494415, "learning_rate": 9.919335559226203e-05, "loss": 0.0105, "step": 3267 }, { "epoch": 0.6323529411764706, "grad_norm": 0.10647433251142502, "learning_rate": 9.919283832309282e-05, "loss": 0.0089, "step": 3268 }, { "epoch": 
0.632546439628483, "grad_norm": 0.15683892369270325, "learning_rate": 9.919232088962556e-05, "loss": 0.0123, "step": 3269 }, { "epoch": 0.6327399380804953, "grad_norm": 0.07975995540618896, "learning_rate": 9.919180329186211e-05, "loss": 0.0104, "step": 3270 }, { "epoch": 0.6329334365325078, "grad_norm": 0.10904780775308609, "learning_rate": 9.919128552980445e-05, "loss": 0.0114, "step": 3271 }, { "epoch": 0.6331269349845201, "grad_norm": 0.09520180523395538, "learning_rate": 9.919076760345445e-05, "loss": 0.0103, "step": 3272 }, { "epoch": 0.6333204334365325, "grad_norm": 0.06899318099021912, "learning_rate": 9.919024951281409e-05, "loss": 0.0112, "step": 3273 }, { "epoch": 0.6335139318885449, "grad_norm": 0.08842983841896057, "learning_rate": 9.918973125788526e-05, "loss": 0.0091, "step": 3274 }, { "epoch": 0.6337074303405573, "grad_norm": 0.04797040671110153, "learning_rate": 9.918921283866988e-05, "loss": 0.0102, "step": 3275 }, { "epoch": 0.6339009287925697, "grad_norm": 0.0845179483294487, "learning_rate": 9.918869425516991e-05, "loss": 0.0144, "step": 3276 }, { "epoch": 0.6340944272445821, "grad_norm": 0.04236532747745514, "learning_rate": 9.918817550738727e-05, "loss": 0.0121, "step": 3277 }, { "epoch": 0.6342879256965944, "grad_norm": 0.06432470679283142, "learning_rate": 9.918765659532389e-05, "loss": 0.0098, "step": 3278 }, { "epoch": 0.6344814241486069, "grad_norm": 0.04400249943137169, "learning_rate": 9.918713751898167e-05, "loss": 0.0102, "step": 3279 }, { "epoch": 0.6346749226006192, "grad_norm": 0.03664876148104668, "learning_rate": 9.918661827836257e-05, "loss": 0.0097, "step": 3280 }, { "epoch": 0.6348684210526315, "grad_norm": 0.055061209946870804, "learning_rate": 9.918609887346852e-05, "loss": 0.0097, "step": 3281 }, { "epoch": 0.635061919504644, "grad_norm": 0.062321700155735016, "learning_rate": 9.918557930430144e-05, "loss": 0.0097, "step": 3282 }, { "epoch": 0.6352554179566563, "grad_norm": 0.1507863849401474, "learning_rate": 9.918505957086325e-05, "loss": 0.011, "step": 3283 }, { "epoch": 0.6354489164086687, "grad_norm": 0.07190733402967453, "learning_rate": 9.918453967315589e-05, "loss": 0.0101, "step": 3284 }, { "epoch": 0.6356424148606811, "grad_norm": 0.1300833821296692, "learning_rate": 9.918401961118131e-05, "loss": 0.0094, "step": 3285 }, { "epoch": 0.6358359133126935, "grad_norm": 0.044317904859781265, "learning_rate": 9.918349938494143e-05, "loss": 0.0093, "step": 3286 }, { "epoch": 0.6360294117647058, "grad_norm": 0.14841893315315247, "learning_rate": 9.91829789944382e-05, "loss": 0.0109, "step": 3287 }, { "epoch": 0.6362229102167183, "grad_norm": 0.06993936002254486, "learning_rate": 9.918245843967353e-05, "loss": 0.0107, "step": 3288 }, { "epoch": 0.6364164086687306, "grad_norm": 0.13907931745052338, "learning_rate": 9.918193772064936e-05, "loss": 0.0105, "step": 3289 }, { "epoch": 0.6366099071207431, "grad_norm": 0.09338128566741943, "learning_rate": 9.918141683736764e-05, "loss": 0.0121, "step": 3290 }, { "epoch": 0.6368034055727554, "grad_norm": 0.11441504210233688, "learning_rate": 9.91808957898303e-05, "loss": 0.0082, "step": 3291 }, { "epoch": 0.6369969040247678, "grad_norm": 0.09055893868207932, "learning_rate": 9.918037457803928e-05, "loss": 0.0107, "step": 3292 }, { "epoch": 0.6371904024767802, "grad_norm": 0.09200720489025116, "learning_rate": 9.91798532019965e-05, "loss": 0.0117, "step": 3293 }, { "epoch": 0.6373839009287926, "grad_norm": 0.11704560369253159, "learning_rate": 9.917933166170393e-05, "loss": 0.0101, "step": 3294 }, { 
"epoch": 0.6375773993808049, "grad_norm": 0.10875439643859863, "learning_rate": 9.917880995716348e-05, "loss": 0.0099, "step": 3295 }, { "epoch": 0.6377708978328174, "grad_norm": 0.11146378517150879, "learning_rate": 9.917828808837709e-05, "loss": 0.0115, "step": 3296 }, { "epoch": 0.6379643962848297, "grad_norm": 0.1019582599401474, "learning_rate": 9.917776605534672e-05, "loss": 0.0096, "step": 3297 }, { "epoch": 0.6381578947368421, "grad_norm": 0.1115918681025505, "learning_rate": 9.91772438580743e-05, "loss": 0.0104, "step": 3298 }, { "epoch": 0.6383513931888545, "grad_norm": 0.07531147450208664, "learning_rate": 9.917672149656177e-05, "loss": 0.0115, "step": 3299 }, { "epoch": 0.6385448916408669, "grad_norm": 0.21105806529521942, "learning_rate": 9.917619897081109e-05, "loss": 0.0123, "step": 3300 }, { "epoch": 0.6387383900928792, "grad_norm": 0.05913477763533592, "learning_rate": 9.917567628082419e-05, "loss": 0.0112, "step": 3301 }, { "epoch": 0.6389318885448917, "grad_norm": 0.14527487754821777, "learning_rate": 9.917515342660299e-05, "loss": 0.0102, "step": 3302 }, { "epoch": 0.639125386996904, "grad_norm": 0.12376778572797775, "learning_rate": 9.917463040814947e-05, "loss": 0.0099, "step": 3303 }, { "epoch": 0.6393188854489165, "grad_norm": 0.1218787357211113, "learning_rate": 9.917410722546554e-05, "loss": 0.0122, "step": 3304 }, { "epoch": 0.6395123839009288, "grad_norm": 0.11584631353616714, "learning_rate": 9.917358387855318e-05, "loss": 0.0108, "step": 3305 }, { "epoch": 0.6397058823529411, "grad_norm": 0.09959528595209122, "learning_rate": 9.917306036741431e-05, "loss": 0.0091, "step": 3306 }, { "epoch": 0.6398993808049536, "grad_norm": 0.10502269119024277, "learning_rate": 9.91725366920509e-05, "loss": 0.0117, "step": 3307 }, { "epoch": 0.6400928792569659, "grad_norm": 0.09172610193490982, "learning_rate": 9.917201285246487e-05, "loss": 0.009, "step": 3308 }, { "epoch": 0.6402863777089783, "grad_norm": 0.13047398626804352, "learning_rate": 9.917148884865818e-05, "loss": 0.011, "step": 3309 }, { "epoch": 0.6404798761609907, "grad_norm": 0.05909044295549393, "learning_rate": 9.91709646806328e-05, "loss": 0.0122, "step": 3310 }, { "epoch": 0.6406733746130031, "grad_norm": 0.1410878598690033, "learning_rate": 9.917044034839063e-05, "loss": 0.0116, "step": 3311 }, { "epoch": 0.6408668730650154, "grad_norm": 0.07091011106967926, "learning_rate": 9.916991585193366e-05, "loss": 0.0083, "step": 3312 }, { "epoch": 0.6410603715170279, "grad_norm": 0.10530321300029755, "learning_rate": 9.916939119126383e-05, "loss": 0.0104, "step": 3313 }, { "epoch": 0.6412538699690402, "grad_norm": 0.07672886550426483, "learning_rate": 9.916886636638309e-05, "loss": 0.0107, "step": 3314 }, { "epoch": 0.6414473684210527, "grad_norm": 0.10025665909051895, "learning_rate": 9.916834137729337e-05, "loss": 0.01, "step": 3315 }, { "epoch": 0.641640866873065, "grad_norm": 0.05638054385781288, "learning_rate": 9.916781622399666e-05, "loss": 0.0118, "step": 3316 }, { "epoch": 0.6418343653250774, "grad_norm": 0.11572602391242981, "learning_rate": 9.916729090649488e-05, "loss": 0.0116, "step": 3317 }, { "epoch": 0.6420278637770898, "grad_norm": 0.05632258951663971, "learning_rate": 9.916676542479e-05, "loss": 0.0098, "step": 3318 }, { "epoch": 0.6422213622291022, "grad_norm": 0.08688819408416748, "learning_rate": 9.916623977888397e-05, "loss": 0.0113, "step": 3319 }, { "epoch": 0.6424148606811145, "grad_norm": 0.059241749346256256, "learning_rate": 9.916571396877873e-05, "loss": 0.0101, "step": 3320 }, { 
"epoch": 0.642608359133127, "grad_norm": 0.08363770693540573, "learning_rate": 9.916518799447626e-05, "loss": 0.0102, "step": 3321 }, { "epoch": 0.6428018575851393, "grad_norm": 0.056901898235082626, "learning_rate": 9.91646618559785e-05, "loss": 0.0133, "step": 3322 }, { "epoch": 0.6429953560371517, "grad_norm": 0.10153691470623016, "learning_rate": 9.916413555328741e-05, "loss": 0.0084, "step": 3323 }, { "epoch": 0.6431888544891641, "grad_norm": 0.09151970595121384, "learning_rate": 9.916360908640496e-05, "loss": 0.0077, "step": 3324 }, { "epoch": 0.6433823529411765, "grad_norm": 0.0918068140745163, "learning_rate": 9.916308245533307e-05, "loss": 0.0095, "step": 3325 }, { "epoch": 0.6435758513931888, "grad_norm": 0.09668221324682236, "learning_rate": 9.916255566007374e-05, "loss": 0.011, "step": 3326 }, { "epoch": 0.6437693498452013, "grad_norm": 0.09332019090652466, "learning_rate": 9.91620287006289e-05, "loss": 0.0112, "step": 3327 }, { "epoch": 0.6439628482972136, "grad_norm": 0.07767459750175476, "learning_rate": 9.916150157700052e-05, "loss": 0.0094, "step": 3328 }, { "epoch": 0.6441563467492261, "grad_norm": 0.081728495657444, "learning_rate": 9.916097428919056e-05, "loss": 0.011, "step": 3329 }, { "epoch": 0.6443498452012384, "grad_norm": 0.10322069376707077, "learning_rate": 9.9160446837201e-05, "loss": 0.0088, "step": 3330 }, { "epoch": 0.6445433436532507, "grad_norm": 0.15765655040740967, "learning_rate": 9.915991922103375e-05, "loss": 0.012, "step": 3331 }, { "epoch": 0.6447368421052632, "grad_norm": 0.04836190119385719, "learning_rate": 9.91593914406908e-05, "loss": 0.0085, "step": 3332 }, { "epoch": 0.6449303405572755, "grad_norm": 0.14223943650722504, "learning_rate": 9.915886349617414e-05, "loss": 0.0113, "step": 3333 }, { "epoch": 0.6451238390092879, "grad_norm": 0.08517244458198547, "learning_rate": 9.915833538748569e-05, "loss": 0.0084, "step": 3334 }, { "epoch": 0.6453173374613003, "grad_norm": 0.10938338935375214, "learning_rate": 9.915780711462744e-05, "loss": 0.0098, "step": 3335 }, { "epoch": 0.6455108359133127, "grad_norm": 0.08470569550991058, "learning_rate": 9.915727867760133e-05, "loss": 0.0114, "step": 3336 }, { "epoch": 0.645704334365325, "grad_norm": 0.12697777152061462, "learning_rate": 9.915675007640936e-05, "loss": 0.0083, "step": 3337 }, { "epoch": 0.6458978328173375, "grad_norm": 0.07849782705307007, "learning_rate": 9.915622131105344e-05, "loss": 0.0103, "step": 3338 }, { "epoch": 0.6460913312693498, "grad_norm": 0.1046537458896637, "learning_rate": 9.915569238153559e-05, "loss": 0.0106, "step": 3339 }, { "epoch": 0.6462848297213623, "grad_norm": 0.12041446566581726, "learning_rate": 9.915516328785776e-05, "loss": 0.0096, "step": 3340 }, { "epoch": 0.6464783281733746, "grad_norm": 0.1102246642112732, "learning_rate": 9.91546340300219e-05, "loss": 0.01, "step": 3341 }, { "epoch": 0.646671826625387, "grad_norm": 0.1499585062265396, "learning_rate": 9.915410460803001e-05, "loss": 0.0094, "step": 3342 }, { "epoch": 0.6468653250773994, "grad_norm": 0.11547806859016418, "learning_rate": 9.915357502188402e-05, "loss": 0.01, "step": 3343 }, { "epoch": 0.6470588235294118, "grad_norm": 0.14465834200382233, "learning_rate": 9.915304527158594e-05, "loss": 0.0095, "step": 3344 }, { "epoch": 0.6472523219814241, "grad_norm": 0.14517487585544586, "learning_rate": 9.915251535713769e-05, "loss": 0.0117, "step": 3345 }, { "epoch": 0.6474458204334366, "grad_norm": 0.0929550901055336, "learning_rate": 9.915198527854128e-05, "loss": 0.0095, "step": 3346 }, { "epoch": 
0.6476393188854489, "grad_norm": 0.12595903873443604, "learning_rate": 9.915145503579866e-05, "loss": 0.0123, "step": 3347 }, { "epoch": 0.6478328173374613, "grad_norm": 0.11365114152431488, "learning_rate": 9.915092462891182e-05, "loss": 0.0108, "step": 3348 }, { "epoch": 0.6480263157894737, "grad_norm": 0.09011662006378174, "learning_rate": 9.915039405788272e-05, "loss": 0.0121, "step": 3349 }, { "epoch": 0.6482198142414861, "grad_norm": 0.13284602761268616, "learning_rate": 9.914986332271332e-05, "loss": 0.0119, "step": 3350 }, { "epoch": 0.6484133126934984, "grad_norm": 0.07843142747879028, "learning_rate": 9.914933242340562e-05, "loss": 0.0112, "step": 3351 }, { "epoch": 0.6486068111455109, "grad_norm": 0.12797418236732483, "learning_rate": 9.914880135996159e-05, "loss": 0.0099, "step": 3352 }, { "epoch": 0.6488003095975232, "grad_norm": 0.11733290553092957, "learning_rate": 9.914827013238318e-05, "loss": 0.0108, "step": 3353 }, { "epoch": 0.6489938080495357, "grad_norm": 0.16928212344646454, "learning_rate": 9.914773874067239e-05, "loss": 0.0117, "step": 3354 }, { "epoch": 0.649187306501548, "grad_norm": 0.0843747928738594, "learning_rate": 9.914720718483119e-05, "loss": 0.0119, "step": 3355 }, { "epoch": 0.6493808049535603, "grad_norm": 0.14959141612052917, "learning_rate": 9.914667546486152e-05, "loss": 0.0136, "step": 3356 }, { "epoch": 0.6495743034055728, "grad_norm": 0.13390062749385834, "learning_rate": 9.914614358076543e-05, "loss": 0.0108, "step": 3357 }, { "epoch": 0.6497678018575851, "grad_norm": 0.08733890950679779, "learning_rate": 9.914561153254482e-05, "loss": 0.0128, "step": 3358 }, { "epoch": 0.6499613003095975, "grad_norm": 0.14299437403678894, "learning_rate": 9.914507932020173e-05, "loss": 0.012, "step": 3359 }, { "epoch": 0.6501547987616099, "grad_norm": 0.09208022803068161, "learning_rate": 9.914454694373809e-05, "loss": 0.0091, "step": 3360 }, { "epoch": 0.6503482972136223, "grad_norm": 0.13553978502750397, "learning_rate": 9.914401440315591e-05, "loss": 0.0111, "step": 3361 }, { "epoch": 0.6505417956656346, "grad_norm": 0.07715544104576111, "learning_rate": 9.914348169845719e-05, "loss": 0.0105, "step": 3362 }, { "epoch": 0.6507352941176471, "grad_norm": 0.14097264409065247, "learning_rate": 9.914294882964385e-05, "loss": 0.0103, "step": 3363 }, { "epoch": 0.6509287925696594, "grad_norm": 0.12168420851230621, "learning_rate": 9.914241579671792e-05, "loss": 0.0111, "step": 3364 }, { "epoch": 0.6511222910216719, "grad_norm": 0.15363827347755432, "learning_rate": 9.914188259968136e-05, "loss": 0.0126, "step": 3365 }, { "epoch": 0.6513157894736842, "grad_norm": 0.2343587428331375, "learning_rate": 9.914134923853614e-05, "loss": 0.0139, "step": 3366 }, { "epoch": 0.6515092879256966, "grad_norm": 0.09517820924520493, "learning_rate": 9.914081571328429e-05, "loss": 0.0092, "step": 3367 }, { "epoch": 0.651702786377709, "grad_norm": 0.22831010818481445, "learning_rate": 9.914028202392773e-05, "loss": 0.014, "step": 3368 }, { "epoch": 0.6518962848297214, "grad_norm": 0.16594965755939484, "learning_rate": 9.91397481704685e-05, "loss": 0.0114, "step": 3369 }, { "epoch": 0.6520897832817337, "grad_norm": 0.10821406543254852, "learning_rate": 9.913921415290857e-05, "loss": 0.0112, "step": 3370 }, { "epoch": 0.6522832817337462, "grad_norm": 0.1433562934398651, "learning_rate": 9.91386799712499e-05, "loss": 0.0101, "step": 3371 }, { "epoch": 0.6524767801857585, "grad_norm": 0.21047867834568024, "learning_rate": 9.91381456254945e-05, "loss": 0.0114, "step": 3372 }, { "epoch": 
0.6526702786377709, "grad_norm": 0.0452914834022522, "learning_rate": 9.913761111564436e-05, "loss": 0.0109, "step": 3373 }, { "epoch": 0.6528637770897833, "grad_norm": 0.23405225574970245, "learning_rate": 9.913707644170145e-05, "loss": 0.0114, "step": 3374 }, { "epoch": 0.6530572755417957, "grad_norm": 0.06652205437421799, "learning_rate": 9.913654160366775e-05, "loss": 0.0087, "step": 3375 }, { "epoch": 0.653250773993808, "grad_norm": 0.2379222959280014, "learning_rate": 9.91360066015453e-05, "loss": 0.0115, "step": 3376 }, { "epoch": 0.6534442724458205, "grad_norm": 0.14763204753398895, "learning_rate": 9.913547143533602e-05, "loss": 0.0089, "step": 3377 }, { "epoch": 0.6536377708978328, "grad_norm": 0.23183387517929077, "learning_rate": 9.913493610504196e-05, "loss": 0.0133, "step": 3378 }, { "epoch": 0.6538312693498453, "grad_norm": 0.14546997845172882, "learning_rate": 9.913440061066505e-05, "loss": 0.0082, "step": 3379 }, { "epoch": 0.6540247678018576, "grad_norm": 0.16067001223564148, "learning_rate": 9.913386495220734e-05, "loss": 0.0132, "step": 3380 }, { "epoch": 0.6542182662538699, "grad_norm": 0.24580970406532288, "learning_rate": 9.913332912967078e-05, "loss": 0.0105, "step": 3381 }, { "epoch": 0.6544117647058824, "grad_norm": 0.1159050241112709, "learning_rate": 9.913279314305739e-05, "loss": 0.0112, "step": 3382 }, { "epoch": 0.6546052631578947, "grad_norm": 0.2201697677373886, "learning_rate": 9.913225699236913e-05, "loss": 0.0116, "step": 3383 }, { "epoch": 0.6547987616099071, "grad_norm": 0.13614177703857422, "learning_rate": 9.913172067760803e-05, "loss": 0.0115, "step": 3384 }, { "epoch": 0.6549922600619195, "grad_norm": 0.131536066532135, "learning_rate": 9.913118419877606e-05, "loss": 0.0107, "step": 3385 }, { "epoch": 0.6551857585139319, "grad_norm": 0.13820695877075195, "learning_rate": 9.913064755587522e-05, "loss": 0.0116, "step": 3386 }, { "epoch": 0.6553792569659442, "grad_norm": 0.15693317353725433, "learning_rate": 9.91301107489075e-05, "loss": 0.0092, "step": 3387 }, { "epoch": 0.6555727554179567, "grad_norm": 0.10853523015975952, "learning_rate": 9.912957377787492e-05, "loss": 0.0119, "step": 3388 }, { "epoch": 0.655766253869969, "grad_norm": 0.2049427479505539, "learning_rate": 9.912903664277945e-05, "loss": 0.0102, "step": 3389 }, { "epoch": 0.6559597523219814, "grad_norm": 0.13527823984622955, "learning_rate": 9.91284993436231e-05, "loss": 0.0109, "step": 3390 }, { "epoch": 0.6561532507739938, "grad_norm": 0.15340888500213623, "learning_rate": 9.912796188040784e-05, "loss": 0.0124, "step": 3391 }, { "epoch": 0.6563467492260062, "grad_norm": 0.10999975353479385, "learning_rate": 9.912742425313573e-05, "loss": 0.0113, "step": 3392 }, { "epoch": 0.6565402476780186, "grad_norm": 0.05427411198616028, "learning_rate": 9.91268864618087e-05, "loss": 0.01, "step": 3393 }, { "epoch": 0.656733746130031, "grad_norm": 0.10600887984037399, "learning_rate": 9.912634850642879e-05, "loss": 0.01, "step": 3394 }, { "epoch": 0.6569272445820433, "grad_norm": 0.1220422014594078, "learning_rate": 9.9125810386998e-05, "loss": 0.0106, "step": 3395 }, { "epoch": 0.6571207430340558, "grad_norm": 0.07781155407428741, "learning_rate": 9.912527210351829e-05, "loss": 0.0109, "step": 3396 }, { "epoch": 0.6573142414860681, "grad_norm": 0.08087130635976791, "learning_rate": 9.912473365599172e-05, "loss": 0.0103, "step": 3397 }, { "epoch": 0.6575077399380805, "grad_norm": 0.1241665780544281, "learning_rate": 9.912419504442024e-05, "loss": 0.0095, "step": 3398 }, { "epoch": 
0.6577012383900929, "grad_norm": 0.05061502009630203, "learning_rate": 9.912365626880589e-05, "loss": 0.0114, "step": 3399 }, { "epoch": 0.6578947368421053, "grad_norm": 0.12499218434095383, "learning_rate": 9.912311732915066e-05, "loss": 0.0103, "step": 3400 }, { "epoch": 0.6580882352941176, "grad_norm": 0.052773796021938324, "learning_rate": 9.912257822545655e-05, "loss": 0.0118, "step": 3401 }, { "epoch": 0.6582817337461301, "grad_norm": 0.08595278859138489, "learning_rate": 9.912203895772555e-05, "loss": 0.0097, "step": 3402 }, { "epoch": 0.6584752321981424, "grad_norm": 0.07716614753007889, "learning_rate": 9.912149952595969e-05, "loss": 0.0101, "step": 3403 }, { "epoch": 0.6586687306501547, "grad_norm": 0.08292482048273087, "learning_rate": 9.912095993016098e-05, "loss": 0.0106, "step": 3404 }, { "epoch": 0.6588622291021672, "grad_norm": 0.09718345105648041, "learning_rate": 9.91204201703314e-05, "loss": 0.0132, "step": 3405 }, { "epoch": 0.6590557275541795, "grad_norm": 0.06668482720851898, "learning_rate": 9.911988024647299e-05, "loss": 0.01, "step": 3406 }, { "epoch": 0.659249226006192, "grad_norm": 0.11297328025102615, "learning_rate": 9.911934015858771e-05, "loss": 0.0109, "step": 3407 }, { "epoch": 0.6594427244582043, "grad_norm": 0.061704788357019424, "learning_rate": 9.91187999066776e-05, "loss": 0.0106, "step": 3408 }, { "epoch": 0.6596362229102167, "grad_norm": 0.09231693297624588, "learning_rate": 9.911825949074467e-05, "loss": 0.0131, "step": 3409 }, { "epoch": 0.6598297213622291, "grad_norm": 0.07088837772607803, "learning_rate": 9.911771891079093e-05, "loss": 0.0102, "step": 3410 }, { "epoch": 0.6600232198142415, "grad_norm": 0.08869779855012894, "learning_rate": 9.911717816681836e-05, "loss": 0.0096, "step": 3411 }, { "epoch": 0.6602167182662538, "grad_norm": 0.0632108673453331, "learning_rate": 9.911663725882902e-05, "loss": 0.0122, "step": 3412 }, { "epoch": 0.6604102167182663, "grad_norm": 0.12478280812501907, "learning_rate": 9.911609618682487e-05, "loss": 0.0103, "step": 3413 }, { "epoch": 0.6606037151702786, "grad_norm": 0.08343180269002914, "learning_rate": 9.911555495080795e-05, "loss": 0.0089, "step": 3414 }, { "epoch": 0.660797213622291, "grad_norm": 0.08866065740585327, "learning_rate": 9.911501355078025e-05, "loss": 0.0101, "step": 3415 }, { "epoch": 0.6609907120743034, "grad_norm": 0.060428768396377563, "learning_rate": 9.911447198674382e-05, "loss": 0.0119, "step": 3416 }, { "epoch": 0.6611842105263158, "grad_norm": 0.1351015418767929, "learning_rate": 9.911393025870068e-05, "loss": 0.0101, "step": 3417 }, { "epoch": 0.6613777089783281, "grad_norm": 0.08462640643119812, "learning_rate": 9.911338836665277e-05, "loss": 0.0093, "step": 3418 }, { "epoch": 0.6615712074303406, "grad_norm": 0.1068204715847969, "learning_rate": 9.911284631060219e-05, "loss": 0.0105, "step": 3419 }, { "epoch": 0.6617647058823529, "grad_norm": 0.08366401493549347, "learning_rate": 9.911230409055087e-05, "loss": 0.0089, "step": 3420 }, { "epoch": 0.6619582043343654, "grad_norm": 0.04241342470049858, "learning_rate": 9.911176170650091e-05, "loss": 0.0094, "step": 3421 }, { "epoch": 0.6621517027863777, "grad_norm": 0.12435023486614227, "learning_rate": 9.911121915845429e-05, "loss": 0.0096, "step": 3422 }, { "epoch": 0.6623452012383901, "grad_norm": 0.03483174368739128, "learning_rate": 9.911067644641301e-05, "loss": 0.0095, "step": 3423 }, { "epoch": 0.6625386996904025, "grad_norm": 0.10864231735467911, "learning_rate": 9.911013357037911e-05, "loss": 0.0084, "step": 3424 }, { 
"epoch": 0.6627321981424149, "grad_norm": 0.052791960537433624, "learning_rate": 9.910959053035461e-05, "loss": 0.0115, "step": 3425 }, { "epoch": 0.6629256965944272, "grad_norm": 0.1042354553937912, "learning_rate": 9.910904732634151e-05, "loss": 0.0089, "step": 3426 }, { "epoch": 0.6631191950464397, "grad_norm": 0.07110077142715454, "learning_rate": 9.910850395834185e-05, "loss": 0.0112, "step": 3427 }, { "epoch": 0.663312693498452, "grad_norm": 0.0891447439789772, "learning_rate": 9.910796042635764e-05, "loss": 0.0116, "step": 3428 }, { "epoch": 0.6635061919504643, "grad_norm": 0.059561364352703094, "learning_rate": 9.910741673039089e-05, "loss": 0.0096, "step": 3429 }, { "epoch": 0.6636996904024768, "grad_norm": 0.06847896426916122, "learning_rate": 9.910687287044365e-05, "loss": 0.0121, "step": 3430 }, { "epoch": 0.6638931888544891, "grad_norm": 0.044746387749910355, "learning_rate": 9.910632884651791e-05, "loss": 0.0111, "step": 3431 }, { "epoch": 0.6640866873065016, "grad_norm": 0.050663553178310394, "learning_rate": 9.910578465861572e-05, "loss": 0.0105, "step": 3432 }, { "epoch": 0.6642801857585139, "grad_norm": 0.056862205266952515, "learning_rate": 9.910524030673909e-05, "loss": 0.0094, "step": 3433 }, { "epoch": 0.6644736842105263, "grad_norm": 0.07849232852458954, "learning_rate": 9.910469579089004e-05, "loss": 0.0128, "step": 3434 }, { "epoch": 0.6646671826625387, "grad_norm": 0.09389214217662811, "learning_rate": 9.910415111107059e-05, "loss": 0.0105, "step": 3435 }, { "epoch": 0.6648606811145511, "grad_norm": 0.07632452249526978, "learning_rate": 9.910360626728277e-05, "loss": 0.0106, "step": 3436 }, { "epoch": 0.6650541795665634, "grad_norm": 0.05681530386209488, "learning_rate": 9.910306125952862e-05, "loss": 0.0084, "step": 3437 }, { "epoch": 0.6652476780185759, "grad_norm": 0.21035611629486084, "learning_rate": 9.910251608781016e-05, "loss": 0.0119, "step": 3438 }, { "epoch": 0.6654411764705882, "grad_norm": 0.057697050273418427, "learning_rate": 9.910197075212941e-05, "loss": 0.0083, "step": 3439 }, { "epoch": 0.6656346749226006, "grad_norm": 0.23778562247753143, "learning_rate": 9.910142525248839e-05, "loss": 0.0115, "step": 3440 }, { "epoch": 0.665828173374613, "grad_norm": 0.05835040658712387, "learning_rate": 9.910087958888915e-05, "loss": 0.0107, "step": 3441 }, { "epoch": 0.6660216718266254, "grad_norm": 0.22179429233074188, "learning_rate": 9.91003337613337e-05, "loss": 0.0098, "step": 3442 }, { "epoch": 0.6662151702786377, "grad_norm": 0.06583717465400696, "learning_rate": 9.909978776982407e-05, "loss": 0.0107, "step": 3443 }, { "epoch": 0.6664086687306502, "grad_norm": 0.18032248318195343, "learning_rate": 9.909924161436231e-05, "loss": 0.0094, "step": 3444 }, { "epoch": 0.6666021671826625, "grad_norm": 0.1599557250738144, "learning_rate": 9.909869529495043e-05, "loss": 0.0128, "step": 3445 }, { "epoch": 0.666795665634675, "grad_norm": 0.21407851576805115, "learning_rate": 9.909814881159046e-05, "loss": 0.0114, "step": 3446 }, { "epoch": 0.6669891640866873, "grad_norm": 0.1433282047510147, "learning_rate": 9.909760216428445e-05, "loss": 0.0115, "step": 3447 }, { "epoch": 0.6671826625386997, "grad_norm": 0.13554859161376953, "learning_rate": 9.909705535303442e-05, "loss": 0.0108, "step": 3448 }, { "epoch": 0.6673761609907121, "grad_norm": 0.15228205919265747, "learning_rate": 9.90965083778424e-05, "loss": 0.0111, "step": 3449 }, { "epoch": 0.6675696594427245, "grad_norm": 0.11628247052431107, "learning_rate": 9.909596123871043e-05, "loss": 0.0112, "step": 3450 
}, { "epoch": 0.6677631578947368, "grad_norm": 0.09563539177179337, "learning_rate": 9.909541393564054e-05, "loss": 0.0097, "step": 3451 }, { "epoch": 0.6679566563467493, "grad_norm": 0.08977185189723969, "learning_rate": 9.909486646863475e-05, "loss": 0.0122, "step": 3452 }, { "epoch": 0.6681501547987616, "grad_norm": 0.0852564349770546, "learning_rate": 9.909431883769513e-05, "loss": 0.0084, "step": 3453 }, { "epoch": 0.6683436532507739, "grad_norm": 0.08941066265106201, "learning_rate": 9.909377104282369e-05, "loss": 0.0091, "step": 3454 }, { "epoch": 0.6685371517027864, "grad_norm": 0.08954844623804092, "learning_rate": 9.90932230840225e-05, "loss": 0.0083, "step": 3455 }, { "epoch": 0.6687306501547987, "grad_norm": 0.05357831344008446, "learning_rate": 9.909267496129354e-05, "loss": 0.012, "step": 3456 }, { "epoch": 0.6689241486068112, "grad_norm": 0.1972517967224121, "learning_rate": 9.909212667463888e-05, "loss": 0.0092, "step": 3457 }, { "epoch": 0.6691176470588235, "grad_norm": 0.20547881722450256, "learning_rate": 9.909157822406058e-05, "loss": 0.0114, "step": 3458 }, { "epoch": 0.6693111455108359, "grad_norm": 0.0819290429353714, "learning_rate": 9.909102960956063e-05, "loss": 0.0091, "step": 3459 }, { "epoch": 0.6695046439628483, "grad_norm": 0.11582544445991516, "learning_rate": 9.909048083114111e-05, "loss": 0.0108, "step": 3460 }, { "epoch": 0.6696981424148607, "grad_norm": 0.08656234294176102, "learning_rate": 9.908993188880405e-05, "loss": 0.0104, "step": 3461 }, { "epoch": 0.669891640866873, "grad_norm": 0.08396361023187637, "learning_rate": 9.908938278255149e-05, "loss": 0.0091, "step": 3462 }, { "epoch": 0.6700851393188855, "grad_norm": 0.16473278403282166, "learning_rate": 9.908883351238547e-05, "loss": 0.0115, "step": 3463 }, { "epoch": 0.6702786377708978, "grad_norm": 0.14994105696678162, "learning_rate": 9.908828407830801e-05, "loss": 0.0121, "step": 3464 }, { "epoch": 0.6704721362229102, "grad_norm": 0.1412002146244049, "learning_rate": 9.908773448032118e-05, "loss": 0.0099, "step": 3465 }, { "epoch": 0.6706656346749226, "grad_norm": 0.1746814101934433, "learning_rate": 9.908718471842703e-05, "loss": 0.0113, "step": 3466 }, { "epoch": 0.670859133126935, "grad_norm": 0.20460960268974304, "learning_rate": 9.908663479262758e-05, "loss": 0.0086, "step": 3467 }, { "epoch": 0.6710526315789473, "grad_norm": 0.27406081557273865, "learning_rate": 9.908608470292488e-05, "loss": 0.0108, "step": 3468 }, { "epoch": 0.6712461300309598, "grad_norm": 0.30186787247657776, "learning_rate": 9.9085534449321e-05, "loss": 0.0113, "step": 3469 }, { "epoch": 0.6714396284829721, "grad_norm": 0.19864031672477722, "learning_rate": 9.908498403181795e-05, "loss": 0.0102, "step": 3470 }, { "epoch": 0.6716331269349846, "grad_norm": 0.19154107570648193, "learning_rate": 9.90844334504178e-05, "loss": 0.0112, "step": 3471 }, { "epoch": 0.6718266253869969, "grad_norm": 0.2146916687488556, "learning_rate": 9.908388270512259e-05, "loss": 0.0111, "step": 3472 }, { "epoch": 0.6720201238390093, "grad_norm": 0.08275813609361649, "learning_rate": 9.908333179593437e-05, "loss": 0.0082, "step": 3473 }, { "epoch": 0.6722136222910217, "grad_norm": 0.2469290941953659, "learning_rate": 9.908278072285518e-05, "loss": 0.0103, "step": 3474 }, { "epoch": 0.6724071207430341, "grad_norm": 0.1771448850631714, "learning_rate": 9.908222948588705e-05, "loss": 0.0103, "step": 3475 }, { "epoch": 0.6726006191950464, "grad_norm": 0.23309190571308136, "learning_rate": 9.908167808503208e-05, "loss": 0.0109, "step": 3476 }, { 
"epoch": 0.6727941176470589, "grad_norm": 0.1555764377117157, "learning_rate": 9.908112652029227e-05, "loss": 0.0098, "step": 3477 }, { "epoch": 0.6729876160990712, "grad_norm": 0.0615248866379261, "learning_rate": 9.908057479166974e-05, "loss": 0.0115, "step": 3478 }, { "epoch": 0.6731811145510835, "grad_norm": 0.19485782086849213, "learning_rate": 9.908002289916646e-05, "loss": 0.0117, "step": 3479 }, { "epoch": 0.673374613003096, "grad_norm": 0.07839665561914444, "learning_rate": 9.907947084278451e-05, "loss": 0.0106, "step": 3480 }, { "epoch": 0.6735681114551083, "grad_norm": 0.19670872390270233, "learning_rate": 9.907891862252596e-05, "loss": 0.0115, "step": 3481 }, { "epoch": 0.6737616099071208, "grad_norm": 0.08127611875534058, "learning_rate": 9.907836623839285e-05, "loss": 0.0127, "step": 3482 }, { "epoch": 0.6739551083591331, "grad_norm": 0.19188344478607178, "learning_rate": 9.907781369038725e-05, "loss": 0.0133, "step": 3483 }, { "epoch": 0.6741486068111455, "grad_norm": 0.054468631744384766, "learning_rate": 9.907726097851118e-05, "loss": 0.0103, "step": 3484 }, { "epoch": 0.6743421052631579, "grad_norm": 0.1682313084602356, "learning_rate": 9.907670810276671e-05, "loss": 0.0116, "step": 3485 }, { "epoch": 0.6745356037151703, "grad_norm": 0.045897237956523895, "learning_rate": 9.907615506315592e-05, "loss": 0.009, "step": 3486 }, { "epoch": 0.6747291021671826, "grad_norm": 0.12614485621452332, "learning_rate": 9.907560185968084e-05, "loss": 0.0108, "step": 3487 }, { "epoch": 0.6749226006191951, "grad_norm": 0.08078788220882416, "learning_rate": 9.907504849234352e-05, "loss": 0.0095, "step": 3488 }, { "epoch": 0.6751160990712074, "grad_norm": 0.11664276570081711, "learning_rate": 9.907449496114605e-05, "loss": 0.0108, "step": 3489 }, { "epoch": 0.6753095975232198, "grad_norm": 0.10945666581392288, "learning_rate": 9.907394126609045e-05, "loss": 0.01, "step": 3490 }, { "epoch": 0.6755030959752322, "grad_norm": 0.07462132722139359, "learning_rate": 9.907338740717881e-05, "loss": 0.0098, "step": 3491 }, { "epoch": 0.6756965944272446, "grad_norm": 0.1163306012749672, "learning_rate": 9.907283338441317e-05, "loss": 0.0089, "step": 3492 }, { "epoch": 0.6758900928792569, "grad_norm": 0.0648178979754448, "learning_rate": 9.907227919779561e-05, "loss": 0.0102, "step": 3493 }, { "epoch": 0.6760835913312694, "grad_norm": 0.08543790131807327, "learning_rate": 9.907172484732816e-05, "loss": 0.0104, "step": 3494 }, { "epoch": 0.6762770897832817, "grad_norm": 0.07641064375638962, "learning_rate": 9.907117033301289e-05, "loss": 0.0132, "step": 3495 }, { "epoch": 0.6764705882352942, "grad_norm": 0.053577639162540436, "learning_rate": 9.907061565485187e-05, "loss": 0.0099, "step": 3496 }, { "epoch": 0.6766640866873065, "grad_norm": 0.07636246830224991, "learning_rate": 9.907006081284716e-05, "loss": 0.0102, "step": 3497 }, { "epoch": 0.6768575851393189, "grad_norm": 0.05855518952012062, "learning_rate": 9.906950580700084e-05, "loss": 0.0102, "step": 3498 }, { "epoch": 0.6770510835913313, "grad_norm": 0.03434152901172638, "learning_rate": 9.906895063731494e-05, "loss": 0.0091, "step": 3499 }, { "epoch": 0.6772445820433437, "grad_norm": 0.07616118341684341, "learning_rate": 9.906839530379155e-05, "loss": 0.0098, "step": 3500 }, { "epoch": 0.677438080495356, "grad_norm": 0.0553869903087616, "learning_rate": 9.906783980643273e-05, "loss": 0.011, "step": 3501 }, { "epoch": 0.6776315789473685, "grad_norm": 0.044917032122612, "learning_rate": 9.906728414524052e-05, "loss": 0.0092, "step": 3502 }, { 
"epoch": 0.6778250773993808, "grad_norm": 0.03514128550887108, "learning_rate": 9.906672832021702e-05, "loss": 0.0109, "step": 3503 }, { "epoch": 0.6780185758513931, "grad_norm": 0.04188503324985504, "learning_rate": 9.906617233136428e-05, "loss": 0.0091, "step": 3504 }, { "epoch": 0.6782120743034056, "grad_norm": 0.04579344019293785, "learning_rate": 9.906561617868437e-05, "loss": 0.0096, "step": 3505 }, { "epoch": 0.6784055727554179, "grad_norm": 0.08862803131341934, "learning_rate": 9.906505986217936e-05, "loss": 0.0124, "step": 3506 }, { "epoch": 0.6785990712074303, "grad_norm": 0.07657068967819214, "learning_rate": 9.906450338185131e-05, "loss": 0.0085, "step": 3507 }, { "epoch": 0.6787925696594427, "grad_norm": 0.1349102407693863, "learning_rate": 9.90639467377023e-05, "loss": 0.0099, "step": 3508 }, { "epoch": 0.6789860681114551, "grad_norm": 0.08673150837421417, "learning_rate": 9.906338992973439e-05, "loss": 0.0094, "step": 3509 }, { "epoch": 0.6791795665634675, "grad_norm": 0.19489838182926178, "learning_rate": 9.906283295794965e-05, "loss": 0.0109, "step": 3510 }, { "epoch": 0.6793730650154799, "grad_norm": 0.08817912638187408, "learning_rate": 9.906227582235016e-05, "loss": 0.0104, "step": 3511 }, { "epoch": 0.6795665634674922, "grad_norm": 0.18444685637950897, "learning_rate": 9.906171852293797e-05, "loss": 0.0108, "step": 3512 }, { "epoch": 0.6797600619195047, "grad_norm": 0.1761232167482376, "learning_rate": 9.906116105971519e-05, "loss": 0.0104, "step": 3513 }, { "epoch": 0.679953560371517, "grad_norm": 0.1310534030199051, "learning_rate": 9.906060343268386e-05, "loss": 0.0117, "step": 3514 }, { "epoch": 0.6801470588235294, "grad_norm": 0.21672746539115906, "learning_rate": 9.906004564184606e-05, "loss": 0.0092, "step": 3515 }, { "epoch": 0.6803405572755418, "grad_norm": 0.0869886502623558, "learning_rate": 9.905948768720388e-05, "loss": 0.0122, "step": 3516 }, { "epoch": 0.6805340557275542, "grad_norm": 0.1835324764251709, "learning_rate": 9.905892956875937e-05, "loss": 0.0097, "step": 3517 }, { "epoch": 0.6807275541795665, "grad_norm": 0.09842576831579208, "learning_rate": 9.905837128651462e-05, "loss": 0.0104, "step": 3518 }, { "epoch": 0.680921052631579, "grad_norm": 0.17802748084068298, "learning_rate": 9.90578128404717e-05, "loss": 0.0108, "step": 3519 }, { "epoch": 0.6811145510835913, "grad_norm": 0.1129348948597908, "learning_rate": 9.905725423063269e-05, "loss": 0.0099, "step": 3520 }, { "epoch": 0.6813080495356038, "grad_norm": 0.07765989005565643, "learning_rate": 9.905669545699965e-05, "loss": 0.0113, "step": 3521 }, { "epoch": 0.6815015479876161, "grad_norm": 0.13503015041351318, "learning_rate": 9.90561365195747e-05, "loss": 0.0112, "step": 3522 }, { "epoch": 0.6816950464396285, "grad_norm": 0.08228190988302231, "learning_rate": 9.905557741835988e-05, "loss": 0.0122, "step": 3523 }, { "epoch": 0.6818885448916409, "grad_norm": 0.12602782249450684, "learning_rate": 9.905501815335727e-05, "loss": 0.0116, "step": 3524 }, { "epoch": 0.6820820433436533, "grad_norm": 0.09661032259464264, "learning_rate": 9.905445872456895e-05, "loss": 0.0123, "step": 3525 }, { "epoch": 0.6822755417956656, "grad_norm": 0.09987922757863998, "learning_rate": 9.905389913199703e-05, "loss": 0.0091, "step": 3526 }, { "epoch": 0.6824690402476781, "grad_norm": 0.13960085809230804, "learning_rate": 9.905333937564356e-05, "loss": 0.0114, "step": 3527 }, { "epoch": 0.6826625386996904, "grad_norm": 0.09725375473499298, "learning_rate": 9.905277945551062e-05, "loss": 0.0126, "step": 3528 }, { 
"epoch": 0.6828560371517027, "grad_norm": 0.1035713255405426, "learning_rate": 9.905221937160033e-05, "loss": 0.0091, "step": 3529 }, { "epoch": 0.6830495356037152, "grad_norm": 0.10914602130651474, "learning_rate": 9.905165912391469e-05, "loss": 0.0112, "step": 3530 }, { "epoch": 0.6832430340557275, "grad_norm": 0.09757322818040848, "learning_rate": 9.905109871245586e-05, "loss": 0.0096, "step": 3531 }, { "epoch": 0.68343653250774, "grad_norm": 0.07484789192676544, "learning_rate": 9.905053813722592e-05, "loss": 0.0102, "step": 3532 }, { "epoch": 0.6836300309597523, "grad_norm": 0.09524381905794144, "learning_rate": 9.904997739822692e-05, "loss": 0.0115, "step": 3533 }, { "epoch": 0.6838235294117647, "grad_norm": 0.1615273356437683, "learning_rate": 9.904941649546097e-05, "loss": 0.0104, "step": 3534 }, { "epoch": 0.684017027863777, "grad_norm": 0.1164022907614708, "learning_rate": 9.904885542893012e-05, "loss": 0.0109, "step": 3535 }, { "epoch": 0.6842105263157895, "grad_norm": 0.11870758980512619, "learning_rate": 9.90482941986365e-05, "loss": 0.0097, "step": 3536 }, { "epoch": 0.6844040247678018, "grad_norm": 0.16965508460998535, "learning_rate": 9.904773280458215e-05, "loss": 0.0104, "step": 3537 }, { "epoch": 0.6845975232198143, "grad_norm": 0.05535132437944412, "learning_rate": 9.904717124676922e-05, "loss": 0.0088, "step": 3538 }, { "epoch": 0.6847910216718266, "grad_norm": 0.20006287097930908, "learning_rate": 9.904660952519974e-05, "loss": 0.0087, "step": 3539 }, { "epoch": 0.684984520123839, "grad_norm": 0.04987632855772972, "learning_rate": 9.904604763987582e-05, "loss": 0.0116, "step": 3540 }, { "epoch": 0.6851780185758514, "grad_norm": 0.21929492056369781, "learning_rate": 9.904548559079954e-05, "loss": 0.01, "step": 3541 }, { "epoch": 0.6853715170278638, "grad_norm": 0.10525878518819809, "learning_rate": 9.904492337797301e-05, "loss": 0.0083, "step": 3542 }, { "epoch": 0.6855650154798761, "grad_norm": 0.1652296632528305, "learning_rate": 9.90443610013983e-05, "loss": 0.0114, "step": 3543 }, { "epoch": 0.6857585139318886, "grad_norm": 0.07395172119140625, "learning_rate": 9.904379846107752e-05, "loss": 0.0108, "step": 3544 }, { "epoch": 0.6859520123839009, "grad_norm": 0.157875195145607, "learning_rate": 9.904323575701274e-05, "loss": 0.011, "step": 3545 }, { "epoch": 0.6861455108359134, "grad_norm": 0.07051194459199905, "learning_rate": 9.904267288920606e-05, "loss": 0.012, "step": 3546 }, { "epoch": 0.6863390092879257, "grad_norm": 0.12749791145324707, "learning_rate": 9.904210985765958e-05, "loss": 0.0084, "step": 3547 }, { "epoch": 0.6865325077399381, "grad_norm": 0.09398259222507477, "learning_rate": 9.904154666237539e-05, "loss": 0.0096, "step": 3548 }, { "epoch": 0.6867260061919505, "grad_norm": 0.11914961040019989, "learning_rate": 9.904098330335557e-05, "loss": 0.0102, "step": 3549 }, { "epoch": 0.6869195046439629, "grad_norm": 0.136818990111351, "learning_rate": 9.904041978060223e-05, "loss": 0.0111, "step": 3550 }, { "epoch": 0.6871130030959752, "grad_norm": 0.07880650460720062, "learning_rate": 9.903985609411748e-05, "loss": 0.0113, "step": 3551 }, { "epoch": 0.6873065015479877, "grad_norm": 0.13022659718990326, "learning_rate": 9.903929224390338e-05, "loss": 0.0106, "step": 3552 }, { "epoch": 0.6875, "grad_norm": 0.08564159274101257, "learning_rate": 9.903872822996205e-05, "loss": 0.012, "step": 3553 }, { "epoch": 0.6876934984520123, "grad_norm": 0.08249378204345703, "learning_rate": 9.903816405229556e-05, "loss": 0.0102, "step": 3554 }, { "epoch": 
0.6878869969040248, "grad_norm": 0.08956045657396317, "learning_rate": 9.903759971090605e-05, "loss": 0.0104, "step": 3555 }, { "epoch": 0.6880804953560371, "grad_norm": 0.05304498225450516, "learning_rate": 9.903703520579559e-05, "loss": 0.0101, "step": 3556 }, { "epoch": 0.6882739938080495, "grad_norm": 0.06298129260540009, "learning_rate": 9.903647053696628e-05, "loss": 0.0111, "step": 3557 }, { "epoch": 0.6884674922600619, "grad_norm": 0.04316902533173561, "learning_rate": 9.903590570442022e-05, "loss": 0.0104, "step": 3558 }, { "epoch": 0.6886609907120743, "grad_norm": 0.06761622428894043, "learning_rate": 9.903534070815952e-05, "loss": 0.0093, "step": 3559 }, { "epoch": 0.6888544891640866, "grad_norm": 0.029679853469133377, "learning_rate": 9.903477554818627e-05, "loss": 0.0119, "step": 3560 }, { "epoch": 0.6890479876160991, "grad_norm": 0.06121348589658737, "learning_rate": 9.903421022450257e-05, "loss": 0.0097, "step": 3561 }, { "epoch": 0.6892414860681114, "grad_norm": 0.05048675462603569, "learning_rate": 9.903364473711054e-05, "loss": 0.0123, "step": 3562 }, { "epoch": 0.6894349845201239, "grad_norm": 0.0970686599612236, "learning_rate": 9.903307908601226e-05, "loss": 0.0101, "step": 3563 }, { "epoch": 0.6896284829721362, "grad_norm": 0.05035659670829773, "learning_rate": 9.903251327120985e-05, "loss": 0.0124, "step": 3564 }, { "epoch": 0.6898219814241486, "grad_norm": 0.06779836118221283, "learning_rate": 9.903194729270541e-05, "loss": 0.0115, "step": 3565 }, { "epoch": 0.690015479876161, "grad_norm": 0.08598779886960983, "learning_rate": 9.903138115050102e-05, "loss": 0.012, "step": 3566 }, { "epoch": 0.6902089783281734, "grad_norm": 0.18272341787815094, "learning_rate": 9.903081484459882e-05, "loss": 0.0109, "step": 3567 }, { "epoch": 0.6904024767801857, "grad_norm": 0.048961341381073, "learning_rate": 9.903024837500088e-05, "loss": 0.0103, "step": 3568 }, { "epoch": 0.6905959752321982, "grad_norm": 0.19061018526554108, "learning_rate": 9.902968174170935e-05, "loss": 0.0094, "step": 3569 }, { "epoch": 0.6907894736842105, "grad_norm": 0.10777536779642105, "learning_rate": 9.902911494472631e-05, "loss": 0.01, "step": 3570 }, { "epoch": 0.690982972136223, "grad_norm": 0.3217037618160248, "learning_rate": 9.902854798405386e-05, "loss": 0.0131, "step": 3571 }, { "epoch": 0.6911764705882353, "grad_norm": 0.10178004950284958, "learning_rate": 9.902798085969411e-05, "loss": 0.0137, "step": 3572 }, { "epoch": 0.6913699690402477, "grad_norm": 0.1851908564567566, "learning_rate": 9.90274135716492e-05, "loss": 0.0107, "step": 3573 }, { "epoch": 0.69156346749226, "grad_norm": 0.10927101224660873, "learning_rate": 9.90268461199212e-05, "loss": 0.0108, "step": 3574 }, { "epoch": 0.6917569659442725, "grad_norm": 0.1685798466205597, "learning_rate": 9.902627850451223e-05, "loss": 0.0103, "step": 3575 }, { "epoch": 0.6919504643962848, "grad_norm": 0.14501537382602692, "learning_rate": 9.90257107254244e-05, "loss": 0.0111, "step": 3576 }, { "epoch": 0.6921439628482973, "grad_norm": 0.13527636229991913, "learning_rate": 9.902514278265985e-05, "loss": 0.0101, "step": 3577 }, { "epoch": 0.6923374613003096, "grad_norm": 0.17713287472724915, "learning_rate": 9.902457467622067e-05, "loss": 0.0105, "step": 3578 }, { "epoch": 0.6925309597523219, "grad_norm": 0.11546964198350906, "learning_rate": 9.902400640610893e-05, "loss": 0.0108, "step": 3579 }, { "epoch": 0.6927244582043344, "grad_norm": 0.15686343610286713, "learning_rate": 9.90234379723268e-05, "loss": 0.01, "step": 3580 }, { "epoch": 
0.6929179566563467, "grad_norm": 0.11959308385848999, "learning_rate": 9.902286937487638e-05, "loss": 0.0139, "step": 3581 }, { "epoch": 0.6931114551083591, "grad_norm": 0.12787631154060364, "learning_rate": 9.902230061375978e-05, "loss": 0.0127, "step": 3582 }, { "epoch": 0.6933049535603715, "grad_norm": 0.1000993624329567, "learning_rate": 9.902173168897911e-05, "loss": 0.0106, "step": 3583 }, { "epoch": 0.6934984520123839, "grad_norm": 0.1195487529039383, "learning_rate": 9.90211626005365e-05, "loss": 0.0107, "step": 3584 }, { "epoch": 0.6936919504643962, "grad_norm": 0.10115063935518265, "learning_rate": 9.902059334843405e-05, "loss": 0.0104, "step": 3585 }, { "epoch": 0.6938854489164087, "grad_norm": 0.06483669579029083, "learning_rate": 9.902002393267386e-05, "loss": 0.0095, "step": 3586 }, { "epoch": 0.694078947368421, "grad_norm": 0.15675538778305054, "learning_rate": 9.90194543532581e-05, "loss": 0.0106, "step": 3587 }, { "epoch": 0.6942724458204335, "grad_norm": 0.05042522773146629, "learning_rate": 9.901888461018883e-05, "loss": 0.0088, "step": 3588 }, { "epoch": 0.6944659442724458, "grad_norm": 0.14291875064373016, "learning_rate": 9.90183147034682e-05, "loss": 0.0095, "step": 3589 }, { "epoch": 0.6946594427244582, "grad_norm": 0.09847474843263626, "learning_rate": 9.901774463309834e-05, "loss": 0.0115, "step": 3590 }, { "epoch": 0.6948529411764706, "grad_norm": 0.12283901870250702, "learning_rate": 9.901717439908134e-05, "loss": 0.0133, "step": 3591 }, { "epoch": 0.695046439628483, "grad_norm": 0.11322420090436935, "learning_rate": 9.901660400141932e-05, "loss": 0.0096, "step": 3592 }, { "epoch": 0.6952399380804953, "grad_norm": 0.09444281458854675, "learning_rate": 9.901603344011444e-05, "loss": 0.0099, "step": 3593 }, { "epoch": 0.6954334365325078, "grad_norm": 0.1407560259103775, "learning_rate": 9.901546271516877e-05, "loss": 0.011, "step": 3594 }, { "epoch": 0.6956269349845201, "grad_norm": 0.0433889739215374, "learning_rate": 9.901489182658445e-05, "loss": 0.0099, "step": 3595 }, { "epoch": 0.6958204334365325, "grad_norm": 0.1338595449924469, "learning_rate": 9.901432077436361e-05, "loss": 0.0113, "step": 3596 }, { "epoch": 0.6960139318885449, "grad_norm": 0.04454302042722702, "learning_rate": 9.901374955850838e-05, "loss": 0.01, "step": 3597 }, { "epoch": 0.6962074303405573, "grad_norm": 0.10330282151699066, "learning_rate": 9.901317817902088e-05, "loss": 0.012, "step": 3598 }, { "epoch": 0.6964009287925697, "grad_norm": 0.0831853374838829, "learning_rate": 9.901260663590322e-05, "loss": 0.0091, "step": 3599 }, { "epoch": 0.6965944272445821, "grad_norm": 0.060303401201963425, "learning_rate": 9.901203492915754e-05, "loss": 0.0115, "step": 3600 }, { "epoch": 0.6967879256965944, "grad_norm": 0.050287745893001556, "learning_rate": 9.901146305878595e-05, "loss": 0.0108, "step": 3601 }, { "epoch": 0.6969814241486069, "grad_norm": 0.13364693522453308, "learning_rate": 9.901089102479059e-05, "loss": 0.0116, "step": 3602 }, { "epoch": 0.6971749226006192, "grad_norm": 0.08521753549575806, "learning_rate": 9.901031882717358e-05, "loss": 0.0119, "step": 3603 }, { "epoch": 0.6973684210526315, "grad_norm": 0.1260199248790741, "learning_rate": 9.900974646593705e-05, "loss": 0.0093, "step": 3604 }, { "epoch": 0.697561919504644, "grad_norm": 0.04237549751996994, "learning_rate": 9.900917394108312e-05, "loss": 0.0073, "step": 3605 }, { "epoch": 0.6977554179566563, "grad_norm": 0.1455662101507187, "learning_rate": 9.900860125261394e-05, "loss": 0.0107, "step": 3606 }, { "epoch": 
0.6979489164086687, "grad_norm": 0.12838777899742126, "learning_rate": 9.900802840053161e-05, "loss": 0.0107, "step": 3607 }, { "epoch": 0.6981424148606811, "grad_norm": 0.14328525960445404, "learning_rate": 9.900745538483829e-05, "loss": 0.0101, "step": 3608 }, { "epoch": 0.6983359133126935, "grad_norm": 0.1625002920627594, "learning_rate": 9.900688220553607e-05, "loss": 0.0109, "step": 3609 }, { "epoch": 0.6985294117647058, "grad_norm": 0.12805211544036865, "learning_rate": 9.900630886262713e-05, "loss": 0.0082, "step": 3610 }, { "epoch": 0.6987229102167183, "grad_norm": 0.21412548422813416, "learning_rate": 9.900573535611357e-05, "loss": 0.0134, "step": 3611 }, { "epoch": 0.6989164086687306, "grad_norm": 0.10527002066373825, "learning_rate": 9.900516168599752e-05, "loss": 0.0084, "step": 3612 }, { "epoch": 0.6991099071207431, "grad_norm": 0.1989654004573822, "learning_rate": 9.900458785228114e-05, "loss": 0.0096, "step": 3613 }, { "epoch": 0.6993034055727554, "grad_norm": 0.06466936320066452, "learning_rate": 9.900401385496652e-05, "loss": 0.0119, "step": 3614 }, { "epoch": 0.6994969040247678, "grad_norm": 0.17559662461280823, "learning_rate": 9.900343969405585e-05, "loss": 0.0082, "step": 3615 }, { "epoch": 0.6996904024767802, "grad_norm": 0.07574406266212463, "learning_rate": 9.900286536955122e-05, "loss": 0.0088, "step": 3616 }, { "epoch": 0.6998839009287926, "grad_norm": 0.14626333117485046, "learning_rate": 9.900229088145476e-05, "loss": 0.0119, "step": 3617 }, { "epoch": 0.7000773993808049, "grad_norm": 0.08616802096366882, "learning_rate": 9.900171622976865e-05, "loss": 0.0113, "step": 3618 }, { "epoch": 0.7002708978328174, "grad_norm": 0.11281588673591614, "learning_rate": 9.9001141414495e-05, "loss": 0.0129, "step": 3619 }, { "epoch": 0.7004643962848297, "grad_norm": 0.09189508110284805, "learning_rate": 9.900056643563594e-05, "loss": 0.0094, "step": 3620 }, { "epoch": 0.7006578947368421, "grad_norm": 0.11531040817499161, "learning_rate": 9.899999129319361e-05, "loss": 0.0101, "step": 3621 }, { "epoch": 0.7008513931888545, "grad_norm": 0.07718486338853836, "learning_rate": 9.899941598717017e-05, "loss": 0.01, "step": 3622 }, { "epoch": 0.7010448916408669, "grad_norm": 0.09381019324064255, "learning_rate": 9.899884051756773e-05, "loss": 0.0114, "step": 3623 }, { "epoch": 0.7012383900928792, "grad_norm": 0.06389732658863068, "learning_rate": 9.899826488438845e-05, "loss": 0.0112, "step": 3624 }, { "epoch": 0.7014318885448917, "grad_norm": 0.05660024285316467, "learning_rate": 9.899768908763446e-05, "loss": 0.0096, "step": 3625 }, { "epoch": 0.701625386996904, "grad_norm": 0.10150446742773056, "learning_rate": 9.899711312730791e-05, "loss": 0.0114, "step": 3626 }, { "epoch": 0.7018188854489165, "grad_norm": 0.06656082719564438, "learning_rate": 9.899653700341092e-05, "loss": 0.0114, "step": 3627 }, { "epoch": 0.7020123839009288, "grad_norm": 0.09818556159734726, "learning_rate": 9.899596071594566e-05, "loss": 0.012, "step": 3628 }, { "epoch": 0.7022058823529411, "grad_norm": 0.11415811628103256, "learning_rate": 9.899538426491425e-05, "loss": 0.0105, "step": 3629 }, { "epoch": 0.7023993808049536, "grad_norm": 0.06321275979280472, "learning_rate": 9.899480765031885e-05, "loss": 0.0101, "step": 3630 }, { "epoch": 0.7025928792569659, "grad_norm": 0.12163747102022171, "learning_rate": 9.899423087216159e-05, "loss": 0.0121, "step": 3631 }, { "epoch": 0.7027863777089783, "grad_norm": 0.0714084580540657, "learning_rate": 9.899365393044462e-05, "loss": 0.0092, "step": 3632 }, { "epoch": 
0.7029798761609907, "grad_norm": 0.09705384820699692, "learning_rate": 9.899307682517008e-05, "loss": 0.0106, "step": 3633 }, { "epoch": 0.7031733746130031, "grad_norm": 0.11597470939159393, "learning_rate": 9.899249955634013e-05, "loss": 0.0108, "step": 3634 }, { "epoch": 0.7033668730650154, "grad_norm": 0.07767399400472641, "learning_rate": 9.89919221239569e-05, "loss": 0.0103, "step": 3635 }, { "epoch": 0.7035603715170279, "grad_norm": 0.13882547616958618, "learning_rate": 9.899134452802254e-05, "loss": 0.0107, "step": 3636 }, { "epoch": 0.7037538699690402, "grad_norm": 0.06060343608260155, "learning_rate": 9.899076676853922e-05, "loss": 0.0111, "step": 3637 }, { "epoch": 0.7039473684210527, "grad_norm": 0.10629437863826752, "learning_rate": 9.899018884550905e-05, "loss": 0.011, "step": 3638 }, { "epoch": 0.704140866873065, "grad_norm": 0.10590290278196335, "learning_rate": 9.89896107589342e-05, "loss": 0.0091, "step": 3639 }, { "epoch": 0.7043343653250774, "grad_norm": 0.06374381482601166, "learning_rate": 9.898903250881684e-05, "loss": 0.0094, "step": 3640 }, { "epoch": 0.7045278637770898, "grad_norm": 0.08291862905025482, "learning_rate": 9.898845409515907e-05, "loss": 0.0102, "step": 3641 }, { "epoch": 0.7047213622291022, "grad_norm": 0.04229862242937088, "learning_rate": 9.898787551796307e-05, "loss": 0.0113, "step": 3642 }, { "epoch": 0.7049148606811145, "grad_norm": 0.04263349995017052, "learning_rate": 9.8987296777231e-05, "loss": 0.0111, "step": 3643 }, { "epoch": 0.705108359133127, "grad_norm": 0.06340764462947845, "learning_rate": 9.898671787296497e-05, "loss": 0.0085, "step": 3644 }, { "epoch": 0.7053018575851393, "grad_norm": 0.13432134687900543, "learning_rate": 9.89861388051672e-05, "loss": 0.0105, "step": 3645 }, { "epoch": 0.7054953560371517, "grad_norm": 0.07155019044876099, "learning_rate": 9.898555957383977e-05, "loss": 0.0101, "step": 3646 }, { "epoch": 0.7056888544891641, "grad_norm": 0.14691564440727234, "learning_rate": 9.898498017898489e-05, "loss": 0.0095, "step": 3647 }, { "epoch": 0.7058823529411765, "grad_norm": 0.056882526725530624, "learning_rate": 9.898440062060468e-05, "loss": 0.0112, "step": 3648 }, { "epoch": 0.7060758513931888, "grad_norm": 0.12946303188800812, "learning_rate": 9.898382089870131e-05, "loss": 0.0102, "step": 3649 }, { "epoch": 0.7062693498452013, "grad_norm": 0.17677250504493713, "learning_rate": 9.898324101327693e-05, "loss": 0.01, "step": 3650 }, { "epoch": 0.7064628482972136, "grad_norm": 0.13310179114341736, "learning_rate": 9.898266096433372e-05, "loss": 0.012, "step": 3651 }, { "epoch": 0.7066563467492261, "grad_norm": 0.2479483187198639, "learning_rate": 9.898208075187378e-05, "loss": 0.0109, "step": 3652 }, { "epoch": 0.7068498452012384, "grad_norm": 0.08787466585636139, "learning_rate": 9.898150037589933e-05, "loss": 0.0104, "step": 3653 }, { "epoch": 0.7070433436532507, "grad_norm": 0.22955799102783203, "learning_rate": 9.898091983641247e-05, "loss": 0.01, "step": 3654 }, { "epoch": 0.7072368421052632, "grad_norm": 0.12598969042301178, "learning_rate": 9.89803391334154e-05, "loss": 0.0094, "step": 3655 }, { "epoch": 0.7074303405572755, "grad_norm": 0.1436539739370346, "learning_rate": 9.89797582669103e-05, "loss": 0.011, "step": 3656 }, { "epoch": 0.7076238390092879, "grad_norm": 0.14686135947704315, "learning_rate": 9.897917723689925e-05, "loss": 0.0091, "step": 3657 }, { "epoch": 0.7078173374613003, "grad_norm": 0.07632743567228317, "learning_rate": 9.897859604338448e-05, "loss": 0.0114, "step": 3658 }, { "epoch": 
0.7080108359133127, "grad_norm": 0.11549196392297745, "learning_rate": 9.89780146863681e-05, "loss": 0.0089, "step": 3659 }, { "epoch": 0.708204334365325, "grad_norm": 0.1047380343079567, "learning_rate": 9.897743316585233e-05, "loss": 0.0097, "step": 3660 }, { "epoch": 0.7083978328173375, "grad_norm": 0.04935387894511223, "learning_rate": 9.897685148183927e-05, "loss": 0.0095, "step": 3661 }, { "epoch": 0.7085913312693498, "grad_norm": 0.12268846482038498, "learning_rate": 9.897626963433113e-05, "loss": 0.01, "step": 3662 }, { "epoch": 0.7087848297213623, "grad_norm": 0.05873687565326691, "learning_rate": 9.897568762333005e-05, "loss": 0.0104, "step": 3663 }, { "epoch": 0.7089783281733746, "grad_norm": 0.18908075988292694, "learning_rate": 9.89751054488382e-05, "loss": 0.0111, "step": 3664 }, { "epoch": 0.709171826625387, "grad_norm": 0.09571059793233871, "learning_rate": 9.897452311085775e-05, "loss": 0.0102, "step": 3665 }, { "epoch": 0.7093653250773994, "grad_norm": 0.20542512834072113, "learning_rate": 9.897394060939086e-05, "loss": 0.0115, "step": 3666 }, { "epoch": 0.7095588235294118, "grad_norm": 0.08884265273809433, "learning_rate": 9.897335794443968e-05, "loss": 0.0128, "step": 3667 }, { "epoch": 0.7097523219814241, "grad_norm": 0.21619156002998352, "learning_rate": 9.89727751160064e-05, "loss": 0.0111, "step": 3668 }, { "epoch": 0.7099458204334366, "grad_norm": 0.09536788612604141, "learning_rate": 9.897219212409318e-05, "loss": 0.0114, "step": 3669 }, { "epoch": 0.7101393188854489, "grad_norm": 0.18781571090221405, "learning_rate": 9.897160896870218e-05, "loss": 0.0111, "step": 3670 }, { "epoch": 0.7103328173374613, "grad_norm": 0.08389565348625183, "learning_rate": 9.897102564983557e-05, "loss": 0.0079, "step": 3671 }, { "epoch": 0.7105263157894737, "grad_norm": 0.16036522388458252, "learning_rate": 9.897044216749553e-05, "loss": 0.0117, "step": 3672 }, { "epoch": 0.7107198142414861, "grad_norm": 0.16051451861858368, "learning_rate": 9.896985852168423e-05, "loss": 0.0084, "step": 3673 }, { "epoch": 0.7109133126934984, "grad_norm": 0.13587722182273865, "learning_rate": 9.896927471240382e-05, "loss": 0.0112, "step": 3674 }, { "epoch": 0.7111068111455109, "grad_norm": 0.23176130652427673, "learning_rate": 9.896869073965645e-05, "loss": 0.0111, "step": 3675 }, { "epoch": 0.7113003095975232, "grad_norm": 0.1055009514093399, "learning_rate": 9.896810660344435e-05, "loss": 0.0094, "step": 3676 }, { "epoch": 0.7114938080495357, "grad_norm": 0.21951350569725037, "learning_rate": 9.896752230376967e-05, "loss": 0.0088, "step": 3677 }, { "epoch": 0.711687306501548, "grad_norm": 0.19486932456493378, "learning_rate": 9.896693784063458e-05, "loss": 0.01, "step": 3678 }, { "epoch": 0.7118808049535603, "grad_norm": 0.20816653966903687, "learning_rate": 9.896635321404121e-05, "loss": 0.0117, "step": 3679 }, { "epoch": 0.7120743034055728, "grad_norm": 0.2280758023262024, "learning_rate": 9.896576842399181e-05, "loss": 0.0103, "step": 3680 }, { "epoch": 0.7122678018575851, "grad_norm": 0.1747799664735794, "learning_rate": 9.896518347048852e-05, "loss": 0.0087, "step": 3681 }, { "epoch": 0.7124613003095975, "grad_norm": 0.21065524220466614, "learning_rate": 9.896459835353349e-05, "loss": 0.0091, "step": 3682 }, { "epoch": 0.7126547987616099, "grad_norm": 0.12941980361938477, "learning_rate": 9.896401307312893e-05, "loss": 0.0098, "step": 3683 }, { "epoch": 0.7128482972136223, "grad_norm": 0.19561393558979034, "learning_rate": 9.8963427629277e-05, "loss": 0.0099, "step": 3684 }, { "epoch": 
0.7130417956656346, "grad_norm": 0.08088993281126022, "learning_rate": 9.896284202197986e-05, "loss": 0.0118, "step": 3685 }, { "epoch": 0.7132352941176471, "grad_norm": 0.1548638790845871, "learning_rate": 9.896225625123973e-05, "loss": 0.0105, "step": 3686 }, { "epoch": 0.7134287925696594, "grad_norm": 0.08164545893669128, "learning_rate": 9.896167031705875e-05, "loss": 0.0116, "step": 3687 }, { "epoch": 0.7136222910216719, "grad_norm": 0.16655343770980835, "learning_rate": 9.896108421943911e-05, "loss": 0.0099, "step": 3688 }, { "epoch": 0.7138157894736842, "grad_norm": 0.07126446813344955, "learning_rate": 9.896049795838301e-05, "loss": 0.0099, "step": 3689 }, { "epoch": 0.7140092879256966, "grad_norm": 0.1272760033607483, "learning_rate": 9.89599115338926e-05, "loss": 0.0106, "step": 3690 }, { "epoch": 0.714202786377709, "grad_norm": 0.11045882105827332, "learning_rate": 9.895932494597006e-05, "loss": 0.0111, "step": 3691 }, { "epoch": 0.7143962848297214, "grad_norm": 0.09580165147781372, "learning_rate": 9.89587381946176e-05, "loss": 0.0085, "step": 3692 }, { "epoch": 0.7145897832817337, "grad_norm": 0.18770967423915863, "learning_rate": 9.895815127983737e-05, "loss": 0.0095, "step": 3693 }, { "epoch": 0.7147832817337462, "grad_norm": 0.10053977370262146, "learning_rate": 9.895756420163158e-05, "loss": 0.0095, "step": 3694 }, { "epoch": 0.7149767801857585, "grad_norm": 0.22837848961353302, "learning_rate": 9.895697696000238e-05, "loss": 0.0122, "step": 3695 }, { "epoch": 0.7151702786377709, "grad_norm": 0.12274852395057678, "learning_rate": 9.895638955495198e-05, "loss": 0.0087, "step": 3696 }, { "epoch": 0.7153637770897833, "grad_norm": 0.21704302728176117, "learning_rate": 9.895580198648256e-05, "loss": 0.0128, "step": 3697 }, { "epoch": 0.7155572755417957, "grad_norm": 0.08685731887817383, "learning_rate": 9.895521425459627e-05, "loss": 0.0138, "step": 3698 }, { "epoch": 0.715750773993808, "grad_norm": 0.15943337976932526, "learning_rate": 9.895462635929535e-05, "loss": 0.011, "step": 3699 }, { "epoch": 0.7159442724458205, "grad_norm": 0.07451708614826202, "learning_rate": 9.895403830058197e-05, "loss": 0.0126, "step": 3700 }, { "epoch": 0.7161377708978328, "grad_norm": 0.09119311720132828, "learning_rate": 9.89534500784583e-05, "loss": 0.0092, "step": 3701 }, { "epoch": 0.7163312693498453, "grad_norm": 0.1441146582365036, "learning_rate": 9.895286169292651e-05, "loss": 0.0109, "step": 3702 }, { "epoch": 0.7165247678018576, "grad_norm": 0.09113821387290955, "learning_rate": 9.895227314398883e-05, "loss": 0.0122, "step": 3703 }, { "epoch": 0.7167182662538699, "grad_norm": 0.16467346251010895, "learning_rate": 9.895168443164743e-05, "loss": 0.0103, "step": 3704 }, { "epoch": 0.7169117647058824, "grad_norm": 0.0699969008564949, "learning_rate": 9.895109555590448e-05, "loss": 0.0106, "step": 3705 }, { "epoch": 0.7171052631578947, "grad_norm": 0.16376034915447235, "learning_rate": 9.89505065167622e-05, "loss": 0.0103, "step": 3706 }, { "epoch": 0.7172987616099071, "grad_norm": 0.08763455599546432, "learning_rate": 9.894991731422277e-05, "loss": 0.0099, "step": 3707 }, { "epoch": 0.7174922600619195, "grad_norm": 0.1494157463312149, "learning_rate": 9.894932794828838e-05, "loss": 0.0121, "step": 3708 }, { "epoch": 0.7176857585139319, "grad_norm": 0.11770396679639816, "learning_rate": 9.89487384189612e-05, "loss": 0.011, "step": 3709 }, { "epoch": 0.7178792569659442, "grad_norm": 0.09971687942743301, "learning_rate": 9.894814872624345e-05, "loss": 0.0093, "step": 3710 }, { "epoch": 
0.7180727554179567, "grad_norm": 0.12621383368968964, "learning_rate": 9.894755887013732e-05, "loss": 0.0069, "step": 3711 }, { "epoch": 0.718266253869969, "grad_norm": 0.05945570021867752, "learning_rate": 9.894696885064499e-05, "loss": 0.0097, "step": 3712 }, { "epoch": 0.7184597523219814, "grad_norm": 0.15372416377067566, "learning_rate": 9.894637866776865e-05, "loss": 0.0102, "step": 3713 }, { "epoch": 0.7186532507739938, "grad_norm": 0.09803278744220734, "learning_rate": 9.89457883215105e-05, "loss": 0.0121, "step": 3714 }, { "epoch": 0.7188467492260062, "grad_norm": 0.13024188578128815, "learning_rate": 9.894519781187275e-05, "loss": 0.0127, "step": 3715 }, { "epoch": 0.7190402476780186, "grad_norm": 0.08011076599359512, "learning_rate": 9.894460713885757e-05, "loss": 0.0104, "step": 3716 }, { "epoch": 0.719233746130031, "grad_norm": 0.08346689492464066, "learning_rate": 9.894401630246718e-05, "loss": 0.0111, "step": 3717 }, { "epoch": 0.7194272445820433, "grad_norm": 0.09670073539018631, "learning_rate": 9.894342530270377e-05, "loss": 0.0114, "step": 3718 }, { "epoch": 0.7196207430340558, "grad_norm": 0.10364235192537308, "learning_rate": 9.894283413956953e-05, "loss": 0.0097, "step": 3719 }, { "epoch": 0.7198142414860681, "grad_norm": 0.07825206220149994, "learning_rate": 9.894224281306665e-05, "loss": 0.0118, "step": 3720 }, { "epoch": 0.7200077399380805, "grad_norm": 0.08595395088195801, "learning_rate": 9.894165132319733e-05, "loss": 0.0135, "step": 3721 }, { "epoch": 0.7202012383900929, "grad_norm": 0.1019815057516098, "learning_rate": 9.894105966996381e-05, "loss": 0.0103, "step": 3722 }, { "epoch": 0.7203947368421053, "grad_norm": 0.05314111337065697, "learning_rate": 9.894046785336824e-05, "loss": 0.0083, "step": 3723 }, { "epoch": 0.7205882352941176, "grad_norm": 0.0928349420428276, "learning_rate": 9.893987587341283e-05, "loss": 0.0103, "step": 3724 }, { "epoch": 0.7207817337461301, "grad_norm": 0.09982621669769287, "learning_rate": 9.893928373009981e-05, "loss": 0.01, "step": 3725 }, { "epoch": 0.7209752321981424, "grad_norm": 0.03938696160912514, "learning_rate": 9.893869142343133e-05, "loss": 0.0083, "step": 3726 }, { "epoch": 0.7211687306501547, "grad_norm": 0.08135776966810226, "learning_rate": 9.893809895340966e-05, "loss": 0.0126, "step": 3727 }, { "epoch": 0.7213622291021672, "grad_norm": 0.14345882833003998, "learning_rate": 9.893750632003693e-05, "loss": 0.0094, "step": 3728 }, { "epoch": 0.7215557275541795, "grad_norm": 0.07304175943136215, "learning_rate": 9.893691352331539e-05, "loss": 0.0108, "step": 3729 }, { "epoch": 0.721749226006192, "grad_norm": 0.19479252398014069, "learning_rate": 9.893632056324723e-05, "loss": 0.012, "step": 3730 }, { "epoch": 0.7219427244582043, "grad_norm": 0.10642828792333603, "learning_rate": 9.893572743983466e-05, "loss": 0.0119, "step": 3731 }, { "epoch": 0.7221362229102167, "grad_norm": 0.10690487176179886, "learning_rate": 9.89351341530799e-05, "loss": 0.0134, "step": 3732 }, { "epoch": 0.7223297213622291, "grad_norm": 0.13877582550048828, "learning_rate": 9.89345407029851e-05, "loss": 0.0105, "step": 3733 }, { "epoch": 0.7225232198142415, "grad_norm": 0.06953590363264084, "learning_rate": 9.893394708955253e-05, "loss": 0.0103, "step": 3734 }, { "epoch": 0.7227167182662538, "grad_norm": 0.14396332204341888, "learning_rate": 9.893335331278437e-05, "loss": 0.0117, "step": 3735 }, { "epoch": 0.7229102167182663, "grad_norm": 0.10537035018205643, "learning_rate": 9.89327593726828e-05, "loss": 0.0103, "step": 3736 }, { "epoch": 
0.7231037151702786, "grad_norm": 0.09808647632598877, "learning_rate": 9.893216526925008e-05, "loss": 0.0103, "step": 3737 }, { "epoch": 0.723297213622291, "grad_norm": 0.13569965958595276, "learning_rate": 9.89315710024884e-05, "loss": 0.0123, "step": 3738 }, { "epoch": 0.7234907120743034, "grad_norm": 0.08077302575111389, "learning_rate": 9.893097657239994e-05, "loss": 0.0077, "step": 3739 }, { "epoch": 0.7236842105263158, "grad_norm": 0.11879926174879074, "learning_rate": 9.893038197898695e-05, "loss": 0.009, "step": 3740 }, { "epoch": 0.7238777089783281, "grad_norm": 0.05244909226894379, "learning_rate": 9.892978722225162e-05, "loss": 0.0089, "step": 3741 }, { "epoch": 0.7240712074303406, "grad_norm": 0.13042038679122925, "learning_rate": 9.892919230219617e-05, "loss": 0.0123, "step": 3742 }, { "epoch": 0.7242647058823529, "grad_norm": 0.046821627765893936, "learning_rate": 9.892859721882282e-05, "loss": 0.0096, "step": 3743 }, { "epoch": 0.7244582043343654, "grad_norm": 0.14256764948368073, "learning_rate": 9.892800197213375e-05, "loss": 0.013, "step": 3744 }, { "epoch": 0.7246517027863777, "grad_norm": 0.041110560297966, "learning_rate": 9.892740656213118e-05, "loss": 0.0099, "step": 3745 }, { "epoch": 0.7248452012383901, "grad_norm": 0.16273029148578644, "learning_rate": 9.892681098881737e-05, "loss": 0.0132, "step": 3746 }, { "epoch": 0.7250386996904025, "grad_norm": 0.07000014930963516, "learning_rate": 9.892621525219448e-05, "loss": 0.0129, "step": 3747 }, { "epoch": 0.7252321981424149, "grad_norm": 0.13655222952365875, "learning_rate": 9.892561935226475e-05, "loss": 0.0109, "step": 3748 }, { "epoch": 0.7254256965944272, "grad_norm": 0.09731175005435944, "learning_rate": 9.892502328903039e-05, "loss": 0.0102, "step": 3749 }, { "epoch": 0.7256191950464397, "grad_norm": 0.10835050046443939, "learning_rate": 9.892442706249362e-05, "loss": 0.0099, "step": 3750 }, { "epoch": 0.725812693498452, "grad_norm": 0.11218702793121338, "learning_rate": 9.892383067265665e-05, "loss": 0.0106, "step": 3751 }, { "epoch": 0.7260061919504643, "grad_norm": 0.12206322699785233, "learning_rate": 9.89232341195217e-05, "loss": 0.0113, "step": 3752 }, { "epoch": 0.7261996904024768, "grad_norm": 0.07230538129806519, "learning_rate": 9.892263740309099e-05, "loss": 0.0102, "step": 3753 }, { "epoch": 0.7263931888544891, "grad_norm": 0.2243022322654724, "learning_rate": 9.892204052336674e-05, "loss": 0.0113, "step": 3754 }, { "epoch": 0.7265866873065016, "grad_norm": 0.07704751938581467, "learning_rate": 9.892144348035117e-05, "loss": 0.0115, "step": 3755 }, { "epoch": 0.7267801857585139, "grad_norm": 0.14888538420200348, "learning_rate": 9.892084627404648e-05, "loss": 0.0115, "step": 3756 }, { "epoch": 0.7269736842105263, "grad_norm": 0.15190474689006805, "learning_rate": 9.892024890445492e-05, "loss": 0.0118, "step": 3757 }, { "epoch": 0.7271671826625387, "grad_norm": 0.12813213467597961, "learning_rate": 9.89196513715787e-05, "loss": 0.0102, "step": 3758 }, { "epoch": 0.7273606811145511, "grad_norm": 0.18530280888080597, "learning_rate": 9.891905367542003e-05, "loss": 0.0099, "step": 3759 }, { "epoch": 0.7275541795665634, "grad_norm": 0.06382110714912415, "learning_rate": 9.891845581598113e-05, "loss": 0.0119, "step": 3760 }, { "epoch": 0.7277476780185759, "grad_norm": 0.2761078178882599, "learning_rate": 9.891785779326425e-05, "loss": 0.0132, "step": 3761 }, { "epoch": 0.7279411764705882, "grad_norm": 0.12518133223056793, "learning_rate": 9.891725960727158e-05, "loss": 0.0098, "step": 3762 }, { "epoch": 
0.7281346749226006, "grad_norm": 0.15004248917102814, "learning_rate": 9.891666125800537e-05, "loss": 0.0104, "step": 3763 }, { "epoch": 0.728328173374613, "grad_norm": 0.07942117750644684, "learning_rate": 9.891606274546784e-05, "loss": 0.0126, "step": 3764 }, { "epoch": 0.7285216718266254, "grad_norm": 0.2756488025188446, "learning_rate": 9.891546406966121e-05, "loss": 0.0129, "step": 3765 }, { "epoch": 0.7287151702786377, "grad_norm": 0.05490773171186447, "learning_rate": 9.89148652305877e-05, "loss": 0.0104, "step": 3766 }, { "epoch": 0.7289086687306502, "grad_norm": 0.2626464366912842, "learning_rate": 9.891426622824954e-05, "loss": 0.0091, "step": 3767 }, { "epoch": 0.7291021671826625, "grad_norm": 0.1200767531991005, "learning_rate": 9.891366706264895e-05, "loss": 0.012, "step": 3768 }, { "epoch": 0.729295665634675, "grad_norm": 0.29269278049468994, "learning_rate": 9.89130677337882e-05, "loss": 0.0099, "step": 3769 }, { "epoch": 0.7294891640866873, "grad_norm": 0.16676019132137299, "learning_rate": 9.891246824166945e-05, "loss": 0.0101, "step": 3770 }, { "epoch": 0.7296826625386997, "grad_norm": 0.23447741568088531, "learning_rate": 9.891186858629497e-05, "loss": 0.0102, "step": 3771 }, { "epoch": 0.7298761609907121, "grad_norm": 0.19344937801361084, "learning_rate": 9.891126876766699e-05, "loss": 0.011, "step": 3772 }, { "epoch": 0.7300696594427245, "grad_norm": 0.15403249859809875, "learning_rate": 9.891066878578774e-05, "loss": 0.0109, "step": 3773 }, { "epoch": 0.7302631578947368, "grad_norm": 0.21388088166713715, "learning_rate": 9.891006864065944e-05, "loss": 0.0109, "step": 3774 }, { "epoch": 0.7304566563467493, "grad_norm": 0.08431262522935867, "learning_rate": 9.89094683322843e-05, "loss": 0.0102, "step": 3775 }, { "epoch": 0.7306501547987616, "grad_norm": 0.1753379851579666, "learning_rate": 9.890886786066461e-05, "loss": 0.0106, "step": 3776 }, { "epoch": 0.7308436532507739, "grad_norm": 0.09930408746004105, "learning_rate": 9.890826722580256e-05, "loss": 0.0101, "step": 3777 }, { "epoch": 0.7310371517027864, "grad_norm": 0.14852465689182281, "learning_rate": 9.890766642770038e-05, "loss": 0.0102, "step": 3778 }, { "epoch": 0.7312306501547987, "grad_norm": 0.20678696036338806, "learning_rate": 9.890706546636031e-05, "loss": 0.0116, "step": 3779 }, { "epoch": 0.7314241486068112, "grad_norm": 0.21053601801395416, "learning_rate": 9.89064643417846e-05, "loss": 0.0137, "step": 3780 }, { "epoch": 0.7316176470588235, "grad_norm": 0.07516892999410629, "learning_rate": 9.890586305397548e-05, "loss": 0.0099, "step": 3781 }, { "epoch": 0.7318111455108359, "grad_norm": 0.18514902889728546, "learning_rate": 9.890526160293519e-05, "loss": 0.011, "step": 3782 }, { "epoch": 0.7320046439628483, "grad_norm": 0.06432782113552094, "learning_rate": 9.890465998866593e-05, "loss": 0.0088, "step": 3783 }, { "epoch": 0.7321981424148607, "grad_norm": 0.1373545080423355, "learning_rate": 9.890405821116997e-05, "loss": 0.01, "step": 3784 }, { "epoch": 0.732391640866873, "grad_norm": 0.0887046530842781, "learning_rate": 9.890345627044955e-05, "loss": 0.012, "step": 3785 }, { "epoch": 0.7325851393188855, "grad_norm": 0.07458863407373428, "learning_rate": 9.890285416650689e-05, "loss": 0.0108, "step": 3786 }, { "epoch": 0.7327786377708978, "grad_norm": 0.06218456104397774, "learning_rate": 9.890225189934423e-05, "loss": 0.01, "step": 3787 }, { "epoch": 0.7329721362229102, "grad_norm": 0.07107295095920563, "learning_rate": 9.890164946896383e-05, "loss": 0.0129, "step": 3788 }, { "epoch": 
0.7331656346749226, "grad_norm": 0.049525849521160126, "learning_rate": 9.89010468753679e-05, "loss": 0.011, "step": 3789 }, { "epoch": 0.733359133126935, "grad_norm": 0.06500326097011566, "learning_rate": 9.890044411855869e-05, "loss": 0.0101, "step": 3790 }, { "epoch": 0.7335526315789473, "grad_norm": 0.08238336443901062, "learning_rate": 9.889984119853847e-05, "loss": 0.0087, "step": 3791 }, { "epoch": 0.7337461300309598, "grad_norm": 0.06998797506093979, "learning_rate": 9.889923811530944e-05, "loss": 0.0094, "step": 3792 }, { "epoch": 0.7339396284829721, "grad_norm": 0.12014060467481613, "learning_rate": 9.889863486887388e-05, "loss": 0.0118, "step": 3793 }, { "epoch": 0.7341331269349846, "grad_norm": 0.051730528473854065, "learning_rate": 9.8898031459234e-05, "loss": 0.0097, "step": 3794 }, { "epoch": 0.7343266253869969, "grad_norm": 0.13795655965805054, "learning_rate": 9.889742788639205e-05, "loss": 0.0107, "step": 3795 }, { "epoch": 0.7345201238390093, "grad_norm": 0.0534399040043354, "learning_rate": 9.889682415035027e-05, "loss": 0.0102, "step": 3796 }, { "epoch": 0.7347136222910217, "grad_norm": 0.144102543592453, "learning_rate": 9.889622025111094e-05, "loss": 0.0103, "step": 3797 }, { "epoch": 0.7349071207430341, "grad_norm": 0.0476810596883297, "learning_rate": 9.889561618867626e-05, "loss": 0.0118, "step": 3798 }, { "epoch": 0.7351006191950464, "grad_norm": 0.1435582935810089, "learning_rate": 9.88950119630485e-05, "loss": 0.0107, "step": 3799 }, { "epoch": 0.7352941176470589, "grad_norm": 0.11506364494562149, "learning_rate": 9.88944075742299e-05, "loss": 0.011, "step": 3800 }, { "epoch": 0.7354876160990712, "grad_norm": 0.15131904184818268, "learning_rate": 9.889380302222272e-05, "loss": 0.0117, "step": 3801 }, { "epoch": 0.7356811145510835, "grad_norm": 0.13201312720775604, "learning_rate": 9.88931983070292e-05, "loss": 0.0094, "step": 3802 }, { "epoch": 0.735874613003096, "grad_norm": 0.10008760541677475, "learning_rate": 9.889259342865156e-05, "loss": 0.0102, "step": 3803 }, { "epoch": 0.7360681114551083, "grad_norm": 0.11059257388114929, "learning_rate": 9.889198838709209e-05, "loss": 0.0104, "step": 3804 }, { "epoch": 0.7362616099071208, "grad_norm": 0.09420648962259293, "learning_rate": 9.889138318235302e-05, "loss": 0.0089, "step": 3805 }, { "epoch": 0.7364551083591331, "grad_norm": 0.09679073840379715, "learning_rate": 9.88907778144366e-05, "loss": 0.0123, "step": 3806 }, { "epoch": 0.7366486068111455, "grad_norm": 0.08936184644699097, "learning_rate": 9.889017228334507e-05, "loss": 0.0131, "step": 3807 }, { "epoch": 0.7368421052631579, "grad_norm": 0.07317899912595749, "learning_rate": 9.888956658908071e-05, "loss": 0.0076, "step": 3808 }, { "epoch": 0.7370356037151703, "grad_norm": 0.06407952308654785, "learning_rate": 9.888896073164576e-05, "loss": 0.0107, "step": 3809 }, { "epoch": 0.7372291021671826, "grad_norm": 0.0539296418428421, "learning_rate": 9.888835471104247e-05, "loss": 0.0101, "step": 3810 }, { "epoch": 0.7374226006191951, "grad_norm": 0.03845716640353203, "learning_rate": 9.888774852727309e-05, "loss": 0.0094, "step": 3811 }, { "epoch": 0.7376160990712074, "grad_norm": 0.06932970136404037, "learning_rate": 9.888714218033985e-05, "loss": 0.0099, "step": 3812 }, { "epoch": 0.7378095975232198, "grad_norm": 0.05588635802268982, "learning_rate": 9.888653567024506e-05, "loss": 0.0099, "step": 3813 }, { "epoch": 0.7380030959752322, "grad_norm": 0.061111100018024445, "learning_rate": 9.888592899699093e-05, "loss": 0.0095, "step": 3814 }, { "epoch": 
0.7381965944272446, "grad_norm": 0.04994833469390869, "learning_rate": 9.888532216057973e-05, "loss": 0.0123, "step": 3815 }, { "epoch": 0.7383900928792569, "grad_norm": 0.0759991779923439, "learning_rate": 9.888471516101371e-05, "loss": 0.0124, "step": 3816 }, { "epoch": 0.7385835913312694, "grad_norm": 0.08064204454421997, "learning_rate": 9.888410799829515e-05, "loss": 0.0108, "step": 3817 }, { "epoch": 0.7387770897832817, "grad_norm": 0.08905744552612305, "learning_rate": 9.888350067242628e-05, "loss": 0.0101, "step": 3818 }, { "epoch": 0.7389705882352942, "grad_norm": 0.07948356121778488, "learning_rate": 9.888289318340936e-05, "loss": 0.0086, "step": 3819 }, { "epoch": 0.7391640866873065, "grad_norm": 0.0526646226644516, "learning_rate": 9.888228553124668e-05, "loss": 0.0101, "step": 3820 }, { "epoch": 0.7393575851393189, "grad_norm": 0.08902198821306229, "learning_rate": 9.888167771594046e-05, "loss": 0.0103, "step": 3821 }, { "epoch": 0.7395510835913313, "grad_norm": 0.08849198371171951, "learning_rate": 9.888106973749297e-05, "loss": 0.0089, "step": 3822 }, { "epoch": 0.7397445820433437, "grad_norm": 0.08409324288368225, "learning_rate": 9.888046159590648e-05, "loss": 0.0124, "step": 3823 }, { "epoch": 0.739938080495356, "grad_norm": 0.08979291468858719, "learning_rate": 9.887985329118325e-05, "loss": 0.01, "step": 3824 }, { "epoch": 0.7401315789473685, "grad_norm": 0.0801292359828949, "learning_rate": 9.887924482332553e-05, "loss": 0.0101, "step": 3825 }, { "epoch": 0.7403250773993808, "grad_norm": 0.08191009610891342, "learning_rate": 9.88786361923356e-05, "loss": 0.009, "step": 3826 }, { "epoch": 0.7405185758513931, "grad_norm": 0.0861663967370987, "learning_rate": 9.88780273982157e-05, "loss": 0.0097, "step": 3827 }, { "epoch": 0.7407120743034056, "grad_norm": 0.04323629289865494, "learning_rate": 9.887741844096812e-05, "loss": 0.0096, "step": 3828 }, { "epoch": 0.7409055727554179, "grad_norm": 0.09221875667572021, "learning_rate": 9.88768093205951e-05, "loss": 0.0109, "step": 3829 }, { "epoch": 0.7410990712074303, "grad_norm": 0.08291877806186676, "learning_rate": 9.887620003709891e-05, "loss": 0.0098, "step": 3830 }, { "epoch": 0.7412925696594427, "grad_norm": 0.11475283652544022, "learning_rate": 9.887559059048182e-05, "loss": 0.0084, "step": 3831 }, { "epoch": 0.7414860681114551, "grad_norm": 0.1289251148700714, "learning_rate": 9.88749809807461e-05, "loss": 0.0099, "step": 3832 }, { "epoch": 0.7416795665634675, "grad_norm": 0.1287672072649002, "learning_rate": 9.887437120789403e-05, "loss": 0.0091, "step": 3833 }, { "epoch": 0.7418730650154799, "grad_norm": 0.06988821178674698, "learning_rate": 9.887376127192783e-05, "loss": 0.0109, "step": 3834 }, { "epoch": 0.7420665634674922, "grad_norm": 0.12408427149057388, "learning_rate": 9.88731511728498e-05, "loss": 0.0135, "step": 3835 }, { "epoch": 0.7422600619195047, "grad_norm": 0.08340528607368469, "learning_rate": 9.887254091066222e-05, "loss": 0.008, "step": 3836 }, { "epoch": 0.742453560371517, "grad_norm": 0.10784707218408585, "learning_rate": 9.887193048536734e-05, "loss": 0.0096, "step": 3837 }, { "epoch": 0.7426470588235294, "grad_norm": 0.11669940501451492, "learning_rate": 9.887131989696745e-05, "loss": 0.0087, "step": 3838 }, { "epoch": 0.7428405572755418, "grad_norm": 0.12177509814500809, "learning_rate": 9.887070914546477e-05, "loss": 0.0085, "step": 3839 }, { "epoch": 0.7430340557275542, "grad_norm": 0.1161186471581459, "learning_rate": 9.88700982308616e-05, "loss": 0.0106, "step": 3840 }, { "epoch": 
0.7432275541795665, "grad_norm": 0.09130141884088516, "learning_rate": 9.886948715316025e-05, "loss": 0.0103, "step": 3841 }, { "epoch": 0.743421052631579, "grad_norm": 0.07195375859737396, "learning_rate": 9.886887591236292e-05, "loss": 0.0091, "step": 3842 }, { "epoch": 0.7436145510835913, "grad_norm": 0.07768483459949493, "learning_rate": 9.886826450847195e-05, "loss": 0.0097, "step": 3843 }, { "epoch": 0.7438080495356038, "grad_norm": 0.09868882596492767, "learning_rate": 9.886765294148955e-05, "loss": 0.0125, "step": 3844 }, { "epoch": 0.7440015479876161, "grad_norm": 0.1081303060054779, "learning_rate": 9.886704121141805e-05, "loss": 0.0125, "step": 3845 }, { "epoch": 0.7441950464396285, "grad_norm": 0.1049250066280365, "learning_rate": 9.886642931825969e-05, "loss": 0.0105, "step": 3846 }, { "epoch": 0.7443885448916409, "grad_norm": 0.13395534455776215, "learning_rate": 9.886581726201677e-05, "loss": 0.0106, "step": 3847 }, { "epoch": 0.7445820433436533, "grad_norm": 0.10492187738418579, "learning_rate": 9.886520504269153e-05, "loss": 0.0102, "step": 3848 }, { "epoch": 0.7447755417956656, "grad_norm": 0.1409517526626587, "learning_rate": 9.886459266028627e-05, "loss": 0.0095, "step": 3849 }, { "epoch": 0.7449690402476781, "grad_norm": 0.1365099549293518, "learning_rate": 9.886398011480327e-05, "loss": 0.012, "step": 3850 }, { "epoch": 0.7451625386996904, "grad_norm": 0.09377940744161606, "learning_rate": 9.88633674062448e-05, "loss": 0.0107, "step": 3851 }, { "epoch": 0.7453560371517027, "grad_norm": 0.10779470950365067, "learning_rate": 9.886275453461314e-05, "loss": 0.0096, "step": 3852 }, { "epoch": 0.7455495356037152, "grad_norm": 0.1456758826971054, "learning_rate": 9.886214149991056e-05, "loss": 0.0098, "step": 3853 }, { "epoch": 0.7457430340557275, "grad_norm": 0.13299031555652618, "learning_rate": 9.886152830213934e-05, "loss": 0.0111, "step": 3854 }, { "epoch": 0.74593653250774, "grad_norm": 0.18276497721672058, "learning_rate": 9.886091494130177e-05, "loss": 0.0116, "step": 3855 }, { "epoch": 0.7461300309597523, "grad_norm": 0.061593424528837204, "learning_rate": 9.886030141740013e-05, "loss": 0.0107, "step": 3856 }, { "epoch": 0.7463235294117647, "grad_norm": 0.1998870074748993, "learning_rate": 9.88596877304367e-05, "loss": 0.0108, "step": 3857 }, { "epoch": 0.746517027863777, "grad_norm": 0.06721765547990799, "learning_rate": 9.885907388041376e-05, "loss": 0.008, "step": 3858 }, { "epoch": 0.7467105263157895, "grad_norm": 0.13292813301086426, "learning_rate": 9.885845986733358e-05, "loss": 0.0107, "step": 3859 }, { "epoch": 0.7469040247678018, "grad_norm": 0.09776133298873901, "learning_rate": 9.885784569119846e-05, "loss": 0.0101, "step": 3860 }, { "epoch": 0.7470975232198143, "grad_norm": 0.07859618961811066, "learning_rate": 9.885723135201069e-05, "loss": 0.0104, "step": 3861 }, { "epoch": 0.7472910216718266, "grad_norm": 0.10916490852832794, "learning_rate": 9.88566168497725e-05, "loss": 0.0107, "step": 3862 }, { "epoch": 0.747484520123839, "grad_norm": 0.07677856832742691, "learning_rate": 9.885600218448625e-05, "loss": 0.0095, "step": 3863 }, { "epoch": 0.7476780185758514, "grad_norm": 0.09115754812955856, "learning_rate": 9.885538735615418e-05, "loss": 0.0108, "step": 3864 }, { "epoch": 0.7478715170278638, "grad_norm": 0.08830495923757553, "learning_rate": 9.885477236477858e-05, "loss": 0.0095, "step": 3865 }, { "epoch": 0.7480650154798761, "grad_norm": 0.11219675093889236, "learning_rate": 9.885415721036177e-05, "loss": 0.0106, "step": 3866 }, { "epoch": 
0.7482585139318886, "grad_norm": 0.09241563081741333, "learning_rate": 9.885354189290598e-05, "loss": 0.0084, "step": 3867 }, { "epoch": 0.7484520123839009, "grad_norm": 0.14147764444351196, "learning_rate": 9.885292641241353e-05, "loss": 0.0105, "step": 3868 }, { "epoch": 0.7486455108359134, "grad_norm": 0.05593270808458328, "learning_rate": 9.885231076888671e-05, "loss": 0.0085, "step": 3869 }, { "epoch": 0.7488390092879257, "grad_norm": 0.14376749098300934, "learning_rate": 9.88516949623278e-05, "loss": 0.0107, "step": 3870 }, { "epoch": 0.7490325077399381, "grad_norm": 0.05239098519086838, "learning_rate": 9.88510789927391e-05, "loss": 0.0101, "step": 3871 }, { "epoch": 0.7492260061919505, "grad_norm": 0.06945602595806122, "learning_rate": 9.88504628601229e-05, "loss": 0.0095, "step": 3872 }, { "epoch": 0.7494195046439629, "grad_norm": 0.0728563666343689, "learning_rate": 9.884984656448147e-05, "loss": 0.0128, "step": 3873 }, { "epoch": 0.7496130030959752, "grad_norm": 0.09678266197443008, "learning_rate": 9.884923010581712e-05, "loss": 0.0083, "step": 3874 }, { "epoch": 0.7498065015479877, "grad_norm": 0.06637116521596909, "learning_rate": 9.884861348413215e-05, "loss": 0.0112, "step": 3875 }, { "epoch": 0.75, "grad_norm": 0.13297505676746368, "learning_rate": 9.884799669942881e-05, "loss": 0.0098, "step": 3876 }, { "epoch": 0.7501934984520123, "grad_norm": 0.07760787755250931, "learning_rate": 9.884737975170945e-05, "loss": 0.0089, "step": 3877 }, { "epoch": 0.7503869969040248, "grad_norm": 0.1315692663192749, "learning_rate": 9.884676264097633e-05, "loss": 0.0098, "step": 3878 }, { "epoch": 0.7505804953560371, "grad_norm": 0.10974740982055664, "learning_rate": 9.884614536723173e-05, "loss": 0.0122, "step": 3879 }, { "epoch": 0.7507739938080495, "grad_norm": 0.12055611610412598, "learning_rate": 9.8845527930478e-05, "loss": 0.012, "step": 3880 }, { "epoch": 0.7509674922600619, "grad_norm": 0.10551551729440689, "learning_rate": 9.884491033071737e-05, "loss": 0.011, "step": 3881 }, { "epoch": 0.7511609907120743, "grad_norm": 0.1078929528594017, "learning_rate": 9.88442925679522e-05, "loss": 0.0097, "step": 3882 }, { "epoch": 0.7513544891640866, "grad_norm": 0.11682301759719849, "learning_rate": 9.884367464218473e-05, "loss": 0.0082, "step": 3883 }, { "epoch": 0.7515479876160991, "grad_norm": 0.1355046182870865, "learning_rate": 9.884305655341728e-05, "loss": 0.0092, "step": 3884 }, { "epoch": 0.7517414860681114, "grad_norm": 0.09690847247838974, "learning_rate": 9.884243830165215e-05, "loss": 0.0104, "step": 3885 }, { "epoch": 0.7519349845201239, "grad_norm": 0.18214039504528046, "learning_rate": 9.884181988689164e-05, "loss": 0.0091, "step": 3886 }, { "epoch": 0.7521284829721362, "grad_norm": 0.11765998601913452, "learning_rate": 9.884120130913806e-05, "loss": 0.0113, "step": 3887 }, { "epoch": 0.7523219814241486, "grad_norm": 0.17518939077854156, "learning_rate": 9.884058256839368e-05, "loss": 0.0108, "step": 3888 }, { "epoch": 0.752515479876161, "grad_norm": 0.11315707117319107, "learning_rate": 9.883996366466081e-05, "loss": 0.0119, "step": 3889 }, { "epoch": 0.7527089783281734, "grad_norm": 0.1397349238395691, "learning_rate": 9.883934459794177e-05, "loss": 0.0105, "step": 3890 }, { "epoch": 0.7529024767801857, "grad_norm": 0.09233170002698898, "learning_rate": 9.883872536823885e-05, "loss": 0.0101, "step": 3891 }, { "epoch": 0.7530959752321982, "grad_norm": 0.1265888810157776, "learning_rate": 9.883810597555436e-05, "loss": 0.0102, "step": 3892 }, { "epoch": 0.7532894736842105, 
"grad_norm": 0.08238627016544342, "learning_rate": 9.883748641989059e-05, "loss": 0.0079, "step": 3893 }, { "epoch": 0.753482972136223, "grad_norm": 0.12678194046020508, "learning_rate": 9.883686670124983e-05, "loss": 0.0114, "step": 3894 }, { "epoch": 0.7536764705882353, "grad_norm": 0.13452330231666565, "learning_rate": 9.883624681963442e-05, "loss": 0.0098, "step": 3895 }, { "epoch": 0.7538699690402477, "grad_norm": 0.0923469290137291, "learning_rate": 9.883562677504663e-05, "loss": 0.0105, "step": 3896 }, { "epoch": 0.75406346749226, "grad_norm": 0.10736380517482758, "learning_rate": 9.883500656748879e-05, "loss": 0.0108, "step": 3897 }, { "epoch": 0.7542569659442725, "grad_norm": 0.10604806244373322, "learning_rate": 9.883438619696319e-05, "loss": 0.0113, "step": 3898 }, { "epoch": 0.7544504643962848, "grad_norm": 0.06608431041240692, "learning_rate": 9.883376566347215e-05, "loss": 0.0111, "step": 3899 }, { "epoch": 0.7546439628482973, "grad_norm": 0.09198673814535141, "learning_rate": 9.883314496701797e-05, "loss": 0.0102, "step": 3900 }, { "epoch": 0.7548374613003096, "grad_norm": 0.08282040059566498, "learning_rate": 9.883252410760296e-05, "loss": 0.011, "step": 3901 }, { "epoch": 0.7550309597523219, "grad_norm": 0.11020741611719131, "learning_rate": 9.883190308522942e-05, "loss": 0.0116, "step": 3902 }, { "epoch": 0.7552244582043344, "grad_norm": 0.10184666514396667, "learning_rate": 9.883128189989967e-05, "loss": 0.0094, "step": 3903 }, { "epoch": 0.7554179566563467, "grad_norm": 0.1432926207780838, "learning_rate": 9.883066055161601e-05, "loss": 0.0122, "step": 3904 }, { "epoch": 0.7556114551083591, "grad_norm": 0.13388119637966156, "learning_rate": 9.883003904038077e-05, "loss": 0.0092, "step": 3905 }, { "epoch": 0.7558049535603715, "grad_norm": 0.2629776895046234, "learning_rate": 9.882941736619621e-05, "loss": 0.0091, "step": 3906 }, { "epoch": 0.7559984520123839, "grad_norm": 0.1250661015510559, "learning_rate": 9.88287955290647e-05, "loss": 0.0106, "step": 3907 }, { "epoch": 0.7561919504643962, "grad_norm": 0.1799839586019516, "learning_rate": 9.882817352898854e-05, "loss": 0.009, "step": 3908 }, { "epoch": 0.7563854489164087, "grad_norm": 0.08669380843639374, "learning_rate": 9.882755136596999e-05, "loss": 0.0117, "step": 3909 }, { "epoch": 0.756578947368421, "grad_norm": 0.14850981533527374, "learning_rate": 9.882692904001144e-05, "loss": 0.0108, "step": 3910 }, { "epoch": 0.7567724458204335, "grad_norm": 0.12465491145849228, "learning_rate": 9.882630655111516e-05, "loss": 0.0106, "step": 3911 }, { "epoch": 0.7569659442724458, "grad_norm": 0.07256050407886505, "learning_rate": 9.882568389928347e-05, "loss": 0.0093, "step": 3912 }, { "epoch": 0.7571594427244582, "grad_norm": 0.08118834346532822, "learning_rate": 9.882506108451867e-05, "loss": 0.0098, "step": 3913 }, { "epoch": 0.7573529411764706, "grad_norm": 0.06256237626075745, "learning_rate": 9.882443810682312e-05, "loss": 0.0114, "step": 3914 }, { "epoch": 0.757546439628483, "grad_norm": 0.04879157245159149, "learning_rate": 9.882381496619908e-05, "loss": 0.0117, "step": 3915 }, { "epoch": 0.7577399380804953, "grad_norm": 0.09655006974935532, "learning_rate": 9.882319166264891e-05, "loss": 0.0108, "step": 3916 }, { "epoch": 0.7579334365325078, "grad_norm": 0.041192058473825455, "learning_rate": 9.882256819617491e-05, "loss": 0.0096, "step": 3917 }, { "epoch": 0.7581269349845201, "grad_norm": 0.16187311708927155, "learning_rate": 9.882194456677939e-05, "loss": 0.0102, "step": 3918 }, { "epoch": 0.7583204334365325, 
"grad_norm": 0.22481365501880646, "learning_rate": 9.882132077446468e-05, "loss": 0.0098, "step": 3919 }, { "epoch": 0.7585139318885449, "grad_norm": 0.12881039083003998, "learning_rate": 9.882069681923311e-05, "loss": 0.0105, "step": 3920 }, { "epoch": 0.7587074303405573, "grad_norm": 0.19502192735671997, "learning_rate": 9.882007270108698e-05, "loss": 0.0104, "step": 3921 }, { "epoch": 0.7589009287925697, "grad_norm": 0.1533142477273941, "learning_rate": 9.881944842002861e-05, "loss": 0.0079, "step": 3922 }, { "epoch": 0.7590944272445821, "grad_norm": 0.18065424263477325, "learning_rate": 9.881882397606034e-05, "loss": 0.0102, "step": 3923 }, { "epoch": 0.7592879256965944, "grad_norm": 0.1620229035615921, "learning_rate": 9.881819936918447e-05, "loss": 0.0115, "step": 3924 }, { "epoch": 0.7594814241486069, "grad_norm": 0.10719543695449829, "learning_rate": 9.881757459940335e-05, "loss": 0.0097, "step": 3925 }, { "epoch": 0.7596749226006192, "grad_norm": 0.232639342546463, "learning_rate": 9.881694966671926e-05, "loss": 0.0102, "step": 3926 }, { "epoch": 0.7598684210526315, "grad_norm": 0.14956121146678925, "learning_rate": 9.881632457113456e-05, "loss": 0.013, "step": 3927 }, { "epoch": 0.760061919504644, "grad_norm": 0.19361265003681183, "learning_rate": 9.881569931265156e-05, "loss": 0.0107, "step": 3928 }, { "epoch": 0.7602554179566563, "grad_norm": 0.10338390618562698, "learning_rate": 9.881507389127259e-05, "loss": 0.0119, "step": 3929 }, { "epoch": 0.7604489164086687, "grad_norm": 0.22046159207820892, "learning_rate": 9.881444830699997e-05, "loss": 0.0111, "step": 3930 }, { "epoch": 0.7606424148606811, "grad_norm": 0.07563698291778564, "learning_rate": 9.881382255983602e-05, "loss": 0.0096, "step": 3931 }, { "epoch": 0.7608359133126935, "grad_norm": 0.2155723124742508, "learning_rate": 9.881319664978309e-05, "loss": 0.0128, "step": 3932 }, { "epoch": 0.7610294117647058, "grad_norm": 0.1095150038599968, "learning_rate": 9.881257057684349e-05, "loss": 0.0091, "step": 3933 }, { "epoch": 0.7612229102167183, "grad_norm": 0.18117643892765045, "learning_rate": 9.881194434101954e-05, "loss": 0.012, "step": 3934 }, { "epoch": 0.7614164086687306, "grad_norm": 0.14563103020191193, "learning_rate": 9.881131794231356e-05, "loss": 0.0105, "step": 3935 }, { "epoch": 0.7616099071207431, "grad_norm": 0.12674933671951294, "learning_rate": 9.881069138072792e-05, "loss": 0.0094, "step": 3936 }, { "epoch": 0.7618034055727554, "grad_norm": 0.21946457028388977, "learning_rate": 9.881006465626492e-05, "loss": 0.0123, "step": 3937 }, { "epoch": 0.7619969040247678, "grad_norm": 0.14627143740653992, "learning_rate": 9.880943776892688e-05, "loss": 0.0095, "step": 3938 }, { "epoch": 0.7621904024767802, "grad_norm": 0.24404625594615936, "learning_rate": 9.880881071871617e-05, "loss": 0.0121, "step": 3939 }, { "epoch": 0.7623839009287926, "grad_norm": 0.14771561324596405, "learning_rate": 9.880818350563508e-05, "loss": 0.0106, "step": 3940 }, { "epoch": 0.7625773993808049, "grad_norm": 0.19136980175971985, "learning_rate": 9.880755612968594e-05, "loss": 0.0103, "step": 3941 }, { "epoch": 0.7627708978328174, "grad_norm": 0.13946211338043213, "learning_rate": 9.880692859087112e-05, "loss": 0.009, "step": 3942 }, { "epoch": 0.7629643962848297, "grad_norm": 0.13596035540103912, "learning_rate": 9.880630088919294e-05, "loss": 0.0105, "step": 3943 }, { "epoch": 0.7631578947368421, "grad_norm": 0.11866159737110138, "learning_rate": 9.880567302465372e-05, "loss": 0.0111, "step": 3944 }, { "epoch": 0.7633513931888545, 
"grad_norm": 0.09201712906360626, "learning_rate": 9.880504499725581e-05, "loss": 0.0108, "step": 3945 }, { "epoch": 0.7635448916408669, "grad_norm": 0.10460551083087921, "learning_rate": 9.880441680700154e-05, "loss": 0.0102, "step": 3946 }, { "epoch": 0.7637383900928792, "grad_norm": 0.12960638105869293, "learning_rate": 9.880378845389323e-05, "loss": 0.0108, "step": 3947 }, { "epoch": 0.7639318885448917, "grad_norm": 0.05436309799551964, "learning_rate": 9.880315993793323e-05, "loss": 0.0088, "step": 3948 }, { "epoch": 0.764125386996904, "grad_norm": 0.11602150648832321, "learning_rate": 9.880253125912387e-05, "loss": 0.0094, "step": 3949 }, { "epoch": 0.7643188854489165, "grad_norm": 0.10171153396368027, "learning_rate": 9.88019024174675e-05, "loss": 0.0123, "step": 3950 }, { "epoch": 0.7645123839009288, "grad_norm": 0.07475647330284119, "learning_rate": 9.880127341296645e-05, "loss": 0.0109, "step": 3951 }, { "epoch": 0.7647058823529411, "grad_norm": 0.11432327330112457, "learning_rate": 9.880064424562304e-05, "loss": 0.0124, "step": 3952 }, { "epoch": 0.7648993808049536, "grad_norm": 0.0996493548154831, "learning_rate": 9.880001491543967e-05, "loss": 0.0114, "step": 3953 }, { "epoch": 0.7650928792569659, "grad_norm": 0.06983678042888641, "learning_rate": 9.879938542241859e-05, "loss": 0.0094, "step": 3954 }, { "epoch": 0.7652863777089783, "grad_norm": 0.11040868610143661, "learning_rate": 9.87987557665622e-05, "loss": 0.0114, "step": 3955 }, { "epoch": 0.7654798761609907, "grad_norm": 0.07293720543384552, "learning_rate": 9.879812594787286e-05, "loss": 0.0102, "step": 3956 }, { "epoch": 0.7656733746130031, "grad_norm": 0.16904577612876892, "learning_rate": 9.879749596635285e-05, "loss": 0.0104, "step": 3957 }, { "epoch": 0.7658668730650154, "grad_norm": 0.10422243922948837, "learning_rate": 9.879686582200455e-05, "loss": 0.0128, "step": 3958 }, { "epoch": 0.7660603715170279, "grad_norm": 0.20034845173358917, "learning_rate": 9.879623551483029e-05, "loss": 0.0085, "step": 3959 }, { "epoch": 0.7662538699690402, "grad_norm": 0.05625386908650398, "learning_rate": 9.879560504483242e-05, "loss": 0.0092, "step": 3960 }, { "epoch": 0.7664473684210527, "grad_norm": 0.16830450296401978, "learning_rate": 9.879497441201328e-05, "loss": 0.011, "step": 3961 }, { "epoch": 0.766640866873065, "grad_norm": 0.08011608570814133, "learning_rate": 9.879434361637521e-05, "loss": 0.0078, "step": 3962 }, { "epoch": 0.7668343653250774, "grad_norm": 0.1200772151350975, "learning_rate": 9.879371265792057e-05, "loss": 0.0111, "step": 3963 }, { "epoch": 0.7670278637770898, "grad_norm": 0.14259958267211914, "learning_rate": 9.87930815366517e-05, "loss": 0.0129, "step": 3964 }, { "epoch": 0.7672213622291022, "grad_norm": 0.07926712930202484, "learning_rate": 9.879245025257094e-05, "loss": 0.0107, "step": 3965 }, { "epoch": 0.7674148606811145, "grad_norm": 0.186916321516037, "learning_rate": 9.879181880568064e-05, "loss": 0.0114, "step": 3966 }, { "epoch": 0.767608359133127, "grad_norm": 0.056817974895238876, "learning_rate": 9.879118719598314e-05, "loss": 0.0106, "step": 3967 }, { "epoch": 0.7678018575851393, "grad_norm": 0.2061937004327774, "learning_rate": 9.879055542348083e-05, "loss": 0.0127, "step": 3968 }, { "epoch": 0.7679953560371517, "grad_norm": 0.04854315146803856, "learning_rate": 9.878992348817599e-05, "loss": 0.0104, "step": 3969 }, { "epoch": 0.7681888544891641, "grad_norm": 0.20999138057231903, "learning_rate": 9.878929139007101e-05, "loss": 0.0102, "step": 3970 }, { "epoch": 0.7683823529411765, 
"grad_norm": 0.08693856745958328, "learning_rate": 9.878865912916825e-05, "loss": 0.0099, "step": 3971 }, { "epoch": 0.7685758513931888, "grad_norm": 0.18918085098266602, "learning_rate": 9.878802670547003e-05, "loss": 0.0106, "step": 3972 }, { "epoch": 0.7687693498452013, "grad_norm": 0.14245232939720154, "learning_rate": 9.878739411897873e-05, "loss": 0.0106, "step": 3973 }, { "epoch": 0.7689628482972136, "grad_norm": 0.15409083664417267, "learning_rate": 9.878676136969669e-05, "loss": 0.0105, "step": 3974 }, { "epoch": 0.7691563467492261, "grad_norm": 0.15202836692333221, "learning_rate": 9.878612845762624e-05, "loss": 0.0101, "step": 3975 }, { "epoch": 0.7693498452012384, "grad_norm": 0.09178660809993744, "learning_rate": 9.878549538276977e-05, "loss": 0.0104, "step": 3976 }, { "epoch": 0.7695433436532507, "grad_norm": 0.18278194963932037, "learning_rate": 9.878486214512962e-05, "loss": 0.0097, "step": 3977 }, { "epoch": 0.7697368421052632, "grad_norm": 0.10441556572914124, "learning_rate": 9.878422874470813e-05, "loss": 0.0101, "step": 3978 }, { "epoch": 0.7699303405572755, "grad_norm": 0.11975861340761185, "learning_rate": 9.878359518150765e-05, "loss": 0.0095, "step": 3979 }, { "epoch": 0.7701238390092879, "grad_norm": 0.07144366204738617, "learning_rate": 9.878296145553059e-05, "loss": 0.0098, "step": 3980 }, { "epoch": 0.7703173374613003, "grad_norm": 0.11119404435157776, "learning_rate": 9.878232756677925e-05, "loss": 0.009, "step": 3981 }, { "epoch": 0.7705108359133127, "grad_norm": 0.056663062423467636, "learning_rate": 9.8781693515256e-05, "loss": 0.0075, "step": 3982 }, { "epoch": 0.770704334365325, "grad_norm": 0.11806832998991013, "learning_rate": 9.87810593009632e-05, "loss": 0.0118, "step": 3983 }, { "epoch": 0.7708978328173375, "grad_norm": 0.0746207907795906, "learning_rate": 9.878042492390322e-05, "loss": 0.013, "step": 3984 }, { "epoch": 0.7710913312693498, "grad_norm": 0.08674807846546173, "learning_rate": 9.87797903840784e-05, "loss": 0.0107, "step": 3985 }, { "epoch": 0.7712848297213623, "grad_norm": 0.06666053086519241, "learning_rate": 9.877915568149109e-05, "loss": 0.0082, "step": 3986 }, { "epoch": 0.7714783281733746, "grad_norm": 0.0779159739613533, "learning_rate": 9.87785208161437e-05, "loss": 0.0109, "step": 3987 }, { "epoch": 0.771671826625387, "grad_norm": 0.03691183030605316, "learning_rate": 9.877788578803855e-05, "loss": 0.0097, "step": 3988 }, { "epoch": 0.7718653250773994, "grad_norm": 0.09244059771299362, "learning_rate": 9.8777250597178e-05, "loss": 0.0134, "step": 3989 }, { "epoch": 0.7720588235294118, "grad_norm": 0.04496302083134651, "learning_rate": 9.877661524356442e-05, "loss": 0.0092, "step": 3990 }, { "epoch": 0.7722523219814241, "grad_norm": 0.11134139448404312, "learning_rate": 9.877597972720017e-05, "loss": 0.0097, "step": 3991 }, { "epoch": 0.7724458204334366, "grad_norm": 0.07234124839305878, "learning_rate": 9.877534404808762e-05, "loss": 0.0082, "step": 3992 }, { "epoch": 0.7726393188854489, "grad_norm": 0.0750865712761879, "learning_rate": 9.877470820622911e-05, "loss": 0.01, "step": 3993 }, { "epoch": 0.7728328173374613, "grad_norm": 0.038747482001781464, "learning_rate": 9.877407220162703e-05, "loss": 0.0096, "step": 3994 }, { "epoch": 0.7730263157894737, "grad_norm": 0.11445911228656769, "learning_rate": 9.877343603428374e-05, "loss": 0.0108, "step": 3995 }, { "epoch": 0.7732198142414861, "grad_norm": 0.0674908235669136, "learning_rate": 9.877279970420162e-05, "loss": 0.01, "step": 3996 }, { "epoch": 0.7734133126934984, 
"grad_norm": 0.10602007061243057, "learning_rate": 9.877216321138299e-05, "loss": 0.0101, "step": 3997 }, { "epoch": 0.7736068111455109, "grad_norm": 0.133534237742424, "learning_rate": 9.877152655583024e-05, "loss": 0.01, "step": 3998 }, { "epoch": 0.7738003095975232, "grad_norm": 0.05877460539340973, "learning_rate": 9.877088973754574e-05, "loss": 0.0106, "step": 3999 }, { "epoch": 0.7739938080495357, "grad_norm": 0.1455053836107254, "learning_rate": 9.87702527565319e-05, "loss": 0.0123, "step": 4000 }, { "epoch": 0.774187306501548, "grad_norm": 0.1097133532166481, "learning_rate": 9.8769615612791e-05, "loss": 0.0123, "step": 4001 }, { "epoch": 0.7743808049535603, "grad_norm": 0.11489511281251907, "learning_rate": 9.876897830632545e-05, "loss": 0.0095, "step": 4002 }, { "epoch": 0.7745743034055728, "grad_norm": 0.08288612216711044, "learning_rate": 9.876834083713763e-05, "loss": 0.0097, "step": 4003 }, { "epoch": 0.7747678018575851, "grad_norm": 0.10835766047239304, "learning_rate": 9.876770320522992e-05, "loss": 0.0106, "step": 4004 }, { "epoch": 0.7749613003095975, "grad_norm": 0.10044422000646591, "learning_rate": 9.876706541060468e-05, "loss": 0.0101, "step": 4005 }, { "epoch": 0.7751547987616099, "grad_norm": 0.1099543496966362, "learning_rate": 9.876642745326425e-05, "loss": 0.0094, "step": 4006 }, { "epoch": 0.7753482972136223, "grad_norm": 0.05389837548136711, "learning_rate": 9.876578933321105e-05, "loss": 0.0099, "step": 4007 }, { "epoch": 0.7755417956656346, "grad_norm": 0.13458707928657532, "learning_rate": 9.876515105044741e-05, "loss": 0.0092, "step": 4008 }, { "epoch": 0.7757352941176471, "grad_norm": 0.06385032832622528, "learning_rate": 9.876451260497575e-05, "loss": 0.0105, "step": 4009 }, { "epoch": 0.7759287925696594, "grad_norm": 0.13600431382656097, "learning_rate": 9.876387399679839e-05, "loss": 0.0116, "step": 4010 }, { "epoch": 0.7761222910216719, "grad_norm": 0.11127620935440063, "learning_rate": 9.876323522591774e-05, "loss": 0.0113, "step": 4011 }, { "epoch": 0.7763157894736842, "grad_norm": 0.15392282605171204, "learning_rate": 9.876259629233616e-05, "loss": 0.0097, "step": 4012 }, { "epoch": 0.7765092879256966, "grad_norm": 0.11203698068857193, "learning_rate": 9.876195719605604e-05, "loss": 0.012, "step": 4013 }, { "epoch": 0.776702786377709, "grad_norm": 0.0988755002617836, "learning_rate": 9.876131793707974e-05, "loss": 0.0087, "step": 4014 }, { "epoch": 0.7768962848297214, "grad_norm": 0.1265144646167755, "learning_rate": 9.876067851540964e-05, "loss": 0.0088, "step": 4015 }, { "epoch": 0.7770897832817337, "grad_norm": 0.09133875370025635, "learning_rate": 9.876003893104812e-05, "loss": 0.0091, "step": 4016 }, { "epoch": 0.7772832817337462, "grad_norm": 0.12865464389324188, "learning_rate": 9.875939918399756e-05, "loss": 0.0102, "step": 4017 }, { "epoch": 0.7774767801857585, "grad_norm": 0.10600508749485016, "learning_rate": 9.875875927426035e-05, "loss": 0.0107, "step": 4018 }, { "epoch": 0.7776702786377709, "grad_norm": 0.1042635440826416, "learning_rate": 9.875811920183882e-05, "loss": 0.0123, "step": 4019 }, { "epoch": 0.7778637770897833, "grad_norm": 0.10253477841615677, "learning_rate": 9.875747896673543e-05, "loss": 0.0102, "step": 4020 }, { "epoch": 0.7780572755417957, "grad_norm": 0.13302390277385712, "learning_rate": 9.875683856895248e-05, "loss": 0.0099, "step": 4021 }, { "epoch": 0.778250773993808, "grad_norm": 0.09179476648569107, "learning_rate": 9.875619800849239e-05, "loss": 0.0101, "step": 4022 }, { "epoch": 0.7784442724458205, 
"grad_norm": 0.18479833006858826, "learning_rate": 9.875555728535756e-05, "loss": 0.0108, "step": 4023 }, { "epoch": 0.7786377708978328, "grad_norm": 0.17216113209724426, "learning_rate": 9.875491639955033e-05, "loss": 0.0089, "step": 4024 }, { "epoch": 0.7788312693498453, "grad_norm": 0.07620954513549805, "learning_rate": 9.875427535107312e-05, "loss": 0.0101, "step": 4025 }, { "epoch": 0.7790247678018576, "grad_norm": 0.1514187455177307, "learning_rate": 9.875363413992828e-05, "loss": 0.0102, "step": 4026 }, { "epoch": 0.7792182662538699, "grad_norm": 0.072695791721344, "learning_rate": 9.87529927661182e-05, "loss": 0.0102, "step": 4027 }, { "epoch": 0.7794117647058824, "grad_norm": 0.12506680190563202, "learning_rate": 9.87523512296453e-05, "loss": 0.0109, "step": 4028 }, { "epoch": 0.7796052631578947, "grad_norm": 0.15496283769607544, "learning_rate": 9.875170953051193e-05, "loss": 0.01, "step": 4029 }, { "epoch": 0.7797987616099071, "grad_norm": 0.17536605894565582, "learning_rate": 9.875106766872048e-05, "loss": 0.0098, "step": 4030 }, { "epoch": 0.7799922600619195, "grad_norm": 0.18520964682102203, "learning_rate": 9.875042564427334e-05, "loss": 0.0106, "step": 4031 }, { "epoch": 0.7801857585139319, "grad_norm": 0.15077875554561615, "learning_rate": 9.874978345717289e-05, "loss": 0.0096, "step": 4032 }, { "epoch": 0.7803792569659442, "grad_norm": 0.16081029176712036, "learning_rate": 9.874914110742155e-05, "loss": 0.0095, "step": 4033 }, { "epoch": 0.7805727554179567, "grad_norm": 0.10818455368280411, "learning_rate": 9.874849859502167e-05, "loss": 0.0094, "step": 4034 }, { "epoch": 0.780766253869969, "grad_norm": 0.2477021962404251, "learning_rate": 9.874785591997564e-05, "loss": 0.0095, "step": 4035 }, { "epoch": 0.7809597523219814, "grad_norm": 0.1176876351237297, "learning_rate": 9.874721308228589e-05, "loss": 0.0087, "step": 4036 }, { "epoch": 0.7811532507739938, "grad_norm": 0.17410095036029816, "learning_rate": 9.874657008195475e-05, "loss": 0.0103, "step": 4037 }, { "epoch": 0.7813467492260062, "grad_norm": 0.17152093350887299, "learning_rate": 9.874592691898466e-05, "loss": 0.0107, "step": 4038 }, { "epoch": 0.7815402476780186, "grad_norm": 0.1139574944972992, "learning_rate": 9.874528359337799e-05, "loss": 0.0093, "step": 4039 }, { "epoch": 0.781733746130031, "grad_norm": 0.15682604908943176, "learning_rate": 9.874464010513712e-05, "loss": 0.0112, "step": 4040 }, { "epoch": 0.7819272445820433, "grad_norm": 0.12267239391803741, "learning_rate": 9.874399645426449e-05, "loss": 0.011, "step": 4041 }, { "epoch": 0.7821207430340558, "grad_norm": 0.14233191311359406, "learning_rate": 9.874335264076243e-05, "loss": 0.0101, "step": 4042 }, { "epoch": 0.7823142414860681, "grad_norm": 0.08799666166305542, "learning_rate": 9.874270866463337e-05, "loss": 0.0101, "step": 4043 }, { "epoch": 0.7825077399380805, "grad_norm": 0.06865677237510681, "learning_rate": 9.874206452587971e-05, "loss": 0.0109, "step": 4044 }, { "epoch": 0.7827012383900929, "grad_norm": 0.10974589735269547, "learning_rate": 9.874142022450382e-05, "loss": 0.011, "step": 4045 }, { "epoch": 0.7828947368421053, "grad_norm": 0.057048603892326355, "learning_rate": 9.874077576050813e-05, "loss": 0.0089, "step": 4046 }, { "epoch": 0.7830882352941176, "grad_norm": 0.11942140758037567, "learning_rate": 9.874013113389497e-05, "loss": 0.0097, "step": 4047 }, { "epoch": 0.7832817337461301, "grad_norm": 0.07700467854738235, "learning_rate": 9.873948634466681e-05, "loss": 0.0113, "step": 4048 }, { "epoch": 0.7834752321981424, 
"grad_norm": 0.1348300725221634, "learning_rate": 9.873884139282602e-05, "loss": 0.0097, "step": 4049 }, { "epoch": 0.7836687306501547, "grad_norm": 0.0823201984167099, "learning_rate": 9.873819627837498e-05, "loss": 0.0122, "step": 4050 }, { "epoch": 0.7838622291021672, "grad_norm": 0.08942397683858871, "learning_rate": 9.873755100131612e-05, "loss": 0.009, "step": 4051 }, { "epoch": 0.7840557275541795, "grad_norm": 0.13868127763271332, "learning_rate": 9.87369055616518e-05, "loss": 0.011, "step": 4052 }, { "epoch": 0.784249226006192, "grad_norm": 0.08422110974788666, "learning_rate": 9.873625995938447e-05, "loss": 0.0107, "step": 4053 }, { "epoch": 0.7844427244582043, "grad_norm": 0.14459651708602905, "learning_rate": 9.873561419451649e-05, "loss": 0.0092, "step": 4054 }, { "epoch": 0.7846362229102167, "grad_norm": 0.1019970178604126, "learning_rate": 9.873496826705026e-05, "loss": 0.011, "step": 4055 }, { "epoch": 0.7848297213622291, "grad_norm": 0.1652429848909378, "learning_rate": 9.873432217698822e-05, "loss": 0.0097, "step": 4056 }, { "epoch": 0.7850232198142415, "grad_norm": 0.1066986471414566, "learning_rate": 9.873367592433273e-05, "loss": 0.008, "step": 4057 }, { "epoch": 0.7852167182662538, "grad_norm": 0.16219644248485565, "learning_rate": 9.873302950908622e-05, "loss": 0.0099, "step": 4058 }, { "epoch": 0.7854102167182663, "grad_norm": 0.117621511220932, "learning_rate": 9.873238293125106e-05, "loss": 0.0105, "step": 4059 }, { "epoch": 0.7856037151702786, "grad_norm": 0.15412132441997528, "learning_rate": 9.873173619082969e-05, "loss": 0.0094, "step": 4060 }, { "epoch": 0.785797213622291, "grad_norm": 0.10241243988275528, "learning_rate": 9.873108928782449e-05, "loss": 0.0073, "step": 4061 }, { "epoch": 0.7859907120743034, "grad_norm": 0.13420620560646057, "learning_rate": 9.873044222223791e-05, "loss": 0.0087, "step": 4062 }, { "epoch": 0.7861842105263158, "grad_norm": 0.08565550297498703, "learning_rate": 9.872979499407229e-05, "loss": 0.0083, "step": 4063 }, { "epoch": 0.7863777089783281, "grad_norm": 0.08423443138599396, "learning_rate": 9.872914760333008e-05, "loss": 0.0104, "step": 4064 }, { "epoch": 0.7865712074303406, "grad_norm": 0.08583515882492065, "learning_rate": 9.872850005001367e-05, "loss": 0.0094, "step": 4065 }, { "epoch": 0.7867647058823529, "grad_norm": 0.07351844012737274, "learning_rate": 9.872785233412547e-05, "loss": 0.0084, "step": 4066 }, { "epoch": 0.7869582043343654, "grad_norm": 0.11977024376392365, "learning_rate": 9.87272044556679e-05, "loss": 0.0096, "step": 4067 }, { "epoch": 0.7871517027863777, "grad_norm": 0.07390613853931427, "learning_rate": 9.872655641464334e-05, "loss": 0.0076, "step": 4068 }, { "epoch": 0.7873452012383901, "grad_norm": 0.09482423216104507, "learning_rate": 9.872590821105423e-05, "loss": 0.0092, "step": 4069 }, { "epoch": 0.7875386996904025, "grad_norm": 0.06629949808120728, "learning_rate": 9.872525984490296e-05, "loss": 0.0106, "step": 4070 }, { "epoch": 0.7877321981424149, "grad_norm": 0.1308319866657257, "learning_rate": 9.872461131619195e-05, "loss": 0.0119, "step": 4071 }, { "epoch": 0.7879256965944272, "grad_norm": 0.06393739581108093, "learning_rate": 9.872396262492363e-05, "loss": 0.0112, "step": 4072 }, { "epoch": 0.7881191950464397, "grad_norm": 0.17576321959495544, "learning_rate": 9.872331377110037e-05, "loss": 0.0105, "step": 4073 }, { "epoch": 0.788312693498452, "grad_norm": 0.13155929744243622, "learning_rate": 9.87226647547246e-05, "loss": 0.0101, "step": 4074 }, { "epoch": 0.7885061919504643, 
"grad_norm": 0.11516883969306946, "learning_rate": 9.872201557579873e-05, "loss": 0.0092, "step": 4075 }, { "epoch": 0.7886996904024768, "grad_norm": 0.08424630016088486, "learning_rate": 9.872136623432521e-05, "loss": 0.0092, "step": 4076 }, { "epoch": 0.7888931888544891, "grad_norm": 0.12273678183555603, "learning_rate": 9.87207167303064e-05, "loss": 0.0098, "step": 4077 }, { "epoch": 0.7890866873065016, "grad_norm": 0.0472799688577652, "learning_rate": 9.872006706374475e-05, "loss": 0.009, "step": 4078 }, { "epoch": 0.7892801857585139, "grad_norm": 0.12045276910066605, "learning_rate": 9.871941723464266e-05, "loss": 0.0098, "step": 4079 }, { "epoch": 0.7894736842105263, "grad_norm": 0.06718264520168304, "learning_rate": 9.871876724300253e-05, "loss": 0.0095, "step": 4080 }, { "epoch": 0.7896671826625387, "grad_norm": 0.11512233316898346, "learning_rate": 9.87181170888268e-05, "loss": 0.0094, "step": 4081 }, { "epoch": 0.7898606811145511, "grad_norm": 0.082895927131176, "learning_rate": 9.87174667721179e-05, "loss": 0.0105, "step": 4082 }, { "epoch": 0.7900541795665634, "grad_norm": 0.09195152670145035, "learning_rate": 9.871681629287823e-05, "loss": 0.0086, "step": 4083 }, { "epoch": 0.7902476780185759, "grad_norm": 0.0752279981970787, "learning_rate": 9.871616565111021e-05, "loss": 0.0125, "step": 4084 }, { "epoch": 0.7904411764705882, "grad_norm": 0.10558700561523438, "learning_rate": 9.871551484681625e-05, "loss": 0.01, "step": 4085 }, { "epoch": 0.7906346749226006, "grad_norm": 0.04867018386721611, "learning_rate": 9.871486387999879e-05, "loss": 0.0088, "step": 4086 }, { "epoch": 0.790828173374613, "grad_norm": 0.11602877080440521, "learning_rate": 9.871421275066023e-05, "loss": 0.0087, "step": 4087 }, { "epoch": 0.7910216718266254, "grad_norm": 0.07196706533432007, "learning_rate": 9.8713561458803e-05, "loss": 0.01, "step": 4088 }, { "epoch": 0.7912151702786377, "grad_norm": 0.1107347309589386, "learning_rate": 9.871291000442952e-05, "loss": 0.0099, "step": 4089 }, { "epoch": 0.7914086687306502, "grad_norm": 0.04376780614256859, "learning_rate": 9.87122583875422e-05, "loss": 0.01, "step": 4090 }, { "epoch": 0.7916021671826625, "grad_norm": 0.1035984456539154, "learning_rate": 9.871160660814349e-05, "loss": 0.013, "step": 4091 }, { "epoch": 0.791795665634675, "grad_norm": 0.08564195781946182, "learning_rate": 9.871095466623579e-05, "loss": 0.0116, "step": 4092 }, { "epoch": 0.7919891640866873, "grad_norm": 0.11419017612934113, "learning_rate": 9.871030256182153e-05, "loss": 0.0105, "step": 4093 }, { "epoch": 0.7921826625386997, "grad_norm": 0.17901885509490967, "learning_rate": 9.870965029490314e-05, "loss": 0.0104, "step": 4094 }, { "epoch": 0.7923761609907121, "grad_norm": 0.08056830614805222, "learning_rate": 9.870899786548304e-05, "loss": 0.012, "step": 4095 }, { "epoch": 0.7925696594427245, "grad_norm": 0.13063263893127441, "learning_rate": 9.870834527356365e-05, "loss": 0.0095, "step": 4096 }, { "epoch": 0.7927631578947368, "grad_norm": 0.12319860607385635, "learning_rate": 9.870769251914742e-05, "loss": 0.0127, "step": 4097 }, { "epoch": 0.7929566563467493, "grad_norm": 0.10437896847724915, "learning_rate": 9.870703960223673e-05, "loss": 0.0096, "step": 4098 }, { "epoch": 0.7931501547987616, "grad_norm": 0.12073023617267609, "learning_rate": 9.870638652283406e-05, "loss": 0.0091, "step": 4099 }, { "epoch": 0.7933436532507739, "grad_norm": 0.07883122563362122, "learning_rate": 9.87057332809418e-05, "loss": 0.0106, "step": 4100 }, { "epoch": 0.7935371517027864, "grad_norm": 
0.1798194795846939, "learning_rate": 9.870507987656241e-05, "loss": 0.0085, "step": 4101 }, { "epoch": 0.7937306501547987, "grad_norm": 0.08401855081319809, "learning_rate": 9.870442630969829e-05, "loss": 0.0103, "step": 4102 }, { "epoch": 0.7939241486068112, "grad_norm": 0.27306655049324036, "learning_rate": 9.870377258035187e-05, "loss": 0.0085, "step": 4103 }, { "epoch": 0.7941176470588235, "grad_norm": 0.09290415048599243, "learning_rate": 9.870311868852561e-05, "loss": 0.0093, "step": 4104 }, { "epoch": 0.7943111455108359, "grad_norm": 0.2767237722873688, "learning_rate": 9.870246463422193e-05, "loss": 0.0087, "step": 4105 }, { "epoch": 0.7945046439628483, "grad_norm": 0.11818850040435791, "learning_rate": 9.870181041744324e-05, "loss": 0.0109, "step": 4106 }, { "epoch": 0.7946981424148607, "grad_norm": 0.18149542808532715, "learning_rate": 9.870115603819199e-05, "loss": 0.0096, "step": 4107 }, { "epoch": 0.794891640866873, "grad_norm": 0.1513357013463974, "learning_rate": 9.870050149647061e-05, "loss": 0.0106, "step": 4108 }, { "epoch": 0.7950851393188855, "grad_norm": 0.2202051281929016, "learning_rate": 9.869984679228153e-05, "loss": 0.0105, "step": 4109 }, { "epoch": 0.7952786377708978, "grad_norm": 0.17299148440361023, "learning_rate": 9.86991919256272e-05, "loss": 0.0097, "step": 4110 }, { "epoch": 0.7954721362229102, "grad_norm": 0.14045871794223785, "learning_rate": 9.869853689651003e-05, "loss": 0.0093, "step": 4111 }, { "epoch": 0.7956656346749226, "grad_norm": 0.2172003984451294, "learning_rate": 9.869788170493247e-05, "loss": 0.0116, "step": 4112 }, { "epoch": 0.795859133126935, "grad_norm": 0.09469471126794815, "learning_rate": 9.869722635089695e-05, "loss": 0.0104, "step": 4113 }, { "epoch": 0.7960526315789473, "grad_norm": 0.16095001995563507, "learning_rate": 9.869657083440593e-05, "loss": 0.0099, "step": 4114 }, { "epoch": 0.7962461300309598, "grad_norm": 0.09570325911045074, "learning_rate": 9.86959151554618e-05, "loss": 0.0085, "step": 4115 }, { "epoch": 0.7964396284829721, "grad_norm": 0.1644231677055359, "learning_rate": 9.869525931406705e-05, "loss": 0.0108, "step": 4116 }, { "epoch": 0.7966331269349846, "grad_norm": 0.11737264692783356, "learning_rate": 9.869460331022406e-05, "loss": 0.0071, "step": 4117 }, { "epoch": 0.7968266253869969, "grad_norm": 0.0932609885931015, "learning_rate": 9.869394714393534e-05, "loss": 0.0128, "step": 4118 }, { "epoch": 0.7970201238390093, "grad_norm": 0.16575072705745697, "learning_rate": 9.869329081520327e-05, "loss": 0.0093, "step": 4119 }, { "epoch": 0.7972136222910217, "grad_norm": 0.06002218276262283, "learning_rate": 9.869263432403031e-05, "loss": 0.0117, "step": 4120 }, { "epoch": 0.7974071207430341, "grad_norm": 0.12734752893447876, "learning_rate": 9.86919776704189e-05, "loss": 0.0112, "step": 4121 }, { "epoch": 0.7976006191950464, "grad_norm": 0.08232443034648895, "learning_rate": 9.869132085437148e-05, "loss": 0.0113, "step": 4122 }, { "epoch": 0.7977941176470589, "grad_norm": 0.08194701373577118, "learning_rate": 9.86906638758905e-05, "loss": 0.0094, "step": 4123 }, { "epoch": 0.7979876160990712, "grad_norm": 0.09014613181352615, "learning_rate": 9.869000673497841e-05, "loss": 0.0087, "step": 4124 }, { "epoch": 0.7981811145510835, "grad_norm": 0.12935520708560944, "learning_rate": 9.868934943163764e-05, "loss": 0.0129, "step": 4125 }, { "epoch": 0.798374613003096, "grad_norm": 0.1121818870306015, "learning_rate": 9.86886919658706e-05, "loss": 0.0108, "step": 4126 }, { "epoch": 0.7985681114551083, "grad_norm": 
0.056952182203531265, "learning_rate": 9.868803433767981e-05, "loss": 0.0102, "step": 4127 }, { "epoch": 0.7987616099071208, "grad_norm": 0.1104688048362732, "learning_rate": 9.868737654706764e-05, "loss": 0.0096, "step": 4128 }, { "epoch": 0.7989551083591331, "grad_norm": 0.10132265090942383, "learning_rate": 9.86867185940366e-05, "loss": 0.0117, "step": 4129 }, { "epoch": 0.7991486068111455, "grad_norm": 0.10907888412475586, "learning_rate": 9.868606047858908e-05, "loss": 0.012, "step": 4130 }, { "epoch": 0.7993421052631579, "grad_norm": 0.125503271818161, "learning_rate": 9.868540220072756e-05, "loss": 0.0119, "step": 4131 }, { "epoch": 0.7995356037151703, "grad_norm": 0.0848664864897728, "learning_rate": 9.868474376045448e-05, "loss": 0.0094, "step": 4132 }, { "epoch": 0.7997291021671826, "grad_norm": 0.128434419631958, "learning_rate": 9.868408515777229e-05, "loss": 0.0099, "step": 4133 }, { "epoch": 0.7999226006191951, "grad_norm": 0.0519801490008831, "learning_rate": 9.868342639268343e-05, "loss": 0.0099, "step": 4134 }, { "epoch": 0.8001160990712074, "grad_norm": 0.11043574661016464, "learning_rate": 9.868276746519036e-05, "loss": 0.0108, "step": 4135 }, { "epoch": 0.8003095975232198, "grad_norm": 0.04139764606952667, "learning_rate": 9.868210837529553e-05, "loss": 0.0111, "step": 4136 }, { "epoch": 0.8005030959752322, "grad_norm": 0.13493378460407257, "learning_rate": 9.868144912300136e-05, "loss": 0.0096, "step": 4137 }, { "epoch": 0.8006965944272446, "grad_norm": 0.044443607330322266, "learning_rate": 9.868078970831035e-05, "loss": 0.0094, "step": 4138 }, { "epoch": 0.8008900928792569, "grad_norm": 0.10201676189899445, "learning_rate": 9.868013013122491e-05, "loss": 0.0103, "step": 4139 }, { "epoch": 0.8010835913312694, "grad_norm": 0.12195471674203873, "learning_rate": 9.867947039174752e-05, "loss": 0.0091, "step": 4140 }, { "epoch": 0.8012770897832817, "grad_norm": 0.08753280341625214, "learning_rate": 9.867881048988062e-05, "loss": 0.0108, "step": 4141 }, { "epoch": 0.8014705882352942, "grad_norm": 0.280865877866745, "learning_rate": 9.867815042562667e-05, "loss": 0.01, "step": 4142 }, { "epoch": 0.8016640866873065, "grad_norm": 0.09986979514360428, "learning_rate": 9.867749019898811e-05, "loss": 0.01, "step": 4143 }, { "epoch": 0.8018575851393189, "grad_norm": 0.21813656389713287, "learning_rate": 9.86768298099674e-05, "loss": 0.0116, "step": 4144 }, { "epoch": 0.8020510835913313, "grad_norm": 0.05979626998305321, "learning_rate": 9.867616925856702e-05, "loss": 0.0101, "step": 4145 }, { "epoch": 0.8022445820433437, "grad_norm": 0.16836178302764893, "learning_rate": 9.867550854478938e-05, "loss": 0.0085, "step": 4146 }, { "epoch": 0.802438080495356, "grad_norm": 0.08166646957397461, "learning_rate": 9.867484766863698e-05, "loss": 0.0103, "step": 4147 }, { "epoch": 0.8026315789473685, "grad_norm": 0.08961967378854752, "learning_rate": 9.867418663011225e-05, "loss": 0.0105, "step": 4148 }, { "epoch": 0.8028250773993808, "grad_norm": 0.11920348554849625, "learning_rate": 9.867352542921767e-05, "loss": 0.0097, "step": 4149 }, { "epoch": 0.8030185758513931, "grad_norm": 0.0789293497800827, "learning_rate": 9.867286406595566e-05, "loss": 0.0096, "step": 4150 }, { "epoch": 0.8032120743034056, "grad_norm": 0.09258095175027847, "learning_rate": 9.867220254032871e-05, "loss": 0.0102, "step": 4151 }, { "epoch": 0.8034055727554179, "grad_norm": 0.05283871293067932, "learning_rate": 9.867154085233928e-05, "loss": 0.0102, "step": 4152 }, { "epoch": 0.8035990712074303, "grad_norm": 
0.08291931450366974, "learning_rate": 9.867087900198982e-05, "loss": 0.0081, "step": 4153 }, { "epoch": 0.8037925696594427, "grad_norm": 0.033999428153038025, "learning_rate": 9.867021698928279e-05, "loss": 0.0093, "step": 4154 }, { "epoch": 0.8039860681114551, "grad_norm": 0.0711042732000351, "learning_rate": 9.866955481422065e-05, "loss": 0.0109, "step": 4155 }, { "epoch": 0.8041795665634675, "grad_norm": 0.06332188844680786, "learning_rate": 9.866889247680588e-05, "loss": 0.0112, "step": 4156 }, { "epoch": 0.8043730650154799, "grad_norm": 0.11077764630317688, "learning_rate": 9.866822997704091e-05, "loss": 0.01, "step": 4157 }, { "epoch": 0.8045665634674922, "grad_norm": 0.06824398785829544, "learning_rate": 9.866756731492823e-05, "loss": 0.0098, "step": 4158 }, { "epoch": 0.8047600619195047, "grad_norm": 0.07214360684156418, "learning_rate": 9.86669044904703e-05, "loss": 0.0099, "step": 4159 }, { "epoch": 0.804953560371517, "grad_norm": 0.06455329060554504, "learning_rate": 9.866624150366957e-05, "loss": 0.009, "step": 4160 }, { "epoch": 0.8051470588235294, "grad_norm": 0.04876204952597618, "learning_rate": 9.86655783545285e-05, "loss": 0.0098, "step": 4161 }, { "epoch": 0.8053405572755418, "grad_norm": 0.10303325951099396, "learning_rate": 9.86649150430496e-05, "loss": 0.0107, "step": 4162 }, { "epoch": 0.8055340557275542, "grad_norm": 0.054632239043712616, "learning_rate": 9.866425156923528e-05, "loss": 0.0095, "step": 4163 }, { "epoch": 0.8057275541795665, "grad_norm": 0.053457386791706085, "learning_rate": 9.866358793308804e-05, "loss": 0.0097, "step": 4164 }, { "epoch": 0.805921052631579, "grad_norm": 0.1124381572008133, "learning_rate": 9.866292413461035e-05, "loss": 0.011, "step": 4165 }, { "epoch": 0.8061145510835913, "grad_norm": 0.08537157624959946, "learning_rate": 9.866226017380466e-05, "loss": 0.0081, "step": 4166 }, { "epoch": 0.8063080495356038, "grad_norm": 0.14521464705467224, "learning_rate": 9.866159605067343e-05, "loss": 0.0107, "step": 4167 }, { "epoch": 0.8065015479876161, "grad_norm": 0.13927963376045227, "learning_rate": 9.866093176521916e-05, "loss": 0.0104, "step": 4168 }, { "epoch": 0.8066950464396285, "grad_norm": 0.11747816205024719, "learning_rate": 9.86602673174443e-05, "loss": 0.0102, "step": 4169 }, { "epoch": 0.8068885448916409, "grad_norm": 0.1121373400092125, "learning_rate": 9.865960270735133e-05, "loss": 0.0123, "step": 4170 }, { "epoch": 0.8070820433436533, "grad_norm": 0.1561780422925949, "learning_rate": 9.86589379349427e-05, "loss": 0.0114, "step": 4171 }, { "epoch": 0.8072755417956656, "grad_norm": 0.10482209175825119, "learning_rate": 9.865827300022092e-05, "loss": 0.0109, "step": 4172 }, { "epoch": 0.8074690402476781, "grad_norm": 0.1597364842891693, "learning_rate": 9.865760790318843e-05, "loss": 0.0092, "step": 4173 }, { "epoch": 0.8076625386996904, "grad_norm": 0.14358659088611603, "learning_rate": 9.86569426438477e-05, "loss": 0.0087, "step": 4174 }, { "epoch": 0.8078560371517027, "grad_norm": 0.13277216255664825, "learning_rate": 9.865627722220123e-05, "loss": 0.0105, "step": 4175 }, { "epoch": 0.8080495356037152, "grad_norm": 0.14561383426189423, "learning_rate": 9.865561163825146e-05, "loss": 0.0109, "step": 4176 }, { "epoch": 0.8082430340557275, "grad_norm": 0.04732534661889076, "learning_rate": 9.86549458920009e-05, "loss": 0.0087, "step": 4177 }, { "epoch": 0.80843653250774, "grad_norm": 0.17676766216754913, "learning_rate": 9.8654279983452e-05, "loss": 0.0101, "step": 4178 }, { "epoch": 0.8086300309597523, "grad_norm": 
0.03977932408452034, "learning_rate": 9.865361391260725e-05, "loss": 0.0091, "step": 4179 }, { "epoch": 0.8088235294117647, "grad_norm": 0.12798920273780823, "learning_rate": 9.865294767946911e-05, "loss": 0.0114, "step": 4180 }, { "epoch": 0.809017027863777, "grad_norm": 0.10476430505514145, "learning_rate": 9.865228128404008e-05, "loss": 0.0098, "step": 4181 }, { "epoch": 0.8092105263157895, "grad_norm": 0.14244741201400757, "learning_rate": 9.865161472632261e-05, "loss": 0.0098, "step": 4182 }, { "epoch": 0.8094040247678018, "grad_norm": 0.09043360501527786, "learning_rate": 9.86509480063192e-05, "loss": 0.0081, "step": 4183 }, { "epoch": 0.8095975232198143, "grad_norm": 0.1115807294845581, "learning_rate": 9.865028112403232e-05, "loss": 0.011, "step": 4184 }, { "epoch": 0.8097910216718266, "grad_norm": 0.07999593019485474, "learning_rate": 9.864961407946445e-05, "loss": 0.0096, "step": 4185 }, { "epoch": 0.809984520123839, "grad_norm": 0.11904734373092651, "learning_rate": 9.864894687261808e-05, "loss": 0.0104, "step": 4186 }, { "epoch": 0.8101780185758514, "grad_norm": 0.03650550916790962, "learning_rate": 9.864827950349566e-05, "loss": 0.0075, "step": 4187 }, { "epoch": 0.8103715170278638, "grad_norm": 0.17564651370048523, "learning_rate": 9.86476119720997e-05, "loss": 0.0113, "step": 4188 }, { "epoch": 0.8105650154798761, "grad_norm": 0.035643525421619415, "learning_rate": 9.864694427843267e-05, "loss": 0.011, "step": 4189 }, { "epoch": 0.8107585139318886, "grad_norm": 0.1427241563796997, "learning_rate": 9.864627642249708e-05, "loss": 0.012, "step": 4190 }, { "epoch": 0.8109520123839009, "grad_norm": 0.057671159505844116, "learning_rate": 9.864560840429537e-05, "loss": 0.0099, "step": 4191 }, { "epoch": 0.8111455108359134, "grad_norm": 0.1123286783695221, "learning_rate": 9.864494022383005e-05, "loss": 0.0108, "step": 4192 }, { "epoch": 0.8113390092879257, "grad_norm": 0.09680133312940598, "learning_rate": 9.86442718811036e-05, "loss": 0.0089, "step": 4193 }, { "epoch": 0.8115325077399381, "grad_norm": 0.099113829433918, "learning_rate": 9.864360337611848e-05, "loss": 0.0092, "step": 4194 }, { "epoch": 0.8117260061919505, "grad_norm": 0.10137223452329636, "learning_rate": 9.86429347088772e-05, "loss": 0.0098, "step": 4195 }, { "epoch": 0.8119195046439629, "grad_norm": 0.1325758844614029, "learning_rate": 9.864226587938225e-05, "loss": 0.0116, "step": 4196 }, { "epoch": 0.8121130030959752, "grad_norm": 0.08811259269714355, "learning_rate": 9.864159688763611e-05, "loss": 0.0118, "step": 4197 }, { "epoch": 0.8123065015479877, "grad_norm": 0.14937536418437958, "learning_rate": 9.864092773364127e-05, "loss": 0.0108, "step": 4198 }, { "epoch": 0.8125, "grad_norm": 0.034691788256168365, "learning_rate": 9.864025841740021e-05, "loss": 0.0097, "step": 4199 }, { "epoch": 0.8126934984520123, "grad_norm": 0.15044619143009186, "learning_rate": 9.863958893891543e-05, "loss": 0.0096, "step": 4200 }, { "epoch": 0.8128869969040248, "grad_norm": 0.049903176724910736, "learning_rate": 9.86389192981894e-05, "loss": 0.0102, "step": 4201 }, { "epoch": 0.8130804953560371, "grad_norm": 0.15190501511096954, "learning_rate": 9.863824949522464e-05, "loss": 0.0111, "step": 4202 }, { "epoch": 0.8132739938080495, "grad_norm": 0.05356251075863838, "learning_rate": 9.863757953002359e-05, "loss": 0.0084, "step": 4203 }, { "epoch": 0.8134674922600619, "grad_norm": 0.18281753361225128, "learning_rate": 9.863690940258879e-05, "loss": 0.0105, "step": 4204 }, { "epoch": 0.8136609907120743, "grad_norm": 
0.05881215259432793, "learning_rate": 9.863623911292272e-05, "loss": 0.0096, "step": 4205 }, { "epoch": 0.8138544891640866, "grad_norm": 0.1455402970314026, "learning_rate": 9.863556866102786e-05, "loss": 0.0097, "step": 4206 }, { "epoch": 0.8140479876160991, "grad_norm": 0.09395134449005127, "learning_rate": 9.863489804690671e-05, "loss": 0.0096, "step": 4207 }, { "epoch": 0.8142414860681114, "grad_norm": 0.13028064370155334, "learning_rate": 9.863422727056176e-05, "loss": 0.0101, "step": 4208 }, { "epoch": 0.8144349845201239, "grad_norm": 0.0945737212896347, "learning_rate": 9.863355633199549e-05, "loss": 0.0084, "step": 4209 }, { "epoch": 0.8146284829721362, "grad_norm": 0.08658760040998459, "learning_rate": 9.863288523121042e-05, "loss": 0.0098, "step": 4210 }, { "epoch": 0.8148219814241486, "grad_norm": 0.18070712685585022, "learning_rate": 9.863221396820904e-05, "loss": 0.0097, "step": 4211 }, { "epoch": 0.815015479876161, "grad_norm": 0.13751474022865295, "learning_rate": 9.863154254299383e-05, "loss": 0.0099, "step": 4212 }, { "epoch": 0.8152089783281734, "grad_norm": 0.1494675576686859, "learning_rate": 9.86308709555673e-05, "loss": 0.0099, "step": 4213 }, { "epoch": 0.8154024767801857, "grad_norm": 0.06244738772511482, "learning_rate": 9.863019920593195e-05, "loss": 0.0095, "step": 4214 }, { "epoch": 0.8155959752321982, "grad_norm": 0.11348196119070053, "learning_rate": 9.862952729409026e-05, "loss": 0.0086, "step": 4215 }, { "epoch": 0.8157894736842105, "grad_norm": 0.08366581797599792, "learning_rate": 9.862885522004473e-05, "loss": 0.0103, "step": 4216 }, { "epoch": 0.815982972136223, "grad_norm": 0.14118985831737518, "learning_rate": 9.862818298379788e-05, "loss": 0.0105, "step": 4217 }, { "epoch": 0.8161764705882353, "grad_norm": 0.07566708326339722, "learning_rate": 9.86275105853522e-05, "loss": 0.0095, "step": 4218 }, { "epoch": 0.8163699690402477, "grad_norm": 0.10212428867816925, "learning_rate": 9.862683802471017e-05, "loss": 0.011, "step": 4219 }, { "epoch": 0.81656346749226, "grad_norm": 0.07279245555400848, "learning_rate": 9.862616530187434e-05, "loss": 0.0097, "step": 4220 }, { "epoch": 0.8167569659442725, "grad_norm": 0.10581445693969727, "learning_rate": 9.862549241684714e-05, "loss": 0.0126, "step": 4221 }, { "epoch": 0.8169504643962848, "grad_norm": 0.10255300253629684, "learning_rate": 9.862481936963113e-05, "loss": 0.0118, "step": 4222 }, { "epoch": 0.8171439628482973, "grad_norm": 0.053637485951185226, "learning_rate": 9.862414616022877e-05, "loss": 0.0104, "step": 4223 }, { "epoch": 0.8173374613003096, "grad_norm": 0.1277778148651123, "learning_rate": 9.862347278864261e-05, "loss": 0.0112, "step": 4224 }, { "epoch": 0.8175309597523219, "grad_norm": 0.055292483419179916, "learning_rate": 9.86227992548751e-05, "loss": 0.0123, "step": 4225 }, { "epoch": 0.8177244582043344, "grad_norm": 0.14526276290416718, "learning_rate": 9.862212555892878e-05, "loss": 0.0076, "step": 4226 }, { "epoch": 0.8179179566563467, "grad_norm": 0.06018758937716484, "learning_rate": 9.862145170080617e-05, "loss": 0.0099, "step": 4227 }, { "epoch": 0.8181114551083591, "grad_norm": 0.15755926072597504, "learning_rate": 9.862077768050973e-05, "loss": 0.0105, "step": 4228 }, { "epoch": 0.8183049535603715, "grad_norm": 0.08556162565946579, "learning_rate": 9.862010349804198e-05, "loss": 0.0088, "step": 4229 }, { "epoch": 0.8184984520123839, "grad_norm": 0.13281944394111633, "learning_rate": 9.861942915340544e-05, "loss": 0.0091, "step": 4230 }, { "epoch": 0.8186919504643962, "grad_norm": 
0.0863422304391861, "learning_rate": 9.861875464660261e-05, "loss": 0.0117, "step": 4231 }, { "epoch": 0.8188854489164087, "grad_norm": 0.09106625616550446, "learning_rate": 9.8618079977636e-05, "loss": 0.0121, "step": 4232 }, { "epoch": 0.819078947368421, "grad_norm": 0.0589204765856266, "learning_rate": 9.861740514650812e-05, "loss": 0.0098, "step": 4233 }, { "epoch": 0.8192724458204335, "grad_norm": 0.07021724432706833, "learning_rate": 9.861673015322147e-05, "loss": 0.01, "step": 4234 }, { "epoch": 0.8194659442724458, "grad_norm": 0.07406871020793915, "learning_rate": 9.861605499777858e-05, "loss": 0.0104, "step": 4235 }, { "epoch": 0.8196594427244582, "grad_norm": 0.08807128667831421, "learning_rate": 9.861537968018192e-05, "loss": 0.012, "step": 4236 }, { "epoch": 0.8198529411764706, "grad_norm": 0.11209561675786972, "learning_rate": 9.861470420043404e-05, "loss": 0.0086, "step": 4237 }, { "epoch": 0.820046439628483, "grad_norm": 0.06392286717891693, "learning_rate": 9.861402855853744e-05, "loss": 0.011, "step": 4238 }, { "epoch": 0.8202399380804953, "grad_norm": 0.17944958806037903, "learning_rate": 9.861335275449463e-05, "loss": 0.0107, "step": 4239 }, { "epoch": 0.8204334365325078, "grad_norm": 0.0566139817237854, "learning_rate": 9.86126767883081e-05, "loss": 0.012, "step": 4240 }, { "epoch": 0.8206269349845201, "grad_norm": 0.21151615679264069, "learning_rate": 9.861200065998041e-05, "loss": 0.0095, "step": 4241 }, { "epoch": 0.8208204334365325, "grad_norm": 0.08019981533288956, "learning_rate": 9.861132436951402e-05, "loss": 0.0105, "step": 4242 }, { "epoch": 0.8210139318885449, "grad_norm": 0.19231091439723969, "learning_rate": 9.86106479169115e-05, "loss": 0.0124, "step": 4243 }, { "epoch": 0.8212074303405573, "grad_norm": 0.11423242837190628, "learning_rate": 9.860997130217532e-05, "loss": 0.0106, "step": 4244 }, { "epoch": 0.8214009287925697, "grad_norm": 0.20887576043605804, "learning_rate": 9.860929452530802e-05, "loss": 0.0111, "step": 4245 }, { "epoch": 0.8215944272445821, "grad_norm": 0.10576283931732178, "learning_rate": 9.860861758631212e-05, "loss": 0.0111, "step": 4246 }, { "epoch": 0.8217879256965944, "grad_norm": 0.16046105325222015, "learning_rate": 9.860794048519009e-05, "loss": 0.0121, "step": 4247 }, { "epoch": 0.8219814241486069, "grad_norm": 0.10387644171714783, "learning_rate": 9.860726322194451e-05, "loss": 0.0098, "step": 4248 }, { "epoch": 0.8221749226006192, "grad_norm": 0.12739457190036774, "learning_rate": 9.860658579657787e-05, "loss": 0.0113, "step": 4249 }, { "epoch": 0.8223684210526315, "grad_norm": 0.1171007975935936, "learning_rate": 9.860590820909268e-05, "loss": 0.0094, "step": 4250 }, { "epoch": 0.822561919504644, "grad_norm": 0.09853650629520416, "learning_rate": 9.860523045949147e-05, "loss": 0.0091, "step": 4251 }, { "epoch": 0.8227554179566563, "grad_norm": 0.10301009565591812, "learning_rate": 9.860455254777676e-05, "loss": 0.0092, "step": 4252 }, { "epoch": 0.8229489164086687, "grad_norm": 0.0678250789642334, "learning_rate": 9.860387447395106e-05, "loss": 0.0101, "step": 4253 }, { "epoch": 0.8231424148606811, "grad_norm": 0.10754238814115524, "learning_rate": 9.860319623801693e-05, "loss": 0.01, "step": 4254 }, { "epoch": 0.8233359133126935, "grad_norm": 0.08849840611219406, "learning_rate": 9.860251783997682e-05, "loss": 0.0082, "step": 4255 }, { "epoch": 0.8235294117647058, "grad_norm": 0.0818532258272171, "learning_rate": 9.860183927983331e-05, "loss": 0.0116, "step": 4256 }, { "epoch": 0.8237229102167183, "grad_norm": 
0.09770959615707397, "learning_rate": 9.86011605575889e-05, "loss": 0.008, "step": 4257 }, { "epoch": 0.8239164086687306, "grad_norm": 0.09925713390111923, "learning_rate": 9.860048167324613e-05, "loss": 0.0096, "step": 4258 }, { "epoch": 0.8241099071207431, "grad_norm": 0.16519401967525482, "learning_rate": 9.859980262680749e-05, "loss": 0.0095, "step": 4259 }, { "epoch": 0.8243034055727554, "grad_norm": 0.08686315268278122, "learning_rate": 9.859912341827557e-05, "loss": 0.0099, "step": 4260 }, { "epoch": 0.8244969040247678, "grad_norm": 0.11043137311935425, "learning_rate": 9.859844404765281e-05, "loss": 0.011, "step": 4261 }, { "epoch": 0.8246904024767802, "grad_norm": 0.13863693177700043, "learning_rate": 9.859776451494181e-05, "loss": 0.0102, "step": 4262 }, { "epoch": 0.8248839009287926, "grad_norm": 0.0887213945388794, "learning_rate": 9.859708482014504e-05, "loss": 0.0112, "step": 4263 }, { "epoch": 0.8250773993808049, "grad_norm": 0.1389428675174713, "learning_rate": 9.859640496326505e-05, "loss": 0.011, "step": 4264 }, { "epoch": 0.8252708978328174, "grad_norm": 0.054646655917167664, "learning_rate": 9.859572494430438e-05, "loss": 0.0105, "step": 4265 }, { "epoch": 0.8254643962848297, "grad_norm": 0.15182337164878845, "learning_rate": 9.859504476326554e-05, "loss": 0.0096, "step": 4266 }, { "epoch": 0.8256578947368421, "grad_norm": 0.09367413073778152, "learning_rate": 9.859436442015106e-05, "loss": 0.01, "step": 4267 }, { "epoch": 0.8258513931888545, "grad_norm": 0.16370120644569397, "learning_rate": 9.859368391496349e-05, "loss": 0.0103, "step": 4268 }, { "epoch": 0.8260448916408669, "grad_norm": 0.11964668333530426, "learning_rate": 9.859300324770534e-05, "loss": 0.0088, "step": 4269 }, { "epoch": 0.8262383900928792, "grad_norm": 0.11150167137384415, "learning_rate": 9.859232241837915e-05, "loss": 0.013, "step": 4270 }, { "epoch": 0.8264318885448917, "grad_norm": 0.18359598517417908, "learning_rate": 9.859164142698743e-05, "loss": 0.0072, "step": 4271 }, { "epoch": 0.826625386996904, "grad_norm": 0.07788367569446564, "learning_rate": 9.859096027353275e-05, "loss": 0.0106, "step": 4272 }, { "epoch": 0.8268188854489165, "grad_norm": 0.1733231544494629, "learning_rate": 9.85902789580176e-05, "loss": 0.0102, "step": 4273 }, { "epoch": 0.8270123839009288, "grad_norm": 0.08431868255138397, "learning_rate": 9.858959748044456e-05, "loss": 0.0098, "step": 4274 }, { "epoch": 0.8272058823529411, "grad_norm": 0.11817189306020737, "learning_rate": 9.85889158408161e-05, "loss": 0.0107, "step": 4275 }, { "epoch": 0.8273993808049536, "grad_norm": 0.11415275186300278, "learning_rate": 9.858823403913483e-05, "loss": 0.0104, "step": 4276 }, { "epoch": 0.8275928792569659, "grad_norm": 0.091512531042099, "learning_rate": 9.858755207540324e-05, "loss": 0.0117, "step": 4277 }, { "epoch": 0.8277863777089783, "grad_norm": 0.11802586913108826, "learning_rate": 9.858686994962385e-05, "loss": 0.0121, "step": 4278 }, { "epoch": 0.8279798761609907, "grad_norm": 0.08195387572050095, "learning_rate": 9.858618766179923e-05, "loss": 0.0108, "step": 4279 }, { "epoch": 0.8281733746130031, "grad_norm": 0.0917142704129219, "learning_rate": 9.858550521193189e-05, "loss": 0.0105, "step": 4280 }, { "epoch": 0.8283668730650154, "grad_norm": 0.09105952829122543, "learning_rate": 9.858482260002441e-05, "loss": 0.0103, "step": 4281 }, { "epoch": 0.8285603715170279, "grad_norm": 0.09110403060913086, "learning_rate": 9.858413982607928e-05, "loss": 0.0104, "step": 4282 }, { "epoch": 0.8287538699690402, "grad_norm": 
0.10584378987550735, "learning_rate": 9.858345689009907e-05, "loss": 0.0082, "step": 4283 }, { "epoch": 0.8289473684210527, "grad_norm": 0.0445079430937767, "learning_rate": 9.858277379208629e-05, "loss": 0.0104, "step": 4284 }, { "epoch": 0.829140866873065, "grad_norm": 0.09843529015779495, "learning_rate": 9.858209053204352e-05, "loss": 0.0087, "step": 4285 }, { "epoch": 0.8293343653250774, "grad_norm": 0.05296734720468521, "learning_rate": 9.858140710997324e-05, "loss": 0.0098, "step": 4286 }, { "epoch": 0.8295278637770898, "grad_norm": 0.057183604687452316, "learning_rate": 9.858072352587805e-05, "loss": 0.0098, "step": 4287 }, { "epoch": 0.8297213622291022, "grad_norm": 0.06888888776302338, "learning_rate": 9.858003977976045e-05, "loss": 0.0107, "step": 4288 }, { "epoch": 0.8299148606811145, "grad_norm": 0.0522567555308342, "learning_rate": 9.857935587162304e-05, "loss": 0.0096, "step": 4289 }, { "epoch": 0.830108359133127, "grad_norm": 0.08866395801305771, "learning_rate": 9.857867180146829e-05, "loss": 0.0096, "step": 4290 }, { "epoch": 0.8303018575851393, "grad_norm": 0.08873932808637619, "learning_rate": 9.857798756929879e-05, "loss": 0.0089, "step": 4291 }, { "epoch": 0.8304953560371517, "grad_norm": 0.07178856432437897, "learning_rate": 9.857730317511707e-05, "loss": 0.0075, "step": 4292 }, { "epoch": 0.8306888544891641, "grad_norm": 0.07504703104496002, "learning_rate": 9.857661861892567e-05, "loss": 0.011, "step": 4293 }, { "epoch": 0.8308823529411765, "grad_norm": 0.06710601598024368, "learning_rate": 9.857593390072713e-05, "loss": 0.0096, "step": 4294 }, { "epoch": 0.8310758513931888, "grad_norm": 0.08017895370721817, "learning_rate": 9.857524902052402e-05, "loss": 0.0106, "step": 4295 }, { "epoch": 0.8312693498452013, "grad_norm": 0.0942356288433075, "learning_rate": 9.857456397831886e-05, "loss": 0.01, "step": 4296 }, { "epoch": 0.8314628482972136, "grad_norm": 0.06225818768143654, "learning_rate": 9.857387877411422e-05, "loss": 0.011, "step": 4297 }, { "epoch": 0.8316563467492261, "grad_norm": 0.07784291356801987, "learning_rate": 9.857319340791262e-05, "loss": 0.0091, "step": 4298 }, { "epoch": 0.8318498452012384, "grad_norm": 0.04295644164085388, "learning_rate": 9.857250787971665e-05, "loss": 0.0097, "step": 4299 }, { "epoch": 0.8320433436532507, "grad_norm": 0.09804605692625046, "learning_rate": 9.857182218952881e-05, "loss": 0.0106, "step": 4300 }, { "epoch": 0.8322368421052632, "grad_norm": 0.03948545828461647, "learning_rate": 9.857113633735168e-05, "loss": 0.0093, "step": 4301 }, { "epoch": 0.8324303405572755, "grad_norm": 0.09841268509626389, "learning_rate": 9.85704503231878e-05, "loss": 0.0101, "step": 4302 }, { "epoch": 0.8326238390092879, "grad_norm": 0.08407093584537506, "learning_rate": 9.856976414703971e-05, "loss": 0.0109, "step": 4303 }, { "epoch": 0.8328173374613003, "grad_norm": 0.15313801169395447, "learning_rate": 9.856907780890999e-05, "loss": 0.0084, "step": 4304 }, { "epoch": 0.8330108359133127, "grad_norm": 0.04758675768971443, "learning_rate": 9.856839130880115e-05, "loss": 0.01, "step": 4305 }, { "epoch": 0.833204334365325, "grad_norm": 0.0956466794013977, "learning_rate": 9.856770464671578e-05, "loss": 0.0104, "step": 4306 }, { "epoch": 0.8333978328173375, "grad_norm": 0.08376257121562958, "learning_rate": 9.856701782265644e-05, "loss": 0.0099, "step": 4307 }, { "epoch": 0.8335913312693498, "grad_norm": 0.09200667589902878, "learning_rate": 9.856633083662564e-05, "loss": 0.0074, "step": 4308 }, { "epoch": 0.8337848297213623, "grad_norm": 
0.08672535419464111, "learning_rate": 9.856564368862596e-05, "loss": 0.0095, "step": 4309 }, { "epoch": 0.8339783281733746, "grad_norm": 0.09533509612083435, "learning_rate": 9.856495637865996e-05, "loss": 0.0088, "step": 4310 }, { "epoch": 0.834171826625387, "grad_norm": 0.08946563303470612, "learning_rate": 9.856426890673019e-05, "loss": 0.0086, "step": 4311 }, { "epoch": 0.8343653250773994, "grad_norm": 0.09286832809448242, "learning_rate": 9.856358127283919e-05, "loss": 0.0108, "step": 4312 }, { "epoch": 0.8345588235294118, "grad_norm": 0.139421284198761, "learning_rate": 9.856289347698952e-05, "loss": 0.0105, "step": 4313 }, { "epoch": 0.8347523219814241, "grad_norm": 0.12027264386415482, "learning_rate": 9.856220551918377e-05, "loss": 0.0108, "step": 4314 }, { "epoch": 0.8349458204334366, "grad_norm": 0.1447397917509079, "learning_rate": 9.856151739942446e-05, "loss": 0.0103, "step": 4315 }, { "epoch": 0.8351393188854489, "grad_norm": 0.1086437925696373, "learning_rate": 9.856082911771417e-05, "loss": 0.0103, "step": 4316 }, { "epoch": 0.8353328173374613, "grad_norm": 0.1420959085226059, "learning_rate": 9.856014067405544e-05, "loss": 0.0085, "step": 4317 }, { "epoch": 0.8355263157894737, "grad_norm": 0.11495339125394821, "learning_rate": 9.855945206845086e-05, "loss": 0.0098, "step": 4318 }, { "epoch": 0.8357198142414861, "grad_norm": 0.14700591564178467, "learning_rate": 9.855876330090295e-05, "loss": 0.0099, "step": 4319 }, { "epoch": 0.8359133126934984, "grad_norm": 0.10654015839099884, "learning_rate": 9.85580743714143e-05, "loss": 0.0114, "step": 4320 }, { "epoch": 0.8361068111455109, "grad_norm": 0.16417571902275085, "learning_rate": 9.855738527998746e-05, "loss": 0.0113, "step": 4321 }, { "epoch": 0.8363003095975232, "grad_norm": 0.048563867807388306, "learning_rate": 9.855669602662499e-05, "loss": 0.0087, "step": 4322 }, { "epoch": 0.8364938080495357, "grad_norm": 0.15863579511642456, "learning_rate": 9.855600661132947e-05, "loss": 0.0123, "step": 4323 }, { "epoch": 0.836687306501548, "grad_norm": 0.030145589262247086, "learning_rate": 9.855531703410345e-05, "loss": 0.0087, "step": 4324 }, { "epoch": 0.8368808049535603, "grad_norm": 0.15904571115970612, "learning_rate": 9.855462729494947e-05, "loss": 0.0107, "step": 4325 }, { "epoch": 0.8370743034055728, "grad_norm": 0.06144387647509575, "learning_rate": 9.855393739387014e-05, "loss": 0.0104, "step": 4326 }, { "epoch": 0.8372678018575851, "grad_norm": 0.13170403242111206, "learning_rate": 9.855324733086799e-05, "loss": 0.008, "step": 4327 }, { "epoch": 0.8374613003095975, "grad_norm": 0.07413952052593231, "learning_rate": 9.85525571059456e-05, "loss": 0.009, "step": 4328 }, { "epoch": 0.8376547987616099, "grad_norm": 0.1472153514623642, "learning_rate": 9.855186671910554e-05, "loss": 0.0082, "step": 4329 }, { "epoch": 0.8378482972136223, "grad_norm": 0.07533664256334305, "learning_rate": 9.855117617035037e-05, "loss": 0.0094, "step": 4330 }, { "epoch": 0.8380417956656346, "grad_norm": 0.15626533329486847, "learning_rate": 9.855048545968268e-05, "loss": 0.01, "step": 4331 }, { "epoch": 0.8382352941176471, "grad_norm": 0.07408600300550461, "learning_rate": 9.854979458710498e-05, "loss": 0.0103, "step": 4332 }, { "epoch": 0.8384287925696594, "grad_norm": 0.1557457447052002, "learning_rate": 9.85491035526199e-05, "loss": 0.0109, "step": 4333 }, { "epoch": 0.8386222910216719, "grad_norm": 0.11829877644777298, "learning_rate": 9.854841235622996e-05, "loss": 0.0084, "step": 4334 }, { "epoch": 0.8388157894736842, "grad_norm": 
0.1416310966014862, "learning_rate": 9.854772099793778e-05, "loss": 0.0113, "step": 4335 }, { "epoch": 0.8390092879256966, "grad_norm": 0.1544608175754547, "learning_rate": 9.854702947774588e-05, "loss": 0.0131, "step": 4336 }, { "epoch": 0.839202786377709, "grad_norm": 0.16458110511302948, "learning_rate": 9.854633779565686e-05, "loss": 0.0098, "step": 4337 }, { "epoch": 0.8393962848297214, "grad_norm": 0.17049124836921692, "learning_rate": 9.85456459516733e-05, "loss": 0.009, "step": 4338 }, { "epoch": 0.8395897832817337, "grad_norm": 0.16743357479572296, "learning_rate": 9.854495394579775e-05, "loss": 0.0106, "step": 4339 }, { "epoch": 0.8397832817337462, "grad_norm": 0.13495516777038574, "learning_rate": 9.854426177803279e-05, "loss": 0.0094, "step": 4340 }, { "epoch": 0.8399767801857585, "grad_norm": 0.1252715289592743, "learning_rate": 9.854356944838098e-05, "loss": 0.0104, "step": 4341 }, { "epoch": 0.8401702786377709, "grad_norm": 0.16601970791816711, "learning_rate": 9.854287695684493e-05, "loss": 0.0113, "step": 4342 }, { "epoch": 0.8403637770897833, "grad_norm": 0.08712605386972427, "learning_rate": 9.854218430342718e-05, "loss": 0.0092, "step": 4343 }, { "epoch": 0.8405572755417957, "grad_norm": 0.17724861204624176, "learning_rate": 9.854149148813034e-05, "loss": 0.0103, "step": 4344 }, { "epoch": 0.840750773993808, "grad_norm": 0.0700681135058403, "learning_rate": 9.854079851095693e-05, "loss": 0.0075, "step": 4345 }, { "epoch": 0.8409442724458205, "grad_norm": 0.15749727189540863, "learning_rate": 9.854010537190958e-05, "loss": 0.0096, "step": 4346 }, { "epoch": 0.8411377708978328, "grad_norm": 0.09013498574495316, "learning_rate": 9.853941207099083e-05, "loss": 0.0079, "step": 4347 }, { "epoch": 0.8413312693498453, "grad_norm": 0.1657254546880722, "learning_rate": 9.853871860820331e-05, "loss": 0.0097, "step": 4348 }, { "epoch": 0.8415247678018576, "grad_norm": 0.10423899441957474, "learning_rate": 9.853802498354953e-05, "loss": 0.0076, "step": 4349 }, { "epoch": 0.8417182662538699, "grad_norm": 0.1447519212961197, "learning_rate": 9.853733119703212e-05, "loss": 0.0086, "step": 4350 }, { "epoch": 0.8419117647058824, "grad_norm": 0.0734078586101532, "learning_rate": 9.853663724865362e-05, "loss": 0.0087, "step": 4351 }, { "epoch": 0.8421052631578947, "grad_norm": 0.13186629116535187, "learning_rate": 9.853594313841664e-05, "loss": 0.007, "step": 4352 }, { "epoch": 0.8422987616099071, "grad_norm": 0.07360105961561203, "learning_rate": 9.853524886632375e-05, "loss": 0.0077, "step": 4353 }, { "epoch": 0.8424922600619195, "grad_norm": 0.08733031153678894, "learning_rate": 9.853455443237755e-05, "loss": 0.0112, "step": 4354 }, { "epoch": 0.8426857585139319, "grad_norm": 0.10169778019189835, "learning_rate": 9.853385983658058e-05, "loss": 0.0092, "step": 4355 }, { "epoch": 0.8428792569659442, "grad_norm": 0.08669862896203995, "learning_rate": 9.853316507893545e-05, "loss": 0.0107, "step": 4356 }, { "epoch": 0.8430727554179567, "grad_norm": 0.13893623650074005, "learning_rate": 9.853247015944474e-05, "loss": 0.0104, "step": 4357 }, { "epoch": 0.843266253869969, "grad_norm": 0.05685348063707352, "learning_rate": 9.853177507811105e-05, "loss": 0.0115, "step": 4358 }, { "epoch": 0.8434597523219814, "grad_norm": 0.10821937769651413, "learning_rate": 9.853107983493693e-05, "loss": 0.0092, "step": 4359 }, { "epoch": 0.8436532507739938, "grad_norm": 0.04981356859207153, "learning_rate": 9.853038442992498e-05, "loss": 0.01, "step": 4360 }, { "epoch": 0.8438467492260062, "grad_norm": 
0.10426722466945648, "learning_rate": 9.852968886307779e-05, "loss": 0.0104, "step": 4361 }, { "epoch": 0.8440402476780186, "grad_norm": 0.06406710296869278, "learning_rate": 9.852899313439795e-05, "loss": 0.0113, "step": 4362 }, { "epoch": 0.844233746130031, "grad_norm": 0.11586116999387741, "learning_rate": 9.852829724388803e-05, "loss": 0.0107, "step": 4363 }, { "epoch": 0.8444272445820433, "grad_norm": 0.06253334879875183, "learning_rate": 9.852760119155063e-05, "loss": 0.0108, "step": 4364 }, { "epoch": 0.8446207430340558, "grad_norm": 0.13785944879055023, "learning_rate": 9.852690497738833e-05, "loss": 0.0098, "step": 4365 }, { "epoch": 0.8448142414860681, "grad_norm": 0.04436851665377617, "learning_rate": 9.852620860140374e-05, "loss": 0.0105, "step": 4366 }, { "epoch": 0.8450077399380805, "grad_norm": 0.14470431208610535, "learning_rate": 9.852551206359942e-05, "loss": 0.0099, "step": 4367 }, { "epoch": 0.8452012383900929, "grad_norm": 0.07815732061862946, "learning_rate": 9.852481536397796e-05, "loss": 0.0084, "step": 4368 }, { "epoch": 0.8453947368421053, "grad_norm": 0.13834814727306366, "learning_rate": 9.852411850254196e-05, "loss": 0.011, "step": 4369 }, { "epoch": 0.8455882352941176, "grad_norm": 0.07268329709768295, "learning_rate": 9.852342147929403e-05, "loss": 0.01, "step": 4370 }, { "epoch": 0.8457817337461301, "grad_norm": 0.13386018574237823, "learning_rate": 9.852272429423675e-05, "loss": 0.011, "step": 4371 }, { "epoch": 0.8459752321981424, "grad_norm": 0.09861061722040176, "learning_rate": 9.852202694737268e-05, "loss": 0.0109, "step": 4372 }, { "epoch": 0.8461687306501547, "grad_norm": 0.11913572996854782, "learning_rate": 9.852132943870445e-05, "loss": 0.0094, "step": 4373 }, { "epoch": 0.8463622291021672, "grad_norm": 0.1542474925518036, "learning_rate": 9.852063176823464e-05, "loss": 0.0092, "step": 4374 }, { "epoch": 0.8465557275541795, "grad_norm": 0.12201674282550812, "learning_rate": 9.851993393596583e-05, "loss": 0.0113, "step": 4375 }, { "epoch": 0.846749226006192, "grad_norm": 0.1400933414697647, "learning_rate": 9.851923594190066e-05, "loss": 0.0097, "step": 4376 }, { "epoch": 0.8469427244582043, "grad_norm": 0.11578204482793808, "learning_rate": 9.851853778604167e-05, "loss": 0.0115, "step": 4377 }, { "epoch": 0.8471362229102167, "grad_norm": 0.10872343182563782, "learning_rate": 9.851783946839151e-05, "loss": 0.0125, "step": 4378 }, { "epoch": 0.8473297213622291, "grad_norm": 0.16156722605228424, "learning_rate": 9.851714098895272e-05, "loss": 0.0085, "step": 4379 }, { "epoch": 0.8475232198142415, "grad_norm": 0.07243921607732773, "learning_rate": 9.851644234772792e-05, "loss": 0.0097, "step": 4380 }, { "epoch": 0.8477167182662538, "grad_norm": 0.13292910158634186, "learning_rate": 9.85157435447197e-05, "loss": 0.008, "step": 4381 }, { "epoch": 0.8479102167182663, "grad_norm": 0.07968098670244217, "learning_rate": 9.851504457993069e-05, "loss": 0.0118, "step": 4382 }, { "epoch": 0.8481037151702786, "grad_norm": 0.14682230353355408, "learning_rate": 9.851434545336348e-05, "loss": 0.011, "step": 4383 }, { "epoch": 0.848297213622291, "grad_norm": 0.1070711612701416, "learning_rate": 9.851364616502063e-05, "loss": 0.01, "step": 4384 }, { "epoch": 0.8484907120743034, "grad_norm": 0.14343611896038055, "learning_rate": 9.851294671490476e-05, "loss": 0.0107, "step": 4385 }, { "epoch": 0.8486842105263158, "grad_norm": 0.12385246902704239, "learning_rate": 9.85122471030185e-05, "loss": 0.0098, "step": 4386 }, { "epoch": 0.8488777089783281, "grad_norm": 
0.1276194006204605, "learning_rate": 9.851154732936441e-05, "loss": 0.0102, "step": 4387 }, { "epoch": 0.8490712074303406, "grad_norm": 0.08829089254140854, "learning_rate": 9.851084739394511e-05, "loss": 0.0114, "step": 4388 }, { "epoch": 0.8492647058823529, "grad_norm": 0.13048803806304932, "learning_rate": 9.851014729676322e-05, "loss": 0.0088, "step": 4389 }, { "epoch": 0.8494582043343654, "grad_norm": 0.05630185082554817, "learning_rate": 9.850944703782131e-05, "loss": 0.0095, "step": 4390 }, { "epoch": 0.8496517027863777, "grad_norm": 0.10135270655155182, "learning_rate": 9.8508746617122e-05, "loss": 0.0105, "step": 4391 }, { "epoch": 0.8498452012383901, "grad_norm": 0.10913833230733871, "learning_rate": 9.850804603466787e-05, "loss": 0.0078, "step": 4392 }, { "epoch": 0.8500386996904025, "grad_norm": 0.08527091890573502, "learning_rate": 9.850734529046157e-05, "loss": 0.0081, "step": 4393 }, { "epoch": 0.8502321981424149, "grad_norm": 0.15424422919750214, "learning_rate": 9.850664438450567e-05, "loss": 0.0083, "step": 4394 }, { "epoch": 0.8504256965944272, "grad_norm": 0.06219438835978508, "learning_rate": 9.85059433168028e-05, "loss": 0.0095, "step": 4395 }, { "epoch": 0.8506191950464397, "grad_norm": 0.13284720480442047, "learning_rate": 9.850524208735553e-05, "loss": 0.0108, "step": 4396 }, { "epoch": 0.850812693498452, "grad_norm": 0.04365675896406174, "learning_rate": 9.85045406961665e-05, "loss": 0.0085, "step": 4397 }, { "epoch": 0.8510061919504643, "grad_norm": 0.0928795337677002, "learning_rate": 9.850383914323832e-05, "loss": 0.0072, "step": 4398 }, { "epoch": 0.8511996904024768, "grad_norm": 0.06211601197719574, "learning_rate": 9.850313742857357e-05, "loss": 0.0082, "step": 4399 }, { "epoch": 0.8513931888544891, "grad_norm": 0.14674727618694305, "learning_rate": 9.850243555217487e-05, "loss": 0.0094, "step": 4400 }, { "epoch": 0.8515866873065016, "grad_norm": 0.05948290973901749, "learning_rate": 9.850173351404484e-05, "loss": 0.0103, "step": 4401 }, { "epoch": 0.8517801857585139, "grad_norm": 0.15637336671352386, "learning_rate": 9.85010313141861e-05, "loss": 0.0092, "step": 4402 }, { "epoch": 0.8519736842105263, "grad_norm": 0.08175824582576752, "learning_rate": 9.850032895260122e-05, "loss": 0.0104, "step": 4403 }, { "epoch": 0.8521671826625387, "grad_norm": 0.12806637585163116, "learning_rate": 9.849962642929286e-05, "loss": 0.0082, "step": 4404 }, { "epoch": 0.8523606811145511, "grad_norm": 0.11601667106151581, "learning_rate": 9.84989237442636e-05, "loss": 0.0111, "step": 4405 }, { "epoch": 0.8525541795665634, "grad_norm": 0.07982384413480759, "learning_rate": 9.849822089751605e-05, "loss": 0.0083, "step": 4406 }, { "epoch": 0.8527476780185759, "grad_norm": 0.15427711606025696, "learning_rate": 9.849751788905284e-05, "loss": 0.0101, "step": 4407 }, { "epoch": 0.8529411764705882, "grad_norm": 0.05575298145413399, "learning_rate": 9.849681471887656e-05, "loss": 0.0108, "step": 4408 }, { "epoch": 0.8531346749226006, "grad_norm": 0.12443205714225769, "learning_rate": 9.849611138698987e-05, "loss": 0.0097, "step": 4409 }, { "epoch": 0.853328173374613, "grad_norm": 0.04697538912296295, "learning_rate": 9.849540789339534e-05, "loss": 0.0087, "step": 4410 }, { "epoch": 0.8535216718266254, "grad_norm": 0.09771008789539337, "learning_rate": 9.84947042380956e-05, "loss": 0.0101, "step": 4411 }, { "epoch": 0.8537151702786377, "grad_norm": 0.04962759092450142, "learning_rate": 9.849400042109326e-05, "loss": 0.0105, "step": 4412 }, { "epoch": 0.8539086687306502, "grad_norm": 
0.09450335800647736, "learning_rate": 9.849329644239096e-05, "loss": 0.0102, "step": 4413 }, { "epoch": 0.8541021671826625, "grad_norm": 0.043140362948179245, "learning_rate": 9.84925923019913e-05, "loss": 0.0116, "step": 4414 }, { "epoch": 0.854295665634675, "grad_norm": 0.08879069983959198, "learning_rate": 9.849188799989688e-05, "loss": 0.0093, "step": 4415 }, { "epoch": 0.8544891640866873, "grad_norm": 0.029119238257408142, "learning_rate": 9.849118353611036e-05, "loss": 0.0104, "step": 4416 }, { "epoch": 0.8546826625386997, "grad_norm": 0.11698539555072784, "learning_rate": 9.849047891063431e-05, "loss": 0.0116, "step": 4417 }, { "epoch": 0.8548761609907121, "grad_norm": 0.03620848059654236, "learning_rate": 9.84897741234714e-05, "loss": 0.0094, "step": 4418 }, { "epoch": 0.8550696594427245, "grad_norm": 0.09726351499557495, "learning_rate": 9.84890691746242e-05, "loss": 0.0096, "step": 4419 }, { "epoch": 0.8552631578947368, "grad_norm": 0.06526447087526321, "learning_rate": 9.848836406409538e-05, "loss": 0.0085, "step": 4420 }, { "epoch": 0.8554566563467493, "grad_norm": 0.10670195519924164, "learning_rate": 9.848765879188753e-05, "loss": 0.0108, "step": 4421 }, { "epoch": 0.8556501547987616, "grad_norm": 0.059901732951402664, "learning_rate": 9.848695335800329e-05, "loss": 0.01, "step": 4422 }, { "epoch": 0.8558436532507739, "grad_norm": 0.08605176210403442, "learning_rate": 9.848624776244526e-05, "loss": 0.0116, "step": 4423 }, { "epoch": 0.8560371517027864, "grad_norm": 0.0544719360768795, "learning_rate": 9.848554200521608e-05, "loss": 0.011, "step": 4424 }, { "epoch": 0.8562306501547987, "grad_norm": 0.0628667026758194, "learning_rate": 9.848483608631837e-05, "loss": 0.0079, "step": 4425 }, { "epoch": 0.8564241486068112, "grad_norm": 0.10155007988214493, "learning_rate": 9.848413000575477e-05, "loss": 0.01, "step": 4426 }, { "epoch": 0.8566176470588235, "grad_norm": 0.10310208052396774, "learning_rate": 9.848342376352787e-05, "loss": 0.0096, "step": 4427 }, { "epoch": 0.8568111455108359, "grad_norm": 0.09840870648622513, "learning_rate": 9.848271735964031e-05, "loss": 0.0103, "step": 4428 }, { "epoch": 0.8570046439628483, "grad_norm": 0.11619005352258682, "learning_rate": 9.848201079409473e-05, "loss": 0.009, "step": 4429 }, { "epoch": 0.8571981424148607, "grad_norm": 0.09837726503610611, "learning_rate": 9.848130406689376e-05, "loss": 0.0101, "step": 4430 }, { "epoch": 0.857391640866873, "grad_norm": 0.1402502954006195, "learning_rate": 9.848059717804e-05, "loss": 0.0091, "step": 4431 }, { "epoch": 0.8575851393188855, "grad_norm": 0.09936097264289856, "learning_rate": 9.84798901275361e-05, "loss": 0.0091, "step": 4432 }, { "epoch": 0.8577786377708978, "grad_norm": 0.13992935419082642, "learning_rate": 9.847918291538467e-05, "loss": 0.0104, "step": 4433 }, { "epoch": 0.8579721362229102, "grad_norm": 0.1050349622964859, "learning_rate": 9.847847554158838e-05, "loss": 0.0094, "step": 4434 }, { "epoch": 0.8581656346749226, "grad_norm": 0.08364086598157883, "learning_rate": 9.847776800614979e-05, "loss": 0.009, "step": 4435 }, { "epoch": 0.858359133126935, "grad_norm": 0.1089058443903923, "learning_rate": 9.84770603090716e-05, "loss": 0.0097, "step": 4436 }, { "epoch": 0.8585526315789473, "grad_norm": 0.08729001134634018, "learning_rate": 9.847635245035641e-05, "loss": 0.0118, "step": 4437 }, { "epoch": 0.8587461300309598, "grad_norm": 0.1337471306324005, "learning_rate": 9.847564443000686e-05, "loss": 0.0102, "step": 4438 }, { "epoch": 0.8589396284829721, "grad_norm": 
0.09913847595453262, "learning_rate": 9.847493624802557e-05, "loss": 0.0103, "step": 4439 }, { "epoch": 0.8591331269349846, "grad_norm": 0.11610803753137589, "learning_rate": 9.847422790441517e-05, "loss": 0.0091, "step": 4440 }, { "epoch": 0.8593266253869969, "grad_norm": 0.03886020556092262, "learning_rate": 9.847351939917831e-05, "loss": 0.0094, "step": 4441 }, { "epoch": 0.8595201238390093, "grad_norm": 0.12777328491210938, "learning_rate": 9.847281073231763e-05, "loss": 0.0102, "step": 4442 }, { "epoch": 0.8597136222910217, "grad_norm": 0.05274071544408798, "learning_rate": 9.847210190383574e-05, "loss": 0.0106, "step": 4443 }, { "epoch": 0.8599071207430341, "grad_norm": 0.09580298513174057, "learning_rate": 9.847139291373527e-05, "loss": 0.0089, "step": 4444 }, { "epoch": 0.8601006191950464, "grad_norm": 0.08908598870038986, "learning_rate": 9.84706837620189e-05, "loss": 0.0097, "step": 4445 }, { "epoch": 0.8602941176470589, "grad_norm": 0.07581286877393723, "learning_rate": 9.846997444868923e-05, "loss": 0.01, "step": 4446 }, { "epoch": 0.8604876160990712, "grad_norm": 0.12848776578903198, "learning_rate": 9.84692649737489e-05, "loss": 0.011, "step": 4447 }, { "epoch": 0.8606811145510835, "grad_norm": 0.08022555708885193, "learning_rate": 9.846855533720055e-05, "loss": 0.0111, "step": 4448 }, { "epoch": 0.860874613003096, "grad_norm": 0.11859019845724106, "learning_rate": 9.846784553904684e-05, "loss": 0.008, "step": 4449 }, { "epoch": 0.8610681114551083, "grad_norm": 0.08939790725708008, "learning_rate": 9.846713557929038e-05, "loss": 0.0112, "step": 4450 }, { "epoch": 0.8612616099071208, "grad_norm": 0.09366368502378464, "learning_rate": 9.846642545793382e-05, "loss": 0.0103, "step": 4451 }, { "epoch": 0.8614551083591331, "grad_norm": 0.08663507550954819, "learning_rate": 9.84657151749798e-05, "loss": 0.0086, "step": 4452 }, { "epoch": 0.8616486068111455, "grad_norm": 0.12318063527345657, "learning_rate": 9.846500473043097e-05, "loss": 0.0087, "step": 4453 }, { "epoch": 0.8618421052631579, "grad_norm": 0.08465968817472458, "learning_rate": 9.846429412428996e-05, "loss": 0.0096, "step": 4454 }, { "epoch": 0.8620356037151703, "grad_norm": 0.19788753986358643, "learning_rate": 9.846358335655941e-05, "loss": 0.0111, "step": 4455 }, { "epoch": 0.8622291021671826, "grad_norm": 0.056555554270744324, "learning_rate": 9.846287242724197e-05, "loss": 0.0113, "step": 4456 }, { "epoch": 0.8624226006191951, "grad_norm": 0.2072838842868805, "learning_rate": 9.846216133634026e-05, "loss": 0.0103, "step": 4457 }, { "epoch": 0.8626160990712074, "grad_norm": 0.07588866353034973, "learning_rate": 9.846145008385698e-05, "loss": 0.0115, "step": 4458 }, { "epoch": 0.8628095975232198, "grad_norm": 0.15975940227508545, "learning_rate": 9.84607386697947e-05, "loss": 0.0098, "step": 4459 }, { "epoch": 0.8630030959752322, "grad_norm": 0.12187561392784119, "learning_rate": 9.846002709415613e-05, "loss": 0.0101, "step": 4460 }, { "epoch": 0.8631965944272446, "grad_norm": 0.08121882379055023, "learning_rate": 9.845931535694389e-05, "loss": 0.0095, "step": 4461 }, { "epoch": 0.8633900928792569, "grad_norm": 0.10978756844997406, "learning_rate": 9.84586034581606e-05, "loss": 0.0105, "step": 4462 }, { "epoch": 0.8635835913312694, "grad_norm": 0.08013036847114563, "learning_rate": 9.845789139780896e-05, "loss": 0.0081, "step": 4463 }, { "epoch": 0.8637770897832817, "grad_norm": 0.10547763854265213, "learning_rate": 9.845717917589155e-05, "loss": 0.0081, "step": 4464 }, { "epoch": 0.8639705882352942, "grad_norm": 
0.09772748500108719, "learning_rate": 9.845646679241109e-05, "loss": 0.0081, "step": 4465 }, { "epoch": 0.8641640866873065, "grad_norm": 0.12373756617307663, "learning_rate": 9.845575424737019e-05, "loss": 0.01, "step": 4466 }, { "epoch": 0.8643575851393189, "grad_norm": 0.09697721898555756, "learning_rate": 9.845504154077149e-05, "loss": 0.0098, "step": 4467 }, { "epoch": 0.8645510835913313, "grad_norm": 0.04048194736242294, "learning_rate": 9.845432867261765e-05, "loss": 0.009, "step": 4468 }, { "epoch": 0.8647445820433437, "grad_norm": 0.06722118705511093, "learning_rate": 9.845361564291134e-05, "loss": 0.009, "step": 4469 }, { "epoch": 0.864938080495356, "grad_norm": 0.04414692148566246, "learning_rate": 9.845290245165518e-05, "loss": 0.0102, "step": 4470 }, { "epoch": 0.8651315789473685, "grad_norm": 0.07539179921150208, "learning_rate": 9.845218909885185e-05, "loss": 0.009, "step": 4471 }, { "epoch": 0.8653250773993808, "grad_norm": 0.03458453342318535, "learning_rate": 9.845147558450399e-05, "loss": 0.0062, "step": 4472 }, { "epoch": 0.8655185758513931, "grad_norm": 0.09548351168632507, "learning_rate": 9.845076190861424e-05, "loss": 0.0094, "step": 4473 }, { "epoch": 0.8657120743034056, "grad_norm": 0.03988087922334671, "learning_rate": 9.845004807118525e-05, "loss": 0.0087, "step": 4474 }, { "epoch": 0.8659055727554179, "grad_norm": 0.08209624886512756, "learning_rate": 9.84493340722197e-05, "loss": 0.0093, "step": 4475 }, { "epoch": 0.8660990712074303, "grad_norm": 0.060444559901952744, "learning_rate": 9.844861991172024e-05, "loss": 0.01, "step": 4476 }, { "epoch": 0.8662925696594427, "grad_norm": 0.07218166440725327, "learning_rate": 9.844790558968951e-05, "loss": 0.0108, "step": 4477 }, { "epoch": 0.8664860681114551, "grad_norm": 0.0761670470237732, "learning_rate": 9.844719110613019e-05, "loss": 0.01, "step": 4478 }, { "epoch": 0.8666795665634675, "grad_norm": 0.10744817554950714, "learning_rate": 9.844647646104489e-05, "loss": 0.0105, "step": 4479 }, { "epoch": 0.8668730650154799, "grad_norm": 0.08518655598163605, "learning_rate": 9.844576165443631e-05, "loss": 0.0091, "step": 4480 }, { "epoch": 0.8670665634674922, "grad_norm": 0.09301792085170746, "learning_rate": 9.844504668630708e-05, "loss": 0.0093, "step": 4481 }, { "epoch": 0.8672600619195047, "grad_norm": 0.06154467165470123, "learning_rate": 9.84443315566599e-05, "loss": 0.0092, "step": 4482 }, { "epoch": 0.867453560371517, "grad_norm": 0.252410352230072, "learning_rate": 9.844361626549738e-05, "loss": 0.0125, "step": 4483 }, { "epoch": 0.8676470588235294, "grad_norm": 0.10266885161399841, "learning_rate": 9.844290081282221e-05, "loss": 0.011, "step": 4484 }, { "epoch": 0.8678405572755418, "grad_norm": 0.28755006194114685, "learning_rate": 9.844218519863704e-05, "loss": 0.0115, "step": 4485 }, { "epoch": 0.8680340557275542, "grad_norm": 0.11571069806814194, "learning_rate": 9.844146942294453e-05, "loss": 0.0087, "step": 4486 }, { "epoch": 0.8682275541795665, "grad_norm": 0.26379719376564026, "learning_rate": 9.844075348574732e-05, "loss": 0.01, "step": 4487 }, { "epoch": 0.868421052631579, "grad_norm": 0.09105381369590759, "learning_rate": 9.84400373870481e-05, "loss": 0.0103, "step": 4488 }, { "epoch": 0.8686145510835913, "grad_norm": 0.2741834819316864, "learning_rate": 9.843932112684954e-05, "loss": 0.0097, "step": 4489 }, { "epoch": 0.8688080495356038, "grad_norm": 0.11645444482564926, "learning_rate": 9.843860470515427e-05, "loss": 0.0097, "step": 4490 }, { "epoch": 0.8690015479876161, "grad_norm": 
0.24268491566181183, "learning_rate": 9.843788812196499e-05, "loss": 0.01, "step": 4491 }, { "epoch": 0.8691950464396285, "grad_norm": 0.16803982853889465, "learning_rate": 9.843717137728435e-05, "loss": 0.0127, "step": 4492 }, { "epoch": 0.8693885448916409, "grad_norm": 0.18021085858345032, "learning_rate": 9.843645447111497e-05, "loss": 0.0103, "step": 4493 }, { "epoch": 0.8695820433436533, "grad_norm": 0.1918119192123413, "learning_rate": 9.84357374034596e-05, "loss": 0.0112, "step": 4494 }, { "epoch": 0.8697755417956656, "grad_norm": 0.10817494988441467, "learning_rate": 9.843502017432083e-05, "loss": 0.0109, "step": 4495 }, { "epoch": 0.8699690402476781, "grad_norm": 0.18676747381687164, "learning_rate": 9.843430278370138e-05, "loss": 0.01, "step": 4496 }, { "epoch": 0.8701625386996904, "grad_norm": 0.09587520360946655, "learning_rate": 9.843358523160387e-05, "loss": 0.0094, "step": 4497 }, { "epoch": 0.8703560371517027, "grad_norm": 0.1330089569091797, "learning_rate": 9.843286751803101e-05, "loss": 0.0093, "step": 4498 }, { "epoch": 0.8705495356037152, "grad_norm": 0.09701386839151382, "learning_rate": 9.843214964298546e-05, "loss": 0.0104, "step": 4499 }, { "epoch": 0.8707430340557275, "grad_norm": 0.13518960773944855, "learning_rate": 9.843143160646986e-05, "loss": 0.0086, "step": 4500 }, { "epoch": 0.87093653250774, "grad_norm": 0.13650842010974884, "learning_rate": 9.843071340848691e-05, "loss": 0.0084, "step": 4501 }, { "epoch": 0.8711300309597523, "grad_norm": 0.11719492822885513, "learning_rate": 9.842999504903927e-05, "loss": 0.0098, "step": 4502 }, { "epoch": 0.8713235294117647, "grad_norm": 0.14092141389846802, "learning_rate": 9.84292765281296e-05, "loss": 0.0086, "step": 4503 }, { "epoch": 0.871517027863777, "grad_norm": 0.09178800135850906, "learning_rate": 9.842855784576058e-05, "loss": 0.0087, "step": 4504 }, { "epoch": 0.8717105263157895, "grad_norm": 0.12305571138858795, "learning_rate": 9.84278390019349e-05, "loss": 0.009, "step": 4505 }, { "epoch": 0.8719040247678018, "grad_norm": 0.05485406517982483, "learning_rate": 9.84271199966552e-05, "loss": 0.0086, "step": 4506 }, { "epoch": 0.8720975232198143, "grad_norm": 0.08408109843730927, "learning_rate": 9.842640082992418e-05, "loss": 0.0082, "step": 4507 }, { "epoch": 0.8722910216718266, "grad_norm": 0.042208053171634674, "learning_rate": 9.842568150174449e-05, "loss": 0.0091, "step": 4508 }, { "epoch": 0.872484520123839, "grad_norm": 0.05664237588644028, "learning_rate": 9.842496201211882e-05, "loss": 0.0105, "step": 4509 }, { "epoch": 0.8726780185758514, "grad_norm": 0.05130071938037872, "learning_rate": 9.842424236104984e-05, "loss": 0.0099, "step": 4510 }, { "epoch": 0.8728715170278638, "grad_norm": 0.04147406294941902, "learning_rate": 9.842352254854025e-05, "loss": 0.0096, "step": 4511 }, { "epoch": 0.8730650154798761, "grad_norm": 0.05887917801737785, "learning_rate": 9.842280257459268e-05, "loss": 0.0101, "step": 4512 }, { "epoch": 0.8732585139318886, "grad_norm": 0.06814023852348328, "learning_rate": 9.842208243920982e-05, "loss": 0.0122, "step": 4513 }, { "epoch": 0.8734520123839009, "grad_norm": 0.033396705985069275, "learning_rate": 9.842136214239439e-05, "loss": 0.0082, "step": 4514 }, { "epoch": 0.8736455108359134, "grad_norm": 0.0979776680469513, "learning_rate": 9.842064168414901e-05, "loss": 0.0106, "step": 4515 }, { "epoch": 0.8738390092879257, "grad_norm": 0.06319987773895264, "learning_rate": 9.841992106447638e-05, "loss": 0.0115, "step": 4516 }, { "epoch": 0.8740325077399381, "grad_norm": 
0.048729896545410156, "learning_rate": 9.84192002833792e-05, "loss": 0.01, "step": 4517 }, { "epoch": 0.8742260061919505, "grad_norm": 0.08355148881673813, "learning_rate": 9.841847934086011e-05, "loss": 0.0109, "step": 4518 }, { "epoch": 0.8744195046439629, "grad_norm": 0.06492578238248825, "learning_rate": 9.841775823692183e-05, "loss": 0.0109, "step": 4519 }, { "epoch": 0.8746130030959752, "grad_norm": 0.08210084587335587, "learning_rate": 9.8417036971567e-05, "loss": 0.0117, "step": 4520 }, { "epoch": 0.8748065015479877, "grad_norm": 0.07719583809375763, "learning_rate": 9.841631554479837e-05, "loss": 0.0107, "step": 4521 }, { "epoch": 0.875, "grad_norm": 0.09540840238332748, "learning_rate": 9.841559395661855e-05, "loss": 0.0103, "step": 4522 }, { "epoch": 0.8751934984520123, "grad_norm": 0.05868422985076904, "learning_rate": 9.841487220703026e-05, "loss": 0.0085, "step": 4523 }, { "epoch": 0.8753869969040248, "grad_norm": 0.05612717196345329, "learning_rate": 9.841415029603614e-05, "loss": 0.0104, "step": 4524 }, { "epoch": 0.8755804953560371, "grad_norm": 0.07269427925348282, "learning_rate": 9.841342822363894e-05, "loss": 0.0106, "step": 4525 }, { "epoch": 0.8757739938080495, "grad_norm": 0.03350799158215523, "learning_rate": 9.84127059898413e-05, "loss": 0.0102, "step": 4526 }, { "epoch": 0.8759674922600619, "grad_norm": 0.09339384734630585, "learning_rate": 9.841198359464592e-05, "loss": 0.0109, "step": 4527 }, { "epoch": 0.8761609907120743, "grad_norm": 0.0486285500228405, "learning_rate": 9.841126103805547e-05, "loss": 0.0097, "step": 4528 }, { "epoch": 0.8763544891640866, "grad_norm": 0.045588169246912, "learning_rate": 9.841053832007266e-05, "loss": 0.0083, "step": 4529 }, { "epoch": 0.8765479876160991, "grad_norm": 0.08156940340995789, "learning_rate": 9.840981544070017e-05, "loss": 0.0099, "step": 4530 }, { "epoch": 0.8767414860681114, "grad_norm": 0.1045239120721817, "learning_rate": 9.840909239994067e-05, "loss": 0.0089, "step": 4531 }, { "epoch": 0.8769349845201239, "grad_norm": 0.07206383347511292, "learning_rate": 9.840836919779686e-05, "loss": 0.01, "step": 4532 }, { "epoch": 0.8771284829721362, "grad_norm": 0.11156872659921646, "learning_rate": 9.840764583427145e-05, "loss": 0.0096, "step": 4533 }, { "epoch": 0.8773219814241486, "grad_norm": 0.08640369772911072, "learning_rate": 9.840692230936708e-05, "loss": 0.0101, "step": 4534 }, { "epoch": 0.877515479876161, "grad_norm": 0.14089690148830414, "learning_rate": 9.840619862308648e-05, "loss": 0.0121, "step": 4535 }, { "epoch": 0.8777089783281734, "grad_norm": 0.11600317060947418, "learning_rate": 9.840547477543234e-05, "loss": 0.0099, "step": 4536 }, { "epoch": 0.8779024767801857, "grad_norm": 0.10598740726709366, "learning_rate": 9.840475076640732e-05, "loss": 0.0106, "step": 4537 }, { "epoch": 0.8780959752321982, "grad_norm": 0.15261957049369812, "learning_rate": 9.840402659601416e-05, "loss": 0.0117, "step": 4538 }, { "epoch": 0.8782894736842105, "grad_norm": 0.15641608834266663, "learning_rate": 9.840330226425551e-05, "loss": 0.0097, "step": 4539 }, { "epoch": 0.878482972136223, "grad_norm": 0.1350310891866684, "learning_rate": 9.840257777113406e-05, "loss": 0.0116, "step": 4540 }, { "epoch": 0.8786764705882353, "grad_norm": 0.13216537237167358, "learning_rate": 9.840185311665254e-05, "loss": 0.0089, "step": 4541 }, { "epoch": 0.8788699690402477, "grad_norm": 0.22257168591022491, "learning_rate": 9.840112830081361e-05, "loss": 0.0106, "step": 4542 }, { "epoch": 0.87906346749226, "grad_norm": 0.15216514468193054, 
"learning_rate": 9.840040332361999e-05, "loss": 0.0134, "step": 4543 }, { "epoch": 0.8792569659442725, "grad_norm": 0.2683650255203247, "learning_rate": 9.839967818507436e-05, "loss": 0.0101, "step": 4544 }, { "epoch": 0.8794504643962848, "grad_norm": 0.12738698720932007, "learning_rate": 9.839895288517943e-05, "loss": 0.0089, "step": 4545 }, { "epoch": 0.8796439628482973, "grad_norm": 0.2351738065481186, "learning_rate": 9.839822742393788e-05, "loss": 0.0098, "step": 4546 }, { "epoch": 0.8798374613003096, "grad_norm": 0.06193775683641434, "learning_rate": 9.839750180135241e-05, "loss": 0.011, "step": 4547 }, { "epoch": 0.8800309597523219, "grad_norm": 0.23834912478923798, "learning_rate": 9.839677601742573e-05, "loss": 0.0102, "step": 4548 }, { "epoch": 0.8802244582043344, "grad_norm": 0.11196163296699524, "learning_rate": 9.839605007216053e-05, "loss": 0.0104, "step": 4549 }, { "epoch": 0.8804179566563467, "grad_norm": 0.19377319514751434, "learning_rate": 9.839532396555951e-05, "loss": 0.0103, "step": 4550 }, { "epoch": 0.8806114551083591, "grad_norm": 0.21584521234035492, "learning_rate": 9.839459769762535e-05, "loss": 0.0117, "step": 4551 }, { "epoch": 0.8808049535603715, "grad_norm": 0.15353329479694366, "learning_rate": 9.839387126836078e-05, "loss": 0.0094, "step": 4552 }, { "epoch": 0.8809984520123839, "grad_norm": 0.24248340725898743, "learning_rate": 9.83931446777685e-05, "loss": 0.0097, "step": 4553 }, { "epoch": 0.8811919504643962, "grad_norm": 0.056810468435287476, "learning_rate": 9.839241792585118e-05, "loss": 0.01, "step": 4554 }, { "epoch": 0.8813854489164087, "grad_norm": 0.25270164012908936, "learning_rate": 9.839169101261157e-05, "loss": 0.0121, "step": 4555 }, { "epoch": 0.881578947368421, "grad_norm": 0.07536045461893082, "learning_rate": 9.839096393805233e-05, "loss": 0.0094, "step": 4556 }, { "epoch": 0.8817724458204335, "grad_norm": 0.19331485033035278, "learning_rate": 9.839023670217617e-05, "loss": 0.0109, "step": 4557 }, { "epoch": 0.8819659442724458, "grad_norm": 0.10898278653621674, "learning_rate": 9.838950930498581e-05, "loss": 0.0097, "step": 4558 }, { "epoch": 0.8821594427244582, "grad_norm": 0.149475559592247, "learning_rate": 9.838878174648393e-05, "loss": 0.0089, "step": 4559 }, { "epoch": 0.8823529411764706, "grad_norm": 0.10654472559690475, "learning_rate": 9.838805402667328e-05, "loss": 0.0119, "step": 4560 }, { "epoch": 0.882546439628483, "grad_norm": 0.10374388843774796, "learning_rate": 9.838732614555653e-05, "loss": 0.0114, "step": 4561 }, { "epoch": 0.8827399380804953, "grad_norm": 0.085910864174366, "learning_rate": 9.838659810313638e-05, "loss": 0.0102, "step": 4562 }, { "epoch": 0.8829334365325078, "grad_norm": 0.11548808962106705, "learning_rate": 9.838586989941555e-05, "loss": 0.0109, "step": 4563 }, { "epoch": 0.8831269349845201, "grad_norm": 0.08301571011543274, "learning_rate": 9.838514153439675e-05, "loss": 0.011, "step": 4564 }, { "epoch": 0.8833204334365325, "grad_norm": 0.09477119892835617, "learning_rate": 9.838441300808269e-05, "loss": 0.0107, "step": 4565 }, { "epoch": 0.8835139318885449, "grad_norm": 0.08198019862174988, "learning_rate": 9.838368432047604e-05, "loss": 0.0087, "step": 4566 }, { "epoch": 0.8837074303405573, "grad_norm": 0.08105877041816711, "learning_rate": 9.838295547157959e-05, "loss": 0.0094, "step": 4567 }, { "epoch": 0.8839009287925697, "grad_norm": 0.06740119308233261, "learning_rate": 9.838222646139598e-05, "loss": 0.0087, "step": 4568 }, { "epoch": 0.8840944272445821, "grad_norm": 0.086164191365242, 
"learning_rate": 9.838149728992792e-05, "loss": 0.0094, "step": 4569 }, { "epoch": 0.8842879256965944, "grad_norm": 0.07426311820745468, "learning_rate": 9.838076795717817e-05, "loss": 0.0101, "step": 4570 }, { "epoch": 0.8844814241486069, "grad_norm": 0.07292091846466064, "learning_rate": 9.83800384631494e-05, "loss": 0.0081, "step": 4571 }, { "epoch": 0.8846749226006192, "grad_norm": 0.12897388637065887, "learning_rate": 9.837930880784436e-05, "loss": 0.0107, "step": 4572 }, { "epoch": 0.8848684210526315, "grad_norm": 0.10527965426445007, "learning_rate": 9.837857899126571e-05, "loss": 0.0091, "step": 4573 }, { "epoch": 0.885061919504644, "grad_norm": 0.11111228168010712, "learning_rate": 9.83778490134162e-05, "loss": 0.0096, "step": 4574 }, { "epoch": 0.8852554179566563, "grad_norm": 0.09576696902513504, "learning_rate": 9.837711887429854e-05, "loss": 0.0113, "step": 4575 }, { "epoch": 0.8854489164086687, "grad_norm": 0.12676580250263214, "learning_rate": 9.837638857391544e-05, "loss": 0.0079, "step": 4576 }, { "epoch": 0.8856424148606811, "grad_norm": 0.12155836075544357, "learning_rate": 9.837565811226962e-05, "loss": 0.0105, "step": 4577 }, { "epoch": 0.8858359133126935, "grad_norm": 0.09020591527223587, "learning_rate": 9.83749274893638e-05, "loss": 0.0104, "step": 4578 }, { "epoch": 0.8860294117647058, "grad_norm": 0.20143333077430725, "learning_rate": 9.837419670520066e-05, "loss": 0.0089, "step": 4579 }, { "epoch": 0.8862229102167183, "grad_norm": 0.1052129790186882, "learning_rate": 9.837346575978296e-05, "loss": 0.0112, "step": 4580 }, { "epoch": 0.8864164086687306, "grad_norm": 0.1549261212348938, "learning_rate": 9.83727346531134e-05, "loss": 0.0114, "step": 4581 }, { "epoch": 0.8866099071207431, "grad_norm": 0.06688253581523895, "learning_rate": 9.83720033851947e-05, "loss": 0.0098, "step": 4582 }, { "epoch": 0.8868034055727554, "grad_norm": 0.13035283982753754, "learning_rate": 9.837127195602958e-05, "loss": 0.0087, "step": 4583 }, { "epoch": 0.8869969040247678, "grad_norm": 0.07260839641094208, "learning_rate": 9.837054036562076e-05, "loss": 0.0118, "step": 4584 }, { "epoch": 0.8871904024767802, "grad_norm": 0.12110265344381332, "learning_rate": 9.836980861397095e-05, "loss": 0.0101, "step": 4585 }, { "epoch": 0.8873839009287926, "grad_norm": 0.0708489716053009, "learning_rate": 9.836907670108287e-05, "loss": 0.0096, "step": 4586 }, { "epoch": 0.8875773993808049, "grad_norm": 0.14015476405620575, "learning_rate": 9.836834462695927e-05, "loss": 0.0099, "step": 4587 }, { "epoch": 0.8877708978328174, "grad_norm": 0.09297148138284683, "learning_rate": 9.836761239160284e-05, "loss": 0.0083, "step": 4588 }, { "epoch": 0.8879643962848297, "grad_norm": 0.11030396819114685, "learning_rate": 9.836687999501631e-05, "loss": 0.0119, "step": 4589 }, { "epoch": 0.8881578947368421, "grad_norm": 0.19707605242729187, "learning_rate": 9.836614743720242e-05, "loss": 0.009, "step": 4590 }, { "epoch": 0.8883513931888545, "grad_norm": 0.09420674294233322, "learning_rate": 9.836541471816387e-05, "loss": 0.0089, "step": 4591 }, { "epoch": 0.8885448916408669, "grad_norm": 0.1926356703042984, "learning_rate": 9.83646818379034e-05, "loss": 0.0073, "step": 4592 }, { "epoch": 0.8887383900928792, "grad_norm": 0.18112412095069885, "learning_rate": 9.836394879642372e-05, "loss": 0.0098, "step": 4593 }, { "epoch": 0.8889318885448917, "grad_norm": 0.19884899258613586, "learning_rate": 9.836321559372757e-05, "loss": 0.009, "step": 4594 }, { "epoch": 0.889125386996904, "grad_norm": 0.25296151638031006, 
"learning_rate": 9.836248222981766e-05, "loss": 0.009, "step": 4595 }, { "epoch": 0.8893188854489165, "grad_norm": 0.15695986151695251, "learning_rate": 9.836174870469674e-05, "loss": 0.0107, "step": 4596 }, { "epoch": 0.8895123839009288, "grad_norm": 0.3553577661514282, "learning_rate": 9.83610150183675e-05, "loss": 0.0103, "step": 4597 }, { "epoch": 0.8897058823529411, "grad_norm": 0.16849836707115173, "learning_rate": 9.836028117083271e-05, "loss": 0.0086, "step": 4598 }, { "epoch": 0.8898993808049536, "grad_norm": 0.27821922302246094, "learning_rate": 9.835954716209508e-05, "loss": 0.0102, "step": 4599 }, { "epoch": 0.8900928792569659, "grad_norm": 0.2354324758052826, "learning_rate": 9.835881299215733e-05, "loss": 0.0095, "step": 4600 }, { "epoch": 0.8902863777089783, "grad_norm": 0.15798087418079376, "learning_rate": 9.835807866102219e-05, "loss": 0.011, "step": 4601 }, { "epoch": 0.8904798761609907, "grad_norm": 0.2591439485549927, "learning_rate": 9.835734416869243e-05, "loss": 0.0106, "step": 4602 }, { "epoch": 0.8906733746130031, "grad_norm": 0.08854121714830399, "learning_rate": 9.835660951517071e-05, "loss": 0.0096, "step": 4603 }, { "epoch": 0.8908668730650154, "grad_norm": 0.19904501736164093, "learning_rate": 9.83558747004598e-05, "loss": 0.0078, "step": 4604 }, { "epoch": 0.8910603715170279, "grad_norm": 0.08810886740684509, "learning_rate": 9.835513972456245e-05, "loss": 0.0102, "step": 4605 }, { "epoch": 0.8912538699690402, "grad_norm": 0.1444828361272812, "learning_rate": 9.835440458748137e-05, "loss": 0.0102, "step": 4606 }, { "epoch": 0.8914473684210527, "grad_norm": 0.12972451746463776, "learning_rate": 9.83536692892193e-05, "loss": 0.01, "step": 4607 }, { "epoch": 0.891640866873065, "grad_norm": 0.10851830989122391, "learning_rate": 9.835293382977896e-05, "loss": 0.0096, "step": 4608 }, { "epoch": 0.8918343653250774, "grad_norm": 0.13532109558582306, "learning_rate": 9.835219820916311e-05, "loss": 0.0085, "step": 4609 }, { "epoch": 0.8920278637770898, "grad_norm": 0.09047849476337433, "learning_rate": 9.835146242737444e-05, "loss": 0.0096, "step": 4610 }, { "epoch": 0.8922213622291022, "grad_norm": 0.0899714007973671, "learning_rate": 9.835072648441573e-05, "loss": 0.0092, "step": 4611 }, { "epoch": 0.8924148606811145, "grad_norm": 0.10151515156030655, "learning_rate": 9.83499903802897e-05, "loss": 0.0117, "step": 4612 }, { "epoch": 0.892608359133127, "grad_norm": 0.061141617596149445, "learning_rate": 9.83492541149991e-05, "loss": 0.0109, "step": 4613 }, { "epoch": 0.8928018575851393, "grad_norm": 0.09248221665620804, "learning_rate": 9.834851768854665e-05, "loss": 0.0126, "step": 4614 }, { "epoch": 0.8929953560371517, "grad_norm": 0.0820598378777504, "learning_rate": 9.834778110093507e-05, "loss": 0.0107, "step": 4615 }, { "epoch": 0.8931888544891641, "grad_norm": 0.09183131903409958, "learning_rate": 9.834704435216715e-05, "loss": 0.0103, "step": 4616 }, { "epoch": 0.8933823529411765, "grad_norm": 0.07404261827468872, "learning_rate": 9.83463074422456e-05, "loss": 0.0082, "step": 4617 }, { "epoch": 0.8935758513931888, "grad_norm": 0.06820043921470642, "learning_rate": 9.834557037117315e-05, "loss": 0.011, "step": 4618 }, { "epoch": 0.8937693498452013, "grad_norm": 0.08928418904542923, "learning_rate": 9.834483313895253e-05, "loss": 0.0085, "step": 4619 }, { "epoch": 0.8939628482972136, "grad_norm": 0.05099804699420929, "learning_rate": 9.834409574558653e-05, "loss": 0.0082, "step": 4620 }, { "epoch": 0.8941563467492261, "grad_norm": 0.1031995564699173, 
"learning_rate": 9.834335819107786e-05, "loss": 0.0123, "step": 4621 }, { "epoch": 0.8943498452012384, "grad_norm": 0.06024441495537758, "learning_rate": 9.834262047542924e-05, "loss": 0.008, "step": 4622 }, { "epoch": 0.8945433436532507, "grad_norm": 0.10358297824859619, "learning_rate": 9.834188259864345e-05, "loss": 0.0104, "step": 4623 }, { "epoch": 0.8947368421052632, "grad_norm": 0.06208648905158043, "learning_rate": 9.834114456072324e-05, "loss": 0.0081, "step": 4624 }, { "epoch": 0.8949303405572755, "grad_norm": 0.11703736335039139, "learning_rate": 9.83404063616713e-05, "loss": 0.0094, "step": 4625 }, { "epoch": 0.8951238390092879, "grad_norm": 0.07812490314245224, "learning_rate": 9.833966800149043e-05, "loss": 0.01, "step": 4626 }, { "epoch": 0.8953173374613003, "grad_norm": 0.17138998210430145, "learning_rate": 9.833892948018336e-05, "loss": 0.0079, "step": 4627 }, { "epoch": 0.8955108359133127, "grad_norm": 0.08794521540403366, "learning_rate": 9.833819079775282e-05, "loss": 0.0116, "step": 4628 }, { "epoch": 0.895704334365325, "grad_norm": 0.09970802068710327, "learning_rate": 9.833745195420157e-05, "loss": 0.0106, "step": 4629 }, { "epoch": 0.8958978328173375, "grad_norm": 0.14910468459129333, "learning_rate": 9.833671294953234e-05, "loss": 0.0124, "step": 4630 }, { "epoch": 0.8960913312693498, "grad_norm": 0.07150191068649292, "learning_rate": 9.833597378374789e-05, "loss": 0.0104, "step": 4631 }, { "epoch": 0.8962848297213623, "grad_norm": 0.12520377337932587, "learning_rate": 9.833523445685097e-05, "loss": 0.0098, "step": 4632 }, { "epoch": 0.8964783281733746, "grad_norm": 0.10230588912963867, "learning_rate": 9.833449496884434e-05, "loss": 0.0086, "step": 4633 }, { "epoch": 0.896671826625387, "grad_norm": 0.09391612559556961, "learning_rate": 9.833375531973072e-05, "loss": 0.0092, "step": 4634 }, { "epoch": 0.8968653250773994, "grad_norm": 0.09921117126941681, "learning_rate": 9.83330155095129e-05, "loss": 0.0104, "step": 4635 }, { "epoch": 0.8970588235294118, "grad_norm": 0.09609491378068924, "learning_rate": 9.833227553819357e-05, "loss": 0.0105, "step": 4636 }, { "epoch": 0.8972523219814241, "grad_norm": 0.05783829838037491, "learning_rate": 9.833153540577553e-05, "loss": 0.0088, "step": 4637 }, { "epoch": 0.8974458204334366, "grad_norm": 0.17002767324447632, "learning_rate": 9.833079511226153e-05, "loss": 0.0083, "step": 4638 }, { "epoch": 0.8976393188854489, "grad_norm": 0.09484496712684631, "learning_rate": 9.83300546576543e-05, "loss": 0.0088, "step": 4639 }, { "epoch": 0.8978328173374613, "grad_norm": 0.14319202303886414, "learning_rate": 9.832931404195661e-05, "loss": 0.0114, "step": 4640 }, { "epoch": 0.8980263157894737, "grad_norm": 0.11863451451063156, "learning_rate": 9.83285732651712e-05, "loss": 0.0095, "step": 4641 }, { "epoch": 0.8982198142414861, "grad_norm": 0.10731173306703568, "learning_rate": 9.832783232730084e-05, "loss": 0.0085, "step": 4642 }, { "epoch": 0.8984133126934984, "grad_norm": 0.13083268702030182, "learning_rate": 9.832709122834826e-05, "loss": 0.0108, "step": 4643 }, { "epoch": 0.8986068111455109, "grad_norm": 0.11056560277938843, "learning_rate": 9.832634996831624e-05, "loss": 0.0086, "step": 4644 }, { "epoch": 0.8988003095975232, "grad_norm": 0.10523134469985962, "learning_rate": 9.832560854720753e-05, "loss": 0.0088, "step": 4645 }, { "epoch": 0.8989938080495357, "grad_norm": 0.14899104833602905, "learning_rate": 9.832486696502489e-05, "loss": 0.0082, "step": 4646 }, { "epoch": 0.899187306501548, "grad_norm": 0.10489589720964432, 
"learning_rate": 9.832412522177105e-05, "loss": 0.0101, "step": 4647 }, { "epoch": 0.8993808049535603, "grad_norm": 0.11019831150770187, "learning_rate": 9.832338331744881e-05, "loss": 0.0094, "step": 4648 }, { "epoch": 0.8995743034055728, "grad_norm": 0.0779484361410141, "learning_rate": 9.83226412520609e-05, "loss": 0.0103, "step": 4649 }, { "epoch": 0.8997678018575851, "grad_norm": 0.0611439049243927, "learning_rate": 9.832189902561007e-05, "loss": 0.0088, "step": 4650 }, { "epoch": 0.8999613003095975, "grad_norm": 0.06850774586200714, "learning_rate": 9.832115663809911e-05, "loss": 0.0079, "step": 4651 }, { "epoch": 0.9001547987616099, "grad_norm": 0.0450025238096714, "learning_rate": 9.832041408953075e-05, "loss": 0.0093, "step": 4652 }, { "epoch": 0.9003482972136223, "grad_norm": 0.06953779608011246, "learning_rate": 9.831967137990777e-05, "loss": 0.0097, "step": 4653 }, { "epoch": 0.9005417956656346, "grad_norm": 0.06622317433357239, "learning_rate": 9.831892850923294e-05, "loss": 0.0098, "step": 4654 }, { "epoch": 0.9007352941176471, "grad_norm": 0.09317870438098907, "learning_rate": 9.831818547750898e-05, "loss": 0.0098, "step": 4655 }, { "epoch": 0.9009287925696594, "grad_norm": 0.06083882600069046, "learning_rate": 9.83174422847387e-05, "loss": 0.0098, "step": 4656 }, { "epoch": 0.9011222910216719, "grad_norm": 0.10094080865383148, "learning_rate": 9.831669893092484e-05, "loss": 0.0076, "step": 4657 }, { "epoch": 0.9013157894736842, "grad_norm": 0.11095559597015381, "learning_rate": 9.831595541607018e-05, "loss": 0.0102, "step": 4658 }, { "epoch": 0.9015092879256966, "grad_norm": 0.09022898226976395, "learning_rate": 9.831521174017744e-05, "loss": 0.0096, "step": 4659 }, { "epoch": 0.901702786377709, "grad_norm": 0.09013615548610687, "learning_rate": 9.831446790324944e-05, "loss": 0.008, "step": 4660 }, { "epoch": 0.9018962848297214, "grad_norm": 0.0859057679772377, "learning_rate": 9.831372390528891e-05, "loss": 0.0094, "step": 4661 }, { "epoch": 0.9020897832817337, "grad_norm": 0.03644580766558647, "learning_rate": 9.831297974629865e-05, "loss": 0.0102, "step": 4662 }, { "epoch": 0.9022832817337462, "grad_norm": 0.08061814308166504, "learning_rate": 9.831223542628138e-05, "loss": 0.0115, "step": 4663 }, { "epoch": 0.9024767801857585, "grad_norm": 0.04660077020525932, "learning_rate": 9.83114909452399e-05, "loss": 0.0106, "step": 4664 }, { "epoch": 0.9026702786377709, "grad_norm": 0.09417957812547684, "learning_rate": 9.831074630317696e-05, "loss": 0.0097, "step": 4665 }, { "epoch": 0.9028637770897833, "grad_norm": 0.07973131537437439, "learning_rate": 9.831000150009535e-05, "loss": 0.0094, "step": 4666 }, { "epoch": 0.9030572755417957, "grad_norm": 0.08236018568277359, "learning_rate": 9.830925653599782e-05, "loss": 0.0106, "step": 4667 }, { "epoch": 0.903250773993808, "grad_norm": 0.0472368448972702, "learning_rate": 9.830851141088714e-05, "loss": 0.0095, "step": 4668 }, { "epoch": 0.9034442724458205, "grad_norm": 0.06090831756591797, "learning_rate": 9.830776612476609e-05, "loss": 0.0099, "step": 4669 }, { "epoch": 0.9036377708978328, "grad_norm": 0.06922590732574463, "learning_rate": 9.830702067763745e-05, "loss": 0.0115, "step": 4670 }, { "epoch": 0.9038312693498453, "grad_norm": 0.06793368607759476, "learning_rate": 9.830627506950396e-05, "loss": 0.0097, "step": 4671 }, { "epoch": 0.9040247678018576, "grad_norm": 0.08175308257341385, "learning_rate": 9.830552930036843e-05, "loss": 0.0091, "step": 4672 }, { "epoch": 0.9042182662538699, "grad_norm": 0.0697992593050003, 
"learning_rate": 9.830478337023359e-05, "loss": 0.0109, "step": 4673 }, { "epoch": 0.9044117647058824, "grad_norm": 0.06974716484546661, "learning_rate": 9.830403727910225e-05, "loss": 0.0121, "step": 4674 }, { "epoch": 0.9046052631578947, "grad_norm": 0.08167766779661179, "learning_rate": 9.830329102697715e-05, "loss": 0.01, "step": 4675 }, { "epoch": 0.9047987616099071, "grad_norm": 0.06383179128170013, "learning_rate": 9.830254461386111e-05, "loss": 0.0105, "step": 4676 }, { "epoch": 0.9049922600619195, "grad_norm": 0.11829447746276855, "learning_rate": 9.830179803975687e-05, "loss": 0.0105, "step": 4677 }, { "epoch": 0.9051857585139319, "grad_norm": 0.07324808090925217, "learning_rate": 9.830105130466723e-05, "loss": 0.0082, "step": 4678 }, { "epoch": 0.9053792569659442, "grad_norm": 0.11698494851589203, "learning_rate": 9.830030440859492e-05, "loss": 0.0104, "step": 4679 }, { "epoch": 0.9055727554179567, "grad_norm": 0.09847532212734222, "learning_rate": 9.829955735154275e-05, "loss": 0.0086, "step": 4680 }, { "epoch": 0.905766253869969, "grad_norm": 0.09633389860391617, "learning_rate": 9.829881013351352e-05, "loss": 0.0101, "step": 4681 }, { "epoch": 0.9059597523219814, "grad_norm": 0.11812714487314224, "learning_rate": 9.829806275450998e-05, "loss": 0.0081, "step": 4682 }, { "epoch": 0.9061532507739938, "grad_norm": 0.09804914891719818, "learning_rate": 9.829731521453489e-05, "loss": 0.0099, "step": 4683 }, { "epoch": 0.9063467492260062, "grad_norm": 0.08165065199136734, "learning_rate": 9.829656751359106e-05, "loss": 0.0096, "step": 4684 }, { "epoch": 0.9065402476780186, "grad_norm": 0.07358454912900925, "learning_rate": 9.829581965168126e-05, "loss": 0.0106, "step": 4685 }, { "epoch": 0.906733746130031, "grad_norm": 0.08128451555967331, "learning_rate": 9.829507162880828e-05, "loss": 0.0099, "step": 4686 }, { "epoch": 0.9069272445820433, "grad_norm": 0.03805610537528992, "learning_rate": 9.829432344497488e-05, "loss": 0.0101, "step": 4687 }, { "epoch": 0.9071207430340558, "grad_norm": 0.09260110557079315, "learning_rate": 9.829357510018387e-05, "loss": 0.0084, "step": 4688 }, { "epoch": 0.9073142414860681, "grad_norm": 0.047876302152872086, "learning_rate": 9.8292826594438e-05, "loss": 0.0092, "step": 4689 }, { "epoch": 0.9075077399380805, "grad_norm": 0.05789598077535629, "learning_rate": 9.829207792774007e-05, "loss": 0.009, "step": 4690 }, { "epoch": 0.9077012383900929, "grad_norm": 0.07778550684452057, "learning_rate": 9.829132910009286e-05, "loss": 0.0097, "step": 4691 }, { "epoch": 0.9078947368421053, "grad_norm": 0.056408047676086426, "learning_rate": 9.829058011149917e-05, "loss": 0.0119, "step": 4692 }, { "epoch": 0.9080882352941176, "grad_norm": 0.048977307975292206, "learning_rate": 9.828983096196178e-05, "loss": 0.0097, "step": 4693 }, { "epoch": 0.9082817337461301, "grad_norm": 0.08043167740106583, "learning_rate": 9.828908165148342e-05, "loss": 0.0092, "step": 4694 }, { "epoch": 0.9084752321981424, "grad_norm": 0.08832213282585144, "learning_rate": 9.828833218006696e-05, "loss": 0.0086, "step": 4695 }, { "epoch": 0.9086687306501547, "grad_norm": 0.08586057275533676, "learning_rate": 9.828758254771512e-05, "loss": 0.0091, "step": 4696 }, { "epoch": 0.9088622291021672, "grad_norm": 0.07648620754480362, "learning_rate": 9.828683275443073e-05, "loss": 0.0121, "step": 4697 }, { "epoch": 0.9090557275541795, "grad_norm": 0.09769267588853836, "learning_rate": 9.828608280021657e-05, "loss": 0.0094, "step": 4698 }, { "epoch": 0.909249226006192, "grad_norm": 
0.05796056240797043, "learning_rate": 9.828533268507541e-05, "loss": 0.0104, "step": 4699 }, { "epoch": 0.9094427244582043, "grad_norm": 0.09402206540107727, "learning_rate": 9.828458240901005e-05, "loss": 0.0088, "step": 4700 }, { "epoch": 0.9096362229102167, "grad_norm": 0.06049305945634842, "learning_rate": 9.828383197202328e-05, "loss": 0.0081, "step": 4701 }, { "epoch": 0.9098297213622291, "grad_norm": 0.09734860807657242, "learning_rate": 9.828308137411789e-05, "loss": 0.0119, "step": 4702 }, { "epoch": 0.9100232198142415, "grad_norm": 0.11297531425952911, "learning_rate": 9.828233061529666e-05, "loss": 0.0109, "step": 4703 }, { "epoch": 0.9102167182662538, "grad_norm": 0.08785640448331833, "learning_rate": 9.828157969556239e-05, "loss": 0.0115, "step": 4704 }, { "epoch": 0.9104102167182663, "grad_norm": 0.07064307481050491, "learning_rate": 9.828082861491788e-05, "loss": 0.0087, "step": 4705 }, { "epoch": 0.9106037151702786, "grad_norm": 0.09178128093481064, "learning_rate": 9.82800773733659e-05, "loss": 0.0115, "step": 4706 }, { "epoch": 0.910797213622291, "grad_norm": 0.05492224544286728, "learning_rate": 9.827932597090928e-05, "loss": 0.0089, "step": 4707 }, { "epoch": 0.9109907120743034, "grad_norm": 0.08352789282798767, "learning_rate": 9.827857440755078e-05, "loss": 0.0111, "step": 4708 }, { "epoch": 0.9111842105263158, "grad_norm": 0.11434036493301392, "learning_rate": 9.82778226832932e-05, "loss": 0.0116, "step": 4709 }, { "epoch": 0.9113777089783281, "grad_norm": 0.0730641707777977, "learning_rate": 9.827707079813933e-05, "loss": 0.0086, "step": 4710 }, { "epoch": 0.9115712074303406, "grad_norm": 0.1256035566329956, "learning_rate": 9.827631875209199e-05, "loss": 0.0102, "step": 4711 }, { "epoch": 0.9117647058823529, "grad_norm": 0.06338243931531906, "learning_rate": 9.827556654515395e-05, "loss": 0.0096, "step": 4712 }, { "epoch": 0.9119582043343654, "grad_norm": 0.16893337666988373, "learning_rate": 9.827481417732803e-05, "loss": 0.0083, "step": 4713 }, { "epoch": 0.9121517027863777, "grad_norm": 0.09473129361867905, "learning_rate": 9.827406164861701e-05, "loss": 0.0083, "step": 4714 }, { "epoch": 0.9123452012383901, "grad_norm": 0.12310376018285751, "learning_rate": 9.827330895902369e-05, "loss": 0.011, "step": 4715 }, { "epoch": 0.9125386996904025, "grad_norm": 0.12336061894893646, "learning_rate": 9.827255610855086e-05, "loss": 0.0096, "step": 4716 }, { "epoch": 0.9127321981424149, "grad_norm": 0.1469711810350418, "learning_rate": 9.827180309720135e-05, "loss": 0.0094, "step": 4717 }, { "epoch": 0.9129256965944272, "grad_norm": 0.11069843918085098, "learning_rate": 9.827104992497792e-05, "loss": 0.0098, "step": 4718 }, { "epoch": 0.9131191950464397, "grad_norm": 0.16588124632835388, "learning_rate": 9.82702965918834e-05, "loss": 0.0092, "step": 4719 }, { "epoch": 0.913312693498452, "grad_norm": 0.09031478315591812, "learning_rate": 9.826954309792058e-05, "loss": 0.006, "step": 4720 }, { "epoch": 0.9135061919504643, "grad_norm": 0.22767367959022522, "learning_rate": 9.826878944309225e-05, "loss": 0.0099, "step": 4721 }, { "epoch": 0.9136996904024768, "grad_norm": 0.099422886967659, "learning_rate": 9.826803562740122e-05, "loss": 0.009, "step": 4722 }, { "epoch": 0.9138931888544891, "grad_norm": 0.20650316774845123, "learning_rate": 9.82672816508503e-05, "loss": 0.0101, "step": 4723 }, { "epoch": 0.9140866873065016, "grad_norm": 0.1298845410346985, "learning_rate": 9.82665275134423e-05, "loss": 0.0109, "step": 4724 }, { "epoch": 0.9142801857585139, "grad_norm": 
0.15383678674697876, "learning_rate": 9.826577321517999e-05, "loss": 0.0077, "step": 4725 }, { "epoch": 0.9144736842105263, "grad_norm": 0.148481085896492, "learning_rate": 9.826501875606621e-05, "loss": 0.0106, "step": 4726 }, { "epoch": 0.9146671826625387, "grad_norm": 0.12829874455928802, "learning_rate": 9.826426413610376e-05, "loss": 0.0098, "step": 4727 }, { "epoch": 0.9148606811145511, "grad_norm": 0.13575346767902374, "learning_rate": 9.826350935529541e-05, "loss": 0.0097, "step": 4728 }, { "epoch": 0.9150541795665634, "grad_norm": 0.12605513632297516, "learning_rate": 9.826275441364402e-05, "loss": 0.0105, "step": 4729 }, { "epoch": 0.9152476780185759, "grad_norm": 0.07848668098449707, "learning_rate": 9.826199931115236e-05, "loss": 0.0099, "step": 4730 }, { "epoch": 0.9154411764705882, "grad_norm": 0.10411003232002258, "learning_rate": 9.826124404782324e-05, "loss": 0.0098, "step": 4731 }, { "epoch": 0.9156346749226006, "grad_norm": 0.09613385796546936, "learning_rate": 9.826048862365948e-05, "loss": 0.0089, "step": 4732 }, { "epoch": 0.915828173374613, "grad_norm": 0.1291927844285965, "learning_rate": 9.825973303866388e-05, "loss": 0.0095, "step": 4733 }, { "epoch": 0.9160216718266254, "grad_norm": 0.07624927908182144, "learning_rate": 9.825897729283926e-05, "loss": 0.0097, "step": 4734 }, { "epoch": 0.9162151702786377, "grad_norm": 0.14554952085018158, "learning_rate": 9.825822138618841e-05, "loss": 0.0106, "step": 4735 }, { "epoch": 0.9164086687306502, "grad_norm": 0.04680682346224785, "learning_rate": 9.825746531871416e-05, "loss": 0.01, "step": 4736 }, { "epoch": 0.9166021671826625, "grad_norm": 0.14282163977622986, "learning_rate": 9.82567090904193e-05, "loss": 0.0113, "step": 4737 }, { "epoch": 0.916795665634675, "grad_norm": 0.05755772441625595, "learning_rate": 9.825595270130667e-05, "loss": 0.0092, "step": 4738 }, { "epoch": 0.9169891640866873, "grad_norm": 0.09281010180711746, "learning_rate": 9.825519615137906e-05, "loss": 0.0096, "step": 4739 }, { "epoch": 0.9171826625386997, "grad_norm": 0.06275168061256409, "learning_rate": 9.825443944063928e-05, "loss": 0.0095, "step": 4740 }, { "epoch": 0.9173761609907121, "grad_norm": 0.056149113923311234, "learning_rate": 9.825368256909018e-05, "loss": 0.0087, "step": 4741 }, { "epoch": 0.9175696594427245, "grad_norm": 0.0671161487698555, "learning_rate": 9.825292553673453e-05, "loss": 0.0076, "step": 4742 }, { "epoch": 0.9177631578947368, "grad_norm": 0.11116141080856323, "learning_rate": 9.825216834357516e-05, "loss": 0.009, "step": 4743 }, { "epoch": 0.9179566563467493, "grad_norm": 0.1254805028438568, "learning_rate": 9.825141098961488e-05, "loss": 0.011, "step": 4744 }, { "epoch": 0.9181501547987616, "grad_norm": 0.08541841804981232, "learning_rate": 9.825065347485652e-05, "loss": 0.0086, "step": 4745 }, { "epoch": 0.9183436532507739, "grad_norm": 0.12802599370479584, "learning_rate": 9.824989579930289e-05, "loss": 0.0112, "step": 4746 }, { "epoch": 0.9185371517027864, "grad_norm": 0.043697528541088104, "learning_rate": 9.824913796295679e-05, "loss": 0.0094, "step": 4747 }, { "epoch": 0.9187306501547987, "grad_norm": 0.071277916431427, "learning_rate": 9.824837996582107e-05, "loss": 0.0093, "step": 4748 }, { "epoch": 0.9189241486068112, "grad_norm": 0.09586334228515625, "learning_rate": 9.824762180789852e-05, "loss": 0.0089, "step": 4749 }, { "epoch": 0.9191176470588235, "grad_norm": 0.10006105899810791, "learning_rate": 9.824686348919195e-05, "loss": 0.0087, "step": 4750 }, { "epoch": 0.9193111455108359, "grad_norm": 
0.08838426321744919, "learning_rate": 9.824610500970423e-05, "loss": 0.0073, "step": 4751 }, { "epoch": 0.9195046439628483, "grad_norm": 0.060712918639183044, "learning_rate": 9.824534636943813e-05, "loss": 0.0104, "step": 4752 }, { "epoch": 0.9196981424148607, "grad_norm": 0.13013076782226562, "learning_rate": 9.824458756839649e-05, "loss": 0.0092, "step": 4753 }, { "epoch": 0.919891640866873, "grad_norm": 0.05419750511646271, "learning_rate": 9.824382860658214e-05, "loss": 0.0097, "step": 4754 }, { "epoch": 0.9200851393188855, "grad_norm": 0.11963409185409546, "learning_rate": 9.824306948399789e-05, "loss": 0.0094, "step": 4755 }, { "epoch": 0.9202786377708978, "grad_norm": 0.06799689680337906, "learning_rate": 9.824231020064655e-05, "loss": 0.0079, "step": 4756 }, { "epoch": 0.9204721362229102, "grad_norm": 0.061276134103536606, "learning_rate": 9.824155075653095e-05, "loss": 0.0097, "step": 4757 }, { "epoch": 0.9206656346749226, "grad_norm": 0.12751370668411255, "learning_rate": 9.824079115165394e-05, "loss": 0.0101, "step": 4758 }, { "epoch": 0.920859133126935, "grad_norm": 0.06319762766361237, "learning_rate": 9.82400313860183e-05, "loss": 0.0122, "step": 4759 }, { "epoch": 0.9210526315789473, "grad_norm": 0.09593581408262253, "learning_rate": 9.823927145962689e-05, "loss": 0.0105, "step": 4760 }, { "epoch": 0.9212461300309598, "grad_norm": 0.10768133401870728, "learning_rate": 9.823851137248253e-05, "loss": 0.0087, "step": 4761 }, { "epoch": 0.9214396284829721, "grad_norm": 0.09221412986516953, "learning_rate": 9.823775112458803e-05, "loss": 0.0101, "step": 4762 }, { "epoch": 0.9216331269349846, "grad_norm": 0.10633157938718796, "learning_rate": 9.823699071594624e-05, "loss": 0.0099, "step": 4763 }, { "epoch": 0.9218266253869969, "grad_norm": 0.12949015200138092, "learning_rate": 9.823623014655996e-05, "loss": 0.0111, "step": 4764 }, { "epoch": 0.9220201238390093, "grad_norm": 0.06491941213607788, "learning_rate": 9.823546941643202e-05, "loss": 0.0086, "step": 4765 }, { "epoch": 0.9222136222910217, "grad_norm": 0.11527277529239655, "learning_rate": 9.82347085255653e-05, "loss": 0.0084, "step": 4766 }, { "epoch": 0.9224071207430341, "grad_norm": 0.07873950153589249, "learning_rate": 9.823394747396253e-05, "loss": 0.01, "step": 4767 }, { "epoch": 0.9226006191950464, "grad_norm": 0.1370149850845337, "learning_rate": 9.823318626162663e-05, "loss": 0.0108, "step": 4768 }, { "epoch": 0.9227941176470589, "grad_norm": 0.08456523716449738, "learning_rate": 9.823242488856039e-05, "loss": 0.0096, "step": 4769 }, { "epoch": 0.9229876160990712, "grad_norm": 0.1209668293595314, "learning_rate": 9.823166335476664e-05, "loss": 0.0107, "step": 4770 }, { "epoch": 0.9231811145510835, "grad_norm": 0.0900043398141861, "learning_rate": 9.823090166024821e-05, "loss": 0.0089, "step": 4771 }, { "epoch": 0.923374613003096, "grad_norm": 0.06747663021087646, "learning_rate": 9.823013980500797e-05, "loss": 0.0103, "step": 4772 }, { "epoch": 0.9235681114551083, "grad_norm": 0.12095265835523605, "learning_rate": 9.822937778904871e-05, "loss": 0.0083, "step": 4773 }, { "epoch": 0.9237616099071208, "grad_norm": 0.051887400448322296, "learning_rate": 9.822861561237328e-05, "loss": 0.0097, "step": 4774 }, { "epoch": 0.9239551083591331, "grad_norm": 0.1646602600812912, "learning_rate": 9.822785327498449e-05, "loss": 0.009, "step": 4775 }, { "epoch": 0.9241486068111455, "grad_norm": 0.0302724689245224, "learning_rate": 9.822709077688522e-05, "loss": 0.0101, "step": 4776 }, { "epoch": 0.9243421052631579, "grad_norm": 
0.16280333697795868, "learning_rate": 9.822632811807824e-05, "loss": 0.0118, "step": 4777 }, { "epoch": 0.9245356037151703, "grad_norm": 0.045527003705501556, "learning_rate": 9.822556529856646e-05, "loss": 0.009, "step": 4778 }, { "epoch": 0.9247291021671826, "grad_norm": 0.14181584119796753, "learning_rate": 9.822480231835267e-05, "loss": 0.0098, "step": 4779 }, { "epoch": 0.9249226006191951, "grad_norm": 0.06426925212144852, "learning_rate": 9.82240391774397e-05, "loss": 0.0096, "step": 4780 }, { "epoch": 0.9251160990712074, "grad_norm": 0.08047224581241608, "learning_rate": 9.822327587583041e-05, "loss": 0.0091, "step": 4781 }, { "epoch": 0.9253095975232198, "grad_norm": 0.11546406149864197, "learning_rate": 9.822251241352763e-05, "loss": 0.0106, "step": 4782 }, { "epoch": 0.9255030959752322, "grad_norm": 0.10147003829479218, "learning_rate": 9.822174879053422e-05, "loss": 0.0102, "step": 4783 }, { "epoch": 0.9256965944272446, "grad_norm": 0.10536041110754013, "learning_rate": 9.822098500685297e-05, "loss": 0.0084, "step": 4784 }, { "epoch": 0.9258900928792569, "grad_norm": 0.09300532191991806, "learning_rate": 9.822022106248678e-05, "loss": 0.01, "step": 4785 }, { "epoch": 0.9260835913312694, "grad_norm": 0.0963079184293747, "learning_rate": 9.821945695743843e-05, "loss": 0.0083, "step": 4786 }, { "epoch": 0.9262770897832817, "grad_norm": 0.07526206970214844, "learning_rate": 9.82186926917108e-05, "loss": 0.0092, "step": 4787 }, { "epoch": 0.9264705882352942, "grad_norm": 0.1137349084019661, "learning_rate": 9.82179282653067e-05, "loss": 0.0075, "step": 4788 }, { "epoch": 0.9266640866873065, "grad_norm": 0.05791611969470978, "learning_rate": 9.821716367822901e-05, "loss": 0.0099, "step": 4789 }, { "epoch": 0.9268575851393189, "grad_norm": 0.1094890683889389, "learning_rate": 9.821639893048055e-05, "loss": 0.0077, "step": 4790 }, { "epoch": 0.9270510835913313, "grad_norm": 0.09277863055467606, "learning_rate": 9.821563402206417e-05, "loss": 0.0095, "step": 4791 }, { "epoch": 0.9272445820433437, "grad_norm": 0.10342583805322647, "learning_rate": 9.821486895298271e-05, "loss": 0.0097, "step": 4792 }, { "epoch": 0.927438080495356, "grad_norm": 0.12659788131713867, "learning_rate": 9.821410372323903e-05, "loss": 0.0091, "step": 4793 }, { "epoch": 0.9276315789473685, "grad_norm": 0.11679219454526901, "learning_rate": 9.821333833283594e-05, "loss": 0.0081, "step": 4794 }, { "epoch": 0.9278250773993808, "grad_norm": 0.10251346230506897, "learning_rate": 9.821257278177631e-05, "loss": 0.0115, "step": 4795 }, { "epoch": 0.9280185758513931, "grad_norm": 0.1275840401649475, "learning_rate": 9.821180707006299e-05, "loss": 0.0084, "step": 4796 }, { "epoch": 0.9282120743034056, "grad_norm": 0.10910633206367493, "learning_rate": 9.82110411976988e-05, "loss": 0.0099, "step": 4797 }, { "epoch": 0.9284055727554179, "grad_norm": 0.16156980395317078, "learning_rate": 9.821027516468663e-05, "loss": 0.0099, "step": 4798 }, { "epoch": 0.9285990712074303, "grad_norm": 0.1325094997882843, "learning_rate": 9.82095089710293e-05, "loss": 0.0103, "step": 4799 }, { "epoch": 0.9287925696594427, "grad_norm": 0.1299230009317398, "learning_rate": 9.820874261672966e-05, "loss": 0.0093, "step": 4800 }, { "epoch": 0.9289860681114551, "grad_norm": 0.16095522046089172, "learning_rate": 9.820797610179056e-05, "loss": 0.0079, "step": 4801 }, { "epoch": 0.9291795665634675, "grad_norm": 0.08363429456949234, "learning_rate": 9.820720942621484e-05, "loss": 0.0078, "step": 4802 }, { "epoch": 0.9293730650154799, "grad_norm": 
0.1783229559659958, "learning_rate": 9.820644259000538e-05, "loss": 0.0111, "step": 4803 }, { "epoch": 0.9295665634674922, "grad_norm": 0.06504318118095398, "learning_rate": 9.8205675593165e-05, "loss": 0.0094, "step": 4804 }, { "epoch": 0.9297600619195047, "grad_norm": 0.14752209186553955, "learning_rate": 9.820490843569658e-05, "loss": 0.0097, "step": 4805 }, { "epoch": 0.929953560371517, "grad_norm": 0.05148289352655411, "learning_rate": 9.820414111760294e-05, "loss": 0.0112, "step": 4806 }, { "epoch": 0.9301470588235294, "grad_norm": 0.13728322088718414, "learning_rate": 9.820337363888696e-05, "loss": 0.0081, "step": 4807 }, { "epoch": 0.9303405572755418, "grad_norm": 0.06632616370916367, "learning_rate": 9.820260599955148e-05, "loss": 0.0103, "step": 4808 }, { "epoch": 0.9305340557275542, "grad_norm": 0.18069909512996674, "learning_rate": 9.820183819959933e-05, "loss": 0.0094, "step": 4809 }, { "epoch": 0.9307275541795665, "grad_norm": 0.08413723856210709, "learning_rate": 9.820107023903344e-05, "loss": 0.0103, "step": 4810 }, { "epoch": 0.930921052631579, "grad_norm": 0.1607494056224823, "learning_rate": 9.820030211785659e-05, "loss": 0.0122, "step": 4811 }, { "epoch": 0.9311145510835913, "grad_norm": 0.1283036768436432, "learning_rate": 9.819953383607165e-05, "loss": 0.0091, "step": 4812 }, { "epoch": 0.9313080495356038, "grad_norm": 0.07731936872005463, "learning_rate": 9.81987653936815e-05, "loss": 0.0079, "step": 4813 }, { "epoch": 0.9315015479876161, "grad_norm": 0.13071367144584656, "learning_rate": 9.819799679068899e-05, "loss": 0.0103, "step": 4814 }, { "epoch": 0.9316950464396285, "grad_norm": 0.05761552229523659, "learning_rate": 9.819722802709697e-05, "loss": 0.009, "step": 4815 }, { "epoch": 0.9318885448916409, "grad_norm": 0.10846398025751114, "learning_rate": 9.819645910290828e-05, "loss": 0.0094, "step": 4816 }, { "epoch": 0.9320820433436533, "grad_norm": 0.06259030103683472, "learning_rate": 9.819569001812583e-05, "loss": 0.0093, "step": 4817 }, { "epoch": 0.9322755417956656, "grad_norm": 0.08959342539310455, "learning_rate": 9.819492077275243e-05, "loss": 0.009, "step": 4818 }, { "epoch": 0.9324690402476781, "grad_norm": 0.09192552417516708, "learning_rate": 9.819415136679097e-05, "loss": 0.0097, "step": 4819 }, { "epoch": 0.9326625386996904, "grad_norm": 0.10050046443939209, "learning_rate": 9.819338180024428e-05, "loss": 0.008, "step": 4820 }, { "epoch": 0.9328560371517027, "grad_norm": 0.11018569022417068, "learning_rate": 9.819261207311525e-05, "loss": 0.0089, "step": 4821 }, { "epoch": 0.9330495356037152, "grad_norm": 0.10334015637636185, "learning_rate": 9.819184218540674e-05, "loss": 0.009, "step": 4822 }, { "epoch": 0.9332430340557275, "grad_norm": 0.06729311496019363, "learning_rate": 9.819107213712159e-05, "loss": 0.01, "step": 4823 }, { "epoch": 0.93343653250774, "grad_norm": 0.04491555690765381, "learning_rate": 9.819030192826269e-05, "loss": 0.0099, "step": 4824 }, { "epoch": 0.9336300309597523, "grad_norm": 0.09325723350048065, "learning_rate": 9.818953155883287e-05, "loss": 0.0113, "step": 4825 }, { "epoch": 0.9338235294117647, "grad_norm": 0.06476856023073196, "learning_rate": 9.818876102883503e-05, "loss": 0.0099, "step": 4826 }, { "epoch": 0.934017027863777, "grad_norm": 0.09776682406663895, "learning_rate": 9.8187990338272e-05, "loss": 0.0094, "step": 4827 }, { "epoch": 0.9342105263157895, "grad_norm": 0.05250988528132439, "learning_rate": 9.818721948714669e-05, "loss": 0.0098, "step": 4828 }, { "epoch": 0.9344040247678018, "grad_norm": 
0.10324851423501968, "learning_rate": 9.818644847546192e-05, "loss": 0.0103, "step": 4829 }, { "epoch": 0.9345975232198143, "grad_norm": 0.06676805764436722, "learning_rate": 9.818567730322058e-05, "loss": 0.0104, "step": 4830 }, { "epoch": 0.9347910216718266, "grad_norm": 0.09211182594299316, "learning_rate": 9.818490597042553e-05, "loss": 0.0105, "step": 4831 }, { "epoch": 0.934984520123839, "grad_norm": 0.08895660191774368, "learning_rate": 9.818413447707965e-05, "loss": 0.009, "step": 4832 }, { "epoch": 0.9351780185758514, "grad_norm": 0.08015721291303635, "learning_rate": 9.818336282318581e-05, "loss": 0.0091, "step": 4833 }, { "epoch": 0.9353715170278638, "grad_norm": 0.07378779351711273, "learning_rate": 9.818259100874685e-05, "loss": 0.0087, "step": 4834 }, { "epoch": 0.9355650154798761, "grad_norm": 0.07554285228252411, "learning_rate": 9.818181903376565e-05, "loss": 0.0087, "step": 4835 }, { "epoch": 0.9357585139318886, "grad_norm": 0.08202807605266571, "learning_rate": 9.818104689824511e-05, "loss": 0.0091, "step": 4836 }, { "epoch": 0.9359520123839009, "grad_norm": 0.06859193742275238, "learning_rate": 9.818027460218806e-05, "loss": 0.0095, "step": 4837 }, { "epoch": 0.9361455108359134, "grad_norm": 0.09699206799268723, "learning_rate": 9.817950214559738e-05, "loss": 0.0078, "step": 4838 }, { "epoch": 0.9363390092879257, "grad_norm": 0.07872527837753296, "learning_rate": 9.817872952847598e-05, "loss": 0.0072, "step": 4839 }, { "epoch": 0.9365325077399381, "grad_norm": 0.0816325694322586, "learning_rate": 9.817795675082668e-05, "loss": 0.0112, "step": 4840 }, { "epoch": 0.9367260061919505, "grad_norm": 0.08035695552825928, "learning_rate": 9.81771838126524e-05, "loss": 0.0092, "step": 4841 }, { "epoch": 0.9369195046439629, "grad_norm": 0.1117941290140152, "learning_rate": 9.817641071395596e-05, "loss": 0.0112, "step": 4842 }, { "epoch": 0.9371130030959752, "grad_norm": 0.11799103766679764, "learning_rate": 9.817563745474028e-05, "loss": 0.0113, "step": 4843 }, { "epoch": 0.9373065015479877, "grad_norm": 0.11496570706367493, "learning_rate": 9.817486403500821e-05, "loss": 0.0101, "step": 4844 }, { "epoch": 0.9375, "grad_norm": 0.11571961641311646, "learning_rate": 9.817409045476264e-05, "loss": 0.0099, "step": 4845 }, { "epoch": 0.9376934984520123, "grad_norm": 0.13842031359672546, "learning_rate": 9.817331671400644e-05, "loss": 0.0105, "step": 4846 }, { "epoch": 0.9378869969040248, "grad_norm": 0.12103399634361267, "learning_rate": 9.81725428127425e-05, "loss": 0.0107, "step": 4847 }, { "epoch": 0.9380804953560371, "grad_norm": 0.1296902745962143, "learning_rate": 9.817176875097366e-05, "loss": 0.0099, "step": 4848 }, { "epoch": 0.9382739938080495, "grad_norm": 0.11307177692651749, "learning_rate": 9.817099452870284e-05, "loss": 0.0085, "step": 4849 }, { "epoch": 0.9384674922600619, "grad_norm": 0.11838307231664658, "learning_rate": 9.817022014593289e-05, "loss": 0.0081, "step": 4850 }, { "epoch": 0.9386609907120743, "grad_norm": 0.10360539704561234, "learning_rate": 9.81694456026667e-05, "loss": 0.0079, "step": 4851 }, { "epoch": 0.9388544891640866, "grad_norm": 0.10915824770927429, "learning_rate": 9.816867089890715e-05, "loss": 0.0083, "step": 4852 }, { "epoch": 0.9390479876160991, "grad_norm": 0.08158963173627853, "learning_rate": 9.81678960346571e-05, "loss": 0.0101, "step": 4853 }, { "epoch": 0.9392414860681114, "grad_norm": 0.08747753500938416, "learning_rate": 9.816712100991948e-05, "loss": 0.0095, "step": 4854 }, { "epoch": 0.9394349845201239, "grad_norm": 
0.10509611666202545, "learning_rate": 9.816634582469712e-05, "loss": 0.0102, "step": 4855 }, { "epoch": 0.9396284829721362, "grad_norm": 0.07232688367366791, "learning_rate": 9.816557047899294e-05, "loss": 0.01, "step": 4856 }, { "epoch": 0.9398219814241486, "grad_norm": 0.0987270176410675, "learning_rate": 9.81647949728098e-05, "loss": 0.0092, "step": 4857 }, { "epoch": 0.940015479876161, "grad_norm": 0.03961735963821411, "learning_rate": 9.816401930615058e-05, "loss": 0.0079, "step": 4858 }, { "epoch": 0.9402089783281734, "grad_norm": 0.10202627629041672, "learning_rate": 9.816324347901816e-05, "loss": 0.0108, "step": 4859 }, { "epoch": 0.9404024767801857, "grad_norm": 0.05661825090646744, "learning_rate": 9.816246749141547e-05, "loss": 0.0082, "step": 4860 }, { "epoch": 0.9405959752321982, "grad_norm": 0.05788782238960266, "learning_rate": 9.816169134334533e-05, "loss": 0.0078, "step": 4861 }, { "epoch": 0.9407894736842105, "grad_norm": 0.07763861119747162, "learning_rate": 9.816091503481068e-05, "loss": 0.0096, "step": 4862 }, { "epoch": 0.940982972136223, "grad_norm": 0.04130356386303902, "learning_rate": 9.816013856581438e-05, "loss": 0.01, "step": 4863 }, { "epoch": 0.9411764705882353, "grad_norm": 0.05023493990302086, "learning_rate": 9.815936193635931e-05, "loss": 0.0101, "step": 4864 }, { "epoch": 0.9413699690402477, "grad_norm": 0.06488273292779922, "learning_rate": 9.815858514644836e-05, "loss": 0.0094, "step": 4865 }, { "epoch": 0.94156346749226, "grad_norm": 0.04857358708977699, "learning_rate": 9.815780819608443e-05, "loss": 0.011, "step": 4866 }, { "epoch": 0.9417569659442725, "grad_norm": 0.05836685746908188, "learning_rate": 9.815703108527043e-05, "loss": 0.0098, "step": 4867 }, { "epoch": 0.9419504643962848, "grad_norm": 0.061245471239089966, "learning_rate": 9.815625381400918e-05, "loss": 0.0076, "step": 4868 }, { "epoch": 0.9421439628482973, "grad_norm": 0.1265101581811905, "learning_rate": 9.815547638230365e-05, "loss": 0.0104, "step": 4869 }, { "epoch": 0.9423374613003096, "grad_norm": 0.07585018873214722, "learning_rate": 9.815469879015669e-05, "loss": 0.0108, "step": 4870 }, { "epoch": 0.9425309597523219, "grad_norm": 0.12369722872972488, "learning_rate": 9.815392103757116e-05, "loss": 0.0115, "step": 4871 }, { "epoch": 0.9427244582043344, "grad_norm": 0.10265298187732697, "learning_rate": 9.815314312455e-05, "loss": 0.0078, "step": 4872 }, { "epoch": 0.9429179566563467, "grad_norm": 0.13362625241279602, "learning_rate": 9.81523650510961e-05, "loss": 0.0113, "step": 4873 }, { "epoch": 0.9431114551083591, "grad_norm": 0.09562402963638306, "learning_rate": 9.815158681721234e-05, "loss": 0.0101, "step": 4874 }, { "epoch": 0.9433049535603715, "grad_norm": 0.167756125330925, "learning_rate": 9.815080842290159e-05, "loss": 0.0083, "step": 4875 }, { "epoch": 0.9434984520123839, "grad_norm": 0.10008566826581955, "learning_rate": 9.815002986816679e-05, "loss": 0.0103, "step": 4876 }, { "epoch": 0.9436919504643962, "grad_norm": 0.18664461374282837, "learning_rate": 9.814925115301081e-05, "loss": 0.0106, "step": 4877 }, { "epoch": 0.9438854489164087, "grad_norm": 0.10118458420038223, "learning_rate": 9.814847227743653e-05, "loss": 0.0094, "step": 4878 }, { "epoch": 0.944078947368421, "grad_norm": 0.1829553246498108, "learning_rate": 9.814769324144687e-05, "loss": 0.0095, "step": 4879 }, { "epoch": 0.9442724458204335, "grad_norm": 0.0706578940153122, "learning_rate": 9.814691404504472e-05, "loss": 0.0101, "step": 4880 }, { "epoch": 0.9444659442724458, "grad_norm": 
0.16659614443778992, "learning_rate": 9.814613468823297e-05, "loss": 0.0116, "step": 4881 }, { "epoch": 0.9446594427244582, "grad_norm": 0.0437081977725029, "learning_rate": 9.814535517101454e-05, "loss": 0.0099, "step": 4882 }, { "epoch": 0.9448529411764706, "grad_norm": 0.13612517714500427, "learning_rate": 9.81445754933923e-05, "loss": 0.0117, "step": 4883 }, { "epoch": 0.945046439628483, "grad_norm": 0.06569208949804306, "learning_rate": 9.814379565536916e-05, "loss": 0.0078, "step": 4884 }, { "epoch": 0.9452399380804953, "grad_norm": 0.13349750638008118, "learning_rate": 9.814301565694801e-05, "loss": 0.0088, "step": 4885 }, { "epoch": 0.9454334365325078, "grad_norm": 0.13186855614185333, "learning_rate": 9.814223549813176e-05, "loss": 0.0104, "step": 4886 }, { "epoch": 0.9456269349845201, "grad_norm": 0.3105800151824951, "learning_rate": 9.814145517892332e-05, "loss": 0.0125, "step": 4887 }, { "epoch": 0.9458204334365325, "grad_norm": 0.11594254523515701, "learning_rate": 9.814067469932558e-05, "loss": 0.0098, "step": 4888 }, { "epoch": 0.9460139318885449, "grad_norm": 0.24768070876598358, "learning_rate": 9.813989405934142e-05, "loss": 0.0108, "step": 4889 }, { "epoch": 0.9462074303405573, "grad_norm": 0.11078884452581406, "learning_rate": 9.813911325897378e-05, "loss": 0.0099, "step": 4890 }, { "epoch": 0.9464009287925697, "grad_norm": 0.21609283983707428, "learning_rate": 9.813833229822556e-05, "loss": 0.0104, "step": 4891 }, { "epoch": 0.9465944272445821, "grad_norm": 0.17387837171554565, "learning_rate": 9.813755117709963e-05, "loss": 0.0106, "step": 4892 }, { "epoch": 0.9467879256965944, "grad_norm": 0.17439977824687958, "learning_rate": 9.813676989559892e-05, "loss": 0.0088, "step": 4893 }, { "epoch": 0.9469814241486069, "grad_norm": 0.15260429680347443, "learning_rate": 9.813598845372632e-05, "loss": 0.0091, "step": 4894 }, { "epoch": 0.9471749226006192, "grad_norm": 0.07130614668130875, "learning_rate": 9.813520685148476e-05, "loss": 0.0087, "step": 4895 }, { "epoch": 0.9473684210526315, "grad_norm": 0.20072129368782043, "learning_rate": 9.813442508887712e-05, "loss": 0.0068, "step": 4896 }, { "epoch": 0.947561919504644, "grad_norm": 0.11464366316795349, "learning_rate": 9.813364316590632e-05, "loss": 0.0099, "step": 4897 }, { "epoch": 0.9477554179566563, "grad_norm": 0.15136203169822693, "learning_rate": 9.813286108257525e-05, "loss": 0.0112, "step": 4898 }, { "epoch": 0.9479489164086687, "grad_norm": 0.10735432803630829, "learning_rate": 9.813207883888685e-05, "loss": 0.0092, "step": 4899 }, { "epoch": 0.9481424148606811, "grad_norm": 0.08260636776685715, "learning_rate": 9.8131296434844e-05, "loss": 0.0095, "step": 4900 }, { "epoch": 0.9483359133126935, "grad_norm": 0.09939549118280411, "learning_rate": 9.813051387044962e-05, "loss": 0.0097, "step": 4901 }, { "epoch": 0.9485294117647058, "grad_norm": 0.07152857631444931, "learning_rate": 9.812973114570662e-05, "loss": 0.0107, "step": 4902 }, { "epoch": 0.9487229102167183, "grad_norm": 0.08545047044754028, "learning_rate": 9.81289482606179e-05, "loss": 0.0101, "step": 4903 }, { "epoch": 0.9489164086687306, "grad_norm": 0.09592878073453903, "learning_rate": 9.812816521518639e-05, "loss": 0.0106, "step": 4904 }, { "epoch": 0.9491099071207431, "grad_norm": 0.08531786501407623, "learning_rate": 9.812738200941496e-05, "loss": 0.0096, "step": 4905 }, { "epoch": 0.9493034055727554, "grad_norm": 0.08636868745088577, "learning_rate": 9.812659864330659e-05, "loss": 0.0124, "step": 4906 }, { "epoch": 0.9494969040247678, "grad_norm": 
0.1408367156982422, "learning_rate": 9.812581511686413e-05, "loss": 0.008, "step": 4907 }, { "epoch": 0.9496904024767802, "grad_norm": 0.0968141108751297, "learning_rate": 9.812503143009052e-05, "loss": 0.0105, "step": 4908 }, { "epoch": 0.9498839009287926, "grad_norm": 0.10047414898872375, "learning_rate": 9.812424758298865e-05, "loss": 0.0107, "step": 4909 }, { "epoch": 0.9500773993808049, "grad_norm": 0.12151385098695755, "learning_rate": 9.812346357556148e-05, "loss": 0.0099, "step": 4910 }, { "epoch": 0.9502708978328174, "grad_norm": 0.08450213074684143, "learning_rate": 9.81226794078119e-05, "loss": 0.0082, "step": 4911 }, { "epoch": 0.9504643962848297, "grad_norm": 0.09369086474180222, "learning_rate": 9.81218950797428e-05, "loss": 0.01, "step": 4912 }, { "epoch": 0.9506578947368421, "grad_norm": 0.15990649163722992, "learning_rate": 9.812111059135714e-05, "loss": 0.0088, "step": 4913 }, { "epoch": 0.9508513931888545, "grad_norm": 0.12691453099250793, "learning_rate": 9.812032594265781e-05, "loss": 0.0093, "step": 4914 }, { "epoch": 0.9510448916408669, "grad_norm": 0.17697782814502716, "learning_rate": 9.811954113364772e-05, "loss": 0.0078, "step": 4915 }, { "epoch": 0.9512383900928792, "grad_norm": 0.06986642628908157, "learning_rate": 9.811875616432983e-05, "loss": 0.0077, "step": 4916 }, { "epoch": 0.9514318885448917, "grad_norm": 0.20182155072689056, "learning_rate": 9.811797103470701e-05, "loss": 0.0105, "step": 4917 }, { "epoch": 0.951625386996904, "grad_norm": 0.08057722449302673, "learning_rate": 9.811718574478221e-05, "loss": 0.0095, "step": 4918 }, { "epoch": 0.9518188854489165, "grad_norm": 0.18898913264274597, "learning_rate": 9.811640029455832e-05, "loss": 0.0106, "step": 4919 }, { "epoch": 0.9520123839009288, "grad_norm": 0.14590482413768768, "learning_rate": 9.81156146840383e-05, "loss": 0.011, "step": 4920 }, { "epoch": 0.9522058823529411, "grad_norm": 0.09104123711585999, "learning_rate": 9.811482891322502e-05, "loss": 0.0094, "step": 4921 }, { "epoch": 0.9523993808049536, "grad_norm": 0.11819273978471756, "learning_rate": 9.811404298212145e-05, "loss": 0.0095, "step": 4922 }, { "epoch": 0.9525928792569659, "grad_norm": 0.07633727043867111, "learning_rate": 9.811325689073048e-05, "loss": 0.0082, "step": 4923 }, { "epoch": 0.9527863777089783, "grad_norm": 0.09449448436498642, "learning_rate": 9.811247063905506e-05, "loss": 0.01, "step": 4924 }, { "epoch": 0.9529798761609907, "grad_norm": 0.08678651601076126, "learning_rate": 9.811168422709807e-05, "loss": 0.0101, "step": 4925 }, { "epoch": 0.9531733746130031, "grad_norm": 0.09818259626626968, "learning_rate": 9.81108976548625e-05, "loss": 0.0088, "step": 4926 }, { "epoch": 0.9533668730650154, "grad_norm": 0.07744906842708588, "learning_rate": 9.81101109223512e-05, "loss": 0.008, "step": 4927 }, { "epoch": 0.9535603715170279, "grad_norm": 0.10775583982467651, "learning_rate": 9.810932402956715e-05, "loss": 0.0107, "step": 4928 }, { "epoch": 0.9537538699690402, "grad_norm": 0.07402059435844421, "learning_rate": 9.810853697651325e-05, "loss": 0.0105, "step": 4929 }, { "epoch": 0.9539473684210527, "grad_norm": 0.09613250195980072, "learning_rate": 9.810774976319244e-05, "loss": 0.0103, "step": 4930 }, { "epoch": 0.954140866873065, "grad_norm": 0.10004459321498871, "learning_rate": 9.810696238960763e-05, "loss": 0.0111, "step": 4931 }, { "epoch": 0.9543343653250774, "grad_norm": 0.0818944051861763, "learning_rate": 9.810617485576176e-05, "loss": 0.0091, "step": 4932 }, { "epoch": 0.9545278637770898, "grad_norm": 
0.12024001032114029, "learning_rate": 9.810538716165775e-05, "loss": 0.0102, "step": 4933 }, { "epoch": 0.9547213622291022, "grad_norm": 0.096070297062397, "learning_rate": 9.810459930729851e-05, "loss": 0.011, "step": 4934 }, { "epoch": 0.9549148606811145, "grad_norm": 0.10623686015605927, "learning_rate": 9.810381129268704e-05, "loss": 0.01, "step": 4935 }, { "epoch": 0.955108359133127, "grad_norm": 0.09169650822877884, "learning_rate": 9.810302311782618e-05, "loss": 0.0107, "step": 4936 }, { "epoch": 0.9553018575851393, "grad_norm": 0.10713529586791992, "learning_rate": 9.81022347827189e-05, "loss": 0.0086, "step": 4937 }, { "epoch": 0.9554953560371517, "grad_norm": 0.12314772605895996, "learning_rate": 9.810144628736816e-05, "loss": 0.0112, "step": 4938 }, { "epoch": 0.9556888544891641, "grad_norm": 0.09308896958827972, "learning_rate": 9.810065763177683e-05, "loss": 0.0103, "step": 4939 }, { "epoch": 0.9558823529411765, "grad_norm": 0.12355833500623703, "learning_rate": 9.809986881594791e-05, "loss": 0.0096, "step": 4940 }, { "epoch": 0.9560758513931888, "grad_norm": 0.08732723444700241, "learning_rate": 9.80990798398843e-05, "loss": 0.0104, "step": 4941 }, { "epoch": 0.9562693498452013, "grad_norm": 0.14939233660697937, "learning_rate": 9.80982907035889e-05, "loss": 0.0083, "step": 4942 }, { "epoch": 0.9564628482972136, "grad_norm": 0.07834705710411072, "learning_rate": 9.809750140706468e-05, "loss": 0.0084, "step": 4943 }, { "epoch": 0.9566563467492261, "grad_norm": 0.15111590921878815, "learning_rate": 9.809671195031458e-05, "loss": 0.0104, "step": 4944 }, { "epoch": 0.9568498452012384, "grad_norm": 0.08273802697658539, "learning_rate": 9.809592233334153e-05, "loss": 0.0092, "step": 4945 }, { "epoch": 0.9570433436532507, "grad_norm": 0.18793578445911407, "learning_rate": 9.809513255614845e-05, "loss": 0.0124, "step": 4946 }, { "epoch": 0.9572368421052632, "grad_norm": 0.16113993525505066, "learning_rate": 9.809434261873828e-05, "loss": 0.0108, "step": 4947 }, { "epoch": 0.9574303405572755, "grad_norm": 0.11516327410936356, "learning_rate": 9.809355252111397e-05, "loss": 0.0103, "step": 4948 }, { "epoch": 0.9576238390092879, "grad_norm": 0.15551798045635223, "learning_rate": 9.809276226327845e-05, "loss": 0.0103, "step": 4949 }, { "epoch": 0.9578173374613003, "grad_norm": 0.12069090455770493, "learning_rate": 9.809197184523467e-05, "loss": 0.0116, "step": 4950 }, { "epoch": 0.9580108359133127, "grad_norm": 0.19200100004673004, "learning_rate": 9.809118126698555e-05, "loss": 0.0098, "step": 4951 }, { "epoch": 0.958204334365325, "grad_norm": 0.11035235226154327, "learning_rate": 9.809039052853402e-05, "loss": 0.0108, "step": 4952 }, { "epoch": 0.9583978328173375, "grad_norm": 0.20061197876930237, "learning_rate": 9.808959962988305e-05, "loss": 0.0097, "step": 4953 }, { "epoch": 0.9585913312693498, "grad_norm": 0.1133209764957428, "learning_rate": 9.808880857103555e-05, "loss": 0.0106, "step": 4954 }, { "epoch": 0.9587848297213623, "grad_norm": 0.21128837764263153, "learning_rate": 9.808801735199452e-05, "loss": 0.0087, "step": 4955 }, { "epoch": 0.9589783281733746, "grad_norm": 0.09769472479820251, "learning_rate": 9.808722597276281e-05, "loss": 0.0093, "step": 4956 }, { "epoch": 0.959171826625387, "grad_norm": 0.19090154767036438, "learning_rate": 9.808643443334343e-05, "loss": 0.011, "step": 4957 }, { "epoch": 0.9593653250773994, "grad_norm": 0.09712126106023788, "learning_rate": 9.80856427337393e-05, "loss": 0.0104, "step": 4958 }, { "epoch": 0.9595588235294118, "grad_norm": 
0.15566454827785492, "learning_rate": 9.808485087395339e-05, "loss": 0.0096, "step": 4959 }, { "epoch": 0.9597523219814241, "grad_norm": 0.0872991606593132, "learning_rate": 9.808405885398858e-05, "loss": 0.0103, "step": 4960 }, { "epoch": 0.9599458204334366, "grad_norm": 0.08543549478054047, "learning_rate": 9.808326667384788e-05, "loss": 0.0079, "step": 4961 }, { "epoch": 0.9601393188854489, "grad_norm": 0.09918168932199478, "learning_rate": 9.808247433353422e-05, "loss": 0.0093, "step": 4962 }, { "epoch": 0.9603328173374613, "grad_norm": 0.07552508264780045, "learning_rate": 9.808168183305051e-05, "loss": 0.0096, "step": 4963 }, { "epoch": 0.9605263157894737, "grad_norm": 0.07863390445709229, "learning_rate": 9.808088917239974e-05, "loss": 0.0083, "step": 4964 }, { "epoch": 0.9607198142414861, "grad_norm": 0.04729289934039116, "learning_rate": 9.808009635158483e-05, "loss": 0.0072, "step": 4965 }, { "epoch": 0.9609133126934984, "grad_norm": 0.11218573153018951, "learning_rate": 9.807930337060874e-05, "loss": 0.0091, "step": 4966 }, { "epoch": 0.9611068111455109, "grad_norm": 0.08857724070549011, "learning_rate": 9.807851022947441e-05, "loss": 0.0098, "step": 4967 }, { "epoch": 0.9613003095975232, "grad_norm": 0.1088542714715004, "learning_rate": 9.807771692818482e-05, "loss": 0.011, "step": 4968 }, { "epoch": 0.9614938080495357, "grad_norm": 0.11351660639047623, "learning_rate": 9.807692346674287e-05, "loss": 0.0086, "step": 4969 }, { "epoch": 0.961687306501548, "grad_norm": 0.08625934273004532, "learning_rate": 9.807612984515155e-05, "loss": 0.0091, "step": 4970 }, { "epoch": 0.9618808049535603, "grad_norm": 0.10798773169517517, "learning_rate": 9.807533606341376e-05, "loss": 0.0107, "step": 4971 }, { "epoch": 0.9620743034055728, "grad_norm": 0.20221473276615143, "learning_rate": 9.80745421215325e-05, "loss": 0.01, "step": 4972 }, { "epoch": 0.9622678018575851, "grad_norm": 0.11781378835439682, "learning_rate": 9.807374801951072e-05, "loss": 0.012, "step": 4973 }, { "epoch": 0.9624613003095975, "grad_norm": 0.19502677023410797, "learning_rate": 9.807295375735135e-05, "loss": 0.0095, "step": 4974 }, { "epoch": 0.9626547987616099, "grad_norm": 0.06467754393815994, "learning_rate": 9.807215933505736e-05, "loss": 0.0095, "step": 4975 }, { "epoch": 0.9628482972136223, "grad_norm": 0.18265770375728607, "learning_rate": 9.807136475263168e-05, "loss": 0.0095, "step": 4976 }, { "epoch": 0.9630417956656346, "grad_norm": 0.1469779759645462, "learning_rate": 9.807057001007728e-05, "loss": 0.0092, "step": 4977 }, { "epoch": 0.9632352941176471, "grad_norm": 0.21941271424293518, "learning_rate": 9.806977510739712e-05, "loss": 0.0089, "step": 4978 }, { "epoch": 0.9634287925696594, "grad_norm": 0.18282023072242737, "learning_rate": 9.806898004459415e-05, "loss": 0.01, "step": 4979 }, { "epoch": 0.9636222910216719, "grad_norm": 0.18650346994400024, "learning_rate": 9.806818482167132e-05, "loss": 0.0102, "step": 4980 }, { "epoch": 0.9638157894736842, "grad_norm": 0.22788722813129425, "learning_rate": 9.806738943863159e-05, "loss": 0.0088, "step": 4981 }, { "epoch": 0.9640092879256966, "grad_norm": 0.16622397303581238, "learning_rate": 9.806659389547791e-05, "loss": 0.0099, "step": 4982 }, { "epoch": 0.964202786377709, "grad_norm": 0.2776890993118286, "learning_rate": 9.806579819221327e-05, "loss": 0.0106, "step": 4983 }, { "epoch": 0.9643962848297214, "grad_norm": 0.15206481516361237, "learning_rate": 9.806500232884059e-05, "loss": 0.0111, "step": 4984 }, { "epoch": 0.9645897832817337, "grad_norm": 
0.22667072713375092, "learning_rate": 9.806420630536285e-05, "loss": 0.0108, "step": 4985 }, { "epoch": 0.9647832817337462, "grad_norm": 0.16288165748119354, "learning_rate": 9.806341012178298e-05, "loss": 0.0097, "step": 4986 }, { "epoch": 0.9649767801857585, "grad_norm": 0.16690099239349365, "learning_rate": 9.806261377810398e-05, "loss": 0.0095, "step": 4987 }, { "epoch": 0.9651702786377709, "grad_norm": 0.20791205763816833, "learning_rate": 9.80618172743288e-05, "loss": 0.0095, "step": 4988 }, { "epoch": 0.9653637770897833, "grad_norm": 0.14968560636043549, "learning_rate": 9.806102061046038e-05, "loss": 0.0102, "step": 4989 }, { "epoch": 0.9655572755417957, "grad_norm": 0.17235776782035828, "learning_rate": 9.80602237865017e-05, "loss": 0.0094, "step": 4990 }, { "epoch": 0.965750773993808, "grad_norm": 0.19673468172550201, "learning_rate": 9.805942680245571e-05, "loss": 0.0106, "step": 4991 }, { "epoch": 0.9659442724458205, "grad_norm": 0.283036470413208, "learning_rate": 9.80586296583254e-05, "loss": 0.0082, "step": 4992 }, { "epoch": 0.9661377708978328, "grad_norm": 0.21592630445957184, "learning_rate": 9.80578323541137e-05, "loss": 0.0104, "step": 4993 }, { "epoch": 0.9663312693498453, "grad_norm": 0.20254075527191162, "learning_rate": 9.80570348898236e-05, "loss": 0.01, "step": 4994 }, { "epoch": 0.9665247678018576, "grad_norm": 0.3188382387161255, "learning_rate": 9.805623726545805e-05, "loss": 0.0114, "step": 4995 }, { "epoch": 0.9667182662538699, "grad_norm": 0.15491841733455658, "learning_rate": 9.805543948102001e-05, "loss": 0.0102, "step": 4996 }, { "epoch": 0.9669117647058824, "grad_norm": 0.24747000634670258, "learning_rate": 9.805464153651246e-05, "loss": 0.0103, "step": 4997 }, { "epoch": 0.9671052631578947, "grad_norm": 0.11129673570394516, "learning_rate": 9.805384343193838e-05, "loss": 0.0111, "step": 4998 }, { "epoch": 0.9672987616099071, "grad_norm": 0.18353702127933502, "learning_rate": 9.805304516730069e-05, "loss": 0.0099, "step": 4999 }, { "epoch": 0.9674922600619195, "grad_norm": 0.17642173171043396, "learning_rate": 9.80522467426024e-05, "loss": 0.0099, "step": 5000 }, { "epoch": 0.9676857585139319, "grad_norm": 0.1781706064939499, "learning_rate": 9.805144815784647e-05, "loss": 0.0114, "step": 5001 }, { "epoch": 0.9678792569659442, "grad_norm": 0.07594799995422363, "learning_rate": 9.805064941303585e-05, "loss": 0.0105, "step": 5002 }, { "epoch": 0.9680727554179567, "grad_norm": 0.09461384266614914, "learning_rate": 9.804985050817354e-05, "loss": 0.01, "step": 5003 }, { "epoch": 0.968266253869969, "grad_norm": 0.07690800726413727, "learning_rate": 9.804905144326249e-05, "loss": 0.0101, "step": 5004 }, { "epoch": 0.9684597523219814, "grad_norm": 0.08079582452774048, "learning_rate": 9.804825221830567e-05, "loss": 0.0109, "step": 5005 }, { "epoch": 0.9686532507739938, "grad_norm": 0.08672510832548141, "learning_rate": 9.804745283330605e-05, "loss": 0.011, "step": 5006 }, { "epoch": 0.9688467492260062, "grad_norm": 0.09869227558374405, "learning_rate": 9.804665328826663e-05, "loss": 0.0084, "step": 5007 }, { "epoch": 0.9690402476780186, "grad_norm": 0.12878696620464325, "learning_rate": 9.804585358319032e-05, "loss": 0.0095, "step": 5008 }, { "epoch": 0.969233746130031, "grad_norm": 0.12525121867656708, "learning_rate": 9.804505371808016e-05, "loss": 0.0103, "step": 5009 }, { "epoch": 0.9694272445820433, "grad_norm": 0.10337357968091965, "learning_rate": 9.804425369293909e-05, "loss": 0.0085, "step": 5010 }, { "epoch": 0.9696207430340558, "grad_norm": 
0.11929018050432205, "learning_rate": 9.804345350777008e-05, "loss": 0.0089, "step": 5011 }, { "epoch": 0.9698142414860681, "grad_norm": 0.12978163361549377, "learning_rate": 9.804265316257614e-05, "loss": 0.0104, "step": 5012 }, { "epoch": 0.9700077399380805, "grad_norm": 0.14710617065429688, "learning_rate": 9.804185265736022e-05, "loss": 0.0112, "step": 5013 }, { "epoch": 0.9702012383900929, "grad_norm": 0.13347646594047546, "learning_rate": 9.804105199212529e-05, "loss": 0.0092, "step": 5014 }, { "epoch": 0.9703947368421053, "grad_norm": 0.12589001655578613, "learning_rate": 9.804025116687433e-05, "loss": 0.0084, "step": 5015 }, { "epoch": 0.9705882352941176, "grad_norm": 0.16548316180706024, "learning_rate": 9.803945018161032e-05, "loss": 0.01, "step": 5016 }, { "epoch": 0.9707817337461301, "grad_norm": 0.09948419034481049, "learning_rate": 9.803864903633624e-05, "loss": 0.0094, "step": 5017 }, { "epoch": 0.9709752321981424, "grad_norm": 0.1587778478860855, "learning_rate": 9.803784773105508e-05, "loss": 0.0093, "step": 5018 }, { "epoch": 0.9711687306501547, "grad_norm": 0.1365479677915573, "learning_rate": 9.803704626576979e-05, "loss": 0.0107, "step": 5019 }, { "epoch": 0.9713622291021672, "grad_norm": 0.11364881694316864, "learning_rate": 9.803624464048338e-05, "loss": 0.0108, "step": 5020 }, { "epoch": 0.9715557275541795, "grad_norm": 0.13481780886650085, "learning_rate": 9.803544285519881e-05, "loss": 0.0093, "step": 5021 }, { "epoch": 0.971749226006192, "grad_norm": 0.10965485125780106, "learning_rate": 9.803464090991908e-05, "loss": 0.0117, "step": 5022 }, { "epoch": 0.9719427244582043, "grad_norm": 0.038478218019008636, "learning_rate": 9.803383880464715e-05, "loss": 0.0071, "step": 5023 }, { "epoch": 0.9721362229102167, "grad_norm": 0.0973987877368927, "learning_rate": 9.8033036539386e-05, "loss": 0.0084, "step": 5024 }, { "epoch": 0.9723297213622291, "grad_norm": 0.10909908264875412, "learning_rate": 9.803223411413864e-05, "loss": 0.0085, "step": 5025 }, { "epoch": 0.9725232198142415, "grad_norm": 0.10355252772569656, "learning_rate": 9.803143152890802e-05, "loss": 0.0082, "step": 5026 }, { "epoch": 0.9727167182662538, "grad_norm": 0.16308964788913727, "learning_rate": 9.803062878369716e-05, "loss": 0.0084, "step": 5027 }, { "epoch": 0.9729102167182663, "grad_norm": 0.14693884551525116, "learning_rate": 9.802982587850902e-05, "loss": 0.0083, "step": 5028 }, { "epoch": 0.9731037151702786, "grad_norm": 0.19773297011852264, "learning_rate": 9.802902281334658e-05, "loss": 0.0097, "step": 5029 }, { "epoch": 0.973297213622291, "grad_norm": 0.14657573401927948, "learning_rate": 9.802821958821284e-05, "loss": 0.0093, "step": 5030 }, { "epoch": 0.9734907120743034, "grad_norm": 0.0524715930223465, "learning_rate": 9.802741620311078e-05, "loss": 0.0088, "step": 5031 }, { "epoch": 0.9736842105263158, "grad_norm": 0.17489059269428253, "learning_rate": 9.802661265804341e-05, "loss": 0.0096, "step": 5032 }, { "epoch": 0.9738777089783281, "grad_norm": 0.07226622104644775, "learning_rate": 9.802580895301368e-05, "loss": 0.0092, "step": 5033 }, { "epoch": 0.9740712074303406, "grad_norm": 0.07052431255578995, "learning_rate": 9.802500508802459e-05, "loss": 0.0101, "step": 5034 }, { "epoch": 0.9742647058823529, "grad_norm": 0.10729195922613144, "learning_rate": 9.802420106307913e-05, "loss": 0.0111, "step": 5035 }, { "epoch": 0.9744582043343654, "grad_norm": 0.05900969356298447, "learning_rate": 9.802339687818031e-05, "loss": 0.0089, "step": 5036 }, { "epoch": 0.9746517027863777, "grad_norm": 
0.08949969708919525, "learning_rate": 9.802259253333108e-05, "loss": 0.0096, "step": 5037 }, { "epoch": 0.9748452012383901, "grad_norm": 0.09379787743091583, "learning_rate": 9.802178802853445e-05, "loss": 0.0081, "step": 5038 }, { "epoch": 0.9750386996904025, "grad_norm": 0.06879273056983948, "learning_rate": 9.802098336379344e-05, "loss": 0.0078, "step": 5039 }, { "epoch": 0.9752321981424149, "grad_norm": 0.08089645206928253, "learning_rate": 9.8020178539111e-05, "loss": 0.0096, "step": 5040 }, { "epoch": 0.9754256965944272, "grad_norm": 0.09149640798568726, "learning_rate": 9.801937355449014e-05, "loss": 0.0095, "step": 5041 }, { "epoch": 0.9756191950464397, "grad_norm": 0.07891303300857544, "learning_rate": 9.801856840993384e-05, "loss": 0.0106, "step": 5042 }, { "epoch": 0.975812693498452, "grad_norm": 0.15265916287899017, "learning_rate": 9.80177631054451e-05, "loss": 0.0111, "step": 5043 }, { "epoch": 0.9760061919504643, "grad_norm": 0.07592733949422836, "learning_rate": 9.801695764102693e-05, "loss": 0.0097, "step": 5044 }, { "epoch": 0.9761996904024768, "grad_norm": 0.15038974583148956, "learning_rate": 9.80161520166823e-05, "loss": 0.0118, "step": 5045 }, { "epoch": 0.9763931888544891, "grad_norm": 0.10413683205842972, "learning_rate": 9.801534623241423e-05, "loss": 0.0089, "step": 5046 }, { "epoch": 0.9765866873065016, "grad_norm": 0.12219683080911636, "learning_rate": 9.801454028822569e-05, "loss": 0.0091, "step": 5047 }, { "epoch": 0.9767801857585139, "grad_norm": 0.12767091393470764, "learning_rate": 9.80137341841197e-05, "loss": 0.0076, "step": 5048 }, { "epoch": 0.9769736842105263, "grad_norm": 0.11503273993730545, "learning_rate": 9.801292792009923e-05, "loss": 0.0104, "step": 5049 }, { "epoch": 0.9771671826625387, "grad_norm": 0.0919523537158966, "learning_rate": 9.80121214961673e-05, "loss": 0.0095, "step": 5050 }, { "epoch": 0.9773606811145511, "grad_norm": 0.07703767716884613, "learning_rate": 9.80113149123269e-05, "loss": 0.0091, "step": 5051 }, { "epoch": 0.9775541795665634, "grad_norm": 0.05778568610548973, "learning_rate": 9.801050816858103e-05, "loss": 0.0095, "step": 5052 }, { "epoch": 0.9777476780185759, "grad_norm": 0.03906385600566864, "learning_rate": 9.80097012649327e-05, "loss": 0.0091, "step": 5053 }, { "epoch": 0.9779411764705882, "grad_norm": 0.08431841433048248, "learning_rate": 9.800889420138488e-05, "loss": 0.01, "step": 5054 }, { "epoch": 0.9781346749226006, "grad_norm": 0.08231035619974136, "learning_rate": 9.80080869779406e-05, "loss": 0.0081, "step": 5055 }, { "epoch": 0.978328173374613, "grad_norm": 0.0876588448882103, "learning_rate": 9.800727959460285e-05, "loss": 0.0105, "step": 5056 }, { "epoch": 0.9785216718266254, "grad_norm": 0.08617691695690155, "learning_rate": 9.800647205137462e-05, "loss": 0.0087, "step": 5057 }, { "epoch": 0.9787151702786377, "grad_norm": 0.10447286814451218, "learning_rate": 9.800566434825894e-05, "loss": 0.0085, "step": 5058 }, { "epoch": 0.9789086687306502, "grad_norm": 0.05113285034894943, "learning_rate": 9.800485648525878e-05, "loss": 0.008, "step": 5059 }, { "epoch": 0.9791021671826625, "grad_norm": 0.13117961585521698, "learning_rate": 9.800404846237717e-05, "loss": 0.0089, "step": 5060 }, { "epoch": 0.979295665634675, "grad_norm": 0.06274938583374023, "learning_rate": 9.800324027961709e-05, "loss": 0.0104, "step": 5061 }, { "epoch": 0.9794891640866873, "grad_norm": 0.13529177010059357, "learning_rate": 9.800243193698159e-05, "loss": 0.0085, "step": 5062 }, { "epoch": 0.9796826625386997, "grad_norm": 
0.08289645612239838, "learning_rate": 9.800162343447363e-05, "loss": 0.0098, "step": 5063 }, { "epoch": 0.9798761609907121, "grad_norm": 0.11270458251237869, "learning_rate": 9.800081477209622e-05, "loss": 0.0088, "step": 5064 }, { "epoch": 0.9800696594427245, "grad_norm": 0.11904360353946686, "learning_rate": 9.800000594985238e-05, "loss": 0.0088, "step": 5065 }, { "epoch": 0.9802631578947368, "grad_norm": 0.08872129023075104, "learning_rate": 9.799919696774513e-05, "loss": 0.009, "step": 5066 }, { "epoch": 0.9804566563467493, "grad_norm": 0.1321694552898407, "learning_rate": 9.799838782577743e-05, "loss": 0.01, "step": 5067 }, { "epoch": 0.9806501547987616, "grad_norm": 0.06960318237543106, "learning_rate": 9.799757852395234e-05, "loss": 0.009, "step": 5068 }, { "epoch": 0.9808436532507739, "grad_norm": 0.16386862099170685, "learning_rate": 9.799676906227284e-05, "loss": 0.0099, "step": 5069 }, { "epoch": 0.9810371517027864, "grad_norm": 0.06734281033277512, "learning_rate": 9.799595944074196e-05, "loss": 0.0109, "step": 5070 }, { "epoch": 0.9812306501547987, "grad_norm": 0.11600136756896973, "learning_rate": 9.799514965936269e-05, "loss": 0.0085, "step": 5071 }, { "epoch": 0.9814241486068112, "grad_norm": 0.178788959980011, "learning_rate": 9.799433971813803e-05, "loss": 0.009, "step": 5072 }, { "epoch": 0.9816176470588235, "grad_norm": 0.15206144750118256, "learning_rate": 9.799352961707103e-05, "loss": 0.0071, "step": 5073 }, { "epoch": 0.9818111455108359, "grad_norm": 0.25831377506256104, "learning_rate": 9.799271935616468e-05, "loss": 0.0113, "step": 5074 }, { "epoch": 0.9820046439628483, "grad_norm": 0.13893210887908936, "learning_rate": 9.7991908935422e-05, "loss": 0.0104, "step": 5075 }, { "epoch": 0.9821981424148607, "grad_norm": 0.28031477332115173, "learning_rate": 9.799109835484597e-05, "loss": 0.0119, "step": 5076 }, { "epoch": 0.982391640866873, "grad_norm": 0.08734267950057983, "learning_rate": 9.799028761443965e-05, "loss": 0.0086, "step": 5077 }, { "epoch": 0.9825851393188855, "grad_norm": 0.34144917130470276, "learning_rate": 9.798947671420603e-05, "loss": 0.0109, "step": 5078 }, { "epoch": 0.9827786377708978, "grad_norm": 0.10424815118312836, "learning_rate": 9.798866565414813e-05, "loss": 0.009, "step": 5079 }, { "epoch": 0.9829721362229102, "grad_norm": 0.25329020619392395, "learning_rate": 9.798785443426895e-05, "loss": 0.0089, "step": 5080 }, { "epoch": 0.9831656346749226, "grad_norm": 0.10058893263339996, "learning_rate": 9.798704305457154e-05, "loss": 0.0089, "step": 5081 }, { "epoch": 0.983359133126935, "grad_norm": 0.09761319309473038, "learning_rate": 9.798623151505888e-05, "loss": 0.0091, "step": 5082 }, { "epoch": 0.9835526315789473, "grad_norm": 0.2324964553117752, "learning_rate": 9.798541981573402e-05, "loss": 0.0104, "step": 5083 }, { "epoch": 0.9837461300309598, "grad_norm": 0.0796174556016922, "learning_rate": 9.798460795659994e-05, "loss": 0.0109, "step": 5084 }, { "epoch": 0.9839396284829721, "grad_norm": 0.3120082914829254, "learning_rate": 9.79837959376597e-05, "loss": 0.0093, "step": 5085 }, { "epoch": 0.9841331269349846, "grad_norm": 0.13866813480854034, "learning_rate": 9.798298375891629e-05, "loss": 0.0115, "step": 5086 }, { "epoch": 0.9843266253869969, "grad_norm": 0.25560519099235535, "learning_rate": 9.798217142037273e-05, "loss": 0.0093, "step": 5087 }, { "epoch": 0.9845201238390093, "grad_norm": 0.2105947881937027, "learning_rate": 9.798135892203204e-05, "loss": 0.0084, "step": 5088 }, { "epoch": 0.9847136222910217, "grad_norm": 
0.14255988597869873, "learning_rate": 9.798054626389728e-05, "loss": 0.0096, "step": 5089 }, { "epoch": 0.9849071207430341, "grad_norm": 0.2504797577857971, "learning_rate": 9.797973344597142e-05, "loss": 0.0089, "step": 5090 }, { "epoch": 0.9851006191950464, "grad_norm": 0.07967044413089752, "learning_rate": 9.797892046825751e-05, "loss": 0.0088, "step": 5091 }, { "epoch": 0.9852941176470589, "grad_norm": 0.2668006122112274, "learning_rate": 9.797810733075856e-05, "loss": 0.0108, "step": 5092 }, { "epoch": 0.9854876160990712, "grad_norm": 0.07742326706647873, "learning_rate": 9.797729403347758e-05, "loss": 0.0106, "step": 5093 }, { "epoch": 0.9856811145510835, "grad_norm": 0.19764800369739532, "learning_rate": 9.797648057641763e-05, "loss": 0.0106, "step": 5094 }, { "epoch": 0.985874613003096, "grad_norm": 0.10609953850507736, "learning_rate": 9.79756669595817e-05, "loss": 0.0084, "step": 5095 }, { "epoch": 0.9860681114551083, "grad_norm": 0.1190318763256073, "learning_rate": 9.797485318297283e-05, "loss": 0.0092, "step": 5096 }, { "epoch": 0.9862616099071208, "grad_norm": 0.11183883249759674, "learning_rate": 9.797403924659406e-05, "loss": 0.0085, "step": 5097 }, { "epoch": 0.9864551083591331, "grad_norm": 0.0871381014585495, "learning_rate": 9.79732251504484e-05, "loss": 0.0102, "step": 5098 }, { "epoch": 0.9866486068111455, "grad_norm": 0.1663971245288849, "learning_rate": 9.797241089453888e-05, "loss": 0.0113, "step": 5099 }, { "epoch": 0.9868421052631579, "grad_norm": 0.1490955948829651, "learning_rate": 9.797159647886852e-05, "loss": 0.0093, "step": 5100 }, { "epoch": 0.9870356037151703, "grad_norm": 0.11366325616836548, "learning_rate": 9.797078190344035e-05, "loss": 0.0098, "step": 5101 }, { "epoch": 0.9872291021671826, "grad_norm": 0.17998813092708588, "learning_rate": 9.79699671682574e-05, "loss": 0.0117, "step": 5102 }, { "epoch": 0.9874226006191951, "grad_norm": 0.07240220904350281, "learning_rate": 9.796915227332269e-05, "loss": 0.0112, "step": 5103 }, { "epoch": 0.9876160990712074, "grad_norm": 0.15979985892772675, "learning_rate": 9.796833721863928e-05, "loss": 0.0118, "step": 5104 }, { "epoch": 0.9878095975232198, "grad_norm": 0.08592287451028824, "learning_rate": 9.796752200421018e-05, "loss": 0.0093, "step": 5105 }, { "epoch": 0.9880030959752322, "grad_norm": 0.12297942489385605, "learning_rate": 9.796670663003841e-05, "loss": 0.0097, "step": 5106 }, { "epoch": 0.9881965944272446, "grad_norm": 0.09078391641378403, "learning_rate": 9.796589109612701e-05, "loss": 0.0102, "step": 5107 }, { "epoch": 0.9883900928792569, "grad_norm": 0.14322392642498016, "learning_rate": 9.796507540247902e-05, "loss": 0.0098, "step": 5108 }, { "epoch": 0.9885835913312694, "grad_norm": 0.0737314522266388, "learning_rate": 9.796425954909746e-05, "loss": 0.0085, "step": 5109 }, { "epoch": 0.9887770897832817, "grad_norm": 0.13731345534324646, "learning_rate": 9.796344353598538e-05, "loss": 0.0079, "step": 5110 }, { "epoch": 0.9889705882352942, "grad_norm": 0.09564320743083954, "learning_rate": 9.79626273631458e-05, "loss": 0.009, "step": 5111 }, { "epoch": 0.9891640866873065, "grad_norm": 0.08695374429225922, "learning_rate": 9.796181103058176e-05, "loss": 0.0121, "step": 5112 }, { "epoch": 0.9893575851393189, "grad_norm": 0.1392815113067627, "learning_rate": 9.796099453829629e-05, "loss": 0.0095, "step": 5113 }, { "epoch": 0.9895510835913313, "grad_norm": 0.046913325786590576, "learning_rate": 9.796017788629242e-05, "loss": 0.0106, "step": 5114 }, { "epoch": 0.9897445820433437, "grad_norm": 
0.12774567306041718, "learning_rate": 9.795936107457321e-05, "loss": 0.0091, "step": 5115 }, { "epoch": 0.989938080495356, "grad_norm": 0.13793212175369263, "learning_rate": 9.795854410314166e-05, "loss": 0.0099, "step": 5116 }, { "epoch": 0.9901315789473685, "grad_norm": 0.1480582058429718, "learning_rate": 9.795772697200085e-05, "loss": 0.0091, "step": 5117 }, { "epoch": 0.9903250773993808, "grad_norm": 0.07425042241811752, "learning_rate": 9.795690968115378e-05, "loss": 0.011, "step": 5118 }, { "epoch": 0.9905185758513931, "grad_norm": 0.10839363187551498, "learning_rate": 9.795609223060351e-05, "loss": 0.0081, "step": 5119 }, { "epoch": 0.9907120743034056, "grad_norm": 0.08157728612422943, "learning_rate": 9.795527462035308e-05, "loss": 0.0097, "step": 5120 }, { "epoch": 0.9909055727554179, "grad_norm": 0.09955114871263504, "learning_rate": 9.795445685040552e-05, "loss": 0.0061, "step": 5121 }, { "epoch": 0.9910990712074303, "grad_norm": 0.10888846963644028, "learning_rate": 9.795363892076386e-05, "loss": 0.0104, "step": 5122 }, { "epoch": 0.9912925696594427, "grad_norm": 0.05869553983211517, "learning_rate": 9.795282083143116e-05, "loss": 0.0105, "step": 5123 }, { "epoch": 0.9914860681114551, "grad_norm": 0.12758508324623108, "learning_rate": 9.795200258241047e-05, "loss": 0.0085, "step": 5124 }, { "epoch": 0.9916795665634675, "grad_norm": 0.06891217827796936, "learning_rate": 9.795118417370481e-05, "loss": 0.0091, "step": 5125 }, { "epoch": 0.9918730650154799, "grad_norm": 0.1794755607843399, "learning_rate": 9.795036560531723e-05, "loss": 0.0093, "step": 5126 }, { "epoch": 0.9920665634674922, "grad_norm": 0.05733601376414299, "learning_rate": 9.794954687725078e-05, "loss": 0.0086, "step": 5127 }, { "epoch": 0.9922600619195047, "grad_norm": 0.13048063218593597, "learning_rate": 9.794872798950848e-05, "loss": 0.0083, "step": 5128 }, { "epoch": 0.992453560371517, "grad_norm": 0.052032675594091415, "learning_rate": 9.794790894209341e-05, "loss": 0.0086, "step": 5129 }, { "epoch": 0.9926470588235294, "grad_norm": 0.10643430054187775, "learning_rate": 9.79470897350086e-05, "loss": 0.0085, "step": 5130 }, { "epoch": 0.9928405572755418, "grad_norm": 0.08345451205968857, "learning_rate": 9.794627036825708e-05, "loss": 0.007, "step": 5131 }, { "epoch": 0.9930340557275542, "grad_norm": 0.13158930838108063, "learning_rate": 9.794545084184191e-05, "loss": 0.0095, "step": 5132 }, { "epoch": 0.9932275541795665, "grad_norm": 0.08281918615102768, "learning_rate": 9.794463115576614e-05, "loss": 0.0079, "step": 5133 }, { "epoch": 0.993421052631579, "grad_norm": 0.11209587007761002, "learning_rate": 9.794381131003282e-05, "loss": 0.0124, "step": 5134 }, { "epoch": 0.9936145510835913, "grad_norm": 0.10154993832111359, "learning_rate": 9.794299130464498e-05, "loss": 0.0088, "step": 5135 }, { "epoch": 0.9938080495356038, "grad_norm": 0.08654630184173584, "learning_rate": 9.794217113960568e-05, "loss": 0.012, "step": 5136 }, { "epoch": 0.9940015479876161, "grad_norm": 0.08863836526870728, "learning_rate": 9.794135081491797e-05, "loss": 0.01, "step": 5137 }, { "epoch": 0.9941950464396285, "grad_norm": 0.06910600513219833, "learning_rate": 9.79405303305849e-05, "loss": 0.01, "step": 5138 }, { "epoch": 0.9943885448916409, "grad_norm": 0.0628831535577774, "learning_rate": 9.793970968660953e-05, "loss": 0.0093, "step": 5139 }, { "epoch": 0.9945820433436533, "grad_norm": 0.07064858824014664, "learning_rate": 9.793888888299489e-05, "loss": 0.0077, "step": 5140 }, { "epoch": 0.9947755417956656, "grad_norm": 
0.06773576140403748, "learning_rate": 9.793806791974403e-05, "loss": 0.0097, "step": 5141 }, { "epoch": 0.9949690402476781, "grad_norm": 0.11706186830997467, "learning_rate": 9.793724679686003e-05, "loss": 0.0094, "step": 5142 }, { "epoch": 0.9951625386996904, "grad_norm": 0.07009859383106232, "learning_rate": 9.793642551434593e-05, "loss": 0.0099, "step": 5143 }, { "epoch": 0.9953560371517027, "grad_norm": 0.10991450399160385, "learning_rate": 9.793560407220476e-05, "loss": 0.0089, "step": 5144 }, { "epoch": 0.9955495356037152, "grad_norm": 0.07630899548530579, "learning_rate": 9.793478247043961e-05, "loss": 0.0094, "step": 5145 }, { "epoch": 0.9957430340557275, "grad_norm": 0.13962088525295258, "learning_rate": 9.793396070905351e-05, "loss": 0.0099, "step": 5146 }, { "epoch": 0.99593653250774, "grad_norm": 0.0585390105843544, "learning_rate": 9.793313878804953e-05, "loss": 0.0094, "step": 5147 }, { "epoch": 0.9961300309597523, "grad_norm": 0.08237040787935257, "learning_rate": 9.793231670743072e-05, "loss": 0.0078, "step": 5148 }, { "epoch": 0.9963235294117647, "grad_norm": 0.08783307671546936, "learning_rate": 9.793149446720013e-05, "loss": 0.0086, "step": 5149 }, { "epoch": 0.996517027863777, "grad_norm": 0.07069633156061172, "learning_rate": 9.793067206736082e-05, "loss": 0.0101, "step": 5150 }, { "epoch": 0.9967105263157895, "grad_norm": 0.07799738645553589, "learning_rate": 9.792984950791587e-05, "loss": 0.0105, "step": 5151 }, { "epoch": 0.9969040247678018, "grad_norm": 0.10092610120773315, "learning_rate": 9.79290267888683e-05, "loss": 0.0105, "step": 5152 }, { "epoch": 0.9970975232198143, "grad_norm": 0.06849317252635956, "learning_rate": 9.792820391022118e-05, "loss": 0.0082, "step": 5153 }, { "epoch": 0.9972910216718266, "grad_norm": 0.10108240693807602, "learning_rate": 9.792738087197756e-05, "loss": 0.0092, "step": 5154 }, { "epoch": 0.997484520123839, "grad_norm": 0.06397487223148346, "learning_rate": 9.792655767414055e-05, "loss": 0.0099, "step": 5155 }, { "epoch": 0.9976780185758514, "grad_norm": 0.11426154524087906, "learning_rate": 9.792573431671317e-05, "loss": 0.0115, "step": 5156 }, { "epoch": 0.9978715170278638, "grad_norm": 0.05745111033320427, "learning_rate": 9.792491079969847e-05, "loss": 0.0133, "step": 5157 }, { "epoch": 0.9980650154798761, "grad_norm": 0.11636469513177872, "learning_rate": 9.792408712309955e-05, "loss": 0.0103, "step": 5158 }, { "epoch": 0.9982585139318886, "grad_norm": 0.06784974783658981, "learning_rate": 9.792326328691942e-05, "loss": 0.0122, "step": 5159 }, { "epoch": 0.9984520123839009, "grad_norm": 0.074765145778656, "learning_rate": 9.79224392911612e-05, "loss": 0.009, "step": 5160 }, { "epoch": 0.9986455108359134, "grad_norm": 0.0752490982413292, "learning_rate": 9.792161513582792e-05, "loss": 0.0083, "step": 5161 }, { "epoch": 0.9988390092879257, "grad_norm": 0.0817689448595047, "learning_rate": 9.792079082092266e-05, "loss": 0.0079, "step": 5162 }, { "epoch": 0.9990325077399381, "grad_norm": 0.05276985466480255, "learning_rate": 9.791996634644847e-05, "loss": 0.0081, "step": 5163 }, { "epoch": 0.9992260061919505, "grad_norm": 0.06848938763141632, "learning_rate": 9.791914171240842e-05, "loss": 0.011, "step": 5164 }, { "epoch": 0.9994195046439629, "grad_norm": 0.05841546133160591, "learning_rate": 9.791831691880558e-05, "loss": 0.0084, "step": 5165 }, { "epoch": 0.9996130030959752, "grad_norm": 0.0665624812245369, "learning_rate": 9.7917491965643e-05, "loss": 0.0099, "step": 5166 }, { "epoch": 1.0001934984520124, "grad_norm": 
0.07984452694654465, "learning_rate": 9.791666685292377e-05, "loss": 0.0082, "step": 5167 }, { "epoch": 1.0003869969040247, "grad_norm": 0.07422108203172684, "learning_rate": 9.791584158065095e-05, "loss": 0.0087, "step": 5168 }, { "epoch": 1.000580495356037, "grad_norm": 0.04002990201115608, "learning_rate": 9.791501614882758e-05, "loss": 0.0089, "step": 5169 }, { "epoch": 1.0007739938080495, "grad_norm": 0.07225698232650757, "learning_rate": 9.791419055745679e-05, "loss": 0.0101, "step": 5170 }, { "epoch": 1.000967492260062, "grad_norm": 0.0506398044526577, "learning_rate": 9.791336480654159e-05, "loss": 0.008, "step": 5171 }, { "epoch": 1.0011609907120742, "grad_norm": 0.08922169357538223, "learning_rate": 9.79125388960851e-05, "loss": 0.0091, "step": 5172 }, { "epoch": 1.0013544891640866, "grad_norm": 0.05842781066894531, "learning_rate": 9.791171282609034e-05, "loss": 0.0106, "step": 5173 }, { "epoch": 1.001547987616099, "grad_norm": 0.14258697628974915, "learning_rate": 9.791088659656041e-05, "loss": 0.0119, "step": 5174 }, { "epoch": 1.0017414860681115, "grad_norm": 0.05478855222463608, "learning_rate": 9.791006020749839e-05, "loss": 0.0099, "step": 5175 }, { "epoch": 1.0019349845201238, "grad_norm": 0.1781185418367386, "learning_rate": 9.790923365890731e-05, "loss": 0.01, "step": 5176 }, { "epoch": 1.0021284829721362, "grad_norm": 0.09667780250310898, "learning_rate": 9.79084069507903e-05, "loss": 0.009, "step": 5177 }, { "epoch": 1.0023219814241486, "grad_norm": 0.16544289886951447, "learning_rate": 9.79075800831504e-05, "loss": 0.0112, "step": 5178 }, { "epoch": 1.002515479876161, "grad_norm": 0.08946466445922852, "learning_rate": 9.790675305599069e-05, "loss": 0.01, "step": 5179 }, { "epoch": 1.0027089783281733, "grad_norm": 0.12767066061496735, "learning_rate": 9.790592586931425e-05, "loss": 0.0098, "step": 5180 }, { "epoch": 1.0029024767801857, "grad_norm": 0.1256011575460434, "learning_rate": 9.790509852312414e-05, "loss": 0.0092, "step": 5181 }, { "epoch": 1.0030959752321982, "grad_norm": 0.08771690726280212, "learning_rate": 9.790427101742345e-05, "loss": 0.0097, "step": 5182 }, { "epoch": 1.0032894736842106, "grad_norm": 0.17802616953849792, "learning_rate": 9.790344335221524e-05, "loss": 0.0095, "step": 5183 }, { "epoch": 1.0034829721362228, "grad_norm": 0.07031717151403427, "learning_rate": 9.790261552750262e-05, "loss": 0.0088, "step": 5184 }, { "epoch": 1.0036764705882353, "grad_norm": 0.1890699714422226, "learning_rate": 9.790178754328862e-05, "loss": 0.0097, "step": 5185 }, { "epoch": 1.0038699690402477, "grad_norm": 0.07076045870780945, "learning_rate": 9.790095939957638e-05, "loss": 0.0094, "step": 5186 }, { "epoch": 1.0040634674922602, "grad_norm": 0.2163827121257782, "learning_rate": 9.790013109636892e-05, "loss": 0.0094, "step": 5187 }, { "epoch": 1.0042569659442724, "grad_norm": 0.12638363242149353, "learning_rate": 9.789930263366935e-05, "loss": 0.0108, "step": 5188 }, { "epoch": 1.0044504643962848, "grad_norm": 0.12263084203004837, "learning_rate": 9.789847401148073e-05, "loss": 0.0091, "step": 5189 }, { "epoch": 1.0046439628482973, "grad_norm": 0.1575275957584381, "learning_rate": 9.789764522980618e-05, "loss": 0.0114, "step": 5190 }, { "epoch": 1.0048374613003097, "grad_norm": 0.08572496473789215, "learning_rate": 9.789681628864873e-05, "loss": 0.0092, "step": 5191 }, { "epoch": 1.005030959752322, "grad_norm": 0.12228364497423172, "learning_rate": 9.789598718801151e-05, "loss": 0.0081, "step": 5192 }, { "epoch": 1.0052244582043344, "grad_norm": 
0.16401061415672302, "learning_rate": 9.789515792789756e-05, "loss": 0.0084, "step": 5193 }, { "epoch": 1.0054179566563468, "grad_norm": 0.1606961041688919, "learning_rate": 9.789432850831e-05, "loss": 0.0092, "step": 5194 }, { "epoch": 1.005611455108359, "grad_norm": 0.15903696417808533, "learning_rate": 9.789349892925188e-05, "loss": 0.0112, "step": 5195 }, { "epoch": 1.0058049535603715, "grad_norm": 0.08709510415792465, "learning_rate": 9.789266919072631e-05, "loss": 0.0093, "step": 5196 }, { "epoch": 1.005998452012384, "grad_norm": 0.14835838973522186, "learning_rate": 9.789183929273636e-05, "loss": 0.0083, "step": 5197 }, { "epoch": 1.0061919504643964, "grad_norm": 0.061826881021261215, "learning_rate": 9.789100923528512e-05, "loss": 0.0082, "step": 5198 }, { "epoch": 1.0063854489164086, "grad_norm": 0.09935998916625977, "learning_rate": 9.789017901837568e-05, "loss": 0.0101, "step": 5199 }, { "epoch": 1.006578947368421, "grad_norm": 0.10490768402814865, "learning_rate": 9.788934864201112e-05, "loss": 0.0088, "step": 5200 }, { "epoch": 1.0067724458204335, "grad_norm": 0.05050400644540787, "learning_rate": 9.788851810619455e-05, "loss": 0.0092, "step": 5201 }, { "epoch": 1.006965944272446, "grad_norm": 0.10185251384973526, "learning_rate": 9.788768741092901e-05, "loss": 0.0088, "step": 5202 }, { "epoch": 1.0071594427244581, "grad_norm": 0.04281321540474892, "learning_rate": 9.788685655621761e-05, "loss": 0.0089, "step": 5203 }, { "epoch": 1.0073529411764706, "grad_norm": 0.12447793036699295, "learning_rate": 9.788602554206346e-05, "loss": 0.0099, "step": 5204 }, { "epoch": 1.007546439628483, "grad_norm": 0.046807777136564255, "learning_rate": 9.788519436846965e-05, "loss": 0.0115, "step": 5205 }, { "epoch": 1.0077399380804954, "grad_norm": 0.11412471532821655, "learning_rate": 9.788436303543923e-05, "loss": 0.0102, "step": 5206 }, { "epoch": 1.0079334365325077, "grad_norm": 0.06934402137994766, "learning_rate": 9.788353154297532e-05, "loss": 0.0082, "step": 5207 }, { "epoch": 1.00812693498452, "grad_norm": 0.10792568325996399, "learning_rate": 9.788269989108103e-05, "loss": 0.0105, "step": 5208 }, { "epoch": 1.0083204334365325, "grad_norm": 0.0702645406126976, "learning_rate": 9.78818680797594e-05, "loss": 0.0102, "step": 5209 }, { "epoch": 1.008513931888545, "grad_norm": 0.11414564400911331, "learning_rate": 9.788103610901355e-05, "loss": 0.009, "step": 5210 }, { "epoch": 1.0087074303405572, "grad_norm": 0.07943372428417206, "learning_rate": 9.788020397884658e-05, "loss": 0.0104, "step": 5211 }, { "epoch": 1.0089009287925697, "grad_norm": 0.09934774041175842, "learning_rate": 9.787937168926159e-05, "loss": 0.0081, "step": 5212 }, { "epoch": 1.009094427244582, "grad_norm": 0.1207674890756607, "learning_rate": 9.787853924026166e-05, "loss": 0.009, "step": 5213 }, { "epoch": 1.0092879256965945, "grad_norm": 0.046045541763305664, "learning_rate": 9.787770663184987e-05, "loss": 0.0103, "step": 5214 }, { "epoch": 1.0094814241486068, "grad_norm": 0.09501830488443375, "learning_rate": 9.787687386402935e-05, "loss": 0.01, "step": 5215 }, { "epoch": 1.0096749226006192, "grad_norm": 0.04952017962932587, "learning_rate": 9.787604093680316e-05, "loss": 0.0105, "step": 5216 }, { "epoch": 1.0098684210526316, "grad_norm": 0.08527343720197678, "learning_rate": 9.787520785017442e-05, "loss": 0.0092, "step": 5217 }, { "epoch": 1.0100619195046439, "grad_norm": 0.052063822746276855, "learning_rate": 9.787437460414624e-05, "loss": 0.0086, "step": 5218 }, { "epoch": 1.0102554179566563, "grad_norm": 
0.10224868357181549, "learning_rate": 9.787354119872168e-05, "loss": 0.0091, "step": 5219 }, { "epoch": 1.0104489164086687, "grad_norm": 0.0619795061647892, "learning_rate": 9.787270763390387e-05, "loss": 0.01, "step": 5220 }, { "epoch": 1.0106424148606812, "grad_norm": 0.09157800674438477, "learning_rate": 9.78718739096959e-05, "loss": 0.0111, "step": 5221 }, { "epoch": 1.0108359133126934, "grad_norm": 0.09057938307523727, "learning_rate": 9.787104002610084e-05, "loss": 0.0107, "step": 5222 }, { "epoch": 1.0110294117647058, "grad_norm": 0.08248008787631989, "learning_rate": 9.787020598312185e-05, "loss": 0.0096, "step": 5223 }, { "epoch": 1.0112229102167183, "grad_norm": 0.09002808481454849, "learning_rate": 9.786937178076196e-05, "loss": 0.01, "step": 5224 }, { "epoch": 1.0114164086687307, "grad_norm": 0.07724280655384064, "learning_rate": 9.786853741902435e-05, "loss": 0.0088, "step": 5225 }, { "epoch": 1.011609907120743, "grad_norm": 0.0841311365365982, "learning_rate": 9.786770289791205e-05, "loss": 0.0099, "step": 5226 }, { "epoch": 1.0118034055727554, "grad_norm": 0.1523771435022354, "learning_rate": 9.786686821742821e-05, "loss": 0.0103, "step": 5227 }, { "epoch": 1.0119969040247678, "grad_norm": 0.0326264463365078, "learning_rate": 9.786603337757592e-05, "loss": 0.0077, "step": 5228 }, { "epoch": 1.0121904024767803, "grad_norm": 0.1270415037870407, "learning_rate": 9.786519837835827e-05, "loss": 0.0126, "step": 5229 }, { "epoch": 1.0123839009287925, "grad_norm": 0.08897728472948074, "learning_rate": 9.786436321977837e-05, "loss": 0.0094, "step": 5230 }, { "epoch": 1.012577399380805, "grad_norm": 0.14026367664337158, "learning_rate": 9.786352790183934e-05, "loss": 0.0107, "step": 5231 }, { "epoch": 1.0127708978328174, "grad_norm": 0.10712724179029465, "learning_rate": 9.786269242454426e-05, "loss": 0.009, "step": 5232 }, { "epoch": 1.0129643962848298, "grad_norm": 0.09641529619693756, "learning_rate": 9.786185678789628e-05, "loss": 0.0084, "step": 5233 }, { "epoch": 1.013157894736842, "grad_norm": 0.10185293853282928, "learning_rate": 9.786102099189845e-05, "loss": 0.0115, "step": 5234 }, { "epoch": 1.0133513931888545, "grad_norm": 0.12981517612934113, "learning_rate": 9.786018503655392e-05, "loss": 0.0088, "step": 5235 }, { "epoch": 1.013544891640867, "grad_norm": 0.1140393540263176, "learning_rate": 9.785934892186579e-05, "loss": 0.0096, "step": 5236 }, { "epoch": 1.0137383900928794, "grad_norm": 0.09805300831794739, "learning_rate": 9.785851264783714e-05, "loss": 0.0098, "step": 5237 }, { "epoch": 1.0139318885448916, "grad_norm": 0.0838712602853775, "learning_rate": 9.785767621447112e-05, "loss": 0.0092, "step": 5238 }, { "epoch": 1.014125386996904, "grad_norm": 0.11463037878274918, "learning_rate": 9.785683962177081e-05, "loss": 0.0095, "step": 5239 }, { "epoch": 1.0143188854489165, "grad_norm": 0.15838667750358582, "learning_rate": 9.785600286973934e-05, "loss": 0.0103, "step": 5240 }, { "epoch": 1.0145123839009287, "grad_norm": 0.12166287750005722, "learning_rate": 9.78551659583798e-05, "loss": 0.011, "step": 5241 }, { "epoch": 1.0147058823529411, "grad_norm": 0.174043670296669, "learning_rate": 9.785432888769532e-05, "loss": 0.0124, "step": 5242 }, { "epoch": 1.0148993808049536, "grad_norm": 0.16349245607852936, "learning_rate": 9.7853491657689e-05, "loss": 0.0084, "step": 5243 }, { "epoch": 1.015092879256966, "grad_norm": 0.1108437180519104, "learning_rate": 9.785265426836396e-05, "loss": 0.0098, "step": 5244 }, { "epoch": 1.0152863777089782, "grad_norm": 
0.19603684544563293, "learning_rate": 9.785181671972331e-05, "loss": 0.0092, "step": 5245 }, { "epoch": 1.0154798761609907, "grad_norm": 0.09897325187921524, "learning_rate": 9.785097901177016e-05, "loss": 0.0091, "step": 5246 }, { "epoch": 1.015673374613003, "grad_norm": 0.1910618394613266, "learning_rate": 9.785014114450763e-05, "loss": 0.0105, "step": 5247 }, { "epoch": 1.0158668730650156, "grad_norm": 0.11304334551095963, "learning_rate": 9.784930311793883e-05, "loss": 0.0091, "step": 5248 }, { "epoch": 1.0160603715170278, "grad_norm": 0.1700771600008011, "learning_rate": 9.784846493206689e-05, "loss": 0.0096, "step": 5249 }, { "epoch": 1.0162538699690402, "grad_norm": 0.10114691406488419, "learning_rate": 9.784762658689492e-05, "loss": 0.0084, "step": 5250 }, { "epoch": 1.0164473684210527, "grad_norm": 0.09145019948482513, "learning_rate": 9.784678808242602e-05, "loss": 0.0104, "step": 5251 }, { "epoch": 1.016640866873065, "grad_norm": 0.12797677516937256, "learning_rate": 9.784594941866331e-05, "loss": 0.0087, "step": 5252 }, { "epoch": 1.0168343653250773, "grad_norm": 0.0854254812002182, "learning_rate": 9.784511059560994e-05, "loss": 0.0099, "step": 5253 }, { "epoch": 1.0170278637770898, "grad_norm": 0.11613281071186066, "learning_rate": 9.784427161326899e-05, "loss": 0.0085, "step": 5254 }, { "epoch": 1.0172213622291022, "grad_norm": 0.0851212590932846, "learning_rate": 9.78434324716436e-05, "loss": 0.0077, "step": 5255 }, { "epoch": 1.0174148606811146, "grad_norm": 0.12366577982902527, "learning_rate": 9.784259317073688e-05, "loss": 0.0096, "step": 5256 }, { "epoch": 1.0176083591331269, "grad_norm": 0.08063555508852005, "learning_rate": 9.784175371055198e-05, "loss": 0.0094, "step": 5257 }, { "epoch": 1.0178018575851393, "grad_norm": 0.09981660544872284, "learning_rate": 9.784091409109197e-05, "loss": 0.0081, "step": 5258 }, { "epoch": 1.0179953560371517, "grad_norm": 0.12431932985782623, "learning_rate": 9.784007431236e-05, "loss": 0.012, "step": 5259 }, { "epoch": 1.0181888544891642, "grad_norm": 0.1521928608417511, "learning_rate": 9.78392343743592e-05, "loss": 0.0096, "step": 5260 }, { "epoch": 1.0183823529411764, "grad_norm": 0.08337849378585815, "learning_rate": 9.783839427709269e-05, "loss": 0.009, "step": 5261 }, { "epoch": 1.0185758513931888, "grad_norm": 0.10220882296562195, "learning_rate": 9.783755402056358e-05, "loss": 0.0102, "step": 5262 }, { "epoch": 1.0187693498452013, "grad_norm": 0.10491009801626205, "learning_rate": 9.783671360477498e-05, "loss": 0.0094, "step": 5263 }, { "epoch": 1.0189628482972137, "grad_norm": 0.09655874967575073, "learning_rate": 9.783587302973004e-05, "loss": 0.0112, "step": 5264 }, { "epoch": 1.019156346749226, "grad_norm": 0.11426275223493576, "learning_rate": 9.783503229543189e-05, "loss": 0.0099, "step": 5265 }, { "epoch": 1.0193498452012384, "grad_norm": 0.0925082340836525, "learning_rate": 9.783419140188364e-05, "loss": 0.0085, "step": 5266 }, { "epoch": 1.0195433436532508, "grad_norm": 0.08674734085798264, "learning_rate": 9.783335034908841e-05, "loss": 0.0106, "step": 5267 }, { "epoch": 1.019736842105263, "grad_norm": 0.07227690517902374, "learning_rate": 9.783250913704936e-05, "loss": 0.0087, "step": 5268 }, { "epoch": 1.0199303405572755, "grad_norm": 0.09930825978517532, "learning_rate": 9.783166776576959e-05, "loss": 0.0112, "step": 5269 }, { "epoch": 1.020123839009288, "grad_norm": 0.05807828530669212, "learning_rate": 9.783082623525223e-05, "loss": 0.0106, "step": 5270 }, { "epoch": 1.0203173374613004, "grad_norm": 
0.06172984838485718, "learning_rate": 9.782998454550041e-05, "loss": 0.0085, "step": 5271 }, { "epoch": 1.0205108359133126, "grad_norm": 0.07304596155881882, "learning_rate": 9.782914269651726e-05, "loss": 0.0109, "step": 5272 }, { "epoch": 1.020704334365325, "grad_norm": 0.05832758545875549, "learning_rate": 9.78283006883059e-05, "loss": 0.0087, "step": 5273 }, { "epoch": 1.0208978328173375, "grad_norm": 0.06333789974451065, "learning_rate": 9.782745852086949e-05, "loss": 0.0113, "step": 5274 }, { "epoch": 1.02109133126935, "grad_norm": 0.0898335725069046, "learning_rate": 9.782661619421114e-05, "loss": 0.0095, "step": 5275 }, { "epoch": 1.0212848297213621, "grad_norm": 0.043378643691539764, "learning_rate": 9.782577370833398e-05, "loss": 0.0078, "step": 5276 }, { "epoch": 1.0214783281733746, "grad_norm": 0.09620984643697739, "learning_rate": 9.782493106324114e-05, "loss": 0.0106, "step": 5277 }, { "epoch": 1.021671826625387, "grad_norm": 0.07842497527599335, "learning_rate": 9.782408825893575e-05, "loss": 0.0107, "step": 5278 }, { "epoch": 1.0218653250773995, "grad_norm": 0.06838109344244003, "learning_rate": 9.782324529542097e-05, "loss": 0.0109, "step": 5279 }, { "epoch": 1.0220588235294117, "grad_norm": 0.07980277389287949, "learning_rate": 9.782240217269993e-05, "loss": 0.0096, "step": 5280 }, { "epoch": 1.0222523219814241, "grad_norm": 0.12635062634944916, "learning_rate": 9.782155889077572e-05, "loss": 0.0112, "step": 5281 }, { "epoch": 1.0224458204334366, "grad_norm": 0.07863820344209671, "learning_rate": 9.782071544965152e-05, "loss": 0.0106, "step": 5282 }, { "epoch": 1.022639318885449, "grad_norm": 0.22378550469875336, "learning_rate": 9.781987184933045e-05, "loss": 0.0115, "step": 5283 }, { "epoch": 1.0228328173374612, "grad_norm": 0.14786794781684875, "learning_rate": 9.781902808981564e-05, "loss": 0.0135, "step": 5284 }, { "epoch": 1.0230263157894737, "grad_norm": 0.21028687059879303, "learning_rate": 9.781818417111024e-05, "loss": 0.0092, "step": 5285 }, { "epoch": 1.0232198142414861, "grad_norm": 0.11858759820461273, "learning_rate": 9.781734009321738e-05, "loss": 0.0116, "step": 5286 }, { "epoch": 1.0234133126934986, "grad_norm": 0.21296383440494537, "learning_rate": 9.78164958561402e-05, "loss": 0.0094, "step": 5287 }, { "epoch": 1.0236068111455108, "grad_norm": 0.08686857670545578, "learning_rate": 9.781565145988185e-05, "loss": 0.0104, "step": 5288 }, { "epoch": 1.0238003095975232, "grad_norm": 0.19160667061805725, "learning_rate": 9.781480690444545e-05, "loss": 0.01, "step": 5289 }, { "epoch": 1.0239938080495357, "grad_norm": 0.08524217456579208, "learning_rate": 9.781396218983414e-05, "loss": 0.0103, "step": 5290 }, { "epoch": 1.024187306501548, "grad_norm": 0.16426636278629303, "learning_rate": 9.781311731605109e-05, "loss": 0.009, "step": 5291 }, { "epoch": 1.0243808049535603, "grad_norm": 0.1486896127462387, "learning_rate": 9.781227228309938e-05, "loss": 0.0095, "step": 5292 }, { "epoch": 1.0245743034055728, "grad_norm": 0.10466127097606659, "learning_rate": 9.781142709098224e-05, "loss": 0.0115, "step": 5293 }, { "epoch": 1.0247678018575852, "grad_norm": 0.205689936876297, "learning_rate": 9.781058173970273e-05, "loss": 0.0103, "step": 5294 }, { "epoch": 1.0249613003095974, "grad_norm": 0.10589448362588882, "learning_rate": 9.780973622926403e-05, "loss": 0.01, "step": 5295 }, { "epoch": 1.0251547987616099, "grad_norm": 0.1912190020084381, "learning_rate": 9.780889055966929e-05, "loss": 0.0107, "step": 5296 }, { "epoch": 1.0253482972136223, "grad_norm": 
0.16823060810565948, "learning_rate": 9.780804473092163e-05, "loss": 0.0098, "step": 5297 }, { "epoch": 1.0255417956656347, "grad_norm": 0.11075923591852188, "learning_rate": 9.780719874302422e-05, "loss": 0.0108, "step": 5298 }, { "epoch": 1.025735294117647, "grad_norm": 0.2187196910381317, "learning_rate": 9.780635259598018e-05, "loss": 0.0117, "step": 5299 }, { "epoch": 1.0259287925696594, "grad_norm": 0.09350739419460297, "learning_rate": 9.780550628979269e-05, "loss": 0.0088, "step": 5300 }, { "epoch": 1.0261222910216719, "grad_norm": 0.19427959620952606, "learning_rate": 9.780465982446486e-05, "loss": 0.0081, "step": 5301 }, { "epoch": 1.0263157894736843, "grad_norm": 0.11957912147045135, "learning_rate": 9.780381319999984e-05, "loss": 0.0087, "step": 5302 }, { "epoch": 1.0265092879256965, "grad_norm": 0.1429847776889801, "learning_rate": 9.78029664164008e-05, "loss": 0.0114, "step": 5303 }, { "epoch": 1.026702786377709, "grad_norm": 0.16968588531017303, "learning_rate": 9.780211947367089e-05, "loss": 0.0109, "step": 5304 }, { "epoch": 1.0268962848297214, "grad_norm": 0.06975923478603363, "learning_rate": 9.780127237181324e-05, "loss": 0.0096, "step": 5305 }, { "epoch": 1.0270897832817338, "grad_norm": 0.27612271904945374, "learning_rate": 9.7800425110831e-05, "loss": 0.0109, "step": 5306 }, { "epoch": 1.027283281733746, "grad_norm": 0.06148182973265648, "learning_rate": 9.779957769072733e-05, "loss": 0.0097, "step": 5307 }, { "epoch": 1.0274767801857585, "grad_norm": 0.2144923359155655, "learning_rate": 9.779873011150537e-05, "loss": 0.0089, "step": 5308 }, { "epoch": 1.027670278637771, "grad_norm": 0.12018769234418869, "learning_rate": 9.779788237316828e-05, "loss": 0.0108, "step": 5309 }, { "epoch": 1.0278637770897834, "grad_norm": 0.18111036717891693, "learning_rate": 9.779703447571922e-05, "loss": 0.0109, "step": 5310 }, { "epoch": 1.0280572755417956, "grad_norm": 0.25229254364967346, "learning_rate": 9.779618641916131e-05, "loss": 0.0097, "step": 5311 }, { "epoch": 1.028250773993808, "grad_norm": 0.21374881267547607, "learning_rate": 9.779533820349774e-05, "loss": 0.0116, "step": 5312 }, { "epoch": 1.0284442724458205, "grad_norm": 0.2586747109889984, "learning_rate": 9.779448982873165e-05, "loss": 0.0118, "step": 5313 }, { "epoch": 1.028637770897833, "grad_norm": 0.1221589669585228, "learning_rate": 9.779364129486619e-05, "loss": 0.0081, "step": 5314 }, { "epoch": 1.0288312693498451, "grad_norm": 0.26624488830566406, "learning_rate": 9.779279260190451e-05, "loss": 0.0093, "step": 5315 }, { "epoch": 1.0290247678018576, "grad_norm": 0.043692685663700104, "learning_rate": 9.779194374984977e-05, "loss": 0.0115, "step": 5316 }, { "epoch": 1.02921826625387, "grad_norm": 0.22452564537525177, "learning_rate": 9.779109473870512e-05, "loss": 0.0101, "step": 5317 }, { "epoch": 1.0294117647058822, "grad_norm": 0.18200890719890594, "learning_rate": 9.779024556847374e-05, "loss": 0.0095, "step": 5318 }, { "epoch": 1.0296052631578947, "grad_norm": 0.2532963752746582, "learning_rate": 9.778939623915878e-05, "loss": 0.0089, "step": 5319 }, { "epoch": 1.0297987616099071, "grad_norm": 0.2035502791404724, "learning_rate": 9.778854675076336e-05, "loss": 0.0081, "step": 5320 }, { "epoch": 1.0299922600619196, "grad_norm": 0.11400902271270752, "learning_rate": 9.778769710329069e-05, "loss": 0.0096, "step": 5321 }, { "epoch": 1.0301857585139318, "grad_norm": 0.2420545369386673, "learning_rate": 9.77868472967439e-05, "loss": 0.0094, "step": 5322 }, { "epoch": 1.0303792569659442, "grad_norm": 
0.10526419430971146, "learning_rate": 9.778599733112615e-05, "loss": 0.0112, "step": 5323 }, { "epoch": 1.0305727554179567, "grad_norm": 0.1976335346698761, "learning_rate": 9.77851472064406e-05, "loss": 0.0112, "step": 5324 }, { "epoch": 1.0307662538699691, "grad_norm": 0.18459086120128632, "learning_rate": 9.778429692269041e-05, "loss": 0.0083, "step": 5325 }, { "epoch": 1.0309597523219813, "grad_norm": 0.12008947879076004, "learning_rate": 9.778344647987877e-05, "loss": 0.0101, "step": 5326 }, { "epoch": 1.0311532507739938, "grad_norm": 0.23841576278209686, "learning_rate": 9.77825958780088e-05, "loss": 0.0102, "step": 5327 }, { "epoch": 1.0313467492260062, "grad_norm": 0.08011642843484879, "learning_rate": 9.778174511708368e-05, "loss": 0.0099, "step": 5328 }, { "epoch": 1.0315402476780187, "grad_norm": 0.20379959046840668, "learning_rate": 9.778089419710658e-05, "loss": 0.0103, "step": 5329 }, { "epoch": 1.0317337461300309, "grad_norm": 0.15227998793125153, "learning_rate": 9.778004311808066e-05, "loss": 0.0125, "step": 5330 }, { "epoch": 1.0319272445820433, "grad_norm": 0.17368820309638977, "learning_rate": 9.777919188000907e-05, "loss": 0.0106, "step": 5331 }, { "epoch": 1.0321207430340558, "grad_norm": 0.1621529757976532, "learning_rate": 9.7778340482895e-05, "loss": 0.0097, "step": 5332 }, { "epoch": 1.0323142414860682, "grad_norm": 0.10184768587350845, "learning_rate": 9.777748892674158e-05, "loss": 0.0096, "step": 5333 }, { "epoch": 1.0325077399380804, "grad_norm": 0.14935797452926636, "learning_rate": 9.777663721155202e-05, "loss": 0.0101, "step": 5334 }, { "epoch": 1.0327012383900929, "grad_norm": 0.04883597046136856, "learning_rate": 9.777578533732943e-05, "loss": 0.0095, "step": 5335 }, { "epoch": 1.0328947368421053, "grad_norm": 0.1418815702199936, "learning_rate": 9.777493330407705e-05, "loss": 0.0073, "step": 5336 }, { "epoch": 1.0330882352941178, "grad_norm": 0.09669119119644165, "learning_rate": 9.777408111179799e-05, "loss": 0.0093, "step": 5337 }, { "epoch": 1.03328173374613, "grad_norm": 0.11306249350309372, "learning_rate": 9.777322876049544e-05, "loss": 0.0081, "step": 5338 }, { "epoch": 1.0334752321981424, "grad_norm": 0.12244099378585815, "learning_rate": 9.777237625017256e-05, "loss": 0.01, "step": 5339 }, { "epoch": 1.0336687306501549, "grad_norm": 0.05728121101856232, "learning_rate": 9.777152358083252e-05, "loss": 0.0091, "step": 5340 }, { "epoch": 1.033862229102167, "grad_norm": 0.07576187700033188, "learning_rate": 9.77706707524785e-05, "loss": 0.0092, "step": 5341 }, { "epoch": 1.0340557275541795, "grad_norm": 0.11120989918708801, "learning_rate": 9.776981776511367e-05, "loss": 0.0094, "step": 5342 }, { "epoch": 1.034249226006192, "grad_norm": 0.07140454649925232, "learning_rate": 9.77689646187412e-05, "loss": 0.0095, "step": 5343 }, { "epoch": 1.0344427244582044, "grad_norm": 0.09481128305196762, "learning_rate": 9.776811131336424e-05, "loss": 0.0102, "step": 5344 }, { "epoch": 1.0346362229102166, "grad_norm": 0.06530419737100601, "learning_rate": 9.7767257848986e-05, "loss": 0.0094, "step": 5345 }, { "epoch": 1.034829721362229, "grad_norm": 0.06306072324514389, "learning_rate": 9.776640422560962e-05, "loss": 0.0106, "step": 5346 }, { "epoch": 1.0350232198142415, "grad_norm": 0.07665640860795975, "learning_rate": 9.77655504432383e-05, "loss": 0.0087, "step": 5347 }, { "epoch": 1.035216718266254, "grad_norm": 0.03828609734773636, "learning_rate": 9.776469650187518e-05, "loss": 0.0097, "step": 5348 }, { "epoch": 1.0354102167182662, "grad_norm": 
0.060980379581451416, "learning_rate": 9.776384240152348e-05, "loss": 0.0104, "step": 5349 }, { "epoch": 1.0356037151702786, "grad_norm": 0.07883410155773163, "learning_rate": 9.776298814218634e-05, "loss": 0.0083, "step": 5350 }, { "epoch": 1.035797213622291, "grad_norm": 0.06449849158525467, "learning_rate": 9.776213372386696e-05, "loss": 0.0079, "step": 5351 }, { "epoch": 1.0359907120743035, "grad_norm": 0.06168217957019806, "learning_rate": 9.776127914656848e-05, "loss": 0.0073, "step": 5352 }, { "epoch": 1.0361842105263157, "grad_norm": 0.08012744039297104, "learning_rate": 9.77604244102941e-05, "loss": 0.01, "step": 5353 }, { "epoch": 1.0363777089783281, "grad_norm": 0.045678939670324326, "learning_rate": 9.775956951504702e-05, "loss": 0.0101, "step": 5354 }, { "epoch": 1.0365712074303406, "grad_norm": 0.07412148267030716, "learning_rate": 9.775871446083038e-05, "loss": 0.0092, "step": 5355 }, { "epoch": 1.036764705882353, "grad_norm": 0.056104812771081924, "learning_rate": 9.775785924764739e-05, "loss": 0.011, "step": 5356 }, { "epoch": 1.0369582043343653, "grad_norm": 0.06028527393937111, "learning_rate": 9.775700387550119e-05, "loss": 0.0104, "step": 5357 }, { "epoch": 1.0371517027863777, "grad_norm": 0.07919517159461975, "learning_rate": 9.775614834439501e-05, "loss": 0.0089, "step": 5358 }, { "epoch": 1.0373452012383901, "grad_norm": 0.04166411980986595, "learning_rate": 9.7755292654332e-05, "loss": 0.009, "step": 5359 }, { "epoch": 1.0375386996904026, "grad_norm": 0.0773196816444397, "learning_rate": 9.775443680531535e-05, "loss": 0.0099, "step": 5360 }, { "epoch": 1.0377321981424148, "grad_norm": 0.05049584433436394, "learning_rate": 9.775358079734823e-05, "loss": 0.0109, "step": 5361 }, { "epoch": 1.0379256965944272, "grad_norm": 0.040189433842897415, "learning_rate": 9.775272463043385e-05, "loss": 0.0083, "step": 5362 }, { "epoch": 1.0381191950464397, "grad_norm": 0.05051165074110031, "learning_rate": 9.775186830457534e-05, "loss": 0.0112, "step": 5363 }, { "epoch": 1.038312693498452, "grad_norm": 0.04673013463616371, "learning_rate": 9.775101181977595e-05, "loss": 0.0093, "step": 5364 }, { "epoch": 1.0385061919504643, "grad_norm": 0.037894681096076965, "learning_rate": 9.775015517603881e-05, "loss": 0.0091, "step": 5365 }, { "epoch": 1.0386996904024768, "grad_norm": 0.05959540233016014, "learning_rate": 9.774929837336714e-05, "loss": 0.0093, "step": 5366 }, { "epoch": 1.0388931888544892, "grad_norm": 0.07276183366775513, "learning_rate": 9.774844141176413e-05, "loss": 0.0091, "step": 5367 }, { "epoch": 1.0390866873065014, "grad_norm": 0.07986408472061157, "learning_rate": 9.774758429123291e-05, "loss": 0.0091, "step": 5368 }, { "epoch": 1.0392801857585139, "grad_norm": 0.0826316550374031, "learning_rate": 9.774672701177673e-05, "loss": 0.0085, "step": 5369 }, { "epoch": 1.0394736842105263, "grad_norm": 0.09360265731811523, "learning_rate": 9.774586957339875e-05, "loss": 0.011, "step": 5370 }, { "epoch": 1.0396671826625388, "grad_norm": 0.09361178427934647, "learning_rate": 9.774501197610215e-05, "loss": 0.0089, "step": 5371 }, { "epoch": 1.039860681114551, "grad_norm": 0.06875506043434143, "learning_rate": 9.774415421989013e-05, "loss": 0.01, "step": 5372 }, { "epoch": 1.0400541795665634, "grad_norm": 0.06211117282509804, "learning_rate": 9.774329630476587e-05, "loss": 0.0095, "step": 5373 }, { "epoch": 1.0402476780185759, "grad_norm": 0.06961696594953537, "learning_rate": 9.774243823073259e-05, "loss": 0.0094, "step": 5374 }, { "epoch": 1.0404411764705883, "grad_norm": 
0.05319168046116829, "learning_rate": 9.774157999779343e-05, "loss": 0.0084, "step": 5375 }, { "epoch": 1.0406346749226005, "grad_norm": 0.07279089093208313, "learning_rate": 9.774072160595162e-05, "loss": 0.009, "step": 5376 }, { "epoch": 1.040828173374613, "grad_norm": 0.06020824611186981, "learning_rate": 9.773986305521034e-05, "loss": 0.0094, "step": 5377 }, { "epoch": 1.0410216718266254, "grad_norm": 0.075978584587574, "learning_rate": 9.773900434557278e-05, "loss": 0.0098, "step": 5378 }, { "epoch": 1.0412151702786379, "grad_norm": 0.038909077644348145, "learning_rate": 9.773814547704213e-05, "loss": 0.0083, "step": 5379 }, { "epoch": 1.04140866873065, "grad_norm": 0.08468145877122879, "learning_rate": 9.77372864496216e-05, "loss": 0.0096, "step": 5380 }, { "epoch": 1.0416021671826625, "grad_norm": 0.06550627946853638, "learning_rate": 9.773642726331434e-05, "loss": 0.0089, "step": 5381 }, { "epoch": 1.041795665634675, "grad_norm": 0.09846336394548416, "learning_rate": 9.77355679181236e-05, "loss": 0.0087, "step": 5382 }, { "epoch": 1.0419891640866874, "grad_norm": 0.05887201800942421, "learning_rate": 9.773470841405254e-05, "loss": 0.0078, "step": 5383 }, { "epoch": 1.0421826625386996, "grad_norm": 0.07678394764661789, "learning_rate": 9.773384875110437e-05, "loss": 0.0101, "step": 5384 }, { "epoch": 1.042376160990712, "grad_norm": 0.07255080342292786, "learning_rate": 9.773298892928227e-05, "loss": 0.0099, "step": 5385 }, { "epoch": 1.0425696594427245, "grad_norm": 0.07366275787353516, "learning_rate": 9.773212894858947e-05, "loss": 0.0072, "step": 5386 }, { "epoch": 1.042763157894737, "grad_norm": 0.06669437885284424, "learning_rate": 9.77312688090291e-05, "loss": 0.0104, "step": 5387 }, { "epoch": 1.0429566563467492, "grad_norm": 0.06793016940355301, "learning_rate": 9.773040851060444e-05, "loss": 0.0085, "step": 5388 }, { "epoch": 1.0431501547987616, "grad_norm": 0.04637996852397919, "learning_rate": 9.772954805331864e-05, "loss": 0.0083, "step": 5389 }, { "epoch": 1.043343653250774, "grad_norm": 0.065340556204319, "learning_rate": 9.772868743717492e-05, "loss": 0.0086, "step": 5390 }, { "epoch": 1.0435371517027865, "grad_norm": 0.07747172564268112, "learning_rate": 9.772782666217645e-05, "loss": 0.011, "step": 5391 }, { "epoch": 1.0437306501547987, "grad_norm": 0.05588095262646675, "learning_rate": 9.772696572832644e-05, "loss": 0.0104, "step": 5392 }, { "epoch": 1.0439241486068112, "grad_norm": 0.09743589162826538, "learning_rate": 9.772610463562812e-05, "loss": 0.0083, "step": 5393 }, { "epoch": 1.0441176470588236, "grad_norm": 0.04338245093822479, "learning_rate": 9.772524338408467e-05, "loss": 0.009, "step": 5394 }, { "epoch": 1.0443111455108358, "grad_norm": 0.07756749540567398, "learning_rate": 9.772438197369928e-05, "loss": 0.0112, "step": 5395 }, { "epoch": 1.0445046439628483, "grad_norm": 0.06382577121257782, "learning_rate": 9.772352040447518e-05, "loss": 0.0085, "step": 5396 }, { "epoch": 1.0446981424148607, "grad_norm": 0.06168285384774208, "learning_rate": 9.772265867641555e-05, "loss": 0.0087, "step": 5397 }, { "epoch": 1.0448916408668731, "grad_norm": 0.06739719212055206, "learning_rate": 9.77217967895236e-05, "loss": 0.0096, "step": 5398 }, { "epoch": 1.0450851393188854, "grad_norm": 0.09240637719631195, "learning_rate": 9.772093474380255e-05, "loss": 0.009, "step": 5399 }, { "epoch": 1.0452786377708978, "grad_norm": 0.15234990417957306, "learning_rate": 9.772007253925557e-05, "loss": 0.0102, "step": 5400 }, { "epoch": 1.0454721362229102, "grad_norm": 
0.07227467000484467, "learning_rate": 9.77192101758859e-05, "loss": 0.0096, "step": 5401 }, { "epoch": 1.0456656346749227, "grad_norm": 0.17637091875076294, "learning_rate": 9.771834765369673e-05, "loss": 0.0113, "step": 5402 }, { "epoch": 1.045859133126935, "grad_norm": 0.07697629928588867, "learning_rate": 9.771748497269129e-05, "loss": 0.0094, "step": 5403 }, { "epoch": 1.0460526315789473, "grad_norm": 0.16706229746341705, "learning_rate": 9.771662213287274e-05, "loss": 0.0104, "step": 5404 }, { "epoch": 1.0462461300309598, "grad_norm": 0.08485206216573715, "learning_rate": 9.771575913424433e-05, "loss": 0.01, "step": 5405 }, { "epoch": 1.0464396284829722, "grad_norm": 0.06972486525774002, "learning_rate": 9.771489597680923e-05, "loss": 0.0081, "step": 5406 }, { "epoch": 1.0466331269349844, "grad_norm": 0.13208310306072235, "learning_rate": 9.771403266057069e-05, "loss": 0.0103, "step": 5407 }, { "epoch": 1.046826625386997, "grad_norm": 0.06127818673849106, "learning_rate": 9.77131691855319e-05, "loss": 0.0117, "step": 5408 }, { "epoch": 1.0470201238390093, "grad_norm": 0.10663147270679474, "learning_rate": 9.771230555169608e-05, "loss": 0.0081, "step": 5409 }, { "epoch": 1.0472136222910218, "grad_norm": 0.06751378625631332, "learning_rate": 9.771144175906641e-05, "loss": 0.0067, "step": 5410 }, { "epoch": 1.047407120743034, "grad_norm": 0.06375791877508163, "learning_rate": 9.771057780764614e-05, "loss": 0.0097, "step": 5411 }, { "epoch": 1.0476006191950464, "grad_norm": 0.10810338705778122, "learning_rate": 9.770971369743846e-05, "loss": 0.0088, "step": 5412 }, { "epoch": 1.0477941176470589, "grad_norm": 0.07516346871852875, "learning_rate": 9.77088494284466e-05, "loss": 0.011, "step": 5413 }, { "epoch": 1.0479876160990713, "grad_norm": 0.1271802932024002, "learning_rate": 9.770798500067374e-05, "loss": 0.0086, "step": 5414 }, { "epoch": 1.0481811145510835, "grad_norm": 0.1468370258808136, "learning_rate": 9.770712041412314e-05, "loss": 0.0113, "step": 5415 }, { "epoch": 1.048374613003096, "grad_norm": 0.09130793809890747, "learning_rate": 9.770625566879797e-05, "loss": 0.0102, "step": 5416 }, { "epoch": 1.0485681114551084, "grad_norm": 0.1565486341714859, "learning_rate": 9.770539076470148e-05, "loss": 0.0098, "step": 5417 }, { "epoch": 1.0487616099071206, "grad_norm": 0.06450976431369781, "learning_rate": 9.770452570183685e-05, "loss": 0.0081, "step": 5418 }, { "epoch": 1.048955108359133, "grad_norm": 0.13536487519741058, "learning_rate": 9.770366048020732e-05, "loss": 0.0102, "step": 5419 }, { "epoch": 1.0491486068111455, "grad_norm": 0.07946207374334335, "learning_rate": 9.770279509981613e-05, "loss": 0.0102, "step": 5420 }, { "epoch": 1.049342105263158, "grad_norm": 0.06770244985818863, "learning_rate": 9.770192956066646e-05, "loss": 0.0098, "step": 5421 }, { "epoch": 1.0495356037151702, "grad_norm": 0.0578407421708107, "learning_rate": 9.77010638627615e-05, "loss": 0.0098, "step": 5422 }, { "epoch": 1.0497291021671826, "grad_norm": 0.10939287394285202, "learning_rate": 9.770019800610454e-05, "loss": 0.0089, "step": 5423 }, { "epoch": 1.049922600619195, "grad_norm": 0.046174708753824234, "learning_rate": 9.769933199069877e-05, "loss": 0.0091, "step": 5424 }, { "epoch": 1.0501160990712075, "grad_norm": 0.08180558681488037, "learning_rate": 9.769846581654739e-05, "loss": 0.0106, "step": 5425 }, { "epoch": 1.0503095975232197, "grad_norm": 0.11962326616048813, "learning_rate": 9.769759948365365e-05, "loss": 0.0094, "step": 5426 }, { "epoch": 1.0505030959752322, "grad_norm": 
0.07567223906517029, "learning_rate": 9.769673299202074e-05, "loss": 0.0106, "step": 5427 }, { "epoch": 1.0506965944272446, "grad_norm": 0.16105110943317413, "learning_rate": 9.76958663416519e-05, "loss": 0.0083, "step": 5428 }, { "epoch": 1.050890092879257, "grad_norm": 0.0473550446331501, "learning_rate": 9.769499953255036e-05, "loss": 0.008, "step": 5429 }, { "epoch": 1.0510835913312693, "grad_norm": 0.1424710601568222, "learning_rate": 9.769413256471934e-05, "loss": 0.0091, "step": 5430 }, { "epoch": 1.0512770897832817, "grad_norm": 0.13927488029003143, "learning_rate": 9.769326543816203e-05, "loss": 0.0107, "step": 5431 }, { "epoch": 1.0514705882352942, "grad_norm": 0.19153562188148499, "learning_rate": 9.769239815288169e-05, "loss": 0.0107, "step": 5432 }, { "epoch": 1.0516640866873066, "grad_norm": 0.08654923737049103, "learning_rate": 9.769153070888153e-05, "loss": 0.0085, "step": 5433 }, { "epoch": 1.0518575851393188, "grad_norm": 0.10941306501626968, "learning_rate": 9.769066310616478e-05, "loss": 0.0104, "step": 5434 }, { "epoch": 1.0520510835913313, "grad_norm": 0.08806414902210236, "learning_rate": 9.768979534473467e-05, "loss": 0.0084, "step": 5435 }, { "epoch": 1.0522445820433437, "grad_norm": 0.08883494883775711, "learning_rate": 9.768892742459441e-05, "loss": 0.0099, "step": 5436 }, { "epoch": 1.0524380804953561, "grad_norm": 0.10028553754091263, "learning_rate": 9.768805934574725e-05, "loss": 0.0082, "step": 5437 }, { "epoch": 1.0526315789473684, "grad_norm": 0.052232518792152405, "learning_rate": 9.768719110819638e-05, "loss": 0.009, "step": 5438 }, { "epoch": 1.0528250773993808, "grad_norm": 0.0718546137213707, "learning_rate": 9.768632271194508e-05, "loss": 0.0092, "step": 5439 }, { "epoch": 1.0530185758513932, "grad_norm": 0.0649915337562561, "learning_rate": 9.768545415699654e-05, "loss": 0.0085, "step": 5440 }, { "epoch": 1.0532120743034055, "grad_norm": 0.058317579329013824, "learning_rate": 9.768458544335399e-05, "loss": 0.0093, "step": 5441 }, { "epoch": 1.053405572755418, "grad_norm": 0.09974299371242523, "learning_rate": 9.768371657102068e-05, "loss": 0.0078, "step": 5442 }, { "epoch": 1.0535990712074303, "grad_norm": 0.11081310361623764, "learning_rate": 9.768284753999983e-05, "loss": 0.0096, "step": 5443 }, { "epoch": 1.0537925696594428, "grad_norm": 0.08770643919706345, "learning_rate": 9.768197835029466e-05, "loss": 0.0088, "step": 5444 }, { "epoch": 1.053986068111455, "grad_norm": 0.12485569715499878, "learning_rate": 9.768110900190843e-05, "loss": 0.0097, "step": 5445 }, { "epoch": 1.0541795665634675, "grad_norm": 0.06526988744735718, "learning_rate": 9.768023949484433e-05, "loss": 0.009, "step": 5446 }, { "epoch": 1.05437306501548, "grad_norm": 0.10674745589494705, "learning_rate": 9.767936982910562e-05, "loss": 0.0108, "step": 5447 }, { "epoch": 1.0545665634674923, "grad_norm": 0.10286861658096313, "learning_rate": 9.767850000469555e-05, "loss": 0.0092, "step": 5448 }, { "epoch": 1.0547600619195046, "grad_norm": 0.1071525365114212, "learning_rate": 9.767763002161732e-05, "loss": 0.0087, "step": 5449 }, { "epoch": 1.054953560371517, "grad_norm": 0.15978941321372986, "learning_rate": 9.767675987987417e-05, "loss": 0.0108, "step": 5450 }, { "epoch": 1.0551470588235294, "grad_norm": 0.10471827536821365, "learning_rate": 9.767588957946936e-05, "loss": 0.0091, "step": 5451 }, { "epoch": 1.0553405572755419, "grad_norm": 0.1544119268655777, "learning_rate": 9.76750191204061e-05, "loss": 0.008, "step": 5452 }, { "epoch": 1.055534055727554, "grad_norm": 
0.11762352287769318, "learning_rate": 9.767414850268763e-05, "loss": 0.01, "step": 5453 }, { "epoch": 1.0557275541795665, "grad_norm": 0.11615002900362015, "learning_rate": 9.767327772631719e-05, "loss": 0.0095, "step": 5454 }, { "epoch": 1.055921052631579, "grad_norm": 0.11152032762765884, "learning_rate": 9.767240679129803e-05, "loss": 0.0095, "step": 5455 }, { "epoch": 1.0561145510835914, "grad_norm": 0.05774441361427307, "learning_rate": 9.767153569763337e-05, "loss": 0.0087, "step": 5456 }, { "epoch": 1.0563080495356036, "grad_norm": 0.13863033056259155, "learning_rate": 9.767066444532644e-05, "loss": 0.0087, "step": 5457 }, { "epoch": 1.056501547987616, "grad_norm": 0.07225525379180908, "learning_rate": 9.766979303438052e-05, "loss": 0.0107, "step": 5458 }, { "epoch": 1.0566950464396285, "grad_norm": 0.11230459809303284, "learning_rate": 9.76689214647988e-05, "loss": 0.0087, "step": 5459 }, { "epoch": 1.056888544891641, "grad_norm": 0.08659906685352325, "learning_rate": 9.766804973658456e-05, "loss": 0.009, "step": 5460 }, { "epoch": 1.0570820433436532, "grad_norm": 0.06209380552172661, "learning_rate": 9.766717784974102e-05, "loss": 0.0091, "step": 5461 }, { "epoch": 1.0572755417956656, "grad_norm": 0.1127300038933754, "learning_rate": 9.766630580427143e-05, "loss": 0.0091, "step": 5462 }, { "epoch": 1.057469040247678, "grad_norm": 0.048664141446352005, "learning_rate": 9.766543360017902e-05, "loss": 0.0098, "step": 5463 }, { "epoch": 1.0576625386996903, "grad_norm": 0.10089005529880524, "learning_rate": 9.766456123746704e-05, "loss": 0.0108, "step": 5464 }, { "epoch": 1.0578560371517027, "grad_norm": 0.03760287165641785, "learning_rate": 9.766368871613874e-05, "loss": 0.0079, "step": 5465 }, { "epoch": 1.0580495356037152, "grad_norm": 0.1039208471775055, "learning_rate": 9.766281603619736e-05, "loss": 0.009, "step": 5466 }, { "epoch": 1.0582430340557276, "grad_norm": 0.15014807879924774, "learning_rate": 9.766194319764614e-05, "loss": 0.0102, "step": 5467 }, { "epoch": 1.0584365325077398, "grad_norm": 0.0993228629231453, "learning_rate": 9.766107020048834e-05, "loss": 0.0079, "step": 5468 }, { "epoch": 1.0586300309597523, "grad_norm": 0.23681335151195526, "learning_rate": 9.766019704472716e-05, "loss": 0.01, "step": 5469 }, { "epoch": 1.0588235294117647, "grad_norm": 0.06911822408437729, "learning_rate": 9.765932373036591e-05, "loss": 0.0078, "step": 5470 }, { "epoch": 1.0590170278637772, "grad_norm": 0.26995617151260376, "learning_rate": 9.765845025740778e-05, "loss": 0.0113, "step": 5471 }, { "epoch": 1.0592105263157894, "grad_norm": 0.07901272922754288, "learning_rate": 9.765757662585606e-05, "loss": 0.0079, "step": 5472 }, { "epoch": 1.0594040247678018, "grad_norm": 0.23479953408241272, "learning_rate": 9.765670283571399e-05, "loss": 0.0075, "step": 5473 }, { "epoch": 1.0595975232198143, "grad_norm": 0.17175409197807312, "learning_rate": 9.76558288869848e-05, "loss": 0.0087, "step": 5474 }, { "epoch": 1.0597910216718267, "grad_norm": 0.14189687371253967, "learning_rate": 9.765495477967174e-05, "loss": 0.0108, "step": 5475 }, { "epoch": 1.059984520123839, "grad_norm": 0.2135235071182251, "learning_rate": 9.765408051377808e-05, "loss": 0.0085, "step": 5476 }, { "epoch": 1.0601780185758514, "grad_norm": 0.10773157328367233, "learning_rate": 9.765320608930706e-05, "loss": 0.0103, "step": 5477 }, { "epoch": 1.0603715170278638, "grad_norm": 0.1424858272075653, "learning_rate": 9.76523315062619e-05, "loss": 0.0096, "step": 5478 }, { "epoch": 1.0605650154798762, "grad_norm": 
0.08119010925292969, "learning_rate": 9.765145676464593e-05, "loss": 0.008, "step": 5479 }, { "epoch": 1.0607585139318885, "grad_norm": 0.09722243249416351, "learning_rate": 9.765058186446232e-05, "loss": 0.009, "step": 5480 }, { "epoch": 1.060952012383901, "grad_norm": 0.10728112608194351, "learning_rate": 9.764970680571437e-05, "loss": 0.0093, "step": 5481 }, { "epoch": 1.0611455108359134, "grad_norm": 0.06237940862774849, "learning_rate": 9.764883158840531e-05, "loss": 0.0108, "step": 5482 }, { "epoch": 1.0613390092879258, "grad_norm": 0.12242524325847626, "learning_rate": 9.76479562125384e-05, "loss": 0.0086, "step": 5483 }, { "epoch": 1.061532507739938, "grad_norm": 0.05482885241508484, "learning_rate": 9.76470806781169e-05, "loss": 0.0107, "step": 5484 }, { "epoch": 1.0617260061919505, "grad_norm": 0.10549306869506836, "learning_rate": 9.764620498514407e-05, "loss": 0.011, "step": 5485 }, { "epoch": 1.061919504643963, "grad_norm": 0.08102574944496155, "learning_rate": 9.764532913362317e-05, "loss": 0.0092, "step": 5486 }, { "epoch": 1.0621130030959753, "grad_norm": 0.11212291568517685, "learning_rate": 9.764445312355744e-05, "loss": 0.0084, "step": 5487 }, { "epoch": 1.0623065015479876, "grad_norm": 0.08887304365634918, "learning_rate": 9.764357695495011e-05, "loss": 0.0107, "step": 5488 }, { "epoch": 1.0625, "grad_norm": 0.11133873462677002, "learning_rate": 9.76427006278045e-05, "loss": 0.0083, "step": 5489 }, { "epoch": 1.0626934984520124, "grad_norm": 0.09734763205051422, "learning_rate": 9.764182414212383e-05, "loss": 0.0099, "step": 5490 }, { "epoch": 1.0628869969040249, "grad_norm": 0.12009774893522263, "learning_rate": 9.764094749791137e-05, "loss": 0.008, "step": 5491 }, { "epoch": 1.063080495356037, "grad_norm": 0.11335133016109467, "learning_rate": 9.764007069517035e-05, "loss": 0.008, "step": 5492 }, { "epoch": 1.0632739938080495, "grad_norm": 0.1049504205584526, "learning_rate": 9.763919373390406e-05, "loss": 0.0097, "step": 5493 }, { "epoch": 1.063467492260062, "grad_norm": 0.14872479438781738, "learning_rate": 9.763831661411577e-05, "loss": 0.0077, "step": 5494 }, { "epoch": 1.0636609907120742, "grad_norm": 0.06580080091953278, "learning_rate": 9.763743933580873e-05, "loss": 0.008, "step": 5495 }, { "epoch": 1.0638544891640866, "grad_norm": 0.1330353021621704, "learning_rate": 9.763656189898617e-05, "loss": 0.0107, "step": 5496 }, { "epoch": 1.064047987616099, "grad_norm": 0.08108723163604736, "learning_rate": 9.763568430365139e-05, "loss": 0.0113, "step": 5497 }, { "epoch": 1.0642414860681115, "grad_norm": 0.12485574185848236, "learning_rate": 9.763480654980766e-05, "loss": 0.0096, "step": 5498 }, { "epoch": 1.0644349845201238, "grad_norm": 0.06402599066495895, "learning_rate": 9.763392863745821e-05, "loss": 0.0092, "step": 5499 }, { "epoch": 1.0646284829721362, "grad_norm": 0.10776469111442566, "learning_rate": 9.763305056660633e-05, "loss": 0.0111, "step": 5500 }, { "epoch": 1.0648219814241486, "grad_norm": 0.06313479691743851, "learning_rate": 9.763217233725524e-05, "loss": 0.0099, "step": 5501 }, { "epoch": 1.065015479876161, "grad_norm": 0.1159730777144432, "learning_rate": 9.763129394940826e-05, "loss": 0.0094, "step": 5502 }, { "epoch": 1.0652089783281733, "grad_norm": 0.03321800380945206, "learning_rate": 9.763041540306866e-05, "loss": 0.0083, "step": 5503 }, { "epoch": 1.0654024767801857, "grad_norm": 0.09868699312210083, "learning_rate": 9.762953669823963e-05, "loss": 0.0088, "step": 5504 }, { "epoch": 1.0655959752321982, "grad_norm": 0.08211364597082138, 
"learning_rate": 9.762865783492452e-05, "loss": 0.0103, "step": 5505 }, { "epoch": 1.0657894736842106, "grad_norm": 0.1376837193965912, "learning_rate": 9.762777881312656e-05, "loss": 0.0087, "step": 5506 }, { "epoch": 1.0659829721362228, "grad_norm": 0.1113557517528534, "learning_rate": 9.7626899632849e-05, "loss": 0.01, "step": 5507 }, { "epoch": 1.0661764705882353, "grad_norm": 0.13723589479923248, "learning_rate": 9.762602029409516e-05, "loss": 0.0106, "step": 5508 }, { "epoch": 1.0663699690402477, "grad_norm": 0.10540252923965454, "learning_rate": 9.762514079686825e-05, "loss": 0.0118, "step": 5509 }, { "epoch": 1.0665634674922602, "grad_norm": 0.0689256563782692, "learning_rate": 9.762426114117159e-05, "loss": 0.0077, "step": 5510 }, { "epoch": 1.0667569659442724, "grad_norm": 0.11788547039031982, "learning_rate": 9.762338132700843e-05, "loss": 0.0094, "step": 5511 }, { "epoch": 1.0669504643962848, "grad_norm": 0.04736299440264702, "learning_rate": 9.762250135438205e-05, "loss": 0.0086, "step": 5512 }, { "epoch": 1.0671439628482973, "grad_norm": 0.10783234983682632, "learning_rate": 9.762162122329568e-05, "loss": 0.0094, "step": 5513 }, { "epoch": 1.0673374613003097, "grad_norm": 0.10102719813585281, "learning_rate": 9.762074093375266e-05, "loss": 0.0104, "step": 5514 }, { "epoch": 1.067530959752322, "grad_norm": 0.13728618621826172, "learning_rate": 9.76198604857562e-05, "loss": 0.0098, "step": 5515 }, { "epoch": 1.0677244582043344, "grad_norm": 0.09772077947854996, "learning_rate": 9.761897987930962e-05, "loss": 0.0106, "step": 5516 }, { "epoch": 1.0679179566563468, "grad_norm": 0.11864043027162552, "learning_rate": 9.761809911441616e-05, "loss": 0.0111, "step": 5517 }, { "epoch": 1.068111455108359, "grad_norm": 0.10557582229375839, "learning_rate": 9.761721819107913e-05, "loss": 0.009, "step": 5518 }, { "epoch": 1.0683049535603715, "grad_norm": 0.11941006779670715, "learning_rate": 9.761633710930177e-05, "loss": 0.0078, "step": 5519 }, { "epoch": 1.068498452012384, "grad_norm": 0.0828142762184143, "learning_rate": 9.761545586908737e-05, "loss": 0.0099, "step": 5520 }, { "epoch": 1.0686919504643964, "grad_norm": 0.12902070581912994, "learning_rate": 9.76145744704392e-05, "loss": 0.0083, "step": 5521 }, { "epoch": 1.0688854489164086, "grad_norm": 0.08919044584035873, "learning_rate": 9.761369291336055e-05, "loss": 0.0078, "step": 5522 }, { "epoch": 1.069078947368421, "grad_norm": 0.14702540636062622, "learning_rate": 9.761281119785468e-05, "loss": 0.0112, "step": 5523 }, { "epoch": 1.0692724458204335, "grad_norm": 0.11453410238027573, "learning_rate": 9.761192932392489e-05, "loss": 0.009, "step": 5524 }, { "epoch": 1.069465944272446, "grad_norm": 0.12862247228622437, "learning_rate": 9.761104729157444e-05, "loss": 0.0089, "step": 5525 }, { "epoch": 1.0696594427244581, "grad_norm": 0.10197611898183823, "learning_rate": 9.761016510080661e-05, "loss": 0.0086, "step": 5526 }, { "epoch": 1.0698529411764706, "grad_norm": 0.09747637808322906, "learning_rate": 9.760928275162469e-05, "loss": 0.0094, "step": 5527 }, { "epoch": 1.070046439628483, "grad_norm": 0.12228094041347504, "learning_rate": 9.760840024403197e-05, "loss": 0.0097, "step": 5528 }, { "epoch": 1.0702399380804954, "grad_norm": 0.08227039873600006, "learning_rate": 9.76075175780317e-05, "loss": 0.0101, "step": 5529 }, { "epoch": 1.0704334365325077, "grad_norm": 0.06079642102122307, "learning_rate": 9.76066347536272e-05, "loss": 0.0093, "step": 5530 }, { "epoch": 1.07062693498452, "grad_norm": 0.058860357850790024, 
"learning_rate": 9.760575177082171e-05, "loss": 0.0096, "step": 5531 }, { "epoch": 1.0708204334365325, "grad_norm": 0.07181206345558167, "learning_rate": 9.760486862961853e-05, "loss": 0.0087, "step": 5532 }, { "epoch": 1.071013931888545, "grad_norm": 0.1257486194372177, "learning_rate": 9.760398533002096e-05, "loss": 0.0103, "step": 5533 }, { "epoch": 1.0712074303405572, "grad_norm": 0.059247635304927826, "learning_rate": 9.760310187203226e-05, "loss": 0.0103, "step": 5534 }, { "epoch": 1.0714009287925697, "grad_norm": 0.14733746647834778, "learning_rate": 9.760221825565573e-05, "loss": 0.0079, "step": 5535 }, { "epoch": 1.071594427244582, "grad_norm": 0.07351483404636383, "learning_rate": 9.760133448089466e-05, "loss": 0.0096, "step": 5536 }, { "epoch": 1.0717879256965945, "grad_norm": 0.16821128129959106, "learning_rate": 9.760045054775232e-05, "loss": 0.0104, "step": 5537 }, { "epoch": 1.0719814241486068, "grad_norm": 0.09388666599988937, "learning_rate": 9.759956645623199e-05, "loss": 0.0095, "step": 5538 }, { "epoch": 1.0721749226006192, "grad_norm": 0.1510242074728012, "learning_rate": 9.759868220633698e-05, "loss": 0.0091, "step": 5539 }, { "epoch": 1.0723684210526316, "grad_norm": 0.08925576508045197, "learning_rate": 9.759779779807056e-05, "loss": 0.0089, "step": 5540 }, { "epoch": 1.0725619195046439, "grad_norm": 0.06462348997592926, "learning_rate": 9.759691323143602e-05, "loss": 0.0086, "step": 5541 }, { "epoch": 1.0727554179566563, "grad_norm": 0.10720694810152054, "learning_rate": 9.759602850643665e-05, "loss": 0.0093, "step": 5542 }, { "epoch": 1.0729489164086687, "grad_norm": 0.05412963777780533, "learning_rate": 9.759514362307577e-05, "loss": 0.0101, "step": 5543 }, { "epoch": 1.0731424148606812, "grad_norm": 0.06120719015598297, "learning_rate": 9.759425858135662e-05, "loss": 0.0084, "step": 5544 }, { "epoch": 1.0733359133126934, "grad_norm": 0.04545992985367775, "learning_rate": 9.759337338128252e-05, "loss": 0.0091, "step": 5545 }, { "epoch": 1.0735294117647058, "grad_norm": 0.07164917141199112, "learning_rate": 9.759248802285674e-05, "loss": 0.0098, "step": 5546 }, { "epoch": 1.0737229102167183, "grad_norm": 0.0726623609662056, "learning_rate": 9.759160250608259e-05, "loss": 0.0081, "step": 5547 }, { "epoch": 1.0739164086687307, "grad_norm": 0.047383345663547516, "learning_rate": 9.759071683096335e-05, "loss": 0.0097, "step": 5548 }, { "epoch": 1.074109907120743, "grad_norm": 0.1820589005947113, "learning_rate": 9.758983099750234e-05, "loss": 0.0101, "step": 5549 }, { "epoch": 1.0743034055727554, "grad_norm": 0.03854445368051529, "learning_rate": 9.758894500570283e-05, "loss": 0.0091, "step": 5550 }, { "epoch": 1.0744969040247678, "grad_norm": 0.12793482840061188, "learning_rate": 9.75880588555681e-05, "loss": 0.0093, "step": 5551 }, { "epoch": 1.0746904024767803, "grad_norm": 0.038063060492277145, "learning_rate": 9.758717254710147e-05, "loss": 0.0096, "step": 5552 }, { "epoch": 1.0748839009287925, "grad_norm": 0.10972949862480164, "learning_rate": 9.758628608030622e-05, "loss": 0.0082, "step": 5553 }, { "epoch": 1.075077399380805, "grad_norm": 0.030872073024511337, "learning_rate": 9.758539945518565e-05, "loss": 0.0097, "step": 5554 }, { "epoch": 1.0752708978328174, "grad_norm": 0.06330075860023499, "learning_rate": 9.758451267174306e-05, "loss": 0.0078, "step": 5555 }, { "epoch": 1.0754643962848298, "grad_norm": 0.06011971831321716, "learning_rate": 9.758362572998176e-05, "loss": 0.0104, "step": 5556 }, { "epoch": 1.075657894736842, "grad_norm": 
0.051518261432647705, "learning_rate": 9.758273862990503e-05, "loss": 0.0107, "step": 5557 }, { "epoch": 1.0758513931888545, "grad_norm": 0.05319853499531746, "learning_rate": 9.758185137151616e-05, "loss": 0.0109, "step": 5558 }, { "epoch": 1.076044891640867, "grad_norm": 0.06013663113117218, "learning_rate": 9.758096395481846e-05, "loss": 0.0086, "step": 5559 }, { "epoch": 1.0762383900928794, "grad_norm": 0.06289359927177429, "learning_rate": 9.758007637981523e-05, "loss": 0.008, "step": 5560 }, { "epoch": 1.0764318885448916, "grad_norm": 0.06085214763879776, "learning_rate": 9.757918864650977e-05, "loss": 0.009, "step": 5561 }, { "epoch": 1.076625386996904, "grad_norm": 0.06298092752695084, "learning_rate": 9.757830075490539e-05, "loss": 0.0098, "step": 5562 }, { "epoch": 1.0768188854489165, "grad_norm": 0.055683717131614685, "learning_rate": 9.757741270500535e-05, "loss": 0.0122, "step": 5563 }, { "epoch": 1.0770123839009287, "grad_norm": 0.057145558297634125, "learning_rate": 9.757652449681301e-05, "loss": 0.0068, "step": 5564 }, { "epoch": 1.0772058823529411, "grad_norm": 0.10790160298347473, "learning_rate": 9.757563613033161e-05, "loss": 0.0086, "step": 5565 }, { "epoch": 1.0773993808049536, "grad_norm": 0.12164710462093353, "learning_rate": 9.75747476055645e-05, "loss": 0.008, "step": 5566 }, { "epoch": 1.077592879256966, "grad_norm": 0.08952388167381287, "learning_rate": 9.757385892251499e-05, "loss": 0.0086, "step": 5567 }, { "epoch": 1.0777863777089782, "grad_norm": 0.07208093255758286, "learning_rate": 9.757297008118634e-05, "loss": 0.0095, "step": 5568 }, { "epoch": 1.0779798761609907, "grad_norm": 0.12817944586277008, "learning_rate": 9.757208108158189e-05, "loss": 0.0103, "step": 5569 }, { "epoch": 1.078173374613003, "grad_norm": 0.04793292656540871, "learning_rate": 9.757119192370493e-05, "loss": 0.0108, "step": 5570 }, { "epoch": 1.0783668730650156, "grad_norm": 0.14266812801361084, "learning_rate": 9.757030260755876e-05, "loss": 0.0083, "step": 5571 }, { "epoch": 1.0785603715170278, "grad_norm": 0.10414621233940125, "learning_rate": 9.75694131331467e-05, "loss": 0.0078, "step": 5572 }, { "epoch": 1.0787538699690402, "grad_norm": 0.09004566818475723, "learning_rate": 9.756852350047207e-05, "loss": 0.0093, "step": 5573 }, { "epoch": 1.0789473684210527, "grad_norm": 0.09398838877677917, "learning_rate": 9.756763370953814e-05, "loss": 0.0099, "step": 5574 }, { "epoch": 1.079140866873065, "grad_norm": 0.14533428847789764, "learning_rate": 9.756674376034824e-05, "loss": 0.0085, "step": 5575 }, { "epoch": 1.0793343653250773, "grad_norm": 0.17589160799980164, "learning_rate": 9.756585365290565e-05, "loss": 0.0082, "step": 5576 }, { "epoch": 1.0795278637770898, "grad_norm": 0.2032315731048584, "learning_rate": 9.756496338721374e-05, "loss": 0.0083, "step": 5577 }, { "epoch": 1.0797213622291022, "grad_norm": 0.13076132535934448, "learning_rate": 9.756407296327578e-05, "loss": 0.0089, "step": 5578 }, { "epoch": 1.0799148606811146, "grad_norm": 0.22930724918842316, "learning_rate": 9.756318238109508e-05, "loss": 0.0094, "step": 5579 }, { "epoch": 1.0801083591331269, "grad_norm": 0.07689011096954346, "learning_rate": 9.756229164067495e-05, "loss": 0.0082, "step": 5580 }, { "epoch": 1.0803018575851393, "grad_norm": 0.20687583088874817, "learning_rate": 9.756140074201871e-05, "loss": 0.0105, "step": 5581 }, { "epoch": 1.0804953560371517, "grad_norm": 0.10650331526994705, "learning_rate": 9.756050968512965e-05, "loss": 0.0107, "step": 5582 }, { "epoch": 1.0806888544891642, "grad_norm": 
0.12333529442548752, "learning_rate": 9.755961847001113e-05, "loss": 0.0096, "step": 5583 }, { "epoch": 1.0808823529411764, "grad_norm": 0.13891515135765076, "learning_rate": 9.755872709666642e-05, "loss": 0.009, "step": 5584 }, { "epoch": 1.0810758513931888, "grad_norm": 0.10375834256410599, "learning_rate": 9.755783556509886e-05, "loss": 0.0077, "step": 5585 }, { "epoch": 1.0812693498452013, "grad_norm": 0.14947906136512756, "learning_rate": 9.755694387531174e-05, "loss": 0.0093, "step": 5586 }, { "epoch": 1.0814628482972135, "grad_norm": 0.11342350393533707, "learning_rate": 9.75560520273084e-05, "loss": 0.0101, "step": 5587 }, { "epoch": 1.081656346749226, "grad_norm": 0.10910972952842712, "learning_rate": 9.755516002109214e-05, "loss": 0.0104, "step": 5588 }, { "epoch": 1.0818498452012384, "grad_norm": 0.10766997188329697, "learning_rate": 9.75542678566663e-05, "loss": 0.008, "step": 5589 }, { "epoch": 1.0820433436532508, "grad_norm": 0.11789610236883163, "learning_rate": 9.755337553403417e-05, "loss": 0.0099, "step": 5590 }, { "epoch": 1.0822368421052633, "grad_norm": 0.09860064834356308, "learning_rate": 9.755248305319905e-05, "loss": 0.0099, "step": 5591 }, { "epoch": 1.0824303405572755, "grad_norm": 0.08957154303789139, "learning_rate": 9.755159041416431e-05, "loss": 0.0079, "step": 5592 }, { "epoch": 1.082623839009288, "grad_norm": 0.058066509664058685, "learning_rate": 9.755069761693323e-05, "loss": 0.0089, "step": 5593 }, { "epoch": 1.0828173374613004, "grad_norm": 0.10932911932468414, "learning_rate": 9.754980466150915e-05, "loss": 0.0101, "step": 5594 }, { "epoch": 1.0830108359133126, "grad_norm": 0.09793099015951157, "learning_rate": 9.754891154789537e-05, "loss": 0.0089, "step": 5595 }, { "epoch": 1.083204334365325, "grad_norm": 0.09674528241157532, "learning_rate": 9.754801827609522e-05, "loss": 0.009, "step": 5596 }, { "epoch": 1.0833978328173375, "grad_norm": 0.10361119359731674, "learning_rate": 9.754712484611202e-05, "loss": 0.0082, "step": 5597 }, { "epoch": 1.08359133126935, "grad_norm": 0.11904668807983398, "learning_rate": 9.754623125794911e-05, "loss": 0.0108, "step": 5598 }, { "epoch": 1.0837848297213621, "grad_norm": 0.06799248605966568, "learning_rate": 9.75453375116098e-05, "loss": 0.011, "step": 5599 }, { "epoch": 1.0839783281733746, "grad_norm": 0.0796453133225441, "learning_rate": 9.754444360709739e-05, "loss": 0.0088, "step": 5600 }, { "epoch": 1.084171826625387, "grad_norm": 0.048518870025873184, "learning_rate": 9.754354954441522e-05, "loss": 0.0094, "step": 5601 }, { "epoch": 1.0843653250773995, "grad_norm": 0.04313129186630249, "learning_rate": 9.754265532356663e-05, "loss": 0.0075, "step": 5602 }, { "epoch": 1.0845588235294117, "grad_norm": 0.057141099125146866, "learning_rate": 9.754176094455492e-05, "loss": 0.0097, "step": 5603 }, { "epoch": 1.0847523219814241, "grad_norm": 0.06315925717353821, "learning_rate": 9.754086640738344e-05, "loss": 0.0091, "step": 5604 }, { "epoch": 1.0849458204334366, "grad_norm": 0.07481607794761658, "learning_rate": 9.753997171205548e-05, "loss": 0.0097, "step": 5605 }, { "epoch": 1.085139318885449, "grad_norm": 0.08386676758527756, "learning_rate": 9.75390768585744e-05, "loss": 0.0081, "step": 5606 }, { "epoch": 1.0853328173374612, "grad_norm": 0.07408897578716278, "learning_rate": 9.753818184694351e-05, "loss": 0.0088, "step": 5607 }, { "epoch": 1.0855263157894737, "grad_norm": 0.06378458440303802, "learning_rate": 9.753728667716613e-05, "loss": 0.0087, "step": 5608 }, { "epoch": 1.0857198142414861, "grad_norm": 
0.09100862592458725, "learning_rate": 9.753639134924561e-05, "loss": 0.0093, "step": 5609 }, { "epoch": 1.0859133126934986, "grad_norm": 0.05360248684883118, "learning_rate": 9.753549586318529e-05, "loss": 0.0081, "step": 5610 }, { "epoch": 1.0861068111455108, "grad_norm": 0.09315570443868637, "learning_rate": 9.753460021898845e-05, "loss": 0.0084, "step": 5611 }, { "epoch": 1.0863003095975232, "grad_norm": 0.05472778156399727, "learning_rate": 9.753370441665846e-05, "loss": 0.0084, "step": 5612 }, { "epoch": 1.0864938080495357, "grad_norm": 0.07638031989336014, "learning_rate": 9.753280845619863e-05, "loss": 0.0071, "step": 5613 }, { "epoch": 1.086687306501548, "grad_norm": 0.0640922337770462, "learning_rate": 9.75319123376123e-05, "loss": 0.0079, "step": 5614 }, { "epoch": 1.0868808049535603, "grad_norm": 0.08971994370222092, "learning_rate": 9.75310160609028e-05, "loss": 0.0093, "step": 5615 }, { "epoch": 1.0870743034055728, "grad_norm": 0.07073201984167099, "learning_rate": 9.753011962607347e-05, "loss": 0.0073, "step": 5616 }, { "epoch": 1.0872678018575852, "grad_norm": 0.09317958354949951, "learning_rate": 9.752922303312763e-05, "loss": 0.0091, "step": 5617 }, { "epoch": 1.0874613003095974, "grad_norm": 0.08739150315523148, "learning_rate": 9.75283262820686e-05, "loss": 0.0093, "step": 5618 }, { "epoch": 1.0876547987616099, "grad_norm": 0.09448112547397614, "learning_rate": 9.752742937289977e-05, "loss": 0.0097, "step": 5619 }, { "epoch": 1.0878482972136223, "grad_norm": 0.11912044137716293, "learning_rate": 9.752653230562441e-05, "loss": 0.0072, "step": 5620 }, { "epoch": 1.0880417956656347, "grad_norm": 0.06907936185598373, "learning_rate": 9.752563508024588e-05, "loss": 0.0085, "step": 5621 }, { "epoch": 1.088235294117647, "grad_norm": 0.11785956472158432, "learning_rate": 9.752473769676752e-05, "loss": 0.0099, "step": 5622 }, { "epoch": 1.0884287925696594, "grad_norm": 0.05267910659313202, "learning_rate": 9.752384015519268e-05, "loss": 0.0086, "step": 5623 }, { "epoch": 1.0886222910216719, "grad_norm": 0.09171941131353378, "learning_rate": 9.752294245552465e-05, "loss": 0.009, "step": 5624 }, { "epoch": 1.0888157894736843, "grad_norm": 0.043884117156267166, "learning_rate": 9.752204459776682e-05, "loss": 0.0088, "step": 5625 }, { "epoch": 1.0890092879256965, "grad_norm": 0.0691949874162674, "learning_rate": 9.752114658192251e-05, "loss": 0.0098, "step": 5626 }, { "epoch": 1.089202786377709, "grad_norm": 0.0606159009039402, "learning_rate": 9.752024840799505e-05, "loss": 0.0079, "step": 5627 }, { "epoch": 1.0893962848297214, "grad_norm": 0.06306983530521393, "learning_rate": 9.751935007598778e-05, "loss": 0.0079, "step": 5628 }, { "epoch": 1.0895897832817338, "grad_norm": 0.08726825565099716, "learning_rate": 9.751845158590404e-05, "loss": 0.0103, "step": 5629 }, { "epoch": 1.089783281733746, "grad_norm": 0.06393715739250183, "learning_rate": 9.751755293774717e-05, "loss": 0.0104, "step": 5630 }, { "epoch": 1.0899767801857585, "grad_norm": 0.061216190457344055, "learning_rate": 9.751665413152052e-05, "loss": 0.0098, "step": 5631 }, { "epoch": 1.090170278637771, "grad_norm": 0.06405273824930191, "learning_rate": 9.751575516722743e-05, "loss": 0.0093, "step": 5632 }, { "epoch": 1.0903637770897834, "grad_norm": 0.09484364092350006, "learning_rate": 9.751485604487124e-05, "loss": 0.011, "step": 5633 }, { "epoch": 1.0905572755417956, "grad_norm": 0.06968140602111816, "learning_rate": 9.75139567644553e-05, "loss": 0.0111, "step": 5634 }, { "epoch": 1.090750773993808, "grad_norm": 
0.05792675539851189, "learning_rate": 9.751305732598291e-05, "loss": 0.0079, "step": 5635 }, { "epoch": 1.0909442724458205, "grad_norm": 0.06893172115087509, "learning_rate": 9.751215772945749e-05, "loss": 0.0079, "step": 5636 }, { "epoch": 1.091137770897833, "grad_norm": 0.05329551920294762, "learning_rate": 9.751125797488232e-05, "loss": 0.0085, "step": 5637 }, { "epoch": 1.0913312693498451, "grad_norm": 0.07650560140609741, "learning_rate": 9.751035806226077e-05, "loss": 0.0076, "step": 5638 }, { "epoch": 1.0915247678018576, "grad_norm": 0.08803149312734604, "learning_rate": 9.750945799159618e-05, "loss": 0.0095, "step": 5639 }, { "epoch": 1.09171826625387, "grad_norm": 0.1096799224615097, "learning_rate": 9.75085577628919e-05, "loss": 0.009, "step": 5640 }, { "epoch": 1.0919117647058822, "grad_norm": 0.09814769774675369, "learning_rate": 9.750765737615129e-05, "loss": 0.0086, "step": 5641 }, { "epoch": 1.0921052631578947, "grad_norm": 0.12472505867481232, "learning_rate": 9.750675683137768e-05, "loss": 0.0096, "step": 5642 }, { "epoch": 1.0922987616099071, "grad_norm": 0.13231274485588074, "learning_rate": 9.750585612857443e-05, "loss": 0.0094, "step": 5643 }, { "epoch": 1.0924922600619196, "grad_norm": 0.11532861739397049, "learning_rate": 9.750495526774485e-05, "loss": 0.0086, "step": 5644 }, { "epoch": 1.0926857585139318, "grad_norm": 0.12101561576128006, "learning_rate": 9.750405424889235e-05, "loss": 0.0092, "step": 5645 }, { "epoch": 1.0928792569659442, "grad_norm": 0.09871792048215866, "learning_rate": 9.750315307202023e-05, "loss": 0.0091, "step": 5646 }, { "epoch": 1.0930727554179567, "grad_norm": 0.11215808987617493, "learning_rate": 9.750225173713186e-05, "loss": 0.0092, "step": 5647 }, { "epoch": 1.0932662538699691, "grad_norm": 0.09053651243448257, "learning_rate": 9.750135024423059e-05, "loss": 0.0091, "step": 5648 }, { "epoch": 1.0934597523219813, "grad_norm": 0.06369762867689133, "learning_rate": 9.75004485933198e-05, "loss": 0.0088, "step": 5649 }, { "epoch": 1.0936532507739938, "grad_norm": 0.06954903900623322, "learning_rate": 9.749954678440279e-05, "loss": 0.0104, "step": 5650 }, { "epoch": 1.0938467492260062, "grad_norm": 0.07850047945976257, "learning_rate": 9.749864481748294e-05, "loss": 0.0076, "step": 5651 }, { "epoch": 1.0940402476780187, "grad_norm": 0.07979189604520798, "learning_rate": 9.74977426925636e-05, "loss": 0.0081, "step": 5652 }, { "epoch": 1.0942337461300309, "grad_norm": 0.07123921811580658, "learning_rate": 9.749684040964813e-05, "loss": 0.0079, "step": 5653 }, { "epoch": 1.0944272445820433, "grad_norm": 0.08172252029180527, "learning_rate": 9.749593796873988e-05, "loss": 0.0106, "step": 5654 }, { "epoch": 1.0946207430340558, "grad_norm": 0.06811464577913284, "learning_rate": 9.749503536984219e-05, "loss": 0.01, "step": 5655 }, { "epoch": 1.0948142414860682, "grad_norm": 0.06500127166509628, "learning_rate": 9.749413261295844e-05, "loss": 0.0095, "step": 5656 }, { "epoch": 1.0950077399380804, "grad_norm": 0.09491588920354843, "learning_rate": 9.749322969809196e-05, "loss": 0.0087, "step": 5657 }, { "epoch": 1.0952012383900929, "grad_norm": 0.04922192171216011, "learning_rate": 9.749232662524616e-05, "loss": 0.0089, "step": 5658 }, { "epoch": 1.0953947368421053, "grad_norm": 0.09889393299818039, "learning_rate": 9.749142339442433e-05, "loss": 0.0099, "step": 5659 }, { "epoch": 1.0955882352941178, "grad_norm": 0.09417562931776047, "learning_rate": 9.749052000562987e-05, "loss": 0.0086, "step": 5660 }, { "epoch": 1.09578173374613, "grad_norm": 
0.06997023522853851, "learning_rate": 9.748961645886613e-05, "loss": 0.009, "step": 5661 }, { "epoch": 1.0959752321981424, "grad_norm": 0.1438666731119156, "learning_rate": 9.748871275413645e-05, "loss": 0.0073, "step": 5662 }, { "epoch": 1.0961687306501549, "grad_norm": 0.05689144879579544, "learning_rate": 9.74878088914442e-05, "loss": 0.009, "step": 5663 }, { "epoch": 1.096362229102167, "grad_norm": 0.12937738001346588, "learning_rate": 9.748690487079278e-05, "loss": 0.0097, "step": 5664 }, { "epoch": 1.0965557275541795, "grad_norm": 0.07543367892503738, "learning_rate": 9.748600069218548e-05, "loss": 0.009, "step": 5665 }, { "epoch": 1.096749226006192, "grad_norm": 0.09711724519729614, "learning_rate": 9.748509635562573e-05, "loss": 0.0106, "step": 5666 }, { "epoch": 1.0969427244582044, "grad_norm": 0.10126376897096634, "learning_rate": 9.748419186111684e-05, "loss": 0.0092, "step": 5667 }, { "epoch": 1.0971362229102166, "grad_norm": 0.13998295366764069, "learning_rate": 9.74832872086622e-05, "loss": 0.0097, "step": 5668 }, { "epoch": 1.097329721362229, "grad_norm": 0.09062854200601578, "learning_rate": 9.748238239826517e-05, "loss": 0.0097, "step": 5669 }, { "epoch": 1.0975232198142415, "grad_norm": 0.13826587796211243, "learning_rate": 9.74814774299291e-05, "loss": 0.0099, "step": 5670 }, { "epoch": 1.097716718266254, "grad_norm": 0.09015803784132004, "learning_rate": 9.748057230365737e-05, "loss": 0.0094, "step": 5671 }, { "epoch": 1.0979102167182662, "grad_norm": 0.10903748869895935, "learning_rate": 9.747966701945332e-05, "loss": 0.0088, "step": 5672 }, { "epoch": 1.0981037151702786, "grad_norm": 0.08779103308916092, "learning_rate": 9.747876157732035e-05, "loss": 0.0093, "step": 5673 }, { "epoch": 1.098297213622291, "grad_norm": 0.09246654063463211, "learning_rate": 9.747785597726184e-05, "loss": 0.0102, "step": 5674 }, { "epoch": 1.0984907120743035, "grad_norm": 0.10866063088178635, "learning_rate": 9.74769502192811e-05, "loss": 0.01, "step": 5675 }, { "epoch": 1.0986842105263157, "grad_norm": 0.08044418692588806, "learning_rate": 9.747604430338152e-05, "loss": 0.0105, "step": 5676 }, { "epoch": 1.0988777089783281, "grad_norm": 0.14017550647258759, "learning_rate": 9.747513822956649e-05, "loss": 0.0092, "step": 5677 }, { "epoch": 1.0990712074303406, "grad_norm": 0.10594582557678223, "learning_rate": 9.747423199783934e-05, "loss": 0.0073, "step": 5678 }, { "epoch": 1.099264705882353, "grad_norm": 0.13335366547107697, "learning_rate": 9.747332560820348e-05, "loss": 0.0093, "step": 5679 }, { "epoch": 1.0994582043343653, "grad_norm": 0.08414226025342941, "learning_rate": 9.747241906066226e-05, "loss": 0.0085, "step": 5680 }, { "epoch": 1.0996517027863777, "grad_norm": 0.13885468244552612, "learning_rate": 9.747151235521906e-05, "loss": 0.0096, "step": 5681 }, { "epoch": 1.0998452012383901, "grad_norm": 0.04279753565788269, "learning_rate": 9.74706054918772e-05, "loss": 0.01, "step": 5682 }, { "epoch": 1.1000386996904026, "grad_norm": 0.12542976438999176, "learning_rate": 9.746969847064014e-05, "loss": 0.0087, "step": 5683 }, { "epoch": 1.1002321981424148, "grad_norm": 0.05974390357732773, "learning_rate": 9.746879129151119e-05, "loss": 0.009, "step": 5684 }, { "epoch": 1.1004256965944272, "grad_norm": 0.10586865991353989, "learning_rate": 9.746788395449374e-05, "loss": 0.0081, "step": 5685 }, { "epoch": 1.1006191950464397, "grad_norm": 0.09582795202732086, "learning_rate": 9.746697645959115e-05, "loss": 0.0089, "step": 5686 }, { "epoch": 1.100812693498452, "grad_norm": 
0.0989563912153244, "learning_rate": 9.746606880680681e-05, "loss": 0.0079, "step": 5687 }, { "epoch": 1.1010061919504643, "grad_norm": 0.08773239701986313, "learning_rate": 9.746516099614413e-05, "loss": 0.0081, "step": 5688 }, { "epoch": 1.1011996904024768, "grad_norm": 0.059193965047597885, "learning_rate": 9.74642530276064e-05, "loss": 0.0099, "step": 5689 }, { "epoch": 1.1013931888544892, "grad_norm": 0.08056852966547012, "learning_rate": 9.746334490119705e-05, "loss": 0.0107, "step": 5690 }, { "epoch": 1.1015866873065014, "grad_norm": 0.08918197453022003, "learning_rate": 9.746243661691944e-05, "loss": 0.0079, "step": 5691 }, { "epoch": 1.1017801857585139, "grad_norm": 0.06563087552785873, "learning_rate": 9.746152817477697e-05, "loss": 0.0115, "step": 5692 }, { "epoch": 1.1019736842105263, "grad_norm": 0.10562524944543839, "learning_rate": 9.746061957477299e-05, "loss": 0.0074, "step": 5693 }, { "epoch": 1.1021671826625388, "grad_norm": 0.061089325696229935, "learning_rate": 9.74597108169109e-05, "loss": 0.0098, "step": 5694 }, { "epoch": 1.102360681114551, "grad_norm": 0.10269968211650848, "learning_rate": 9.745880190119405e-05, "loss": 0.0079, "step": 5695 }, { "epoch": 1.1025541795665634, "grad_norm": 0.06416429579257965, "learning_rate": 9.745789282762585e-05, "loss": 0.0079, "step": 5696 }, { "epoch": 1.1027476780185759, "grad_norm": 0.07648283243179321, "learning_rate": 9.745698359620966e-05, "loss": 0.0089, "step": 5697 }, { "epoch": 1.1029411764705883, "grad_norm": 0.043454188853502274, "learning_rate": 9.745607420694886e-05, "loss": 0.0085, "step": 5698 }, { "epoch": 1.1031346749226005, "grad_norm": 0.06948986649513245, "learning_rate": 9.745516465984687e-05, "loss": 0.0087, "step": 5699 }, { "epoch": 1.103328173374613, "grad_norm": 0.09052518010139465, "learning_rate": 9.7454254954907e-05, "loss": 0.009, "step": 5700 }, { "epoch": 1.1035216718266254, "grad_norm": 0.08503524959087372, "learning_rate": 9.74533450921327e-05, "loss": 0.0082, "step": 5701 }, { "epoch": 1.1037151702786379, "grad_norm": 0.11551264673471451, "learning_rate": 9.745243507152731e-05, "loss": 0.0092, "step": 5702 }, { "epoch": 1.10390866873065, "grad_norm": 0.06764610856771469, "learning_rate": 9.745152489309423e-05, "loss": 0.0091, "step": 5703 }, { "epoch": 1.1041021671826625, "grad_norm": 0.13800841569900513, "learning_rate": 9.745061455683684e-05, "loss": 0.0105, "step": 5704 }, { "epoch": 1.104295665634675, "grad_norm": 0.03036417067050934, "learning_rate": 9.744970406275851e-05, "loss": 0.0081, "step": 5705 }, { "epoch": 1.1044891640866874, "grad_norm": 0.1612362265586853, "learning_rate": 9.744879341086266e-05, "loss": 0.0102, "step": 5706 }, { "epoch": 1.1046826625386996, "grad_norm": 0.08015832304954529, "learning_rate": 9.744788260115265e-05, "loss": 0.0096, "step": 5707 }, { "epoch": 1.104876160990712, "grad_norm": 0.11012234538793564, "learning_rate": 9.744697163363187e-05, "loss": 0.009, "step": 5708 }, { "epoch": 1.1050696594427245, "grad_norm": 0.08109167963266373, "learning_rate": 9.744606050830372e-05, "loss": 0.0096, "step": 5709 }, { "epoch": 1.1052631578947367, "grad_norm": 0.12516486644744873, "learning_rate": 9.744514922517157e-05, "loss": 0.0077, "step": 5710 }, { "epoch": 1.1054566563467492, "grad_norm": 0.06564927101135254, "learning_rate": 9.74442377842388e-05, "loss": 0.0076, "step": 5711 }, { "epoch": 1.1056501547987616, "grad_norm": 0.15761350095272064, "learning_rate": 9.744332618550881e-05, "loss": 0.0087, "step": 5712 }, { "epoch": 1.105843653250774, "grad_norm": 
0.09798695892095566, "learning_rate": 9.744241442898502e-05, "loss": 0.0102, "step": 5713 }, { "epoch": 1.1060371517027865, "grad_norm": 0.12908372282981873, "learning_rate": 9.744150251467078e-05, "loss": 0.0104, "step": 5714 }, { "epoch": 1.1062306501547987, "grad_norm": 0.1309671849012375, "learning_rate": 9.744059044256947e-05, "loss": 0.0103, "step": 5715 }, { "epoch": 1.1064241486068112, "grad_norm": 0.08695061504840851, "learning_rate": 9.743967821268453e-05, "loss": 0.0078, "step": 5716 }, { "epoch": 1.1066176470588236, "grad_norm": 0.1658831238746643, "learning_rate": 9.743876582501931e-05, "loss": 0.01, "step": 5717 }, { "epoch": 1.1068111455108358, "grad_norm": 0.10758164525032043, "learning_rate": 9.743785327957721e-05, "loss": 0.0096, "step": 5718 }, { "epoch": 1.1070046439628483, "grad_norm": 0.11382462084293365, "learning_rate": 9.743694057636165e-05, "loss": 0.0088, "step": 5719 }, { "epoch": 1.1071981424148607, "grad_norm": 0.07997092604637146, "learning_rate": 9.7436027715376e-05, "loss": 0.0091, "step": 5720 }, { "epoch": 1.1073916408668731, "grad_norm": 0.10698502510786057, "learning_rate": 9.743511469662364e-05, "loss": 0.0073, "step": 5721 }, { "epoch": 1.1075851393188854, "grad_norm": 0.07005956768989563, "learning_rate": 9.743420152010798e-05, "loss": 0.0116, "step": 5722 }, { "epoch": 1.1077786377708978, "grad_norm": 0.12136080861091614, "learning_rate": 9.743328818583242e-05, "loss": 0.0106, "step": 5723 }, { "epoch": 1.1079721362229102, "grad_norm": 0.02770799584686756, "learning_rate": 9.743237469380034e-05, "loss": 0.0089, "step": 5724 }, { "epoch": 1.1081656346749227, "grad_norm": 0.0961279422044754, "learning_rate": 9.743146104401518e-05, "loss": 0.0112, "step": 5725 }, { "epoch": 1.108359133126935, "grad_norm": 0.03688322380185127, "learning_rate": 9.743054723648026e-05, "loss": 0.0078, "step": 5726 }, { "epoch": 1.1085526315789473, "grad_norm": 0.07932154834270477, "learning_rate": 9.742963327119905e-05, "loss": 0.0097, "step": 5727 }, { "epoch": 1.1087461300309598, "grad_norm": 0.047151118516922, "learning_rate": 9.742871914817489e-05, "loss": 0.0095, "step": 5728 }, { "epoch": 1.1089396284829722, "grad_norm": 0.06788554787635803, "learning_rate": 9.742780486741123e-05, "loss": 0.0102, "step": 5729 }, { "epoch": 1.1091331269349844, "grad_norm": 0.06711962819099426, "learning_rate": 9.742689042891146e-05, "loss": 0.0082, "step": 5730 }, { "epoch": 1.109326625386997, "grad_norm": 0.0453546941280365, "learning_rate": 9.742597583267893e-05, "loss": 0.0091, "step": 5731 }, { "epoch": 1.1095201238390093, "grad_norm": 0.10000218451023102, "learning_rate": 9.74250610787171e-05, "loss": 0.0076, "step": 5732 }, { "epoch": 1.1097136222910218, "grad_norm": 0.03540867939591408, "learning_rate": 9.742414616702933e-05, "loss": 0.0096, "step": 5733 }, { "epoch": 1.109907120743034, "grad_norm": 0.19120652973651886, "learning_rate": 9.742323109761905e-05, "loss": 0.0103, "step": 5734 }, { "epoch": 1.1101006191950464, "grad_norm": 0.11805897951126099, "learning_rate": 9.742231587048964e-05, "loss": 0.0088, "step": 5735 }, { "epoch": 1.1102941176470589, "grad_norm": 0.09767397493124008, "learning_rate": 9.742140048564453e-05, "loss": 0.0096, "step": 5736 }, { "epoch": 1.1104876160990713, "grad_norm": 0.0841279923915863, "learning_rate": 9.74204849430871e-05, "loss": 0.0095, "step": 5737 }, { "epoch": 1.1106811145510835, "grad_norm": 0.08739189058542252, "learning_rate": 9.741956924282074e-05, "loss": 0.0084, "step": 5738 }, { "epoch": 1.110874613003096, "grad_norm": 
0.08056079596281052, "learning_rate": 9.74186533848489e-05, "loss": 0.0075, "step": 5739 }, { "epoch": 1.1110681114551084, "grad_norm": 0.10904090851545334, "learning_rate": 9.741773736917495e-05, "loss": 0.0092, "step": 5740 }, { "epoch": 1.1112616099071206, "grad_norm": 0.08502314239740372, "learning_rate": 9.74168211958023e-05, "loss": 0.0082, "step": 5741 }, { "epoch": 1.111455108359133, "grad_norm": 0.13474048674106598, "learning_rate": 9.741590486473436e-05, "loss": 0.0106, "step": 5742 }, { "epoch": 1.1116486068111455, "grad_norm": 0.09906923770904541, "learning_rate": 9.741498837597454e-05, "loss": 0.0073, "step": 5743 }, { "epoch": 1.111842105263158, "grad_norm": 0.16763484477996826, "learning_rate": 9.741407172952625e-05, "loss": 0.0089, "step": 5744 }, { "epoch": 1.1120356037151702, "grad_norm": 0.12708257138729095, "learning_rate": 9.741315492539289e-05, "loss": 0.0098, "step": 5745 }, { "epoch": 1.1122291021671826, "grad_norm": 0.170164555311203, "learning_rate": 9.741223796357786e-05, "loss": 0.0105, "step": 5746 }, { "epoch": 1.112422600619195, "grad_norm": 0.09997090697288513, "learning_rate": 9.741132084408458e-05, "loss": 0.0084, "step": 5747 }, { "epoch": 1.1126160990712075, "grad_norm": 0.17346356809139252, "learning_rate": 9.741040356691649e-05, "loss": 0.0089, "step": 5748 }, { "epoch": 1.1128095975232197, "grad_norm": 0.0981920063495636, "learning_rate": 9.740948613207694e-05, "loss": 0.0094, "step": 5749 }, { "epoch": 1.1130030959752322, "grad_norm": 0.14213228225708008, "learning_rate": 9.740856853956937e-05, "loss": 0.0104, "step": 5750 }, { "epoch": 1.1131965944272446, "grad_norm": 0.10604400932788849, "learning_rate": 9.740765078939721e-05, "loss": 0.009, "step": 5751 }, { "epoch": 1.113390092879257, "grad_norm": 0.08482477068901062, "learning_rate": 9.740673288156385e-05, "loss": 0.0091, "step": 5752 }, { "epoch": 1.1135835913312693, "grad_norm": 0.1599137783050537, "learning_rate": 9.740581481607268e-05, "loss": 0.0099, "step": 5753 }, { "epoch": 1.1137770897832817, "grad_norm": 0.04703085869550705, "learning_rate": 9.740489659292716e-05, "loss": 0.0096, "step": 5754 }, { "epoch": 1.1139705882352942, "grad_norm": 0.15997545421123505, "learning_rate": 9.74039782121307e-05, "loss": 0.0077, "step": 5755 }, { "epoch": 1.1141640866873066, "grad_norm": 0.06133664399385452, "learning_rate": 9.740305967368668e-05, "loss": 0.0111, "step": 5756 }, { "epoch": 1.1143575851393188, "grad_norm": 0.15266269445419312, "learning_rate": 9.740214097759852e-05, "loss": 0.0093, "step": 5757 }, { "epoch": 1.1145510835913313, "grad_norm": 0.07883177697658539, "learning_rate": 9.740122212386967e-05, "loss": 0.01, "step": 5758 }, { "epoch": 1.1147445820433437, "grad_norm": 0.12025150656700134, "learning_rate": 9.74003031125035e-05, "loss": 0.0089, "step": 5759 }, { "epoch": 1.1149380804953561, "grad_norm": 0.1218050867319107, "learning_rate": 9.739938394350349e-05, "loss": 0.0099, "step": 5760 }, { "epoch": 1.1151315789473684, "grad_norm": 0.1136096715927124, "learning_rate": 9.739846461687297e-05, "loss": 0.0094, "step": 5761 }, { "epoch": 1.1153250773993808, "grad_norm": 0.1584952175617218, "learning_rate": 9.739754513261544e-05, "loss": 0.0109, "step": 5762 }, { "epoch": 1.1155185758513932, "grad_norm": 0.07281223684549332, "learning_rate": 9.739662549073428e-05, "loss": 0.0091, "step": 5763 }, { "epoch": 1.1157120743034055, "grad_norm": 0.1291303187608719, "learning_rate": 9.73957056912329e-05, "loss": 0.0092, "step": 5764 }, { "epoch": 1.115905572755418, "grad_norm": 
0.10024014115333557, "learning_rate": 9.739478573411474e-05, "loss": 0.0084, "step": 5765 }, { "epoch": 1.1160990712074303, "grad_norm": 0.12098831683397293, "learning_rate": 9.739386561938323e-05, "loss": 0.0091, "step": 5766 }, { "epoch": 1.1162925696594428, "grad_norm": 0.0793335810303688, "learning_rate": 9.739294534704176e-05, "loss": 0.0083, "step": 5767 }, { "epoch": 1.116486068111455, "grad_norm": 0.07298577576875687, "learning_rate": 9.739202491709374e-05, "loss": 0.0073, "step": 5768 }, { "epoch": 1.1166795665634675, "grad_norm": 0.12278921902179718, "learning_rate": 9.739110432954265e-05, "loss": 0.0095, "step": 5769 }, { "epoch": 1.11687306501548, "grad_norm": 0.11338628828525543, "learning_rate": 9.739018358439186e-05, "loss": 0.0095, "step": 5770 }, { "epoch": 1.1170665634674923, "grad_norm": 0.197713702917099, "learning_rate": 9.738926268164483e-05, "loss": 0.0095, "step": 5771 }, { "epoch": 1.1172600619195046, "grad_norm": 0.20270772278308868, "learning_rate": 9.738834162130496e-05, "loss": 0.0083, "step": 5772 }, { "epoch": 1.117453560371517, "grad_norm": 0.1842612773180008, "learning_rate": 9.738742040337567e-05, "loss": 0.0099, "step": 5773 }, { "epoch": 1.1176470588235294, "grad_norm": 0.19460351765155792, "learning_rate": 9.73864990278604e-05, "loss": 0.0102, "step": 5774 }, { "epoch": 1.1178405572755419, "grad_norm": 0.15400302410125732, "learning_rate": 9.738557749476259e-05, "loss": 0.0106, "step": 5775 }, { "epoch": 1.118034055727554, "grad_norm": 0.15739388763904572, "learning_rate": 9.738465580408563e-05, "loss": 0.0091, "step": 5776 }, { "epoch": 1.1182275541795665, "grad_norm": 0.13976198434829712, "learning_rate": 9.738373395583296e-05, "loss": 0.0105, "step": 5777 }, { "epoch": 1.118421052631579, "grad_norm": 0.11246027052402496, "learning_rate": 9.738281195000801e-05, "loss": 0.0096, "step": 5778 }, { "epoch": 1.1186145510835914, "grad_norm": 0.1008114218711853, "learning_rate": 9.738188978661422e-05, "loss": 0.0102, "step": 5779 }, { "epoch": 1.1188080495356036, "grad_norm": 0.17662189900875092, "learning_rate": 9.7380967465655e-05, "loss": 0.0091, "step": 5780 }, { "epoch": 1.119001547987616, "grad_norm": 0.1561945378780365, "learning_rate": 9.738004498713379e-05, "loss": 0.0089, "step": 5781 }, { "epoch": 1.1191950464396285, "grad_norm": 0.2413983792066574, "learning_rate": 9.7379122351054e-05, "loss": 0.0117, "step": 5782 }, { "epoch": 1.119388544891641, "grad_norm": 0.2017652541399002, "learning_rate": 9.73781995574191e-05, "loss": 0.01, "step": 5783 }, { "epoch": 1.1195820433436532, "grad_norm": 0.29474008083343506, "learning_rate": 9.737727660623247e-05, "loss": 0.01, "step": 5784 }, { "epoch": 1.1197755417956656, "grad_norm": 0.21230515837669373, "learning_rate": 9.737635349749759e-05, "loss": 0.0076, "step": 5785 }, { "epoch": 1.119969040247678, "grad_norm": 0.3842049837112427, "learning_rate": 9.737543023121786e-05, "loss": 0.0087, "step": 5786 }, { "epoch": 1.1201625386996903, "grad_norm": 0.17436206340789795, "learning_rate": 9.737450680739672e-05, "loss": 0.0093, "step": 5787 }, { "epoch": 1.1203560371517027, "grad_norm": 0.3431827127933502, "learning_rate": 9.73735832260376e-05, "loss": 0.01, "step": 5788 }, { "epoch": 1.1205495356037152, "grad_norm": 0.19676539301872253, "learning_rate": 9.737265948714395e-05, "loss": 0.009, "step": 5789 }, { "epoch": 1.1207430340557276, "grad_norm": 0.223414808511734, "learning_rate": 9.73717355907192e-05, "loss": 0.0097, "step": 5790 }, { "epoch": 1.1209365325077398, "grad_norm": 0.21976344287395477, 
"learning_rate": 9.737081153676677e-05, "loss": 0.0099, "step": 5791 }, { "epoch": 1.1211300309597523, "grad_norm": 0.1052839457988739, "learning_rate": 9.73698873252901e-05, "loss": 0.0086, "step": 5792 }, { "epoch": 1.1213235294117647, "grad_norm": 0.18762987852096558, "learning_rate": 9.736896295629262e-05, "loss": 0.0101, "step": 5793 }, { "epoch": 1.1215170278637772, "grad_norm": 0.10173047333955765, "learning_rate": 9.736803842977779e-05, "loss": 0.012, "step": 5794 }, { "epoch": 1.1217105263157894, "grad_norm": 0.1307472288608551, "learning_rate": 9.736711374574902e-05, "loss": 0.0106, "step": 5795 }, { "epoch": 1.1219040247678018, "grad_norm": 0.12620405852794647, "learning_rate": 9.736618890420977e-05, "loss": 0.009, "step": 5796 }, { "epoch": 1.1220975232198143, "grad_norm": 0.057493504136800766, "learning_rate": 9.736526390516347e-05, "loss": 0.0119, "step": 5797 }, { "epoch": 1.1222910216718267, "grad_norm": 0.24038153886795044, "learning_rate": 9.736433874861356e-05, "loss": 0.0094, "step": 5798 }, { "epoch": 1.122484520123839, "grad_norm": 0.05739065632224083, "learning_rate": 9.736341343456346e-05, "loss": 0.0096, "step": 5799 }, { "epoch": 1.1226780185758514, "grad_norm": 0.17179982364177704, "learning_rate": 9.736248796301665e-05, "loss": 0.0084, "step": 5800 }, { "epoch": 1.1228715170278638, "grad_norm": 0.13640524446964264, "learning_rate": 9.736156233397654e-05, "loss": 0.0106, "step": 5801 }, { "epoch": 1.1230650154798762, "grad_norm": 0.09136421978473663, "learning_rate": 9.736063654744658e-05, "loss": 0.0092, "step": 5802 }, { "epoch": 1.1232585139318885, "grad_norm": 0.18348681926727295, "learning_rate": 9.735971060343021e-05, "loss": 0.0095, "step": 5803 }, { "epoch": 1.123452012383901, "grad_norm": 0.0761127844452858, "learning_rate": 9.735878450193086e-05, "loss": 0.0095, "step": 5804 }, { "epoch": 1.1236455108359134, "grad_norm": 0.15469606220722198, "learning_rate": 9.735785824295202e-05, "loss": 0.0103, "step": 5805 }, { "epoch": 1.1238390092879258, "grad_norm": 0.1530783474445343, "learning_rate": 9.735693182649707e-05, "loss": 0.0072, "step": 5806 }, { "epoch": 1.124032507739938, "grad_norm": 0.08954805135726929, "learning_rate": 9.735600525256949e-05, "loss": 0.0085, "step": 5807 }, { "epoch": 1.1242260061919505, "grad_norm": 0.16965283453464508, "learning_rate": 9.735507852117272e-05, "loss": 0.0093, "step": 5808 }, { "epoch": 1.124419504643963, "grad_norm": 0.06376207619905472, "learning_rate": 9.735415163231021e-05, "loss": 0.0092, "step": 5809 }, { "epoch": 1.1246130030959751, "grad_norm": 0.1136435717344284, "learning_rate": 9.73532245859854e-05, "loss": 0.0104, "step": 5810 }, { "epoch": 1.1248065015479876, "grad_norm": 0.10923216491937637, "learning_rate": 9.735229738220175e-05, "loss": 0.0093, "step": 5811 }, { "epoch": 1.125, "grad_norm": 0.060734864324331284, "learning_rate": 9.735137002096267e-05, "loss": 0.0091, "step": 5812 }, { "epoch": 1.1251934984520124, "grad_norm": 0.08705997467041016, "learning_rate": 9.735044250227164e-05, "loss": 0.0087, "step": 5813 }, { "epoch": 1.1253869969040249, "grad_norm": 0.10291259735822678, "learning_rate": 9.73495148261321e-05, "loss": 0.0102, "step": 5814 }, { "epoch": 1.125580495356037, "grad_norm": 0.05712581053376198, "learning_rate": 9.734858699254751e-05, "loss": 0.0075, "step": 5815 }, { "epoch": 1.1257739938080495, "grad_norm": 0.1056017205119133, "learning_rate": 9.73476590015213e-05, "loss": 0.0101, "step": 5816 }, { "epoch": 1.125967492260062, "grad_norm": 0.062316879630088806, "learning_rate": 
9.734673085305692e-05, "loss": 0.0106, "step": 5817 }, { "epoch": 1.1261609907120742, "grad_norm": 0.06724267452955246, "learning_rate": 9.734580254715785e-05, "loss": 0.0087, "step": 5818 }, { "epoch": 1.1263544891640866, "grad_norm": 0.08508814871311188, "learning_rate": 9.73448740838275e-05, "loss": 0.0079, "step": 5819 }, { "epoch": 1.126547987616099, "grad_norm": 0.0943959429860115, "learning_rate": 9.734394546306935e-05, "loss": 0.0119, "step": 5820 }, { "epoch": 1.1267414860681115, "grad_norm": 0.05267609283328056, "learning_rate": 9.734301668488684e-05, "loss": 0.0074, "step": 5821 }, { "epoch": 1.1269349845201238, "grad_norm": 0.14744459092617035, "learning_rate": 9.734208774928344e-05, "loss": 0.0098, "step": 5822 }, { "epoch": 1.1271284829721362, "grad_norm": 0.04490836337208748, "learning_rate": 9.734115865626258e-05, "loss": 0.0082, "step": 5823 }, { "epoch": 1.1273219814241486, "grad_norm": 0.14041045308113098, "learning_rate": 9.734022940582772e-05, "loss": 0.0075, "step": 5824 }, { "epoch": 1.127515479876161, "grad_norm": 0.03689664229750633, "learning_rate": 9.733929999798233e-05, "loss": 0.0093, "step": 5825 }, { "epoch": 1.1277089783281733, "grad_norm": 0.1346270591020584, "learning_rate": 9.733837043272987e-05, "loss": 0.0082, "step": 5826 }, { "epoch": 1.1279024767801857, "grad_norm": 0.06100701168179512, "learning_rate": 9.733744071007375e-05, "loss": 0.0088, "step": 5827 }, { "epoch": 1.1280959752321982, "grad_norm": 0.11505571752786636, "learning_rate": 9.733651083001746e-05, "loss": 0.0095, "step": 5828 }, { "epoch": 1.1282894736842106, "grad_norm": 0.09536191821098328, "learning_rate": 9.733558079256448e-05, "loss": 0.0091, "step": 5829 }, { "epoch": 1.1284829721362228, "grad_norm": 0.06044928729534149, "learning_rate": 9.733465059771822e-05, "loss": 0.0073, "step": 5830 }, { "epoch": 1.1286764705882353, "grad_norm": 0.10177459567785263, "learning_rate": 9.733372024548218e-05, "loss": 0.0098, "step": 5831 }, { "epoch": 1.1288699690402477, "grad_norm": 0.07262083142995834, "learning_rate": 9.733278973585979e-05, "loss": 0.0086, "step": 5832 }, { "epoch": 1.12906346749226, "grad_norm": 0.038214605301618576, "learning_rate": 9.733185906885451e-05, "loss": 0.0081, "step": 5833 }, { "epoch": 1.1292569659442724, "grad_norm": 0.07390271872282028, "learning_rate": 9.733092824446983e-05, "loss": 0.0088, "step": 5834 }, { "epoch": 1.1294504643962848, "grad_norm": 0.06383824348449707, "learning_rate": 9.732999726270916e-05, "loss": 0.0095, "step": 5835 }, { "epoch": 1.1296439628482973, "grad_norm": 0.10442414879798889, "learning_rate": 9.732906612357602e-05, "loss": 0.0101, "step": 5836 }, { "epoch": 1.1298374613003097, "grad_norm": 0.035241104662418365, "learning_rate": 9.732813482707384e-05, "loss": 0.0089, "step": 5837 }, { "epoch": 1.130030959752322, "grad_norm": 0.08783071488142014, "learning_rate": 9.732720337320605e-05, "loss": 0.0076, "step": 5838 }, { "epoch": 1.1302244582043344, "grad_norm": 0.04060022905468941, "learning_rate": 9.732627176197617e-05, "loss": 0.0109, "step": 5839 }, { "epoch": 1.1304179566563468, "grad_norm": 0.09658307582139969, "learning_rate": 9.732533999338766e-05, "loss": 0.0085, "step": 5840 }, { "epoch": 1.130611455108359, "grad_norm": 0.03959466889500618, "learning_rate": 9.732440806744394e-05, "loss": 0.0101, "step": 5841 }, { "epoch": 1.1308049535603715, "grad_norm": 0.11155769973993301, "learning_rate": 9.73234759841485e-05, "loss": 0.0117, "step": 5842 }, { "epoch": 1.130998452012384, "grad_norm": 0.041947368532419205, "learning_rate": 
9.732254374350483e-05, "loss": 0.0082, "step": 5843 }, { "epoch": 1.1311919504643964, "grad_norm": 0.11522489041090012, "learning_rate": 9.732161134551635e-05, "loss": 0.0088, "step": 5844 }, { "epoch": 1.1313854489164086, "grad_norm": 0.035413708537817, "learning_rate": 9.732067879018654e-05, "loss": 0.0109, "step": 5845 }, { "epoch": 1.131578947368421, "grad_norm": 0.08087850362062454, "learning_rate": 9.731974607751889e-05, "loss": 0.0081, "step": 5846 }, { "epoch": 1.1317724458204335, "grad_norm": 0.0640382468700409, "learning_rate": 9.731881320751685e-05, "loss": 0.0096, "step": 5847 }, { "epoch": 1.131965944272446, "grad_norm": 0.06578201055526733, "learning_rate": 9.73178801801839e-05, "loss": 0.0111, "step": 5848 }, { "epoch": 1.1321594427244581, "grad_norm": 0.07155784219503403, "learning_rate": 9.731694699552346e-05, "loss": 0.0068, "step": 5849 }, { "epoch": 1.1323529411764706, "grad_norm": 0.06695621460676193, "learning_rate": 9.731601365353906e-05, "loss": 0.0096, "step": 5850 }, { "epoch": 1.132546439628483, "grad_norm": 0.09354294836521149, "learning_rate": 9.731508015423417e-05, "loss": 0.0096, "step": 5851 }, { "epoch": 1.1327399380804954, "grad_norm": 0.10733281821012497, "learning_rate": 9.731414649761222e-05, "loss": 0.0094, "step": 5852 }, { "epoch": 1.1329334365325077, "grad_norm": 0.07883817702531815, "learning_rate": 9.731321268367671e-05, "loss": 0.0096, "step": 5853 }, { "epoch": 1.13312693498452, "grad_norm": 0.1199294850230217, "learning_rate": 9.731227871243107e-05, "loss": 0.0092, "step": 5854 }, { "epoch": 1.1333204334365325, "grad_norm": 0.06358292698860168, "learning_rate": 9.731134458387884e-05, "loss": 0.0105, "step": 5855 }, { "epoch": 1.1335139318885448, "grad_norm": 0.0697849839925766, "learning_rate": 9.731041029802345e-05, "loss": 0.0086, "step": 5856 }, { "epoch": 1.1337074303405572, "grad_norm": 0.07246460020542145, "learning_rate": 9.730947585486835e-05, "loss": 0.0091, "step": 5857 }, { "epoch": 1.1339009287925697, "grad_norm": 0.0872880145907402, "learning_rate": 9.730854125441708e-05, "loss": 0.0095, "step": 5858 }, { "epoch": 1.134094427244582, "grad_norm": 0.058000288903713226, "learning_rate": 9.730760649667307e-05, "loss": 0.0101, "step": 5859 }, { "epoch": 1.1342879256965945, "grad_norm": 0.13876129686832428, "learning_rate": 9.730667158163978e-05, "loss": 0.0109, "step": 5860 }, { "epoch": 1.1344814241486068, "grad_norm": 0.10723317414522171, "learning_rate": 9.730573650932074e-05, "loss": 0.0102, "step": 5861 }, { "epoch": 1.1346749226006192, "grad_norm": 0.12321826815605164, "learning_rate": 9.730480127971938e-05, "loss": 0.0072, "step": 5862 }, { "epoch": 1.1348684210526316, "grad_norm": 0.1500900238752365, "learning_rate": 9.73038658928392e-05, "loss": 0.0093, "step": 5863 }, { "epoch": 1.1350619195046439, "grad_norm": 0.08062843233346939, "learning_rate": 9.730293034868367e-05, "loss": 0.0079, "step": 5864 }, { "epoch": 1.1352554179566563, "grad_norm": 0.1187426969408989, "learning_rate": 9.730199464725626e-05, "loss": 0.0081, "step": 5865 }, { "epoch": 1.1354489164086687, "grad_norm": 0.06727541983127594, "learning_rate": 9.730105878856047e-05, "loss": 0.0093, "step": 5866 }, { "epoch": 1.1356424148606812, "grad_norm": 0.10365983843803406, "learning_rate": 9.730012277259976e-05, "loss": 0.0107, "step": 5867 }, { "epoch": 1.1358359133126934, "grad_norm": 0.13896815478801727, "learning_rate": 9.729918659937762e-05, "loss": 0.0085, "step": 5868 }, { "epoch": 1.1360294117647058, "grad_norm": 0.09698114544153214, "learning_rate": 
9.729825026889753e-05, "loss": 0.0075, "step": 5869 }, { "epoch": 1.1362229102167183, "grad_norm": 0.14251403510570526, "learning_rate": 9.729731378116296e-05, "loss": 0.01, "step": 5870 }, { "epoch": 1.1364164086687307, "grad_norm": 0.05086024850606918, "learning_rate": 9.72963771361774e-05, "loss": 0.0094, "step": 5871 }, { "epoch": 1.136609907120743, "grad_norm": 0.1526165008544922, "learning_rate": 9.729544033394434e-05, "loss": 0.0105, "step": 5872 }, { "epoch": 1.1368034055727554, "grad_norm": 0.06014842540025711, "learning_rate": 9.729450337446724e-05, "loss": 0.0102, "step": 5873 }, { "epoch": 1.1369969040247678, "grad_norm": 0.12148767709732056, "learning_rate": 9.72935662577496e-05, "loss": 0.01, "step": 5874 }, { "epoch": 1.1371904024767803, "grad_norm": 0.0712442696094513, "learning_rate": 9.729262898379491e-05, "loss": 0.0092, "step": 5875 }, { "epoch": 1.1373839009287925, "grad_norm": 0.2006046622991562, "learning_rate": 9.729169155260667e-05, "loss": 0.0079, "step": 5876 }, { "epoch": 1.137577399380805, "grad_norm": 0.07227708399295807, "learning_rate": 9.72907539641883e-05, "loss": 0.011, "step": 5877 }, { "epoch": 1.1377708978328174, "grad_norm": 0.17058438062667847, "learning_rate": 9.728981621854334e-05, "loss": 0.0095, "step": 5878 }, { "epoch": 1.1379643962848298, "grad_norm": 0.08747270703315735, "learning_rate": 9.728887831567527e-05, "loss": 0.0079, "step": 5879 }, { "epoch": 1.138157894736842, "grad_norm": 0.11848542839288712, "learning_rate": 9.728794025558757e-05, "loss": 0.0107, "step": 5880 }, { "epoch": 1.1383513931888545, "grad_norm": 0.12977388501167297, "learning_rate": 9.728700203828373e-05, "loss": 0.0093, "step": 5881 }, { "epoch": 1.138544891640867, "grad_norm": 0.09055959433317184, "learning_rate": 9.728606366376724e-05, "loss": 0.0066, "step": 5882 }, { "epoch": 1.1387383900928794, "grad_norm": 0.14091280102729797, "learning_rate": 9.728512513204157e-05, "loss": 0.0098, "step": 5883 }, { "epoch": 1.1389318885448916, "grad_norm": 0.07074522227048874, "learning_rate": 9.728418644311023e-05, "loss": 0.0091, "step": 5884 }, { "epoch": 1.139125386996904, "grad_norm": 0.10990947484970093, "learning_rate": 9.728324759697672e-05, "loss": 0.0092, "step": 5885 }, { "epoch": 1.1393188854489165, "grad_norm": 0.07798823714256287, "learning_rate": 9.728230859364449e-05, "loss": 0.0097, "step": 5886 }, { "epoch": 1.1395123839009287, "grad_norm": 0.07387146353721619, "learning_rate": 9.728136943311708e-05, "loss": 0.009, "step": 5887 }, { "epoch": 1.1397058823529411, "grad_norm": 0.08132949471473694, "learning_rate": 9.728043011539792e-05, "loss": 0.0092, "step": 5888 }, { "epoch": 1.1398993808049536, "grad_norm": 0.04555466026067734, "learning_rate": 9.727949064049057e-05, "loss": 0.0105, "step": 5889 }, { "epoch": 1.140092879256966, "grad_norm": 0.06683842837810516, "learning_rate": 9.72785510083985e-05, "loss": 0.008, "step": 5890 }, { "epoch": 1.1402863777089784, "grad_norm": 0.03133809566497803, "learning_rate": 9.727761121912517e-05, "loss": 0.0088, "step": 5891 }, { "epoch": 1.1404798761609907, "grad_norm": 0.04868142679333687, "learning_rate": 9.72766712726741e-05, "loss": 0.0084, "step": 5892 }, { "epoch": 1.140673374613003, "grad_norm": 0.06271795928478241, "learning_rate": 9.727573116904879e-05, "loss": 0.0078, "step": 5893 }, { "epoch": 1.1408668730650156, "grad_norm": 0.06177385151386261, "learning_rate": 9.727479090825274e-05, "loss": 0.0102, "step": 5894 }, { "epoch": 1.1410603715170278, "grad_norm": 0.09104207158088684, "learning_rate": 
9.727385049028942e-05, "loss": 0.0111, "step": 5895 }, { "epoch": 1.1412538699690402, "grad_norm": 0.077513687312603, "learning_rate": 9.727290991516234e-05, "loss": 0.0075, "step": 5896 }, { "epoch": 1.1414473684210527, "grad_norm": 0.03745261952280998, "learning_rate": 9.727196918287501e-05, "loss": 0.0089, "step": 5897 }, { "epoch": 1.141640866873065, "grad_norm": 0.06003402918577194, "learning_rate": 9.72710282934309e-05, "loss": 0.009, "step": 5898 }, { "epoch": 1.1418343653250773, "grad_norm": 0.0489942692220211, "learning_rate": 9.727008724683352e-05, "loss": 0.0092, "step": 5899 }, { "epoch": 1.1420278637770898, "grad_norm": 0.05656660348176956, "learning_rate": 9.726914604308638e-05, "loss": 0.0081, "step": 5900 }, { "epoch": 1.1422213622291022, "grad_norm": 0.06509975343942642, "learning_rate": 9.726820468219298e-05, "loss": 0.0082, "step": 5901 }, { "epoch": 1.1424148606811146, "grad_norm": 0.0770362839102745, "learning_rate": 9.72672631641568e-05, "loss": 0.0091, "step": 5902 }, { "epoch": 1.1426083591331269, "grad_norm": 0.06149713322520256, "learning_rate": 9.726632148898135e-05, "loss": 0.0089, "step": 5903 }, { "epoch": 1.1428018575851393, "grad_norm": 0.09161001443862915, "learning_rate": 9.726537965667013e-05, "loss": 0.0087, "step": 5904 }, { "epoch": 1.1429953560371517, "grad_norm": 0.03031919337809086, "learning_rate": 9.726443766722665e-05, "loss": 0.0096, "step": 5905 }, { "epoch": 1.1431888544891642, "grad_norm": 0.08335720747709274, "learning_rate": 9.72634955206544e-05, "loss": 0.0109, "step": 5906 }, { "epoch": 1.1433823529411764, "grad_norm": 0.06979329884052277, "learning_rate": 9.726255321695688e-05, "loss": 0.01, "step": 5907 }, { "epoch": 1.1435758513931888, "grad_norm": 0.04328765347599983, "learning_rate": 9.726161075613762e-05, "loss": 0.0092, "step": 5908 }, { "epoch": 1.1437693498452013, "grad_norm": 0.0798812210559845, "learning_rate": 9.726066813820008e-05, "loss": 0.0095, "step": 5909 }, { "epoch": 1.1439628482972135, "grad_norm": 0.041063643991947174, "learning_rate": 9.72597253631478e-05, "loss": 0.0089, "step": 5910 }, { "epoch": 1.144156346749226, "grad_norm": 0.04272780567407608, "learning_rate": 9.725878243098427e-05, "loss": 0.0095, "step": 5911 }, { "epoch": 1.1443498452012384, "grad_norm": 0.03884677588939667, "learning_rate": 9.7257839341713e-05, "loss": 0.0085, "step": 5912 }, { "epoch": 1.1445433436532508, "grad_norm": 0.042569711804389954, "learning_rate": 9.725689609533752e-05, "loss": 0.0085, "step": 5913 }, { "epoch": 1.1447368421052633, "grad_norm": 0.051658861339092255, "learning_rate": 9.725595269186129e-05, "loss": 0.0092, "step": 5914 }, { "epoch": 1.1449303405572755, "grad_norm": 0.050725072622299194, "learning_rate": 9.725500913128785e-05, "loss": 0.0092, "step": 5915 }, { "epoch": 1.145123839009288, "grad_norm": 0.04227932542562485, "learning_rate": 9.725406541362069e-05, "loss": 0.0072, "step": 5916 }, { "epoch": 1.1453173374613004, "grad_norm": 0.03822958841919899, "learning_rate": 9.725312153886332e-05, "loss": 0.0087, "step": 5917 }, { "epoch": 1.1455108359133126, "grad_norm": 0.07789351046085358, "learning_rate": 9.725217750701927e-05, "loss": 0.0093, "step": 5918 }, { "epoch": 1.145704334365325, "grad_norm": 0.051070019602775574, "learning_rate": 9.725123331809203e-05, "loss": 0.0094, "step": 5919 }, { "epoch": 1.1458978328173375, "grad_norm": 0.08582459390163422, "learning_rate": 9.725028897208512e-05, "loss": 0.0077, "step": 5920 }, { "epoch": 1.14609133126935, "grad_norm": 0.06349076330661774, "learning_rate": 
9.724934446900203e-05, "loss": 0.01, "step": 5921 }, { "epoch": 1.1462848297213621, "grad_norm": 0.0393344908952713, "learning_rate": 9.724839980884631e-05, "loss": 0.0078, "step": 5922 }, { "epoch": 1.1464783281733746, "grad_norm": 0.053118687123060226, "learning_rate": 9.724745499162143e-05, "loss": 0.0086, "step": 5923 }, { "epoch": 1.146671826625387, "grad_norm": 0.057795725762844086, "learning_rate": 9.724651001733094e-05, "loss": 0.0092, "step": 5924 }, { "epoch": 1.1468653250773995, "grad_norm": 0.08251500129699707, "learning_rate": 9.724556488597833e-05, "loss": 0.0105, "step": 5925 }, { "epoch": 1.1470588235294117, "grad_norm": 0.05859890207648277, "learning_rate": 9.724461959756711e-05, "loss": 0.008, "step": 5926 }, { "epoch": 1.1472523219814241, "grad_norm": 0.11026404052972794, "learning_rate": 9.724367415210082e-05, "loss": 0.0078, "step": 5927 }, { "epoch": 1.1474458204334366, "grad_norm": 0.05429038777947426, "learning_rate": 9.724272854958294e-05, "loss": 0.0101, "step": 5928 }, { "epoch": 1.147639318885449, "grad_norm": 0.12633726000785828, "learning_rate": 9.724178279001702e-05, "loss": 0.011, "step": 5929 }, { "epoch": 1.1478328173374612, "grad_norm": 0.08827397972345352, "learning_rate": 9.724083687340654e-05, "loss": 0.0084, "step": 5930 }, { "epoch": 1.1480263157894737, "grad_norm": 0.1795780509710312, "learning_rate": 9.723989079975507e-05, "loss": 0.0098, "step": 5931 }, { "epoch": 1.1482198142414861, "grad_norm": 0.09651629626750946, "learning_rate": 9.723894456906606e-05, "loss": 0.0079, "step": 5932 }, { "epoch": 1.1484133126934983, "grad_norm": 0.11227519810199738, "learning_rate": 9.723799818134309e-05, "loss": 0.0097, "step": 5933 }, { "epoch": 1.1486068111455108, "grad_norm": 0.1532936841249466, "learning_rate": 9.723705163658963e-05, "loss": 0.0084, "step": 5934 }, { "epoch": 1.1488003095975232, "grad_norm": 0.09222164005041122, "learning_rate": 9.723610493480923e-05, "loss": 0.0092, "step": 5935 }, { "epoch": 1.1489938080495357, "grad_norm": 0.10808341950178146, "learning_rate": 9.723515807600538e-05, "loss": 0.0084, "step": 5936 }, { "epoch": 1.149187306501548, "grad_norm": 0.1407594382762909, "learning_rate": 9.723421106018165e-05, "loss": 0.0087, "step": 5937 }, { "epoch": 1.1493808049535603, "grad_norm": 0.10775265842676163, "learning_rate": 9.72332638873415e-05, "loss": 0.0098, "step": 5938 }, { "epoch": 1.1495743034055728, "grad_norm": 0.11720441281795502, "learning_rate": 9.72323165574885e-05, "loss": 0.0086, "step": 5939 }, { "epoch": 1.1497678018575852, "grad_norm": 0.10871328413486481, "learning_rate": 9.723136907062616e-05, "loss": 0.0078, "step": 5940 }, { "epoch": 1.1499613003095974, "grad_norm": 0.10334811359643936, "learning_rate": 9.723042142675798e-05, "loss": 0.0087, "step": 5941 }, { "epoch": 1.1501547987616099, "grad_norm": 0.16305679082870483, "learning_rate": 9.72294736258875e-05, "loss": 0.0096, "step": 5942 }, { "epoch": 1.1503482972136223, "grad_norm": 0.06694354861974716, "learning_rate": 9.722852566801824e-05, "loss": 0.0094, "step": 5943 }, { "epoch": 1.1505417956656347, "grad_norm": 0.15718726813793182, "learning_rate": 9.722757755315375e-05, "loss": 0.0112, "step": 5944 }, { "epoch": 1.150735294117647, "grad_norm": 0.03447674214839935, "learning_rate": 9.722662928129752e-05, "loss": 0.011, "step": 5945 }, { "epoch": 1.1509287925696594, "grad_norm": 0.14504745602607727, "learning_rate": 9.722568085245308e-05, "loss": 0.0116, "step": 5946 }, { "epoch": 1.1511222910216719, "grad_norm": 0.047476768493652344, "learning_rate": 
9.722473226662397e-05, "loss": 0.0092, "step": 5947 }, { "epoch": 1.1513157894736843, "grad_norm": 0.061747126281261444, "learning_rate": 9.722378352381369e-05, "loss": 0.0064, "step": 5948 }, { "epoch": 1.1515092879256965, "grad_norm": 0.18139217793941498, "learning_rate": 9.72228346240258e-05, "loss": 0.0108, "step": 5949 }, { "epoch": 1.151702786377709, "grad_norm": 0.10665155947208405, "learning_rate": 9.722188556726383e-05, "loss": 0.0088, "step": 5950 }, { "epoch": 1.1518962848297214, "grad_norm": 0.24072536826133728, "learning_rate": 9.722093635353127e-05, "loss": 0.0118, "step": 5951 }, { "epoch": 1.1520897832817338, "grad_norm": 0.11728621274232864, "learning_rate": 9.721998698283168e-05, "loss": 0.0104, "step": 5952 }, { "epoch": 1.152283281733746, "grad_norm": 0.2497781217098236, "learning_rate": 9.721903745516858e-05, "loss": 0.0095, "step": 5953 }, { "epoch": 1.1524767801857585, "grad_norm": 0.1011369377374649, "learning_rate": 9.721808777054551e-05, "loss": 0.0094, "step": 5954 }, { "epoch": 1.152670278637771, "grad_norm": 0.21708731353282928, "learning_rate": 9.721713792896598e-05, "loss": 0.008, "step": 5955 }, { "epoch": 1.1528637770897832, "grad_norm": 0.11519885808229446, "learning_rate": 9.721618793043356e-05, "loss": 0.0096, "step": 5956 }, { "epoch": 1.1530572755417956, "grad_norm": 0.11899323761463165, "learning_rate": 9.721523777495173e-05, "loss": 0.0104, "step": 5957 }, { "epoch": 1.153250773993808, "grad_norm": 0.1569862961769104, "learning_rate": 9.721428746252406e-05, "loss": 0.0091, "step": 5958 }, { "epoch": 1.1534442724458205, "grad_norm": 0.0749182477593422, "learning_rate": 9.721333699315406e-05, "loss": 0.0107, "step": 5959 }, { "epoch": 1.153637770897833, "grad_norm": 0.13209950923919678, "learning_rate": 9.721238636684529e-05, "loss": 0.0084, "step": 5960 }, { "epoch": 1.1538312693498451, "grad_norm": 0.06828414648771286, "learning_rate": 9.721143558360127e-05, "loss": 0.0096, "step": 5961 }, { "epoch": 1.1540247678018576, "grad_norm": 0.13864469528198242, "learning_rate": 9.721048464342552e-05, "loss": 0.0095, "step": 5962 }, { "epoch": 1.15421826625387, "grad_norm": 0.04284881055355072, "learning_rate": 9.720953354632158e-05, "loss": 0.0074, "step": 5963 }, { "epoch": 1.1544117647058822, "grad_norm": 0.06003974378108978, "learning_rate": 9.720858229229303e-05, "loss": 0.0087, "step": 5964 }, { "epoch": 1.1546052631578947, "grad_norm": 0.08932319283485413, "learning_rate": 9.720763088134333e-05, "loss": 0.009, "step": 5965 }, { "epoch": 1.1547987616099071, "grad_norm": 0.048463013023138046, "learning_rate": 9.720667931347608e-05, "loss": 0.009, "step": 5966 }, { "epoch": 1.1549922600619196, "grad_norm": 0.10094793885946274, "learning_rate": 9.72057275886948e-05, "loss": 0.009, "step": 5967 }, { "epoch": 1.1551857585139318, "grad_norm": 0.0762559026479721, "learning_rate": 9.720477570700303e-05, "loss": 0.0099, "step": 5968 }, { "epoch": 1.1553792569659442, "grad_norm": 0.12073486298322678, "learning_rate": 9.720382366840429e-05, "loss": 0.0096, "step": 5969 }, { "epoch": 1.1555727554179567, "grad_norm": 0.08368825912475586, "learning_rate": 9.720287147290212e-05, "loss": 0.0083, "step": 5970 }, { "epoch": 1.1557662538699691, "grad_norm": 0.14761364459991455, "learning_rate": 9.72019191205001e-05, "loss": 0.0082, "step": 5971 }, { "epoch": 1.1559597523219813, "grad_norm": 0.09090369194746017, "learning_rate": 9.720096661120173e-05, "loss": 0.0087, "step": 5972 }, { "epoch": 1.1561532507739938, "grad_norm": 0.15096306800842285, "learning_rate": 
9.720001394501056e-05, "loss": 0.0098, "step": 5973 }, { "epoch": 1.1563467492260062, "grad_norm": 0.09453229606151581, "learning_rate": 9.719906112193015e-05, "loss": 0.0102, "step": 5974 }, { "epoch": 1.1565402476780187, "grad_norm": 0.16291075944900513, "learning_rate": 9.719810814196401e-05, "loss": 0.0108, "step": 5975 }, { "epoch": 1.1567337461300309, "grad_norm": 0.10744508355855942, "learning_rate": 9.719715500511573e-05, "loss": 0.0104, "step": 5976 }, { "epoch": 1.1569272445820433, "grad_norm": 0.07711899280548096, "learning_rate": 9.71962017113888e-05, "loss": 0.0093, "step": 5977 }, { "epoch": 1.1571207430340558, "grad_norm": 0.14004555344581604, "learning_rate": 9.71952482607868e-05, "loss": 0.0104, "step": 5978 }, { "epoch": 1.1573142414860682, "grad_norm": 0.1166367158293724, "learning_rate": 9.719429465331326e-05, "loss": 0.011, "step": 5979 }, { "epoch": 1.1575077399380804, "grad_norm": 0.1257149875164032, "learning_rate": 9.719334088897173e-05, "loss": 0.008, "step": 5980 }, { "epoch": 1.1577012383900929, "grad_norm": 0.09915504604578018, "learning_rate": 9.719238696776577e-05, "loss": 0.0085, "step": 5981 }, { "epoch": 1.1578947368421053, "grad_norm": 0.13220854103565216, "learning_rate": 9.71914328896989e-05, "loss": 0.01, "step": 5982 }, { "epoch": 1.1580882352941178, "grad_norm": 0.06590590626001358, "learning_rate": 9.719047865477468e-05, "loss": 0.009, "step": 5983 }, { "epoch": 1.15828173374613, "grad_norm": 0.11065342277288437, "learning_rate": 9.718952426299664e-05, "loss": 0.0097, "step": 5984 }, { "epoch": 1.1584752321981424, "grad_norm": 0.0817963033914566, "learning_rate": 9.718856971436837e-05, "loss": 0.0104, "step": 5985 }, { "epoch": 1.1586687306501549, "grad_norm": 0.06698405742645264, "learning_rate": 9.718761500889337e-05, "loss": 0.0096, "step": 5986 }, { "epoch": 1.158862229102167, "grad_norm": 0.10293318331241608, "learning_rate": 9.718666014657522e-05, "loss": 0.0093, "step": 5987 }, { "epoch": 1.1590557275541795, "grad_norm": 0.036416489630937576, "learning_rate": 9.718570512741749e-05, "loss": 0.0078, "step": 5988 }, { "epoch": 1.159249226006192, "grad_norm": 0.09408698976039886, "learning_rate": 9.718474995142366e-05, "loss": 0.0095, "step": 5989 }, { "epoch": 1.1594427244582044, "grad_norm": 0.05352580547332764, "learning_rate": 9.718379461859736e-05, "loss": 0.0096, "step": 5990 }, { "epoch": 1.1596362229102168, "grad_norm": 0.06098479777574539, "learning_rate": 9.718283912894208e-05, "loss": 0.0076, "step": 5991 }, { "epoch": 1.159829721362229, "grad_norm": 0.05545720085501671, "learning_rate": 9.718188348246141e-05, "loss": 0.0099, "step": 5992 }, { "epoch": 1.1600232198142415, "grad_norm": 0.03183908760547638, "learning_rate": 9.718092767915889e-05, "loss": 0.0103, "step": 5993 }, { "epoch": 1.160216718266254, "grad_norm": 0.03853023424744606, "learning_rate": 9.717997171903809e-05, "loss": 0.0075, "step": 5994 }, { "epoch": 1.1604102167182662, "grad_norm": 0.06439223140478134, "learning_rate": 9.717901560210252e-05, "loss": 0.0077, "step": 5995 }, { "epoch": 1.1606037151702786, "grad_norm": 0.0578756183385849, "learning_rate": 9.717805932835577e-05, "loss": 0.0078, "step": 5996 }, { "epoch": 1.160797213622291, "grad_norm": 0.07777242362499237, "learning_rate": 9.71771028978014e-05, "loss": 0.0101, "step": 5997 }, { "epoch": 1.1609907120743035, "grad_norm": 0.05090684816241264, "learning_rate": 9.717614631044295e-05, "loss": 0.0107, "step": 5998 }, { "epoch": 1.1611842105263157, "grad_norm": 0.07572724670171738, "learning_rate": 
9.717518956628398e-05, "loss": 0.0095, "step": 5999 }, { "epoch": 1.1613777089783281, "grad_norm": 0.05545130372047424, "learning_rate": 9.717423266532803e-05, "loss": 0.009, "step": 6000 }, { "epoch": 1.1615712074303406, "grad_norm": 0.08707962930202484, "learning_rate": 9.717327560757871e-05, "loss": 0.0083, "step": 6001 }, { "epoch": 1.161764705882353, "grad_norm": 0.05735188350081444, "learning_rate": 9.717231839303952e-05, "loss": 0.0087, "step": 6002 }, { "epoch": 1.1619582043343653, "grad_norm": 0.09676194190979004, "learning_rate": 9.717136102171404e-05, "loss": 0.0098, "step": 6003 }, { "epoch": 1.1621517027863777, "grad_norm": 0.058464258909225464, "learning_rate": 9.717040349360583e-05, "loss": 0.0102, "step": 6004 }, { "epoch": 1.1623452012383901, "grad_norm": 0.09016063809394836, "learning_rate": 9.716944580871848e-05, "loss": 0.0085, "step": 6005 }, { "epoch": 1.1625386996904026, "grad_norm": 0.06067895516753197, "learning_rate": 9.716848796705549e-05, "loss": 0.0078, "step": 6006 }, { "epoch": 1.1627321981424148, "grad_norm": 0.12707017362117767, "learning_rate": 9.716752996862046e-05, "loss": 0.0099, "step": 6007 }, { "epoch": 1.1629256965944272, "grad_norm": 0.04073214903473854, "learning_rate": 9.716657181341695e-05, "loss": 0.0102, "step": 6008 }, { "epoch": 1.1631191950464397, "grad_norm": 0.12199099361896515, "learning_rate": 9.71656135014485e-05, "loss": 0.0119, "step": 6009 }, { "epoch": 1.163312693498452, "grad_norm": 0.060774292796850204, "learning_rate": 9.716465503271872e-05, "loss": 0.0081, "step": 6010 }, { "epoch": 1.1635061919504643, "grad_norm": 0.124889075756073, "learning_rate": 9.716369640723112e-05, "loss": 0.0109, "step": 6011 }, { "epoch": 1.1636996904024768, "grad_norm": 0.08161270618438721, "learning_rate": 9.716273762498929e-05, "loss": 0.0087, "step": 6012 }, { "epoch": 1.1638931888544892, "grad_norm": 0.09969240427017212, "learning_rate": 9.716177868599679e-05, "loss": 0.0081, "step": 6013 }, { "epoch": 1.1640866873065017, "grad_norm": 0.07531856000423431, "learning_rate": 9.716081959025718e-05, "loss": 0.0084, "step": 6014 }, { "epoch": 1.1642801857585139, "grad_norm": 0.12975752353668213, "learning_rate": 9.715986033777404e-05, "loss": 0.008, "step": 6015 }, { "epoch": 1.1644736842105263, "grad_norm": 0.06820511072874069, "learning_rate": 9.715890092855091e-05, "loss": 0.0088, "step": 6016 }, { "epoch": 1.1646671826625388, "grad_norm": 0.13296322524547577, "learning_rate": 9.71579413625914e-05, "loss": 0.0079, "step": 6017 }, { "epoch": 1.164860681114551, "grad_norm": 0.07221242785453796, "learning_rate": 9.715698163989905e-05, "loss": 0.0079, "step": 6018 }, { "epoch": 1.1650541795665634, "grad_norm": 0.10860525071620941, "learning_rate": 9.715602176047742e-05, "loss": 0.0092, "step": 6019 }, { "epoch": 1.1652476780185759, "grad_norm": 0.08125266432762146, "learning_rate": 9.715506172433007e-05, "loss": 0.0102, "step": 6020 }, { "epoch": 1.1654411764705883, "grad_norm": 0.13609273731708527, "learning_rate": 9.715410153146063e-05, "loss": 0.0105, "step": 6021 }, { "epoch": 1.1656346749226005, "grad_norm": 0.05702722445130348, "learning_rate": 9.715314118187259e-05, "loss": 0.0087, "step": 6022 }, { "epoch": 1.165828173374613, "grad_norm": 0.1236574798822403, "learning_rate": 9.715218067556958e-05, "loss": 0.0088, "step": 6023 }, { "epoch": 1.1660216718266254, "grad_norm": 0.04467363283038139, "learning_rate": 9.715122001255515e-05, "loss": 0.0085, "step": 6024 }, { "epoch": 1.1662151702786379, "grad_norm": 0.10605945438146591, "learning_rate": 
9.715025919283286e-05, "loss": 0.0097, "step": 6025 }, { "epoch": 1.16640866873065, "grad_norm": 0.052153848111629486, "learning_rate": 9.714929821640628e-05, "loss": 0.007, "step": 6026 }, { "epoch": 1.1666021671826625, "grad_norm": 0.15392117202281952, "learning_rate": 9.714833708327901e-05, "loss": 0.0094, "step": 6027 }, { "epoch": 1.166795665634675, "grad_norm": 0.10854153335094452, "learning_rate": 9.71473757934546e-05, "loss": 0.0076, "step": 6028 }, { "epoch": 1.1669891640866874, "grad_norm": 0.08608695864677429, "learning_rate": 9.714641434693665e-05, "loss": 0.0089, "step": 6029 }, { "epoch": 1.1671826625386996, "grad_norm": 0.10355199873447418, "learning_rate": 9.714545274372872e-05, "loss": 0.01, "step": 6030 }, { "epoch": 1.167376160990712, "grad_norm": 0.16623224318027496, "learning_rate": 9.714449098383435e-05, "loss": 0.01, "step": 6031 }, { "epoch": 1.1675696594427245, "grad_norm": 0.12939389050006866, "learning_rate": 9.714352906725717e-05, "loss": 0.0094, "step": 6032 }, { "epoch": 1.1677631578947367, "grad_norm": 0.17933419346809387, "learning_rate": 9.714256699400073e-05, "loss": 0.0088, "step": 6033 }, { "epoch": 1.1679566563467492, "grad_norm": 0.19526925683021545, "learning_rate": 9.71416047640686e-05, "loss": 0.0101, "step": 6034 }, { "epoch": 1.1681501547987616, "grad_norm": 0.15513603389263153, "learning_rate": 9.714064237746438e-05, "loss": 0.0092, "step": 6035 }, { "epoch": 1.168343653250774, "grad_norm": 0.24193759262561798, "learning_rate": 9.713967983419163e-05, "loss": 0.0097, "step": 6036 }, { "epoch": 1.1685371517027865, "grad_norm": 0.09707694500684738, "learning_rate": 9.713871713425395e-05, "loss": 0.0098, "step": 6037 }, { "epoch": 1.1687306501547987, "grad_norm": 0.2548660933971405, "learning_rate": 9.713775427765488e-05, "loss": 0.0073, "step": 6038 }, { "epoch": 1.1689241486068112, "grad_norm": 0.1378498673439026, "learning_rate": 9.713679126439802e-05, "loss": 0.0082, "step": 6039 }, { "epoch": 1.1691176470588236, "grad_norm": 0.18714489042758942, "learning_rate": 9.713582809448697e-05, "loss": 0.0089, "step": 6040 }, { "epoch": 1.1693111455108358, "grad_norm": 0.22979719936847687, "learning_rate": 9.713486476792529e-05, "loss": 0.0105, "step": 6041 }, { "epoch": 1.1695046439628483, "grad_norm": 0.06692920625209808, "learning_rate": 9.713390128471657e-05, "loss": 0.009, "step": 6042 }, { "epoch": 1.1696981424148607, "grad_norm": 0.25699713826179504, "learning_rate": 9.713293764486437e-05, "loss": 0.0106, "step": 6043 }, { "epoch": 1.1698916408668731, "grad_norm": 0.09547333419322968, "learning_rate": 9.71319738483723e-05, "loss": 0.01, "step": 6044 }, { "epoch": 1.1700851393188854, "grad_norm": 0.2078922539949417, "learning_rate": 9.713100989524394e-05, "loss": 0.0099, "step": 6045 }, { "epoch": 1.1702786377708978, "grad_norm": 0.0830937847495079, "learning_rate": 9.713004578548285e-05, "loss": 0.0075, "step": 6046 }, { "epoch": 1.1704721362229102, "grad_norm": 0.07405702024698257, "learning_rate": 9.712908151909264e-05, "loss": 0.0076, "step": 6047 }, { "epoch": 1.1706656346749227, "grad_norm": 0.13574974238872528, "learning_rate": 9.712811709607689e-05, "loss": 0.0098, "step": 6048 }, { "epoch": 1.170859133126935, "grad_norm": 0.08221451938152313, "learning_rate": 9.712715251643917e-05, "loss": 0.0087, "step": 6049 }, { "epoch": 1.1710526315789473, "grad_norm": 0.06998521089553833, "learning_rate": 9.712618778018309e-05, "loss": 0.0104, "step": 6050 }, { "epoch": 1.1712461300309598, "grad_norm": 0.09528906643390656, "learning_rate": 
9.71252228873122e-05, "loss": 0.0081, "step": 6051 }, { "epoch": 1.1714396284829722, "grad_norm": 0.043657902628183365, "learning_rate": 9.712425783783013e-05, "loss": 0.0072, "step": 6052 }, { "epoch": 1.1716331269349844, "grad_norm": 0.18534526228904724, "learning_rate": 9.712329263174045e-05, "loss": 0.0114, "step": 6053 }, { "epoch": 1.171826625386997, "grad_norm": 0.10541703552007675, "learning_rate": 9.712232726904675e-05, "loss": 0.0097, "step": 6054 }, { "epoch": 1.1720201238390093, "grad_norm": 0.12077798694372177, "learning_rate": 9.712136174975262e-05, "loss": 0.0102, "step": 6055 }, { "epoch": 1.1722136222910216, "grad_norm": 0.13029064238071442, "learning_rate": 9.712039607386163e-05, "loss": 0.0108, "step": 6056 }, { "epoch": 1.172407120743034, "grad_norm": 0.09127160906791687, "learning_rate": 9.71194302413774e-05, "loss": 0.0096, "step": 6057 }, { "epoch": 1.1726006191950464, "grad_norm": 0.10191839933395386, "learning_rate": 9.71184642523035e-05, "loss": 0.0114, "step": 6058 }, { "epoch": 1.1727941176470589, "grad_norm": 0.1864745169878006, "learning_rate": 9.711749810664354e-05, "loss": 0.0085, "step": 6059 }, { "epoch": 1.1729876160990713, "grad_norm": 0.11390198022127151, "learning_rate": 9.711653180440106e-05, "loss": 0.0079, "step": 6060 }, { "epoch": 1.1731811145510835, "grad_norm": 0.21872027218341827, "learning_rate": 9.711556534557974e-05, "loss": 0.0092, "step": 6061 }, { "epoch": 1.173374613003096, "grad_norm": 0.1365128755569458, "learning_rate": 9.71145987301831e-05, "loss": 0.0099, "step": 6062 }, { "epoch": 1.1735681114551084, "grad_norm": 0.1440650373697281, "learning_rate": 9.711363195821478e-05, "loss": 0.0099, "step": 6063 }, { "epoch": 1.1737616099071206, "grad_norm": 0.1535705327987671, "learning_rate": 9.711266502967834e-05, "loss": 0.0094, "step": 6064 }, { "epoch": 1.173955108359133, "grad_norm": 0.11657055467367172, "learning_rate": 9.711169794457738e-05, "loss": 0.0076, "step": 6065 }, { "epoch": 1.1741486068111455, "grad_norm": 0.12143681943416595, "learning_rate": 9.71107307029155e-05, "loss": 0.0083, "step": 6066 }, { "epoch": 1.174342105263158, "grad_norm": 0.14919911324977875, "learning_rate": 9.710976330469633e-05, "loss": 0.0083, "step": 6067 }, { "epoch": 1.1745356037151702, "grad_norm": 0.11141885071992874, "learning_rate": 9.710879574992341e-05, "loss": 0.0088, "step": 6068 }, { "epoch": 1.1747291021671826, "grad_norm": 0.10886853188276291, "learning_rate": 9.710782803860036e-05, "loss": 0.0074, "step": 6069 }, { "epoch": 1.174922600619195, "grad_norm": 0.12089994549751282, "learning_rate": 9.71068601707308e-05, "loss": 0.0077, "step": 6070 }, { "epoch": 1.1751160990712075, "grad_norm": 0.10348009318113327, "learning_rate": 9.71058921463183e-05, "loss": 0.0089, "step": 6071 }, { "epoch": 1.1753095975232197, "grad_norm": 0.1001444160938263, "learning_rate": 9.710492396536648e-05, "loss": 0.0084, "step": 6072 }, { "epoch": 1.1755030959752322, "grad_norm": 0.10567139089107513, "learning_rate": 9.71039556278789e-05, "loss": 0.008, "step": 6073 }, { "epoch": 1.1756965944272446, "grad_norm": 0.1013132631778717, "learning_rate": 9.71029871338592e-05, "loss": 0.0088, "step": 6074 }, { "epoch": 1.175890092879257, "grad_norm": 0.08494134992361069, "learning_rate": 9.710201848331098e-05, "loss": 0.0105, "step": 6075 }, { "epoch": 1.1760835913312693, "grad_norm": 0.15467071533203125, "learning_rate": 9.710104967623782e-05, "loss": 0.0102, "step": 6076 }, { "epoch": 1.1762770897832817, "grad_norm": 0.0626111552119255, "learning_rate": 
9.710008071264333e-05, "loss": 0.0091, "step": 6077 }, { "epoch": 1.1764705882352942, "grad_norm": 0.16237622499465942, "learning_rate": 9.709911159253113e-05, "loss": 0.0093, "step": 6078 }, { "epoch": 1.1766640866873066, "grad_norm": 0.04690232127904892, "learning_rate": 9.709814231590479e-05, "loss": 0.0094, "step": 6079 }, { "epoch": 1.1768575851393188, "grad_norm": 0.16631531715393066, "learning_rate": 9.709717288276792e-05, "loss": 0.009, "step": 6080 }, { "epoch": 1.1770510835913313, "grad_norm": 0.08164742588996887, "learning_rate": 9.709620329312415e-05, "loss": 0.0081, "step": 6081 }, { "epoch": 1.1772445820433437, "grad_norm": 0.12022615224123001, "learning_rate": 9.709523354697708e-05, "loss": 0.0086, "step": 6082 }, { "epoch": 1.1774380804953561, "grad_norm": 0.1275559961795807, "learning_rate": 9.709426364433027e-05, "loss": 0.008, "step": 6083 }, { "epoch": 1.1776315789473684, "grad_norm": 0.06530513614416122, "learning_rate": 9.70932935851874e-05, "loss": 0.0088, "step": 6084 }, { "epoch": 1.1778250773993808, "grad_norm": 0.19815315306186676, "learning_rate": 9.709232336955201e-05, "loss": 0.0081, "step": 6085 }, { "epoch": 1.1780185758513932, "grad_norm": 0.03588460013270378, "learning_rate": 9.709135299742774e-05, "loss": 0.0064, "step": 6086 }, { "epoch": 1.1782120743034055, "grad_norm": 0.17972582578659058, "learning_rate": 9.709038246881818e-05, "loss": 0.01, "step": 6087 }, { "epoch": 1.178405572755418, "grad_norm": 0.07068070024251938, "learning_rate": 9.708941178372698e-05, "loss": 0.0101, "step": 6088 }, { "epoch": 1.1785990712074303, "grad_norm": 0.20348675549030304, "learning_rate": 9.70884409421577e-05, "loss": 0.0092, "step": 6089 }, { "epoch": 1.1787925696594428, "grad_norm": 0.06463171541690826, "learning_rate": 9.708746994411397e-05, "loss": 0.0101, "step": 6090 }, { "epoch": 1.1789860681114552, "grad_norm": 0.16820670664310455, "learning_rate": 9.708649878959938e-05, "loss": 0.0088, "step": 6091 }, { "epoch": 1.1791795665634675, "grad_norm": 0.09934242814779282, "learning_rate": 9.708552747861757e-05, "loss": 0.01, "step": 6092 }, { "epoch": 1.17937306501548, "grad_norm": 0.12761831283569336, "learning_rate": 9.708455601117214e-05, "loss": 0.0086, "step": 6093 }, { "epoch": 1.1795665634674923, "grad_norm": 0.14413011074066162, "learning_rate": 9.708358438726672e-05, "loss": 0.0085, "step": 6094 }, { "epoch": 1.1797600619195046, "grad_norm": 0.04738515615463257, "learning_rate": 9.708261260690488e-05, "loss": 0.0089, "step": 6095 }, { "epoch": 1.179953560371517, "grad_norm": 0.20438708364963531, "learning_rate": 9.708164067009026e-05, "loss": 0.0078, "step": 6096 }, { "epoch": 1.1801470588235294, "grad_norm": 0.09099844098091125, "learning_rate": 9.708066857682646e-05, "loss": 0.0101, "step": 6097 }, { "epoch": 1.1803405572755419, "grad_norm": 0.08220896869897842, "learning_rate": 9.707969632711712e-05, "loss": 0.0107, "step": 6098 }, { "epoch": 1.180534055727554, "grad_norm": 0.4020499587059021, "learning_rate": 9.707872392096582e-05, "loss": 0.0117, "step": 6099 }, { "epoch": 1.1807275541795665, "grad_norm": 0.05729808285832405, "learning_rate": 9.70777513583762e-05, "loss": 0.008, "step": 6100 }, { "epoch": 1.180921052631579, "grad_norm": 0.3685844838619232, "learning_rate": 9.707677863935188e-05, "loss": 0.0103, "step": 6101 }, { "epoch": 1.1811145510835914, "grad_norm": 0.14222803711891174, "learning_rate": 9.707580576389646e-05, "loss": 0.0109, "step": 6102 }, { "epoch": 1.1813080495356036, "grad_norm": 0.26982107758522034, "learning_rate": 
9.707483273201355e-05, "loss": 0.0097, "step": 6103 }, { "epoch": 1.181501547987616, "grad_norm": 0.2834746837615967, "learning_rate": 9.70738595437068e-05, "loss": 0.0088, "step": 6104 }, { "epoch": 1.1816950464396285, "grad_norm": 0.12715134024620056, "learning_rate": 9.707288619897978e-05, "loss": 0.0129, "step": 6105 }, { "epoch": 1.181888544891641, "grad_norm": 0.30619317293167114, "learning_rate": 9.707191269783615e-05, "loss": 0.01, "step": 6106 }, { "epoch": 1.1820820433436532, "grad_norm": 0.1438446342945099, "learning_rate": 9.707093904027953e-05, "loss": 0.0086, "step": 6107 }, { "epoch": 1.1822755417956656, "grad_norm": 0.2448987364768982, "learning_rate": 9.706996522631351e-05, "loss": 0.0095, "step": 6108 }, { "epoch": 1.182469040247678, "grad_norm": 0.21722084283828735, "learning_rate": 9.706899125594174e-05, "loss": 0.009, "step": 6109 }, { "epoch": 1.1826625386996903, "grad_norm": 0.13156670331954956, "learning_rate": 9.706801712916781e-05, "loss": 0.0093, "step": 6110 }, { "epoch": 1.1828560371517027, "grad_norm": 0.23865781724452972, "learning_rate": 9.706704284599536e-05, "loss": 0.0106, "step": 6111 }, { "epoch": 1.1830495356037152, "grad_norm": 0.09371130168437958, "learning_rate": 9.706606840642802e-05, "loss": 0.0102, "step": 6112 }, { "epoch": 1.1832430340557276, "grad_norm": 0.20261605083942413, "learning_rate": 9.706509381046938e-05, "loss": 0.0106, "step": 6113 }, { "epoch": 1.18343653250774, "grad_norm": 0.1083858385682106, "learning_rate": 9.70641190581231e-05, "loss": 0.007, "step": 6114 }, { "epoch": 1.1836300309597523, "grad_norm": 0.10462462157011032, "learning_rate": 9.70631441493928e-05, "loss": 0.0097, "step": 6115 }, { "epoch": 1.1838235294117647, "grad_norm": 0.1405559480190277, "learning_rate": 9.706216908428207e-05, "loss": 0.0107, "step": 6116 }, { "epoch": 1.1840170278637772, "grad_norm": 0.06357325613498688, "learning_rate": 9.70611938627946e-05, "loss": 0.0096, "step": 6117 }, { "epoch": 1.1842105263157894, "grad_norm": 0.1172221302986145, "learning_rate": 9.706021848493394e-05, "loss": 0.0096, "step": 6118 }, { "epoch": 1.1844040247678018, "grad_norm": 0.09771746397018433, "learning_rate": 9.705924295070375e-05, "loss": 0.011, "step": 6119 }, { "epoch": 1.1845975232198143, "grad_norm": 0.11092245578765869, "learning_rate": 9.705826726010769e-05, "loss": 0.0088, "step": 6120 }, { "epoch": 1.1847910216718267, "grad_norm": 0.09850882738828659, "learning_rate": 9.705729141314934e-05, "loss": 0.0095, "step": 6121 }, { "epoch": 1.184984520123839, "grad_norm": 0.0944008156657219, "learning_rate": 9.705631540983233e-05, "loss": 0.0105, "step": 6122 }, { "epoch": 1.1851780185758514, "grad_norm": 0.11134236305952072, "learning_rate": 9.705533925016031e-05, "loss": 0.0093, "step": 6123 }, { "epoch": 1.1853715170278638, "grad_norm": 0.08077298104763031, "learning_rate": 9.705436293413691e-05, "loss": 0.0093, "step": 6124 }, { "epoch": 1.1855650154798762, "grad_norm": 0.1173623576760292, "learning_rate": 9.705338646176575e-05, "loss": 0.0104, "step": 6125 }, { "epoch": 1.1857585139318885, "grad_norm": 0.06844485551118851, "learning_rate": 9.705240983305045e-05, "loss": 0.009, "step": 6126 }, { "epoch": 1.185952012383901, "grad_norm": 0.07690564543008804, "learning_rate": 9.705143304799467e-05, "loss": 0.0112, "step": 6127 }, { "epoch": 1.1861455108359134, "grad_norm": 0.1130523532629013, "learning_rate": 9.7050456106602e-05, "loss": 0.0084, "step": 6128 }, { "epoch": 1.1863390092879258, "grad_norm": 0.07907510548830032, "learning_rate": 
9.704947900887611e-05, "loss": 0.0065, "step": 6129 }, { "epoch": 1.186532507739938, "grad_norm": 0.07011304795742035, "learning_rate": 9.704850175482062e-05, "loss": 0.0103, "step": 6130 }, { "epoch": 1.1867260061919505, "grad_norm": 0.09401850402355194, "learning_rate": 9.704752434443916e-05, "loss": 0.0102, "step": 6131 }, { "epoch": 1.186919504643963, "grad_norm": 0.08768632262945175, "learning_rate": 9.704654677773537e-05, "loss": 0.0079, "step": 6132 }, { "epoch": 1.1871130030959751, "grad_norm": 0.04964831843972206, "learning_rate": 9.704556905471287e-05, "loss": 0.0099, "step": 6133 }, { "epoch": 1.1873065015479876, "grad_norm": 0.0962938591837883, "learning_rate": 9.704459117537531e-05, "loss": 0.0093, "step": 6134 }, { "epoch": 1.1875, "grad_norm": 0.2155838906764984, "learning_rate": 9.704361313972631e-05, "loss": 0.0082, "step": 6135 }, { "epoch": 1.1876934984520124, "grad_norm": 0.18403223156929016, "learning_rate": 9.704263494776953e-05, "loss": 0.0091, "step": 6136 }, { "epoch": 1.1878869969040249, "grad_norm": 0.17460137605667114, "learning_rate": 9.704165659950859e-05, "loss": 0.0086, "step": 6137 }, { "epoch": 1.188080495356037, "grad_norm": 0.09682535380125046, "learning_rate": 9.704067809494713e-05, "loss": 0.0086, "step": 6138 }, { "epoch": 1.1882739938080495, "grad_norm": 0.11114754527807236, "learning_rate": 9.703969943408878e-05, "loss": 0.01, "step": 6139 }, { "epoch": 1.188467492260062, "grad_norm": 0.050017014145851135, "learning_rate": 9.703872061693719e-05, "loss": 0.0079, "step": 6140 }, { "epoch": 1.1886609907120742, "grad_norm": 0.0940728560090065, "learning_rate": 9.703774164349599e-05, "loss": 0.0099, "step": 6141 }, { "epoch": 1.1888544891640866, "grad_norm": 0.08811698108911514, "learning_rate": 9.703676251376884e-05, "loss": 0.0115, "step": 6142 }, { "epoch": 1.189047987616099, "grad_norm": 0.07616643607616425, "learning_rate": 9.703578322775936e-05, "loss": 0.0076, "step": 6143 }, { "epoch": 1.1892414860681115, "grad_norm": 0.09137233346700668, "learning_rate": 9.703480378547119e-05, "loss": 0.008, "step": 6144 }, { "epoch": 1.1894349845201238, "grad_norm": 0.057841707020998, "learning_rate": 9.703382418690798e-05, "loss": 0.0104, "step": 6145 }, { "epoch": 1.1896284829721362, "grad_norm": 0.10675294697284698, "learning_rate": 9.703284443207337e-05, "loss": 0.0079, "step": 6146 }, { "epoch": 1.1898219814241486, "grad_norm": 0.045860256999731064, "learning_rate": 9.703186452097099e-05, "loss": 0.0089, "step": 6147 }, { "epoch": 1.190015479876161, "grad_norm": 0.125423401594162, "learning_rate": 9.703088445360451e-05, "loss": 0.0089, "step": 6148 }, { "epoch": 1.1902089783281733, "grad_norm": 0.10247988253831863, "learning_rate": 9.702990422997756e-05, "loss": 0.0092, "step": 6149 }, { "epoch": 1.1904024767801857, "grad_norm": 0.13183297216892242, "learning_rate": 9.702892385009376e-05, "loss": 0.0102, "step": 6150 }, { "epoch": 1.1905959752321982, "grad_norm": 0.11522122472524643, "learning_rate": 9.70279433139568e-05, "loss": 0.0091, "step": 6151 }, { "epoch": 1.1907894736842106, "grad_norm": 0.0562506802380085, "learning_rate": 9.702696262157031e-05, "loss": 0.0094, "step": 6152 }, { "epoch": 1.1909829721362228, "grad_norm": 0.18097220361232758, "learning_rate": 9.70259817729379e-05, "loss": 0.0087, "step": 6153 }, { "epoch": 1.1911764705882353, "grad_norm": 0.052427686750888824, "learning_rate": 9.702500076806325e-05, "loss": 0.0098, "step": 6154 }, { "epoch": 1.1913699690402477, "grad_norm": 0.16844448447227478, "learning_rate": 
9.702401960695001e-05, "loss": 0.0081, "step": 6155 }, { "epoch": 1.19156346749226, "grad_norm": 0.06482717394828796, "learning_rate": 9.702303828960183e-05, "loss": 0.01, "step": 6156 }, { "epoch": 1.1917569659442724, "grad_norm": 0.13188785314559937, "learning_rate": 9.702205681602234e-05, "loss": 0.0074, "step": 6157 }, { "epoch": 1.1919504643962848, "grad_norm": 0.08141980320215225, "learning_rate": 9.702107518621521e-05, "loss": 0.0083, "step": 6158 }, { "epoch": 1.1921439628482973, "grad_norm": 0.05133500695228577, "learning_rate": 9.702009340018405e-05, "loss": 0.0086, "step": 6159 }, { "epoch": 1.1923374613003097, "grad_norm": 0.12978576123714447, "learning_rate": 9.701911145793257e-05, "loss": 0.0112, "step": 6160 }, { "epoch": 1.192530959752322, "grad_norm": 0.05241542309522629, "learning_rate": 9.701812935946436e-05, "loss": 0.0096, "step": 6161 }, { "epoch": 1.1927244582043344, "grad_norm": 0.1131194606423378, "learning_rate": 9.70171471047831e-05, "loss": 0.0096, "step": 6162 }, { "epoch": 1.1929179566563468, "grad_norm": 0.08460836857557297, "learning_rate": 9.701616469389244e-05, "loss": 0.0081, "step": 6163 }, { "epoch": 1.193111455108359, "grad_norm": 0.10355665534734726, "learning_rate": 9.701518212679604e-05, "loss": 0.0088, "step": 6164 }, { "epoch": 1.1933049535603715, "grad_norm": 0.09385806322097778, "learning_rate": 9.701419940349757e-05, "loss": 0.009, "step": 6165 }, { "epoch": 1.193498452012384, "grad_norm": 0.0967518761754036, "learning_rate": 9.701321652400062e-05, "loss": 0.0079, "step": 6166 }, { "epoch": 1.1936919504643964, "grad_norm": 0.0691692978143692, "learning_rate": 9.70122334883089e-05, "loss": 0.0086, "step": 6167 }, { "epoch": 1.1938854489164086, "grad_norm": 0.11629923433065414, "learning_rate": 9.701125029642604e-05, "loss": 0.0074, "step": 6168 }, { "epoch": 1.194078947368421, "grad_norm": 0.053740642964839935, "learning_rate": 9.701026694835571e-05, "loss": 0.0077, "step": 6169 }, { "epoch": 1.1942724458204335, "grad_norm": 0.13879740238189697, "learning_rate": 9.700928344410156e-05, "loss": 0.0083, "step": 6170 }, { "epoch": 1.194465944272446, "grad_norm": 0.05136359483003616, "learning_rate": 9.700829978366723e-05, "loss": 0.01, "step": 6171 }, { "epoch": 1.1946594427244581, "grad_norm": 0.1296931952238083, "learning_rate": 9.700731596705642e-05, "loss": 0.0109, "step": 6172 }, { "epoch": 1.1948529411764706, "grad_norm": 0.049284927546978, "learning_rate": 9.700633199427274e-05, "loss": 0.0074, "step": 6173 }, { "epoch": 1.195046439628483, "grad_norm": 0.11162306368350983, "learning_rate": 9.700534786531988e-05, "loss": 0.0079, "step": 6174 }, { "epoch": 1.1952399380804954, "grad_norm": 0.05922986567020416, "learning_rate": 9.700436358020149e-05, "loss": 0.0083, "step": 6175 }, { "epoch": 1.1954334365325077, "grad_norm": 0.08575958758592606, "learning_rate": 9.700337913892122e-05, "loss": 0.0093, "step": 6176 }, { "epoch": 1.19562693498452, "grad_norm": 0.08055994659662247, "learning_rate": 9.700239454148273e-05, "loss": 0.0079, "step": 6177 }, { "epoch": 1.1958204334365325, "grad_norm": 0.06041863188147545, "learning_rate": 9.700140978788969e-05, "loss": 0.0091, "step": 6178 }, { "epoch": 1.1960139318885448, "grad_norm": 0.08488120883703232, "learning_rate": 9.700042487814574e-05, "loss": 0.01, "step": 6179 }, { "epoch": 1.1962074303405572, "grad_norm": 0.04850359633564949, "learning_rate": 9.699943981225459e-05, "loss": 0.0096, "step": 6180 }, { "epoch": 1.1964009287925697, "grad_norm": 0.07782775908708572, "learning_rate": 
9.699845459021986e-05, "loss": 0.0088, "step": 6181 }, { "epoch": 1.196594427244582, "grad_norm": 0.03537279739975929, "learning_rate": 9.699746921204523e-05, "loss": 0.0087, "step": 6182 }, { "epoch": 1.1967879256965945, "grad_norm": 0.05179639533162117, "learning_rate": 9.699648367773436e-05, "loss": 0.0077, "step": 6183 }, { "epoch": 1.1969814241486068, "grad_norm": 0.049641724675893784, "learning_rate": 9.699549798729088e-05, "loss": 0.0096, "step": 6184 }, { "epoch": 1.1971749226006192, "grad_norm": 0.053267691284418106, "learning_rate": 9.699451214071852e-05, "loss": 0.0098, "step": 6185 }, { "epoch": 1.1973684210526316, "grad_norm": 0.06745152175426483, "learning_rate": 9.69935261380209e-05, "loss": 0.0085, "step": 6186 }, { "epoch": 1.1975619195046439, "grad_norm": 0.02737862430512905, "learning_rate": 9.69925399792017e-05, "loss": 0.008, "step": 6187 }, { "epoch": 1.1977554179566563, "grad_norm": 0.09978467971086502, "learning_rate": 9.699155366426459e-05, "loss": 0.0085, "step": 6188 }, { "epoch": 1.1979489164086687, "grad_norm": 0.06420934945344925, "learning_rate": 9.699056719321322e-05, "loss": 0.0096, "step": 6189 }, { "epoch": 1.1981424148606812, "grad_norm": 0.11999393254518509, "learning_rate": 9.698958056605128e-05, "loss": 0.009, "step": 6190 }, { "epoch": 1.1983359133126934, "grad_norm": 0.04801908880472183, "learning_rate": 9.698859378278241e-05, "loss": 0.0091, "step": 6191 }, { "epoch": 1.1985294117647058, "grad_norm": 0.1578725427389145, "learning_rate": 9.698760684341029e-05, "loss": 0.0097, "step": 6192 }, { "epoch": 1.1987229102167183, "grad_norm": 0.1011117547750473, "learning_rate": 9.69866197479386e-05, "loss": 0.0085, "step": 6193 }, { "epoch": 1.1989164086687307, "grad_norm": 0.122354656457901, "learning_rate": 9.698563249637103e-05, "loss": 0.009, "step": 6194 }, { "epoch": 1.199109907120743, "grad_norm": 0.1926904171705246, "learning_rate": 9.69846450887112e-05, "loss": 0.0075, "step": 6195 }, { "epoch": 1.1993034055727554, "grad_norm": 0.13987265527248383, "learning_rate": 9.69836575249628e-05, "loss": 0.0098, "step": 6196 }, { "epoch": 1.1994969040247678, "grad_norm": 0.12398991733789444, "learning_rate": 9.698266980512952e-05, "loss": 0.01, "step": 6197 }, { "epoch": 1.1996904024767803, "grad_norm": 0.05917302891612053, "learning_rate": 9.698168192921501e-05, "loss": 0.0094, "step": 6198 }, { "epoch": 1.1998839009287925, "grad_norm": 0.14230749011039734, "learning_rate": 9.698069389722295e-05, "loss": 0.0081, "step": 6199 }, { "epoch": 1.200077399380805, "grad_norm": 0.07165712118148804, "learning_rate": 9.697970570915703e-05, "loss": 0.0114, "step": 6200 }, { "epoch": 1.2002708978328174, "grad_norm": 0.11376668512821198, "learning_rate": 9.69787173650209e-05, "loss": 0.009, "step": 6201 }, { "epoch": 1.2004643962848298, "grad_norm": 0.13045892119407654, "learning_rate": 9.697772886481824e-05, "loss": 0.009, "step": 6202 }, { "epoch": 1.200657894736842, "grad_norm": 0.17324253916740417, "learning_rate": 9.697674020855271e-05, "loss": 0.0088, "step": 6203 }, { "epoch": 1.2008513931888545, "grad_norm": 0.1323123425245285, "learning_rate": 9.697575139622803e-05, "loss": 0.0112, "step": 6204 }, { "epoch": 1.201044891640867, "grad_norm": 0.1430734544992447, "learning_rate": 9.697476242784785e-05, "loss": 0.0108, "step": 6205 }, { "epoch": 1.2012383900928794, "grad_norm": 0.152140274643898, "learning_rate": 9.697377330341582e-05, "loss": 0.0106, "step": 6206 }, { "epoch": 1.2014318885448916, "grad_norm": 0.0985705554485321, "learning_rate": 
9.697278402293566e-05, "loss": 0.009, "step": 6207 }, { "epoch": 1.201625386996904, "grad_norm": 0.1607646346092224, "learning_rate": 9.697179458641104e-05, "loss": 0.0084, "step": 6208 }, { "epoch": 1.2018188854489165, "grad_norm": 0.05648643895983696, "learning_rate": 9.69708049938456e-05, "loss": 0.0087, "step": 6209 }, { "epoch": 1.2020123839009287, "grad_norm": 0.16676467657089233, "learning_rate": 9.696981524524308e-05, "loss": 0.0103, "step": 6210 }, { "epoch": 1.2022058823529411, "grad_norm": 0.07336392998695374, "learning_rate": 9.696882534060712e-05, "loss": 0.0072, "step": 6211 }, { "epoch": 1.2023993808049536, "grad_norm": 0.13386304676532745, "learning_rate": 9.69678352799414e-05, "loss": 0.0082, "step": 6212 }, { "epoch": 1.202592879256966, "grad_norm": 0.11882136017084122, "learning_rate": 9.696684506324961e-05, "loss": 0.0102, "step": 6213 }, { "epoch": 1.2027863777089784, "grad_norm": 0.13024529814720154, "learning_rate": 9.696585469053545e-05, "loss": 0.0099, "step": 6214 }, { "epoch": 1.2029798761609907, "grad_norm": 0.13169342279434204, "learning_rate": 9.696486416180256e-05, "loss": 0.0112, "step": 6215 }, { "epoch": 1.203173374613003, "grad_norm": 0.16415385901927948, "learning_rate": 9.696387347705466e-05, "loss": 0.0088, "step": 6216 }, { "epoch": 1.2033668730650156, "grad_norm": 0.11994940787553787, "learning_rate": 9.69628826362954e-05, "loss": 0.01, "step": 6217 }, { "epoch": 1.2035603715170278, "grad_norm": 0.12671160697937012, "learning_rate": 9.696189163952851e-05, "loss": 0.0079, "step": 6218 }, { "epoch": 1.2037538699690402, "grad_norm": 0.10734181106090546, "learning_rate": 9.696090048675764e-05, "loss": 0.0075, "step": 6219 }, { "epoch": 1.2039473684210527, "grad_norm": 0.11519113183021545, "learning_rate": 9.695990917798647e-05, "loss": 0.0072, "step": 6220 }, { "epoch": 1.204140866873065, "grad_norm": 0.09087207913398743, "learning_rate": 9.69589177132187e-05, "loss": 0.0093, "step": 6221 }, { "epoch": 1.2043343653250773, "grad_norm": 0.07291299104690552, "learning_rate": 9.695792609245802e-05, "loss": 0.0102, "step": 6222 }, { "epoch": 1.2045278637770898, "grad_norm": 0.10674339532852173, "learning_rate": 9.695693431570809e-05, "loss": 0.0077, "step": 6223 }, { "epoch": 1.2047213622291022, "grad_norm": 0.07034329324960709, "learning_rate": 9.695594238297262e-05, "loss": 0.0084, "step": 6224 }, { "epoch": 1.2049148606811146, "grad_norm": 0.11473590135574341, "learning_rate": 9.695495029425532e-05, "loss": 0.0091, "step": 6225 }, { "epoch": 1.2051083591331269, "grad_norm": 0.13552162051200867, "learning_rate": 9.695395804955983e-05, "loss": 0.0093, "step": 6226 }, { "epoch": 1.2053018575851393, "grad_norm": 0.08518287539482117, "learning_rate": 9.695296564888987e-05, "loss": 0.0082, "step": 6227 }, { "epoch": 1.2054953560371517, "grad_norm": 0.15247824788093567, "learning_rate": 9.695197309224911e-05, "loss": 0.0082, "step": 6228 }, { "epoch": 1.2056888544891642, "grad_norm": 0.11369283497333527, "learning_rate": 9.695098037964128e-05, "loss": 0.0078, "step": 6229 }, { "epoch": 1.2058823529411764, "grad_norm": 0.1455560326576233, "learning_rate": 9.694998751107002e-05, "loss": 0.0089, "step": 6230 }, { "epoch": 1.2060758513931888, "grad_norm": 0.11575435847043991, "learning_rate": 9.694899448653904e-05, "loss": 0.0112, "step": 6231 }, { "epoch": 1.2062693498452013, "grad_norm": 0.06509461998939514, "learning_rate": 9.694800130605204e-05, "loss": 0.0084, "step": 6232 }, { "epoch": 1.2064628482972135, "grad_norm": 0.13140305876731873, "learning_rate": 
9.69470079696127e-05, "loss": 0.0091, "step": 6233 }, { "epoch": 1.206656346749226, "grad_norm": 0.07068582624197006, "learning_rate": 9.694601447722475e-05, "loss": 0.0094, "step": 6234 }, { "epoch": 1.2068498452012384, "grad_norm": 0.06307458132505417, "learning_rate": 9.694502082889182e-05, "loss": 0.0087, "step": 6235 }, { "epoch": 1.2070433436532508, "grad_norm": 0.09341058135032654, "learning_rate": 9.694402702461765e-05, "loss": 0.0094, "step": 6236 }, { "epoch": 1.2072368421052633, "grad_norm": 0.059456661343574524, "learning_rate": 9.694303306440592e-05, "loss": 0.0092, "step": 6237 }, { "epoch": 1.2074303405572755, "grad_norm": 0.08784561604261398, "learning_rate": 9.694203894826033e-05, "loss": 0.0094, "step": 6238 }, { "epoch": 1.207623839009288, "grad_norm": 0.08448836952447891, "learning_rate": 9.694104467618458e-05, "loss": 0.0108, "step": 6239 }, { "epoch": 1.2078173374613004, "grad_norm": 0.06700941175222397, "learning_rate": 9.694005024818235e-05, "loss": 0.0091, "step": 6240 }, { "epoch": 1.2080108359133126, "grad_norm": 0.11217161267995834, "learning_rate": 9.693905566425736e-05, "loss": 0.0093, "step": 6241 }, { "epoch": 1.208204334365325, "grad_norm": 0.0647146999835968, "learning_rate": 9.693806092441329e-05, "loss": 0.0102, "step": 6242 }, { "epoch": 1.2083978328173375, "grad_norm": 0.07971567660570145, "learning_rate": 9.693706602865383e-05, "loss": 0.0084, "step": 6243 }, { "epoch": 1.20859133126935, "grad_norm": 0.1141676977276802, "learning_rate": 9.693607097698271e-05, "loss": 0.0093, "step": 6244 }, { "epoch": 1.2087848297213621, "grad_norm": 0.06315848976373672, "learning_rate": 9.69350757694036e-05, "loss": 0.0083, "step": 6245 }, { "epoch": 1.2089783281733746, "grad_norm": 0.11396429687738419, "learning_rate": 9.693408040592022e-05, "loss": 0.0092, "step": 6246 }, { "epoch": 1.209171826625387, "grad_norm": 0.05934542417526245, "learning_rate": 9.693308488653625e-05, "loss": 0.0101, "step": 6247 }, { "epoch": 1.2093653250773995, "grad_norm": 0.11360563337802887, "learning_rate": 9.693208921125542e-05, "loss": 0.008, "step": 6248 }, { "epoch": 1.2095588235294117, "grad_norm": 0.08795539289712906, "learning_rate": 9.69310933800814e-05, "loss": 0.0089, "step": 6249 }, { "epoch": 1.2097523219814241, "grad_norm": 0.06472920626401901, "learning_rate": 9.693009739301792e-05, "loss": 0.0082, "step": 6250 }, { "epoch": 1.2099458204334366, "grad_norm": 0.105692058801651, "learning_rate": 9.692910125006865e-05, "loss": 0.009, "step": 6251 }, { "epoch": 1.210139318885449, "grad_norm": 0.051889173686504364, "learning_rate": 9.692810495123735e-05, "loss": 0.0091, "step": 6252 }, { "epoch": 1.2103328173374612, "grad_norm": 0.06165582686662674, "learning_rate": 9.692710849652767e-05, "loss": 0.0092, "step": 6253 }, { "epoch": 1.2105263157894737, "grad_norm": 0.0818038359284401, "learning_rate": 9.69261118859433e-05, "loss": 0.0092, "step": 6254 }, { "epoch": 1.2107198142414861, "grad_norm": 0.12576018273830414, "learning_rate": 9.692511511948801e-05, "loss": 0.0089, "step": 6255 }, { "epoch": 1.2109133126934983, "grad_norm": 0.1320319026708603, "learning_rate": 9.692411819716546e-05, "loss": 0.0089, "step": 6256 }, { "epoch": 1.2111068111455108, "grad_norm": 0.10288716852664948, "learning_rate": 9.692312111897938e-05, "loss": 0.0091, "step": 6257 }, { "epoch": 1.2113003095975232, "grad_norm": 0.07997451722621918, "learning_rate": 9.692212388493346e-05, "loss": 0.0081, "step": 6258 }, { "epoch": 1.2114938080495357, "grad_norm": 0.08977610617876053, "learning_rate": 
9.692112649503141e-05, "loss": 0.009, "step": 6259 }, { "epoch": 1.211687306501548, "grad_norm": 0.06554918736219406, "learning_rate": 9.692012894927693e-05, "loss": 0.0088, "step": 6260 }, { "epoch": 1.2118808049535603, "grad_norm": 0.102264404296875, "learning_rate": 9.691913124767377e-05, "loss": 0.0102, "step": 6261 }, { "epoch": 1.2120743034055728, "grad_norm": 0.08157271146774292, "learning_rate": 9.691813339022558e-05, "loss": 0.0094, "step": 6262 }, { "epoch": 1.2122678018575852, "grad_norm": 0.07487855106592178, "learning_rate": 9.691713537693611e-05, "loss": 0.0081, "step": 6263 }, { "epoch": 1.2124613003095974, "grad_norm": 0.08638525009155273, "learning_rate": 9.691613720780906e-05, "loss": 0.0097, "step": 6264 }, { "epoch": 1.2126547987616099, "grad_norm": 0.07423177361488342, "learning_rate": 9.691513888284815e-05, "loss": 0.0087, "step": 6265 }, { "epoch": 1.2128482972136223, "grad_norm": 0.07073676586151123, "learning_rate": 9.691414040205707e-05, "loss": 0.0105, "step": 6266 }, { "epoch": 1.2130417956656347, "grad_norm": 0.06320856511592865, "learning_rate": 9.691314176543956e-05, "loss": 0.0098, "step": 6267 }, { "epoch": 1.213235294117647, "grad_norm": 0.10658735781908035, "learning_rate": 9.691214297299928e-05, "loss": 0.01, "step": 6268 }, { "epoch": 1.2134287925696594, "grad_norm": 0.08396859467029572, "learning_rate": 9.691114402474e-05, "loss": 0.0085, "step": 6269 }, { "epoch": 1.2136222910216719, "grad_norm": 0.048903871327638626, "learning_rate": 9.691014492066544e-05, "loss": 0.0092, "step": 6270 }, { "epoch": 1.2138157894736843, "grad_norm": 0.05017360672354698, "learning_rate": 9.690914566077926e-05, "loss": 0.0086, "step": 6271 }, { "epoch": 1.2140092879256965, "grad_norm": 0.08174265921115875, "learning_rate": 9.69081462450852e-05, "loss": 0.0096, "step": 6272 }, { "epoch": 1.214202786377709, "grad_norm": 0.06937029212713242, "learning_rate": 9.690714667358698e-05, "loss": 0.0107, "step": 6273 }, { "epoch": 1.2143962848297214, "grad_norm": 0.09438376128673553, "learning_rate": 9.690614694628832e-05, "loss": 0.011, "step": 6274 }, { "epoch": 1.2145897832817338, "grad_norm": 0.06807108223438263, "learning_rate": 9.690514706319293e-05, "loss": 0.0104, "step": 6275 }, { "epoch": 1.214783281733746, "grad_norm": 0.12706948816776276, "learning_rate": 9.690414702430453e-05, "loss": 0.0092, "step": 6276 }, { "epoch": 1.2149767801857585, "grad_norm": 0.062245529145002365, "learning_rate": 9.690314682962686e-05, "loss": 0.0081, "step": 6277 }, { "epoch": 1.215170278637771, "grad_norm": 0.12653176486492157, "learning_rate": 9.690214647916358e-05, "loss": 0.0089, "step": 6278 }, { "epoch": 1.2153637770897832, "grad_norm": 0.07797049731016159, "learning_rate": 9.690114597291846e-05, "loss": 0.0075, "step": 6279 }, { "epoch": 1.2155572755417956, "grad_norm": 0.10798146575689316, "learning_rate": 9.69001453108952e-05, "loss": 0.0083, "step": 6280 }, { "epoch": 1.215750773993808, "grad_norm": 0.13010917603969574, "learning_rate": 9.689914449309752e-05, "loss": 0.0069, "step": 6281 }, { "epoch": 1.2159442724458205, "grad_norm": 0.07715758681297302, "learning_rate": 9.689814351952917e-05, "loss": 0.0094, "step": 6282 }, { "epoch": 1.216137770897833, "grad_norm": 0.15079940855503082, "learning_rate": 9.689714239019382e-05, "loss": 0.008, "step": 6283 }, { "epoch": 1.2163312693498451, "grad_norm": 0.0681028962135315, "learning_rate": 9.689614110509522e-05, "loss": 0.0082, "step": 6284 }, { "epoch": 1.2165247678018576, "grad_norm": 0.13176557421684265, "learning_rate": 
9.68951396642371e-05, "loss": 0.0081, "step": 6285 }, { "epoch": 1.21671826625387, "grad_norm": 0.10802050679922104, "learning_rate": 9.689413806762317e-05, "loss": 0.0092, "step": 6286 }, { "epoch": 1.2169117647058822, "grad_norm": 0.04509252682328224, "learning_rate": 9.689313631525716e-05, "loss": 0.01, "step": 6287 }, { "epoch": 1.2171052631578947, "grad_norm": 0.1406438648700714, "learning_rate": 9.689213440714278e-05, "loss": 0.0083, "step": 6288 }, { "epoch": 1.2172987616099071, "grad_norm": 0.09807592630386353, "learning_rate": 9.68911323432838e-05, "loss": 0.0071, "step": 6289 }, { "epoch": 1.2174922600619196, "grad_norm": 0.14204658567905426, "learning_rate": 9.689013012368388e-05, "loss": 0.0105, "step": 6290 }, { "epoch": 1.2176857585139318, "grad_norm": 0.170798197388649, "learning_rate": 9.688912774834679e-05, "loss": 0.0085, "step": 6291 }, { "epoch": 1.2178792569659442, "grad_norm": 0.05206852778792381, "learning_rate": 9.688812521727624e-05, "loss": 0.0082, "step": 6292 }, { "epoch": 1.2180727554179567, "grad_norm": 0.18388661742210388, "learning_rate": 9.688712253047596e-05, "loss": 0.0099, "step": 6293 }, { "epoch": 1.2182662538699691, "grad_norm": 0.15915033221244812, "learning_rate": 9.688611968794969e-05, "loss": 0.0126, "step": 6294 }, { "epoch": 1.2184597523219813, "grad_norm": 0.19189105927944183, "learning_rate": 9.688511668970113e-05, "loss": 0.0095, "step": 6295 }, { "epoch": 1.2186532507739938, "grad_norm": 0.20160602033138275, "learning_rate": 9.688411353573404e-05, "loss": 0.0095, "step": 6296 }, { "epoch": 1.2188467492260062, "grad_norm": 0.06256548315286636, "learning_rate": 9.688311022605214e-05, "loss": 0.0084, "step": 6297 }, { "epoch": 1.2190402476780187, "grad_norm": 0.3230714499950409, "learning_rate": 9.688210676065915e-05, "loss": 0.0098, "step": 6298 }, { "epoch": 1.2192337461300309, "grad_norm": 0.061686791479587555, "learning_rate": 9.68811031395588e-05, "loss": 0.0106, "step": 6299 }, { "epoch": 1.2194272445820433, "grad_norm": 0.25207605957984924, "learning_rate": 9.688009936275485e-05, "loss": 0.0095, "step": 6300 }, { "epoch": 1.2196207430340558, "grad_norm": 0.1997496485710144, "learning_rate": 9.687909543025099e-05, "loss": 0.0092, "step": 6301 }, { "epoch": 1.2198142414860682, "grad_norm": 0.15925513207912445, "learning_rate": 9.687809134205098e-05, "loss": 0.0103, "step": 6302 }, { "epoch": 1.2200077399380804, "grad_norm": 0.2525686025619507, "learning_rate": 9.687708709815855e-05, "loss": 0.0103, "step": 6303 }, { "epoch": 1.2202012383900929, "grad_norm": 0.06375090032815933, "learning_rate": 9.687608269857742e-05, "loss": 0.0086, "step": 6304 }, { "epoch": 1.2203947368421053, "grad_norm": 0.21836279332637787, "learning_rate": 9.687507814331134e-05, "loss": 0.0114, "step": 6305 }, { "epoch": 1.2205882352941178, "grad_norm": 0.18919743597507477, "learning_rate": 9.687407343236404e-05, "loss": 0.0084, "step": 6306 }, { "epoch": 1.22078173374613, "grad_norm": 0.1235053762793541, "learning_rate": 9.687306856573923e-05, "loss": 0.0104, "step": 6307 }, { "epoch": 1.2209752321981424, "grad_norm": 0.19628684222698212, "learning_rate": 9.68720635434407e-05, "loss": 0.0092, "step": 6308 }, { "epoch": 1.2211687306501549, "grad_norm": 0.09164179116487503, "learning_rate": 9.687105836547214e-05, "loss": 0.0088, "step": 6309 }, { "epoch": 1.221362229102167, "grad_norm": 0.21853739023208618, "learning_rate": 9.68700530318373e-05, "loss": 0.0107, "step": 6310 }, { "epoch": 1.2215557275541795, "grad_norm": 0.08853328973054886, "learning_rate": 
9.686904754253993e-05, "loss": 0.0082, "step": 6311 }, { "epoch": 1.221749226006192, "grad_norm": 0.23185338079929352, "learning_rate": 9.686804189758375e-05, "loss": 0.0091, "step": 6312 }, { "epoch": 1.2219427244582044, "grad_norm": 0.12174014747142792, "learning_rate": 9.68670360969725e-05, "loss": 0.0098, "step": 6313 }, { "epoch": 1.2221362229102168, "grad_norm": 0.20384186506271362, "learning_rate": 9.686603014070993e-05, "loss": 0.0092, "step": 6314 }, { "epoch": 1.222329721362229, "grad_norm": 0.15438003838062286, "learning_rate": 9.686502402879977e-05, "loss": 0.0095, "step": 6315 }, { "epoch": 1.2225232198142415, "grad_norm": 0.16397114098072052, "learning_rate": 9.68640177612458e-05, "loss": 0.0083, "step": 6316 }, { "epoch": 1.222716718266254, "grad_norm": 0.17392830550670624, "learning_rate": 9.686301133805168e-05, "loss": 0.0111, "step": 6317 }, { "epoch": 1.2229102167182662, "grad_norm": 0.09242212027311325, "learning_rate": 9.686200475922125e-05, "loss": 0.0082, "step": 6318 }, { "epoch": 1.2231037151702786, "grad_norm": 0.20023411512374878, "learning_rate": 9.686099802475818e-05, "loss": 0.0078, "step": 6319 }, { "epoch": 1.223297213622291, "grad_norm": 0.06230899691581726, "learning_rate": 9.68599911346662e-05, "loss": 0.008, "step": 6320 }, { "epoch": 1.2234907120743035, "grad_norm": 0.15703363716602325, "learning_rate": 9.685898408894914e-05, "loss": 0.0093, "step": 6321 }, { "epoch": 1.2236842105263157, "grad_norm": 0.11654780060052872, "learning_rate": 9.685797688761065e-05, "loss": 0.0089, "step": 6322 }, { "epoch": 1.2238777089783281, "grad_norm": 0.07801486551761627, "learning_rate": 9.685696953065454e-05, "loss": 0.0099, "step": 6323 }, { "epoch": 1.2240712074303406, "grad_norm": 0.16956482827663422, "learning_rate": 9.685596201808452e-05, "loss": 0.0097, "step": 6324 }, { "epoch": 1.224264705882353, "grad_norm": 0.059908971190452576, "learning_rate": 9.685495434990436e-05, "loss": 0.0076, "step": 6325 }, { "epoch": 1.2244582043343653, "grad_norm": 0.15420906245708466, "learning_rate": 9.68539465261178e-05, "loss": 0.0093, "step": 6326 }, { "epoch": 1.2246517027863777, "grad_norm": 0.11576934903860092, "learning_rate": 9.685293854672855e-05, "loss": 0.0091, "step": 6327 }, { "epoch": 1.2248452012383901, "grad_norm": 0.12937776744365692, "learning_rate": 9.68519304117404e-05, "loss": 0.0079, "step": 6328 }, { "epoch": 1.2250386996904026, "grad_norm": 0.14070221781730652, "learning_rate": 9.685092212115708e-05, "loss": 0.0103, "step": 6329 }, { "epoch": 1.2252321981424148, "grad_norm": 0.09964632987976074, "learning_rate": 9.684991367498236e-05, "loss": 0.0091, "step": 6330 }, { "epoch": 1.2254256965944272, "grad_norm": 0.16158878803253174, "learning_rate": 9.684890507321996e-05, "loss": 0.0086, "step": 6331 }, { "epoch": 1.2256191950464397, "grad_norm": 0.04485679045319557, "learning_rate": 9.684789631587366e-05, "loss": 0.0097, "step": 6332 }, { "epoch": 1.225812693498452, "grad_norm": 0.18488019704818726, "learning_rate": 9.684688740294717e-05, "loss": 0.0093, "step": 6333 }, { "epoch": 1.2260061919504643, "grad_norm": 0.07753138989210129, "learning_rate": 9.684587833444428e-05, "loss": 0.0085, "step": 6334 }, { "epoch": 1.2261996904024768, "grad_norm": 0.09876422584056854, "learning_rate": 9.684486911036873e-05, "loss": 0.0085, "step": 6335 }, { "epoch": 1.2263931888544892, "grad_norm": 0.09270031005144119, "learning_rate": 9.684385973072424e-05, "loss": 0.0071, "step": 6336 }, { "epoch": 1.2265866873065017, "grad_norm": 0.15238143503665924, "learning_rate": 
9.684285019551461e-05, "loss": 0.0092, "step": 6337 }, { "epoch": 1.2267801857585139, "grad_norm": 0.14420126378536224, "learning_rate": 9.684184050474357e-05, "loss": 0.009, "step": 6338 }, { "epoch": 1.2269736842105263, "grad_norm": 0.12473342567682266, "learning_rate": 9.684083065841488e-05, "loss": 0.0077, "step": 6339 }, { "epoch": 1.2271671826625388, "grad_norm": 0.05432478338479996, "learning_rate": 9.683982065653229e-05, "loss": 0.0075, "step": 6340 }, { "epoch": 1.227360681114551, "grad_norm": 0.08579801768064499, "learning_rate": 9.683881049909955e-05, "loss": 0.0075, "step": 6341 }, { "epoch": 1.2275541795665634, "grad_norm": 0.11773873120546341, "learning_rate": 9.683780018612043e-05, "loss": 0.0086, "step": 6342 }, { "epoch": 1.2277476780185759, "grad_norm": 0.06665661931037903, "learning_rate": 9.683678971759868e-05, "loss": 0.0071, "step": 6343 }, { "epoch": 1.2279411764705883, "grad_norm": 0.11490818858146667, "learning_rate": 9.683577909353803e-05, "loss": 0.0114, "step": 6344 }, { "epoch": 1.2281346749226005, "grad_norm": 0.07750324159860611, "learning_rate": 9.68347683139423e-05, "loss": 0.0081, "step": 6345 }, { "epoch": 1.228328173374613, "grad_norm": 0.08262708783149719, "learning_rate": 9.683375737881518e-05, "loss": 0.0089, "step": 6346 }, { "epoch": 1.2285216718266254, "grad_norm": 0.08134696632623672, "learning_rate": 9.683274628816048e-05, "loss": 0.0082, "step": 6347 }, { "epoch": 1.2287151702786379, "grad_norm": 0.05572964996099472, "learning_rate": 9.683173504198193e-05, "loss": 0.0094, "step": 6348 }, { "epoch": 1.22890866873065, "grad_norm": 0.1062084436416626, "learning_rate": 9.683072364028328e-05, "loss": 0.01, "step": 6349 }, { "epoch": 1.2291021671826625, "grad_norm": 0.04540269076824188, "learning_rate": 9.682971208306832e-05, "loss": 0.0093, "step": 6350 }, { "epoch": 1.229295665634675, "grad_norm": 0.07461121678352356, "learning_rate": 9.682870037034081e-05, "loss": 0.0086, "step": 6351 }, { "epoch": 1.2294891640866874, "grad_norm": 0.06959367543458939, "learning_rate": 9.682768850210448e-05, "loss": 0.0084, "step": 6352 }, { "epoch": 1.2296826625386996, "grad_norm": 0.04983474686741829, "learning_rate": 9.682667647836312e-05, "loss": 0.0082, "step": 6353 }, { "epoch": 1.229876160990712, "grad_norm": 0.07979093492031097, "learning_rate": 9.682566429912048e-05, "loss": 0.0099, "step": 6354 }, { "epoch": 1.2300696594427245, "grad_norm": 0.05723245441913605, "learning_rate": 9.682465196438032e-05, "loss": 0.0092, "step": 6355 }, { "epoch": 1.2302631578947367, "grad_norm": 0.10138799250125885, "learning_rate": 9.682363947414642e-05, "loss": 0.0115, "step": 6356 }, { "epoch": 1.2304566563467492, "grad_norm": 0.062136173248291016, "learning_rate": 9.682262682842253e-05, "loss": 0.0083, "step": 6357 }, { "epoch": 1.2306501547987616, "grad_norm": 0.0821792408823967, "learning_rate": 9.682161402721242e-05, "loss": 0.0109, "step": 6358 }, { "epoch": 1.230843653250774, "grad_norm": 0.05139685049653053, "learning_rate": 9.682060107051985e-05, "loss": 0.009, "step": 6359 }, { "epoch": 1.2310371517027865, "grad_norm": 0.06775949895381927, "learning_rate": 9.68195879583486e-05, "loss": 0.0074, "step": 6360 }, { "epoch": 1.2312306501547987, "grad_norm": 0.08428744226694107, "learning_rate": 9.681857469070241e-05, "loss": 0.0102, "step": 6361 }, { "epoch": 1.2314241486068112, "grad_norm": 0.10403183102607727, "learning_rate": 9.68175612675851e-05, "loss": 0.0087, "step": 6362 }, { "epoch": 1.2316176470588236, "grad_norm": 0.08822121471166611, "learning_rate": 
9.681654768900036e-05, "loss": 0.0078, "step": 6363 }, { "epoch": 1.2318111455108358, "grad_norm": 0.10465125739574432, "learning_rate": 9.681553395495202e-05, "loss": 0.0088, "step": 6364 }, { "epoch": 1.2320046439628483, "grad_norm": 0.14033488929271698, "learning_rate": 9.681452006544382e-05, "loss": 0.0088, "step": 6365 }, { "epoch": 1.2321981424148607, "grad_norm": 0.1264631301164627, "learning_rate": 9.681350602047954e-05, "loss": 0.0079, "step": 6366 }, { "epoch": 1.2323916408668731, "grad_norm": 0.12294416129589081, "learning_rate": 9.681249182006294e-05, "loss": 0.0096, "step": 6367 }, { "epoch": 1.2325851393188854, "grad_norm": 0.05635454133152962, "learning_rate": 9.681147746419782e-05, "loss": 0.0085, "step": 6368 }, { "epoch": 1.2327786377708978, "grad_norm": 0.1305575966835022, "learning_rate": 9.681046295288792e-05, "loss": 0.009, "step": 6369 }, { "epoch": 1.2329721362229102, "grad_norm": 0.0604289285838604, "learning_rate": 9.680944828613701e-05, "loss": 0.0081, "step": 6370 }, { "epoch": 1.2331656346749227, "grad_norm": 0.09634984284639359, "learning_rate": 9.680843346394887e-05, "loss": 0.0086, "step": 6371 }, { "epoch": 1.233359133126935, "grad_norm": 0.07434733957052231, "learning_rate": 9.680741848632731e-05, "loss": 0.0091, "step": 6372 }, { "epoch": 1.2335526315789473, "grad_norm": 0.05167815834283829, "learning_rate": 9.680640335327603e-05, "loss": 0.0087, "step": 6373 }, { "epoch": 1.2337461300309598, "grad_norm": 0.11120422184467316, "learning_rate": 9.680538806479886e-05, "loss": 0.0085, "step": 6374 }, { "epoch": 1.2339396284829722, "grad_norm": 0.11515079438686371, "learning_rate": 9.680437262089957e-05, "loss": 0.0084, "step": 6375 }, { "epoch": 1.2341331269349844, "grad_norm": 0.1507423222064972, "learning_rate": 9.68033570215819e-05, "loss": 0.0098, "step": 6376 }, { "epoch": 1.234326625386997, "grad_norm": 0.09532557427883148, "learning_rate": 9.680234126684966e-05, "loss": 0.0082, "step": 6377 }, { "epoch": 1.2345201238390093, "grad_norm": 0.14855538308620453, "learning_rate": 9.680132535670663e-05, "loss": 0.0087, "step": 6378 }, { "epoch": 1.2347136222910216, "grad_norm": 0.05463458225131035, "learning_rate": 9.680030929115655e-05, "loss": 0.0096, "step": 6379 }, { "epoch": 1.234907120743034, "grad_norm": 0.17615200579166412, "learning_rate": 9.679929307020322e-05, "loss": 0.0101, "step": 6380 }, { "epoch": 1.2351006191950464, "grad_norm": 0.09658122807741165, "learning_rate": 9.679827669385044e-05, "loss": 0.0094, "step": 6381 }, { "epoch": 1.2352941176470589, "grad_norm": 0.11047810316085815, "learning_rate": 9.679726016210195e-05, "loss": 0.0098, "step": 6382 }, { "epoch": 1.2354876160990713, "grad_norm": 0.10489894449710846, "learning_rate": 9.679624347496155e-05, "loss": 0.0093, "step": 6383 }, { "epoch": 1.2356811145510835, "grad_norm": 0.08666025102138519, "learning_rate": 9.6795226632433e-05, "loss": 0.0085, "step": 6384 }, { "epoch": 1.235874613003096, "grad_norm": 0.11456328630447388, "learning_rate": 9.679420963452013e-05, "loss": 0.0091, "step": 6385 }, { "epoch": 1.2360681114551084, "grad_norm": 0.06885503232479095, "learning_rate": 9.679319248122666e-05, "loss": 0.0105, "step": 6386 }, { "epoch": 1.2362616099071206, "grad_norm": 0.11145076900720596, "learning_rate": 9.679217517255641e-05, "loss": 0.0095, "step": 6387 }, { "epoch": 1.236455108359133, "grad_norm": 0.052430395036935806, "learning_rate": 9.679115770851315e-05, "loss": 0.0082, "step": 6388 }, { "epoch": 1.2366486068111455, "grad_norm": 0.08818893879652023, "learning_rate": 
9.679014008910065e-05, "loss": 0.0089, "step": 6389 }, { "epoch": 1.236842105263158, "grad_norm": 0.09625227004289627, "learning_rate": 9.678912231432272e-05, "loss": 0.0089, "step": 6390 }, { "epoch": 1.2370356037151702, "grad_norm": 0.09451446682214737, "learning_rate": 9.678810438418312e-05, "loss": 0.0092, "step": 6391 }, { "epoch": 1.2372291021671826, "grad_norm": 0.11597762256860733, "learning_rate": 9.678708629868565e-05, "loss": 0.0091, "step": 6392 }, { "epoch": 1.237422600619195, "grad_norm": 0.05743489786982536, "learning_rate": 9.67860680578341e-05, "loss": 0.0085, "step": 6393 }, { "epoch": 1.2376160990712075, "grad_norm": 0.13410021364688873, "learning_rate": 9.678504966163223e-05, "loss": 0.0088, "step": 6394 }, { "epoch": 1.2378095975232197, "grad_norm": 0.05307362973690033, "learning_rate": 9.678403111008386e-05, "loss": 0.0079, "step": 6395 }, { "epoch": 1.2380030959752322, "grad_norm": 0.12798485159873962, "learning_rate": 9.678301240319274e-05, "loss": 0.0103, "step": 6396 }, { "epoch": 1.2381965944272446, "grad_norm": 0.09501226991415024, "learning_rate": 9.678199354096268e-05, "loss": 0.008, "step": 6397 }, { "epoch": 1.238390092879257, "grad_norm": 0.09021706879138947, "learning_rate": 9.678097452339748e-05, "loss": 0.01, "step": 6398 }, { "epoch": 1.2385835913312693, "grad_norm": 0.1095380038022995, "learning_rate": 9.677995535050089e-05, "loss": 0.0096, "step": 6399 }, { "epoch": 1.2387770897832817, "grad_norm": 0.10115324705839157, "learning_rate": 9.677893602227672e-05, "loss": 0.0093, "step": 6400 }, { "epoch": 1.2389705882352942, "grad_norm": 0.10741368681192398, "learning_rate": 9.677791653872877e-05, "loss": 0.0112, "step": 6401 }, { "epoch": 1.2391640866873066, "grad_norm": 0.05348331108689308, "learning_rate": 9.677689689986083e-05, "loss": 0.0096, "step": 6402 }, { "epoch": 1.2393575851393188, "grad_norm": 0.1133771762251854, "learning_rate": 9.677587710567667e-05, "loss": 0.0078, "step": 6403 }, { "epoch": 1.2395510835913313, "grad_norm": 0.048380639404058456, "learning_rate": 9.677485715618009e-05, "loss": 0.0065, "step": 6404 }, { "epoch": 1.2397445820433437, "grad_norm": 0.11673972010612488, "learning_rate": 9.677383705137489e-05, "loss": 0.0091, "step": 6405 }, { "epoch": 1.2399380804953561, "grad_norm": 0.06414881348609924, "learning_rate": 9.677281679126485e-05, "loss": 0.0106, "step": 6406 }, { "epoch": 1.2401315789473684, "grad_norm": 0.06930394470691681, "learning_rate": 9.677179637585377e-05, "loss": 0.0092, "step": 6407 }, { "epoch": 1.2403250773993808, "grad_norm": 0.10416781157255173, "learning_rate": 9.677077580514545e-05, "loss": 0.0091, "step": 6408 }, { "epoch": 1.2405185758513932, "grad_norm": 0.04553155601024628, "learning_rate": 9.676975507914368e-05, "loss": 0.0088, "step": 6409 }, { "epoch": 1.2407120743034055, "grad_norm": 0.11789961904287338, "learning_rate": 9.676873419785225e-05, "loss": 0.0081, "step": 6410 }, { "epoch": 1.240905572755418, "grad_norm": 0.06728996336460114, "learning_rate": 9.676771316127497e-05, "loss": 0.0094, "step": 6411 }, { "epoch": 1.2410990712074303, "grad_norm": 0.11935016512870789, "learning_rate": 9.67666919694156e-05, "loss": 0.0099, "step": 6412 }, { "epoch": 1.2412925696594428, "grad_norm": 0.06682071834802628, "learning_rate": 9.676567062227796e-05, "loss": 0.0086, "step": 6413 }, { "epoch": 1.2414860681114552, "grad_norm": 0.09555037319660187, "learning_rate": 9.676464911986587e-05, "loss": 0.0077, "step": 6414 }, { "epoch": 1.2416795665634675, "grad_norm": 0.08994625508785248, "learning_rate": 
9.67636274621831e-05, "loss": 0.0085, "step": 6415 }, { "epoch": 1.24187306501548, "grad_norm": 0.056221622973680496, "learning_rate": 9.676260564923346e-05, "loss": 0.0102, "step": 6416 }, { "epoch": 1.2420665634674923, "grad_norm": 0.08690456300973892, "learning_rate": 9.676158368102073e-05, "loss": 0.0085, "step": 6417 }, { "epoch": 1.2422600619195046, "grad_norm": 0.040743838995695114, "learning_rate": 9.676056155754873e-05, "loss": 0.0081, "step": 6418 }, { "epoch": 1.242453560371517, "grad_norm": 0.10251230001449585, "learning_rate": 9.675953927882124e-05, "loss": 0.0092, "step": 6419 }, { "epoch": 1.2426470588235294, "grad_norm": 0.059140611439943314, "learning_rate": 9.675851684484208e-05, "loss": 0.0081, "step": 6420 }, { "epoch": 1.2428405572755419, "grad_norm": 0.06758573651313782, "learning_rate": 9.675749425561504e-05, "loss": 0.0087, "step": 6421 }, { "epoch": 1.243034055727554, "grad_norm": 0.08551245182752609, "learning_rate": 9.675647151114394e-05, "loss": 0.0079, "step": 6422 }, { "epoch": 1.2432275541795665, "grad_norm": 0.05008092150092125, "learning_rate": 9.675544861143255e-05, "loss": 0.0082, "step": 6423 }, { "epoch": 1.243421052631579, "grad_norm": 0.10248308628797531, "learning_rate": 9.67544255564847e-05, "loss": 0.0084, "step": 6424 }, { "epoch": 1.2436145510835914, "grad_norm": 0.06512877345085144, "learning_rate": 9.67534023463042e-05, "loss": 0.0083, "step": 6425 }, { "epoch": 1.2438080495356036, "grad_norm": 0.07177915424108505, "learning_rate": 9.675237898089481e-05, "loss": 0.0111, "step": 6426 }, { "epoch": 1.244001547987616, "grad_norm": 0.0845947116613388, "learning_rate": 9.675135546026037e-05, "loss": 0.008, "step": 6427 }, { "epoch": 1.2441950464396285, "grad_norm": 0.0204267930239439, "learning_rate": 9.675033178440469e-05, "loss": 0.0083, "step": 6428 }, { "epoch": 1.244388544891641, "grad_norm": 0.09233714640140533, "learning_rate": 9.674930795333155e-05, "loss": 0.0078, "step": 6429 }, { "epoch": 1.2445820433436532, "grad_norm": 0.04463982209563255, "learning_rate": 9.674828396704479e-05, "loss": 0.0091, "step": 6430 }, { "epoch": 1.2447755417956656, "grad_norm": 0.06446242332458496, "learning_rate": 9.674725982554818e-05, "loss": 0.0076, "step": 6431 }, { "epoch": 1.244969040247678, "grad_norm": 0.055780887603759766, "learning_rate": 9.674623552884554e-05, "loss": 0.0103, "step": 6432 }, { "epoch": 1.2451625386996903, "grad_norm": 0.07221334427595139, "learning_rate": 9.67452110769407e-05, "loss": 0.0097, "step": 6433 }, { "epoch": 1.2453560371517027, "grad_norm": 0.031234584748744965, "learning_rate": 9.674418646983745e-05, "loss": 0.0091, "step": 6434 }, { "epoch": 1.2455495356037152, "grad_norm": 0.0899447500705719, "learning_rate": 9.674316170753959e-05, "loss": 0.0078, "step": 6435 }, { "epoch": 1.2457430340557276, "grad_norm": 0.039834704250097275, "learning_rate": 9.674213679005096e-05, "loss": 0.0084, "step": 6436 }, { "epoch": 1.24593653250774, "grad_norm": 0.07532483339309692, "learning_rate": 9.674111171737531e-05, "loss": 0.0102, "step": 6437 }, { "epoch": 1.2461300309597523, "grad_norm": 0.04935355857014656, "learning_rate": 9.674008648951653e-05, "loss": 0.0079, "step": 6438 }, { "epoch": 1.2463235294117647, "grad_norm": 0.08004441112279892, "learning_rate": 9.673906110647838e-05, "loss": 0.0095, "step": 6439 }, { "epoch": 1.2465170278637772, "grad_norm": 0.05924075469374657, "learning_rate": 9.673803556826469e-05, "loss": 0.0099, "step": 6440 }, { "epoch": 1.2467105263157894, "grad_norm": 0.07335155457258224, "learning_rate": 
9.673700987487927e-05, "loss": 0.0096, "step": 6441 }, { "epoch": 1.2469040247678018, "grad_norm": 0.0588998906314373, "learning_rate": 9.673598402632592e-05, "loss": 0.0085, "step": 6442 }, { "epoch": 1.2470975232198143, "grad_norm": 0.08555539697408676, "learning_rate": 9.673495802260847e-05, "loss": 0.0077, "step": 6443 }, { "epoch": 1.2472910216718267, "grad_norm": 0.0635419487953186, "learning_rate": 9.673393186373072e-05, "loss": 0.0081, "step": 6444 }, { "epoch": 1.247484520123839, "grad_norm": 0.1125834509730339, "learning_rate": 9.673290554969652e-05, "loss": 0.0094, "step": 6445 }, { "epoch": 1.2476780185758514, "grad_norm": 0.09449155628681183, "learning_rate": 9.673187908050963e-05, "loss": 0.0097, "step": 6446 }, { "epoch": 1.2478715170278638, "grad_norm": 0.06253594905138016, "learning_rate": 9.673085245617393e-05, "loss": 0.0081, "step": 6447 }, { "epoch": 1.2480650154798762, "grad_norm": 0.08172900229692459, "learning_rate": 9.672982567669318e-05, "loss": 0.0088, "step": 6448 }, { "epoch": 1.2482585139318885, "grad_norm": 0.08045391738414764, "learning_rate": 9.672879874207124e-05, "loss": 0.0099, "step": 6449 }, { "epoch": 1.248452012383901, "grad_norm": 0.05688083916902542, "learning_rate": 9.67277716523119e-05, "loss": 0.0082, "step": 6450 }, { "epoch": 1.2486455108359134, "grad_norm": 0.06216127425432205, "learning_rate": 9.672674440741896e-05, "loss": 0.0095, "step": 6451 }, { "epoch": 1.2488390092879258, "grad_norm": 0.07964268326759338, "learning_rate": 9.672571700739629e-05, "loss": 0.0089, "step": 6452 }, { "epoch": 1.249032507739938, "grad_norm": 0.05031155049800873, "learning_rate": 9.67246894522477e-05, "loss": 0.0101, "step": 6453 }, { "epoch": 1.2492260061919505, "grad_norm": 0.11397122591733932, "learning_rate": 9.672366174197697e-05, "loss": 0.0077, "step": 6454 }, { "epoch": 1.249419504643963, "grad_norm": 0.05412663146853447, "learning_rate": 9.672263387658796e-05, "loss": 0.0086, "step": 6455 }, { "epoch": 1.2496130030959751, "grad_norm": 0.11390262842178345, "learning_rate": 9.672160585608447e-05, "loss": 0.0082, "step": 6456 }, { "epoch": 1.2498065015479876, "grad_norm": 0.04150575026869774, "learning_rate": 9.672057768047034e-05, "loss": 0.0106, "step": 6457 }, { "epoch": 1.25, "grad_norm": 0.07989475876092911, "learning_rate": 9.671954934974939e-05, "loss": 0.0091, "step": 6458 }, { "epoch": 1.2501934984520124, "grad_norm": 0.06088785454630852, "learning_rate": 9.671852086392543e-05, "loss": 0.0067, "step": 6459 }, { "epoch": 1.2503869969040249, "grad_norm": 0.08718061447143555, "learning_rate": 9.671749222300226e-05, "loss": 0.0104, "step": 6460 }, { "epoch": 1.250580495356037, "grad_norm": 0.045224178582429886, "learning_rate": 9.671646342698376e-05, "loss": 0.0079, "step": 6461 }, { "epoch": 1.2507739938080495, "grad_norm": 0.08871608227491379, "learning_rate": 9.671543447587372e-05, "loss": 0.0097, "step": 6462 }, { "epoch": 1.250967492260062, "grad_norm": 0.05277964472770691, "learning_rate": 9.671440536967597e-05, "loss": 0.0077, "step": 6463 }, { "epoch": 1.2511609907120742, "grad_norm": 0.07886134088039398, "learning_rate": 9.671337610839435e-05, "loss": 0.0065, "step": 6464 }, { "epoch": 1.2513544891640866, "grad_norm": 0.08040816336870193, "learning_rate": 9.671234669203267e-05, "loss": 0.0104, "step": 6465 }, { "epoch": 1.251547987616099, "grad_norm": 0.1738271713256836, "learning_rate": 9.671131712059477e-05, "loss": 0.0071, "step": 6466 }, { "epoch": 1.2517414860681115, "grad_norm": 0.09653603285551071, "learning_rate": 
9.671028739408447e-05, "loss": 0.0104, "step": 6467 }, { "epoch": 1.251934984520124, "grad_norm": 0.19910657405853271, "learning_rate": 9.67092575125056e-05, "loss": 0.0098, "step": 6468 }, { "epoch": 1.2521284829721362, "grad_norm": 0.14855825901031494, "learning_rate": 9.670822747586198e-05, "loss": 0.0085, "step": 6469 }, { "epoch": 1.2523219814241486, "grad_norm": 0.14289574325084686, "learning_rate": 9.670719728415744e-05, "loss": 0.0085, "step": 6470 }, { "epoch": 1.2525154798761609, "grad_norm": 0.16247908771038055, "learning_rate": 9.670616693739584e-05, "loss": 0.0083, "step": 6471 }, { "epoch": 1.2527089783281733, "grad_norm": 0.11987947672605515, "learning_rate": 9.670513643558098e-05, "loss": 0.0093, "step": 6472 }, { "epoch": 1.2529024767801857, "grad_norm": 0.10038848221302032, "learning_rate": 9.670410577871669e-05, "loss": 0.0085, "step": 6473 }, { "epoch": 1.2530959752321982, "grad_norm": 0.13464108109474182, "learning_rate": 9.670307496680681e-05, "loss": 0.0077, "step": 6474 }, { "epoch": 1.2532894736842106, "grad_norm": 0.043693382292985916, "learning_rate": 9.670204399985519e-05, "loss": 0.0089, "step": 6475 }, { "epoch": 1.2534829721362228, "grad_norm": 0.11274110525846481, "learning_rate": 9.670101287786562e-05, "loss": 0.0094, "step": 6476 }, { "epoch": 1.2536764705882353, "grad_norm": 0.06286618113517761, "learning_rate": 9.6699981600842e-05, "loss": 0.0096, "step": 6477 }, { "epoch": 1.2538699690402477, "grad_norm": 0.0826110914349556, "learning_rate": 9.669895016878808e-05, "loss": 0.0079, "step": 6478 }, { "epoch": 1.25406346749226, "grad_norm": 0.07944978028535843, "learning_rate": 9.669791858170777e-05, "loss": 0.0093, "step": 6479 }, { "epoch": 1.2542569659442724, "grad_norm": 0.07035140693187714, "learning_rate": 9.669688683960486e-05, "loss": 0.0105, "step": 6480 }, { "epoch": 1.2544504643962848, "grad_norm": 0.08798470348119736, "learning_rate": 9.669585494248319e-05, "loss": 0.008, "step": 6481 }, { "epoch": 1.2546439628482973, "grad_norm": 0.09359639883041382, "learning_rate": 9.669482289034661e-05, "loss": 0.0072, "step": 6482 }, { "epoch": 1.2548374613003097, "grad_norm": 0.08822660893201828, "learning_rate": 9.669379068319896e-05, "loss": 0.0123, "step": 6483 }, { "epoch": 1.255030959752322, "grad_norm": 0.13264088332653046, "learning_rate": 9.669275832104408e-05, "loss": 0.0087, "step": 6484 }, { "epoch": 1.2552244582043344, "grad_norm": 0.06700298935174942, "learning_rate": 9.669172580388577e-05, "loss": 0.0068, "step": 6485 }, { "epoch": 1.2554179566563468, "grad_norm": 0.12301353365182877, "learning_rate": 9.669069313172791e-05, "loss": 0.01, "step": 6486 }, { "epoch": 1.255611455108359, "grad_norm": 0.07871787995100021, "learning_rate": 9.668966030457433e-05, "loss": 0.0094, "step": 6487 }, { "epoch": 1.2558049535603715, "grad_norm": 0.08914203196763992, "learning_rate": 9.668862732242886e-05, "loss": 0.0075, "step": 6488 }, { "epoch": 1.255998452012384, "grad_norm": 0.12245940417051315, "learning_rate": 9.668759418529534e-05, "loss": 0.0089, "step": 6489 }, { "epoch": 1.2561919504643964, "grad_norm": 0.08336708694696426, "learning_rate": 9.668656089317764e-05, "loss": 0.0102, "step": 6490 }, { "epoch": 1.2563854489164088, "grad_norm": 0.13359388709068298, "learning_rate": 9.668552744607956e-05, "loss": 0.0088, "step": 6491 }, { "epoch": 1.256578947368421, "grad_norm": 0.05198248475790024, "learning_rate": 9.668449384400497e-05, "loss": 0.0084, "step": 6492 }, { "epoch": 1.2567724458204335, "grad_norm": 0.13695509731769562, "learning_rate": 
9.668346008695769e-05, "loss": 0.0098, "step": 6493 }, { "epoch": 1.256965944272446, "grad_norm": 0.13017882406711578, "learning_rate": 9.668242617494159e-05, "loss": 0.01, "step": 6494 }, { "epoch": 1.2571594427244581, "grad_norm": 0.09792125225067139, "learning_rate": 9.668139210796049e-05, "loss": 0.0103, "step": 6495 }, { "epoch": 1.2573529411764706, "grad_norm": 0.18641148507595062, "learning_rate": 9.668035788601824e-05, "loss": 0.0075, "step": 6496 }, { "epoch": 1.257546439628483, "grad_norm": 0.10966426879167557, "learning_rate": 9.66793235091187e-05, "loss": 0.0106, "step": 6497 }, { "epoch": 1.2577399380804954, "grad_norm": 0.2042187601327896, "learning_rate": 9.667828897726571e-05, "loss": 0.0073, "step": 6498 }, { "epoch": 1.2579334365325077, "grad_norm": 0.20873041450977325, "learning_rate": 9.66772542904631e-05, "loss": 0.0094, "step": 6499 }, { "epoch": 1.25812693498452, "grad_norm": 0.1719280481338501, "learning_rate": 9.667621944871474e-05, "loss": 0.0096, "step": 6500 }, { "epoch": 1.2583204334365325, "grad_norm": 0.20308537781238556, "learning_rate": 9.667518445202446e-05, "loss": 0.0091, "step": 6501 }, { "epoch": 1.2585139318885448, "grad_norm": 0.1307060420513153, "learning_rate": 9.66741493003961e-05, "loss": 0.0096, "step": 6502 }, { "epoch": 1.2587074303405572, "grad_norm": 0.17104165256023407, "learning_rate": 9.667311399383354e-05, "loss": 0.0089, "step": 6503 }, { "epoch": 1.2589009287925697, "grad_norm": 0.15318307280540466, "learning_rate": 9.667207853234061e-05, "loss": 0.0092, "step": 6504 }, { "epoch": 1.259094427244582, "grad_norm": 0.12033762037754059, "learning_rate": 9.667104291592115e-05, "loss": 0.0088, "step": 6505 }, { "epoch": 1.2592879256965945, "grad_norm": 0.1422419399023056, "learning_rate": 9.667000714457902e-05, "loss": 0.0073, "step": 6506 }, { "epoch": 1.2594814241486068, "grad_norm": 0.06470642238855362, "learning_rate": 9.666897121831808e-05, "loss": 0.0076, "step": 6507 }, { "epoch": 1.2596749226006192, "grad_norm": 0.13006187975406647, "learning_rate": 9.666793513714218e-05, "loss": 0.0098, "step": 6508 }, { "epoch": 1.2598684210526316, "grad_norm": 0.058873385190963745, "learning_rate": 9.666689890105515e-05, "loss": 0.0094, "step": 6509 }, { "epoch": 1.2600619195046439, "grad_norm": 0.1808689683675766, "learning_rate": 9.666586251006086e-05, "loss": 0.0088, "step": 6510 }, { "epoch": 1.2602554179566563, "grad_norm": 0.09654302150011063, "learning_rate": 9.666482596416315e-05, "loss": 0.0089, "step": 6511 }, { "epoch": 1.2604489164086687, "grad_norm": 0.16023431718349457, "learning_rate": 9.66637892633659e-05, "loss": 0.0081, "step": 6512 }, { "epoch": 1.2606424148606812, "grad_norm": 0.1468660682439804, "learning_rate": 9.666275240767294e-05, "loss": 0.0073, "step": 6513 }, { "epoch": 1.2608359133126936, "grad_norm": 0.09250325709581375, "learning_rate": 9.666171539708813e-05, "loss": 0.0106, "step": 6514 }, { "epoch": 1.2610294117647058, "grad_norm": 0.20982033014297485, "learning_rate": 9.666067823161533e-05, "loss": 0.0097, "step": 6515 }, { "epoch": 1.2612229102167183, "grad_norm": 0.04892846569418907, "learning_rate": 9.665964091125842e-05, "loss": 0.0105, "step": 6516 }, { "epoch": 1.2614164086687307, "grad_norm": 0.22556479275226593, "learning_rate": 9.665860343602119e-05, "loss": 0.0074, "step": 6517 }, { "epoch": 1.261609907120743, "grad_norm": 0.0936412662267685, "learning_rate": 9.665756580590756e-05, "loss": 0.0078, "step": 6518 }, { "epoch": 1.2618034055727554, "grad_norm": 0.1618565022945404, "learning_rate": 
9.665652802092134e-05, "loss": 0.0097, "step": 6519 }, { "epoch": 1.2619969040247678, "grad_norm": 0.10683005303144455, "learning_rate": 9.665549008106644e-05, "loss": 0.0121, "step": 6520 }, { "epoch": 1.2621904024767803, "grad_norm": 0.07556813210248947, "learning_rate": 9.665445198634668e-05, "loss": 0.0092, "step": 6521 }, { "epoch": 1.2623839009287925, "grad_norm": 0.11414472758769989, "learning_rate": 9.665341373676593e-05, "loss": 0.0087, "step": 6522 }, { "epoch": 1.262577399380805, "grad_norm": 0.057802386581897736, "learning_rate": 9.665237533232805e-05, "loss": 0.009, "step": 6523 }, { "epoch": 1.2627708978328174, "grad_norm": 0.10829374939203262, "learning_rate": 9.665133677303691e-05, "loss": 0.0097, "step": 6524 }, { "epoch": 1.2629643962848296, "grad_norm": 0.08547531068325043, "learning_rate": 9.665029805889636e-05, "loss": 0.009, "step": 6525 }, { "epoch": 1.263157894736842, "grad_norm": 0.1310381293296814, "learning_rate": 9.664925918991027e-05, "loss": 0.0095, "step": 6526 }, { "epoch": 1.2633513931888545, "grad_norm": 0.07725431770086288, "learning_rate": 9.66482201660825e-05, "loss": 0.0084, "step": 6527 }, { "epoch": 1.263544891640867, "grad_norm": 0.09195958822965622, "learning_rate": 9.664718098741689e-05, "loss": 0.0083, "step": 6528 }, { "epoch": 1.2637383900928794, "grad_norm": 0.08021285384893417, "learning_rate": 9.664614165391734e-05, "loss": 0.0104, "step": 6529 }, { "epoch": 1.2639318885448916, "grad_norm": 0.07731758803129196, "learning_rate": 9.66451021655877e-05, "loss": 0.0078, "step": 6530 }, { "epoch": 1.264125386996904, "grad_norm": 0.08232452720403671, "learning_rate": 9.664406252243182e-05, "loss": 0.0077, "step": 6531 }, { "epoch": 1.2643188854489165, "grad_norm": 0.04728744179010391, "learning_rate": 9.664302272445357e-05, "loss": 0.007, "step": 6532 }, { "epoch": 1.2645123839009287, "grad_norm": 0.10592816770076752, "learning_rate": 9.664198277165685e-05, "loss": 0.0101, "step": 6533 }, { "epoch": 1.2647058823529411, "grad_norm": 0.07749928534030914, "learning_rate": 9.664094266404548e-05, "loss": 0.0096, "step": 6534 }, { "epoch": 1.2648993808049536, "grad_norm": 0.08677744120359421, "learning_rate": 9.663990240162334e-05, "loss": 0.0087, "step": 6535 }, { "epoch": 1.265092879256966, "grad_norm": 0.08537527918815613, "learning_rate": 9.663886198439432e-05, "loss": 0.0096, "step": 6536 }, { "epoch": 1.2652863777089784, "grad_norm": 0.057205576449632645, "learning_rate": 9.663782141236226e-05, "loss": 0.0089, "step": 6537 }, { "epoch": 1.2654798761609907, "grad_norm": 0.07143810391426086, "learning_rate": 9.663678068553105e-05, "loss": 0.0074, "step": 6538 }, { "epoch": 1.265673374613003, "grad_norm": 0.05124812573194504, "learning_rate": 9.663573980390454e-05, "loss": 0.0081, "step": 6539 }, { "epoch": 1.2658668730650156, "grad_norm": 0.07038769125938416, "learning_rate": 9.66346987674866e-05, "loss": 0.0094, "step": 6540 }, { "epoch": 1.2660603715170278, "grad_norm": 0.0726773738861084, "learning_rate": 9.663365757628113e-05, "loss": 0.0101, "step": 6541 }, { "epoch": 1.2662538699690402, "grad_norm": 0.07522070407867432, "learning_rate": 9.663261623029197e-05, "loss": 0.0083, "step": 6542 }, { "epoch": 1.2664473684210527, "grad_norm": 0.08245165646076202, "learning_rate": 9.663157472952298e-05, "loss": 0.0078, "step": 6543 }, { "epoch": 1.266640866873065, "grad_norm": 0.05257498845458031, "learning_rate": 9.66305330739781e-05, "loss": 0.0087, "step": 6544 }, { "epoch": 1.2668343653250773, "grad_norm": 0.09305637329816818, "learning_rate": 
9.662949126366111e-05, "loss": 0.0071, "step": 6545 }, { "epoch": 1.2670278637770898, "grad_norm": 0.05593246966600418, "learning_rate": 9.662844929857596e-05, "loss": 0.0081, "step": 6546 }, { "epoch": 1.2672213622291022, "grad_norm": 0.06083057075738907, "learning_rate": 9.662740717872648e-05, "loss": 0.0082, "step": 6547 }, { "epoch": 1.2674148606811144, "grad_norm": 0.08895960450172424, "learning_rate": 9.662636490411656e-05, "loss": 0.0109, "step": 6548 }, { "epoch": 1.2676083591331269, "grad_norm": 0.04347194731235504, "learning_rate": 9.662532247475009e-05, "loss": 0.0073, "step": 6549 }, { "epoch": 1.2678018575851393, "grad_norm": 0.09734126925468445, "learning_rate": 9.662427989063088e-05, "loss": 0.0079, "step": 6550 }, { "epoch": 1.2679953560371517, "grad_norm": 0.0577726773917675, "learning_rate": 9.66232371517629e-05, "loss": 0.0097, "step": 6551 }, { "epoch": 1.2681888544891642, "grad_norm": 0.08584002405405045, "learning_rate": 9.662219425814997e-05, "loss": 0.0088, "step": 6552 }, { "epoch": 1.2683823529411764, "grad_norm": 0.05464193969964981, "learning_rate": 9.662115120979595e-05, "loss": 0.0091, "step": 6553 }, { "epoch": 1.2685758513931888, "grad_norm": 0.0829588994383812, "learning_rate": 9.662010800670478e-05, "loss": 0.008, "step": 6554 }, { "epoch": 1.2687693498452013, "grad_norm": 0.052485398948192596, "learning_rate": 9.661906464888029e-05, "loss": 0.0078, "step": 6555 }, { "epoch": 1.2689628482972135, "grad_norm": 0.06845124810934067, "learning_rate": 9.661802113632637e-05, "loss": 0.0074, "step": 6556 }, { "epoch": 1.269156346749226, "grad_norm": 0.0655713900923729, "learning_rate": 9.66169774690469e-05, "loss": 0.0093, "step": 6557 }, { "epoch": 1.2693498452012384, "grad_norm": 0.050320420414209366, "learning_rate": 9.661593364704578e-05, "loss": 0.0084, "step": 6558 }, { "epoch": 1.2695433436532508, "grad_norm": 0.1050112321972847, "learning_rate": 9.661488967032686e-05, "loss": 0.008, "step": 6559 }, { "epoch": 1.2697368421052633, "grad_norm": 0.03927500173449516, "learning_rate": 9.661384553889405e-05, "loss": 0.0083, "step": 6560 }, { "epoch": 1.2699303405572755, "grad_norm": 0.09025148302316666, "learning_rate": 9.661280125275119e-05, "loss": 0.0077, "step": 6561 }, { "epoch": 1.270123839009288, "grad_norm": 0.03782247379422188, "learning_rate": 9.66117568119022e-05, "loss": 0.0082, "step": 6562 }, { "epoch": 1.2703173374613004, "grad_norm": 0.0703737661242485, "learning_rate": 9.661071221635096e-05, "loss": 0.008, "step": 6563 }, { "epoch": 1.2705108359133126, "grad_norm": 0.05052981525659561, "learning_rate": 9.660966746610133e-05, "loss": 0.0086, "step": 6564 }, { "epoch": 1.270704334365325, "grad_norm": 0.05246717110276222, "learning_rate": 9.660862256115722e-05, "loss": 0.0088, "step": 6565 }, { "epoch": 1.2708978328173375, "grad_norm": 0.05880986899137497, "learning_rate": 9.66075775015225e-05, "loss": 0.011, "step": 6566 }, { "epoch": 1.27109133126935, "grad_norm": 0.06863187998533249, "learning_rate": 9.660653228720105e-05, "loss": 0.0089, "step": 6567 }, { "epoch": 1.2712848297213624, "grad_norm": 0.05168683081865311, "learning_rate": 9.660548691819679e-05, "loss": 0.0097, "step": 6568 }, { "epoch": 1.2714783281733746, "grad_norm": 0.10568349063396454, "learning_rate": 9.660444139451356e-05, "loss": 0.0096, "step": 6569 }, { "epoch": 1.271671826625387, "grad_norm": 0.0534898079931736, "learning_rate": 9.660339571615527e-05, "loss": 0.0085, "step": 6570 }, { "epoch": 1.2718653250773992, "grad_norm": 0.1397256851196289, "learning_rate": 
9.660234988312582e-05, "loss": 0.01, "step": 6571 }, { "epoch": 1.2720588235294117, "grad_norm": 0.06063245236873627, "learning_rate": 9.660130389542908e-05, "loss": 0.0071, "step": 6572 }, { "epoch": 1.2722523219814241, "grad_norm": 0.144322469830513, "learning_rate": 9.660025775306893e-05, "loss": 0.0096, "step": 6573 }, { "epoch": 1.2724458204334366, "grad_norm": 0.08651553094387054, "learning_rate": 9.659921145604927e-05, "loss": 0.0076, "step": 6574 }, { "epoch": 1.272639318885449, "grad_norm": 0.12264585494995117, "learning_rate": 9.659816500437401e-05, "loss": 0.0098, "step": 6575 }, { "epoch": 1.2728328173374612, "grad_norm": 0.1263696849346161, "learning_rate": 9.659711839804702e-05, "loss": 0.009, "step": 6576 }, { "epoch": 1.2730263157894737, "grad_norm": 0.07459570467472076, "learning_rate": 9.659607163707218e-05, "loss": 0.0106, "step": 6577 }, { "epoch": 1.2732198142414861, "grad_norm": 0.10805423557758331, "learning_rate": 9.65950247214534e-05, "loss": 0.0087, "step": 6578 }, { "epoch": 1.2734133126934983, "grad_norm": 0.04464788734912872, "learning_rate": 9.65939776511946e-05, "loss": 0.0097, "step": 6579 }, { "epoch": 1.2736068111455108, "grad_norm": 0.09642684459686279, "learning_rate": 9.659293042629959e-05, "loss": 0.0093, "step": 6580 }, { "epoch": 1.2738003095975232, "grad_norm": 0.04568909853696823, "learning_rate": 9.659188304677234e-05, "loss": 0.0095, "step": 6581 }, { "epoch": 1.2739938080495357, "grad_norm": 0.06623689085245132, "learning_rate": 9.65908355126167e-05, "loss": 0.0086, "step": 6582 }, { "epoch": 1.274187306501548, "grad_norm": 0.056436821818351746, "learning_rate": 9.658978782383661e-05, "loss": 0.0086, "step": 6583 }, { "epoch": 1.2743808049535603, "grad_norm": 0.07135368883609772, "learning_rate": 9.658873998043591e-05, "loss": 0.0086, "step": 6584 }, { "epoch": 1.2745743034055728, "grad_norm": 0.06429103016853333, "learning_rate": 9.658769198241854e-05, "loss": 0.0084, "step": 6585 }, { "epoch": 1.2747678018575852, "grad_norm": 0.07986175268888474, "learning_rate": 9.658664382978836e-05, "loss": 0.0089, "step": 6586 }, { "epoch": 1.2749613003095974, "grad_norm": 0.06415398418903351, "learning_rate": 9.658559552254931e-05, "loss": 0.0086, "step": 6587 }, { "epoch": 1.2751547987616099, "grad_norm": 0.05578016862273216, "learning_rate": 9.658454706070524e-05, "loss": 0.0085, "step": 6588 }, { "epoch": 1.2753482972136223, "grad_norm": 0.07108212262392044, "learning_rate": 9.658349844426009e-05, "loss": 0.0089, "step": 6589 }, { "epoch": 1.2755417956656347, "grad_norm": 0.08297918736934662, "learning_rate": 9.658244967321772e-05, "loss": 0.0098, "step": 6590 }, { "epoch": 1.2757352941176472, "grad_norm": 0.0396701842546463, "learning_rate": 9.658140074758207e-05, "loss": 0.0069, "step": 6591 }, { "epoch": 1.2759287925696594, "grad_norm": 0.11385705322027206, "learning_rate": 9.658035166735699e-05, "loss": 0.0083, "step": 6592 }, { "epoch": 1.2761222910216719, "grad_norm": 0.06007669121026993, "learning_rate": 9.657930243254644e-05, "loss": 0.0105, "step": 6593 }, { "epoch": 1.2763157894736843, "grad_norm": 0.0807318463921547, "learning_rate": 9.657825304315427e-05, "loss": 0.0098, "step": 6594 }, { "epoch": 1.2765092879256965, "grad_norm": 0.0971909835934639, "learning_rate": 9.657720349918442e-05, "loss": 0.0089, "step": 6595 }, { "epoch": 1.276702786377709, "grad_norm": 0.06580529361963272, "learning_rate": 9.657615380064074e-05, "loss": 0.0089, "step": 6596 }, { "epoch": 1.2768962848297214, "grad_norm": 0.06405334174633026, "learning_rate": 
9.657510394752718e-05, "loss": 0.0086, "step": 6597 }, { "epoch": 1.2770897832817338, "grad_norm": 0.0776616632938385, "learning_rate": 9.657405393984762e-05, "loss": 0.0082, "step": 6598 }, { "epoch": 1.277283281733746, "grad_norm": 0.06047382205724716, "learning_rate": 9.657300377760598e-05, "loss": 0.0094, "step": 6599 }, { "epoch": 1.2774767801857585, "grad_norm": 0.08827383071184158, "learning_rate": 9.657195346080618e-05, "loss": 0.0096, "step": 6600 }, { "epoch": 1.277670278637771, "grad_norm": 0.08238212019205093, "learning_rate": 9.657090298945206e-05, "loss": 0.0081, "step": 6601 }, { "epoch": 1.2778637770897832, "grad_norm": 0.10292333364486694, "learning_rate": 9.656985236354759e-05, "loss": 0.0086, "step": 6602 }, { "epoch": 1.2780572755417956, "grad_norm": 0.05830957368016243, "learning_rate": 9.656880158309665e-05, "loss": 0.0088, "step": 6603 }, { "epoch": 1.278250773993808, "grad_norm": 0.08005308359861374, "learning_rate": 9.656775064810312e-05, "loss": 0.0093, "step": 6604 }, { "epoch": 1.2784442724458205, "grad_norm": 0.07964491844177246, "learning_rate": 9.656669955857097e-05, "loss": 0.0086, "step": 6605 }, { "epoch": 1.278637770897833, "grad_norm": 0.0664120465517044, "learning_rate": 9.656564831450406e-05, "loss": 0.0084, "step": 6606 }, { "epoch": 1.2788312693498451, "grad_norm": 0.06158532574772835, "learning_rate": 9.656459691590633e-05, "loss": 0.0089, "step": 6607 }, { "epoch": 1.2790247678018576, "grad_norm": 0.06236685439944267, "learning_rate": 9.656354536278164e-05, "loss": 0.0106, "step": 6608 }, { "epoch": 1.27921826625387, "grad_norm": 0.06523588299751282, "learning_rate": 9.656249365513394e-05, "loss": 0.0106, "step": 6609 }, { "epoch": 1.2794117647058822, "grad_norm": 0.046122197061777115, "learning_rate": 9.656144179296713e-05, "loss": 0.0079, "step": 6610 }, { "epoch": 1.2796052631578947, "grad_norm": 0.058783724904060364, "learning_rate": 9.656038977628514e-05, "loss": 0.0085, "step": 6611 }, { "epoch": 1.2797987616099071, "grad_norm": 0.05879206582903862, "learning_rate": 9.655933760509184e-05, "loss": 0.008, "step": 6612 }, { "epoch": 1.2799922600619196, "grad_norm": 0.05827118083834648, "learning_rate": 9.655828527939117e-05, "loss": 0.0097, "step": 6613 }, { "epoch": 1.280185758513932, "grad_norm": 0.12573111057281494, "learning_rate": 9.655723279918702e-05, "loss": 0.0084, "step": 6614 }, { "epoch": 1.2803792569659442, "grad_norm": 0.07629901170730591, "learning_rate": 9.655618016448334e-05, "loss": 0.0072, "step": 6615 }, { "epoch": 1.2805727554179567, "grad_norm": 0.11515524238348007, "learning_rate": 9.655512737528401e-05, "loss": 0.0089, "step": 6616 }, { "epoch": 1.2807662538699691, "grad_norm": 0.10431879013776779, "learning_rate": 9.655407443159295e-05, "loss": 0.0087, "step": 6617 }, { "epoch": 1.2809597523219813, "grad_norm": 0.10199050605297089, "learning_rate": 9.65530213334141e-05, "loss": 0.0075, "step": 6618 }, { "epoch": 1.2811532507739938, "grad_norm": 0.10151458531618118, "learning_rate": 9.655196808075135e-05, "loss": 0.0095, "step": 6619 }, { "epoch": 1.2813467492260062, "grad_norm": 0.09272727370262146, "learning_rate": 9.655091467360861e-05, "loss": 0.0097, "step": 6620 }, { "epoch": 1.2815402476780187, "grad_norm": 0.1278316080570221, "learning_rate": 9.654986111198983e-05, "loss": 0.0088, "step": 6621 }, { "epoch": 1.2817337461300309, "grad_norm": 0.08557485789060593, "learning_rate": 9.654880739589889e-05, "loss": 0.0106, "step": 6622 }, { "epoch": 1.2819272445820433, "grad_norm": 0.09775610268115997, "learning_rate": 
9.654775352533972e-05, "loss": 0.0089, "step": 6623 }, { "epoch": 1.2821207430340558, "grad_norm": 0.0721951499581337, "learning_rate": 9.654669950031624e-05, "loss": 0.0078, "step": 6624 }, { "epoch": 1.282314241486068, "grad_norm": 0.05554037541151047, "learning_rate": 9.654564532083237e-05, "loss": 0.0094, "step": 6625 }, { "epoch": 1.2825077399380804, "grad_norm": 0.13343368470668793, "learning_rate": 9.654459098689202e-05, "loss": 0.0085, "step": 6626 }, { "epoch": 1.2827012383900929, "grad_norm": 0.062090128660202026, "learning_rate": 9.654353649849914e-05, "loss": 0.0087, "step": 6627 }, { "epoch": 1.2828947368421053, "grad_norm": 0.13034309446811676, "learning_rate": 9.654248185565761e-05, "loss": 0.0092, "step": 6628 }, { "epoch": 1.2830882352941178, "grad_norm": 0.0549730584025383, "learning_rate": 9.654142705837137e-05, "loss": 0.0091, "step": 6629 }, { "epoch": 1.28328173374613, "grad_norm": 0.12487710267305374, "learning_rate": 9.654037210664435e-05, "loss": 0.009, "step": 6630 }, { "epoch": 1.2834752321981424, "grad_norm": 0.055636122822761536, "learning_rate": 9.653931700048045e-05, "loss": 0.008, "step": 6631 }, { "epoch": 1.2836687306501549, "grad_norm": 0.1257362812757492, "learning_rate": 9.653826173988362e-05, "loss": 0.0087, "step": 6632 }, { "epoch": 1.283862229102167, "grad_norm": 0.05642035976052284, "learning_rate": 9.653720632485774e-05, "loss": 0.009, "step": 6633 }, { "epoch": 1.2840557275541795, "grad_norm": 0.06896838545799255, "learning_rate": 9.65361507554068e-05, "loss": 0.0084, "step": 6634 }, { "epoch": 1.284249226006192, "grad_norm": 0.05433933809399605, "learning_rate": 9.653509503153465e-05, "loss": 0.008, "step": 6635 }, { "epoch": 1.2844427244582044, "grad_norm": 0.07036109268665314, "learning_rate": 9.653403915324527e-05, "loss": 0.008, "step": 6636 }, { "epoch": 1.2846362229102168, "grad_norm": 0.08642087131738663, "learning_rate": 9.653298312054257e-05, "loss": 0.008, "step": 6637 }, { "epoch": 1.284829721362229, "grad_norm": 0.1276918202638626, "learning_rate": 9.653192693343046e-05, "loss": 0.0084, "step": 6638 }, { "epoch": 1.2850232198142415, "grad_norm": 0.08325275033712387, "learning_rate": 9.653087059191289e-05, "loss": 0.0081, "step": 6639 }, { "epoch": 1.285216718266254, "grad_norm": 0.15691715478897095, "learning_rate": 9.652981409599378e-05, "loss": 0.0086, "step": 6640 }, { "epoch": 1.2854102167182662, "grad_norm": 0.09076520055532455, "learning_rate": 9.652875744567704e-05, "loss": 0.0086, "step": 6641 }, { "epoch": 1.2856037151702786, "grad_norm": 0.11477532982826233, "learning_rate": 9.652770064096662e-05, "loss": 0.0089, "step": 6642 }, { "epoch": 1.285797213622291, "grad_norm": 0.11288431286811829, "learning_rate": 9.652664368186644e-05, "loss": 0.0088, "step": 6643 }, { "epoch": 1.2859907120743035, "grad_norm": 0.0764911100268364, "learning_rate": 9.652558656838043e-05, "loss": 0.0067, "step": 6644 }, { "epoch": 1.2861842105263157, "grad_norm": 0.12818758189678192, "learning_rate": 9.652452930051253e-05, "loss": 0.0091, "step": 6645 }, { "epoch": 1.2863777089783281, "grad_norm": 0.061611950397491455, "learning_rate": 9.652347187826664e-05, "loss": 0.0085, "step": 6646 }, { "epoch": 1.2865712074303406, "grad_norm": 0.10648495703935623, "learning_rate": 9.652241430164675e-05, "loss": 0.0092, "step": 6647 }, { "epoch": 1.2867647058823528, "grad_norm": 0.10989251732826233, "learning_rate": 9.652135657065673e-05, "loss": 0.0085, "step": 6648 }, { "epoch": 1.2869582043343653, "grad_norm": 0.1114363744854927, "learning_rate": 
9.652029868530054e-05, "loss": 0.0082, "step": 6649 }, { "epoch": 1.2871517027863777, "grad_norm": 0.09615334123373032, "learning_rate": 9.65192406455821e-05, "loss": 0.0096, "step": 6650 }, { "epoch": 1.2873452012383901, "grad_norm": 0.12563642859458923, "learning_rate": 9.651818245150537e-05, "loss": 0.0093, "step": 6651 }, { "epoch": 1.2875386996904026, "grad_norm": 0.07395914942026138, "learning_rate": 9.651712410307426e-05, "loss": 0.0078, "step": 6652 }, { "epoch": 1.2877321981424148, "grad_norm": 0.1532924622297287, "learning_rate": 9.651606560029273e-05, "loss": 0.0086, "step": 6653 }, { "epoch": 1.2879256965944272, "grad_norm": 0.08342217653989792, "learning_rate": 9.651500694316468e-05, "loss": 0.009, "step": 6654 }, { "epoch": 1.2881191950464397, "grad_norm": 0.13281457126140594, "learning_rate": 9.651394813169408e-05, "loss": 0.009, "step": 6655 }, { "epoch": 1.288312693498452, "grad_norm": 0.11887363344430923, "learning_rate": 9.651288916588483e-05, "loss": 0.0079, "step": 6656 }, { "epoch": 1.2885061919504643, "grad_norm": 0.09968450665473938, "learning_rate": 9.651183004574089e-05, "loss": 0.0084, "step": 6657 }, { "epoch": 1.2886996904024768, "grad_norm": 0.11852701753377914, "learning_rate": 9.65107707712662e-05, "loss": 0.0084, "step": 6658 }, { "epoch": 1.2888931888544892, "grad_norm": 0.10404849797487259, "learning_rate": 9.650971134246469e-05, "loss": 0.0103, "step": 6659 }, { "epoch": 1.2890866873065017, "grad_norm": 0.09585975110530853, "learning_rate": 9.650865175934031e-05, "loss": 0.0077, "step": 6660 }, { "epoch": 1.2892801857585139, "grad_norm": 0.09109427034854889, "learning_rate": 9.650759202189699e-05, "loss": 0.0084, "step": 6661 }, { "epoch": 1.2894736842105263, "grad_norm": 0.10205815732479095, "learning_rate": 9.650653213013866e-05, "loss": 0.0065, "step": 6662 }, { "epoch": 1.2896671826625388, "grad_norm": 0.07610918581485748, "learning_rate": 9.650547208406928e-05, "loss": 0.0076, "step": 6663 }, { "epoch": 1.289860681114551, "grad_norm": 0.11573223769664764, "learning_rate": 9.650441188369278e-05, "loss": 0.0096, "step": 6664 }, { "epoch": 1.2900541795665634, "grad_norm": 0.09028510004281998, "learning_rate": 9.65033515290131e-05, "loss": 0.0091, "step": 6665 }, { "epoch": 1.2902476780185759, "grad_norm": 0.12396986037492752, "learning_rate": 9.650229102003418e-05, "loss": 0.0086, "step": 6666 }, { "epoch": 1.2904411764705883, "grad_norm": 0.07857771217823029, "learning_rate": 9.650123035675998e-05, "loss": 0.0093, "step": 6667 }, { "epoch": 1.2906346749226008, "grad_norm": 0.10654494911432266, "learning_rate": 9.650016953919442e-05, "loss": 0.0088, "step": 6668 }, { "epoch": 1.290828173374613, "grad_norm": 0.0944790169596672, "learning_rate": 9.649910856734148e-05, "loss": 0.0094, "step": 6669 }, { "epoch": 1.2910216718266254, "grad_norm": 0.10533750802278519, "learning_rate": 9.649804744120505e-05, "loss": 0.008, "step": 6670 }, { "epoch": 1.2912151702786376, "grad_norm": 0.09903132170438766, "learning_rate": 9.649698616078912e-05, "loss": 0.0088, "step": 6671 }, { "epoch": 1.29140866873065, "grad_norm": 0.10626506060361862, "learning_rate": 9.649592472609762e-05, "loss": 0.0077, "step": 6672 }, { "epoch": 1.2916021671826625, "grad_norm": 0.11725019663572311, "learning_rate": 9.649486313713451e-05, "loss": 0.0088, "step": 6673 }, { "epoch": 1.291795665634675, "grad_norm": 0.08305216580629349, "learning_rate": 9.64938013939037e-05, "loss": 0.0098, "step": 6674 }, { "epoch": 1.2919891640866874, "grad_norm": 0.13508372008800507, "learning_rate": 
9.649273949640917e-05, "loss": 0.0087, "step": 6675 }, { "epoch": 1.2921826625386996, "grad_norm": 0.10508493334054947, "learning_rate": 9.649167744465487e-05, "loss": 0.008, "step": 6676 }, { "epoch": 1.292376160990712, "grad_norm": 0.11868415772914886, "learning_rate": 9.649061523864472e-05, "loss": 0.0089, "step": 6677 }, { "epoch": 1.2925696594427245, "grad_norm": 0.14826959371566772, "learning_rate": 9.648955287838269e-05, "loss": 0.0087, "step": 6678 }, { "epoch": 1.2927631578947367, "grad_norm": 0.056881725788116455, "learning_rate": 9.648849036387273e-05, "loss": 0.0081, "step": 6679 }, { "epoch": 1.2929566563467492, "grad_norm": 0.16795547306537628, "learning_rate": 9.648742769511877e-05, "loss": 0.0066, "step": 6680 }, { "epoch": 1.2931501547987616, "grad_norm": 0.10220401734113693, "learning_rate": 9.648636487212481e-05, "loss": 0.0078, "step": 6681 }, { "epoch": 1.293343653250774, "grad_norm": 0.09670790284872055, "learning_rate": 9.648530189489474e-05, "loss": 0.007, "step": 6682 }, { "epoch": 1.2935371517027865, "grad_norm": 0.1822957992553711, "learning_rate": 9.648423876343253e-05, "loss": 0.0095, "step": 6683 }, { "epoch": 1.2937306501547987, "grad_norm": 0.03731786832213402, "learning_rate": 9.648317547774218e-05, "loss": 0.0102, "step": 6684 }, { "epoch": 1.2939241486068112, "grad_norm": 0.23369646072387695, "learning_rate": 9.648211203782757e-05, "loss": 0.0095, "step": 6685 }, { "epoch": 1.2941176470588236, "grad_norm": 0.06113268807530403, "learning_rate": 9.648104844369272e-05, "loss": 0.0067, "step": 6686 }, { "epoch": 1.2943111455108358, "grad_norm": 0.2064640074968338, "learning_rate": 9.647998469534152e-05, "loss": 0.0083, "step": 6687 }, { "epoch": 1.2945046439628483, "grad_norm": 0.07611075043678284, "learning_rate": 9.647892079277798e-05, "loss": 0.0082, "step": 6688 }, { "epoch": 1.2946981424148607, "grad_norm": 0.08534844219684601, "learning_rate": 9.647785673600602e-05, "loss": 0.0093, "step": 6689 }, { "epoch": 1.2948916408668731, "grad_norm": 0.12632368505001068, "learning_rate": 9.647679252502963e-05, "loss": 0.0085, "step": 6690 }, { "epoch": 1.2950851393188856, "grad_norm": 0.056747253984212875, "learning_rate": 9.647572815985273e-05, "loss": 0.01, "step": 6691 }, { "epoch": 1.2952786377708978, "grad_norm": 0.09398927539587021, "learning_rate": 9.64746636404793e-05, "loss": 0.0087, "step": 6692 }, { "epoch": 1.2954721362229102, "grad_norm": 0.03507547453045845, "learning_rate": 9.647359896691328e-05, "loss": 0.0082, "step": 6693 }, { "epoch": 1.2956656346749227, "grad_norm": 0.0722920298576355, "learning_rate": 9.647253413915864e-05, "loss": 0.0112, "step": 6694 }, { "epoch": 1.295859133126935, "grad_norm": 0.13284794986248016, "learning_rate": 9.647146915721935e-05, "loss": 0.0091, "step": 6695 }, { "epoch": 1.2960526315789473, "grad_norm": 0.09549591690301895, "learning_rate": 9.647040402109933e-05, "loss": 0.0093, "step": 6696 }, { "epoch": 1.2962461300309598, "grad_norm": 0.1436176896095276, "learning_rate": 9.64693387308026e-05, "loss": 0.0103, "step": 6697 }, { "epoch": 1.2964396284829722, "grad_norm": 0.08128891885280609, "learning_rate": 9.646827328633307e-05, "loss": 0.0081, "step": 6698 }, { "epoch": 1.2966331269349844, "grad_norm": 0.130141943693161, "learning_rate": 9.646720768769472e-05, "loss": 0.009, "step": 6699 }, { "epoch": 1.296826625386997, "grad_norm": 0.062400031834840775, "learning_rate": 9.64661419348915e-05, "loss": 0.0101, "step": 6700 }, { "epoch": 1.2970201238390093, "grad_norm": 0.04189849644899368, "learning_rate": 
9.64650760279274e-05, "loss": 0.0083, "step": 6701 }, { "epoch": 1.2972136222910216, "grad_norm": 0.0662771612405777, "learning_rate": 9.646400996680634e-05, "loss": 0.0085, "step": 6702 }, { "epoch": 1.297407120743034, "grad_norm": 0.03723352774977684, "learning_rate": 9.646294375153233e-05, "loss": 0.0092, "step": 6703 }, { "epoch": 1.2976006191950464, "grad_norm": 0.05778253450989723, "learning_rate": 9.646187738210932e-05, "loss": 0.0086, "step": 6704 }, { "epoch": 1.2977941176470589, "grad_norm": 0.05077066272497177, "learning_rate": 9.646081085854122e-05, "loss": 0.0088, "step": 6705 }, { "epoch": 1.2979876160990713, "grad_norm": 0.05477157235145569, "learning_rate": 9.645974418083209e-05, "loss": 0.0092, "step": 6706 }, { "epoch": 1.2981811145510835, "grad_norm": 0.15538477897644043, "learning_rate": 9.645867734898582e-05, "loss": 0.0084, "step": 6707 }, { "epoch": 1.298374613003096, "grad_norm": 0.10450267046689987, "learning_rate": 9.645761036300641e-05, "loss": 0.0087, "step": 6708 }, { "epoch": 1.2985681114551084, "grad_norm": 0.1455080360174179, "learning_rate": 9.645654322289783e-05, "loss": 0.0084, "step": 6709 }, { "epoch": 1.2987616099071206, "grad_norm": 0.06706800311803818, "learning_rate": 9.645547592866403e-05, "loss": 0.0085, "step": 6710 }, { "epoch": 1.298955108359133, "grad_norm": 0.1541338711977005, "learning_rate": 9.645440848030898e-05, "loss": 0.0096, "step": 6711 }, { "epoch": 1.2991486068111455, "grad_norm": 0.0883345678448677, "learning_rate": 9.645334087783667e-05, "loss": 0.008, "step": 6712 }, { "epoch": 1.299342105263158, "grad_norm": 0.09002400189638138, "learning_rate": 9.645227312125103e-05, "loss": 0.0078, "step": 6713 }, { "epoch": 1.2995356037151704, "grad_norm": 0.136656254529953, "learning_rate": 9.645120521055606e-05, "loss": 0.0077, "step": 6714 }, { "epoch": 1.2997291021671826, "grad_norm": 0.0388522632420063, "learning_rate": 9.645013714575574e-05, "loss": 0.0087, "step": 6715 }, { "epoch": 1.299922600619195, "grad_norm": 0.1400790512561798, "learning_rate": 9.644906892685401e-05, "loss": 0.0073, "step": 6716 }, { "epoch": 1.3001160990712075, "grad_norm": 0.05457952991127968, "learning_rate": 9.644800055385485e-05, "loss": 0.0087, "step": 6717 }, { "epoch": 1.3003095975232197, "grad_norm": 0.051442693918943405, "learning_rate": 9.644693202676225e-05, "loss": 0.008, "step": 6718 }, { "epoch": 1.3005030959752322, "grad_norm": 0.09915869683027267, "learning_rate": 9.644586334558017e-05, "loss": 0.0087, "step": 6719 }, { "epoch": 1.3006965944272446, "grad_norm": 0.13504542410373688, "learning_rate": 9.644479451031258e-05, "loss": 0.0082, "step": 6720 }, { "epoch": 1.300890092879257, "grad_norm": 0.13024261593818665, "learning_rate": 9.644372552096344e-05, "loss": 0.007, "step": 6721 }, { "epoch": 1.3010835913312693, "grad_norm": 0.15547426044940948, "learning_rate": 9.644265637753675e-05, "loss": 0.0109, "step": 6722 }, { "epoch": 1.3012770897832817, "grad_norm": 0.11941459774971008, "learning_rate": 9.644158708003647e-05, "loss": 0.0095, "step": 6723 }, { "epoch": 1.3014705882352942, "grad_norm": 0.19684983789920807, "learning_rate": 9.64405176284666e-05, "loss": 0.0104, "step": 6724 }, { "epoch": 1.3016640866873064, "grad_norm": 0.08094783872365952, "learning_rate": 9.643944802283108e-05, "loss": 0.0085, "step": 6725 }, { "epoch": 1.3018575851393188, "grad_norm": 0.23376740515232086, "learning_rate": 9.643837826313391e-05, "loss": 0.0095, "step": 6726 }, { "epoch": 1.3020510835913313, "grad_norm": 0.06445009261369705, "learning_rate": 
9.643730834937906e-05, "loss": 0.0084, "step": 6727 }, { "epoch": 1.3022445820433437, "grad_norm": 0.24468068778514862, "learning_rate": 9.64362382815705e-05, "loss": 0.01, "step": 6728 }, { "epoch": 1.3024380804953561, "grad_norm": 0.07763632386922836, "learning_rate": 9.643516805971222e-05, "loss": 0.0089, "step": 6729 }, { "epoch": 1.3026315789473684, "grad_norm": 0.1526903510093689, "learning_rate": 9.64340976838082e-05, "loss": 0.0081, "step": 6730 }, { "epoch": 1.3028250773993808, "grad_norm": 0.09923721104860306, "learning_rate": 9.643302715386243e-05, "loss": 0.0087, "step": 6731 }, { "epoch": 1.3030185758513932, "grad_norm": 0.08136946707963943, "learning_rate": 9.643195646987884e-05, "loss": 0.008, "step": 6732 }, { "epoch": 1.3032120743034055, "grad_norm": 0.10910211503505707, "learning_rate": 9.643088563186146e-05, "loss": 0.0096, "step": 6733 }, { "epoch": 1.303405572755418, "grad_norm": 0.0546850711107254, "learning_rate": 9.642981463981426e-05, "loss": 0.009, "step": 6734 }, { "epoch": 1.3035990712074303, "grad_norm": 0.06634963303804398, "learning_rate": 9.642874349374122e-05, "loss": 0.0068, "step": 6735 }, { "epoch": 1.3037925696594428, "grad_norm": 0.0681196078658104, "learning_rate": 9.64276721936463e-05, "loss": 0.0094, "step": 6736 }, { "epoch": 1.3039860681114552, "grad_norm": 0.08742861449718475, "learning_rate": 9.642660073953353e-05, "loss": 0.0099, "step": 6737 }, { "epoch": 1.3041795665634675, "grad_norm": 0.04872484505176544, "learning_rate": 9.642552913140685e-05, "loss": 0.0068, "step": 6738 }, { "epoch": 1.30437306501548, "grad_norm": 0.10766440629959106, "learning_rate": 9.642445736927026e-05, "loss": 0.0097, "step": 6739 }, { "epoch": 1.3045665634674923, "grad_norm": 0.07579237967729568, "learning_rate": 9.642338545312774e-05, "loss": 0.0089, "step": 6740 }, { "epoch": 1.3047600619195046, "grad_norm": 0.10871924459934235, "learning_rate": 9.642231338298329e-05, "loss": 0.0098, "step": 6741 }, { "epoch": 1.304953560371517, "grad_norm": 0.09855256229639053, "learning_rate": 9.64212411588409e-05, "loss": 0.009, "step": 6742 }, { "epoch": 1.3051470588235294, "grad_norm": 0.0867316946387291, "learning_rate": 9.642016878070453e-05, "loss": 0.0086, "step": 6743 }, { "epoch": 1.3053405572755419, "grad_norm": 0.09549931436777115, "learning_rate": 9.641909624857817e-05, "loss": 0.0084, "step": 6744 }, { "epoch": 1.305534055727554, "grad_norm": 0.05690852552652359, "learning_rate": 9.641802356246582e-05, "loss": 0.0083, "step": 6745 }, { "epoch": 1.3057275541795665, "grad_norm": 0.08727322518825531, "learning_rate": 9.641695072237145e-05, "loss": 0.0091, "step": 6746 }, { "epoch": 1.305921052631579, "grad_norm": 0.0688626766204834, "learning_rate": 9.641587772829909e-05, "loss": 0.0094, "step": 6747 }, { "epoch": 1.3061145510835912, "grad_norm": 0.059440284967422485, "learning_rate": 9.641480458025268e-05, "loss": 0.0091, "step": 6748 }, { "epoch": 1.3063080495356036, "grad_norm": 0.0943874791264534, "learning_rate": 9.641373127823625e-05, "loss": 0.0087, "step": 6749 }, { "epoch": 1.306501547987616, "grad_norm": 0.031620435416698456, "learning_rate": 9.641265782225377e-05, "loss": 0.0086, "step": 6750 }, { "epoch": 1.3066950464396285, "grad_norm": 0.10476327687501907, "learning_rate": 9.641158421230921e-05, "loss": 0.0085, "step": 6751 }, { "epoch": 1.306888544891641, "grad_norm": 0.035992059856653214, "learning_rate": 9.64105104484066e-05, "loss": 0.0067, "step": 6752 }, { "epoch": 1.3070820433436532, "grad_norm": 0.08747053891420364, "learning_rate": 
9.640943653054991e-05, "loss": 0.008, "step": 6753 }, { "epoch": 1.3072755417956656, "grad_norm": 0.05505620688199997, "learning_rate": 9.640836245874316e-05, "loss": 0.0091, "step": 6754 }, { "epoch": 1.307469040247678, "grad_norm": 0.056684065610170364, "learning_rate": 9.640728823299029e-05, "loss": 0.0085, "step": 6755 }, { "epoch": 1.3076625386996903, "grad_norm": 0.05126126483082771, "learning_rate": 9.640621385329534e-05, "loss": 0.0074, "step": 6756 }, { "epoch": 1.3078560371517027, "grad_norm": 0.047924406826496124, "learning_rate": 9.64051393196623e-05, "loss": 0.0095, "step": 6757 }, { "epoch": 1.3080495356037152, "grad_norm": 0.07931389659643173, "learning_rate": 9.640406463209515e-05, "loss": 0.0094, "step": 6758 }, { "epoch": 1.3082430340557276, "grad_norm": 0.04565368965268135, "learning_rate": 9.640298979059787e-05, "loss": 0.0061, "step": 6759 }, { "epoch": 1.30843653250774, "grad_norm": 0.12856939435005188, "learning_rate": 9.64019147951745e-05, "loss": 0.0082, "step": 6760 }, { "epoch": 1.3086300309597523, "grad_norm": 0.029535289853811264, "learning_rate": 9.6400839645829e-05, "loss": 0.0084, "step": 6761 }, { "epoch": 1.3088235294117647, "grad_norm": 0.12023560702800751, "learning_rate": 9.639976434256538e-05, "loss": 0.009, "step": 6762 }, { "epoch": 1.3090170278637772, "grad_norm": 0.05755571275949478, "learning_rate": 9.639868888538764e-05, "loss": 0.0097, "step": 6763 }, { "epoch": 1.3092105263157894, "grad_norm": 0.08426612615585327, "learning_rate": 9.639761327429977e-05, "loss": 0.0079, "step": 6764 }, { "epoch": 1.3094040247678018, "grad_norm": 0.08234462887048721, "learning_rate": 9.639653750930579e-05, "loss": 0.0075, "step": 6765 }, { "epoch": 1.3095975232198143, "grad_norm": 0.08335865288972855, "learning_rate": 9.639546159040967e-05, "loss": 0.0069, "step": 6766 }, { "epoch": 1.3097910216718267, "grad_norm": 0.08244406431913376, "learning_rate": 9.639438551761543e-05, "loss": 0.0077, "step": 6767 }, { "epoch": 1.3099845201238391, "grad_norm": 0.061059337109327316, "learning_rate": 9.639330929092705e-05, "loss": 0.0108, "step": 6768 }, { "epoch": 1.3101780185758514, "grad_norm": 0.10807008296251297, "learning_rate": 9.639223291034855e-05, "loss": 0.0075, "step": 6769 }, { "epoch": 1.3103715170278638, "grad_norm": 0.03907574340701103, "learning_rate": 9.639115637588394e-05, "loss": 0.0073, "step": 6770 }, { "epoch": 1.310565015479876, "grad_norm": 0.1059393584728241, "learning_rate": 9.63900796875372e-05, "loss": 0.0083, "step": 6771 }, { "epoch": 1.3107585139318885, "grad_norm": 0.0637531504034996, "learning_rate": 9.638900284531237e-05, "loss": 0.0082, "step": 6772 }, { "epoch": 1.310952012383901, "grad_norm": 0.07750248163938522, "learning_rate": 9.638792584921339e-05, "loss": 0.0093, "step": 6773 }, { "epoch": 1.3111455108359134, "grad_norm": 0.08812602609395981, "learning_rate": 9.638684869924431e-05, "loss": 0.0072, "step": 6774 }, { "epoch": 1.3113390092879258, "grad_norm": 0.07005312293767929, "learning_rate": 9.638577139540913e-05, "loss": 0.0101, "step": 6775 }, { "epoch": 1.311532507739938, "grad_norm": 0.09436897188425064, "learning_rate": 9.638469393771185e-05, "loss": 0.0109, "step": 6776 }, { "epoch": 1.3117260061919505, "grad_norm": 0.06688245385885239, "learning_rate": 9.638361632615647e-05, "loss": 0.0079, "step": 6777 }, { "epoch": 1.311919504643963, "grad_norm": 0.0919724702835083, "learning_rate": 9.638253856074701e-05, "loss": 0.0098, "step": 6778 }, { "epoch": 1.3121130030959751, "grad_norm": 0.08141349256038666, "learning_rate": 
9.638146064148745e-05, "loss": 0.008, "step": 6779 }, { "epoch": 1.3123065015479876, "grad_norm": 0.06358946859836578, "learning_rate": 9.638038256838184e-05, "loss": 0.0084, "step": 6780 }, { "epoch": 1.3125, "grad_norm": 0.048325490206480026, "learning_rate": 9.637930434143415e-05, "loss": 0.0097, "step": 6781 }, { "epoch": 1.3126934984520124, "grad_norm": 0.0892360582947731, "learning_rate": 9.637822596064839e-05, "loss": 0.0069, "step": 6782 }, { "epoch": 1.3128869969040249, "grad_norm": 0.1032865047454834, "learning_rate": 9.637714742602861e-05, "loss": 0.0093, "step": 6783 }, { "epoch": 1.313080495356037, "grad_norm": 0.09141882508993149, "learning_rate": 9.637606873757877e-05, "loss": 0.0093, "step": 6784 }, { "epoch": 1.3132739938080495, "grad_norm": 0.15368662774562836, "learning_rate": 9.637498989530292e-05, "loss": 0.0098, "step": 6785 }, { "epoch": 1.313467492260062, "grad_norm": 0.06374359875917435, "learning_rate": 9.637391089920503e-05, "loss": 0.0083, "step": 6786 }, { "epoch": 1.3136609907120742, "grad_norm": 0.20351606607437134, "learning_rate": 9.637283174928916e-05, "loss": 0.0114, "step": 6787 }, { "epoch": 1.3138544891640866, "grad_norm": 0.04443033039569855, "learning_rate": 9.637175244555927e-05, "loss": 0.0089, "step": 6788 }, { "epoch": 1.314047987616099, "grad_norm": 0.18629759550094604, "learning_rate": 9.637067298801941e-05, "loss": 0.0094, "step": 6789 }, { "epoch": 1.3142414860681115, "grad_norm": 0.07244130223989487, "learning_rate": 9.63695933766736e-05, "loss": 0.0079, "step": 6790 }, { "epoch": 1.314434984520124, "grad_norm": 0.14046967029571533, "learning_rate": 9.636851361152581e-05, "loss": 0.0092, "step": 6791 }, { "epoch": 1.3146284829721362, "grad_norm": 0.09545210748910904, "learning_rate": 9.636743369258009e-05, "loss": 0.0093, "step": 6792 }, { "epoch": 1.3148219814241486, "grad_norm": 0.10933206975460052, "learning_rate": 9.636635361984045e-05, "loss": 0.0101, "step": 6793 }, { "epoch": 1.3150154798761609, "grad_norm": 0.08908793330192566, "learning_rate": 9.636527339331088e-05, "loss": 0.0077, "step": 6794 }, { "epoch": 1.3152089783281733, "grad_norm": 0.09063348919153214, "learning_rate": 9.636419301299543e-05, "loss": 0.0074, "step": 6795 }, { "epoch": 1.3154024767801857, "grad_norm": 0.0875527560710907, "learning_rate": 9.636311247889812e-05, "loss": 0.0091, "step": 6796 }, { "epoch": 1.3155959752321982, "grad_norm": 0.07751494646072388, "learning_rate": 9.636203179102292e-05, "loss": 0.0083, "step": 6797 }, { "epoch": 1.3157894736842106, "grad_norm": 0.11614938825368881, "learning_rate": 9.636095094937388e-05, "loss": 0.009, "step": 6798 }, { "epoch": 1.3159829721362228, "grad_norm": 0.06179110333323479, "learning_rate": 9.635986995395504e-05, "loss": 0.0068, "step": 6799 }, { "epoch": 1.3161764705882353, "grad_norm": 0.14717641472816467, "learning_rate": 9.635878880477038e-05, "loss": 0.0071, "step": 6800 }, { "epoch": 1.3163699690402477, "grad_norm": 0.07573731243610382, "learning_rate": 9.635770750182392e-05, "loss": 0.0074, "step": 6801 }, { "epoch": 1.31656346749226, "grad_norm": 0.11463375389575958, "learning_rate": 9.635662604511971e-05, "loss": 0.0077, "step": 6802 }, { "epoch": 1.3167569659442724, "grad_norm": 0.1113906055688858, "learning_rate": 9.635554443466176e-05, "loss": 0.0089, "step": 6803 }, { "epoch": 1.3169504643962848, "grad_norm": 0.08401066064834595, "learning_rate": 9.635446267045409e-05, "loss": 0.0103, "step": 6804 }, { "epoch": 1.3171439628482973, "grad_norm": 0.08797909319400787, "learning_rate": 
9.63533807525007e-05, "loss": 0.0078, "step": 6805 }, { "epoch": 1.3173374613003097, "grad_norm": 0.08920248597860336, "learning_rate": 9.635229868080564e-05, "loss": 0.0077, "step": 6806 }, { "epoch": 1.317530959752322, "grad_norm": 0.08723799884319305, "learning_rate": 9.635121645537293e-05, "loss": 0.01, "step": 6807 }, { "epoch": 1.3177244582043344, "grad_norm": 0.07653085142374039, "learning_rate": 9.635013407620657e-05, "loss": 0.0083, "step": 6808 }, { "epoch": 1.3179179566563468, "grad_norm": 0.09840037673711777, "learning_rate": 9.63490515433106e-05, "loss": 0.0101, "step": 6809 }, { "epoch": 1.318111455108359, "grad_norm": 0.0660792887210846, "learning_rate": 9.634796885668907e-05, "loss": 0.007, "step": 6810 }, { "epoch": 1.3183049535603715, "grad_norm": 0.07958665490150452, "learning_rate": 9.634688601634596e-05, "loss": 0.0068, "step": 6811 }, { "epoch": 1.318498452012384, "grad_norm": 0.06770135462284088, "learning_rate": 9.63458030222853e-05, "loss": 0.0095, "step": 6812 }, { "epoch": 1.3186919504643964, "grad_norm": 0.12025370448827744, "learning_rate": 9.634471987451115e-05, "loss": 0.009, "step": 6813 }, { "epoch": 1.3188854489164088, "grad_norm": 0.03559517860412598, "learning_rate": 9.634363657302754e-05, "loss": 0.0085, "step": 6814 }, { "epoch": 1.319078947368421, "grad_norm": 0.11122938245534897, "learning_rate": 9.634255311783846e-05, "loss": 0.0075, "step": 6815 }, { "epoch": 1.3192724458204335, "grad_norm": 0.036491911858320236, "learning_rate": 9.634146950894793e-05, "loss": 0.0082, "step": 6816 }, { "epoch": 1.319465944272446, "grad_norm": 0.09791169315576553, "learning_rate": 9.634038574636003e-05, "loss": 0.0098, "step": 6817 }, { "epoch": 1.3196594427244581, "grad_norm": 0.05084078386425972, "learning_rate": 9.633930183007877e-05, "loss": 0.0075, "step": 6818 }, { "epoch": 1.3198529411764706, "grad_norm": 0.10341786593198776, "learning_rate": 9.633821776010815e-05, "loss": 0.0089, "step": 6819 }, { "epoch": 1.320046439628483, "grad_norm": 0.07943049818277359, "learning_rate": 9.633713353645222e-05, "loss": 0.0079, "step": 6820 }, { "epoch": 1.3202399380804954, "grad_norm": 0.13445532321929932, "learning_rate": 9.633604915911503e-05, "loss": 0.0085, "step": 6821 }, { "epoch": 1.3204334365325077, "grad_norm": 0.09324142336845398, "learning_rate": 9.63349646281006e-05, "loss": 0.0071, "step": 6822 }, { "epoch": 1.32062693498452, "grad_norm": 0.08667690306901932, "learning_rate": 9.633387994341294e-05, "loss": 0.0076, "step": 6823 }, { "epoch": 1.3208204334365325, "grad_norm": 0.12703737616539001, "learning_rate": 9.633279510505611e-05, "loss": 0.0081, "step": 6824 }, { "epoch": 1.3210139318885448, "grad_norm": 0.06497558951377869, "learning_rate": 9.633171011303413e-05, "loss": 0.0091, "step": 6825 }, { "epoch": 1.3212074303405572, "grad_norm": 0.15029197931289673, "learning_rate": 9.633062496735103e-05, "loss": 0.0081, "step": 6826 }, { "epoch": 1.3214009287925697, "grad_norm": 0.05483126640319824, "learning_rate": 9.632953966801086e-05, "loss": 0.0098, "step": 6827 }, { "epoch": 1.321594427244582, "grad_norm": 0.08851192146539688, "learning_rate": 9.632845421501762e-05, "loss": 0.0065, "step": 6828 }, { "epoch": 1.3217879256965945, "grad_norm": 0.08267904818058014, "learning_rate": 9.63273686083754e-05, "loss": 0.007, "step": 6829 }, { "epoch": 1.3219814241486068, "grad_norm": 0.09459616988897324, "learning_rate": 9.63262828480882e-05, "loss": 0.0091, "step": 6830 }, { "epoch": 1.3221749226006192, "grad_norm": 0.08600477874279022, "learning_rate": 
9.632519693416006e-05, "loss": 0.0098, "step": 6831 }, { "epoch": 1.3223684210526316, "grad_norm": 0.03698163107037544, "learning_rate": 9.632411086659502e-05, "loss": 0.0093, "step": 6832 }, { "epoch": 1.3225619195046439, "grad_norm": 0.09985169023275375, "learning_rate": 9.632302464539712e-05, "loss": 0.0083, "step": 6833 }, { "epoch": 1.3227554179566563, "grad_norm": 0.032279934734106064, "learning_rate": 9.63219382705704e-05, "loss": 0.0088, "step": 6834 }, { "epoch": 1.3229489164086687, "grad_norm": 0.0669139102101326, "learning_rate": 9.632085174211889e-05, "loss": 0.009, "step": 6835 }, { "epoch": 1.3231424148606812, "grad_norm": 0.043112613260746, "learning_rate": 9.631976506004663e-05, "loss": 0.0087, "step": 6836 }, { "epoch": 1.3233359133126936, "grad_norm": 0.06611806154251099, "learning_rate": 9.631867822435768e-05, "loss": 0.0095, "step": 6837 }, { "epoch": 1.3235294117647058, "grad_norm": 0.04014051705598831, "learning_rate": 9.631759123505605e-05, "loss": 0.0092, "step": 6838 }, { "epoch": 1.3237229102167183, "grad_norm": 0.06196175515651703, "learning_rate": 9.631650409214581e-05, "loss": 0.0078, "step": 6839 }, { "epoch": 1.3239164086687307, "grad_norm": 0.049228981137275696, "learning_rate": 9.631541679563097e-05, "loss": 0.0095, "step": 6840 }, { "epoch": 1.324109907120743, "grad_norm": 0.11574845761060715, "learning_rate": 9.631432934551562e-05, "loss": 0.0093, "step": 6841 }, { "epoch": 1.3243034055727554, "grad_norm": 0.05488743633031845, "learning_rate": 9.631324174180375e-05, "loss": 0.0092, "step": 6842 }, { "epoch": 1.3244969040247678, "grad_norm": 0.14361615478992462, "learning_rate": 9.631215398449944e-05, "loss": 0.0091, "step": 6843 }, { "epoch": 1.3246904024767803, "grad_norm": 0.039646122604608536, "learning_rate": 9.63110660736067e-05, "loss": 0.0083, "step": 6844 }, { "epoch": 1.3248839009287925, "grad_norm": 0.12458084523677826, "learning_rate": 9.630997800912962e-05, "loss": 0.009, "step": 6845 }, { "epoch": 1.325077399380805, "grad_norm": 0.06197606027126312, "learning_rate": 9.63088897910722e-05, "loss": 0.0087, "step": 6846 }, { "epoch": 1.3252708978328174, "grad_norm": 0.10126174241304398, "learning_rate": 9.630780141943853e-05, "loss": 0.0079, "step": 6847 }, { "epoch": 1.3254643962848296, "grad_norm": 0.08793260157108307, "learning_rate": 9.630671289423261e-05, "loss": 0.0104, "step": 6848 }, { "epoch": 1.325657894736842, "grad_norm": 0.07746618986129761, "learning_rate": 9.630562421545852e-05, "loss": 0.0093, "step": 6849 }, { "epoch": 1.3258513931888545, "grad_norm": 0.09996771812438965, "learning_rate": 9.63045353831203e-05, "loss": 0.0079, "step": 6850 }, { "epoch": 1.326044891640867, "grad_norm": 0.04475226253271103, "learning_rate": 9.630344639722199e-05, "loss": 0.0083, "step": 6851 }, { "epoch": 1.3262383900928794, "grad_norm": 0.16226106882095337, "learning_rate": 9.630235725776765e-05, "loss": 0.0082, "step": 6852 }, { "epoch": 1.3264318885448916, "grad_norm": 0.06030472740530968, "learning_rate": 9.630126796476131e-05, "loss": 0.0084, "step": 6853 }, { "epoch": 1.326625386996904, "grad_norm": 0.12611255049705505, "learning_rate": 9.630017851820704e-05, "loss": 0.0102, "step": 6854 }, { "epoch": 1.3268188854489165, "grad_norm": 0.08494049310684204, "learning_rate": 9.62990889181089e-05, "loss": 0.0091, "step": 6855 }, { "epoch": 1.3270123839009287, "grad_norm": 0.08040724694728851, "learning_rate": 9.62979991644709e-05, "loss": 0.0094, "step": 6856 }, { "epoch": 1.3272058823529411, "grad_norm": 0.08856910467147827, "learning_rate": 
9.629690925729711e-05, "loss": 0.0096, "step": 6857 }, { "epoch": 1.3273993808049536, "grad_norm": 0.03683893382549286, "learning_rate": 9.629581919659162e-05, "loss": 0.0097, "step": 6858 }, { "epoch": 1.327592879256966, "grad_norm": 0.05403870716691017, "learning_rate": 9.629472898235843e-05, "loss": 0.0086, "step": 6859 }, { "epoch": 1.3277863777089784, "grad_norm": 0.06315679103136063, "learning_rate": 9.62936386146016e-05, "loss": 0.0088, "step": 6860 }, { "epoch": 1.3279798761609907, "grad_norm": 0.03146445378661156, "learning_rate": 9.629254809332522e-05, "loss": 0.008, "step": 6861 }, { "epoch": 1.328173374613003, "grad_norm": 0.06961636990308762, "learning_rate": 9.62914574185333e-05, "loss": 0.0105, "step": 6862 }, { "epoch": 1.3283668730650156, "grad_norm": 0.04297405853867531, "learning_rate": 9.629036659022993e-05, "loss": 0.0071, "step": 6863 }, { "epoch": 1.3285603715170278, "grad_norm": 0.04271300509572029, "learning_rate": 9.628927560841914e-05, "loss": 0.0083, "step": 6864 }, { "epoch": 1.3287538699690402, "grad_norm": 0.07047205418348312, "learning_rate": 9.628818447310501e-05, "loss": 0.007, "step": 6865 }, { "epoch": 1.3289473684210527, "grad_norm": 0.06722056865692139, "learning_rate": 9.628709318429159e-05, "loss": 0.0082, "step": 6866 }, { "epoch": 1.329140866873065, "grad_norm": 0.0666690245270729, "learning_rate": 9.628600174198291e-05, "loss": 0.0072, "step": 6867 }, { "epoch": 1.3293343653250773, "grad_norm": 0.09937846660614014, "learning_rate": 9.628491014618306e-05, "loss": 0.0077, "step": 6868 }, { "epoch": 1.3295278637770898, "grad_norm": 0.06543686240911484, "learning_rate": 9.628381839689608e-05, "loss": 0.0077, "step": 6869 }, { "epoch": 1.3297213622291022, "grad_norm": 0.05223461985588074, "learning_rate": 9.628272649412604e-05, "loss": 0.0086, "step": 6870 }, { "epoch": 1.3299148606811144, "grad_norm": 0.04491123557090759, "learning_rate": 9.6281634437877e-05, "loss": 0.008, "step": 6871 }, { "epoch": 1.3301083591331269, "grad_norm": 0.050053998827934265, "learning_rate": 9.628054222815301e-05, "loss": 0.008, "step": 6872 }, { "epoch": 1.3303018575851393, "grad_norm": 0.02947920933365822, "learning_rate": 9.627944986495814e-05, "loss": 0.0079, "step": 6873 }, { "epoch": 1.3304953560371517, "grad_norm": 0.082120880484581, "learning_rate": 9.627835734829645e-05, "loss": 0.0076, "step": 6874 }, { "epoch": 1.3306888544891642, "grad_norm": 0.04087759554386139, "learning_rate": 9.6277264678172e-05, "loss": 0.0078, "step": 6875 }, { "epoch": 1.3308823529411764, "grad_norm": 0.08267058432102203, "learning_rate": 9.627617185458883e-05, "loss": 0.0083, "step": 6876 }, { "epoch": 1.3310758513931888, "grad_norm": 0.04305100068449974, "learning_rate": 9.627507887755102e-05, "loss": 0.0108, "step": 6877 }, { "epoch": 1.3312693498452013, "grad_norm": 0.11290997266769409, "learning_rate": 9.627398574706266e-05, "loss": 0.0068, "step": 6878 }, { "epoch": 1.3314628482972135, "grad_norm": 0.07719332724809647, "learning_rate": 9.62728924631278e-05, "loss": 0.009, "step": 6879 }, { "epoch": 1.331656346749226, "grad_norm": 0.08468282222747803, "learning_rate": 9.627179902575047e-05, "loss": 0.0089, "step": 6880 }, { "epoch": 1.3318498452012384, "grad_norm": 0.08132702857255936, "learning_rate": 9.627070543493478e-05, "loss": 0.0085, "step": 6881 }, { "epoch": 1.3320433436532508, "grad_norm": 0.028234802186489105, "learning_rate": 9.626961169068477e-05, "loss": 0.0073, "step": 6882 }, { "epoch": 1.3322368421052633, "grad_norm": 0.07678382843732834, "learning_rate": 
9.626851779300451e-05, "loss": 0.0076, "step": 6883 }, { "epoch": 1.3324303405572755, "grad_norm": 0.049969326704740524, "learning_rate": 9.626742374189806e-05, "loss": 0.0102, "step": 6884 }, { "epoch": 1.332623839009288, "grad_norm": 0.06529580801725388, "learning_rate": 9.626632953736951e-05, "loss": 0.0073, "step": 6885 }, { "epoch": 1.3328173374613004, "grad_norm": 0.07020263373851776, "learning_rate": 9.626523517942292e-05, "loss": 0.0073, "step": 6886 }, { "epoch": 1.3330108359133126, "grad_norm": 0.10355490446090698, "learning_rate": 9.626414066806235e-05, "loss": 0.0074, "step": 6887 }, { "epoch": 1.333204334365325, "grad_norm": 0.06322084367275238, "learning_rate": 9.626304600329187e-05, "loss": 0.0083, "step": 6888 }, { "epoch": 1.3333978328173375, "grad_norm": 0.053736697882413864, "learning_rate": 9.626195118511555e-05, "loss": 0.0094, "step": 6889 }, { "epoch": 1.33359133126935, "grad_norm": 0.0650179386138916, "learning_rate": 9.626085621353746e-05, "loss": 0.0103, "step": 6890 }, { "epoch": 1.3337848297213624, "grad_norm": 0.040210168808698654, "learning_rate": 9.62597610885617e-05, "loss": 0.0119, "step": 6891 }, { "epoch": 1.3339783281733746, "grad_norm": 0.06146945431828499, "learning_rate": 9.625866581019227e-05, "loss": 0.0085, "step": 6892 }, { "epoch": 1.334171826625387, "grad_norm": 0.10479462891817093, "learning_rate": 9.625757037843331e-05, "loss": 0.0065, "step": 6893 }, { "epoch": 1.3343653250773992, "grad_norm": 0.04534396529197693, "learning_rate": 9.625647479328888e-05, "loss": 0.0085, "step": 6894 }, { "epoch": 1.3345588235294117, "grad_norm": 0.10343945771455765, "learning_rate": 9.625537905476303e-05, "loss": 0.0074, "step": 6895 }, { "epoch": 1.3347523219814241, "grad_norm": 0.03144298493862152, "learning_rate": 9.625428316285985e-05, "loss": 0.0099, "step": 6896 }, { "epoch": 1.3349458204334366, "grad_norm": 0.08307361602783203, "learning_rate": 9.62531871175834e-05, "loss": 0.0088, "step": 6897 }, { "epoch": 1.335139318885449, "grad_norm": 0.03740439563989639, "learning_rate": 9.625209091893777e-05, "loss": 0.0077, "step": 6898 }, { "epoch": 1.3353328173374612, "grad_norm": 0.07220032066106796, "learning_rate": 9.625099456692705e-05, "loss": 0.0102, "step": 6899 }, { "epoch": 1.3355263157894737, "grad_norm": 0.058864325284957886, "learning_rate": 9.624989806155528e-05, "loss": 0.0091, "step": 6900 }, { "epoch": 1.3357198142414861, "grad_norm": 0.08191309124231339, "learning_rate": 9.624880140282655e-05, "loss": 0.0094, "step": 6901 }, { "epoch": 1.3359133126934983, "grad_norm": 0.08175534754991531, "learning_rate": 9.624770459074495e-05, "loss": 0.0091, "step": 6902 }, { "epoch": 1.3361068111455108, "grad_norm": 0.07857760787010193, "learning_rate": 9.624660762531454e-05, "loss": 0.0079, "step": 6903 }, { "epoch": 1.3363003095975232, "grad_norm": 0.08102010935544968, "learning_rate": 9.624551050653941e-05, "loss": 0.0094, "step": 6904 }, { "epoch": 1.3364938080495357, "grad_norm": 0.06485046446323395, "learning_rate": 9.624441323442363e-05, "loss": 0.0086, "step": 6905 }, { "epoch": 1.336687306501548, "grad_norm": 0.15310250222682953, "learning_rate": 9.624331580897129e-05, "loss": 0.0094, "step": 6906 }, { "epoch": 1.3368808049535603, "grad_norm": 0.08549971878528595, "learning_rate": 9.624221823018646e-05, "loss": 0.0084, "step": 6907 }, { "epoch": 1.3370743034055728, "grad_norm": 0.17950467765331268, "learning_rate": 9.624112049807322e-05, "loss": 0.0097, "step": 6908 }, { "epoch": 1.3372678018575852, "grad_norm": 0.1111312285065651, "learning_rate": 
9.624002261263567e-05, "loss": 0.0072, "step": 6909 }, { "epoch": 1.3374613003095974, "grad_norm": 0.11865463107824326, "learning_rate": 9.623892457387788e-05, "loss": 0.0081, "step": 6910 }, { "epoch": 1.3376547987616099, "grad_norm": 0.15746060013771057, "learning_rate": 9.623782638180392e-05, "loss": 0.0087, "step": 6911 }, { "epoch": 1.3378482972136223, "grad_norm": 0.05657145008444786, "learning_rate": 9.623672803641787e-05, "loss": 0.0089, "step": 6912 }, { "epoch": 1.3380417956656347, "grad_norm": 0.14271627366542816, "learning_rate": 9.623562953772385e-05, "loss": 0.0103, "step": 6913 }, { "epoch": 1.3382352941176472, "grad_norm": 0.09367464482784271, "learning_rate": 9.623453088572591e-05, "loss": 0.0112, "step": 6914 }, { "epoch": 1.3384287925696594, "grad_norm": 0.15008942782878876, "learning_rate": 9.623343208042814e-05, "loss": 0.0088, "step": 6915 }, { "epoch": 1.3386222910216719, "grad_norm": 0.12145937979221344, "learning_rate": 9.623233312183465e-05, "loss": 0.0089, "step": 6916 }, { "epoch": 1.3388157894736843, "grad_norm": 0.11508671939373016, "learning_rate": 9.623123400994949e-05, "loss": 0.0087, "step": 6917 }, { "epoch": 1.3390092879256965, "grad_norm": 0.12274739146232605, "learning_rate": 9.623013474477677e-05, "loss": 0.009, "step": 6918 }, { "epoch": 1.339202786377709, "grad_norm": 0.06375275552272797, "learning_rate": 9.622903532632055e-05, "loss": 0.0085, "step": 6919 }, { "epoch": 1.3393962848297214, "grad_norm": 0.14070135354995728, "learning_rate": 9.622793575458494e-05, "loss": 0.0094, "step": 6920 }, { "epoch": 1.3395897832817338, "grad_norm": 0.05049096792936325, "learning_rate": 9.622683602957403e-05, "loss": 0.0083, "step": 6921 }, { "epoch": 1.339783281733746, "grad_norm": 0.14830859005451202, "learning_rate": 9.622573615129191e-05, "loss": 0.0076, "step": 6922 }, { "epoch": 1.3399767801857585, "grad_norm": 0.0557839572429657, "learning_rate": 9.622463611974265e-05, "loss": 0.0089, "step": 6923 }, { "epoch": 1.340170278637771, "grad_norm": 0.06888988614082336, "learning_rate": 9.622353593493035e-05, "loss": 0.0085, "step": 6924 }, { "epoch": 1.3403637770897832, "grad_norm": 0.09703002125024796, "learning_rate": 9.622243559685911e-05, "loss": 0.0088, "step": 6925 }, { "epoch": 1.3405572755417956, "grad_norm": 0.033916912972927094, "learning_rate": 9.6221335105533e-05, "loss": 0.0077, "step": 6926 }, { "epoch": 1.340750773993808, "grad_norm": 0.0614619255065918, "learning_rate": 9.622023446095613e-05, "loss": 0.0081, "step": 6927 }, { "epoch": 1.3409442724458205, "grad_norm": 0.07840016484260559, "learning_rate": 9.62191336631326e-05, "loss": 0.0112, "step": 6928 }, { "epoch": 1.341137770897833, "grad_norm": 0.04720645025372505, "learning_rate": 9.621803271206646e-05, "loss": 0.0087, "step": 6929 }, { "epoch": 1.3413312693498451, "grad_norm": 0.09363842010498047, "learning_rate": 9.621693160776186e-05, "loss": 0.0105, "step": 6930 }, { "epoch": 1.3415247678018576, "grad_norm": 0.0687270537018776, "learning_rate": 9.621583035022284e-05, "loss": 0.0075, "step": 6931 }, { "epoch": 1.34171826625387, "grad_norm": 0.09457188099622726, "learning_rate": 9.621472893945353e-05, "loss": 0.0099, "step": 6932 }, { "epoch": 1.3419117647058822, "grad_norm": 0.10178039968013763, "learning_rate": 9.6213627375458e-05, "loss": 0.0078, "step": 6933 }, { "epoch": 1.3421052631578947, "grad_norm": 0.05542154610157013, "learning_rate": 9.621252565824037e-05, "loss": 0.0078, "step": 6934 }, { "epoch": 1.3422987616099071, "grad_norm": 0.11822275072336197, "learning_rate": 
9.621142378780474e-05, "loss": 0.0095, "step": 6935 }, { "epoch": 1.3424922600619196, "grad_norm": 0.046088218688964844, "learning_rate": 9.621032176415516e-05, "loss": 0.0084, "step": 6936 }, { "epoch": 1.342685758513932, "grad_norm": 0.09303438663482666, "learning_rate": 9.620921958729577e-05, "loss": 0.0086, "step": 6937 }, { "epoch": 1.3428792569659442, "grad_norm": 0.09708806127309799, "learning_rate": 9.620811725723067e-05, "loss": 0.0075, "step": 6938 }, { "epoch": 1.3430727554179567, "grad_norm": 0.06648127734661102, "learning_rate": 9.620701477396394e-05, "loss": 0.0084, "step": 6939 }, { "epoch": 1.3432662538699691, "grad_norm": 0.13874560594558716, "learning_rate": 9.620591213749966e-05, "loss": 0.0073, "step": 6940 }, { "epoch": 1.3434597523219813, "grad_norm": 0.09719549864530563, "learning_rate": 9.620480934784196e-05, "loss": 0.0093, "step": 6941 }, { "epoch": 1.3436532507739938, "grad_norm": 0.16190607845783234, "learning_rate": 9.620370640499495e-05, "loss": 0.0096, "step": 6942 }, { "epoch": 1.3438467492260062, "grad_norm": 0.13612550497055054, "learning_rate": 9.620260330896271e-05, "loss": 0.007, "step": 6943 }, { "epoch": 1.3440402476780187, "grad_norm": 0.12365803867578506, "learning_rate": 9.620150005974933e-05, "loss": 0.0078, "step": 6944 }, { "epoch": 1.3442337461300309, "grad_norm": 0.18387599289417267, "learning_rate": 9.620039665735893e-05, "loss": 0.0084, "step": 6945 }, { "epoch": 1.3444272445820433, "grad_norm": 0.10788527876138687, "learning_rate": 9.61992931017956e-05, "loss": 0.009, "step": 6946 }, { "epoch": 1.3446207430340558, "grad_norm": 0.21308492124080658, "learning_rate": 9.619818939306346e-05, "loss": 0.0086, "step": 6947 }, { "epoch": 1.344814241486068, "grad_norm": 0.11664694547653198, "learning_rate": 9.61970855311666e-05, "loss": 0.0091, "step": 6948 }, { "epoch": 1.3450077399380804, "grad_norm": 0.17855195701122284, "learning_rate": 9.619598151610912e-05, "loss": 0.0087, "step": 6949 }, { "epoch": 1.3452012383900929, "grad_norm": 0.14471223950386047, "learning_rate": 9.619487734789513e-05, "loss": 0.0069, "step": 6950 }, { "epoch": 1.3453947368421053, "grad_norm": 0.13759778439998627, "learning_rate": 9.619377302652878e-05, "loss": 0.0084, "step": 6951 }, { "epoch": 1.3455882352941178, "grad_norm": 0.09486768394708633, "learning_rate": 9.619266855201409e-05, "loss": 0.0073, "step": 6952 }, { "epoch": 1.34578173374613, "grad_norm": 0.16433307528495789, "learning_rate": 9.619156392435521e-05, "loss": 0.0091, "step": 6953 }, { "epoch": 1.3459752321981424, "grad_norm": 0.10203689336776733, "learning_rate": 9.619045914355625e-05, "loss": 0.0094, "step": 6954 }, { "epoch": 1.3461687306501549, "grad_norm": 0.1459539532661438, "learning_rate": 9.618935420962132e-05, "loss": 0.0094, "step": 6955 }, { "epoch": 1.346362229102167, "grad_norm": 0.0734780952334404, "learning_rate": 9.618824912255452e-05, "loss": 0.0078, "step": 6956 }, { "epoch": 1.3465557275541795, "grad_norm": 0.11611774563789368, "learning_rate": 9.618714388235996e-05, "loss": 0.0076, "step": 6957 }, { "epoch": 1.346749226006192, "grad_norm": 0.09188499301671982, "learning_rate": 9.618603848904172e-05, "loss": 0.0091, "step": 6958 }, { "epoch": 1.3469427244582044, "grad_norm": 0.07351808995008469, "learning_rate": 9.618493294260398e-05, "loss": 0.0073, "step": 6959 }, { "epoch": 1.3471362229102168, "grad_norm": 0.1022578701376915, "learning_rate": 9.618382724305078e-05, "loss": 0.0103, "step": 6960 }, { "epoch": 1.347329721362229, "grad_norm": 0.08569595962762833, "learning_rate": 
9.618272139038625e-05, "loss": 0.0073, "step": 6961 }, { "epoch": 1.3475232198142415, "grad_norm": 0.11191006004810333, "learning_rate": 9.618161538461453e-05, "loss": 0.0103, "step": 6962 }, { "epoch": 1.347716718266254, "grad_norm": 0.10330814868211746, "learning_rate": 9.618050922573972e-05, "loss": 0.0101, "step": 6963 }, { "epoch": 1.3479102167182662, "grad_norm": 0.1030784398317337, "learning_rate": 9.61794029137659e-05, "loss": 0.0094, "step": 6964 }, { "epoch": 1.3481037151702786, "grad_norm": 0.08817562460899353, "learning_rate": 9.617829644869721e-05, "loss": 0.009, "step": 6965 }, { "epoch": 1.348297213622291, "grad_norm": 0.11379374563694, "learning_rate": 9.617718983053777e-05, "loss": 0.0114, "step": 6966 }, { "epoch": 1.3484907120743035, "grad_norm": 0.04603414237499237, "learning_rate": 9.617608305929169e-05, "loss": 0.0081, "step": 6967 }, { "epoch": 1.3486842105263157, "grad_norm": 0.12759388983249664, "learning_rate": 9.617497613496307e-05, "loss": 0.0098, "step": 6968 }, { "epoch": 1.3488777089783281, "grad_norm": 0.05529092252254486, "learning_rate": 9.617386905755604e-05, "loss": 0.0073, "step": 6969 }, { "epoch": 1.3490712074303406, "grad_norm": 0.13688591122627258, "learning_rate": 9.617276182707469e-05, "loss": 0.0083, "step": 6970 }, { "epoch": 1.3492647058823528, "grad_norm": 0.0568755641579628, "learning_rate": 9.617165444352319e-05, "loss": 0.0093, "step": 6971 }, { "epoch": 1.3494582043343653, "grad_norm": 0.09510990977287292, "learning_rate": 9.61705469069056e-05, "loss": 0.0091, "step": 6972 }, { "epoch": 1.3496517027863777, "grad_norm": 0.10141156613826752, "learning_rate": 9.616943921722606e-05, "loss": 0.0077, "step": 6973 }, { "epoch": 1.3498452012383901, "grad_norm": 0.0738569051027298, "learning_rate": 9.616833137448871e-05, "loss": 0.0086, "step": 6974 }, { "epoch": 1.3500386996904026, "grad_norm": 0.10883177071809769, "learning_rate": 9.616722337869764e-05, "loss": 0.0089, "step": 6975 }, { "epoch": 1.3502321981424148, "grad_norm": 0.10788090527057648, "learning_rate": 9.616611522985697e-05, "loss": 0.0077, "step": 6976 }, { "epoch": 1.3504256965944272, "grad_norm": 0.08973916620016098, "learning_rate": 9.616500692797081e-05, "loss": 0.0075, "step": 6977 }, { "epoch": 1.3506191950464397, "grad_norm": 0.12813912332057953, "learning_rate": 9.616389847304334e-05, "loss": 0.0088, "step": 6978 }, { "epoch": 1.350812693498452, "grad_norm": 0.0805954560637474, "learning_rate": 9.616278986507861e-05, "loss": 0.0103, "step": 6979 }, { "epoch": 1.3510061919504643, "grad_norm": 0.07927736639976501, "learning_rate": 9.616168110408077e-05, "loss": 0.0101, "step": 6980 }, { "epoch": 1.3511996904024768, "grad_norm": 0.0837177038192749, "learning_rate": 9.616057219005394e-05, "loss": 0.0076, "step": 6981 }, { "epoch": 1.3513931888544892, "grad_norm": 0.05708486586809158, "learning_rate": 9.615946312300224e-05, "loss": 0.0073, "step": 6982 }, { "epoch": 1.3515866873065017, "grad_norm": 0.09853262454271317, "learning_rate": 9.615835390292983e-05, "loss": 0.0093, "step": 6983 }, { "epoch": 1.3517801857585139, "grad_norm": 0.03943983465433121, "learning_rate": 9.615724452984079e-05, "loss": 0.0069, "step": 6984 }, { "epoch": 1.3519736842105263, "grad_norm": 0.0847296267747879, "learning_rate": 9.615613500373923e-05, "loss": 0.0101, "step": 6985 }, { "epoch": 1.3521671826625388, "grad_norm": 0.06256972253322601, "learning_rate": 9.615502532462933e-05, "loss": 0.0099, "step": 6986 }, { "epoch": 1.352360681114551, "grad_norm": 0.08995971828699112, "learning_rate": 
9.615391549251518e-05, "loss": 0.0101, "step": 6987 }, { "epoch": 1.3525541795665634, "grad_norm": 0.0581686869263649, "learning_rate": 9.61528055074009e-05, "loss": 0.0092, "step": 6988 }, { "epoch": 1.3527476780185759, "grad_norm": 0.07267807424068451, "learning_rate": 9.615169536929064e-05, "loss": 0.0083, "step": 6989 }, { "epoch": 1.3529411764705883, "grad_norm": 0.05413058027625084, "learning_rate": 9.615058507818851e-05, "loss": 0.0092, "step": 6990 }, { "epoch": 1.3531346749226008, "grad_norm": 0.06974195688962936, "learning_rate": 9.614947463409866e-05, "loss": 0.008, "step": 6991 }, { "epoch": 1.353328173374613, "grad_norm": 0.06625349074602127, "learning_rate": 9.614836403702519e-05, "loss": 0.0081, "step": 6992 }, { "epoch": 1.3535216718266254, "grad_norm": 0.08801767975091934, "learning_rate": 9.614725328697223e-05, "loss": 0.0084, "step": 6993 }, { "epoch": 1.3537151702786376, "grad_norm": 0.06637287139892578, "learning_rate": 9.614614238394395e-05, "loss": 0.0085, "step": 6994 }, { "epoch": 1.35390866873065, "grad_norm": 0.11163681000471115, "learning_rate": 9.614503132794442e-05, "loss": 0.0065, "step": 6995 }, { "epoch": 1.3541021671826625, "grad_norm": 0.06675436347723007, "learning_rate": 9.614392011897783e-05, "loss": 0.0089, "step": 6996 }, { "epoch": 1.354295665634675, "grad_norm": 0.12124568969011307, "learning_rate": 9.614280875704827e-05, "loss": 0.0092, "step": 6997 }, { "epoch": 1.3544891640866874, "grad_norm": 0.08580677956342697, "learning_rate": 9.61416972421599e-05, "loss": 0.0082, "step": 6998 }, { "epoch": 1.3546826625386996, "grad_norm": 0.13022609055042267, "learning_rate": 9.614058557431682e-05, "loss": 0.007, "step": 6999 }, { "epoch": 1.354876160990712, "grad_norm": 0.06897071748971939, "learning_rate": 9.613947375352318e-05, "loss": 0.0069, "step": 7000 }, { "epoch": 1.3550696594427245, "grad_norm": 0.09096347540616989, "learning_rate": 9.613836177978311e-05, "loss": 0.0078, "step": 7001 }, { "epoch": 1.3552631578947367, "grad_norm": 0.07414508610963821, "learning_rate": 9.613724965310076e-05, "loss": 0.0087, "step": 7002 }, { "epoch": 1.3554566563467492, "grad_norm": 0.04246091470122337, "learning_rate": 9.613613737348024e-05, "loss": 0.0068, "step": 7003 }, { "epoch": 1.3556501547987616, "grad_norm": 0.08465325087308884, "learning_rate": 9.613502494092571e-05, "loss": 0.0071, "step": 7004 }, { "epoch": 1.355843653250774, "grad_norm": 0.06719225645065308, "learning_rate": 9.613391235544129e-05, "loss": 0.0086, "step": 7005 }, { "epoch": 1.3560371517027865, "grad_norm": 0.06550008058547974, "learning_rate": 9.613279961703111e-05, "loss": 0.0081, "step": 7006 }, { "epoch": 1.3562306501547987, "grad_norm": 0.0750652626156807, "learning_rate": 9.613168672569933e-05, "loss": 0.009, "step": 7007 }, { "epoch": 1.3564241486068112, "grad_norm": 0.06845016032457352, "learning_rate": 9.613057368145006e-05, "loss": 0.0081, "step": 7008 }, { "epoch": 1.3566176470588236, "grad_norm": 0.053975265473127365, "learning_rate": 9.612946048428747e-05, "loss": 0.0091, "step": 7009 }, { "epoch": 1.3568111455108358, "grad_norm": 0.11025320738554001, "learning_rate": 9.612834713421567e-05, "loss": 0.0079, "step": 7010 }, { "epoch": 1.3570046439628483, "grad_norm": 0.06090637668967247, "learning_rate": 9.612723363123881e-05, "loss": 0.0098, "step": 7011 }, { "epoch": 1.3571981424148607, "grad_norm": 0.13157957792282104, "learning_rate": 9.612611997536103e-05, "loss": 0.0072, "step": 7012 }, { "epoch": 1.3573916408668731, "grad_norm": 0.06514769792556763, "learning_rate": 
9.612500616658647e-05, "loss": 0.0086, "step": 7013 }, { "epoch": 1.3575851393188856, "grad_norm": 0.14002922177314758, "learning_rate": 9.612389220491927e-05, "loss": 0.0091, "step": 7014 }, { "epoch": 1.3577786377708978, "grad_norm": 0.09195441007614136, "learning_rate": 9.612277809036357e-05, "loss": 0.0101, "step": 7015 }, { "epoch": 1.3579721362229102, "grad_norm": 0.13270409405231476, "learning_rate": 9.61216638229235e-05, "loss": 0.0074, "step": 7016 }, { "epoch": 1.3581656346749227, "grad_norm": 0.10386575758457184, "learning_rate": 9.612054940260323e-05, "loss": 0.0086, "step": 7017 }, { "epoch": 1.358359133126935, "grad_norm": 0.10467828810214996, "learning_rate": 9.611943482940689e-05, "loss": 0.007, "step": 7018 }, { "epoch": 1.3585526315789473, "grad_norm": 0.11788134276866913, "learning_rate": 9.611832010333863e-05, "loss": 0.0079, "step": 7019 }, { "epoch": 1.3587461300309598, "grad_norm": 0.05829774588346481, "learning_rate": 9.611720522440259e-05, "loss": 0.0085, "step": 7020 }, { "epoch": 1.3589396284829722, "grad_norm": 0.14749659597873688, "learning_rate": 9.61160901926029e-05, "loss": 0.0084, "step": 7021 }, { "epoch": 1.3591331269349844, "grad_norm": 0.05597825348377228, "learning_rate": 9.611497500794373e-05, "loss": 0.0082, "step": 7022 }, { "epoch": 1.359326625386997, "grad_norm": 0.1599733531475067, "learning_rate": 9.611385967042921e-05, "loss": 0.0074, "step": 7023 }, { "epoch": 1.3595201238390093, "grad_norm": 0.07934549450874329, "learning_rate": 9.611274418006348e-05, "loss": 0.0111, "step": 7024 }, { "epoch": 1.3597136222910216, "grad_norm": 0.12405716627836227, "learning_rate": 9.61116285368507e-05, "loss": 0.0098, "step": 7025 }, { "epoch": 1.359907120743034, "grad_norm": 0.09184841811656952, "learning_rate": 9.611051274079503e-05, "loss": 0.0097, "step": 7026 }, { "epoch": 1.3601006191950464, "grad_norm": 0.10007303208112717, "learning_rate": 9.610939679190059e-05, "loss": 0.008, "step": 7027 }, { "epoch": 1.3602941176470589, "grad_norm": 0.10398092120885849, "learning_rate": 9.610828069017154e-05, "loss": 0.0071, "step": 7028 }, { "epoch": 1.3604876160990713, "grad_norm": 0.08975543081760406, "learning_rate": 9.610716443561204e-05, "loss": 0.0077, "step": 7029 }, { "epoch": 1.3606811145510835, "grad_norm": 0.09520155936479568, "learning_rate": 9.610604802822623e-05, "loss": 0.0081, "step": 7030 }, { "epoch": 1.360874613003096, "grad_norm": 0.050525810569524765, "learning_rate": 9.610493146801825e-05, "loss": 0.0086, "step": 7031 }, { "epoch": 1.3610681114551084, "grad_norm": 0.09658867120742798, "learning_rate": 9.610381475499229e-05, "loss": 0.0091, "step": 7032 }, { "epoch": 1.3612616099071206, "grad_norm": 0.05843878537416458, "learning_rate": 9.610269788915246e-05, "loss": 0.0086, "step": 7033 }, { "epoch": 1.361455108359133, "grad_norm": 0.08252932131290436, "learning_rate": 9.610158087050292e-05, "loss": 0.009, "step": 7034 }, { "epoch": 1.3616486068111455, "grad_norm": 0.0679241269826889, "learning_rate": 9.610046369904783e-05, "loss": 0.009, "step": 7035 }, { "epoch": 1.361842105263158, "grad_norm": 0.0828642025589943, "learning_rate": 9.609934637479136e-05, "loss": 0.0077, "step": 7036 }, { "epoch": 1.3620356037151704, "grad_norm": 0.0766301155090332, "learning_rate": 9.609822889773762e-05, "loss": 0.0088, "step": 7037 }, { "epoch": 1.3622291021671826, "grad_norm": 0.0737309679389, "learning_rate": 9.609711126789081e-05, "loss": 0.008, "step": 7038 }, { "epoch": 1.362422600619195, "grad_norm": 0.060912225395441055, "learning_rate": 
9.609599348525509e-05, "loss": 0.0077, "step": 7039 }, { "epoch": 1.3626160990712075, "grad_norm": 0.07035999745130539, "learning_rate": 9.609487554983456e-05, "loss": 0.0081, "step": 7040 }, { "epoch": 1.3628095975232197, "grad_norm": 0.03126682713627815, "learning_rate": 9.609375746163342e-05, "loss": 0.0097, "step": 7041 }, { "epoch": 1.3630030959752322, "grad_norm": 0.05261893570423126, "learning_rate": 9.60926392206558e-05, "loss": 0.0086, "step": 7042 }, { "epoch": 1.3631965944272446, "grad_norm": 0.03250507265329361, "learning_rate": 9.609152082690591e-05, "loss": 0.0075, "step": 7043 }, { "epoch": 1.363390092879257, "grad_norm": 0.06372825056314468, "learning_rate": 9.609040228038783e-05, "loss": 0.0093, "step": 7044 }, { "epoch": 1.3635835913312693, "grad_norm": 0.05711152032017708, "learning_rate": 9.608928358110578e-05, "loss": 0.0085, "step": 7045 }, { "epoch": 1.3637770897832817, "grad_norm": 0.05783272162079811, "learning_rate": 9.608816472906389e-05, "loss": 0.009, "step": 7046 }, { "epoch": 1.3639705882352942, "grad_norm": 0.07251270860433578, "learning_rate": 9.608704572426634e-05, "loss": 0.0089, "step": 7047 }, { "epoch": 1.3641640866873064, "grad_norm": 0.05501524731516838, "learning_rate": 9.608592656671727e-05, "loss": 0.0094, "step": 7048 }, { "epoch": 1.3643575851393188, "grad_norm": 0.06649965792894363, "learning_rate": 9.608480725642085e-05, "loss": 0.009, "step": 7049 }, { "epoch": 1.3645510835913313, "grad_norm": 0.07633283734321594, "learning_rate": 9.608368779338123e-05, "loss": 0.0102, "step": 7050 }, { "epoch": 1.3647445820433437, "grad_norm": 0.059680912643671036, "learning_rate": 9.608256817760259e-05, "loss": 0.0094, "step": 7051 }, { "epoch": 1.3649380804953561, "grad_norm": 0.0759977176785469, "learning_rate": 9.608144840908909e-05, "loss": 0.0087, "step": 7052 }, { "epoch": 1.3651315789473684, "grad_norm": 0.08259135484695435, "learning_rate": 9.608032848784488e-05, "loss": 0.0083, "step": 7053 }, { "epoch": 1.3653250773993808, "grad_norm": 0.0750802606344223, "learning_rate": 9.607920841387413e-05, "loss": 0.0095, "step": 7054 }, { "epoch": 1.3655185758513932, "grad_norm": 0.09817662090063095, "learning_rate": 9.6078088187181e-05, "loss": 0.008, "step": 7055 }, { "epoch": 1.3657120743034055, "grad_norm": 0.06993801891803741, "learning_rate": 9.607696780776968e-05, "loss": 0.0098, "step": 7056 }, { "epoch": 1.365905572755418, "grad_norm": 0.09483451396226883, "learning_rate": 9.60758472756443e-05, "loss": 0.0106, "step": 7057 }, { "epoch": 1.3660990712074303, "grad_norm": 0.06411920487880707, "learning_rate": 9.607472659080905e-05, "loss": 0.0085, "step": 7058 }, { "epoch": 1.3662925696594428, "grad_norm": 0.06916837394237518, "learning_rate": 9.607360575326808e-05, "loss": 0.0102, "step": 7059 }, { "epoch": 1.3664860681114552, "grad_norm": 0.1050436794757843, "learning_rate": 9.607248476302558e-05, "loss": 0.008, "step": 7060 }, { "epoch": 1.3666795665634675, "grad_norm": 0.11243049800395966, "learning_rate": 9.607136362008568e-05, "loss": 0.0095, "step": 7061 }, { "epoch": 1.36687306501548, "grad_norm": 0.1258758306503296, "learning_rate": 9.607024232445258e-05, "loss": 0.0108, "step": 7062 }, { "epoch": 1.3670665634674923, "grad_norm": 0.15430907905101776, "learning_rate": 9.606912087613044e-05, "loss": 0.008, "step": 7063 }, { "epoch": 1.3672600619195046, "grad_norm": 0.12661468982696533, "learning_rate": 9.606799927512342e-05, "loss": 0.0103, "step": 7064 }, { "epoch": 1.367453560371517, "grad_norm": 0.14295431971549988, "learning_rate": 
9.606687752143572e-05, "loss": 0.0092, "step": 7065 }, { "epoch": 1.3676470588235294, "grad_norm": 0.11602820456027985, "learning_rate": 9.606575561507147e-05, "loss": 0.0095, "step": 7066 }, { "epoch": 1.3678405572755419, "grad_norm": 0.1085166484117508, "learning_rate": 9.606463355603487e-05, "loss": 0.0074, "step": 7067 }, { "epoch": 1.368034055727554, "grad_norm": 0.14820124208927155, "learning_rate": 9.606351134433007e-05, "loss": 0.0093, "step": 7068 }, { "epoch": 1.3682275541795665, "grad_norm": 0.10599645227193832, "learning_rate": 9.606238897996127e-05, "loss": 0.0085, "step": 7069 }, { "epoch": 1.368421052631579, "grad_norm": 0.17911171913146973, "learning_rate": 9.60612664629326e-05, "loss": 0.0089, "step": 7070 }, { "epoch": 1.3686145510835912, "grad_norm": 0.06733790785074234, "learning_rate": 9.606014379324828e-05, "loss": 0.0082, "step": 7071 }, { "epoch": 1.3688080495356036, "grad_norm": 0.17803244292736053, "learning_rate": 9.605902097091245e-05, "loss": 0.0096, "step": 7072 }, { "epoch": 1.369001547987616, "grad_norm": 0.08060889691114426, "learning_rate": 9.605789799592932e-05, "loss": 0.0074, "step": 7073 }, { "epoch": 1.3691950464396285, "grad_norm": 0.1766708940267563, "learning_rate": 9.605677486830302e-05, "loss": 0.0086, "step": 7074 }, { "epoch": 1.369388544891641, "grad_norm": 0.127587229013443, "learning_rate": 9.605565158803774e-05, "loss": 0.0092, "step": 7075 }, { "epoch": 1.3695820433436532, "grad_norm": 0.13767270743846893, "learning_rate": 9.605452815513768e-05, "loss": 0.0077, "step": 7076 }, { "epoch": 1.3697755417956656, "grad_norm": 0.13667166233062744, "learning_rate": 9.605340456960699e-05, "loss": 0.0081, "step": 7077 }, { "epoch": 1.369969040247678, "grad_norm": 0.10047300904989243, "learning_rate": 9.605228083144987e-05, "loss": 0.0094, "step": 7078 }, { "epoch": 1.3701625386996903, "grad_norm": 0.15914711356163025, "learning_rate": 9.605115694067048e-05, "loss": 0.0081, "step": 7079 }, { "epoch": 1.3703560371517027, "grad_norm": 0.06699233502149582, "learning_rate": 9.6050032897273e-05, "loss": 0.0107, "step": 7080 }, { "epoch": 1.3705495356037152, "grad_norm": 0.11831203103065491, "learning_rate": 9.604890870126162e-05, "loss": 0.0077, "step": 7081 }, { "epoch": 1.3707430340557276, "grad_norm": 0.12430216372013092, "learning_rate": 9.60477843526405e-05, "loss": 0.0074, "step": 7082 }, { "epoch": 1.37093653250774, "grad_norm": 0.09130121767520905, "learning_rate": 9.604665985141385e-05, "loss": 0.0076, "step": 7083 }, { "epoch": 1.3711300309597523, "grad_norm": 0.1609005630016327, "learning_rate": 9.604553519758583e-05, "loss": 0.0112, "step": 7084 }, { "epoch": 1.3713235294117647, "grad_norm": 0.0748656839132309, "learning_rate": 9.604441039116059e-05, "loss": 0.0082, "step": 7085 }, { "epoch": 1.3715170278637772, "grad_norm": 0.14801129698753357, "learning_rate": 9.604328543214239e-05, "loss": 0.0086, "step": 7086 }, { "epoch": 1.3717105263157894, "grad_norm": 0.032229647040367126, "learning_rate": 9.604216032053534e-05, "loss": 0.0065, "step": 7087 }, { "epoch": 1.3719040247678018, "grad_norm": 0.1071728989481926, "learning_rate": 9.604103505634368e-05, "loss": 0.0093, "step": 7088 }, { "epoch": 1.3720975232198143, "grad_norm": 0.06263966858386993, "learning_rate": 9.603990963957155e-05, "loss": 0.0101, "step": 7089 }, { "epoch": 1.3722910216718267, "grad_norm": 0.0779508501291275, "learning_rate": 9.603878407022313e-05, "loss": 0.0079, "step": 7090 }, { "epoch": 1.3724845201238391, "grad_norm": 0.07616910338401794, "learning_rate": 
9.603765834830263e-05, "loss": 0.0079, "step": 7091 }, { "epoch": 1.3726780185758514, "grad_norm": 0.0795876681804657, "learning_rate": 9.603653247381425e-05, "loss": 0.0092, "step": 7092 }, { "epoch": 1.3728715170278638, "grad_norm": 0.06443257629871368, "learning_rate": 9.603540644676215e-05, "loss": 0.0081, "step": 7093 }, { "epoch": 1.373065015479876, "grad_norm": 0.08636771142482758, "learning_rate": 9.603428026715051e-05, "loss": 0.0085, "step": 7094 }, { "epoch": 1.3732585139318885, "grad_norm": 0.037482988089323044, "learning_rate": 9.603315393498351e-05, "loss": 0.007, "step": 7095 }, { "epoch": 1.373452012383901, "grad_norm": 0.1257471740245819, "learning_rate": 9.60320274502654e-05, "loss": 0.009, "step": 7096 }, { "epoch": 1.3736455108359134, "grad_norm": 0.05033702403306961, "learning_rate": 9.603090081300028e-05, "loss": 0.0077, "step": 7097 }, { "epoch": 1.3738390092879258, "grad_norm": 0.12308837473392487, "learning_rate": 9.602977402319238e-05, "loss": 0.0083, "step": 7098 }, { "epoch": 1.374032507739938, "grad_norm": 0.07673268765211105, "learning_rate": 9.60286470808459e-05, "loss": 0.0104, "step": 7099 }, { "epoch": 1.3742260061919505, "grad_norm": 0.14365920424461365, "learning_rate": 9.602751998596505e-05, "loss": 0.0091, "step": 7100 }, { "epoch": 1.374419504643963, "grad_norm": 0.07262194901704788, "learning_rate": 9.602639273855398e-05, "loss": 0.0102, "step": 7101 }, { "epoch": 1.3746130030959751, "grad_norm": 0.16308777034282684, "learning_rate": 9.602526533861687e-05, "loss": 0.0091, "step": 7102 }, { "epoch": 1.3748065015479876, "grad_norm": 0.048489589244127274, "learning_rate": 9.602413778615792e-05, "loss": 0.0074, "step": 7103 }, { "epoch": 1.375, "grad_norm": 0.1701643317937851, "learning_rate": 9.602301008118136e-05, "loss": 0.0083, "step": 7104 }, { "epoch": 1.3751934984520124, "grad_norm": 0.0490446500480175, "learning_rate": 9.602188222369135e-05, "loss": 0.0092, "step": 7105 }, { "epoch": 1.3753869969040249, "grad_norm": 0.13147367537021637, "learning_rate": 9.602075421369209e-05, "loss": 0.0076, "step": 7106 }, { "epoch": 1.375580495356037, "grad_norm": 0.13527914881706238, "learning_rate": 9.601962605118777e-05, "loss": 0.0082, "step": 7107 }, { "epoch": 1.3757739938080495, "grad_norm": 0.11853945255279541, "learning_rate": 9.60184977361826e-05, "loss": 0.0091, "step": 7108 }, { "epoch": 1.375967492260062, "grad_norm": 0.18142680823802948, "learning_rate": 9.601736926868075e-05, "loss": 0.0085, "step": 7109 }, { "epoch": 1.3761609907120742, "grad_norm": 0.10139428824186325, "learning_rate": 9.601624064868643e-05, "loss": 0.0078, "step": 7110 }, { "epoch": 1.3763544891640866, "grad_norm": 0.16767476499080658, "learning_rate": 9.601511187620384e-05, "loss": 0.0083, "step": 7111 }, { "epoch": 1.376547987616099, "grad_norm": 0.07891189306974411, "learning_rate": 9.601398295123717e-05, "loss": 0.0085, "step": 7112 }, { "epoch": 1.3767414860681115, "grad_norm": 0.1337054967880249, "learning_rate": 9.60128538737906e-05, "loss": 0.0085, "step": 7113 }, { "epoch": 1.376934984520124, "grad_norm": 0.12241560220718384, "learning_rate": 9.601172464386836e-05, "loss": 0.0111, "step": 7114 }, { "epoch": 1.3771284829721362, "grad_norm": 0.07581162452697754, "learning_rate": 9.601059526147463e-05, "loss": 0.0072, "step": 7115 }, { "epoch": 1.3773219814241486, "grad_norm": 0.144457146525383, "learning_rate": 9.600946572661363e-05, "loss": 0.0082, "step": 7116 }, { "epoch": 1.3775154798761609, "grad_norm": 0.03254441171884537, "learning_rate": 9.600833603928951e-05, 
"loss": 0.0075, "step": 7117 }, { "epoch": 1.3777089783281733, "grad_norm": 0.10544565320014954, "learning_rate": 9.600720619950653e-05, "loss": 0.009, "step": 7118 }, { "epoch": 1.3779024767801857, "grad_norm": 0.06539534032344818, "learning_rate": 9.600607620726885e-05, "loss": 0.0077, "step": 7119 }, { "epoch": 1.3780959752321982, "grad_norm": 0.09647879749536514, "learning_rate": 9.600494606258068e-05, "loss": 0.0082, "step": 7120 }, { "epoch": 1.3782894736842106, "grad_norm": 0.06938038021326065, "learning_rate": 9.600381576544622e-05, "loss": 0.0067, "step": 7121 }, { "epoch": 1.3784829721362228, "grad_norm": 0.07086554914712906, "learning_rate": 9.600268531586969e-05, "loss": 0.0084, "step": 7122 }, { "epoch": 1.3786764705882353, "grad_norm": 0.07162334024906158, "learning_rate": 9.600155471385527e-05, "loss": 0.0088, "step": 7123 }, { "epoch": 1.3788699690402477, "grad_norm": 0.10015340894460678, "learning_rate": 9.600042395940717e-05, "loss": 0.0073, "step": 7124 }, { "epoch": 1.37906346749226, "grad_norm": 0.05047614127397537, "learning_rate": 9.599929305252959e-05, "loss": 0.0096, "step": 7125 }, { "epoch": 1.3792569659442724, "grad_norm": 0.11174092441797256, "learning_rate": 9.599816199322676e-05, "loss": 0.0086, "step": 7126 }, { "epoch": 1.3794504643962848, "grad_norm": 0.045991186052560806, "learning_rate": 9.599703078150286e-05, "loss": 0.0083, "step": 7127 }, { "epoch": 1.3796439628482973, "grad_norm": 0.10780385136604309, "learning_rate": 9.599589941736209e-05, "loss": 0.0076, "step": 7128 }, { "epoch": 1.3798374613003097, "grad_norm": 0.07136843353509903, "learning_rate": 9.599476790080868e-05, "loss": 0.0087, "step": 7129 }, { "epoch": 1.380030959752322, "grad_norm": 0.0971129834651947, "learning_rate": 9.599363623184682e-05, "loss": 0.0068, "step": 7130 }, { "epoch": 1.3802244582043344, "grad_norm": 0.06761250644922256, "learning_rate": 9.599250441048074e-05, "loss": 0.0085, "step": 7131 }, { "epoch": 1.3804179566563468, "grad_norm": 0.12582603096961975, "learning_rate": 9.599137243671462e-05, "loss": 0.0095, "step": 7132 }, { "epoch": 1.380611455108359, "grad_norm": 0.054675016552209854, "learning_rate": 9.599024031055265e-05, "loss": 0.0094, "step": 7133 }, { "epoch": 1.3808049535603715, "grad_norm": 0.12286785244941711, "learning_rate": 9.59891080319991e-05, "loss": 0.0095, "step": 7134 }, { "epoch": 1.380998452012384, "grad_norm": 0.10493162274360657, "learning_rate": 9.598797560105812e-05, "loss": 0.0089, "step": 7135 }, { "epoch": 1.3811919504643964, "grad_norm": 0.11822619289159775, "learning_rate": 9.598684301773396e-05, "loss": 0.0083, "step": 7136 }, { "epoch": 1.3813854489164088, "grad_norm": 0.13647989928722382, "learning_rate": 9.598571028203082e-05, "loss": 0.0084, "step": 7137 }, { "epoch": 1.381578947368421, "grad_norm": 0.06560898572206497, "learning_rate": 9.59845773939529e-05, "loss": 0.0081, "step": 7138 }, { "epoch": 1.3817724458204335, "grad_norm": 0.13165706396102905, "learning_rate": 9.598344435350442e-05, "loss": 0.0083, "step": 7139 }, { "epoch": 1.381965944272446, "grad_norm": 0.04825718700885773, "learning_rate": 9.598231116068961e-05, "loss": 0.0088, "step": 7140 }, { "epoch": 1.3821594427244581, "grad_norm": 0.12514542043209076, "learning_rate": 9.598117781551264e-05, "loss": 0.0087, "step": 7141 }, { "epoch": 1.3823529411764706, "grad_norm": 0.04127310961484909, "learning_rate": 9.598004431797777e-05, "loss": 0.0079, "step": 7142 }, { "epoch": 1.382546439628483, "grad_norm": 0.08379680663347244, "learning_rate": 9.597891066808918e-05, 
"loss": 0.0077, "step": 7143 }, { "epoch": 1.3827399380804954, "grad_norm": 0.08483126014471054, "learning_rate": 9.59777768658511e-05, "loss": 0.0095, "step": 7144 }, { "epoch": 1.3829334365325077, "grad_norm": 0.08596709370613098, "learning_rate": 9.597664291126774e-05, "loss": 0.008, "step": 7145 }, { "epoch": 1.38312693498452, "grad_norm": 0.09636642783880234, "learning_rate": 9.597550880434333e-05, "loss": 0.0065, "step": 7146 }, { "epoch": 1.3833204334365325, "grad_norm": 0.041660286486148834, "learning_rate": 9.597437454508208e-05, "loss": 0.009, "step": 7147 }, { "epoch": 1.3835139318885448, "grad_norm": 0.12653566896915436, "learning_rate": 9.597324013348818e-05, "loss": 0.0084, "step": 7148 }, { "epoch": 1.3837074303405572, "grad_norm": 0.06594185531139374, "learning_rate": 9.597210556956589e-05, "loss": 0.0093, "step": 7149 }, { "epoch": 1.3839009287925697, "grad_norm": 0.11736004054546356, "learning_rate": 9.597097085331939e-05, "loss": 0.0075, "step": 7150 }, { "epoch": 1.384094427244582, "grad_norm": 0.08395309746265411, "learning_rate": 9.596983598475291e-05, "loss": 0.0083, "step": 7151 }, { "epoch": 1.3842879256965945, "grad_norm": 0.098980613052845, "learning_rate": 9.596870096387069e-05, "loss": 0.0081, "step": 7152 }, { "epoch": 1.3844814241486068, "grad_norm": 0.10057257860898972, "learning_rate": 9.596756579067692e-05, "loss": 0.0088, "step": 7153 }, { "epoch": 1.3846749226006192, "grad_norm": 0.0792561024427414, "learning_rate": 9.596643046517586e-05, "loss": 0.0079, "step": 7154 }, { "epoch": 1.3848684210526316, "grad_norm": 0.11269025504589081, "learning_rate": 9.59652949873717e-05, "loss": 0.0092, "step": 7155 }, { "epoch": 1.3850619195046439, "grad_norm": 0.04218105226755142, "learning_rate": 9.596415935726866e-05, "loss": 0.0075, "step": 7156 }, { "epoch": 1.3852554179566563, "grad_norm": 0.16923663020133972, "learning_rate": 9.596302357487097e-05, "loss": 0.0095, "step": 7157 }, { "epoch": 1.3854489164086687, "grad_norm": 0.06295040994882584, "learning_rate": 9.596188764018286e-05, "loss": 0.008, "step": 7158 }, { "epoch": 1.3856424148606812, "grad_norm": 0.15695200860500336, "learning_rate": 9.596075155320852e-05, "loss": 0.008, "step": 7159 }, { "epoch": 1.3858359133126936, "grad_norm": 0.037284690886735916, "learning_rate": 9.595961531395222e-05, "loss": 0.0084, "step": 7160 }, { "epoch": 1.3860294117647058, "grad_norm": 0.14295193552970886, "learning_rate": 9.595847892241817e-05, "loss": 0.0079, "step": 7161 }, { "epoch": 1.3862229102167183, "grad_norm": 0.07685856521129608, "learning_rate": 9.595734237861057e-05, "loss": 0.0087, "step": 7162 }, { "epoch": 1.3864164086687307, "grad_norm": 0.12792296707630157, "learning_rate": 9.595620568253367e-05, "loss": 0.0076, "step": 7163 }, { "epoch": 1.386609907120743, "grad_norm": 0.09934356063604355, "learning_rate": 9.595506883419168e-05, "loss": 0.0075, "step": 7164 }, { "epoch": 1.3868034055727554, "grad_norm": 0.08852653950452805, "learning_rate": 9.595393183358885e-05, "loss": 0.01, "step": 7165 }, { "epoch": 1.3869969040247678, "grad_norm": 0.13938970863819122, "learning_rate": 9.595279468072939e-05, "loss": 0.0084, "step": 7166 }, { "epoch": 1.3871904024767803, "grad_norm": 0.03177822008728981, "learning_rate": 9.595165737561753e-05, "loss": 0.0069, "step": 7167 }, { "epoch": 1.3873839009287925, "grad_norm": 0.17109987139701843, "learning_rate": 9.595051991825748e-05, "loss": 0.0086, "step": 7168 }, { "epoch": 1.387577399380805, "grad_norm": 0.04664147272706032, "learning_rate": 9.594938230865351e-05, 
"loss": 0.0074, "step": 7169 }, { "epoch": 1.3877708978328174, "grad_norm": 0.12401345372200012, "learning_rate": 9.594824454680981e-05, "loss": 0.0109, "step": 7170 }, { "epoch": 1.3879643962848296, "grad_norm": 0.12140486389398575, "learning_rate": 9.594710663273065e-05, "loss": 0.0081, "step": 7171 }, { "epoch": 1.388157894736842, "grad_norm": 0.09149143099784851, "learning_rate": 9.594596856642023e-05, "loss": 0.0081, "step": 7172 }, { "epoch": 1.3883513931888545, "grad_norm": 0.14908966422080994, "learning_rate": 9.594483034788278e-05, "loss": 0.0088, "step": 7173 }, { "epoch": 1.388544891640867, "grad_norm": 0.06980164349079132, "learning_rate": 9.594369197712255e-05, "loss": 0.0089, "step": 7174 }, { "epoch": 1.3887383900928794, "grad_norm": 0.1514701545238495, "learning_rate": 9.594255345414373e-05, "loss": 0.008, "step": 7175 }, { "epoch": 1.3889318885448916, "grad_norm": 0.06785313785076141, "learning_rate": 9.594141477895063e-05, "loss": 0.0093, "step": 7176 }, { "epoch": 1.389125386996904, "grad_norm": 0.1925024688243866, "learning_rate": 9.59402759515474e-05, "loss": 0.0095, "step": 7177 }, { "epoch": 1.3893188854489165, "grad_norm": 0.06092662364244461, "learning_rate": 9.593913697193833e-05, "loss": 0.0098, "step": 7178 }, { "epoch": 1.3895123839009287, "grad_norm": 0.21511267125606537, "learning_rate": 9.593799784012764e-05, "loss": 0.0104, "step": 7179 }, { "epoch": 1.3897058823529411, "grad_norm": 0.06545937806367874, "learning_rate": 9.593685855611956e-05, "loss": 0.0103, "step": 7180 }, { "epoch": 1.3898993808049536, "grad_norm": 0.18768665194511414, "learning_rate": 9.593571911991833e-05, "loss": 0.0082, "step": 7181 }, { "epoch": 1.390092879256966, "grad_norm": 0.12923593819141388, "learning_rate": 9.593457953152817e-05, "loss": 0.0095, "step": 7182 }, { "epoch": 1.3902863777089784, "grad_norm": 0.13602103292942047, "learning_rate": 9.593343979095333e-05, "loss": 0.0087, "step": 7183 }, { "epoch": 1.3904798761609907, "grad_norm": 0.1852889358997345, "learning_rate": 9.593229989819805e-05, "loss": 0.0077, "step": 7184 }, { "epoch": 1.390673374613003, "grad_norm": 0.07430725544691086, "learning_rate": 9.593115985326657e-05, "loss": 0.0088, "step": 7185 }, { "epoch": 1.3908668730650156, "grad_norm": 0.18223810195922852, "learning_rate": 9.593001965616313e-05, "loss": 0.008, "step": 7186 }, { "epoch": 1.3910603715170278, "grad_norm": 0.100715272128582, "learning_rate": 9.592887930689195e-05, "loss": 0.0081, "step": 7187 }, { "epoch": 1.3912538699690402, "grad_norm": 0.1291082203388214, "learning_rate": 9.592773880545727e-05, "loss": 0.0068, "step": 7188 }, { "epoch": 1.3914473684210527, "grad_norm": 0.12390333414077759, "learning_rate": 9.592659815186338e-05, "loss": 0.0089, "step": 7189 }, { "epoch": 1.391640866873065, "grad_norm": 0.05350319296121597, "learning_rate": 9.592545734611444e-05, "loss": 0.0075, "step": 7190 }, { "epoch": 1.3918343653250773, "grad_norm": 0.15096738934516907, "learning_rate": 9.592431638821477e-05, "loss": 0.0073, "step": 7191 }, { "epoch": 1.3920278637770898, "grad_norm": 0.06094682216644287, "learning_rate": 9.592317527816855e-05, "loss": 0.0081, "step": 7192 }, { "epoch": 1.3922213622291022, "grad_norm": 0.14152275025844574, "learning_rate": 9.592203401598005e-05, "loss": 0.0074, "step": 7193 }, { "epoch": 1.3924148606811144, "grad_norm": 0.17809759080410004, "learning_rate": 9.592089260165354e-05, "loss": 0.0081, "step": 7194 }, { "epoch": 1.3926083591331269, "grad_norm": 0.18607111275196075, "learning_rate": 9.591975103519321e-05, "loss": 
0.0074, "step": 7195 }, { "epoch": 1.3928018575851393, "grad_norm": 0.16985008120536804, "learning_rate": 9.591860931660334e-05, "loss": 0.0081, "step": 7196 }, { "epoch": 1.3929953560371517, "grad_norm": 0.12339020520448685, "learning_rate": 9.591746744588815e-05, "loss": 0.008, "step": 7197 }, { "epoch": 1.3931888544891642, "grad_norm": 0.14503222703933716, "learning_rate": 9.591632542305189e-05, "loss": 0.0078, "step": 7198 }, { "epoch": 1.3933823529411764, "grad_norm": 0.07325202226638794, "learning_rate": 9.591518324809884e-05, "loss": 0.0081, "step": 7199 }, { "epoch": 1.3935758513931888, "grad_norm": 0.0414855070412159, "learning_rate": 9.59140409210332e-05, "loss": 0.0085, "step": 7200 }, { "epoch": 1.3937693498452013, "grad_norm": 0.07938633114099503, "learning_rate": 9.591289844185925e-05, "loss": 0.0082, "step": 7201 }, { "epoch": 1.3939628482972135, "grad_norm": 0.04026932641863823, "learning_rate": 9.591175581058122e-05, "loss": 0.0074, "step": 7202 }, { "epoch": 1.394156346749226, "grad_norm": 0.0611368753015995, "learning_rate": 9.591061302720335e-05, "loss": 0.0114, "step": 7203 }, { "epoch": 1.3943498452012384, "grad_norm": 0.025860624387860298, "learning_rate": 9.590947009172993e-05, "loss": 0.0098, "step": 7204 }, { "epoch": 1.3945433436532508, "grad_norm": 0.06486847996711731, "learning_rate": 9.590832700416516e-05, "loss": 0.0073, "step": 7205 }, { "epoch": 1.3947368421052633, "grad_norm": 0.051955707371234894, "learning_rate": 9.590718376451332e-05, "loss": 0.0073, "step": 7206 }, { "epoch": 1.3949303405572755, "grad_norm": 0.04257999733090401, "learning_rate": 9.590604037277863e-05, "loss": 0.0093, "step": 7207 }, { "epoch": 1.395123839009288, "grad_norm": 0.05605442076921463, "learning_rate": 9.590489682896539e-05, "loss": 0.0112, "step": 7208 }, { "epoch": 1.3953173374613004, "grad_norm": 0.048097144812345505, "learning_rate": 9.59037531330778e-05, "loss": 0.0102, "step": 7209 }, { "epoch": 1.3955108359133126, "grad_norm": 0.06535856425762177, "learning_rate": 9.590260928512015e-05, "loss": 0.0097, "step": 7210 }, { "epoch": 1.395704334365325, "grad_norm": 0.057529985904693604, "learning_rate": 9.590146528509668e-05, "loss": 0.0082, "step": 7211 }, { "epoch": 1.3958978328173375, "grad_norm": 0.08079877495765686, "learning_rate": 9.590032113301162e-05, "loss": 0.0077, "step": 7212 }, { "epoch": 1.39609133126935, "grad_norm": 0.04898768290877342, "learning_rate": 9.589917682886927e-05, "loss": 0.0081, "step": 7213 }, { "epoch": 1.3962848297213624, "grad_norm": 0.0799872875213623, "learning_rate": 9.589803237267383e-05, "loss": 0.0093, "step": 7214 }, { "epoch": 1.3964783281733746, "grad_norm": 0.10644588619470596, "learning_rate": 9.589688776442961e-05, "loss": 0.0092, "step": 7215 }, { "epoch": 1.396671826625387, "grad_norm": 0.056237880140542984, "learning_rate": 9.589574300414083e-05, "loss": 0.0083, "step": 7216 }, { "epoch": 1.3968653250773992, "grad_norm": 0.13503994047641754, "learning_rate": 9.589459809181174e-05, "loss": 0.0081, "step": 7217 }, { "epoch": 1.3970588235294117, "grad_norm": 0.04677325114607811, "learning_rate": 9.589345302744662e-05, "loss": 0.0108, "step": 7218 }, { "epoch": 1.3972523219814241, "grad_norm": 0.13564254343509674, "learning_rate": 9.589230781104972e-05, "loss": 0.0092, "step": 7219 }, { "epoch": 1.3974458204334366, "grad_norm": 0.04060714319348335, "learning_rate": 9.589116244262529e-05, "loss": 0.0078, "step": 7220 }, { "epoch": 1.397639318885449, "grad_norm": 0.08679362386465073, "learning_rate": 9.589001692217761e-05, "loss": 
0.0109, "step": 7221 }, { "epoch": 1.3978328173374612, "grad_norm": 0.04711885750293732, "learning_rate": 9.588887124971091e-05, "loss": 0.0091, "step": 7222 }, { "epoch": 1.3980263157894737, "grad_norm": 0.0898931697010994, "learning_rate": 9.588772542522947e-05, "loss": 0.0098, "step": 7223 }, { "epoch": 1.3982198142414861, "grad_norm": 0.030275238677859306, "learning_rate": 9.588657944873754e-05, "loss": 0.0083, "step": 7224 }, { "epoch": 1.3984133126934983, "grad_norm": 0.08197163790464401, "learning_rate": 9.588543332023937e-05, "loss": 0.0089, "step": 7225 }, { "epoch": 1.3986068111455108, "grad_norm": 0.045506637543439865, "learning_rate": 9.588428703973925e-05, "loss": 0.0091, "step": 7226 }, { "epoch": 1.3988003095975232, "grad_norm": 0.11825115233659744, "learning_rate": 9.58831406072414e-05, "loss": 0.009, "step": 7227 }, { "epoch": 1.3989938080495357, "grad_norm": 0.0706091970205307, "learning_rate": 9.588199402275014e-05, "loss": 0.0083, "step": 7228 }, { "epoch": 1.399187306501548, "grad_norm": 0.1437193751335144, "learning_rate": 9.588084728626968e-05, "loss": 0.0098, "step": 7229 }, { "epoch": 1.3993808049535603, "grad_norm": 0.13217990100383759, "learning_rate": 9.58797003978043e-05, "loss": 0.0079, "step": 7230 }, { "epoch": 1.3995743034055728, "grad_norm": 0.18899041414260864, "learning_rate": 9.587855335735827e-05, "loss": 0.0093, "step": 7231 }, { "epoch": 1.3997678018575852, "grad_norm": 0.08916755020618439, "learning_rate": 9.587740616493586e-05, "loss": 0.0089, "step": 7232 }, { "epoch": 1.3999613003095974, "grad_norm": 0.15365442633628845, "learning_rate": 9.587625882054132e-05, "loss": 0.0096, "step": 7233 }, { "epoch": 1.4001547987616099, "grad_norm": 0.06943564116954803, "learning_rate": 9.587511132417892e-05, "loss": 0.0099, "step": 7234 }, { "epoch": 1.4003482972136223, "grad_norm": 0.1019597202539444, "learning_rate": 9.587396367585292e-05, "loss": 0.0097, "step": 7235 }, { "epoch": 1.4005417956656347, "grad_norm": 0.09209407866001129, "learning_rate": 9.587281587556762e-05, "loss": 0.0082, "step": 7236 }, { "epoch": 1.4007352941176472, "grad_norm": 0.08487790077924728, "learning_rate": 9.587166792332723e-05, "loss": 0.0098, "step": 7237 }, { "epoch": 1.4009287925696594, "grad_norm": 0.08037591725587845, "learning_rate": 9.587051981913609e-05, "loss": 0.0096, "step": 7238 }, { "epoch": 1.4011222910216719, "grad_norm": 0.10463935881853104, "learning_rate": 9.58693715629984e-05, "loss": 0.0077, "step": 7239 }, { "epoch": 1.4013157894736843, "grad_norm": 0.09436079859733582, "learning_rate": 9.586822315491845e-05, "loss": 0.0115, "step": 7240 }, { "epoch": 1.4015092879256965, "grad_norm": 0.08686903119087219, "learning_rate": 9.586707459490054e-05, "loss": 0.0076, "step": 7241 }, { "epoch": 1.401702786377709, "grad_norm": 0.06712665408849716, "learning_rate": 9.58659258829489e-05, "loss": 0.009, "step": 7242 }, { "epoch": 1.4018962848297214, "grad_norm": 0.07472355663776398, "learning_rate": 9.586477701906782e-05, "loss": 0.0078, "step": 7243 }, { "epoch": 1.4020897832817338, "grad_norm": 0.09192080050706863, "learning_rate": 9.586362800326158e-05, "loss": 0.0092, "step": 7244 }, { "epoch": 1.402283281733746, "grad_norm": 0.08838781714439392, "learning_rate": 9.586247883553443e-05, "loss": 0.0113, "step": 7245 }, { "epoch": 1.4024767801857585, "grad_norm": 0.14253894984722137, "learning_rate": 9.586132951589066e-05, "loss": 0.0093, "step": 7246 }, { "epoch": 1.402670278637771, "grad_norm": 0.12442446500062943, "learning_rate": 9.586018004433452e-05, "loss": 
0.0087, "step": 7247 }, { "epoch": 1.4028637770897832, "grad_norm": 0.12136075645685196, "learning_rate": 9.585903042087031e-05, "loss": 0.0078, "step": 7248 }, { "epoch": 1.4030572755417956, "grad_norm": 0.08043466508388519, "learning_rate": 9.585788064550228e-05, "loss": 0.0105, "step": 7249 }, { "epoch": 1.403250773993808, "grad_norm": 0.16179423034191132, "learning_rate": 9.585673071823474e-05, "loss": 0.0104, "step": 7250 }, { "epoch": 1.4034442724458205, "grad_norm": 0.05894505977630615, "learning_rate": 9.585558063907192e-05, "loss": 0.0089, "step": 7251 }, { "epoch": 1.403637770897833, "grad_norm": 0.14238958060741425, "learning_rate": 9.585443040801813e-05, "loss": 0.0077, "step": 7252 }, { "epoch": 1.4038312693498451, "grad_norm": 0.04978786036372185, "learning_rate": 9.585328002507763e-05, "loss": 0.0078, "step": 7253 }, { "epoch": 1.4040247678018576, "grad_norm": 0.1816834807395935, "learning_rate": 9.58521294902547e-05, "loss": 0.0087, "step": 7254 }, { "epoch": 1.40421826625387, "grad_norm": 0.05975942313671112, "learning_rate": 9.585097880355364e-05, "loss": 0.0094, "step": 7255 }, { "epoch": 1.4044117647058822, "grad_norm": 0.10862759500741959, "learning_rate": 9.584982796497869e-05, "loss": 0.0079, "step": 7256 }, { "epoch": 1.4046052631578947, "grad_norm": 0.05503741279244423, "learning_rate": 9.584867697453414e-05, "loss": 0.0081, "step": 7257 }, { "epoch": 1.4047987616099071, "grad_norm": 0.1768641322851181, "learning_rate": 9.584752583222429e-05, "loss": 0.0086, "step": 7258 }, { "epoch": 1.4049922600619196, "grad_norm": 0.04758044332265854, "learning_rate": 9.584637453805338e-05, "loss": 0.0092, "step": 7259 }, { "epoch": 1.405185758513932, "grad_norm": 0.14781437814235687, "learning_rate": 9.584522309202573e-05, "loss": 0.0074, "step": 7260 }, { "epoch": 1.4053792569659442, "grad_norm": 0.08175313472747803, "learning_rate": 9.58440714941456e-05, "loss": 0.0088, "step": 7261 }, { "epoch": 1.4055727554179567, "grad_norm": 0.13131223618984222, "learning_rate": 9.584291974441729e-05, "loss": 0.0097, "step": 7262 }, { "epoch": 1.4057662538699691, "grad_norm": 0.12295228242874146, "learning_rate": 9.584176784284504e-05, "loss": 0.0087, "step": 7263 }, { "epoch": 1.4059597523219813, "grad_norm": 0.10869750380516052, "learning_rate": 9.584061578943319e-05, "loss": 0.0084, "step": 7264 }, { "epoch": 1.4061532507739938, "grad_norm": 0.1661389023065567, "learning_rate": 9.583946358418599e-05, "loss": 0.0086, "step": 7265 }, { "epoch": 1.4063467492260062, "grad_norm": 0.06824151426553726, "learning_rate": 9.58383112271077e-05, "loss": 0.0069, "step": 7266 }, { "epoch": 1.4065402476780187, "grad_norm": 0.1883067786693573, "learning_rate": 9.583715871820264e-05, "loss": 0.0091, "step": 7267 }, { "epoch": 1.4067337461300309, "grad_norm": 0.03368668630719185, "learning_rate": 9.583600605747511e-05, "loss": 0.0082, "step": 7268 }, { "epoch": 1.4069272445820433, "grad_norm": 0.18856006860733032, "learning_rate": 9.583485324492936e-05, "loss": 0.0082, "step": 7269 }, { "epoch": 1.4071207430340558, "grad_norm": 0.06427785009145737, "learning_rate": 9.58337002805697e-05, "loss": 0.0094, "step": 7270 }, { "epoch": 1.407314241486068, "grad_norm": 0.2208968847990036, "learning_rate": 9.583254716440039e-05, "loss": 0.0095, "step": 7271 }, { "epoch": 1.4075077399380804, "grad_norm": 0.08732391893863678, "learning_rate": 9.583139389642573e-05, "loss": 0.0088, "step": 7272 }, { "epoch": 1.4077012383900929, "grad_norm": 0.1965516209602356, "learning_rate": 9.583024047665e-05, "loss": 0.0081, 
"step": 7273 }, { "epoch": 1.4078947368421053, "grad_norm": 0.11030954867601395, "learning_rate": 9.582908690507752e-05, "loss": 0.0091, "step": 7274 }, { "epoch": 1.4080882352941178, "grad_norm": 0.19640929996967316, "learning_rate": 9.582793318171253e-05, "loss": 0.0069, "step": 7275 }, { "epoch": 1.40828173374613, "grad_norm": 0.1621541529893875, "learning_rate": 9.582677930655936e-05, "loss": 0.0083, "step": 7276 }, { "epoch": 1.4084752321981424, "grad_norm": 0.15011157095432281, "learning_rate": 9.58256252796223e-05, "loss": 0.0068, "step": 7277 }, { "epoch": 1.4086687306501549, "grad_norm": 0.25018471479415894, "learning_rate": 9.582447110090558e-05, "loss": 0.0076, "step": 7278 }, { "epoch": 1.408862229102167, "grad_norm": 0.05522777885198593, "learning_rate": 9.582331677041358e-05, "loss": 0.0078, "step": 7279 }, { "epoch": 1.4090557275541795, "grad_norm": 0.23264943063259125, "learning_rate": 9.582216228815053e-05, "loss": 0.0091, "step": 7280 }, { "epoch": 1.409249226006192, "grad_norm": 0.12296625226736069, "learning_rate": 9.582100765412073e-05, "loss": 0.0096, "step": 7281 }, { "epoch": 1.4094427244582044, "grad_norm": 0.1380145400762558, "learning_rate": 9.58198528683285e-05, "loss": 0.0084, "step": 7282 }, { "epoch": 1.4096362229102168, "grad_norm": 0.1873733252286911, "learning_rate": 9.58186979307781e-05, "loss": 0.0092, "step": 7283 }, { "epoch": 1.409829721362229, "grad_norm": 0.08848591893911362, "learning_rate": 9.581754284147385e-05, "loss": 0.0093, "step": 7284 }, { "epoch": 1.4100232198142415, "grad_norm": 0.16157543659210205, "learning_rate": 9.581638760042003e-05, "loss": 0.0076, "step": 7285 }, { "epoch": 1.410216718266254, "grad_norm": 0.09338801354169846, "learning_rate": 9.581523220762095e-05, "loss": 0.0092, "step": 7286 }, { "epoch": 1.4104102167182662, "grad_norm": 0.1550564169883728, "learning_rate": 9.581407666308086e-05, "loss": 0.0086, "step": 7287 }, { "epoch": 1.4106037151702786, "grad_norm": 0.07395132631063461, "learning_rate": 9.58129209668041e-05, "loss": 0.01, "step": 7288 }, { "epoch": 1.410797213622291, "grad_norm": 0.13386082649230957, "learning_rate": 9.581176511879499e-05, "loss": 0.0092, "step": 7289 }, { "epoch": 1.4109907120743035, "grad_norm": 0.08213496953248978, "learning_rate": 9.581060911905777e-05, "loss": 0.0091, "step": 7290 }, { "epoch": 1.4111842105263157, "grad_norm": 0.15511539578437805, "learning_rate": 9.580945296759676e-05, "loss": 0.0106, "step": 7291 }, { "epoch": 1.4113777089783281, "grad_norm": 0.11567037552595139, "learning_rate": 9.580829666441625e-05, "loss": 0.0089, "step": 7292 }, { "epoch": 1.4115712074303406, "grad_norm": 0.19385632872581482, "learning_rate": 9.580714020952056e-05, "loss": 0.0071, "step": 7293 }, { "epoch": 1.4117647058823528, "grad_norm": 0.17017880082130432, "learning_rate": 9.580598360291398e-05, "loss": 0.0077, "step": 7294 }, { "epoch": 1.4119582043343653, "grad_norm": 0.10179703682661057, "learning_rate": 9.580482684460081e-05, "loss": 0.0094, "step": 7295 }, { "epoch": 1.4121517027863777, "grad_norm": 0.25834032893180847, "learning_rate": 9.580366993458534e-05, "loss": 0.0088, "step": 7296 }, { "epoch": 1.4123452012383901, "grad_norm": 0.04186626523733139, "learning_rate": 9.58025128728719e-05, "loss": 0.0089, "step": 7297 }, { "epoch": 1.4125386996904026, "grad_norm": 0.2530297040939331, "learning_rate": 9.580135565946474e-05, "loss": 0.0073, "step": 7298 }, { "epoch": 1.4127321981424148, "grad_norm": 0.123264379799366, "learning_rate": 9.580019829436821e-05, "loss": 0.0106, "step": 7299 
}, { "epoch": 1.4129256965944272, "grad_norm": 0.1414327472448349, "learning_rate": 9.57990407775866e-05, "loss": 0.0077, "step": 7300 }, { "epoch": 1.4131191950464397, "grad_norm": 0.1728878915309906, "learning_rate": 9.57978831091242e-05, "loss": 0.0097, "step": 7301 }, { "epoch": 1.413312693498452, "grad_norm": 0.06478451192378998, "learning_rate": 9.579672528898534e-05, "loss": 0.0102, "step": 7302 }, { "epoch": 1.4135061919504643, "grad_norm": 0.17433106899261475, "learning_rate": 9.579556731717429e-05, "loss": 0.0079, "step": 7303 }, { "epoch": 1.4136996904024768, "grad_norm": 0.08249904215335846, "learning_rate": 9.579440919369538e-05, "loss": 0.0081, "step": 7304 }, { "epoch": 1.4138931888544892, "grad_norm": 0.10814137011766434, "learning_rate": 9.57932509185529e-05, "loss": 0.0099, "step": 7305 }, { "epoch": 1.4140866873065017, "grad_norm": 0.10679958760738373, "learning_rate": 9.579209249175118e-05, "loss": 0.0096, "step": 7306 }, { "epoch": 1.4142801857585139, "grad_norm": 0.06805555522441864, "learning_rate": 9.57909339132945e-05, "loss": 0.01, "step": 7307 }, { "epoch": 1.4144736842105263, "grad_norm": 0.13808909058570862, "learning_rate": 9.578977518318719e-05, "loss": 0.011, "step": 7308 }, { "epoch": 1.4146671826625388, "grad_norm": 0.10762044787406921, "learning_rate": 9.578861630143352e-05, "loss": 0.0073, "step": 7309 }, { "epoch": 1.414860681114551, "grad_norm": 0.13637563586235046, "learning_rate": 9.578745726803786e-05, "loss": 0.0084, "step": 7310 }, { "epoch": 1.4150541795665634, "grad_norm": 0.08723552525043488, "learning_rate": 9.578629808300445e-05, "loss": 0.0093, "step": 7311 }, { "epoch": 1.4152476780185759, "grad_norm": 0.09454643726348877, "learning_rate": 9.578513874633765e-05, "loss": 0.0067, "step": 7312 }, { "epoch": 1.4154411764705883, "grad_norm": 0.17469505965709686, "learning_rate": 9.578397925804176e-05, "loss": 0.0089, "step": 7313 }, { "epoch": 1.4156346749226008, "grad_norm": 0.1696016937494278, "learning_rate": 9.578281961812108e-05, "loss": 0.0092, "step": 7314 }, { "epoch": 1.415828173374613, "grad_norm": 0.16629163920879364, "learning_rate": 9.578165982657994e-05, "loss": 0.0074, "step": 7315 }, { "epoch": 1.4160216718266254, "grad_norm": 0.09556140750646591, "learning_rate": 9.578049988342261e-05, "loss": 0.0089, "step": 7316 }, { "epoch": 1.4162151702786376, "grad_norm": 0.19121862947940826, "learning_rate": 9.577933978865344e-05, "loss": 0.0087, "step": 7317 }, { "epoch": 1.41640866873065, "grad_norm": 0.07613545656204224, "learning_rate": 9.577817954227675e-05, "loss": 0.009, "step": 7318 }, { "epoch": 1.4166021671826625, "grad_norm": 0.18666210770606995, "learning_rate": 9.577701914429681e-05, "loss": 0.0084, "step": 7319 }, { "epoch": 1.416795665634675, "grad_norm": 0.11068066209554672, "learning_rate": 9.577585859471798e-05, "loss": 0.0077, "step": 7320 }, { "epoch": 1.4169891640866874, "grad_norm": 0.12098977714776993, "learning_rate": 9.577469789354456e-05, "loss": 0.009, "step": 7321 }, { "epoch": 1.4171826625386996, "grad_norm": 0.15149392187595367, "learning_rate": 9.577353704078083e-05, "loss": 0.0093, "step": 7322 }, { "epoch": 1.417376160990712, "grad_norm": 0.11726927012205124, "learning_rate": 9.577237603643115e-05, "loss": 0.0084, "step": 7323 }, { "epoch": 1.4175696594427245, "grad_norm": 0.11491350829601288, "learning_rate": 9.577121488049984e-05, "loss": 0.0104, "step": 7324 }, { "epoch": 1.4177631578947367, "grad_norm": 0.0856919065117836, "learning_rate": 9.577005357299119e-05, "loss": 0.0096, "step": 7325 }, { 
"epoch": 1.4179566563467492, "grad_norm": 0.08252256363630295, "learning_rate": 9.576889211390953e-05, "loss": 0.0084, "step": 7326 }, { "epoch": 1.4181501547987616, "grad_norm": 0.1381043642759323, "learning_rate": 9.576773050325915e-05, "loss": 0.0108, "step": 7327 }, { "epoch": 1.418343653250774, "grad_norm": 0.1213221326470375, "learning_rate": 9.576656874104443e-05, "loss": 0.0088, "step": 7328 }, { "epoch": 1.4185371517027865, "grad_norm": 0.07012113928794861, "learning_rate": 9.576540682726963e-05, "loss": 0.0078, "step": 7329 }, { "epoch": 1.4187306501547987, "grad_norm": 0.07655959576368332, "learning_rate": 9.57642447619391e-05, "loss": 0.0081, "step": 7330 }, { "epoch": 1.4189241486068112, "grad_norm": 0.09575857222080231, "learning_rate": 9.576308254505713e-05, "loss": 0.0109, "step": 7331 }, { "epoch": 1.4191176470588236, "grad_norm": 0.044843319803476334, "learning_rate": 9.576192017662808e-05, "loss": 0.0082, "step": 7332 }, { "epoch": 1.4193111455108358, "grad_norm": 0.10464195162057877, "learning_rate": 9.576075765665626e-05, "loss": 0.0079, "step": 7333 }, { "epoch": 1.4195046439628483, "grad_norm": 0.07867492735385895, "learning_rate": 9.5759594985146e-05, "loss": 0.0086, "step": 7334 }, { "epoch": 1.4196981424148607, "grad_norm": 0.09859462827444077, "learning_rate": 9.575843216210158e-05, "loss": 0.0092, "step": 7335 }, { "epoch": 1.4198916408668731, "grad_norm": 0.11558498442173004, "learning_rate": 9.575726918752738e-05, "loss": 0.0082, "step": 7336 }, { "epoch": 1.4200851393188856, "grad_norm": 0.059138376265764236, "learning_rate": 9.575610606142768e-05, "loss": 0.0087, "step": 7337 }, { "epoch": 1.4202786377708978, "grad_norm": 0.11718160659074783, "learning_rate": 9.575494278380683e-05, "loss": 0.0069, "step": 7338 }, { "epoch": 1.4204721362229102, "grad_norm": 0.07152917981147766, "learning_rate": 9.575377935466911e-05, "loss": 0.0073, "step": 7339 }, { "epoch": 1.4206656346749227, "grad_norm": 0.06977351754903793, "learning_rate": 9.575261577401893e-05, "loss": 0.008, "step": 7340 }, { "epoch": 1.420859133126935, "grad_norm": 0.06900165230035782, "learning_rate": 9.575145204186054e-05, "loss": 0.0074, "step": 7341 }, { "epoch": 1.4210526315789473, "grad_norm": 0.06100447475910187, "learning_rate": 9.575028815819828e-05, "loss": 0.0087, "step": 7342 }, { "epoch": 1.4212461300309598, "grad_norm": 0.08283454179763794, "learning_rate": 9.574912412303652e-05, "loss": 0.0089, "step": 7343 }, { "epoch": 1.4214396284829722, "grad_norm": 0.041036978363990784, "learning_rate": 9.574795993637955e-05, "loss": 0.0084, "step": 7344 }, { "epoch": 1.4216331269349844, "grad_norm": 0.08104360848665237, "learning_rate": 9.574679559823169e-05, "loss": 0.0077, "step": 7345 }, { "epoch": 1.421826625386997, "grad_norm": 0.0674101710319519, "learning_rate": 9.574563110859729e-05, "loss": 0.008, "step": 7346 }, { "epoch": 1.4220201238390093, "grad_norm": 0.0900023803114891, "learning_rate": 9.574446646748069e-05, "loss": 0.0093, "step": 7347 }, { "epoch": 1.4222136222910216, "grad_norm": 0.06119311973452568, "learning_rate": 9.574330167488618e-05, "loss": 0.0073, "step": 7348 }, { "epoch": 1.422407120743034, "grad_norm": 0.08960789442062378, "learning_rate": 9.574213673081811e-05, "loss": 0.0065, "step": 7349 }, { "epoch": 1.4226006191950464, "grad_norm": 0.06421595811843872, "learning_rate": 9.574097163528085e-05, "loss": 0.007, "step": 7350 }, { "epoch": 1.4227941176470589, "grad_norm": 0.08606338500976562, "learning_rate": 9.573980638827866e-05, "loss": 0.007, "step": 7351 }, { 
"epoch": 1.4229876160990713, "grad_norm": 0.09405149519443512, "learning_rate": 9.573864098981594e-05, "loss": 0.0086, "step": 7352 }, { "epoch": 1.4231811145510835, "grad_norm": 0.07967813313007355, "learning_rate": 9.573747543989696e-05, "loss": 0.0093, "step": 7353 }, { "epoch": 1.423374613003096, "grad_norm": 0.15028950572013855, "learning_rate": 9.57363097385261e-05, "loss": 0.0093, "step": 7354 }, { "epoch": 1.4235681114551084, "grad_norm": 0.0707942396402359, "learning_rate": 9.573514388570767e-05, "loss": 0.0085, "step": 7355 }, { "epoch": 1.4237616099071206, "grad_norm": 0.17132751643657684, "learning_rate": 9.573397788144602e-05, "loss": 0.0086, "step": 7356 }, { "epoch": 1.423955108359133, "grad_norm": 0.062252242118120193, "learning_rate": 9.573281172574548e-05, "loss": 0.0078, "step": 7357 }, { "epoch": 1.4241486068111455, "grad_norm": 0.1454022079706192, "learning_rate": 9.573164541861038e-05, "loss": 0.0084, "step": 7358 }, { "epoch": 1.424342105263158, "grad_norm": 0.07119372487068176, "learning_rate": 9.573047896004505e-05, "loss": 0.0084, "step": 7359 }, { "epoch": 1.4245356037151704, "grad_norm": 0.08358373492956161, "learning_rate": 9.572931235005385e-05, "loss": 0.0075, "step": 7360 }, { "epoch": 1.4247291021671826, "grad_norm": 0.06841042637825012, "learning_rate": 9.57281455886411e-05, "loss": 0.0086, "step": 7361 }, { "epoch": 1.424922600619195, "grad_norm": 0.056884489953517914, "learning_rate": 9.572697867581113e-05, "loss": 0.0081, "step": 7362 }, { "epoch": 1.4251160990712075, "grad_norm": 0.11393686383962631, "learning_rate": 9.57258116115683e-05, "loss": 0.0097, "step": 7363 }, { "epoch": 1.4253095975232197, "grad_norm": 0.05235951021313667, "learning_rate": 9.572464439591694e-05, "loss": 0.0072, "step": 7364 }, { "epoch": 1.4255030959752322, "grad_norm": 0.12941183149814606, "learning_rate": 9.572347702886138e-05, "loss": 0.0088, "step": 7365 }, { "epoch": 1.4256965944272446, "grad_norm": 0.05537673085927963, "learning_rate": 9.572230951040595e-05, "loss": 0.0078, "step": 7366 }, { "epoch": 1.425890092879257, "grad_norm": 0.10058235377073288, "learning_rate": 9.572114184055504e-05, "loss": 0.0085, "step": 7367 }, { "epoch": 1.4260835913312693, "grad_norm": 0.0690535232424736, "learning_rate": 9.571997401931293e-05, "loss": 0.0087, "step": 7368 }, { "epoch": 1.4262770897832817, "grad_norm": 0.0977557897567749, "learning_rate": 9.571880604668401e-05, "loss": 0.0077, "step": 7369 }, { "epoch": 1.4264705882352942, "grad_norm": 0.06488523632287979, "learning_rate": 9.571763792267259e-05, "loss": 0.0081, "step": 7370 }, { "epoch": 1.4266640866873064, "grad_norm": 0.10192038118839264, "learning_rate": 9.571646964728302e-05, "loss": 0.0099, "step": 7371 }, { "epoch": 1.4268575851393188, "grad_norm": 0.07657424360513687, "learning_rate": 9.571530122051967e-05, "loss": 0.007, "step": 7372 }, { "epoch": 1.4270510835913313, "grad_norm": 0.07443151623010635, "learning_rate": 9.571413264238686e-05, "loss": 0.0078, "step": 7373 }, { "epoch": 1.4272445820433437, "grad_norm": 0.10267727077007294, "learning_rate": 9.571296391288891e-05, "loss": 0.009, "step": 7374 }, { "epoch": 1.4274380804953561, "grad_norm": 0.07048721611499786, "learning_rate": 9.571179503203022e-05, "loss": 0.0071, "step": 7375 }, { "epoch": 1.4276315789473684, "grad_norm": 0.07994959503412247, "learning_rate": 9.571062599981511e-05, "loss": 0.0072, "step": 7376 }, { "epoch": 1.4278250773993808, "grad_norm": 0.05660257861018181, "learning_rate": 9.57094568162479e-05, "loss": 0.0085, "step": 7377 }, { 
"epoch": 1.4280185758513932, "grad_norm": 0.07788361608982086, "learning_rate": 9.570828748133297e-05, "loss": 0.0067, "step": 7378 }, { "epoch": 1.4282120743034055, "grad_norm": 0.06677667051553726, "learning_rate": 9.570711799507467e-05, "loss": 0.0098, "step": 7379 }, { "epoch": 1.428405572755418, "grad_norm": 0.13140419125556946, "learning_rate": 9.570594835747731e-05, "loss": 0.0074, "step": 7380 }, { "epoch": 1.4285990712074303, "grad_norm": 0.042855020612478256, "learning_rate": 9.570477856854529e-05, "loss": 0.0066, "step": 7381 }, { "epoch": 1.4287925696594428, "grad_norm": 0.15970104932785034, "learning_rate": 9.570360862828292e-05, "loss": 0.0099, "step": 7382 }, { "epoch": 1.4289860681114552, "grad_norm": 0.07563386112451553, "learning_rate": 9.570243853669457e-05, "loss": 0.0083, "step": 7383 }, { "epoch": 1.4291795665634675, "grad_norm": 0.16632941365242004, "learning_rate": 9.570126829378458e-05, "loss": 0.0079, "step": 7384 }, { "epoch": 1.42937306501548, "grad_norm": 0.1535903662443161, "learning_rate": 9.570009789955731e-05, "loss": 0.0101, "step": 7385 }, { "epoch": 1.4295665634674923, "grad_norm": 0.13486772775650024, "learning_rate": 9.56989273540171e-05, "loss": 0.0098, "step": 7386 }, { "epoch": 1.4297600619195046, "grad_norm": 0.1754021942615509, "learning_rate": 9.569775665716829e-05, "loss": 0.0081, "step": 7387 }, { "epoch": 1.429953560371517, "grad_norm": 0.1106816902756691, "learning_rate": 9.569658580901526e-05, "loss": 0.0103, "step": 7388 }, { "epoch": 1.4301470588235294, "grad_norm": 0.19147878885269165, "learning_rate": 9.569541480956236e-05, "loss": 0.0087, "step": 7389 }, { "epoch": 1.4303405572755419, "grad_norm": 0.1007261723279953, "learning_rate": 9.569424365881395e-05, "loss": 0.0082, "step": 7390 }, { "epoch": 1.430534055727554, "grad_norm": 0.17721350491046906, "learning_rate": 9.569307235677433e-05, "loss": 0.008, "step": 7391 }, { "epoch": 1.4307275541795665, "grad_norm": 0.06459081172943115, "learning_rate": 9.569190090344791e-05, "loss": 0.008, "step": 7392 }, { "epoch": 1.430921052631579, "grad_norm": 0.17410245537757874, "learning_rate": 9.569072929883904e-05, "loss": 0.0093, "step": 7393 }, { "epoch": 1.4311145510835912, "grad_norm": 0.07656633853912354, "learning_rate": 9.568955754295206e-05, "loss": 0.0097, "step": 7394 }, { "epoch": 1.4313080495356036, "grad_norm": 0.13885429501533508, "learning_rate": 9.568838563579134e-05, "loss": 0.009, "step": 7395 }, { "epoch": 1.431501547987616, "grad_norm": 0.1074567586183548, "learning_rate": 9.568721357736121e-05, "loss": 0.009, "step": 7396 }, { "epoch": 1.4316950464396285, "grad_norm": 0.09877118468284607, "learning_rate": 9.568604136766605e-05, "loss": 0.0081, "step": 7397 }, { "epoch": 1.431888544891641, "grad_norm": 0.1080990582704544, "learning_rate": 9.568486900671022e-05, "loss": 0.0074, "step": 7398 }, { "epoch": 1.4320820433436532, "grad_norm": 0.06815218180418015, "learning_rate": 9.568369649449806e-05, "loss": 0.0088, "step": 7399 }, { "epoch": 1.4322755417956656, "grad_norm": 0.16352805495262146, "learning_rate": 9.568252383103394e-05, "loss": 0.0072, "step": 7400 }, { "epoch": 1.432469040247678, "grad_norm": 0.11085371673107147, "learning_rate": 9.568135101632224e-05, "loss": 0.0078, "step": 7401 }, { "epoch": 1.4326625386996903, "grad_norm": 0.11560142785310745, "learning_rate": 9.568017805036729e-05, "loss": 0.0073, "step": 7402 }, { "epoch": 1.4328560371517027, "grad_norm": 0.08054836094379425, "learning_rate": 9.567900493317346e-05, "loss": 0.0085, "step": 7403 }, { "epoch": 
1.4330495356037152, "grad_norm": 0.08252736926078796, "learning_rate": 9.567783166474511e-05, "loss": 0.0085, "step": 7404 }, { "epoch": 1.4332430340557276, "grad_norm": 0.07530619949102402, "learning_rate": 9.567665824508662e-05, "loss": 0.0096, "step": 7405 }, { "epoch": 1.43343653250774, "grad_norm": 0.06411765515804291, "learning_rate": 9.567548467420234e-05, "loss": 0.0096, "step": 7406 }, { "epoch": 1.4336300309597523, "grad_norm": 0.08034607023000717, "learning_rate": 9.567431095209661e-05, "loss": 0.0072, "step": 7407 }, { "epoch": 1.4338235294117647, "grad_norm": 0.05309666320681572, "learning_rate": 9.567313707877382e-05, "loss": 0.0079, "step": 7408 }, { "epoch": 1.4340170278637772, "grad_norm": 0.06567498296499252, "learning_rate": 9.567196305423834e-05, "loss": 0.0071, "step": 7409 }, { "epoch": 1.4342105263157894, "grad_norm": 0.046067237854003906, "learning_rate": 9.56707888784945e-05, "loss": 0.0078, "step": 7410 }, { "epoch": 1.4344040247678018, "grad_norm": 0.06063979119062424, "learning_rate": 9.566961455154672e-05, "loss": 0.0083, "step": 7411 }, { "epoch": 1.4345975232198143, "grad_norm": 0.04909305274486542, "learning_rate": 9.566844007339932e-05, "loss": 0.0086, "step": 7412 }, { "epoch": 1.4347910216718267, "grad_norm": 0.07107856869697571, "learning_rate": 9.566726544405667e-05, "loss": 0.0093, "step": 7413 }, { "epoch": 1.4349845201238391, "grad_norm": 0.06845986098051071, "learning_rate": 9.566609066352316e-05, "loss": 0.0095, "step": 7414 }, { "epoch": 1.4351780185758514, "grad_norm": 0.06606729328632355, "learning_rate": 9.566491573180314e-05, "loss": 0.0085, "step": 7415 }, { "epoch": 1.4353715170278638, "grad_norm": 0.07386061549186707, "learning_rate": 9.566374064890098e-05, "loss": 0.0088, "step": 7416 }, { "epoch": 1.435565015479876, "grad_norm": 0.06588257849216461, "learning_rate": 9.566256541482108e-05, "loss": 0.0082, "step": 7417 }, { "epoch": 1.4357585139318885, "grad_norm": 0.07047124207019806, "learning_rate": 9.566139002956774e-05, "loss": 0.0074, "step": 7418 }, { "epoch": 1.435952012383901, "grad_norm": 0.06488123536109924, "learning_rate": 9.566021449314539e-05, "loss": 0.0096, "step": 7419 }, { "epoch": 1.4361455108359134, "grad_norm": 0.0433131568133831, "learning_rate": 9.565903880555839e-05, "loss": 0.0082, "step": 7420 }, { "epoch": 1.4363390092879258, "grad_norm": 0.11029074341058731, "learning_rate": 9.565786296681108e-05, "loss": 0.0083, "step": 7421 }, { "epoch": 1.436532507739938, "grad_norm": 0.0815020501613617, "learning_rate": 9.565668697690788e-05, "loss": 0.0077, "step": 7422 }, { "epoch": 1.4367260061919505, "grad_norm": 0.10571999847888947, "learning_rate": 9.565551083585312e-05, "loss": 0.0079, "step": 7423 }, { "epoch": 1.436919504643963, "grad_norm": 0.11381588131189346, "learning_rate": 9.56543345436512e-05, "loss": 0.008, "step": 7424 }, { "epoch": 1.4371130030959751, "grad_norm": 0.0802348181605339, "learning_rate": 9.565315810030647e-05, "loss": 0.009, "step": 7425 }, { "epoch": 1.4373065015479876, "grad_norm": 0.11203579604625702, "learning_rate": 9.565198150582331e-05, "loss": 0.0084, "step": 7426 }, { "epoch": 1.4375, "grad_norm": 0.04751542583107948, "learning_rate": 9.56508047602061e-05, "loss": 0.0088, "step": 7427 }, { "epoch": 1.4376934984520124, "grad_norm": 0.13584493100643158, "learning_rate": 9.564962786345924e-05, "loss": 0.0095, "step": 7428 }, { "epoch": 1.4378869969040249, "grad_norm": 0.07114499062299728, "learning_rate": 9.564845081558705e-05, "loss": 0.0095, "step": 7429 }, { "epoch": 
1.438080495356037, "grad_norm": 0.11918710917234421, "learning_rate": 9.564727361659396e-05, "loss": 0.0081, "step": 7430 }, { "epoch": 1.4382739938080495, "grad_norm": 0.11055658012628555, "learning_rate": 9.56460962664843e-05, "loss": 0.0082, "step": 7431 }, { "epoch": 1.438467492260062, "grad_norm": 0.09735112637281418, "learning_rate": 9.564491876526249e-05, "loss": 0.0064, "step": 7432 }, { "epoch": 1.4386609907120742, "grad_norm": 0.1435536891222, "learning_rate": 9.564374111293288e-05, "loss": 0.0093, "step": 7433 }, { "epoch": 1.4388544891640866, "grad_norm": 0.10503001511096954, "learning_rate": 9.564256330949984e-05, "loss": 0.0106, "step": 7434 }, { "epoch": 1.439047987616099, "grad_norm": 0.10986936837434769, "learning_rate": 9.564138535496779e-05, "loss": 0.0076, "step": 7435 }, { "epoch": 1.4392414860681115, "grad_norm": 0.07592315971851349, "learning_rate": 9.564020724934106e-05, "loss": 0.0071, "step": 7436 }, { "epoch": 1.439434984520124, "grad_norm": 0.08970765769481659, "learning_rate": 9.563902899262407e-05, "loss": 0.0095, "step": 7437 }, { "epoch": 1.4396284829721362, "grad_norm": 0.07816857099533081, "learning_rate": 9.563785058482117e-05, "loss": 0.0082, "step": 7438 }, { "epoch": 1.4398219814241486, "grad_norm": 0.04072193428874016, "learning_rate": 9.563667202593677e-05, "loss": 0.0069, "step": 7439 }, { "epoch": 1.4400154798761609, "grad_norm": 0.08293775469064713, "learning_rate": 9.563549331597523e-05, "loss": 0.0069, "step": 7440 }, { "epoch": 1.4402089783281733, "grad_norm": 0.03733367472887039, "learning_rate": 9.563431445494094e-05, "loss": 0.0086, "step": 7441 }, { "epoch": 1.4404024767801857, "grad_norm": 0.08004564791917801, "learning_rate": 9.563313544283827e-05, "loss": 0.0072, "step": 7442 }, { "epoch": 1.4405959752321982, "grad_norm": 0.06167396157979965, "learning_rate": 9.563195627967163e-05, "loss": 0.0068, "step": 7443 }, { "epoch": 1.4407894736842106, "grad_norm": 0.051651716232299805, "learning_rate": 9.56307769654454e-05, "loss": 0.008, "step": 7444 }, { "epoch": 1.4409829721362228, "grad_norm": 0.08897875994443893, "learning_rate": 9.562959750016393e-05, "loss": 0.0095, "step": 7445 }, { "epoch": 1.4411764705882353, "grad_norm": 0.03358250856399536, "learning_rate": 9.562841788383162e-05, "loss": 0.0106, "step": 7446 }, { "epoch": 1.4413699690402477, "grad_norm": 0.08383014798164368, "learning_rate": 9.562723811645289e-05, "loss": 0.0081, "step": 7447 }, { "epoch": 1.44156346749226, "grad_norm": 0.10472244024276733, "learning_rate": 9.562605819803208e-05, "loss": 0.008, "step": 7448 }, { "epoch": 1.4417569659442724, "grad_norm": 0.08663734048604965, "learning_rate": 9.562487812857362e-05, "loss": 0.0062, "step": 7449 }, { "epoch": 1.4419504643962848, "grad_norm": 0.16037237644195557, "learning_rate": 9.562369790808185e-05, "loss": 0.0093, "step": 7450 }, { "epoch": 1.4421439628482973, "grad_norm": 0.07752332836389542, "learning_rate": 9.562251753656117e-05, "loss": 0.0078, "step": 7451 }, { "epoch": 1.4423374613003097, "grad_norm": 0.14697898924350739, "learning_rate": 9.562133701401602e-05, "loss": 0.0089, "step": 7452 }, { "epoch": 1.442530959752322, "grad_norm": 0.051973119378089905, "learning_rate": 9.562015634045071e-05, "loss": 0.0091, "step": 7453 }, { "epoch": 1.4427244582043344, "grad_norm": 0.130117729306221, "learning_rate": 9.561897551586968e-05, "loss": 0.0078, "step": 7454 }, { "epoch": 1.4429179566563468, "grad_norm": 0.070261150598526, "learning_rate": 9.561779454027731e-05, "loss": 0.0099, "step": 7455 }, { "epoch": 
1.443111455108359, "grad_norm": 0.11581189930438995, "learning_rate": 9.5616613413678e-05, "loss": 0.0094, "step": 7456 }, { "epoch": 1.4433049535603715, "grad_norm": 0.06750825047492981, "learning_rate": 9.561543213607612e-05, "loss": 0.0064, "step": 7457 }, { "epoch": 1.443498452012384, "grad_norm": 0.043619923293590546, "learning_rate": 9.561425070747607e-05, "loss": 0.0075, "step": 7458 }, { "epoch": 1.4436919504643964, "grad_norm": 0.11139373481273651, "learning_rate": 9.561306912788223e-05, "loss": 0.0088, "step": 7459 }, { "epoch": 1.4438854489164088, "grad_norm": 0.10590405017137527, "learning_rate": 9.561188739729901e-05, "loss": 0.009, "step": 7460 }, { "epoch": 1.444078947368421, "grad_norm": 0.15003418922424316, "learning_rate": 9.561070551573081e-05, "loss": 0.0098, "step": 7461 }, { "epoch": 1.4442724458204335, "grad_norm": 0.08992712944746017, "learning_rate": 9.560952348318201e-05, "loss": 0.0089, "step": 7462 }, { "epoch": 1.444465944272446, "grad_norm": 0.10159659385681152, "learning_rate": 9.560834129965702e-05, "loss": 0.0088, "step": 7463 }, { "epoch": 1.4446594427244581, "grad_norm": 0.07563818246126175, "learning_rate": 9.560715896516022e-05, "loss": 0.0081, "step": 7464 }, { "epoch": 1.4448529411764706, "grad_norm": 0.06653103232383728, "learning_rate": 9.5605976479696e-05, "loss": 0.0089, "step": 7465 }, { "epoch": 1.445046439628483, "grad_norm": 0.050239935517311096, "learning_rate": 9.560479384326876e-05, "loss": 0.0093, "step": 7466 }, { "epoch": 1.4452399380804954, "grad_norm": 0.07270937412977219, "learning_rate": 9.560361105588291e-05, "loss": 0.0086, "step": 7467 }, { "epoch": 1.4454334365325077, "grad_norm": 0.05329446122050285, "learning_rate": 9.560242811754284e-05, "loss": 0.0081, "step": 7468 }, { "epoch": 1.44562693498452, "grad_norm": 0.07491686940193176, "learning_rate": 9.560124502825295e-05, "loss": 0.0081, "step": 7469 }, { "epoch": 1.4458204334365325, "grad_norm": 0.08132092654705048, "learning_rate": 9.560006178801763e-05, "loss": 0.0095, "step": 7470 }, { "epoch": 1.4460139318885448, "grad_norm": 0.08649294823408127, "learning_rate": 9.559887839684128e-05, "loss": 0.0069, "step": 7471 }, { "epoch": 1.4462074303405572, "grad_norm": 0.09192162752151489, "learning_rate": 9.559769485472832e-05, "loss": 0.0099, "step": 7472 }, { "epoch": 1.4464009287925697, "grad_norm": 0.10723159462213516, "learning_rate": 9.559651116168313e-05, "loss": 0.0098, "step": 7473 }, { "epoch": 1.446594427244582, "grad_norm": 0.05371222272515297, "learning_rate": 9.559532731771011e-05, "loss": 0.0095, "step": 7474 }, { "epoch": 1.4467879256965945, "grad_norm": 0.09507042169570923, "learning_rate": 9.559414332281365e-05, "loss": 0.0088, "step": 7475 }, { "epoch": 1.4469814241486068, "grad_norm": 0.050926025956869125, "learning_rate": 9.559295917699819e-05, "loss": 0.0093, "step": 7476 }, { "epoch": 1.4471749226006192, "grad_norm": 0.11197390407323837, "learning_rate": 9.559177488026809e-05, "loss": 0.0088, "step": 7477 }, { "epoch": 1.4473684210526316, "grad_norm": 0.06565603613853455, "learning_rate": 9.559059043262781e-05, "loss": 0.0071, "step": 7478 }, { "epoch": 1.4475619195046439, "grad_norm": 0.11562099307775497, "learning_rate": 9.558940583408167e-05, "loss": 0.0084, "step": 7479 }, { "epoch": 1.4477554179566563, "grad_norm": 0.059070296585559845, "learning_rate": 9.558822108463414e-05, "loss": 0.0075, "step": 7480 }, { "epoch": 1.4479489164086687, "grad_norm": 0.16171956062316895, "learning_rate": 9.558703618428962e-05, "loss": 0.0087, "step": 7481 }, { "epoch": 
1.4481424148606812, "grad_norm": 0.09828023612499237, "learning_rate": 9.558585113305248e-05, "loss": 0.009, "step": 7482 }, { "epoch": 1.4483359133126936, "grad_norm": 0.1412808746099472, "learning_rate": 9.558466593092715e-05, "loss": 0.0081, "step": 7483 }, { "epoch": 1.4485294117647058, "grad_norm": 0.1273408979177475, "learning_rate": 9.558348057791805e-05, "loss": 0.008, "step": 7484 }, { "epoch": 1.4487229102167183, "grad_norm": 0.10771819204092026, "learning_rate": 9.558229507402955e-05, "loss": 0.0078, "step": 7485 }, { "epoch": 1.4489164086687307, "grad_norm": 0.09577716141939163, "learning_rate": 9.558110941926608e-05, "loss": 0.009, "step": 7486 }, { "epoch": 1.449109907120743, "grad_norm": 0.14017082750797272, "learning_rate": 9.557992361363204e-05, "loss": 0.0084, "step": 7487 }, { "epoch": 1.4493034055727554, "grad_norm": 0.10360392928123474, "learning_rate": 9.557873765713184e-05, "loss": 0.0099, "step": 7488 }, { "epoch": 1.4494969040247678, "grad_norm": 0.12781888246536255, "learning_rate": 9.557755154976992e-05, "loss": 0.0087, "step": 7489 }, { "epoch": 1.4496904024767803, "grad_norm": 0.042084481567144394, "learning_rate": 9.557636529155065e-05, "loss": 0.0078, "step": 7490 }, { "epoch": 1.4498839009287925, "grad_norm": 0.11095242947340012, "learning_rate": 9.557517888247844e-05, "loss": 0.0095, "step": 7491 }, { "epoch": 1.450077399380805, "grad_norm": 0.0740802139043808, "learning_rate": 9.557399232255772e-05, "loss": 0.0087, "step": 7492 }, { "epoch": 1.4502708978328174, "grad_norm": 0.048869069665670395, "learning_rate": 9.557280561179289e-05, "loss": 0.0093, "step": 7493 }, { "epoch": 1.4504643962848296, "grad_norm": 0.08322704583406448, "learning_rate": 9.557161875018837e-05, "loss": 0.009, "step": 7494 }, { "epoch": 1.450657894736842, "grad_norm": 0.06041518971323967, "learning_rate": 9.557043173774856e-05, "loss": 0.0096, "step": 7495 }, { "epoch": 1.4508513931888545, "grad_norm": 0.08290144056081772, "learning_rate": 9.556924457447789e-05, "loss": 0.0083, "step": 7496 }, { "epoch": 1.451044891640867, "grad_norm": 0.04850061610341072, "learning_rate": 9.556805726038077e-05, "loss": 0.008, "step": 7497 }, { "epoch": 1.4512383900928794, "grad_norm": 0.09261012822389603, "learning_rate": 9.556686979546161e-05, "loss": 0.0096, "step": 7498 }, { "epoch": 1.4514318885448916, "grad_norm": 0.05435584485530853, "learning_rate": 9.556568217972482e-05, "loss": 0.0086, "step": 7499 }, { "epoch": 1.451625386996904, "grad_norm": 0.11770126223564148, "learning_rate": 9.556449441317481e-05, "loss": 0.0107, "step": 7500 }, { "epoch": 1.4518188854489165, "grad_norm": 0.051220621913671494, "learning_rate": 9.556330649581601e-05, "loss": 0.0079, "step": 7501 }, { "epoch": 1.4520123839009287, "grad_norm": 0.12131380289793015, "learning_rate": 9.556211842765283e-05, "loss": 0.0099, "step": 7502 }, { "epoch": 1.4522058823529411, "grad_norm": 0.06413453817367554, "learning_rate": 9.55609302086897e-05, "loss": 0.0079, "step": 7503 }, { "epoch": 1.4523993808049536, "grad_norm": 0.10263293236494064, "learning_rate": 9.555974183893102e-05, "loss": 0.0091, "step": 7504 }, { "epoch": 1.452592879256966, "grad_norm": 0.07499992847442627, "learning_rate": 9.555855331838121e-05, "loss": 0.0065, "step": 7505 }, { "epoch": 1.4527863777089784, "grad_norm": 0.0854664221405983, "learning_rate": 9.555736464704469e-05, "loss": 0.0097, "step": 7506 }, { "epoch": 1.4529798761609907, "grad_norm": 0.09541017562150955, "learning_rate": 9.555617582492589e-05, "loss": 0.0093, "step": 7507 }, { "epoch": 
1.453173374613003, "grad_norm": 0.06775235384702682, "learning_rate": 9.55549868520292e-05, "loss": 0.0085, "step": 7508 }, { "epoch": 1.4533668730650156, "grad_norm": 0.07734058052301407, "learning_rate": 9.555379772835909e-05, "loss": 0.0107, "step": 7509 }, { "epoch": 1.4535603715170278, "grad_norm": 0.12448814511299133, "learning_rate": 9.555260845391995e-05, "loss": 0.0091, "step": 7510 }, { "epoch": 1.4537538699690402, "grad_norm": 0.07348643243312836, "learning_rate": 9.55514190287162e-05, "loss": 0.0077, "step": 7511 }, { "epoch": 1.4539473684210527, "grad_norm": 0.16529801487922668, "learning_rate": 9.555022945275225e-05, "loss": 0.0083, "step": 7512 }, { "epoch": 1.454140866873065, "grad_norm": 0.08995214104652405, "learning_rate": 9.554903972603257e-05, "loss": 0.0071, "step": 7513 }, { "epoch": 1.4543343653250773, "grad_norm": 0.07143361866474152, "learning_rate": 9.554784984856152e-05, "loss": 0.0085, "step": 7514 }, { "epoch": 1.4545278637770898, "grad_norm": 0.0811995342373848, "learning_rate": 9.554665982034358e-05, "loss": 0.009, "step": 7515 }, { "epoch": 1.4547213622291022, "grad_norm": 0.041896283626556396, "learning_rate": 9.554546964138314e-05, "loss": 0.0079, "step": 7516 }, { "epoch": 1.4549148606811144, "grad_norm": 0.06820490956306458, "learning_rate": 9.554427931168463e-05, "loss": 0.0083, "step": 7517 }, { "epoch": 1.4551083591331269, "grad_norm": 0.0745282992720604, "learning_rate": 9.554308883125248e-05, "loss": 0.0073, "step": 7518 }, { "epoch": 1.4553018575851393, "grad_norm": 0.05463526397943497, "learning_rate": 9.554189820009111e-05, "loss": 0.0081, "step": 7519 }, { "epoch": 1.4554953560371517, "grad_norm": 0.05826135352253914, "learning_rate": 9.554070741820497e-05, "loss": 0.0073, "step": 7520 }, { "epoch": 1.4556888544891642, "grad_norm": 0.06701968610286713, "learning_rate": 9.553951648559845e-05, "loss": 0.0089, "step": 7521 }, { "epoch": 1.4558823529411764, "grad_norm": 0.05255195125937462, "learning_rate": 9.553832540227601e-05, "loss": 0.0091, "step": 7522 }, { "epoch": 1.4560758513931888, "grad_norm": 0.06964699923992157, "learning_rate": 9.553713416824207e-05, "loss": 0.0097, "step": 7523 }, { "epoch": 1.4562693498452013, "grad_norm": 0.08443012833595276, "learning_rate": 9.553594278350105e-05, "loss": 0.0084, "step": 7524 }, { "epoch": 1.4564628482972135, "grad_norm": 0.09336927533149719, "learning_rate": 9.553475124805737e-05, "loss": 0.0082, "step": 7525 }, { "epoch": 1.456656346749226, "grad_norm": 0.05060577392578125, "learning_rate": 9.553355956191547e-05, "loss": 0.0086, "step": 7526 }, { "epoch": 1.4568498452012384, "grad_norm": 0.12291111797094345, "learning_rate": 9.553236772507982e-05, "loss": 0.0077, "step": 7527 }, { "epoch": 1.4570433436532508, "grad_norm": 0.04891195893287659, "learning_rate": 9.553117573755478e-05, "loss": 0.0083, "step": 7528 }, { "epoch": 1.4572368421052633, "grad_norm": 0.0667538121342659, "learning_rate": 9.552998359934482e-05, "loss": 0.0089, "step": 7529 }, { "epoch": 1.4574303405572755, "grad_norm": 0.054652728140354156, "learning_rate": 9.552879131045437e-05, "loss": 0.0093, "step": 7530 }, { "epoch": 1.457623839009288, "grad_norm": 0.04093044996261597, "learning_rate": 9.552759887088788e-05, "loss": 0.0095, "step": 7531 }, { "epoch": 1.4578173374613004, "grad_norm": 0.17496755719184875, "learning_rate": 9.552640628064974e-05, "loss": 0.0111, "step": 7532 }, { "epoch": 1.4580108359133126, "grad_norm": 0.11486298590898514, "learning_rate": 9.55252135397444e-05, "loss": 0.008, "step": 7533 }, { "epoch": 
1.458204334365325, "grad_norm": 0.08253036439418793, "learning_rate": 9.552402064817633e-05, "loss": 0.0077, "step": 7534 }, { "epoch": 1.4583978328173375, "grad_norm": 0.08617318421602249, "learning_rate": 9.552282760594991e-05, "loss": 0.0067, "step": 7535 }, { "epoch": 1.45859133126935, "grad_norm": 0.08700576424598694, "learning_rate": 9.552163441306964e-05, "loss": 0.0069, "step": 7536 }, { "epoch": 1.4587848297213624, "grad_norm": 0.126901313662529, "learning_rate": 9.552044106953988e-05, "loss": 0.007, "step": 7537 }, { "epoch": 1.4589783281733746, "grad_norm": 0.05132006108760834, "learning_rate": 9.551924757536511e-05, "loss": 0.0086, "step": 7538 }, { "epoch": 1.459171826625387, "grad_norm": 0.11351781338453293, "learning_rate": 9.55180539305498e-05, "loss": 0.0091, "step": 7539 }, { "epoch": 1.4593653250773992, "grad_norm": 0.10704211890697479, "learning_rate": 9.551686013509829e-05, "loss": 0.0088, "step": 7540 }, { "epoch": 1.4595588235294117, "grad_norm": 0.07493321597576141, "learning_rate": 9.551566618901511e-05, "loss": 0.0075, "step": 7541 }, { "epoch": 1.4597523219814241, "grad_norm": 0.1463630199432373, "learning_rate": 9.551447209230467e-05, "loss": 0.0084, "step": 7542 }, { "epoch": 1.4599458204334366, "grad_norm": 0.05103222653269768, "learning_rate": 9.551327784497139e-05, "loss": 0.0081, "step": 7543 }, { "epoch": 1.460139318885449, "grad_norm": 0.12154154479503632, "learning_rate": 9.551208344701972e-05, "loss": 0.0088, "step": 7544 }, { "epoch": 1.4603328173374612, "grad_norm": 0.1559150218963623, "learning_rate": 9.551088889845414e-05, "loss": 0.0095, "step": 7545 }, { "epoch": 1.4605263157894737, "grad_norm": 0.09825221449136734, "learning_rate": 9.550969419927904e-05, "loss": 0.0087, "step": 7546 }, { "epoch": 1.4607198142414861, "grad_norm": 0.18560746312141418, "learning_rate": 9.550849934949886e-05, "loss": 0.0086, "step": 7547 }, { "epoch": 1.4609133126934983, "grad_norm": 0.037995755672454834, "learning_rate": 9.55073043491181e-05, "loss": 0.0073, "step": 7548 }, { "epoch": 1.4611068111455108, "grad_norm": 0.1679142266511917, "learning_rate": 9.550610919814112e-05, "loss": 0.0073, "step": 7549 }, { "epoch": 1.4613003095975232, "grad_norm": 0.14440390467643738, "learning_rate": 9.550491389657244e-05, "loss": 0.0106, "step": 7550 }, { "epoch": 1.4614938080495357, "grad_norm": 0.11057236045598984, "learning_rate": 9.550371844441645e-05, "loss": 0.0096, "step": 7551 }, { "epoch": 1.461687306501548, "grad_norm": 0.178447887301445, "learning_rate": 9.550252284167762e-05, "loss": 0.0083, "step": 7552 }, { "epoch": 1.4618808049535603, "grad_norm": 0.07206639647483826, "learning_rate": 9.55013270883604e-05, "loss": 0.0077, "step": 7553 }, { "epoch": 1.4620743034055728, "grad_norm": 0.16355547308921814, "learning_rate": 9.550013118446923e-05, "loss": 0.0071, "step": 7554 }, { "epoch": 1.4622678018575852, "grad_norm": 0.15551842749118805, "learning_rate": 9.549893513000854e-05, "loss": 0.0081, "step": 7555 }, { "epoch": 1.4624613003095974, "grad_norm": 0.09677720069885254, "learning_rate": 9.549773892498278e-05, "loss": 0.0086, "step": 7556 }, { "epoch": 1.4626547987616099, "grad_norm": 0.18703147768974304, "learning_rate": 9.549654256939643e-05, "loss": 0.0083, "step": 7557 }, { "epoch": 1.4628482972136223, "grad_norm": 0.08214589953422546, "learning_rate": 9.549534606325389e-05, "loss": 0.0068, "step": 7558 }, { "epoch": 1.4630417956656347, "grad_norm": 0.10730655491352081, "learning_rate": 9.549414940655964e-05, "loss": 0.0094, "step": 7559 }, { "epoch": 
1.4632352941176472, "grad_norm": 0.17563098669052124, "learning_rate": 9.549295259931813e-05, "loss": 0.009, "step": 7560 }, { "epoch": 1.4634287925696594, "grad_norm": 0.050211552530527115, "learning_rate": 9.549175564153379e-05, "loss": 0.0082, "step": 7561 }, { "epoch": 1.4636222910216719, "grad_norm": 0.14663736522197723, "learning_rate": 9.549055853321108e-05, "loss": 0.0085, "step": 7562 }, { "epoch": 1.4638157894736843, "grad_norm": 0.08973600715398788, "learning_rate": 9.548936127435443e-05, "loss": 0.0081, "step": 7563 }, { "epoch": 1.4640092879256965, "grad_norm": 0.08465572446584702, "learning_rate": 9.548816386496835e-05, "loss": 0.0076, "step": 7564 }, { "epoch": 1.464202786377709, "grad_norm": 0.10822953283786774, "learning_rate": 9.548696630505722e-05, "loss": 0.0077, "step": 7565 }, { "epoch": 1.4643962848297214, "grad_norm": 0.06259685754776001, "learning_rate": 9.548576859462555e-05, "loss": 0.0081, "step": 7566 }, { "epoch": 1.4645897832817338, "grad_norm": 0.08489571511745453, "learning_rate": 9.548457073367776e-05, "loss": 0.0097, "step": 7567 }, { "epoch": 1.464783281733746, "grad_norm": 0.06374838948249817, "learning_rate": 9.548337272221832e-05, "loss": 0.0077, "step": 7568 }, { "epoch": 1.4649767801857585, "grad_norm": 0.07712788879871368, "learning_rate": 9.548217456025166e-05, "loss": 0.0091, "step": 7569 }, { "epoch": 1.465170278637771, "grad_norm": 0.05119612067937851, "learning_rate": 9.548097624778227e-05, "loss": 0.0076, "step": 7570 }, { "epoch": 1.4653637770897832, "grad_norm": 0.054094988852739334, "learning_rate": 9.547977778481457e-05, "loss": 0.0083, "step": 7571 }, { "epoch": 1.4655572755417956, "grad_norm": 0.06382311135530472, "learning_rate": 9.547857917135302e-05, "loss": 0.0073, "step": 7572 }, { "epoch": 1.465750773993808, "grad_norm": 0.050648465752601624, "learning_rate": 9.54773804074021e-05, "loss": 0.0091, "step": 7573 }, { "epoch": 1.4659442724458205, "grad_norm": 0.05246773734688759, "learning_rate": 9.547618149296625e-05, "loss": 0.0071, "step": 7574 }, { "epoch": 1.466137770897833, "grad_norm": 0.04636145383119583, "learning_rate": 9.547498242804994e-05, "loss": 0.0068, "step": 7575 }, { "epoch": 1.4663312693498451, "grad_norm": 0.09027273207902908, "learning_rate": 9.547378321265761e-05, "loss": 0.0076, "step": 7576 }, { "epoch": 1.4665247678018576, "grad_norm": 0.034364718943834305, "learning_rate": 9.547258384679373e-05, "loss": 0.0082, "step": 7577 }, { "epoch": 1.46671826625387, "grad_norm": 0.07078136503696442, "learning_rate": 9.547138433046275e-05, "loss": 0.0076, "step": 7578 }, { "epoch": 1.4669117647058822, "grad_norm": 0.06866902858018875, "learning_rate": 9.547018466366914e-05, "loss": 0.0078, "step": 7579 }, { "epoch": 1.4671052631578947, "grad_norm": 0.06018812581896782, "learning_rate": 9.546898484641736e-05, "loss": 0.0074, "step": 7580 }, { "epoch": 1.4672987616099071, "grad_norm": 0.046805720776319504, "learning_rate": 9.546778487871187e-05, "loss": 0.0094, "step": 7581 }, { "epoch": 1.4674922600619196, "grad_norm": 0.05048032104969025, "learning_rate": 9.546658476055711e-05, "loss": 0.0091, "step": 7582 }, { "epoch": 1.467685758513932, "grad_norm": 0.038993410766124725, "learning_rate": 9.546538449195758e-05, "loss": 0.0085, "step": 7583 }, { "epoch": 1.4678792569659442, "grad_norm": 0.04797722399234772, "learning_rate": 9.546418407291772e-05, "loss": 0.0085, "step": 7584 }, { "epoch": 1.4680727554179567, "grad_norm": 0.05178879201412201, "learning_rate": 9.546298350344198e-05, "loss": 0.0082, "step": 7585 }, { 
"epoch": 1.4682662538699691, "grad_norm": 0.06968439370393753, "learning_rate": 9.546178278353484e-05, "loss": 0.0075, "step": 7586 }, { "epoch": 1.4684597523219813, "grad_norm": 0.04030628502368927, "learning_rate": 9.546058191320076e-05, "loss": 0.0071, "step": 7587 }, { "epoch": 1.4686532507739938, "grad_norm": 0.08022435754537582, "learning_rate": 9.545938089244421e-05, "loss": 0.0085, "step": 7588 }, { "epoch": 1.4688467492260062, "grad_norm": 0.03224727511405945, "learning_rate": 9.545817972126965e-05, "loss": 0.0078, "step": 7589 }, { "epoch": 1.4690402476780187, "grad_norm": 0.07724949717521667, "learning_rate": 9.545697839968155e-05, "loss": 0.0068, "step": 7590 }, { "epoch": 1.4692337461300309, "grad_norm": 0.057108476758003235, "learning_rate": 9.545577692768436e-05, "loss": 0.0094, "step": 7591 }, { "epoch": 1.4694272445820433, "grad_norm": 0.07346215099096298, "learning_rate": 9.545457530528256e-05, "loss": 0.0093, "step": 7592 }, { "epoch": 1.4696207430340558, "grad_norm": 0.0768633559346199, "learning_rate": 9.545337353248064e-05, "loss": 0.0077, "step": 7593 }, { "epoch": 1.469814241486068, "grad_norm": 0.047820914536714554, "learning_rate": 9.545217160928304e-05, "loss": 0.0083, "step": 7594 }, { "epoch": 1.4700077399380804, "grad_norm": 0.06881329417228699, "learning_rate": 9.545096953569422e-05, "loss": 0.009, "step": 7595 }, { "epoch": 1.4702012383900929, "grad_norm": 0.04642875865101814, "learning_rate": 9.544976731171868e-05, "loss": 0.0083, "step": 7596 }, { "epoch": 1.4703947368421053, "grad_norm": 0.05477463826537132, "learning_rate": 9.544856493736086e-05, "loss": 0.0075, "step": 7597 }, { "epoch": 1.4705882352941178, "grad_norm": 0.033391695469617844, "learning_rate": 9.544736241262524e-05, "loss": 0.0077, "step": 7598 }, { "epoch": 1.47078173374613, "grad_norm": 0.043718114495277405, "learning_rate": 9.54461597375163e-05, "loss": 0.0079, "step": 7599 }, { "epoch": 1.4709752321981424, "grad_norm": 0.049555789679288864, "learning_rate": 9.54449569120385e-05, "loss": 0.0078, "step": 7600 }, { "epoch": 1.4711687306501549, "grad_norm": 0.05361936613917351, "learning_rate": 9.544375393619633e-05, "loss": 0.0074, "step": 7601 }, { "epoch": 1.471362229102167, "grad_norm": 0.086073137819767, "learning_rate": 9.544255080999423e-05, "loss": 0.0076, "step": 7602 }, { "epoch": 1.4715557275541795, "grad_norm": 0.07175371050834656, "learning_rate": 9.544134753343669e-05, "loss": 0.0083, "step": 7603 }, { "epoch": 1.471749226006192, "grad_norm": 0.06031306833028793, "learning_rate": 9.544014410652819e-05, "loss": 0.0096, "step": 7604 }, { "epoch": 1.4719427244582044, "grad_norm": 0.04730663821101189, "learning_rate": 9.54389405292732e-05, "loss": 0.0083, "step": 7605 }, { "epoch": 1.4721362229102168, "grad_norm": 0.059138018637895584, "learning_rate": 9.54377368016762e-05, "loss": 0.0071, "step": 7606 }, { "epoch": 1.472329721362229, "grad_norm": 0.054376427084207535, "learning_rate": 9.543653292374166e-05, "loss": 0.0091, "step": 7607 }, { "epoch": 1.4725232198142415, "grad_norm": 0.06184082105755806, "learning_rate": 9.543532889547406e-05, "loss": 0.0075, "step": 7608 }, { "epoch": 1.472716718266254, "grad_norm": 0.10829722136259079, "learning_rate": 9.543412471687784e-05, "loss": 0.0095, "step": 7609 }, { "epoch": 1.4729102167182662, "grad_norm": 0.042729318141937256, "learning_rate": 9.543292038795753e-05, "loss": 0.0083, "step": 7610 }, { "epoch": 1.4731037151702786, "grad_norm": 0.11600162088871002, "learning_rate": 9.543171590871758e-05, "loss": 0.0084, "step": 7611 }, { 
"epoch": 1.473297213622291, "grad_norm": 0.06159479543566704, "learning_rate": 9.543051127916248e-05, "loss": 0.0079, "step": 7612 }, { "epoch": 1.4734907120743035, "grad_norm": 0.12561403214931488, "learning_rate": 9.54293064992967e-05, "loss": 0.0087, "step": 7613 }, { "epoch": 1.4736842105263157, "grad_norm": 0.10046006739139557, "learning_rate": 9.54281015691247e-05, "loss": 0.0087, "step": 7614 }, { "epoch": 1.4738777089783281, "grad_norm": 0.12072626501321793, "learning_rate": 9.5426896488651e-05, "loss": 0.0084, "step": 7615 }, { "epoch": 1.4740712074303406, "grad_norm": 0.12177567183971405, "learning_rate": 9.542569125788007e-05, "loss": 0.0073, "step": 7616 }, { "epoch": 1.4742647058823528, "grad_norm": 0.10272322595119476, "learning_rate": 9.542448587681636e-05, "loss": 0.0075, "step": 7617 }, { "epoch": 1.4744582043343653, "grad_norm": 0.13324475288391113, "learning_rate": 9.542328034546438e-05, "loss": 0.0068, "step": 7618 }, { "epoch": 1.4746517027863777, "grad_norm": 0.13606460392475128, "learning_rate": 9.54220746638286e-05, "loss": 0.0087, "step": 7619 }, { "epoch": 1.4748452012383901, "grad_norm": 0.1593523472547531, "learning_rate": 9.542086883191351e-05, "loss": 0.0106, "step": 7620 }, { "epoch": 1.4750386996904026, "grad_norm": 0.11652909964323044, "learning_rate": 9.541966284972358e-05, "loss": 0.0082, "step": 7621 }, { "epoch": 1.4752321981424148, "grad_norm": 0.14093737304210663, "learning_rate": 9.541845671726332e-05, "loss": 0.0097, "step": 7622 }, { "epoch": 1.4754256965944272, "grad_norm": 0.09280698001384735, "learning_rate": 9.54172504345372e-05, "loss": 0.0083, "step": 7623 }, { "epoch": 1.4756191950464397, "grad_norm": 0.10492711514234543, "learning_rate": 9.54160440015497e-05, "loss": 0.0084, "step": 7624 }, { "epoch": 1.475812693498452, "grad_norm": 0.05724724754691124, "learning_rate": 9.541483741830528e-05, "loss": 0.0098, "step": 7625 }, { "epoch": 1.4760061919504643, "grad_norm": 0.07586781680583954, "learning_rate": 9.541363068480847e-05, "loss": 0.0084, "step": 7626 }, { "epoch": 1.4761996904024768, "grad_norm": 0.038914065808057785, "learning_rate": 9.541242380106374e-05, "loss": 0.0085, "step": 7627 }, { "epoch": 1.4763931888544892, "grad_norm": 0.07652439922094345, "learning_rate": 9.541121676707559e-05, "loss": 0.0086, "step": 7628 }, { "epoch": 1.4765866873065017, "grad_norm": 0.043237462639808655, "learning_rate": 9.541000958284848e-05, "loss": 0.0105, "step": 7629 }, { "epoch": 1.4767801857585139, "grad_norm": 0.08625654131174088, "learning_rate": 9.540880224838692e-05, "loss": 0.0081, "step": 7630 }, { "epoch": 1.4769736842105263, "grad_norm": 0.03577258065342903, "learning_rate": 9.540759476369537e-05, "loss": 0.0089, "step": 7631 }, { "epoch": 1.4771671826625388, "grad_norm": 0.08111244440078735, "learning_rate": 9.540638712877836e-05, "loss": 0.0092, "step": 7632 }, { "epoch": 1.477360681114551, "grad_norm": 0.07045236974954605, "learning_rate": 9.540517934364036e-05, "loss": 0.0096, "step": 7633 }, { "epoch": 1.4775541795665634, "grad_norm": 0.05762193724513054, "learning_rate": 9.540397140828584e-05, "loss": 0.0064, "step": 7634 }, { "epoch": 1.4777476780185759, "grad_norm": 0.09201257675886154, "learning_rate": 9.540276332271933e-05, "loss": 0.0084, "step": 7635 }, { "epoch": 1.4779411764705883, "grad_norm": 0.047803543508052826, "learning_rate": 9.540155508694528e-05, "loss": 0.0075, "step": 7636 }, { "epoch": 1.4781346749226008, "grad_norm": 0.08683755993843079, "learning_rate": 9.540034670096823e-05, "loss": 0.0085, "step": 7637 }, { 
"epoch": 1.478328173374613, "grad_norm": 0.07668758928775787, "learning_rate": 9.539913816479262e-05, "loss": 0.0099, "step": 7638 }, { "epoch": 1.4785216718266254, "grad_norm": 0.05710829794406891, "learning_rate": 9.539792947842299e-05, "loss": 0.0079, "step": 7639 }, { "epoch": 1.4787151702786376, "grad_norm": 0.05744743347167969, "learning_rate": 9.53967206418638e-05, "loss": 0.0078, "step": 7640 }, { "epoch": 1.47890866873065, "grad_norm": 0.052305933088064194, "learning_rate": 9.539551165511955e-05, "loss": 0.0083, "step": 7641 }, { "epoch": 1.4791021671826625, "grad_norm": 0.051184773445129395, "learning_rate": 9.539430251819475e-05, "loss": 0.0082, "step": 7642 }, { "epoch": 1.479295665634675, "grad_norm": 0.06831096112728119, "learning_rate": 9.53930932310939e-05, "loss": 0.0065, "step": 7643 }, { "epoch": 1.4794891640866874, "grad_norm": 0.05168037489056587, "learning_rate": 9.539188379382147e-05, "loss": 0.0084, "step": 7644 }, { "epoch": 1.4796826625386996, "grad_norm": 0.0769650936126709, "learning_rate": 9.539067420638196e-05, "loss": 0.0101, "step": 7645 }, { "epoch": 1.479876160990712, "grad_norm": 0.08675100654363632, "learning_rate": 9.538946446877989e-05, "loss": 0.0078, "step": 7646 }, { "epoch": 1.4800696594427245, "grad_norm": 0.09342297166585922, "learning_rate": 9.538825458101975e-05, "loss": 0.0085, "step": 7647 }, { "epoch": 1.4802631578947367, "grad_norm": 0.06675311923027039, "learning_rate": 9.5387044543106e-05, "loss": 0.0086, "step": 7648 }, { "epoch": 1.4804566563467492, "grad_norm": 0.045619893819093704, "learning_rate": 9.538583435504321e-05, "loss": 0.008, "step": 7649 }, { "epoch": 1.4806501547987616, "grad_norm": 0.06767554581165314, "learning_rate": 9.53846240168358e-05, "loss": 0.0087, "step": 7650 }, { "epoch": 1.480843653250774, "grad_norm": 0.11344871670007706, "learning_rate": 9.538341352848833e-05, "loss": 0.0089, "step": 7651 }, { "epoch": 1.4810371517027865, "grad_norm": 0.09659238904714584, "learning_rate": 9.538220289000528e-05, "loss": 0.0088, "step": 7652 }, { "epoch": 1.4812306501547987, "grad_norm": 0.11395543813705444, "learning_rate": 9.538099210139115e-05, "loss": 0.0075, "step": 7653 }, { "epoch": 1.4814241486068112, "grad_norm": 0.03910593315958977, "learning_rate": 9.537978116265044e-05, "loss": 0.0092, "step": 7654 }, { "epoch": 1.4816176470588236, "grad_norm": 0.10351505130529404, "learning_rate": 9.537857007378765e-05, "loss": 0.0098, "step": 7655 }, { "epoch": 1.4818111455108358, "grad_norm": 0.021334435790777206, "learning_rate": 9.537735883480729e-05, "loss": 0.007, "step": 7656 }, { "epoch": 1.4820046439628483, "grad_norm": 0.07754552364349365, "learning_rate": 9.537614744571386e-05, "loss": 0.0082, "step": 7657 }, { "epoch": 1.4821981424148607, "grad_norm": 0.0371076837182045, "learning_rate": 9.537493590651185e-05, "loss": 0.0079, "step": 7658 }, { "epoch": 1.4823916408668731, "grad_norm": 0.08554574847221375, "learning_rate": 9.53737242172058e-05, "loss": 0.0085, "step": 7659 }, { "epoch": 1.4825851393188856, "grad_norm": 0.04660254344344139, "learning_rate": 9.537251237780018e-05, "loss": 0.009, "step": 7660 }, { "epoch": 1.4827786377708978, "grad_norm": 0.09556183964014053, "learning_rate": 9.53713003882995e-05, "loss": 0.0079, "step": 7661 }, { "epoch": 1.4829721362229102, "grad_norm": 0.05735746771097183, "learning_rate": 9.537008824870827e-05, "loss": 0.0075, "step": 7662 }, { "epoch": 1.4831656346749227, "grad_norm": 0.0641329362988472, "learning_rate": 9.5368875959031e-05, "loss": 0.0075, "step": 7663 }, { "epoch": 
1.483359133126935, "grad_norm": 0.1669037938117981, "learning_rate": 9.536766351927221e-05, "loss": 0.0097, "step": 7664 }, { "epoch": 1.4835526315789473, "grad_norm": 0.0592077262699604, "learning_rate": 9.536645092943639e-05, "loss": 0.0072, "step": 7665 }, { "epoch": 1.4837461300309598, "grad_norm": 0.12182366847991943, "learning_rate": 9.536523818952806e-05, "loss": 0.0092, "step": 7666 }, { "epoch": 1.4839396284829722, "grad_norm": 0.1336876004934311, "learning_rate": 9.536402529955171e-05, "loss": 0.0091, "step": 7667 }, { "epoch": 1.4841331269349844, "grad_norm": 0.16208404302597046, "learning_rate": 9.536281225951186e-05, "loss": 0.007, "step": 7668 }, { "epoch": 1.484326625386997, "grad_norm": 0.17332759499549866, "learning_rate": 9.536159906941301e-05, "loss": 0.0076, "step": 7669 }, { "epoch": 1.4845201238390093, "grad_norm": 0.11324383318424225, "learning_rate": 9.53603857292597e-05, "loss": 0.0094, "step": 7670 }, { "epoch": 1.4847136222910216, "grad_norm": 0.20256970822811127, "learning_rate": 9.535917223905641e-05, "loss": 0.0101, "step": 7671 }, { "epoch": 1.484907120743034, "grad_norm": 0.06461775302886963, "learning_rate": 9.535795859880765e-05, "loss": 0.0104, "step": 7672 }, { "epoch": 1.4851006191950464, "grad_norm": 0.2590065002441406, "learning_rate": 9.535674480851796e-05, "loss": 0.0081, "step": 7673 }, { "epoch": 1.4852941176470589, "grad_norm": 0.06501574069261551, "learning_rate": 9.535553086819184e-05, "loss": 0.0081, "step": 7674 }, { "epoch": 1.4854876160990713, "grad_norm": 0.19709616899490356, "learning_rate": 9.535431677783379e-05, "loss": 0.0077, "step": 7675 }, { "epoch": 1.4856811145510835, "grad_norm": 0.12131915241479874, "learning_rate": 9.535310253744835e-05, "loss": 0.0083, "step": 7676 }, { "epoch": 1.485874613003096, "grad_norm": 0.09826412051916122, "learning_rate": 9.535188814703999e-05, "loss": 0.0092, "step": 7677 }, { "epoch": 1.4860681114551084, "grad_norm": 0.3184124529361725, "learning_rate": 9.535067360661328e-05, "loss": 0.0091, "step": 7678 }, { "epoch": 1.4862616099071206, "grad_norm": 0.11163698881864548, "learning_rate": 9.53494589161727e-05, "loss": 0.007, "step": 7679 }, { "epoch": 1.486455108359133, "grad_norm": 0.23077844083309174, "learning_rate": 9.534824407572278e-05, "loss": 0.0085, "step": 7680 }, { "epoch": 1.4866486068111455, "grad_norm": 0.13765926659107208, "learning_rate": 9.534702908526801e-05, "loss": 0.0092, "step": 7681 }, { "epoch": 1.486842105263158, "grad_norm": 0.16908203065395355, "learning_rate": 9.534581394481293e-05, "loss": 0.0088, "step": 7682 }, { "epoch": 1.4870356037151704, "grad_norm": 0.16889964044094086, "learning_rate": 9.534459865436209e-05, "loss": 0.0101, "step": 7683 }, { "epoch": 1.4872291021671826, "grad_norm": 0.10803141444921494, "learning_rate": 9.534338321391992e-05, "loss": 0.0083, "step": 7684 }, { "epoch": 1.487422600619195, "grad_norm": 0.14417193830013275, "learning_rate": 9.534216762349102e-05, "loss": 0.009, "step": 7685 }, { "epoch": 1.4876160990712075, "grad_norm": 0.11352455615997314, "learning_rate": 9.534095188307989e-05, "loss": 0.0102, "step": 7686 }, { "epoch": 1.4878095975232197, "grad_norm": 0.13370977342128754, "learning_rate": 9.533973599269104e-05, "loss": 0.0086, "step": 7687 }, { "epoch": 1.4880030959752322, "grad_norm": 0.11591845750808716, "learning_rate": 9.533851995232897e-05, "loss": 0.0082, "step": 7688 }, { "epoch": 1.4881965944272446, "grad_norm": 0.13013513386249542, "learning_rate": 9.533730376199825e-05, "loss": 0.0099, "step": 7689 }, { "epoch": 
1.488390092879257, "grad_norm": 0.11481918394565582, "learning_rate": 9.533608742170336e-05, "loss": 0.0102, "step": 7690 }, { "epoch": 1.4885835913312693, "grad_norm": 0.14830170571804047, "learning_rate": 9.533487093144883e-05, "loss": 0.0093, "step": 7691 }, { "epoch": 1.4887770897832817, "grad_norm": 0.07337785512208939, "learning_rate": 9.53336542912392e-05, "loss": 0.008, "step": 7692 }, { "epoch": 1.4889705882352942, "grad_norm": 0.10007946193218231, "learning_rate": 9.533243750107899e-05, "loss": 0.0079, "step": 7693 }, { "epoch": 1.4891640866873064, "grad_norm": 0.12362468987703323, "learning_rate": 9.53312205609727e-05, "loss": 0.0083, "step": 7694 }, { "epoch": 1.4893575851393188, "grad_norm": 0.13856598734855652, "learning_rate": 9.533000347092487e-05, "loss": 0.008, "step": 7695 }, { "epoch": 1.4895510835913313, "grad_norm": 0.13640139997005463, "learning_rate": 9.532878623094003e-05, "loss": 0.009, "step": 7696 }, { "epoch": 1.4897445820433437, "grad_norm": 0.11922892928123474, "learning_rate": 9.53275688410227e-05, "loss": 0.0079, "step": 7697 }, { "epoch": 1.4899380804953561, "grad_norm": 0.14165247976779938, "learning_rate": 9.532635130117739e-05, "loss": 0.009, "step": 7698 }, { "epoch": 1.4901315789473684, "grad_norm": 0.07999342679977417, "learning_rate": 9.532513361140865e-05, "loss": 0.0066, "step": 7699 }, { "epoch": 1.4903250773993808, "grad_norm": 0.09962738305330276, "learning_rate": 9.532391577172102e-05, "loss": 0.0081, "step": 7700 }, { "epoch": 1.4905185758513932, "grad_norm": 0.08043226599693298, "learning_rate": 9.532269778211898e-05, "loss": 0.009, "step": 7701 }, { "epoch": 1.4907120743034055, "grad_norm": 0.06399957090616226, "learning_rate": 9.53214796426071e-05, "loss": 0.0088, "step": 7702 }, { "epoch": 1.490905572755418, "grad_norm": 0.13053491711616516, "learning_rate": 9.532026135318989e-05, "loss": 0.0076, "step": 7703 }, { "epoch": 1.4910990712074303, "grad_norm": 0.08304692059755325, "learning_rate": 9.531904291387189e-05, "loss": 0.0094, "step": 7704 }, { "epoch": 1.4912925696594428, "grad_norm": 0.0910971611738205, "learning_rate": 9.531782432465763e-05, "loss": 0.0083, "step": 7705 }, { "epoch": 1.4914860681114552, "grad_norm": 0.07574979215860367, "learning_rate": 9.531660558555162e-05, "loss": 0.0079, "step": 7706 }, { "epoch": 1.4916795665634675, "grad_norm": 0.07097972184419632, "learning_rate": 9.53153866965584e-05, "loss": 0.0084, "step": 7707 }, { "epoch": 1.49187306501548, "grad_norm": 0.07596451789140701, "learning_rate": 9.531416765768251e-05, "loss": 0.0088, "step": 7708 }, { "epoch": 1.4920665634674923, "grad_norm": 0.06750088185071945, "learning_rate": 9.531294846892849e-05, "loss": 0.0092, "step": 7709 }, { "epoch": 1.4922600619195046, "grad_norm": 0.07691973447799683, "learning_rate": 9.531172913030084e-05, "loss": 0.0084, "step": 7710 }, { "epoch": 1.492453560371517, "grad_norm": 0.06402169167995453, "learning_rate": 9.531050964180413e-05, "loss": 0.0084, "step": 7711 }, { "epoch": 1.4926470588235294, "grad_norm": 0.07940913736820221, "learning_rate": 9.530929000344287e-05, "loss": 0.011, "step": 7712 }, { "epoch": 1.4928405572755419, "grad_norm": 0.06339016556739807, "learning_rate": 9.530807021522162e-05, "loss": 0.0083, "step": 7713 }, { "epoch": 1.493034055727554, "grad_norm": 0.12236376106739044, "learning_rate": 9.530685027714487e-05, "loss": 0.0093, "step": 7714 }, { "epoch": 1.4932275541795665, "grad_norm": 0.04728218540549278, "learning_rate": 9.530563018921721e-05, "loss": 0.0085, "step": 7715 }, { "epoch": 
1.493421052631579, "grad_norm": 0.14786610007286072, "learning_rate": 9.530440995144313e-05, "loss": 0.0078, "step": 7716 }, { "epoch": 1.4936145510835912, "grad_norm": 0.08793649077415466, "learning_rate": 9.530318956382719e-05, "loss": 0.0089, "step": 7717 }, { "epoch": 1.4938080495356036, "grad_norm": 0.12246806174516678, "learning_rate": 9.530196902637393e-05, "loss": 0.0072, "step": 7718 }, { "epoch": 1.494001547987616, "grad_norm": 0.09603004157543182, "learning_rate": 9.530074833908787e-05, "loss": 0.0079, "step": 7719 }, { "epoch": 1.4941950464396285, "grad_norm": 0.08585312217473984, "learning_rate": 9.529952750197356e-05, "loss": 0.0095, "step": 7720 }, { "epoch": 1.494388544891641, "grad_norm": 0.16103090345859528, "learning_rate": 9.529830651503553e-05, "loss": 0.009, "step": 7721 }, { "epoch": 1.4945820433436532, "grad_norm": 0.1349996030330658, "learning_rate": 9.529708537827833e-05, "loss": 0.0091, "step": 7722 }, { "epoch": 1.4947755417956656, "grad_norm": 0.08793836086988449, "learning_rate": 9.52958640917065e-05, "loss": 0.0087, "step": 7723 }, { "epoch": 1.494969040247678, "grad_norm": 0.19121935963630676, "learning_rate": 9.529464265532459e-05, "loss": 0.0087, "step": 7724 }, { "epoch": 1.4951625386996903, "grad_norm": 0.06129162758588791, "learning_rate": 9.52934210691371e-05, "loss": 0.0073, "step": 7725 }, { "epoch": 1.4953560371517027, "grad_norm": 0.19488473236560822, "learning_rate": 9.529219933314863e-05, "loss": 0.008, "step": 7726 }, { "epoch": 1.4955495356037152, "grad_norm": 0.13169632852077484, "learning_rate": 9.529097744736367e-05, "loss": 0.0073, "step": 7727 }, { "epoch": 1.4957430340557276, "grad_norm": 0.1125340685248375, "learning_rate": 9.528975541178678e-05, "loss": 0.0075, "step": 7728 }, { "epoch": 1.49593653250774, "grad_norm": 0.09658318012952805, "learning_rate": 9.52885332264225e-05, "loss": 0.0084, "step": 7729 }, { "epoch": 1.4961300309597523, "grad_norm": 0.10205879807472229, "learning_rate": 9.528731089127541e-05, "loss": 0.0088, "step": 7730 }, { "epoch": 1.4963235294117647, "grad_norm": 0.08500822633504868, "learning_rate": 9.528608840635e-05, "loss": 0.01, "step": 7731 }, { "epoch": 1.4965170278637772, "grad_norm": 0.18204672634601593, "learning_rate": 9.528486577165085e-05, "loss": 0.0092, "step": 7732 }, { "epoch": 1.4967105263157894, "grad_norm": 0.08403930068016052, "learning_rate": 9.52836429871825e-05, "loss": 0.007, "step": 7733 }, { "epoch": 1.4969040247678018, "grad_norm": 0.1802515685558319, "learning_rate": 9.52824200529495e-05, "loss": 0.0096, "step": 7734 }, { "epoch": 1.4970975232198143, "grad_norm": 0.0856381505727768, "learning_rate": 9.528119696895635e-05, "loss": 0.0074, "step": 7735 }, { "epoch": 1.4972910216718267, "grad_norm": 0.1310485154390335, "learning_rate": 9.527997373520767e-05, "loss": 0.0083, "step": 7736 }, { "epoch": 1.4974845201238391, "grad_norm": 0.13581904768943787, "learning_rate": 9.527875035170796e-05, "loss": 0.0088, "step": 7737 }, { "epoch": 1.4976780185758514, "grad_norm": 0.06031809747219086, "learning_rate": 9.52775268184618e-05, "loss": 0.0094, "step": 7738 }, { "epoch": 1.4978715170278638, "grad_norm": 0.1387447565793991, "learning_rate": 9.52763031354737e-05, "loss": 0.01, "step": 7739 }, { "epoch": 1.498065015479876, "grad_norm": 0.044983357191085815, "learning_rate": 9.527507930274823e-05, "loss": 0.0095, "step": 7740 }, { "epoch": 1.4982585139318885, "grad_norm": 0.10869541019201279, "learning_rate": 9.527385532028996e-05, "loss": 0.0093, "step": 7741 }, { "epoch": 1.498452012383901, 
"grad_norm": 0.07925143837928772, "learning_rate": 9.527263118810338e-05, "loss": 0.0083, "step": 7742 }, { "epoch": 1.4986455108359134, "grad_norm": 0.07479136437177658, "learning_rate": 9.52714069061931e-05, "loss": 0.0071, "step": 7743 }, { "epoch": 1.4988390092879258, "grad_norm": 0.078960120677948, "learning_rate": 9.527018247456366e-05, "loss": 0.007, "step": 7744 }, { "epoch": 1.499032507739938, "grad_norm": 0.06799779832363129, "learning_rate": 9.52689578932196e-05, "loss": 0.0076, "step": 7745 }, { "epoch": 1.4992260061919505, "grad_norm": 0.05374309793114662, "learning_rate": 9.526773316216547e-05, "loss": 0.0088, "step": 7746 }, { "epoch": 1.499419504643963, "grad_norm": 0.09437815845012665, "learning_rate": 9.526650828140585e-05, "loss": 0.0086, "step": 7747 }, { "epoch": 1.4996130030959751, "grad_norm": 0.04460340738296509, "learning_rate": 9.526528325094526e-05, "loss": 0.0077, "step": 7748 }, { "epoch": 1.4998065015479876, "grad_norm": 0.08755359053611755, "learning_rate": 9.526405807078826e-05, "loss": 0.009, "step": 7749 }, { "epoch": 1.5, "grad_norm": 0.07226257026195526, "learning_rate": 9.526283274093942e-05, "loss": 0.0085, "step": 7750 }, { "epoch": 1.5001934984520124, "grad_norm": 0.04908905178308487, "learning_rate": 9.52616072614033e-05, "loss": 0.0073, "step": 7751 }, { "epoch": 1.5003869969040249, "grad_norm": 0.0867113247513771, "learning_rate": 9.526038163218442e-05, "loss": 0.009, "step": 7752 }, { "epoch": 1.500580495356037, "grad_norm": 0.05446907505393028, "learning_rate": 9.525915585328739e-05, "loss": 0.0078, "step": 7753 }, { "epoch": 1.5007739938080495, "grad_norm": 0.08250056207180023, "learning_rate": 9.525792992471672e-05, "loss": 0.0068, "step": 7754 }, { "epoch": 1.5009674922600618, "grad_norm": 0.09123343974351883, "learning_rate": 9.5256703846477e-05, "loss": 0.0091, "step": 7755 }, { "epoch": 1.5011609907120742, "grad_norm": 0.08909082412719727, "learning_rate": 9.525547761857277e-05, "loss": 0.0075, "step": 7756 }, { "epoch": 1.5013544891640866, "grad_norm": 0.09409059584140778, "learning_rate": 9.525425124100858e-05, "loss": 0.0073, "step": 7757 }, { "epoch": 1.501547987616099, "grad_norm": 0.07748932391405106, "learning_rate": 9.525302471378902e-05, "loss": 0.0072, "step": 7758 }, { "epoch": 1.5017414860681115, "grad_norm": 0.08841713517904282, "learning_rate": 9.525179803691864e-05, "loss": 0.0075, "step": 7759 }, { "epoch": 1.501934984520124, "grad_norm": 0.0879068523645401, "learning_rate": 9.525057121040197e-05, "loss": 0.0106, "step": 7760 }, { "epoch": 1.5021284829721362, "grad_norm": 0.08286841958761215, "learning_rate": 9.52493442342436e-05, "loss": 0.008, "step": 7761 }, { "epoch": 1.5023219814241486, "grad_norm": 0.1004939153790474, "learning_rate": 9.524811710844808e-05, "loss": 0.0072, "step": 7762 }, { "epoch": 1.5025154798761609, "grad_norm": 0.05540371313691139, "learning_rate": 9.524688983302e-05, "loss": 0.011, "step": 7763 }, { "epoch": 1.5027089783281733, "grad_norm": 0.11088036745786667, "learning_rate": 9.524566240796389e-05, "loss": 0.0079, "step": 7764 }, { "epoch": 1.5029024767801857, "grad_norm": 0.057605959475040436, "learning_rate": 9.524443483328433e-05, "loss": 0.0077, "step": 7765 }, { "epoch": 1.5030959752321982, "grad_norm": 0.09194640070199966, "learning_rate": 9.524320710898585e-05, "loss": 0.009, "step": 7766 }, { "epoch": 1.5032894736842106, "grad_norm": 0.07667584717273712, "learning_rate": 9.524197923507308e-05, "loss": 0.0082, "step": 7767 }, { "epoch": 1.503482972136223, "grad_norm": 
0.06103472039103508, "learning_rate": 9.524075121155051e-05, "loss": 0.009, "step": 7768 }, { "epoch": 1.5036764705882353, "grad_norm": 0.09633610397577286, "learning_rate": 9.523952303842277e-05, "loss": 0.0082, "step": 7769 }, { "epoch": 1.5038699690402477, "grad_norm": 0.046987999230623245, "learning_rate": 9.523829471569439e-05, "loss": 0.0078, "step": 7770 }, { "epoch": 1.50406346749226, "grad_norm": 0.09253408014774323, "learning_rate": 9.523706624336994e-05, "loss": 0.0092, "step": 7771 }, { "epoch": 1.5042569659442724, "grad_norm": 0.05916983261704445, "learning_rate": 9.5235837621454e-05, "loss": 0.0092, "step": 7772 }, { "epoch": 1.5044504643962848, "grad_norm": 0.08242719620466232, "learning_rate": 9.523460884995113e-05, "loss": 0.0066, "step": 7773 }, { "epoch": 1.5046439628482973, "grad_norm": 0.139897882938385, "learning_rate": 9.523337992886589e-05, "loss": 0.0082, "step": 7774 }, { "epoch": 1.5048374613003097, "grad_norm": 0.06348425894975662, "learning_rate": 9.523215085820287e-05, "loss": 0.0089, "step": 7775 }, { "epoch": 1.505030959752322, "grad_norm": 0.14780932664871216, "learning_rate": 9.523092163796662e-05, "loss": 0.0092, "step": 7776 }, { "epoch": 1.5052244582043344, "grad_norm": 0.08756519854068756, "learning_rate": 9.522969226816172e-05, "loss": 0.0082, "step": 7777 }, { "epoch": 1.5054179566563466, "grad_norm": 0.14444762468338013, "learning_rate": 9.522846274879275e-05, "loss": 0.0085, "step": 7778 }, { "epoch": 1.505611455108359, "grad_norm": 0.1517372578382492, "learning_rate": 9.522723307986425e-05, "loss": 0.0075, "step": 7779 }, { "epoch": 1.5058049535603715, "grad_norm": 0.09752009809017181, "learning_rate": 9.52260032613808e-05, "loss": 0.0083, "step": 7780 }, { "epoch": 1.505998452012384, "grad_norm": 0.23390688002109528, "learning_rate": 9.5224773293347e-05, "loss": 0.008, "step": 7781 }, { "epoch": 1.5061919504643964, "grad_norm": 0.05295002833008766, "learning_rate": 9.522354317576739e-05, "loss": 0.0091, "step": 7782 }, { "epoch": 1.5063854489164088, "grad_norm": 0.23852749168872833, "learning_rate": 9.522231290864658e-05, "loss": 0.0075, "step": 7783 }, { "epoch": 1.506578947368421, "grad_norm": 0.06880012899637222, "learning_rate": 9.522108249198909e-05, "loss": 0.0101, "step": 7784 }, { "epoch": 1.5067724458204335, "grad_norm": 0.1661946028470993, "learning_rate": 9.521985192579954e-05, "loss": 0.0097, "step": 7785 }, { "epoch": 1.5069659442724457, "grad_norm": 0.1487649530172348, "learning_rate": 9.52186212100825e-05, "loss": 0.0081, "step": 7786 }, { "epoch": 1.5071594427244581, "grad_norm": 0.17489302158355713, "learning_rate": 9.521739034484254e-05, "loss": 0.0079, "step": 7787 }, { "epoch": 1.5073529411764706, "grad_norm": 0.13280637562274933, "learning_rate": 9.52161593300842e-05, "loss": 0.0084, "step": 7788 }, { "epoch": 1.507546439628483, "grad_norm": 0.10309208929538727, "learning_rate": 9.521492816581213e-05, "loss": 0.0094, "step": 7789 }, { "epoch": 1.5077399380804954, "grad_norm": 0.16581383347511292, "learning_rate": 9.521369685203084e-05, "loss": 0.0079, "step": 7790 }, { "epoch": 1.5079334365325079, "grad_norm": 0.07629954814910889, "learning_rate": 9.521246538874494e-05, "loss": 0.0082, "step": 7791 }, { "epoch": 1.50812693498452, "grad_norm": 0.13754041492938995, "learning_rate": 9.5211233775959e-05, "loss": 0.0089, "step": 7792 }, { "epoch": 1.5083204334365325, "grad_norm": 0.06420362740755081, "learning_rate": 9.521000201367761e-05, "loss": 0.0092, "step": 7793 }, { "epoch": 1.5085139318885448, "grad_norm": 
0.11640698462724686, "learning_rate": 9.520877010190534e-05, "loss": 0.0078, "step": 7794 }, { "epoch": 1.5087074303405572, "grad_norm": 0.06129865720868111, "learning_rate": 9.520753804064677e-05, "loss": 0.0075, "step": 7795 }, { "epoch": 1.5089009287925697, "grad_norm": 0.0861811563372612, "learning_rate": 9.520630582990647e-05, "loss": 0.0074, "step": 7796 }, { "epoch": 1.509094427244582, "grad_norm": 0.10731629282236099, "learning_rate": 9.520507346968904e-05, "loss": 0.0097, "step": 7797 }, { "epoch": 1.5092879256965945, "grad_norm": 0.16308584809303284, "learning_rate": 9.520384095999904e-05, "loss": 0.0083, "step": 7798 }, { "epoch": 1.509481424148607, "grad_norm": 0.09893795102834702, "learning_rate": 9.52026083008411e-05, "loss": 0.0097, "step": 7799 }, { "epoch": 1.5096749226006192, "grad_norm": 0.17770905792713165, "learning_rate": 9.520137549221974e-05, "loss": 0.0092, "step": 7800 }, { "epoch": 1.5098684210526314, "grad_norm": 0.081815205514431, "learning_rate": 9.520014253413958e-05, "loss": 0.0081, "step": 7801 }, { "epoch": 1.5100619195046439, "grad_norm": 0.16500665247440338, "learning_rate": 9.51989094266052e-05, "loss": 0.0076, "step": 7802 }, { "epoch": 1.5102554179566563, "grad_norm": 0.08653102815151215, "learning_rate": 9.519767616962117e-05, "loss": 0.009, "step": 7803 }, { "epoch": 1.5104489164086687, "grad_norm": 0.1364135891199112, "learning_rate": 9.519644276319209e-05, "loss": 0.0097, "step": 7804 }, { "epoch": 1.5106424148606812, "grad_norm": 0.09833502769470215, "learning_rate": 9.519520920732254e-05, "loss": 0.0084, "step": 7805 }, { "epoch": 1.5108359133126936, "grad_norm": 0.08885307610034943, "learning_rate": 9.51939755020171e-05, "loss": 0.0094, "step": 7806 }, { "epoch": 1.5110294117647058, "grad_norm": 0.098073311150074, "learning_rate": 9.519274164728038e-05, "loss": 0.0068, "step": 7807 }, { "epoch": 1.5112229102167183, "grad_norm": 0.07190632075071335, "learning_rate": 9.519150764311695e-05, "loss": 0.0101, "step": 7808 }, { "epoch": 1.5114164086687305, "grad_norm": 0.10289061069488525, "learning_rate": 9.519027348953138e-05, "loss": 0.0089, "step": 7809 }, { "epoch": 1.511609907120743, "grad_norm": 0.05342711880803108, "learning_rate": 9.518903918652829e-05, "loss": 0.0089, "step": 7810 }, { "epoch": 1.5118034055727554, "grad_norm": 0.07387397438287735, "learning_rate": 9.518780473411224e-05, "loss": 0.0083, "step": 7811 }, { "epoch": 1.5119969040247678, "grad_norm": 0.06398288160562515, "learning_rate": 9.518657013228785e-05, "loss": 0.0089, "step": 7812 }, { "epoch": 1.5121904024767803, "grad_norm": 0.05637384578585625, "learning_rate": 9.518533538105968e-05, "loss": 0.0103, "step": 7813 }, { "epoch": 1.5123839009287927, "grad_norm": 0.08510717004537582, "learning_rate": 9.518410048043235e-05, "loss": 0.0086, "step": 7814 }, { "epoch": 1.512577399380805, "grad_norm": 0.0670798197388649, "learning_rate": 9.518286543041042e-05, "loss": 0.0078, "step": 7815 }, { "epoch": 1.5127708978328174, "grad_norm": 0.07283037155866623, "learning_rate": 9.518163023099851e-05, "loss": 0.0087, "step": 7816 }, { "epoch": 1.5129643962848296, "grad_norm": 0.0931074395775795, "learning_rate": 9.518039488220119e-05, "loss": 0.0082, "step": 7817 }, { "epoch": 1.513157894736842, "grad_norm": 0.057479362934827805, "learning_rate": 9.517915938402307e-05, "loss": 0.009, "step": 7818 }, { "epoch": 1.5133513931888545, "grad_norm": 0.0923219695687294, "learning_rate": 9.517792373646875e-05, "loss": 0.0079, "step": 7819 }, { "epoch": 1.513544891640867, "grad_norm": 
0.13246415555477142, "learning_rate": 9.517668793954279e-05, "loss": 0.0083, "step": 7820 }, { "epoch": 1.5137383900928794, "grad_norm": 0.06761909276247025, "learning_rate": 9.517545199324982e-05, "loss": 0.0097, "step": 7821 }, { "epoch": 1.5139318885448918, "grad_norm": 0.1318705976009369, "learning_rate": 9.51742158975944e-05, "loss": 0.007, "step": 7822 }, { "epoch": 1.514125386996904, "grad_norm": 0.05702745541930199, "learning_rate": 9.517297965258115e-05, "loss": 0.008, "step": 7823 }, { "epoch": 1.5143188854489165, "grad_norm": 0.10769141465425491, "learning_rate": 9.517174325821466e-05, "loss": 0.009, "step": 7824 }, { "epoch": 1.5145123839009287, "grad_norm": 0.08818855881690979, "learning_rate": 9.517050671449954e-05, "loss": 0.0094, "step": 7825 }, { "epoch": 1.5147058823529411, "grad_norm": 0.06448102742433548, "learning_rate": 9.516927002144036e-05, "loss": 0.0088, "step": 7826 }, { "epoch": 1.5148993808049536, "grad_norm": 0.10950639843940735, "learning_rate": 9.516803317904174e-05, "loss": 0.0087, "step": 7827 }, { "epoch": 1.515092879256966, "grad_norm": 0.04704367741942406, "learning_rate": 9.516679618730825e-05, "loss": 0.0082, "step": 7828 }, { "epoch": 1.5152863777089784, "grad_norm": 0.09526385366916656, "learning_rate": 9.516555904624455e-05, "loss": 0.009, "step": 7829 }, { "epoch": 1.5154798761609907, "grad_norm": 0.0649324506521225, "learning_rate": 9.516432175585515e-05, "loss": 0.009, "step": 7830 }, { "epoch": 1.515673374613003, "grad_norm": 0.07009964436292648, "learning_rate": 9.516308431614473e-05, "loss": 0.0071, "step": 7831 }, { "epoch": 1.5158668730650153, "grad_norm": 0.06378989666700363, "learning_rate": 9.516184672711786e-05, "loss": 0.0084, "step": 7832 }, { "epoch": 1.5160603715170278, "grad_norm": 0.07903671264648438, "learning_rate": 9.516060898877913e-05, "loss": 0.0084, "step": 7833 }, { "epoch": 1.5162538699690402, "grad_norm": 0.04013985022902489, "learning_rate": 9.515937110113314e-05, "loss": 0.0081, "step": 7834 }, { "epoch": 1.5164473684210527, "grad_norm": 0.08112596720457077, "learning_rate": 9.515813306418453e-05, "loss": 0.0079, "step": 7835 }, { "epoch": 1.516640866873065, "grad_norm": 0.047017309814691544, "learning_rate": 9.515689487793785e-05, "loss": 0.0086, "step": 7836 }, { "epoch": 1.5168343653250775, "grad_norm": 0.05949300900101662, "learning_rate": 9.515565654239774e-05, "loss": 0.008, "step": 7837 }, { "epoch": 1.5170278637770898, "grad_norm": 0.06389164924621582, "learning_rate": 9.515441805756882e-05, "loss": 0.0092, "step": 7838 }, { "epoch": 1.5172213622291022, "grad_norm": 0.05571642890572548, "learning_rate": 9.515317942345564e-05, "loss": 0.0076, "step": 7839 }, { "epoch": 1.5174148606811144, "grad_norm": 0.056873664259910583, "learning_rate": 9.515194064006283e-05, "loss": 0.0079, "step": 7840 }, { "epoch": 1.5176083591331269, "grad_norm": 0.08119289577007294, "learning_rate": 9.515070170739501e-05, "loss": 0.0069, "step": 7841 }, { "epoch": 1.5178018575851393, "grad_norm": 0.06493683904409409, "learning_rate": 9.514946262545676e-05, "loss": 0.0099, "step": 7842 }, { "epoch": 1.5179953560371517, "grad_norm": 0.06847658008337021, "learning_rate": 9.514822339425273e-05, "loss": 0.0077, "step": 7843 }, { "epoch": 1.5181888544891642, "grad_norm": 0.061931103467941284, "learning_rate": 9.514698401378747e-05, "loss": 0.007, "step": 7844 }, { "epoch": 1.5183823529411766, "grad_norm": 0.05287986248731613, "learning_rate": 9.514574448406562e-05, "loss": 0.0074, "step": 7845 }, { "epoch": 1.5185758513931888, "grad_norm": 
0.07300252467393875, "learning_rate": 9.51445048050918e-05, "loss": 0.0077, "step": 7846 }, { "epoch": 1.5187693498452013, "grad_norm": 0.03854778781533241, "learning_rate": 9.514326497687058e-05, "loss": 0.007, "step": 7847 }, { "epoch": 1.5189628482972135, "grad_norm": 0.0875028520822525, "learning_rate": 9.514202499940662e-05, "loss": 0.0071, "step": 7848 }, { "epoch": 1.519156346749226, "grad_norm": 0.043654076755046844, "learning_rate": 9.514078487270448e-05, "loss": 0.0083, "step": 7849 }, { "epoch": 1.5193498452012384, "grad_norm": 0.08062859624624252, "learning_rate": 9.513954459676879e-05, "loss": 0.0087, "step": 7850 }, { "epoch": 1.5195433436532508, "grad_norm": 0.0883389487862587, "learning_rate": 9.513830417160417e-05, "loss": 0.0085, "step": 7851 }, { "epoch": 1.5197368421052633, "grad_norm": 0.1388423889875412, "learning_rate": 9.513706359721524e-05, "loss": 0.0082, "step": 7852 }, { "epoch": 1.5199303405572755, "grad_norm": 0.044669196009635925, "learning_rate": 9.513582287360658e-05, "loss": 0.0088, "step": 7853 }, { "epoch": 1.520123839009288, "grad_norm": 0.1349010318517685, "learning_rate": 9.513458200078284e-05, "loss": 0.0066, "step": 7854 }, { "epoch": 1.5203173374613002, "grad_norm": 0.05716723948717117, "learning_rate": 9.513334097874859e-05, "loss": 0.0076, "step": 7855 }, { "epoch": 1.5205108359133126, "grad_norm": 0.1358029693365097, "learning_rate": 9.513209980750849e-05, "loss": 0.0094, "step": 7856 }, { "epoch": 1.520704334365325, "grad_norm": 0.1224331483244896, "learning_rate": 9.513085848706711e-05, "loss": 0.0076, "step": 7857 }, { "epoch": 1.5208978328173375, "grad_norm": 0.16315071284770966, "learning_rate": 9.51296170174291e-05, "loss": 0.0074, "step": 7858 }, { "epoch": 1.52109133126935, "grad_norm": 0.19490095973014832, "learning_rate": 9.512837539859906e-05, "loss": 0.0081, "step": 7859 }, { "epoch": 1.5212848297213624, "grad_norm": 0.09497328847646713, "learning_rate": 9.51271336305816e-05, "loss": 0.0088, "step": 7860 }, { "epoch": 1.5214783281733746, "grad_norm": 0.20319761335849762, "learning_rate": 9.512589171338135e-05, "loss": 0.0089, "step": 7861 }, { "epoch": 1.521671826625387, "grad_norm": 0.05111265182495117, "learning_rate": 9.512464964700293e-05, "loss": 0.0077, "step": 7862 }, { "epoch": 1.5218653250773992, "grad_norm": 0.1635875254869461, "learning_rate": 9.512340743145095e-05, "loss": 0.0078, "step": 7863 }, { "epoch": 1.5220588235294117, "grad_norm": 0.09506641328334808, "learning_rate": 9.512216506673001e-05, "loss": 0.0095, "step": 7864 }, { "epoch": 1.5222523219814241, "grad_norm": 0.07496567815542221, "learning_rate": 9.512092255284476e-05, "loss": 0.0078, "step": 7865 }, { "epoch": 1.5224458204334366, "grad_norm": 0.13862180709838867, "learning_rate": 9.511967988979981e-05, "loss": 0.0084, "step": 7866 }, { "epoch": 1.522639318885449, "grad_norm": 0.05061599984765053, "learning_rate": 9.511843707759976e-05, "loss": 0.0071, "step": 7867 }, { "epoch": 1.5228328173374615, "grad_norm": 0.15750077366828918, "learning_rate": 9.511719411624926e-05, "loss": 0.009, "step": 7868 }, { "epoch": 1.5230263157894737, "grad_norm": 0.0801970362663269, "learning_rate": 9.511595100575291e-05, "loss": 0.0091, "step": 7869 }, { "epoch": 1.5232198142414861, "grad_norm": 0.1217290386557579, "learning_rate": 9.511470774611533e-05, "loss": 0.008, "step": 7870 }, { "epoch": 1.5234133126934983, "grad_norm": 0.11306267976760864, "learning_rate": 9.511346433734117e-05, "loss": 0.0095, "step": 7871 }, { "epoch": 1.5236068111455108, "grad_norm": 
0.08492972701787949, "learning_rate": 9.511222077943504e-05, "loss": 0.0079, "step": 7872 }, { "epoch": 1.5238003095975232, "grad_norm": 0.10640498995780945, "learning_rate": 9.511097707240153e-05, "loss": 0.0066, "step": 7873 }, { "epoch": 1.5239938080495357, "grad_norm": 0.058750346302986145, "learning_rate": 9.51097332162453e-05, "loss": 0.0098, "step": 7874 }, { "epoch": 1.524187306501548, "grad_norm": 0.13693083822727203, "learning_rate": 9.510848921097098e-05, "loss": 0.0098, "step": 7875 }, { "epoch": 1.5243808049535603, "grad_norm": 0.08936426788568497, "learning_rate": 9.510724505658318e-05, "loss": 0.0084, "step": 7876 }, { "epoch": 1.5245743034055728, "grad_norm": 0.11094339191913605, "learning_rate": 9.51060007530865e-05, "loss": 0.0071, "step": 7877 }, { "epoch": 1.524767801857585, "grad_norm": 0.11236198991537094, "learning_rate": 9.510475630048559e-05, "loss": 0.0077, "step": 7878 }, { "epoch": 1.5249613003095974, "grad_norm": 0.06805584579706192, "learning_rate": 9.510351169878512e-05, "loss": 0.0106, "step": 7879 }, { "epoch": 1.5251547987616099, "grad_norm": 0.13562805950641632, "learning_rate": 9.510226694798964e-05, "loss": 0.0106, "step": 7880 }, { "epoch": 1.5253482972136223, "grad_norm": 0.060216426849365234, "learning_rate": 9.510102204810383e-05, "loss": 0.006, "step": 7881 }, { "epoch": 1.5255417956656347, "grad_norm": 0.09575389325618744, "learning_rate": 9.509977699913229e-05, "loss": 0.0084, "step": 7882 }, { "epoch": 1.5257352941176472, "grad_norm": 0.08346224576234818, "learning_rate": 9.509853180107966e-05, "loss": 0.0081, "step": 7883 }, { "epoch": 1.5259287925696594, "grad_norm": 0.07813330739736557, "learning_rate": 9.509728645395057e-05, "loss": 0.0088, "step": 7884 }, { "epoch": 1.5261222910216719, "grad_norm": 0.06403624266386032, "learning_rate": 9.509604095774967e-05, "loss": 0.0077, "step": 7885 }, { "epoch": 1.526315789473684, "grad_norm": 0.041362013667821884, "learning_rate": 9.509479531248156e-05, "loss": 0.007, "step": 7886 }, { "epoch": 1.5265092879256965, "grad_norm": 0.06282348185777664, "learning_rate": 9.509354951815086e-05, "loss": 0.0079, "step": 7887 }, { "epoch": 1.526702786377709, "grad_norm": 0.03159327432513237, "learning_rate": 9.509230357476223e-05, "loss": 0.0076, "step": 7888 }, { "epoch": 1.5268962848297214, "grad_norm": 0.06347937881946564, "learning_rate": 9.50910574823203e-05, "loss": 0.0094, "step": 7889 }, { "epoch": 1.5270897832817338, "grad_norm": 0.028452858328819275, "learning_rate": 9.508981124082969e-05, "loss": 0.0077, "step": 7890 }, { "epoch": 1.5272832817337463, "grad_norm": 0.039649512618780136, "learning_rate": 9.508856485029506e-05, "loss": 0.0082, "step": 7891 }, { "epoch": 1.5274767801857585, "grad_norm": 0.04299572482705116, "learning_rate": 9.508731831072101e-05, "loss": 0.0092, "step": 7892 }, { "epoch": 1.527670278637771, "grad_norm": 0.07208193838596344, "learning_rate": 9.508607162211219e-05, "loss": 0.0119, "step": 7893 }, { "epoch": 1.5278637770897832, "grad_norm": 0.06690558046102524, "learning_rate": 9.508482478447324e-05, "loss": 0.0065, "step": 7894 }, { "epoch": 1.5280572755417956, "grad_norm": 0.09266479313373566, "learning_rate": 9.508357779780877e-05, "loss": 0.0091, "step": 7895 }, { "epoch": 1.528250773993808, "grad_norm": 0.11543600261211395, "learning_rate": 9.508233066212344e-05, "loss": 0.0078, "step": 7896 }, { "epoch": 1.5284442724458205, "grad_norm": 0.09008695185184479, "learning_rate": 9.508108337742188e-05, "loss": 0.0083, "step": 7897 }, { "epoch": 1.528637770897833, "grad_norm": 
0.07173697650432587, "learning_rate": 9.507983594370873e-05, "loss": 0.0081, "step": 7898 }, { "epoch": 1.5288312693498454, "grad_norm": 0.17171718180179596, "learning_rate": 9.507858836098865e-05, "loss": 0.0077, "step": 7899 }, { "epoch": 1.5290247678018576, "grad_norm": 0.038676030933856964, "learning_rate": 9.507734062926622e-05, "loss": 0.0081, "step": 7900 }, { "epoch": 1.5292182662538698, "grad_norm": 0.1372956931591034, "learning_rate": 9.507609274854613e-05, "loss": 0.0093, "step": 7901 }, { "epoch": 1.5294117647058822, "grad_norm": 0.07309561967849731, "learning_rate": 9.507484471883301e-05, "loss": 0.0098, "step": 7902 }, { "epoch": 1.5296052631578947, "grad_norm": 0.11858535557985306, "learning_rate": 9.507359654013148e-05, "loss": 0.0095, "step": 7903 }, { "epoch": 1.5297987616099071, "grad_norm": 0.09948133677244186, "learning_rate": 9.50723482124462e-05, "loss": 0.0069, "step": 7904 }, { "epoch": 1.5299922600619196, "grad_norm": 0.0737311914563179, "learning_rate": 9.507109973578178e-05, "loss": 0.0087, "step": 7905 }, { "epoch": 1.530185758513932, "grad_norm": 0.094582200050354, "learning_rate": 9.50698511101429e-05, "loss": 0.0079, "step": 7906 }, { "epoch": 1.5303792569659442, "grad_norm": 0.09420577436685562, "learning_rate": 9.50686023355342e-05, "loss": 0.0079, "step": 7907 }, { "epoch": 1.5305727554179567, "grad_norm": 0.08191285282373428, "learning_rate": 9.50673534119603e-05, "loss": 0.0076, "step": 7908 }, { "epoch": 1.530766253869969, "grad_norm": 0.08241602033376694, "learning_rate": 9.506610433942584e-05, "loss": 0.0084, "step": 7909 }, { "epoch": 1.5309597523219813, "grad_norm": 0.055200520902872086, "learning_rate": 9.506485511793551e-05, "loss": 0.0086, "step": 7910 }, { "epoch": 1.5311532507739938, "grad_norm": 0.06166056543588638, "learning_rate": 9.506360574749388e-05, "loss": 0.0077, "step": 7911 }, { "epoch": 1.5313467492260062, "grad_norm": 0.04742143675684929, "learning_rate": 9.506235622810567e-05, "loss": 0.008, "step": 7912 }, { "epoch": 1.5315402476780187, "grad_norm": 0.04911187291145325, "learning_rate": 9.506110655977547e-05, "loss": 0.0089, "step": 7913 }, { "epoch": 1.531733746130031, "grad_norm": 0.07553967833518982, "learning_rate": 9.505985674250795e-05, "loss": 0.009, "step": 7914 }, { "epoch": 1.5319272445820433, "grad_norm": 0.03371725603938103, "learning_rate": 9.505860677630776e-05, "loss": 0.0094, "step": 7915 }, { "epoch": 1.5321207430340558, "grad_norm": 0.06637277454137802, "learning_rate": 9.505735666117953e-05, "loss": 0.0089, "step": 7916 }, { "epoch": 1.532314241486068, "grad_norm": 0.041418302804231644, "learning_rate": 9.505610639712793e-05, "loss": 0.0074, "step": 7917 }, { "epoch": 1.5325077399380804, "grad_norm": 0.06893284618854523, "learning_rate": 9.50548559841576e-05, "loss": 0.0085, "step": 7918 }, { "epoch": 1.5327012383900929, "grad_norm": 0.04086803272366524, "learning_rate": 9.505360542227316e-05, "loss": 0.0074, "step": 7919 }, { "epoch": 1.5328947368421053, "grad_norm": 0.06451261788606644, "learning_rate": 9.50523547114793e-05, "loss": 0.0078, "step": 7920 }, { "epoch": 1.5330882352941178, "grad_norm": 0.03515632450580597, "learning_rate": 9.505110385178067e-05, "loss": 0.0067, "step": 7921 }, { "epoch": 1.5332817337461302, "grad_norm": 0.03687019646167755, "learning_rate": 9.504985284318189e-05, "loss": 0.0089, "step": 7922 }, { "epoch": 1.5334752321981424, "grad_norm": 0.05731121450662613, "learning_rate": 9.504860168568763e-05, "loss": 0.0075, "step": 7923 }, { "epoch": 1.5336687306501546, "grad_norm": 
0.0509781539440155, "learning_rate": 9.504735037930253e-05, "loss": 0.0081, "step": 7924 }, { "epoch": 1.533862229102167, "grad_norm": 0.08079660683870316, "learning_rate": 9.504609892403125e-05, "loss": 0.0077, "step": 7925 }, { "epoch": 1.5340557275541795, "grad_norm": 0.03739549592137337, "learning_rate": 9.504484731987845e-05, "loss": 0.0073, "step": 7926 }, { "epoch": 1.534249226006192, "grad_norm": 0.06285770982503891, "learning_rate": 9.504359556684876e-05, "loss": 0.0083, "step": 7927 }, { "epoch": 1.5344427244582044, "grad_norm": 0.04753255471587181, "learning_rate": 9.504234366494686e-05, "loss": 0.0081, "step": 7928 }, { "epoch": 1.5346362229102168, "grad_norm": 0.056330837309360504, "learning_rate": 9.504109161417739e-05, "loss": 0.0065, "step": 7929 }, { "epoch": 1.534829721362229, "grad_norm": 0.0444304458796978, "learning_rate": 9.5039839414545e-05, "loss": 0.0099, "step": 7930 }, { "epoch": 1.5350232198142415, "grad_norm": 0.08041691780090332, "learning_rate": 9.503858706605439e-05, "loss": 0.0107, "step": 7931 }, { "epoch": 1.5352167182662537, "grad_norm": 0.05121910572052002, "learning_rate": 9.503733456871014e-05, "loss": 0.0071, "step": 7932 }, { "epoch": 1.5354102167182662, "grad_norm": 0.09858401864767075, "learning_rate": 9.503608192251695e-05, "loss": 0.0071, "step": 7933 }, { "epoch": 1.5356037151702786, "grad_norm": 0.06523309648036957, "learning_rate": 9.503482912747947e-05, "loss": 0.0093, "step": 7934 }, { "epoch": 1.535797213622291, "grad_norm": 0.11284953355789185, "learning_rate": 9.503357618360239e-05, "loss": 0.01, "step": 7935 }, { "epoch": 1.5359907120743035, "grad_norm": 0.043409328907728195, "learning_rate": 9.503232309089031e-05, "loss": 0.0085, "step": 7936 }, { "epoch": 1.536184210526316, "grad_norm": 0.14402027428150177, "learning_rate": 9.503106984934794e-05, "loss": 0.0082, "step": 7937 }, { "epoch": 1.5363777089783281, "grad_norm": 0.04324393346905708, "learning_rate": 9.502981645897988e-05, "loss": 0.0095, "step": 7938 }, { "epoch": 1.5365712074303406, "grad_norm": 0.13660620152950287, "learning_rate": 9.502856291979086e-05, "loss": 0.0102, "step": 7939 }, { "epoch": 1.5367647058823528, "grad_norm": 0.06726456433534622, "learning_rate": 9.502730923178549e-05, "loss": 0.0076, "step": 7940 }, { "epoch": 1.5369582043343653, "grad_norm": 0.12149455398321152, "learning_rate": 9.502605539496845e-05, "loss": 0.0092, "step": 7941 }, { "epoch": 1.5371517027863777, "grad_norm": 0.04263278469443321, "learning_rate": 9.50248014093444e-05, "loss": 0.0087, "step": 7942 }, { "epoch": 1.5373452012383901, "grad_norm": 0.046153437346220016, "learning_rate": 9.5023547274918e-05, "loss": 0.0076, "step": 7943 }, { "epoch": 1.5375386996904026, "grad_norm": 0.0500592477619648, "learning_rate": 9.502229299169392e-05, "loss": 0.0079, "step": 7944 }, { "epoch": 1.537732198142415, "grad_norm": 0.030403081327676773, "learning_rate": 9.502103855967678e-05, "loss": 0.0084, "step": 7945 }, { "epoch": 1.5379256965944272, "grad_norm": 0.08640248328447342, "learning_rate": 9.501978397887133e-05, "loss": 0.008, "step": 7946 }, { "epoch": 1.5381191950464397, "grad_norm": 0.09041204303503036, "learning_rate": 9.501852924928217e-05, "loss": 0.0084, "step": 7947 }, { "epoch": 1.538312693498452, "grad_norm": 0.03464217483997345, "learning_rate": 9.501727437091397e-05, "loss": 0.0087, "step": 7948 }, { "epoch": 1.5385061919504643, "grad_norm": 0.07654397934675217, "learning_rate": 9.50160193437714e-05, "loss": 0.0085, "step": 7949 }, { "epoch": 1.5386996904024768, "grad_norm": 
0.029879862442612648, "learning_rate": 9.501476416785915e-05, "loss": 0.008, "step": 7950 }, { "epoch": 1.5388931888544892, "grad_norm": 0.10354137420654297, "learning_rate": 9.501350884318184e-05, "loss": 0.0096, "step": 7951 }, { "epoch": 1.5390866873065017, "grad_norm": 0.03612164035439491, "learning_rate": 9.501225336974419e-05, "loss": 0.0078, "step": 7952 }, { "epoch": 1.5392801857585139, "grad_norm": 0.11269714683294296, "learning_rate": 9.501099774755083e-05, "loss": 0.0083, "step": 7953 }, { "epoch": 1.5394736842105263, "grad_norm": 0.06161518767476082, "learning_rate": 9.500974197660644e-05, "loss": 0.0087, "step": 7954 }, { "epoch": 1.5396671826625385, "grad_norm": 0.09614214301109314, "learning_rate": 9.500848605691567e-05, "loss": 0.0097, "step": 7955 }, { "epoch": 1.539860681114551, "grad_norm": 0.07195541262626648, "learning_rate": 9.500722998848322e-05, "loss": 0.0099, "step": 7956 }, { "epoch": 1.5400541795665634, "grad_norm": 0.06098887324333191, "learning_rate": 9.500597377131376e-05, "loss": 0.0074, "step": 7957 }, { "epoch": 1.5402476780185759, "grad_norm": 0.048671264201402664, "learning_rate": 9.500471740541193e-05, "loss": 0.0085, "step": 7958 }, { "epoch": 1.5404411764705883, "grad_norm": 0.05987273156642914, "learning_rate": 9.500346089078243e-05, "loss": 0.0077, "step": 7959 }, { "epoch": 1.5406346749226008, "grad_norm": 0.04346938803792, "learning_rate": 9.500220422742993e-05, "loss": 0.0073, "step": 7960 }, { "epoch": 1.540828173374613, "grad_norm": 0.0690232366323471, "learning_rate": 9.500094741535906e-05, "loss": 0.0097, "step": 7961 }, { "epoch": 1.5410216718266254, "grad_norm": 0.06764637678861618, "learning_rate": 9.499969045457455e-05, "loss": 0.0074, "step": 7962 }, { "epoch": 1.5412151702786376, "grad_norm": 0.05966693535447121, "learning_rate": 9.499843334508102e-05, "loss": 0.008, "step": 7963 }, { "epoch": 1.54140866873065, "grad_norm": 0.14553187787532806, "learning_rate": 9.49971760868832e-05, "loss": 0.0089, "step": 7964 }, { "epoch": 1.5416021671826625, "grad_norm": 0.0831407681107521, "learning_rate": 9.499591867998573e-05, "loss": 0.0084, "step": 7965 }, { "epoch": 1.541795665634675, "grad_norm": 0.11025698482990265, "learning_rate": 9.499466112439327e-05, "loss": 0.0083, "step": 7966 }, { "epoch": 1.5419891640866874, "grad_norm": 0.03937986120581627, "learning_rate": 9.499340342011054e-05, "loss": 0.0089, "step": 7967 }, { "epoch": 1.5421826625386998, "grad_norm": 0.1052207425236702, "learning_rate": 9.499214556714216e-05, "loss": 0.0101, "step": 7968 }, { "epoch": 1.542376160990712, "grad_norm": 0.1117272600531578, "learning_rate": 9.499088756549287e-05, "loss": 0.0089, "step": 7969 }, { "epoch": 1.5425696594427245, "grad_norm": 0.08781489729881287, "learning_rate": 9.498962941516729e-05, "loss": 0.0094, "step": 7970 }, { "epoch": 1.5427631578947367, "grad_norm": 0.11112736910581589, "learning_rate": 9.498837111617014e-05, "loss": 0.0078, "step": 7971 }, { "epoch": 1.5429566563467492, "grad_norm": 0.0707574188709259, "learning_rate": 9.498711266850608e-05, "loss": 0.0085, "step": 7972 }, { "epoch": 1.5431501547987616, "grad_norm": 0.07461749762296677, "learning_rate": 9.498585407217977e-05, "loss": 0.0081, "step": 7973 }, { "epoch": 1.543343653250774, "grad_norm": 0.11107929050922394, "learning_rate": 9.498459532719592e-05, "loss": 0.0082, "step": 7974 }, { "epoch": 1.5435371517027865, "grad_norm": 0.0783025324344635, "learning_rate": 9.498333643355919e-05, "loss": 0.0088, "step": 7975 }, { "epoch": 1.5437306501547987, "grad_norm": 
0.06151234731078148, "learning_rate": 9.498207739127428e-05, "loss": 0.0082, "step": 7976 }, { "epoch": 1.5439241486068112, "grad_norm": 0.06454487144947052, "learning_rate": 9.498081820034584e-05, "loss": 0.0089, "step": 7977 }, { "epoch": 1.5441176470588234, "grad_norm": 0.0590168721973896, "learning_rate": 9.497955886077858e-05, "loss": 0.0083, "step": 7978 }, { "epoch": 1.5443111455108358, "grad_norm": 0.06514132022857666, "learning_rate": 9.497829937257717e-05, "loss": 0.0076, "step": 7979 }, { "epoch": 1.5445046439628483, "grad_norm": 0.05365239456295967, "learning_rate": 9.497703973574629e-05, "loss": 0.0082, "step": 7980 }, { "epoch": 1.5446981424148607, "grad_norm": 0.08693116903305054, "learning_rate": 9.497577995029064e-05, "loss": 0.0101, "step": 7981 }, { "epoch": 1.5448916408668731, "grad_norm": 0.08523522317409515, "learning_rate": 9.497452001621488e-05, "loss": 0.0089, "step": 7982 }, { "epoch": 1.5450851393188856, "grad_norm": 0.06980790942907333, "learning_rate": 9.49732599335237e-05, "loss": 0.008, "step": 7983 }, { "epoch": 1.5452786377708978, "grad_norm": 0.16729329526424408, "learning_rate": 9.49719997022218e-05, "loss": 0.0101, "step": 7984 }, { "epoch": 1.5454721362229102, "grad_norm": 0.12759986519813538, "learning_rate": 9.497073932231384e-05, "loss": 0.0082, "step": 7985 }, { "epoch": 1.5456656346749225, "grad_norm": 0.13820721209049225, "learning_rate": 9.496947879380452e-05, "loss": 0.0107, "step": 7986 }, { "epoch": 1.545859133126935, "grad_norm": 0.10748688131570816, "learning_rate": 9.496821811669855e-05, "loss": 0.0087, "step": 7987 }, { "epoch": 1.5460526315789473, "grad_norm": 0.13360857963562012, "learning_rate": 9.496695729100056e-05, "loss": 0.0084, "step": 7988 }, { "epoch": 1.5462461300309598, "grad_norm": 0.07277115434408188, "learning_rate": 9.49656963167153e-05, "loss": 0.0082, "step": 7989 }, { "epoch": 1.5464396284829722, "grad_norm": 0.1882784366607666, "learning_rate": 9.496443519384742e-05, "loss": 0.0082, "step": 7990 }, { "epoch": 1.5466331269349847, "grad_norm": 0.1366942822933197, "learning_rate": 9.496317392240162e-05, "loss": 0.0083, "step": 7991 }, { "epoch": 1.546826625386997, "grad_norm": 0.13990497589111328, "learning_rate": 9.496191250238258e-05, "loss": 0.0069, "step": 7992 }, { "epoch": 1.5470201238390093, "grad_norm": 0.24899084866046906, "learning_rate": 9.496065093379501e-05, "loss": 0.0095, "step": 7993 }, { "epoch": 1.5472136222910216, "grad_norm": 0.08243933320045471, "learning_rate": 9.495938921664357e-05, "loss": 0.008, "step": 7994 }, { "epoch": 1.547407120743034, "grad_norm": 0.2847912311553955, "learning_rate": 9.495812735093296e-05, "loss": 0.009, "step": 7995 }, { "epoch": 1.5476006191950464, "grad_norm": 0.05514451116323471, "learning_rate": 9.49568653366679e-05, "loss": 0.0086, "step": 7996 }, { "epoch": 1.5477941176470589, "grad_norm": 0.20915749669075012, "learning_rate": 9.495560317385304e-05, "loss": 0.0084, "step": 7997 }, { "epoch": 1.5479876160990713, "grad_norm": 0.08257302641868591, "learning_rate": 9.495434086249311e-05, "loss": 0.009, "step": 7998 }, { "epoch": 1.5481811145510835, "grad_norm": 0.09645312279462814, "learning_rate": 9.495307840259278e-05, "loss": 0.0092, "step": 7999 }, { "epoch": 1.548374613003096, "grad_norm": 0.12503953278064728, "learning_rate": 9.495181579415675e-05, "loss": 0.0099, "step": 8000 }, { "epoch": 1.5485681114551082, "grad_norm": 0.12686105072498322, "learning_rate": 9.495055303718971e-05, "loss": 0.0105, "step": 8001 }, { "epoch": 1.5487616099071206, "grad_norm": 
0.06484240293502808, "learning_rate": 9.494929013169635e-05, "loss": 0.0076, "step": 8002 }, { "epoch": 1.548955108359133, "grad_norm": 0.16619446873664856, "learning_rate": 9.49480270776814e-05, "loss": 0.008, "step": 8003 }, { "epoch": 1.5491486068111455, "grad_norm": 0.09462002664804459, "learning_rate": 9.49467638751495e-05, "loss": 0.0082, "step": 8004 }, { "epoch": 1.549342105263158, "grad_norm": 0.06890351325273514, "learning_rate": 9.494550052410539e-05, "loss": 0.0088, "step": 8005 }, { "epoch": 1.5495356037151704, "grad_norm": 0.1521475464105606, "learning_rate": 9.494423702455376e-05, "loss": 0.0097, "step": 8006 }, { "epoch": 1.5497291021671826, "grad_norm": 0.0934171974658966, "learning_rate": 9.494297337649928e-05, "loss": 0.0089, "step": 8007 }, { "epoch": 1.549922600619195, "grad_norm": 0.07274804264307022, "learning_rate": 9.494170957994668e-05, "loss": 0.0083, "step": 8008 }, { "epoch": 1.5501160990712073, "grad_norm": 0.1093909963965416, "learning_rate": 9.494044563490063e-05, "loss": 0.0082, "step": 8009 }, { "epoch": 1.5503095975232197, "grad_norm": 0.0487191267311573, "learning_rate": 9.493918154136586e-05, "loss": 0.0082, "step": 8010 }, { "epoch": 1.5505030959752322, "grad_norm": 0.12158852815628052, "learning_rate": 9.493791729934705e-05, "loss": 0.007, "step": 8011 }, { "epoch": 1.5506965944272446, "grad_norm": 0.06891200691461563, "learning_rate": 9.493665290884891e-05, "loss": 0.0073, "step": 8012 }, { "epoch": 1.550890092879257, "grad_norm": 0.07835877686738968, "learning_rate": 9.49353883698761e-05, "loss": 0.0075, "step": 8013 }, { "epoch": 1.5510835913312695, "grad_norm": 0.11704463511705399, "learning_rate": 9.49341236824334e-05, "loss": 0.0083, "step": 8014 }, { "epoch": 1.5512770897832817, "grad_norm": 0.052406661212444305, "learning_rate": 9.493285884652545e-05, "loss": 0.0098, "step": 8015 }, { "epoch": 1.5514705882352942, "grad_norm": 0.09783957153558731, "learning_rate": 9.493159386215698e-05, "loss": 0.0073, "step": 8016 }, { "epoch": 1.5516640866873064, "grad_norm": 0.06558778136968613, "learning_rate": 9.493032872933267e-05, "loss": 0.0085, "step": 8017 }, { "epoch": 1.5518575851393188, "grad_norm": 0.08083803206682205, "learning_rate": 9.492906344805726e-05, "loss": 0.0074, "step": 8018 }, { "epoch": 1.5520510835913313, "grad_norm": 0.09199145436286926, "learning_rate": 9.492779801833541e-05, "loss": 0.0096, "step": 8019 }, { "epoch": 1.5522445820433437, "grad_norm": 0.06507046520709991, "learning_rate": 9.492653244017185e-05, "loss": 0.0091, "step": 8020 }, { "epoch": 1.5524380804953561, "grad_norm": 0.0811411440372467, "learning_rate": 9.492526671357128e-05, "loss": 0.0082, "step": 8021 }, { "epoch": 1.5526315789473686, "grad_norm": 0.08141320943832397, "learning_rate": 9.49240008385384e-05, "loss": 0.0086, "step": 8022 }, { "epoch": 1.5528250773993808, "grad_norm": 0.04572425037622452, "learning_rate": 9.492273481507794e-05, "loss": 0.0083, "step": 8023 }, { "epoch": 1.553018575851393, "grad_norm": 0.10254263877868652, "learning_rate": 9.492146864319457e-05, "loss": 0.008, "step": 8024 }, { "epoch": 1.5532120743034055, "grad_norm": 0.04168553650379181, "learning_rate": 9.492020232289304e-05, "loss": 0.0077, "step": 8025 }, { "epoch": 1.553405572755418, "grad_norm": 0.08811270445585251, "learning_rate": 9.491893585417801e-05, "loss": 0.0065, "step": 8026 }, { "epoch": 1.5535990712074303, "grad_norm": 0.05389225110411644, "learning_rate": 9.491766923705421e-05, "loss": 0.0077, "step": 8027 }, { "epoch": 1.5537925696594428, "grad_norm": 
0.041485320776700974, "learning_rate": 9.491640247152637e-05, "loss": 0.0097, "step": 8028 }, { "epoch": 1.5539860681114552, "grad_norm": 0.036061905324459076, "learning_rate": 9.491513555759917e-05, "loss": 0.0085, "step": 8029 }, { "epoch": 1.5541795665634675, "grad_norm": 0.03932877257466316, "learning_rate": 9.491386849527734e-05, "loss": 0.0068, "step": 8030 }, { "epoch": 1.55437306501548, "grad_norm": 0.03171107918024063, "learning_rate": 9.491260128456558e-05, "loss": 0.0069, "step": 8031 }, { "epoch": 1.5545665634674921, "grad_norm": 0.04368280991911888, "learning_rate": 9.491133392546862e-05, "loss": 0.0081, "step": 8032 }, { "epoch": 1.5547600619195046, "grad_norm": 0.024433212354779243, "learning_rate": 9.491006641799113e-05, "loss": 0.0068, "step": 8033 }, { "epoch": 1.554953560371517, "grad_norm": 0.06796266883611679, "learning_rate": 9.490879876213785e-05, "loss": 0.0083, "step": 8034 }, { "epoch": 1.5551470588235294, "grad_norm": 0.039739977568387985, "learning_rate": 9.490753095791349e-05, "loss": 0.0097, "step": 8035 }, { "epoch": 1.5553405572755419, "grad_norm": 0.0766085684299469, "learning_rate": 9.490626300532278e-05, "loss": 0.0069, "step": 8036 }, { "epoch": 1.5555340557275543, "grad_norm": 0.06473656743764877, "learning_rate": 9.490499490437041e-05, "loss": 0.0082, "step": 8037 }, { "epoch": 1.5557275541795665, "grad_norm": 0.05551265925168991, "learning_rate": 9.490372665506108e-05, "loss": 0.0078, "step": 8038 }, { "epoch": 1.555921052631579, "grad_norm": 0.04163483902812004, "learning_rate": 9.490245825739955e-05, "loss": 0.0088, "step": 8039 }, { "epoch": 1.5561145510835912, "grad_norm": 0.041823118925094604, "learning_rate": 9.490118971139051e-05, "loss": 0.0094, "step": 8040 }, { "epoch": 1.5563080495356036, "grad_norm": 0.047210343182086945, "learning_rate": 9.489992101703866e-05, "loss": 0.008, "step": 8041 }, { "epoch": 1.556501547987616, "grad_norm": 0.03420048579573631, "learning_rate": 9.489865217434876e-05, "loss": 0.0081, "step": 8042 }, { "epoch": 1.5566950464396285, "grad_norm": 0.05611487850546837, "learning_rate": 9.489738318332548e-05, "loss": 0.0069, "step": 8043 }, { "epoch": 1.556888544891641, "grad_norm": 0.03959234431385994, "learning_rate": 9.489611404397359e-05, "loss": 0.0073, "step": 8044 }, { "epoch": 1.5570820433436534, "grad_norm": 0.03334904834628105, "learning_rate": 9.489484475629774e-05, "loss": 0.0075, "step": 8045 }, { "epoch": 1.5572755417956656, "grad_norm": 0.05183635279536247, "learning_rate": 9.489357532030272e-05, "loss": 0.0104, "step": 8046 }, { "epoch": 1.557469040247678, "grad_norm": 0.05222121998667717, "learning_rate": 9.489230573599319e-05, "loss": 0.0081, "step": 8047 }, { "epoch": 1.5576625386996903, "grad_norm": 0.04425608739256859, "learning_rate": 9.489103600337392e-05, "loss": 0.009, "step": 8048 }, { "epoch": 1.5578560371517027, "grad_norm": 0.07298066467046738, "learning_rate": 9.48897661224496e-05, "loss": 0.009, "step": 8049 }, { "epoch": 1.5580495356037152, "grad_norm": 0.06547907739877701, "learning_rate": 9.488849609322495e-05, "loss": 0.0078, "step": 8050 }, { "epoch": 1.5582430340557276, "grad_norm": 0.048679664731025696, "learning_rate": 9.488722591570471e-05, "loss": 0.0079, "step": 8051 }, { "epoch": 1.55843653250774, "grad_norm": 0.08571010082960129, "learning_rate": 9.48859555898936e-05, "loss": 0.008, "step": 8052 }, { "epoch": 1.5586300309597523, "grad_norm": 0.049031175673007965, "learning_rate": 9.488468511579631e-05, "loss": 0.0089, "step": 8053 }, { "epoch": 1.5588235294117647, "grad_norm": 
0.06499996781349182, "learning_rate": 9.488341449341762e-05, "loss": 0.0078, "step": 8054 }, { "epoch": 1.559017027863777, "grad_norm": 0.03675942122936249, "learning_rate": 9.488214372276221e-05, "loss": 0.0102, "step": 8055 }, { "epoch": 1.5592105263157894, "grad_norm": 0.06680519133806229, "learning_rate": 9.488087280383481e-05, "loss": 0.0096, "step": 8056 }, { "epoch": 1.5594040247678018, "grad_norm": 0.05398508533835411, "learning_rate": 9.487960173664015e-05, "loss": 0.0072, "step": 8057 }, { "epoch": 1.5595975232198143, "grad_norm": 0.03436832129955292, "learning_rate": 9.487833052118297e-05, "loss": 0.0078, "step": 8058 }, { "epoch": 1.5597910216718267, "grad_norm": 0.04624847695231438, "learning_rate": 9.487705915746798e-05, "loss": 0.0082, "step": 8059 }, { "epoch": 1.5599845201238391, "grad_norm": 0.0359567292034626, "learning_rate": 9.48757876454999e-05, "loss": 0.0085, "step": 8060 }, { "epoch": 1.5601780185758514, "grad_norm": 0.038361456245183945, "learning_rate": 9.487451598528347e-05, "loss": 0.0078, "step": 8061 }, { "epoch": 1.5603715170278638, "grad_norm": 0.033986154943704605, "learning_rate": 9.487324417682343e-05, "loss": 0.0079, "step": 8062 }, { "epoch": 1.560565015479876, "grad_norm": 0.05261331424117088, "learning_rate": 9.487197222012448e-05, "loss": 0.007, "step": 8063 }, { "epoch": 1.5607585139318885, "grad_norm": 0.042641930282115936, "learning_rate": 9.487070011519136e-05, "loss": 0.0101, "step": 8064 }, { "epoch": 1.560952012383901, "grad_norm": 0.07273413240909576, "learning_rate": 9.486942786202881e-05, "loss": 0.0063, "step": 8065 }, { "epoch": 1.5611455108359134, "grad_norm": 0.04531269893050194, "learning_rate": 9.486815546064154e-05, "loss": 0.0073, "step": 8066 }, { "epoch": 1.5613390092879258, "grad_norm": 0.06725657731294632, "learning_rate": 9.48668829110343e-05, "loss": 0.0082, "step": 8067 }, { "epoch": 1.5615325077399382, "grad_norm": 0.048273541033267975, "learning_rate": 9.486561021321182e-05, "loss": 0.0081, "step": 8068 }, { "epoch": 1.5617260061919505, "grad_norm": 0.05209926515817642, "learning_rate": 9.486433736717881e-05, "loss": 0.008, "step": 8069 }, { "epoch": 1.561919504643963, "grad_norm": 0.04937409609556198, "learning_rate": 9.486306437294002e-05, "loss": 0.0082, "step": 8070 }, { "epoch": 1.5621130030959751, "grad_norm": 0.04662098363041878, "learning_rate": 9.486179123050017e-05, "loss": 0.0094, "step": 8071 }, { "epoch": 1.5623065015479876, "grad_norm": 0.08233410120010376, "learning_rate": 9.486051793986402e-05, "loss": 0.007, "step": 8072 }, { "epoch": 1.5625, "grad_norm": 0.04084150865674019, "learning_rate": 9.485924450103627e-05, "loss": 0.0085, "step": 8073 }, { "epoch": 1.5626934984520124, "grad_norm": 0.08003229647874832, "learning_rate": 9.485797091402168e-05, "loss": 0.009, "step": 8074 }, { "epoch": 1.5628869969040249, "grad_norm": 0.062371671199798584, "learning_rate": 9.4856697178825e-05, "loss": 0.0082, "step": 8075 }, { "epoch": 1.563080495356037, "grad_norm": 0.08885921537876129, "learning_rate": 9.485542329545091e-05, "loss": 0.0072, "step": 8076 }, { "epoch": 1.5632739938080495, "grad_norm": 0.04431550204753876, "learning_rate": 9.485414926390418e-05, "loss": 0.0079, "step": 8077 }, { "epoch": 1.5634674922600618, "grad_norm": 0.05131885036826134, "learning_rate": 9.485287508418954e-05, "loss": 0.0088, "step": 8078 }, { "epoch": 1.5636609907120742, "grad_norm": 0.04562634229660034, "learning_rate": 9.485160075631174e-05, "loss": 0.0078, "step": 8079 }, { "epoch": 1.5638544891640866, "grad_norm": 
0.039943404495716095, "learning_rate": 9.485032628027549e-05, "loss": 0.0085, "step": 8080 }, { "epoch": 1.564047987616099, "grad_norm": 0.04962730035185814, "learning_rate": 9.484905165608559e-05, "loss": 0.0092, "step": 8081 }, { "epoch": 1.5642414860681115, "grad_norm": 0.04090309888124466, "learning_rate": 9.48477768837467e-05, "loss": 0.007, "step": 8082 }, { "epoch": 1.564434984520124, "grad_norm": 0.048326682299375534, "learning_rate": 9.484650196326359e-05, "loss": 0.0083, "step": 8083 }, { "epoch": 1.5646284829721362, "grad_norm": 0.06900910288095474, "learning_rate": 9.484522689464103e-05, "loss": 0.0078, "step": 8084 }, { "epoch": 1.5648219814241486, "grad_norm": 0.05747582018375397, "learning_rate": 9.48439516778837e-05, "loss": 0.0086, "step": 8085 }, { "epoch": 1.5650154798761609, "grad_norm": 0.07063981890678406, "learning_rate": 9.48426763129964e-05, "loss": 0.0086, "step": 8086 }, { "epoch": 1.5652089783281733, "grad_norm": 0.07036508619785309, "learning_rate": 9.484140079998384e-05, "loss": 0.0087, "step": 8087 }, { "epoch": 1.5654024767801857, "grad_norm": 0.06855539232492447, "learning_rate": 9.484012513885077e-05, "loss": 0.0082, "step": 8088 }, { "epoch": 1.5655959752321982, "grad_norm": 0.0820731520652771, "learning_rate": 9.483884932960193e-05, "loss": 0.0078, "step": 8089 }, { "epoch": 1.5657894736842106, "grad_norm": 0.05151795595884323, "learning_rate": 9.483757337224207e-05, "loss": 0.0078, "step": 8090 }, { "epoch": 1.565982972136223, "grad_norm": 0.10660082846879959, "learning_rate": 9.483629726677592e-05, "loss": 0.0068, "step": 8091 }, { "epoch": 1.5661764705882353, "grad_norm": 0.04783312976360321, "learning_rate": 9.483502101320823e-05, "loss": 0.0088, "step": 8092 }, { "epoch": 1.5663699690402477, "grad_norm": 0.06356653571128845, "learning_rate": 9.483374461154377e-05, "loss": 0.0064, "step": 8093 }, { "epoch": 1.56656346749226, "grad_norm": 0.042330507189035416, "learning_rate": 9.483246806178725e-05, "loss": 0.0092, "step": 8094 }, { "epoch": 1.5667569659442724, "grad_norm": 0.09912639111280441, "learning_rate": 9.483119136394342e-05, "loss": 0.0085, "step": 8095 }, { "epoch": 1.5669504643962848, "grad_norm": 0.05041906610131264, "learning_rate": 9.482991451801704e-05, "loss": 0.0073, "step": 8096 }, { "epoch": 1.5671439628482973, "grad_norm": 0.10988561064004898, "learning_rate": 9.482863752401285e-05, "loss": 0.0067, "step": 8097 }, { "epoch": 1.5673374613003097, "grad_norm": 0.053785305470228195, "learning_rate": 9.48273603819356e-05, "loss": 0.0079, "step": 8098 }, { "epoch": 1.567530959752322, "grad_norm": 0.07078594714403152, "learning_rate": 9.482608309179006e-05, "loss": 0.0076, "step": 8099 }, { "epoch": 1.5677244582043344, "grad_norm": 0.17104342579841614, "learning_rate": 9.482480565358093e-05, "loss": 0.0096, "step": 8100 }, { "epoch": 1.5679179566563466, "grad_norm": 0.1078423485159874, "learning_rate": 9.482352806731298e-05, "loss": 0.0108, "step": 8101 }, { "epoch": 1.568111455108359, "grad_norm": 0.1548309177160263, "learning_rate": 9.4822250332991e-05, "loss": 0.0094, "step": 8102 }, { "epoch": 1.5683049535603715, "grad_norm": 0.05843197554349899, "learning_rate": 9.482097245061967e-05, "loss": 0.0091, "step": 8103 }, { "epoch": 1.568498452012384, "grad_norm": 0.16165605187416077, "learning_rate": 9.48196944202038e-05, "loss": 0.0082, "step": 8104 }, { "epoch": 1.5686919504643964, "grad_norm": 0.04812851548194885, "learning_rate": 9.48184162417481e-05, "loss": 0.0081, "step": 8105 }, { "epoch": 1.5688854489164088, "grad_norm": 
0.12576758861541748, "learning_rate": 9.481713791525736e-05, "loss": 0.0097, "step": 8106 }, { "epoch": 1.569078947368421, "grad_norm": 0.09156397730112076, "learning_rate": 9.48158594407363e-05, "loss": 0.0098, "step": 8107 }, { "epoch": 1.5692724458204335, "grad_norm": 0.10603352636098862, "learning_rate": 9.481458081818969e-05, "loss": 0.007, "step": 8108 }, { "epoch": 1.5694659442724457, "grad_norm": 0.09588005393743515, "learning_rate": 9.481330204762227e-05, "loss": 0.0074, "step": 8109 }, { "epoch": 1.5696594427244581, "grad_norm": 0.06935854256153107, "learning_rate": 9.481202312903882e-05, "loss": 0.0083, "step": 8110 }, { "epoch": 1.5698529411764706, "grad_norm": 0.1070963442325592, "learning_rate": 9.481074406244406e-05, "loss": 0.0077, "step": 8111 }, { "epoch": 1.570046439628483, "grad_norm": 0.07394027709960938, "learning_rate": 9.480946484784276e-05, "loss": 0.0078, "step": 8112 }, { "epoch": 1.5702399380804954, "grad_norm": 0.11950667947530746, "learning_rate": 9.480818548523968e-05, "loss": 0.0086, "step": 8113 }, { "epoch": 1.5704334365325079, "grad_norm": 0.1005353182554245, "learning_rate": 9.48069059746396e-05, "loss": 0.0073, "step": 8114 }, { "epoch": 1.57062693498452, "grad_norm": 0.12446407228708267, "learning_rate": 9.480562631604724e-05, "loss": 0.0092, "step": 8115 }, { "epoch": 1.5708204334365325, "grad_norm": 0.09722420573234558, "learning_rate": 9.480434650946736e-05, "loss": 0.0073, "step": 8116 }, { "epoch": 1.5710139318885448, "grad_norm": 0.1042386069893837, "learning_rate": 9.480306655490474e-05, "loss": 0.0067, "step": 8117 }, { "epoch": 1.5712074303405572, "grad_norm": 0.0748324766755104, "learning_rate": 9.480178645236412e-05, "loss": 0.0065, "step": 8118 }, { "epoch": 1.5714009287925697, "grad_norm": 0.10399923473596573, "learning_rate": 9.480050620185027e-05, "loss": 0.0073, "step": 8119 }, { "epoch": 1.571594427244582, "grad_norm": 0.07225265353918076, "learning_rate": 9.479922580336793e-05, "loss": 0.0081, "step": 8120 }, { "epoch": 1.5717879256965945, "grad_norm": 0.12299969792366028, "learning_rate": 9.479794525692188e-05, "loss": 0.0082, "step": 8121 }, { "epoch": 1.571981424148607, "grad_norm": 0.046628646552562714, "learning_rate": 9.479666456251689e-05, "loss": 0.0082, "step": 8122 }, { "epoch": 1.5721749226006192, "grad_norm": 0.15658344328403473, "learning_rate": 9.47953837201577e-05, "loss": 0.0079, "step": 8123 }, { "epoch": 1.5723684210526314, "grad_norm": 0.05956602096557617, "learning_rate": 9.479410272984908e-05, "loss": 0.0068, "step": 8124 }, { "epoch": 1.5725619195046439, "grad_norm": 0.11079688370227814, "learning_rate": 9.479282159159579e-05, "loss": 0.0119, "step": 8125 }, { "epoch": 1.5727554179566563, "grad_norm": 0.09477414935827255, "learning_rate": 9.47915403054026e-05, "loss": 0.0093, "step": 8126 }, { "epoch": 1.5729489164086687, "grad_norm": 0.026822682470083237, "learning_rate": 9.479025887127426e-05, "loss": 0.0067, "step": 8127 }, { "epoch": 1.5731424148606812, "grad_norm": 0.08618377894163132, "learning_rate": 9.478897728921554e-05, "loss": 0.0091, "step": 8128 }, { "epoch": 1.5733359133126936, "grad_norm": 0.08764629811048508, "learning_rate": 9.478769555923121e-05, "loss": 0.0084, "step": 8129 }, { "epoch": 1.5735294117647058, "grad_norm": 0.08219926804304123, "learning_rate": 9.478641368132604e-05, "loss": 0.0079, "step": 8130 }, { "epoch": 1.5737229102167183, "grad_norm": 0.10743959248065948, "learning_rate": 9.478513165550477e-05, "loss": 0.0091, "step": 8131 }, { "epoch": 1.5739164086687305, "grad_norm": 
0.05544554069638252, "learning_rate": 9.47838494817722e-05, "loss": 0.0069, "step": 8132 }, { "epoch": 1.574109907120743, "grad_norm": 0.11302915960550308, "learning_rate": 9.478256716013307e-05, "loss": 0.0078, "step": 8133 }, { "epoch": 1.5743034055727554, "grad_norm": 0.04132683947682381, "learning_rate": 9.478128469059216e-05, "loss": 0.007, "step": 8134 }, { "epoch": 1.5744969040247678, "grad_norm": 0.10387057811021805, "learning_rate": 9.478000207315425e-05, "loss": 0.0094, "step": 8135 }, { "epoch": 1.5746904024767803, "grad_norm": 0.08892592042684555, "learning_rate": 9.477871930782409e-05, "loss": 0.0073, "step": 8136 }, { "epoch": 1.5748839009287927, "grad_norm": 0.07108133286237717, "learning_rate": 9.477743639460645e-05, "loss": 0.0069, "step": 8137 }, { "epoch": 1.575077399380805, "grad_norm": 0.07922070473432541, "learning_rate": 9.477615333350609e-05, "loss": 0.0105, "step": 8138 }, { "epoch": 1.5752708978328174, "grad_norm": 0.07204879075288773, "learning_rate": 9.477487012452782e-05, "loss": 0.0054, "step": 8139 }, { "epoch": 1.5754643962848296, "grad_norm": 0.08115408569574356, "learning_rate": 9.477358676767635e-05, "loss": 0.0077, "step": 8140 }, { "epoch": 1.575657894736842, "grad_norm": 0.09099629521369934, "learning_rate": 9.47723032629565e-05, "loss": 0.009, "step": 8141 }, { "epoch": 1.5758513931888545, "grad_norm": 0.07458017021417618, "learning_rate": 9.477101961037302e-05, "loss": 0.008, "step": 8142 }, { "epoch": 1.576044891640867, "grad_norm": 0.10306863486766815, "learning_rate": 9.47697358099307e-05, "loss": 0.0082, "step": 8143 }, { "epoch": 1.5762383900928794, "grad_norm": 0.06714153289794922, "learning_rate": 9.47684518616343e-05, "loss": 0.008, "step": 8144 }, { "epoch": 1.5764318885448918, "grad_norm": 0.07697994261980057, "learning_rate": 9.476716776548859e-05, "loss": 0.0082, "step": 8145 }, { "epoch": 1.576625386996904, "grad_norm": 0.07521915435791016, "learning_rate": 9.476588352149837e-05, "loss": 0.0091, "step": 8146 }, { "epoch": 1.5768188854489165, "grad_norm": 0.06752217561006546, "learning_rate": 9.476459912966837e-05, "loss": 0.0074, "step": 8147 }, { "epoch": 1.5770123839009287, "grad_norm": 0.09464633464813232, "learning_rate": 9.47633145900034e-05, "loss": 0.0084, "step": 8148 }, { "epoch": 1.5772058823529411, "grad_norm": 0.09165576845407486, "learning_rate": 9.476202990250822e-05, "loss": 0.0087, "step": 8149 }, { "epoch": 1.5773993808049536, "grad_norm": 0.10030325502157211, "learning_rate": 9.47607450671876e-05, "loss": 0.0089, "step": 8150 }, { "epoch": 1.577592879256966, "grad_norm": 0.06178080290555954, "learning_rate": 9.475946008404633e-05, "loss": 0.0077, "step": 8151 }, { "epoch": 1.5777863777089784, "grad_norm": 0.08960381150245667, "learning_rate": 9.47581749530892e-05, "loss": 0.0079, "step": 8152 }, { "epoch": 1.5779798761609907, "grad_norm": 0.07739061117172241, "learning_rate": 9.475688967432096e-05, "loss": 0.0088, "step": 8153 }, { "epoch": 1.578173374613003, "grad_norm": 0.09507149457931519, "learning_rate": 9.47556042477464e-05, "loss": 0.0096, "step": 8154 }, { "epoch": 1.5783668730650153, "grad_norm": 0.08189931511878967, "learning_rate": 9.47543186733703e-05, "loss": 0.0078, "step": 8155 }, { "epoch": 1.5785603715170278, "grad_norm": 0.07952503114938736, "learning_rate": 9.475303295119745e-05, "loss": 0.0079, "step": 8156 }, { "epoch": 1.5787538699690402, "grad_norm": 0.044034507125616074, "learning_rate": 9.475174708123261e-05, "loss": 0.0091, "step": 8157 }, { "epoch": 1.5789473684210527, "grad_norm": 
0.08332768827676773, "learning_rate": 9.475046106348057e-05, "loss": 0.0093, "step": 8158 }, { "epoch": 1.579140866873065, "grad_norm": 0.04284977540373802, "learning_rate": 9.47491748979461e-05, "loss": 0.0082, "step": 8159 }, { "epoch": 1.5793343653250775, "grad_norm": 0.05455051362514496, "learning_rate": 9.474788858463402e-05, "loss": 0.007, "step": 8160 }, { "epoch": 1.5795278637770898, "grad_norm": 0.04768051579594612, "learning_rate": 9.474660212354905e-05, "loss": 0.0069, "step": 8161 }, { "epoch": 1.5797213622291022, "grad_norm": 0.04396706074476242, "learning_rate": 9.474531551469604e-05, "loss": 0.0086, "step": 8162 }, { "epoch": 1.5799148606811144, "grad_norm": 0.06991332769393921, "learning_rate": 9.474402875807971e-05, "loss": 0.0075, "step": 8163 }, { "epoch": 1.5801083591331269, "grad_norm": 0.05828458443284035, "learning_rate": 9.47427418537049e-05, "loss": 0.0069, "step": 8164 }, { "epoch": 1.5803018575851393, "grad_norm": 0.07079343497753143, "learning_rate": 9.474145480157637e-05, "loss": 0.0069, "step": 8165 }, { "epoch": 1.5804953560371517, "grad_norm": 0.06327178329229355, "learning_rate": 9.474016760169888e-05, "loss": 0.0079, "step": 8166 }, { "epoch": 1.5806888544891642, "grad_norm": 0.046990640461444855, "learning_rate": 9.473888025407726e-05, "loss": 0.0088, "step": 8167 }, { "epoch": 1.5808823529411766, "grad_norm": 0.10220085829496384, "learning_rate": 9.473759275871625e-05, "loss": 0.0097, "step": 8168 }, { "epoch": 1.5810758513931888, "grad_norm": 0.050499603152275085, "learning_rate": 9.473630511562069e-05, "loss": 0.0083, "step": 8169 }, { "epoch": 1.5812693498452013, "grad_norm": 0.07217169553041458, "learning_rate": 9.473501732479533e-05, "loss": 0.0087, "step": 8170 }, { "epoch": 1.5814628482972135, "grad_norm": 0.05641527101397514, "learning_rate": 9.473372938624495e-05, "loss": 0.0074, "step": 8171 }, { "epoch": 1.581656346749226, "grad_norm": 0.05414986237883568, "learning_rate": 9.473244129997437e-05, "loss": 0.0077, "step": 8172 }, { "epoch": 1.5818498452012384, "grad_norm": 0.062251221388578415, "learning_rate": 9.473115306598836e-05, "loss": 0.0092, "step": 8173 }, { "epoch": 1.5820433436532508, "grad_norm": 0.07782579958438873, "learning_rate": 9.472986468429172e-05, "loss": 0.0067, "step": 8174 }, { "epoch": 1.5822368421052633, "grad_norm": 0.08410988003015518, "learning_rate": 9.472857615488921e-05, "loss": 0.008, "step": 8175 }, { "epoch": 1.5824303405572755, "grad_norm": 0.06360882520675659, "learning_rate": 9.472728747778567e-05, "loss": 0.0085, "step": 8176 }, { "epoch": 1.582623839009288, "grad_norm": 0.08283273875713348, "learning_rate": 9.472599865298585e-05, "loss": 0.0089, "step": 8177 }, { "epoch": 1.5828173374613002, "grad_norm": 0.03941705450415611, "learning_rate": 9.472470968049456e-05, "loss": 0.0079, "step": 8178 }, { "epoch": 1.5830108359133126, "grad_norm": 0.04475453495979309, "learning_rate": 9.47234205603166e-05, "loss": 0.0071, "step": 8179 }, { "epoch": 1.583204334365325, "grad_norm": 0.034606434404850006, "learning_rate": 9.472213129245673e-05, "loss": 0.0079, "step": 8180 }, { "epoch": 1.5833978328173375, "grad_norm": 0.05920004844665527, "learning_rate": 9.472084187691977e-05, "loss": 0.0068, "step": 8181 }, { "epoch": 1.58359133126935, "grad_norm": 0.05026818439364433, "learning_rate": 9.47195523137105e-05, "loss": 0.0079, "step": 8182 }, { "epoch": 1.5837848297213624, "grad_norm": 0.051331669092178345, "learning_rate": 9.471826260283373e-05, "loss": 0.0076, "step": 8183 }, { "epoch": 1.5839783281733746, "grad_norm": 
0.0541381910443306, "learning_rate": 9.471697274429425e-05, "loss": 0.0076, "step": 8184 }, { "epoch": 1.584171826625387, "grad_norm": 0.08257875591516495, "learning_rate": 9.471568273809686e-05, "loss": 0.0077, "step": 8185 }, { "epoch": 1.5843653250773992, "grad_norm": 0.07421277463436127, "learning_rate": 9.471439258424634e-05, "loss": 0.008, "step": 8186 }, { "epoch": 1.5845588235294117, "grad_norm": 0.09180045127868652, "learning_rate": 9.471310228274748e-05, "loss": 0.0082, "step": 8187 }, { "epoch": 1.5847523219814241, "grad_norm": 0.11066817492246628, "learning_rate": 9.471181183360511e-05, "loss": 0.0083, "step": 8188 }, { "epoch": 1.5849458204334366, "grad_norm": 0.07112789154052734, "learning_rate": 9.4710521236824e-05, "loss": 0.0083, "step": 8189 }, { "epoch": 1.585139318885449, "grad_norm": 0.1009746640920639, "learning_rate": 9.470923049240895e-05, "loss": 0.0093, "step": 8190 }, { "epoch": 1.5853328173374615, "grad_norm": 0.12598873674869537, "learning_rate": 9.470793960036477e-05, "loss": 0.0069, "step": 8191 }, { "epoch": 1.5855263157894737, "grad_norm": 0.08747267723083496, "learning_rate": 9.470664856069626e-05, "loss": 0.0065, "step": 8192 }, { "epoch": 1.5857198142414861, "grad_norm": 0.12011539191007614, "learning_rate": 9.47053573734082e-05, "loss": 0.0082, "step": 8193 }, { "epoch": 1.5859133126934983, "grad_norm": 0.0821397602558136, "learning_rate": 9.470406603850544e-05, "loss": 0.0079, "step": 8194 }, { "epoch": 1.5861068111455108, "grad_norm": 0.09138520061969757, "learning_rate": 9.470277455599272e-05, "loss": 0.0089, "step": 8195 }, { "epoch": 1.5863003095975232, "grad_norm": 0.087612085044384, "learning_rate": 9.470148292587487e-05, "loss": 0.0086, "step": 8196 }, { "epoch": 1.5864938080495357, "grad_norm": 0.0828513503074646, "learning_rate": 9.470019114815671e-05, "loss": 0.0079, "step": 8197 }, { "epoch": 1.586687306501548, "grad_norm": 0.09624151885509491, "learning_rate": 9.469889922284299e-05, "loss": 0.007, "step": 8198 }, { "epoch": 1.5868808049535603, "grad_norm": 0.08691149204969406, "learning_rate": 9.469760714993857e-05, "loss": 0.0088, "step": 8199 }, { "epoch": 1.5870743034055728, "grad_norm": 0.12051602452993393, "learning_rate": 9.469631492944819e-05, "loss": 0.008, "step": 8200 }, { "epoch": 1.587267801857585, "grad_norm": 0.060644928365945816, "learning_rate": 9.469502256137673e-05, "loss": 0.0077, "step": 8201 }, { "epoch": 1.5874613003095974, "grad_norm": 0.10170493274927139, "learning_rate": 9.469373004572893e-05, "loss": 0.0079, "step": 8202 }, { "epoch": 1.5876547987616099, "grad_norm": 0.04362856224179268, "learning_rate": 9.469243738250965e-05, "loss": 0.0073, "step": 8203 }, { "epoch": 1.5878482972136223, "grad_norm": 0.07597973942756653, "learning_rate": 9.469114457172364e-05, "loss": 0.0071, "step": 8204 }, { "epoch": 1.5880417956656347, "grad_norm": 0.06971926987171173, "learning_rate": 9.468985161337575e-05, "loss": 0.0083, "step": 8205 }, { "epoch": 1.5882352941176472, "grad_norm": 0.08775925636291504, "learning_rate": 9.468855850747079e-05, "loss": 0.0074, "step": 8206 }, { "epoch": 1.5884287925696594, "grad_norm": 0.10544350743293762, "learning_rate": 9.468726525401352e-05, "loss": 0.009, "step": 8207 }, { "epoch": 1.5886222910216719, "grad_norm": 0.14385338127613068, "learning_rate": 9.468597185300879e-05, "loss": 0.0067, "step": 8208 }, { "epoch": 1.588815789473684, "grad_norm": 0.08981988579034805, "learning_rate": 9.468467830446139e-05, "loss": 0.007, "step": 8209 }, { "epoch": 1.5890092879256965, "grad_norm": 
0.12610796093940735, "learning_rate": 9.468338460837614e-05, "loss": 0.0091, "step": 8210 }, { "epoch": 1.589202786377709, "grad_norm": 0.12488577514886856, "learning_rate": 9.468209076475785e-05, "loss": 0.0075, "step": 8211 }, { "epoch": 1.5893962848297214, "grad_norm": 0.08687546849250793, "learning_rate": 9.468079677361132e-05, "loss": 0.0083, "step": 8212 }, { "epoch": 1.5895897832817338, "grad_norm": 0.15050926804542542, "learning_rate": 9.467950263494134e-05, "loss": 0.0075, "step": 8213 }, { "epoch": 1.5897832817337463, "grad_norm": 0.044084325432777405, "learning_rate": 9.467820834875278e-05, "loss": 0.0063, "step": 8214 }, { "epoch": 1.5899767801857585, "grad_norm": 0.13646692037582397, "learning_rate": 9.467691391505041e-05, "loss": 0.009, "step": 8215 }, { "epoch": 1.590170278637771, "grad_norm": 0.05708907172083855, "learning_rate": 9.467561933383905e-05, "loss": 0.0067, "step": 8216 }, { "epoch": 1.5903637770897832, "grad_norm": 0.10948345065116882, "learning_rate": 9.467432460512351e-05, "loss": 0.0077, "step": 8217 }, { "epoch": 1.5905572755417956, "grad_norm": 0.07252856343984604, "learning_rate": 9.467302972890859e-05, "loss": 0.0071, "step": 8218 }, { "epoch": 1.590750773993808, "grad_norm": 0.09308111667633057, "learning_rate": 9.467173470519915e-05, "loss": 0.008, "step": 8219 }, { "epoch": 1.5909442724458205, "grad_norm": 0.09190770238637924, "learning_rate": 9.467043953399996e-05, "loss": 0.0088, "step": 8220 }, { "epoch": 1.591137770897833, "grad_norm": 0.07721931487321854, "learning_rate": 9.466914421531586e-05, "loss": 0.0091, "step": 8221 }, { "epoch": 1.5913312693498454, "grad_norm": 0.12177489697933197, "learning_rate": 9.466784874915164e-05, "loss": 0.008, "step": 8222 }, { "epoch": 1.5915247678018576, "grad_norm": 0.10815902054309845, "learning_rate": 9.466655313551216e-05, "loss": 0.0087, "step": 8223 }, { "epoch": 1.5917182662538698, "grad_norm": 0.1201247125864029, "learning_rate": 9.466525737440219e-05, "loss": 0.0072, "step": 8224 }, { "epoch": 1.5919117647058822, "grad_norm": 0.09717907011508942, "learning_rate": 9.466396146582656e-05, "loss": 0.0088, "step": 8225 }, { "epoch": 1.5921052631578947, "grad_norm": 0.10662346333265305, "learning_rate": 9.466266540979012e-05, "loss": 0.0103, "step": 8226 }, { "epoch": 1.5922987616099071, "grad_norm": 0.09517350792884827, "learning_rate": 9.466136920629765e-05, "loss": 0.0074, "step": 8227 }, { "epoch": 1.5924922600619196, "grad_norm": 0.12650461494922638, "learning_rate": 9.466007285535398e-05, "loss": 0.0085, "step": 8228 }, { "epoch": 1.592685758513932, "grad_norm": 0.10017276555299759, "learning_rate": 9.465877635696393e-05, "loss": 0.0069, "step": 8229 }, { "epoch": 1.5928792569659442, "grad_norm": 0.16753293573856354, "learning_rate": 9.465747971113233e-05, "loss": 0.0095, "step": 8230 }, { "epoch": 1.5930727554179567, "grad_norm": 0.11672589182853699, "learning_rate": 9.465618291786398e-05, "loss": 0.0078, "step": 8231 }, { "epoch": 1.593266253869969, "grad_norm": 0.1868564635515213, "learning_rate": 9.465488597716374e-05, "loss": 0.0087, "step": 8232 }, { "epoch": 1.5934597523219813, "grad_norm": 0.08300338685512543, "learning_rate": 9.465358888903636e-05, "loss": 0.0074, "step": 8233 }, { "epoch": 1.5936532507739938, "grad_norm": 0.1465172916650772, "learning_rate": 9.465229165348675e-05, "loss": 0.0077, "step": 8234 }, { "epoch": 1.5938467492260062, "grad_norm": 0.12069746851921082, "learning_rate": 9.465099427051967e-05, "loss": 0.0103, "step": 8235 }, { "epoch": 1.5940402476780187, "grad_norm": 
0.22738324105739594, "learning_rate": 9.464969674013996e-05, "loss": 0.0096, "step": 8236 }, { "epoch": 1.594233746130031, "grad_norm": 0.17480897903442383, "learning_rate": 9.464839906235245e-05, "loss": 0.0074, "step": 8237 }, { "epoch": 1.5944272445820433, "grad_norm": 0.18818166851997375, "learning_rate": 9.464710123716197e-05, "loss": 0.0084, "step": 8238 }, { "epoch": 1.5946207430340558, "grad_norm": 0.2520710527896881, "learning_rate": 9.464580326457333e-05, "loss": 0.0098, "step": 8239 }, { "epoch": 1.594814241486068, "grad_norm": 0.10096201300621033, "learning_rate": 9.464450514459137e-05, "loss": 0.0104, "step": 8240 }, { "epoch": 1.5950077399380804, "grad_norm": 0.2773055136203766, "learning_rate": 9.464320687722089e-05, "loss": 0.0091, "step": 8241 }, { "epoch": 1.5952012383900929, "grad_norm": 0.1288902908563614, "learning_rate": 9.464190846246676e-05, "loss": 0.0078, "step": 8242 }, { "epoch": 1.5953947368421053, "grad_norm": 0.2161664515733719, "learning_rate": 9.464060990033376e-05, "loss": 0.0082, "step": 8243 }, { "epoch": 1.5955882352941178, "grad_norm": 0.196554034948349, "learning_rate": 9.463931119082677e-05, "loss": 0.0072, "step": 8244 }, { "epoch": 1.5957817337461302, "grad_norm": 0.23321957886219025, "learning_rate": 9.463801233395055e-05, "loss": 0.0102, "step": 8245 }, { "epoch": 1.5959752321981424, "grad_norm": 0.1180727556347847, "learning_rate": 9.463671332970999e-05, "loss": 0.007, "step": 8246 }, { "epoch": 1.5961687306501546, "grad_norm": 0.18591076135635376, "learning_rate": 9.463541417810989e-05, "loss": 0.0103, "step": 8247 }, { "epoch": 1.596362229102167, "grad_norm": 0.1578156054019928, "learning_rate": 9.463411487915509e-05, "loss": 0.0077, "step": 8248 }, { "epoch": 1.5965557275541795, "grad_norm": 0.11569185554981232, "learning_rate": 9.46328154328504e-05, "loss": 0.0085, "step": 8249 }, { "epoch": 1.596749226006192, "grad_norm": 0.13653108477592468, "learning_rate": 9.46315158392007e-05, "loss": 0.008, "step": 8250 }, { "epoch": 1.5969427244582044, "grad_norm": 0.09137631952762604, "learning_rate": 9.463021609821077e-05, "loss": 0.0078, "step": 8251 }, { "epoch": 1.5971362229102168, "grad_norm": 0.10321714729070663, "learning_rate": 9.462891620988548e-05, "loss": 0.0067, "step": 8252 }, { "epoch": 1.597329721362229, "grad_norm": 0.07277367264032364, "learning_rate": 9.462761617422961e-05, "loss": 0.0081, "step": 8253 }, { "epoch": 1.5975232198142415, "grad_norm": 0.1003563404083252, "learning_rate": 9.462631599124806e-05, "loss": 0.009, "step": 8254 }, { "epoch": 1.5977167182662537, "grad_norm": 0.07960667461156845, "learning_rate": 9.462501566094563e-05, "loss": 0.008, "step": 8255 }, { "epoch": 1.5979102167182662, "grad_norm": 0.11827599257230759, "learning_rate": 9.462371518332713e-05, "loss": 0.0073, "step": 8256 }, { "epoch": 1.5981037151702786, "grad_norm": 0.05007212609052658, "learning_rate": 9.462241455839743e-05, "loss": 0.0075, "step": 8257 }, { "epoch": 1.598297213622291, "grad_norm": 0.06306502968072891, "learning_rate": 9.462111378616138e-05, "loss": 0.0078, "step": 8258 }, { "epoch": 1.5984907120743035, "grad_norm": 0.09776685386896133, "learning_rate": 9.461981286662375e-05, "loss": 0.0088, "step": 8259 }, { "epoch": 1.598684210526316, "grad_norm": 0.12252594530582428, "learning_rate": 9.461851179978945e-05, "loss": 0.0102, "step": 8260 }, { "epoch": 1.5988777089783281, "grad_norm": 0.07437250763177872, "learning_rate": 9.461721058566328e-05, "loss": 0.009, "step": 8261 }, { "epoch": 1.5990712074303406, "grad_norm": 
0.11102805286645889, "learning_rate": 9.461590922425008e-05, "loss": 0.0087, "step": 8262 }, { "epoch": 1.5992647058823528, "grad_norm": 0.0838155746459961, "learning_rate": 9.461460771555469e-05, "loss": 0.0109, "step": 8263 }, { "epoch": 1.5994582043343653, "grad_norm": 0.10426004976034164, "learning_rate": 9.461330605958194e-05, "loss": 0.008, "step": 8264 }, { "epoch": 1.5996517027863777, "grad_norm": 0.05627962946891785, "learning_rate": 9.46120042563367e-05, "loss": 0.0084, "step": 8265 }, { "epoch": 1.5998452012383901, "grad_norm": 0.09872867912054062, "learning_rate": 9.461070230582378e-05, "loss": 0.0066, "step": 8266 }, { "epoch": 1.6000386996904026, "grad_norm": 0.08678635209798813, "learning_rate": 9.460940020804802e-05, "loss": 0.0066, "step": 8267 }, { "epoch": 1.600232198142415, "grad_norm": 0.06018806993961334, "learning_rate": 9.460809796301429e-05, "loss": 0.0096, "step": 8268 }, { "epoch": 1.6004256965944272, "grad_norm": 0.11999376863241196, "learning_rate": 9.460679557072739e-05, "loss": 0.0085, "step": 8269 }, { "epoch": 1.6006191950464397, "grad_norm": 0.060718975961208344, "learning_rate": 9.460549303119219e-05, "loss": 0.0083, "step": 8270 }, { "epoch": 1.600812693498452, "grad_norm": 0.0735970064997673, "learning_rate": 9.460419034441353e-05, "loss": 0.0098, "step": 8271 }, { "epoch": 1.6010061919504643, "grad_norm": 0.051114771515131, "learning_rate": 9.460288751039624e-05, "loss": 0.0078, "step": 8272 }, { "epoch": 1.6011996904024768, "grad_norm": 0.05780276283621788, "learning_rate": 9.460158452914519e-05, "loss": 0.0095, "step": 8273 }, { "epoch": 1.6013931888544892, "grad_norm": 0.03788237273693085, "learning_rate": 9.460028140066519e-05, "loss": 0.0079, "step": 8274 }, { "epoch": 1.6015866873065017, "grad_norm": 0.1009465679526329, "learning_rate": 9.459897812496112e-05, "loss": 0.0085, "step": 8275 }, { "epoch": 1.6017801857585139, "grad_norm": 0.04628709703683853, "learning_rate": 9.459767470203777e-05, "loss": 0.0086, "step": 8276 }, { "epoch": 1.6019736842105263, "grad_norm": 0.09856130927801132, "learning_rate": 9.459637113190006e-05, "loss": 0.0076, "step": 8277 }, { "epoch": 1.6021671826625385, "grad_norm": 0.0741828978061676, "learning_rate": 9.459506741455278e-05, "loss": 0.0092, "step": 8278 }, { "epoch": 1.602360681114551, "grad_norm": 0.09897240251302719, "learning_rate": 9.459376355000081e-05, "loss": 0.0084, "step": 8279 }, { "epoch": 1.6025541795665634, "grad_norm": 0.0734822005033493, "learning_rate": 9.459245953824898e-05, "loss": 0.0082, "step": 8280 }, { "epoch": 1.6027476780185759, "grad_norm": 0.09465844184160233, "learning_rate": 9.459115537930212e-05, "loss": 0.0089, "step": 8281 }, { "epoch": 1.6029411764705883, "grad_norm": 0.09033786505460739, "learning_rate": 9.458985107316512e-05, "loss": 0.0079, "step": 8282 }, { "epoch": 1.6031346749226008, "grad_norm": 0.09293514490127563, "learning_rate": 9.458854661984281e-05, "loss": 0.0078, "step": 8283 }, { "epoch": 1.603328173374613, "grad_norm": 0.10630608350038528, "learning_rate": 9.458724201934004e-05, "loss": 0.0095, "step": 8284 }, { "epoch": 1.6035216718266254, "grad_norm": 0.09486532211303711, "learning_rate": 9.458593727166164e-05, "loss": 0.0105, "step": 8285 }, { "epoch": 1.6037151702786376, "grad_norm": 0.10332910716533661, "learning_rate": 9.45846323768125e-05, "loss": 0.009, "step": 8286 }, { "epoch": 1.60390866873065, "grad_norm": 0.09459476172924042, "learning_rate": 9.458332733479744e-05, "loss": 0.0096, "step": 8287 }, { "epoch": 1.6041021671826625, "grad_norm": 
0.0748007744550705, "learning_rate": 9.458202214562133e-05, "loss": 0.0088, "step": 8288 }, { "epoch": 1.604295665634675, "grad_norm": 0.07695846259593964, "learning_rate": 9.458071680928901e-05, "loss": 0.0072, "step": 8289 }, { "epoch": 1.6044891640866874, "grad_norm": 0.09076713025569916, "learning_rate": 9.457941132580533e-05, "loss": 0.0086, "step": 8290 }, { "epoch": 1.6046826625386998, "grad_norm": 0.09222011268138885, "learning_rate": 9.457810569517516e-05, "loss": 0.0103, "step": 8291 }, { "epoch": 1.604876160990712, "grad_norm": 0.09932563453912735, "learning_rate": 9.457679991740335e-05, "loss": 0.0082, "step": 8292 }, { "epoch": 1.6050696594427245, "grad_norm": 0.10281407088041306, "learning_rate": 9.457549399249475e-05, "loss": 0.0071, "step": 8293 }, { "epoch": 1.6052631578947367, "grad_norm": 0.09384650737047195, "learning_rate": 9.45741879204542e-05, "loss": 0.0071, "step": 8294 }, { "epoch": 1.6054566563467492, "grad_norm": 0.07643796503543854, "learning_rate": 9.457288170128658e-05, "loss": 0.0091, "step": 8295 }, { "epoch": 1.6056501547987616, "grad_norm": 0.08472352474927902, "learning_rate": 9.457157533499673e-05, "loss": 0.0073, "step": 8296 }, { "epoch": 1.605843653250774, "grad_norm": 0.08691368252038956, "learning_rate": 9.457026882158954e-05, "loss": 0.0081, "step": 8297 }, { "epoch": 1.6060371517027865, "grad_norm": 0.10754160583019257, "learning_rate": 9.45689621610698e-05, "loss": 0.0076, "step": 8298 }, { "epoch": 1.6062306501547987, "grad_norm": 0.08812445402145386, "learning_rate": 9.456765535344244e-05, "loss": 0.0069, "step": 8299 }, { "epoch": 1.6064241486068112, "grad_norm": 0.12407616525888443, "learning_rate": 9.456634839871228e-05, "loss": 0.0082, "step": 8300 }, { "epoch": 1.6066176470588234, "grad_norm": 0.04434734210371971, "learning_rate": 9.456504129688418e-05, "loss": 0.0081, "step": 8301 }, { "epoch": 1.6068111455108358, "grad_norm": 0.07790905982255936, "learning_rate": 9.456373404796302e-05, "loss": 0.0101, "step": 8302 }, { "epoch": 1.6070046439628483, "grad_norm": 0.09007450938224792, "learning_rate": 9.456242665195361e-05, "loss": 0.008, "step": 8303 }, { "epoch": 1.6071981424148607, "grad_norm": 0.0514695905148983, "learning_rate": 9.456111910886087e-05, "loss": 0.0087, "step": 8304 }, { "epoch": 1.6073916408668731, "grad_norm": 0.06631261855363846, "learning_rate": 9.455981141868965e-05, "loss": 0.0066, "step": 8305 }, { "epoch": 1.6075851393188856, "grad_norm": 0.06362583488225937, "learning_rate": 9.455850358144479e-05, "loss": 0.0076, "step": 8306 }, { "epoch": 1.6077786377708978, "grad_norm": 0.04512138292193413, "learning_rate": 9.455719559713115e-05, "loss": 0.0086, "step": 8307 }, { "epoch": 1.6079721362229102, "grad_norm": 0.05255966633558273, "learning_rate": 9.455588746575361e-05, "loss": 0.008, "step": 8308 }, { "epoch": 1.6081656346749225, "grad_norm": 0.06631321460008621, "learning_rate": 9.455457918731703e-05, "loss": 0.0074, "step": 8309 }, { "epoch": 1.608359133126935, "grad_norm": 0.05743557587265968, "learning_rate": 9.455327076182628e-05, "loss": 0.008, "step": 8310 }, { "epoch": 1.6085526315789473, "grad_norm": 0.06498150527477264, "learning_rate": 9.45519621892862e-05, "loss": 0.0082, "step": 8311 }, { "epoch": 1.6087461300309598, "grad_norm": 0.06407064199447632, "learning_rate": 9.455065346970167e-05, "loss": 0.0086, "step": 8312 }, { "epoch": 1.6089396284829722, "grad_norm": 0.05913359671831131, "learning_rate": 9.454934460307757e-05, "loss": 0.0082, "step": 8313 }, { "epoch": 1.6091331269349847, "grad_norm": 
0.06336285173892975, "learning_rate": 9.454803558941873e-05, "loss": 0.007, "step": 8314 }, { "epoch": 1.609326625386997, "grad_norm": 0.06298960745334625, "learning_rate": 9.454672642873006e-05, "loss": 0.0069, "step": 8315 }, { "epoch": 1.6095201238390093, "grad_norm": 0.05802971497178078, "learning_rate": 9.454541712101641e-05, "loss": 0.0076, "step": 8316 }, { "epoch": 1.6097136222910216, "grad_norm": 0.0411049984395504, "learning_rate": 9.454410766628262e-05, "loss": 0.0083, "step": 8317 }, { "epoch": 1.609907120743034, "grad_norm": 0.05249180644750595, "learning_rate": 9.45427980645336e-05, "loss": 0.0093, "step": 8318 }, { "epoch": 1.6101006191950464, "grad_norm": 0.048558369278907776, "learning_rate": 9.454148831577418e-05, "loss": 0.0086, "step": 8319 }, { "epoch": 1.6102941176470589, "grad_norm": 0.04210183769464493, "learning_rate": 9.454017842000928e-05, "loss": 0.0074, "step": 8320 }, { "epoch": 1.6104876160990713, "grad_norm": 0.05336516723036766, "learning_rate": 9.453886837724371e-05, "loss": 0.0091, "step": 8321 }, { "epoch": 1.6106811145510835, "grad_norm": 0.06728692352771759, "learning_rate": 9.453755818748238e-05, "loss": 0.008, "step": 8322 }, { "epoch": 1.610874613003096, "grad_norm": 0.06465265154838562, "learning_rate": 9.453624785073016e-05, "loss": 0.0082, "step": 8323 }, { "epoch": 1.6110681114551082, "grad_norm": 0.07102291285991669, "learning_rate": 9.453493736699189e-05, "loss": 0.0095, "step": 8324 }, { "epoch": 1.6112616099071206, "grad_norm": 0.04934288188815117, "learning_rate": 9.453362673627248e-05, "loss": 0.0082, "step": 8325 }, { "epoch": 1.611455108359133, "grad_norm": 0.10283064842224121, "learning_rate": 9.45323159585768e-05, "loss": 0.0083, "step": 8326 }, { "epoch": 1.6116486068111455, "grad_norm": 0.0754886046051979, "learning_rate": 9.45310050339097e-05, "loss": 0.0067, "step": 8327 }, { "epoch": 1.611842105263158, "grad_norm": 0.07416516542434692, "learning_rate": 9.452969396227606e-05, "loss": 0.0086, "step": 8328 }, { "epoch": 1.6120356037151704, "grad_norm": 0.10355900973081589, "learning_rate": 9.452838274368076e-05, "loss": 0.0081, "step": 8329 }, { "epoch": 1.6122291021671826, "grad_norm": 0.059576861560344696, "learning_rate": 9.452707137812868e-05, "loss": 0.0079, "step": 8330 }, { "epoch": 1.612422600619195, "grad_norm": 0.08643396198749542, "learning_rate": 9.452575986562468e-05, "loss": 0.007, "step": 8331 }, { "epoch": 1.6126160990712073, "grad_norm": 0.042280685156583786, "learning_rate": 9.452444820617364e-05, "loss": 0.0094, "step": 8332 }, { "epoch": 1.6128095975232197, "grad_norm": 0.0767904743552208, "learning_rate": 9.452313639978044e-05, "loss": 0.0073, "step": 8333 }, { "epoch": 1.6130030959752322, "grad_norm": 0.05250341817736626, "learning_rate": 9.452182444644996e-05, "loss": 0.0087, "step": 8334 }, { "epoch": 1.6131965944272446, "grad_norm": 0.10960526764392853, "learning_rate": 9.452051234618709e-05, "loss": 0.0072, "step": 8335 }, { "epoch": 1.613390092879257, "grad_norm": 0.0396258644759655, "learning_rate": 9.451920009899668e-05, "loss": 0.0067, "step": 8336 }, { "epoch": 1.6135835913312695, "grad_norm": 0.10857275128364563, "learning_rate": 9.451788770488362e-05, "loss": 0.0065, "step": 8337 }, { "epoch": 1.6137770897832817, "grad_norm": 0.0708044096827507, "learning_rate": 9.45165751638528e-05, "loss": 0.0084, "step": 8338 }, { "epoch": 1.6139705882352942, "grad_norm": 0.11281556636095047, "learning_rate": 9.451526247590909e-05, "loss": 0.0088, "step": 8339 }, { "epoch": 1.6141640866873064, "grad_norm": 
0.09716250747442245, "learning_rate": 9.451394964105737e-05, "loss": 0.0078, "step": 8340 }, { "epoch": 1.6143575851393188, "grad_norm": 0.06359777599573135, "learning_rate": 9.45126366593025e-05, "loss": 0.0093, "step": 8341 }, { "epoch": 1.6145510835913313, "grad_norm": 0.11573940515518188, "learning_rate": 9.45113235306494e-05, "loss": 0.0088, "step": 8342 }, { "epoch": 1.6147445820433437, "grad_norm": 0.05390915647149086, "learning_rate": 9.451001025510293e-05, "loss": 0.0066, "step": 8343 }, { "epoch": 1.6149380804953561, "grad_norm": 0.09314031898975372, "learning_rate": 9.4508696832668e-05, "loss": 0.0076, "step": 8344 }, { "epoch": 1.6151315789473686, "grad_norm": 0.08666804432868958, "learning_rate": 9.450738326334945e-05, "loss": 0.0067, "step": 8345 }, { "epoch": 1.6153250773993808, "grad_norm": 0.043108370155096054, "learning_rate": 9.450606954715218e-05, "loss": 0.0066, "step": 8346 }, { "epoch": 1.615518575851393, "grad_norm": 0.09283420443534851, "learning_rate": 9.45047556840811e-05, "loss": 0.007, "step": 8347 }, { "epoch": 1.6157120743034055, "grad_norm": 0.0666709840297699, "learning_rate": 9.450344167414104e-05, "loss": 0.0087, "step": 8348 }, { "epoch": 1.615905572755418, "grad_norm": 0.10735543817281723, "learning_rate": 9.450212751733695e-05, "loss": 0.0077, "step": 8349 }, { "epoch": 1.6160990712074303, "grad_norm": 0.09150989353656769, "learning_rate": 9.450081321367367e-05, "loss": 0.0077, "step": 8350 }, { "epoch": 1.6162925696594428, "grad_norm": 0.10915807634592056, "learning_rate": 9.449949876315609e-05, "loss": 0.007, "step": 8351 }, { "epoch": 1.6164860681114552, "grad_norm": 0.09703879803419113, "learning_rate": 9.449818416578912e-05, "loss": 0.007, "step": 8352 }, { "epoch": 1.6166795665634675, "grad_norm": 0.08570529520511627, "learning_rate": 9.449686942157763e-05, "loss": 0.0073, "step": 8353 }, { "epoch": 1.61687306501548, "grad_norm": 0.102229043841362, "learning_rate": 9.449555453052651e-05, "loss": 0.0088, "step": 8354 }, { "epoch": 1.6170665634674921, "grad_norm": 0.06866627186536789, "learning_rate": 9.449423949264065e-05, "loss": 0.0083, "step": 8355 }, { "epoch": 1.6172600619195046, "grad_norm": 0.10414288192987442, "learning_rate": 9.449292430792493e-05, "loss": 0.007, "step": 8356 }, { "epoch": 1.617453560371517, "grad_norm": 0.08446738868951797, "learning_rate": 9.449160897638427e-05, "loss": 0.0093, "step": 8357 }, { "epoch": 1.6176470588235294, "grad_norm": 0.08818414062261581, "learning_rate": 9.449029349802352e-05, "loss": 0.0071, "step": 8358 }, { "epoch": 1.6178405572755419, "grad_norm": 0.07630929350852966, "learning_rate": 9.44889778728476e-05, "loss": 0.0091, "step": 8359 }, { "epoch": 1.6180340557275543, "grad_norm": 0.085090272128582, "learning_rate": 9.448766210086139e-05, "loss": 0.0089, "step": 8360 }, { "epoch": 1.6182275541795665, "grad_norm": 0.05062360689043999, "learning_rate": 9.448634618206979e-05, "loss": 0.008, "step": 8361 }, { "epoch": 1.618421052631579, "grad_norm": 0.0824255496263504, "learning_rate": 9.448503011647767e-05, "loss": 0.0079, "step": 8362 }, { "epoch": 1.6186145510835912, "grad_norm": 0.06071262061595917, "learning_rate": 9.448371390408993e-05, "loss": 0.007, "step": 8363 }, { "epoch": 1.6188080495356036, "grad_norm": 0.07848266512155533, "learning_rate": 9.44823975449115e-05, "loss": 0.0082, "step": 8364 }, { "epoch": 1.619001547987616, "grad_norm": 0.08989310264587402, "learning_rate": 9.448108103894722e-05, "loss": 0.008, "step": 8365 }, { "epoch": 1.6191950464396285, "grad_norm": 
0.05145605653524399, "learning_rate": 9.447976438620201e-05, "loss": 0.0092, "step": 8366 }, { "epoch": 1.619388544891641, "grad_norm": 0.07459837198257446, "learning_rate": 9.447844758668077e-05, "loss": 0.0093, "step": 8367 }, { "epoch": 1.6195820433436534, "grad_norm": 0.07318005710840225, "learning_rate": 9.44771306403884e-05, "loss": 0.0078, "step": 8368 }, { "epoch": 1.6197755417956656, "grad_norm": 0.07543099671602249, "learning_rate": 9.447581354732977e-05, "loss": 0.008, "step": 8369 }, { "epoch": 1.619969040247678, "grad_norm": 0.08856178820133209, "learning_rate": 9.447449630750981e-05, "loss": 0.0067, "step": 8370 }, { "epoch": 1.6201625386996903, "grad_norm": 0.05209856107831001, "learning_rate": 9.447317892093338e-05, "loss": 0.0092, "step": 8371 }, { "epoch": 1.6203560371517027, "grad_norm": 0.09112729877233505, "learning_rate": 9.447186138760539e-05, "loss": 0.0084, "step": 8372 }, { "epoch": 1.6205495356037152, "grad_norm": 0.04853861406445503, "learning_rate": 9.447054370753078e-05, "loss": 0.0089, "step": 8373 }, { "epoch": 1.6207430340557276, "grad_norm": 0.06269556283950806, "learning_rate": 9.44692258807144e-05, "loss": 0.0083, "step": 8374 }, { "epoch": 1.62093653250774, "grad_norm": 0.046553730964660645, "learning_rate": 9.446790790716114e-05, "loss": 0.0108, "step": 8375 }, { "epoch": 1.6211300309597523, "grad_norm": 0.04485248774290085, "learning_rate": 9.446658978687595e-05, "loss": 0.0077, "step": 8376 }, { "epoch": 1.6213235294117647, "grad_norm": 0.04069061577320099, "learning_rate": 9.446527151986368e-05, "loss": 0.0103, "step": 8377 }, { "epoch": 1.621517027863777, "grad_norm": 0.0739077776670456, "learning_rate": 9.446395310612928e-05, "loss": 0.0079, "step": 8378 }, { "epoch": 1.6217105263157894, "grad_norm": 0.12667891383171082, "learning_rate": 9.446263454567762e-05, "loss": 0.0087, "step": 8379 }, { "epoch": 1.6219040247678018, "grad_norm": 0.05789491534233093, "learning_rate": 9.44613158385136e-05, "loss": 0.0093, "step": 8380 }, { "epoch": 1.6220975232198143, "grad_norm": 0.12735670804977417, "learning_rate": 9.445999698464214e-05, "loss": 0.0087, "step": 8381 }, { "epoch": 1.6222910216718267, "grad_norm": 0.08640504628419876, "learning_rate": 9.445867798406813e-05, "loss": 0.0078, "step": 8382 }, { "epoch": 1.6224845201238391, "grad_norm": 0.12339068204164505, "learning_rate": 9.445735883679648e-05, "loss": 0.0079, "step": 8383 }, { "epoch": 1.6226780185758514, "grad_norm": 0.19376538693904877, "learning_rate": 9.44560395428321e-05, "loss": 0.0077, "step": 8384 }, { "epoch": 1.6228715170278638, "grad_norm": 0.20633959770202637, "learning_rate": 9.445472010217987e-05, "loss": 0.0084, "step": 8385 }, { "epoch": 1.623065015479876, "grad_norm": 0.14907506108283997, "learning_rate": 9.445340051484473e-05, "loss": 0.0076, "step": 8386 }, { "epoch": 1.6232585139318885, "grad_norm": 0.06379641592502594, "learning_rate": 9.445208078083155e-05, "loss": 0.0068, "step": 8387 }, { "epoch": 1.623452012383901, "grad_norm": 0.23936685919761658, "learning_rate": 9.445076090014526e-05, "loss": 0.0105, "step": 8388 }, { "epoch": 1.6236455108359134, "grad_norm": 0.11990450322628021, "learning_rate": 9.444944087279078e-05, "loss": 0.0077, "step": 8389 }, { "epoch": 1.6238390092879258, "grad_norm": 0.19913652539253235, "learning_rate": 9.444812069877299e-05, "loss": 0.0088, "step": 8390 }, { "epoch": 1.6240325077399382, "grad_norm": 0.1462908238172531, "learning_rate": 9.44468003780968e-05, "loss": 0.0084, "step": 8391 }, { "epoch": 1.6242260061919505, "grad_norm": 
0.11791616678237915, "learning_rate": 9.444547991076713e-05, "loss": 0.0096, "step": 8392 }, { "epoch": 1.624419504643963, "grad_norm": 0.15995801985263824, "learning_rate": 9.444415929678889e-05, "loss": 0.0083, "step": 8393 }, { "epoch": 1.6246130030959751, "grad_norm": 0.0639842227101326, "learning_rate": 9.444283853616698e-05, "loss": 0.0071, "step": 8394 }, { "epoch": 1.6248065015479876, "grad_norm": 0.1419372260570526, "learning_rate": 9.444151762890632e-05, "loss": 0.0081, "step": 8395 }, { "epoch": 1.625, "grad_norm": 0.06250724196434021, "learning_rate": 9.44401965750118e-05, "loss": 0.0067, "step": 8396 }, { "epoch": 1.6251934984520124, "grad_norm": 0.11191496253013611, "learning_rate": 9.443887537448837e-05, "loss": 0.008, "step": 8397 }, { "epoch": 1.6253869969040249, "grad_norm": 0.06454847753047943, "learning_rate": 9.44375540273409e-05, "loss": 0.0077, "step": 8398 }, { "epoch": 1.625580495356037, "grad_norm": 0.09510614722967148, "learning_rate": 9.443623253357433e-05, "loss": 0.0077, "step": 8399 }, { "epoch": 1.6257739938080495, "grad_norm": 0.06258632242679596, "learning_rate": 9.443491089319357e-05, "loss": 0.0075, "step": 8400 }, { "epoch": 1.6259674922600618, "grad_norm": 0.10068574547767639, "learning_rate": 9.443358910620353e-05, "loss": 0.0088, "step": 8401 }, { "epoch": 1.6261609907120742, "grad_norm": 0.088511623442173, "learning_rate": 9.44322671726091e-05, "loss": 0.0085, "step": 8402 }, { "epoch": 1.6263544891640866, "grad_norm": 0.09089316427707672, "learning_rate": 9.443094509241523e-05, "loss": 0.0082, "step": 8403 }, { "epoch": 1.626547987616099, "grad_norm": 0.04824870079755783, "learning_rate": 9.442962286562683e-05, "loss": 0.0074, "step": 8404 }, { "epoch": 1.6267414860681115, "grad_norm": 0.10872020572423935, "learning_rate": 9.44283004922488e-05, "loss": 0.0095, "step": 8405 }, { "epoch": 1.626934984520124, "grad_norm": 0.1289917528629303, "learning_rate": 9.442697797228607e-05, "loss": 0.0073, "step": 8406 }, { "epoch": 1.6271284829721362, "grad_norm": 0.07629555463790894, "learning_rate": 9.442565530574354e-05, "loss": 0.0074, "step": 8407 }, { "epoch": 1.6273219814241486, "grad_norm": 0.12527351081371307, "learning_rate": 9.442433249262615e-05, "loss": 0.0071, "step": 8408 }, { "epoch": 1.6275154798761609, "grad_norm": 0.08405584841966629, "learning_rate": 9.442300953293881e-05, "loss": 0.0089, "step": 8409 }, { "epoch": 1.6277089783281733, "grad_norm": 0.09105195850133896, "learning_rate": 9.442168642668642e-05, "loss": 0.0085, "step": 8410 }, { "epoch": 1.6279024767801857, "grad_norm": 0.11035740375518799, "learning_rate": 9.442036317387393e-05, "loss": 0.0071, "step": 8411 }, { "epoch": 1.6280959752321982, "grad_norm": 0.06515467166900635, "learning_rate": 9.441903977450622e-05, "loss": 0.0076, "step": 8412 }, { "epoch": 1.6282894736842106, "grad_norm": 0.113587886095047, "learning_rate": 9.441771622858827e-05, "loss": 0.0072, "step": 8413 }, { "epoch": 1.628482972136223, "grad_norm": 0.046112433075904846, "learning_rate": 9.441639253612491e-05, "loss": 0.0084, "step": 8414 }, { "epoch": 1.6286764705882353, "grad_norm": 0.11478756368160248, "learning_rate": 9.441506869712116e-05, "loss": 0.0078, "step": 8415 }, { "epoch": 1.6288699690402477, "grad_norm": 0.06629331409931183, "learning_rate": 9.441374471158188e-05, "loss": 0.0092, "step": 8416 }, { "epoch": 1.62906346749226, "grad_norm": 0.10918378084897995, "learning_rate": 9.441242057951202e-05, "loss": 0.0081, "step": 8417 }, { "epoch": 1.6292569659442724, "grad_norm": 0.06786619126796722, 
"learning_rate": 9.441109630091647e-05, "loss": 0.0057, "step": 8418 }, { "epoch": 1.6294504643962848, "grad_norm": 0.10186953842639923, "learning_rate": 9.440977187580018e-05, "loss": 0.0089, "step": 8419 }, { "epoch": 1.6296439628482973, "grad_norm": 0.04575927555561066, "learning_rate": 9.440844730416809e-05, "loss": 0.0082, "step": 8420 }, { "epoch": 1.6298374613003097, "grad_norm": 0.10922187566757202, "learning_rate": 9.440712258602509e-05, "loss": 0.0084, "step": 8421 }, { "epoch": 1.630030959752322, "grad_norm": 0.04494202882051468, "learning_rate": 9.440579772137613e-05, "loss": 0.0071, "step": 8422 }, { "epoch": 1.6302244582043344, "grad_norm": 0.08841775357723236, "learning_rate": 9.44044727102261e-05, "loss": 0.0069, "step": 8423 }, { "epoch": 1.6304179566563466, "grad_norm": 0.0727020651102066, "learning_rate": 9.440314755257996e-05, "loss": 0.0073, "step": 8424 }, { "epoch": 1.630611455108359, "grad_norm": 0.0666603222489357, "learning_rate": 9.440182224844263e-05, "loss": 0.0075, "step": 8425 }, { "epoch": 1.6308049535603715, "grad_norm": 0.084202341735363, "learning_rate": 9.440049679781903e-05, "loss": 0.0069, "step": 8426 }, { "epoch": 1.630998452012384, "grad_norm": 0.05347198620438576, "learning_rate": 9.43991712007141e-05, "loss": 0.0074, "step": 8427 }, { "epoch": 1.6311919504643964, "grad_norm": 0.07001828402280807, "learning_rate": 9.439784545713275e-05, "loss": 0.0064, "step": 8428 }, { "epoch": 1.6313854489164088, "grad_norm": 0.0601249597966671, "learning_rate": 9.439651956707992e-05, "loss": 0.0089, "step": 8429 }, { "epoch": 1.631578947368421, "grad_norm": 0.11259544640779495, "learning_rate": 9.439519353056056e-05, "loss": 0.0081, "step": 8430 }, { "epoch": 1.6317724458204335, "grad_norm": 0.06652763485908508, "learning_rate": 9.439386734757954e-05, "loss": 0.0076, "step": 8431 }, { "epoch": 1.6319659442724457, "grad_norm": 0.10707983374595642, "learning_rate": 9.439254101814185e-05, "loss": 0.009, "step": 8432 }, { "epoch": 1.6321594427244581, "grad_norm": 0.08342597633600235, "learning_rate": 9.43912145422524e-05, "loss": 0.0092, "step": 8433 }, { "epoch": 1.6323529411764706, "grad_norm": 0.09510441869497299, "learning_rate": 9.438988791991611e-05, "loss": 0.008, "step": 8434 }, { "epoch": 1.632546439628483, "grad_norm": 0.055481623858213425, "learning_rate": 9.438856115113792e-05, "loss": 0.0093, "step": 8435 }, { "epoch": 1.6327399380804954, "grad_norm": 0.08867568522691727, "learning_rate": 9.438723423592279e-05, "loss": 0.0085, "step": 8436 }, { "epoch": 1.6329334365325079, "grad_norm": 0.04033998027443886, "learning_rate": 9.438590717427561e-05, "loss": 0.0093, "step": 8437 }, { "epoch": 1.63312693498452, "grad_norm": 0.08783403784036636, "learning_rate": 9.438457996620132e-05, "loss": 0.0071, "step": 8438 }, { "epoch": 1.6333204334365325, "grad_norm": 0.03652998059988022, "learning_rate": 9.438325261170489e-05, "loss": 0.0082, "step": 8439 }, { "epoch": 1.6335139318885448, "grad_norm": 0.05926986783742905, "learning_rate": 9.438192511079121e-05, "loss": 0.0069, "step": 8440 }, { "epoch": 1.6337074303405572, "grad_norm": 0.04713153839111328, "learning_rate": 9.438059746346526e-05, "loss": 0.0083, "step": 8441 }, { "epoch": 1.6339009287925697, "grad_norm": 0.06795237213373184, "learning_rate": 9.437926966973194e-05, "loss": 0.0081, "step": 8442 }, { "epoch": 1.634094427244582, "grad_norm": 0.047150593250989914, "learning_rate": 9.43779417295962e-05, "loss": 0.0086, "step": 8443 }, { "epoch": 1.6342879256965945, "grad_norm": 0.07421907782554626, 
"learning_rate": 9.437661364306298e-05, "loss": 0.0079, "step": 8444 }, { "epoch": 1.634481424148607, "grad_norm": 0.06125493720173836, "learning_rate": 9.437528541013719e-05, "loss": 0.0075, "step": 8445 }, { "epoch": 1.6346749226006192, "grad_norm": 0.07858404517173767, "learning_rate": 9.437395703082382e-05, "loss": 0.0083, "step": 8446 }, { "epoch": 1.6348684210526314, "grad_norm": 0.06681420654058456, "learning_rate": 9.437262850512777e-05, "loss": 0.0085, "step": 8447 }, { "epoch": 1.6350619195046439, "grad_norm": 0.07975056767463684, "learning_rate": 9.437129983305398e-05, "loss": 0.0076, "step": 8448 }, { "epoch": 1.6352554179566563, "grad_norm": 0.07886659353971481, "learning_rate": 9.43699710146074e-05, "loss": 0.0075, "step": 8449 }, { "epoch": 1.6354489164086687, "grad_norm": 0.07449764013290405, "learning_rate": 9.436864204979297e-05, "loss": 0.0085, "step": 8450 }, { "epoch": 1.6356424148606812, "grad_norm": 0.11025359481573105, "learning_rate": 9.436731293861564e-05, "loss": 0.0082, "step": 8451 }, { "epoch": 1.6358359133126936, "grad_norm": 0.09244431555271149, "learning_rate": 9.436598368108034e-05, "loss": 0.0101, "step": 8452 }, { "epoch": 1.6360294117647058, "grad_norm": 0.08348111808300018, "learning_rate": 9.4364654277192e-05, "loss": 0.0079, "step": 8453 }, { "epoch": 1.6362229102167183, "grad_norm": 0.06451191008090973, "learning_rate": 9.436332472695559e-05, "loss": 0.0084, "step": 8454 }, { "epoch": 1.6364164086687305, "grad_norm": 0.04931340366601944, "learning_rate": 9.436199503037603e-05, "loss": 0.008, "step": 8455 }, { "epoch": 1.636609907120743, "grad_norm": 0.04680771008133888, "learning_rate": 9.436066518745828e-05, "loss": 0.0081, "step": 8456 }, { "epoch": 1.6368034055727554, "grad_norm": 0.07052979618310928, "learning_rate": 9.435933519820725e-05, "loss": 0.008, "step": 8457 }, { "epoch": 1.6369969040247678, "grad_norm": 0.0444270595908165, "learning_rate": 9.435800506262794e-05, "loss": 0.0084, "step": 8458 }, { "epoch": 1.6371904024767803, "grad_norm": 0.10344308614730835, "learning_rate": 9.435667478072524e-05, "loss": 0.0071, "step": 8459 }, { "epoch": 1.6373839009287927, "grad_norm": 0.055252935737371445, "learning_rate": 9.435534435250415e-05, "loss": 0.0092, "step": 8460 }, { "epoch": 1.637577399380805, "grad_norm": 0.11640478670597076, "learning_rate": 9.435401377796957e-05, "loss": 0.0082, "step": 8461 }, { "epoch": 1.6377708978328174, "grad_norm": 0.038953278213739395, "learning_rate": 9.435268305712645e-05, "loss": 0.0083, "step": 8462 }, { "epoch": 1.6379643962848296, "grad_norm": 0.12716901302337646, "learning_rate": 9.435135218997977e-05, "loss": 0.0076, "step": 8463 }, { "epoch": 1.638157894736842, "grad_norm": 0.03196529299020767, "learning_rate": 9.435002117653445e-05, "loss": 0.0072, "step": 8464 }, { "epoch": 1.6383513931888545, "grad_norm": 0.11161752045154572, "learning_rate": 9.434869001679546e-05, "loss": 0.0082, "step": 8465 }, { "epoch": 1.638544891640867, "grad_norm": 0.1062694638967514, "learning_rate": 9.434735871076772e-05, "loss": 0.0084, "step": 8466 }, { "epoch": 1.6387383900928794, "grad_norm": 0.16163370013237, "learning_rate": 9.43460272584562e-05, "loss": 0.0093, "step": 8467 }, { "epoch": 1.6389318885448918, "grad_norm": 0.05390071123838425, "learning_rate": 9.434469565986586e-05, "loss": 0.0085, "step": 8468 }, { "epoch": 1.639125386996904, "grad_norm": 0.12425181269645691, "learning_rate": 9.434336391500162e-05, "loss": 0.0089, "step": 8469 }, { "epoch": 1.6393188854489165, "grad_norm": 0.04657990112900734, 
"learning_rate": 9.434203202386846e-05, "loss": 0.009, "step": 8470 }, { "epoch": 1.6395123839009287, "grad_norm": 0.1370939463376999, "learning_rate": 9.434069998647133e-05, "loss": 0.0087, "step": 8471 }, { "epoch": 1.6397058823529411, "grad_norm": 0.05892710015177727, "learning_rate": 9.433936780281513e-05, "loss": 0.009, "step": 8472 }, { "epoch": 1.6398993808049536, "grad_norm": 0.16878069937229156, "learning_rate": 9.433803547290489e-05, "loss": 0.0074, "step": 8473 }, { "epoch": 1.640092879256966, "grad_norm": 0.07809264212846756, "learning_rate": 9.43367029967455e-05, "loss": 0.0069, "step": 8474 }, { "epoch": 1.6402863777089784, "grad_norm": 0.14960478246212006, "learning_rate": 9.433537037434195e-05, "loss": 0.0087, "step": 8475 }, { "epoch": 1.6404798761609907, "grad_norm": 0.1273338794708252, "learning_rate": 9.433403760569922e-05, "loss": 0.0074, "step": 8476 }, { "epoch": 1.640673374613003, "grad_norm": 0.1310960352420807, "learning_rate": 9.433270469082218e-05, "loss": 0.0077, "step": 8477 }, { "epoch": 1.6408668730650153, "grad_norm": 0.14062346518039703, "learning_rate": 9.433137162971585e-05, "loss": 0.0101, "step": 8478 }, { "epoch": 1.6410603715170278, "grad_norm": 0.06153799593448639, "learning_rate": 9.43300384223852e-05, "loss": 0.0068, "step": 8479 }, { "epoch": 1.6412538699690402, "grad_norm": 0.12370466440916061, "learning_rate": 9.432870506883513e-05, "loss": 0.0084, "step": 8480 }, { "epoch": 1.6414473684210527, "grad_norm": 0.07878150790929794, "learning_rate": 9.432737156907064e-05, "loss": 0.0091, "step": 8481 }, { "epoch": 1.641640866873065, "grad_norm": 0.06748247891664505, "learning_rate": 9.432603792309666e-05, "loss": 0.0084, "step": 8482 }, { "epoch": 1.6418343653250775, "grad_norm": 0.21271854639053345, "learning_rate": 9.432470413091818e-05, "loss": 0.0078, "step": 8483 }, { "epoch": 1.6420278637770898, "grad_norm": 0.0719156339764595, "learning_rate": 9.432337019254011e-05, "loss": 0.0091, "step": 8484 }, { "epoch": 1.6422213622291022, "grad_norm": 0.19264426827430725, "learning_rate": 9.432203610796746e-05, "loss": 0.007, "step": 8485 }, { "epoch": 1.6424148606811144, "grad_norm": 0.12363529205322266, "learning_rate": 9.432070187720516e-05, "loss": 0.0071, "step": 8486 }, { "epoch": 1.6426083591331269, "grad_norm": 0.1132805272936821, "learning_rate": 9.431936750025819e-05, "loss": 0.0087, "step": 8487 }, { "epoch": 1.6428018575851393, "grad_norm": 0.20256221294403076, "learning_rate": 9.43180329771315e-05, "loss": 0.009, "step": 8488 }, { "epoch": 1.6429953560371517, "grad_norm": 0.059840843081474304, "learning_rate": 9.431669830783004e-05, "loss": 0.0094, "step": 8489 }, { "epoch": 1.6431888544891642, "grad_norm": 0.21418288350105286, "learning_rate": 9.431536349235879e-05, "loss": 0.0084, "step": 8490 }, { "epoch": 1.6433823529411766, "grad_norm": 0.11534133553504944, "learning_rate": 9.431402853072271e-05, "loss": 0.0073, "step": 8491 }, { "epoch": 1.6435758513931888, "grad_norm": 0.16691236197948456, "learning_rate": 9.431269342292675e-05, "loss": 0.0074, "step": 8492 }, { "epoch": 1.6437693498452013, "grad_norm": 0.17000532150268555, "learning_rate": 9.43113581689759e-05, "loss": 0.0095, "step": 8493 }, { "epoch": 1.6439628482972135, "grad_norm": 0.09038115292787552, "learning_rate": 9.431002276887508e-05, "loss": 0.0053, "step": 8494 }, { "epoch": 1.644156346749226, "grad_norm": 0.22040005028247833, "learning_rate": 9.430868722262931e-05, "loss": 0.0093, "step": 8495 }, { "epoch": 1.6443498452012384, "grad_norm": 0.09020227193832397, 
"learning_rate": 9.43073515302435e-05, "loss": 0.0075, "step": 8496 }, { "epoch": 1.6445433436532508, "grad_norm": 0.18241798877716064, "learning_rate": 9.430601569172265e-05, "loss": 0.008, "step": 8497 }, { "epoch": 1.6447368421052633, "grad_norm": 0.10108538717031479, "learning_rate": 9.430467970707172e-05, "loss": 0.0088, "step": 8498 }, { "epoch": 1.6449303405572755, "grad_norm": 0.18234442174434662, "learning_rate": 9.43033435762957e-05, "loss": 0.009, "step": 8499 }, { "epoch": 1.645123839009288, "grad_norm": 0.11819525063037872, "learning_rate": 9.430200729939951e-05, "loss": 0.009, "step": 8500 }, { "epoch": 1.6453173374613002, "grad_norm": 0.13559892773628235, "learning_rate": 9.430067087638814e-05, "loss": 0.0085, "step": 8501 }, { "epoch": 1.6455108359133126, "grad_norm": 0.09614525735378265, "learning_rate": 9.429933430726656e-05, "loss": 0.0104, "step": 8502 }, { "epoch": 1.645704334365325, "grad_norm": 0.17106999456882477, "learning_rate": 9.429799759203975e-05, "loss": 0.0084, "step": 8503 }, { "epoch": 1.6458978328173375, "grad_norm": 0.17499478161334991, "learning_rate": 9.429666073071266e-05, "loss": 0.0108, "step": 8504 }, { "epoch": 1.64609133126935, "grad_norm": 0.19098709523677826, "learning_rate": 9.429532372329027e-05, "loss": 0.0074, "step": 8505 }, { "epoch": 1.6462848297213624, "grad_norm": 0.26726290583610535, "learning_rate": 9.429398656977755e-05, "loss": 0.0098, "step": 8506 }, { "epoch": 1.6464783281733746, "grad_norm": 0.13152478635311127, "learning_rate": 9.429264927017946e-05, "loss": 0.0087, "step": 8507 }, { "epoch": 1.646671826625387, "grad_norm": 0.3033013939857483, "learning_rate": 9.429131182450099e-05, "loss": 0.0089, "step": 8508 }, { "epoch": 1.6468653250773992, "grad_norm": 0.11971736699342728, "learning_rate": 9.428997423274712e-05, "loss": 0.0101, "step": 8509 }, { "epoch": 1.6470588235294117, "grad_norm": 0.295062392950058, "learning_rate": 9.428863649492278e-05, "loss": 0.0088, "step": 8510 }, { "epoch": 1.6472523219814241, "grad_norm": 0.2063106894493103, "learning_rate": 9.4287298611033e-05, "loss": 0.0082, "step": 8511 }, { "epoch": 1.6474458204334366, "grad_norm": 0.17033003270626068, "learning_rate": 9.428596058108269e-05, "loss": 0.0081, "step": 8512 }, { "epoch": 1.647639318885449, "grad_norm": 0.2690364420413971, "learning_rate": 9.428462240507687e-05, "loss": 0.011, "step": 8513 }, { "epoch": 1.6478328173374615, "grad_norm": 0.0659160315990448, "learning_rate": 9.428328408302051e-05, "loss": 0.0073, "step": 8514 }, { "epoch": 1.6480263157894737, "grad_norm": 0.20413292944431305, "learning_rate": 9.428194561491859e-05, "loss": 0.0088, "step": 8515 }, { "epoch": 1.6482198142414861, "grad_norm": 0.15939854085445404, "learning_rate": 9.428060700077607e-05, "loss": 0.007, "step": 8516 }, { "epoch": 1.6484133126934983, "grad_norm": 0.12263717502355576, "learning_rate": 9.42792682405979e-05, "loss": 0.0074, "step": 8517 }, { "epoch": 1.6486068111455108, "grad_norm": 0.1567227840423584, "learning_rate": 9.427792933438913e-05, "loss": 0.008, "step": 8518 }, { "epoch": 1.6488003095975232, "grad_norm": 0.06181391701102257, "learning_rate": 9.427659028215467e-05, "loss": 0.0076, "step": 8519 }, { "epoch": 1.6489938080495357, "grad_norm": 0.10568960011005402, "learning_rate": 9.427525108389955e-05, "loss": 0.0084, "step": 8520 }, { "epoch": 1.649187306501548, "grad_norm": 0.11108551174402237, "learning_rate": 9.427391173962871e-05, "loss": 0.0083, "step": 8521 }, { "epoch": 1.6493808049535603, "grad_norm": 0.042463649064302444, "learning_rate": 
9.427257224934715e-05, "loss": 0.0077, "step": 8522 }, { "epoch": 1.6495743034055728, "grad_norm": 0.09299187362194061, "learning_rate": 9.427123261305983e-05, "loss": 0.0073, "step": 8523 }, { "epoch": 1.649767801857585, "grad_norm": 0.043096210807561874, "learning_rate": 9.426989283077177e-05, "loss": 0.0088, "step": 8524 }, { "epoch": 1.6499613003095974, "grad_norm": 0.07628367096185684, "learning_rate": 9.42685529024879e-05, "loss": 0.0077, "step": 8525 }, { "epoch": 1.6501547987616099, "grad_norm": 0.044561129063367844, "learning_rate": 9.426721282821322e-05, "loss": 0.0068, "step": 8526 }, { "epoch": 1.6503482972136223, "grad_norm": 0.08289707452058792, "learning_rate": 9.426587260795274e-05, "loss": 0.0093, "step": 8527 }, { "epoch": 1.6505417956656347, "grad_norm": 0.03841182962059975, "learning_rate": 9.426453224171143e-05, "loss": 0.0093, "step": 8528 }, { "epoch": 1.6507352941176472, "grad_norm": 0.08608833700418472, "learning_rate": 9.426319172949422e-05, "loss": 0.0082, "step": 8529 }, { "epoch": 1.6509287925696594, "grad_norm": 0.0629851222038269, "learning_rate": 9.426185107130619e-05, "loss": 0.0079, "step": 8530 }, { "epoch": 1.6511222910216719, "grad_norm": 0.07878308743238449, "learning_rate": 9.426051026715224e-05, "loss": 0.0079, "step": 8531 }, { "epoch": 1.651315789473684, "grad_norm": 0.0796469897031784, "learning_rate": 9.42591693170374e-05, "loss": 0.0083, "step": 8532 }, { "epoch": 1.6515092879256965, "grad_norm": 0.10697147250175476, "learning_rate": 9.425782822096662e-05, "loss": 0.0082, "step": 8533 }, { "epoch": 1.651702786377709, "grad_norm": 0.06208378076553345, "learning_rate": 9.425648697894494e-05, "loss": 0.0079, "step": 8534 }, { "epoch": 1.6518962848297214, "grad_norm": 0.12213151156902313, "learning_rate": 9.425514559097729e-05, "loss": 0.0077, "step": 8535 }, { "epoch": 1.6520897832817338, "grad_norm": 0.06999287754297256, "learning_rate": 9.42538040570687e-05, "loss": 0.0082, "step": 8536 }, { "epoch": 1.6522832817337463, "grad_norm": 0.09973356127738953, "learning_rate": 9.425246237722413e-05, "loss": 0.0078, "step": 8537 }, { "epoch": 1.6524767801857585, "grad_norm": 0.03472357988357544, "learning_rate": 9.425112055144857e-05, "loss": 0.007, "step": 8538 }, { "epoch": 1.652670278637771, "grad_norm": 0.13591253757476807, "learning_rate": 9.424977857974702e-05, "loss": 0.0076, "step": 8539 }, { "epoch": 1.6528637770897832, "grad_norm": 0.05419883877038956, "learning_rate": 9.424843646212448e-05, "loss": 0.0087, "step": 8540 }, { "epoch": 1.6530572755417956, "grad_norm": 0.1371638923883438, "learning_rate": 9.42470941985859e-05, "loss": 0.0087, "step": 8541 }, { "epoch": 1.653250773993808, "grad_norm": 0.061972726136446, "learning_rate": 9.424575178913629e-05, "loss": 0.0073, "step": 8542 }, { "epoch": 1.6534442724458205, "grad_norm": 0.12280192971229553, "learning_rate": 9.424440923378067e-05, "loss": 0.0098, "step": 8543 }, { "epoch": 1.653637770897833, "grad_norm": 0.12128002196550369, "learning_rate": 9.4243066532524e-05, "loss": 0.0075, "step": 8544 }, { "epoch": 1.6538312693498454, "grad_norm": 0.08936598151922226, "learning_rate": 9.424172368537128e-05, "loss": 0.007, "step": 8545 }, { "epoch": 1.6540247678018576, "grad_norm": 0.1426437944173813, "learning_rate": 9.42403806923275e-05, "loss": 0.0079, "step": 8546 }, { "epoch": 1.6542182662538698, "grad_norm": 0.101002536714077, "learning_rate": 9.423903755339765e-05, "loss": 0.0086, "step": 8547 }, { "epoch": 1.6544117647058822, "grad_norm": 0.11781739443540573, "learning_rate": 
9.423769426858671e-05, "loss": 0.0071, "step": 8548 }, { "epoch": 1.6546052631578947, "grad_norm": 0.12602929770946503, "learning_rate": 9.42363508378997e-05, "loss": 0.0083, "step": 8549 }, { "epoch": 1.6547987616099071, "grad_norm": 0.07063712924718857, "learning_rate": 9.423500726134163e-05, "loss": 0.0105, "step": 8550 }, { "epoch": 1.6549922600619196, "grad_norm": 0.15239915251731873, "learning_rate": 9.423366353891745e-05, "loss": 0.009, "step": 8551 }, { "epoch": 1.655185758513932, "grad_norm": 0.05360317602753639, "learning_rate": 9.423231967063218e-05, "loss": 0.008, "step": 8552 }, { "epoch": 1.6553792569659442, "grad_norm": 0.1220177561044693, "learning_rate": 9.423097565649082e-05, "loss": 0.0084, "step": 8553 }, { "epoch": 1.6555727554179567, "grad_norm": 0.07709437608718872, "learning_rate": 9.422963149649834e-05, "loss": 0.0074, "step": 8554 }, { "epoch": 1.655766253869969, "grad_norm": 0.10801517963409424, "learning_rate": 9.422828719065977e-05, "loss": 0.0093, "step": 8555 }, { "epoch": 1.6559597523219813, "grad_norm": 0.11972127854824066, "learning_rate": 9.422694273898009e-05, "loss": 0.0068, "step": 8556 }, { "epoch": 1.6561532507739938, "grad_norm": 0.15124203264713287, "learning_rate": 9.422559814146432e-05, "loss": 0.0073, "step": 8557 }, { "epoch": 1.6563467492260062, "grad_norm": 0.051245223730802536, "learning_rate": 9.422425339811743e-05, "loss": 0.0083, "step": 8558 }, { "epoch": 1.6565402476780187, "grad_norm": 0.15992261469364166, "learning_rate": 9.422290850894442e-05, "loss": 0.0093, "step": 8559 }, { "epoch": 1.656733746130031, "grad_norm": 0.05121754854917526, "learning_rate": 9.422156347395031e-05, "loss": 0.0075, "step": 8560 }, { "epoch": 1.6569272445820433, "grad_norm": 0.16065803170204163, "learning_rate": 9.422021829314009e-05, "loss": 0.0072, "step": 8561 }, { "epoch": 1.6571207430340558, "grad_norm": 0.08947202563285828, "learning_rate": 9.421887296651877e-05, "loss": 0.0078, "step": 8562 }, { "epoch": 1.657314241486068, "grad_norm": 0.14877671003341675, "learning_rate": 9.421752749409134e-05, "loss": 0.0082, "step": 8563 }, { "epoch": 1.6575077399380804, "grad_norm": 0.09966934472322464, "learning_rate": 9.42161818758628e-05, "loss": 0.0084, "step": 8564 }, { "epoch": 1.6577012383900929, "grad_norm": 0.1318804919719696, "learning_rate": 9.421483611183816e-05, "loss": 0.0079, "step": 8565 }, { "epoch": 1.6578947368421053, "grad_norm": 0.11770789325237274, "learning_rate": 9.421349020202242e-05, "loss": 0.0073, "step": 8566 }, { "epoch": 1.6580882352941178, "grad_norm": 0.11025054007768631, "learning_rate": 9.42121441464206e-05, "loss": 0.009, "step": 8567 }, { "epoch": 1.6582817337461302, "grad_norm": 0.10110291838645935, "learning_rate": 9.421079794503766e-05, "loss": 0.0076, "step": 8568 }, { "epoch": 1.6584752321981424, "grad_norm": 0.10342002660036087, "learning_rate": 9.420945159787867e-05, "loss": 0.0099, "step": 8569 }, { "epoch": 1.6586687306501546, "grad_norm": 0.11194349080324173, "learning_rate": 9.420810510494858e-05, "loss": 0.0076, "step": 8570 }, { "epoch": 1.658862229102167, "grad_norm": 0.11246005445718765, "learning_rate": 9.420675846625242e-05, "loss": 0.0075, "step": 8571 }, { "epoch": 1.6590557275541795, "grad_norm": 0.11974639445543289, "learning_rate": 9.42054116817952e-05, "loss": 0.0088, "step": 8572 }, { "epoch": 1.659249226006192, "grad_norm": 0.12789404392242432, "learning_rate": 9.420406475158193e-05, "loss": 0.0083, "step": 8573 }, { "epoch": 1.6594427244582044, "grad_norm": 0.11958345025777817, "learning_rate": 
9.420271767561757e-05, "loss": 0.007, "step": 8574 }, { "epoch": 1.6596362229102168, "grad_norm": 0.1469038724899292, "learning_rate": 9.420137045390719e-05, "loss": 0.0088, "step": 8575 }, { "epoch": 1.659829721362229, "grad_norm": 0.09110637754201889, "learning_rate": 9.420002308645577e-05, "loss": 0.0081, "step": 8576 }, { "epoch": 1.6600232198142415, "grad_norm": 0.15264618396759033, "learning_rate": 9.419867557326833e-05, "loss": 0.0084, "step": 8577 }, { "epoch": 1.6602167182662537, "grad_norm": 0.0854489654302597, "learning_rate": 9.419732791434988e-05, "loss": 0.0084, "step": 8578 }, { "epoch": 1.6604102167182662, "grad_norm": 0.14823266863822937, "learning_rate": 9.419598010970539e-05, "loss": 0.0074, "step": 8579 }, { "epoch": 1.6606037151702786, "grad_norm": 0.079933300614357, "learning_rate": 9.419463215933993e-05, "loss": 0.0071, "step": 8580 }, { "epoch": 1.660797213622291, "grad_norm": 0.11310727149248123, "learning_rate": 9.41932840632585e-05, "loss": 0.0074, "step": 8581 }, { "epoch": 1.6609907120743035, "grad_norm": 0.0640186071395874, "learning_rate": 9.419193582146606e-05, "loss": 0.007, "step": 8582 }, { "epoch": 1.661184210526316, "grad_norm": 0.141593337059021, "learning_rate": 9.419058743396768e-05, "loss": 0.0097, "step": 8583 }, { "epoch": 1.6613777089783281, "grad_norm": 0.06726711988449097, "learning_rate": 9.418923890076835e-05, "loss": 0.0079, "step": 8584 }, { "epoch": 1.6615712074303406, "grad_norm": 0.1277599334716797, "learning_rate": 9.418789022187309e-05, "loss": 0.0095, "step": 8585 }, { "epoch": 1.6617647058823528, "grad_norm": 0.10531773418188095, "learning_rate": 9.418654139728691e-05, "loss": 0.0061, "step": 8586 }, { "epoch": 1.6619582043343653, "grad_norm": 0.11403029412031174, "learning_rate": 9.418519242701483e-05, "loss": 0.0077, "step": 8587 }, { "epoch": 1.6621517027863777, "grad_norm": 0.12358073890209198, "learning_rate": 9.418384331106185e-05, "loss": 0.0091, "step": 8588 }, { "epoch": 1.6623452012383901, "grad_norm": 0.08563679456710815, "learning_rate": 9.418249404943298e-05, "loss": 0.0094, "step": 8589 }, { "epoch": 1.6625386996904026, "grad_norm": 0.1425493359565735, "learning_rate": 9.418114464213328e-05, "loss": 0.0083, "step": 8590 }, { "epoch": 1.662732198142415, "grad_norm": 0.07123440504074097, "learning_rate": 9.417979508916772e-05, "loss": 0.0086, "step": 8591 }, { "epoch": 1.6629256965944272, "grad_norm": 0.07460541278123856, "learning_rate": 9.417844539054133e-05, "loss": 0.0079, "step": 8592 }, { "epoch": 1.6631191950464397, "grad_norm": 0.16051168739795685, "learning_rate": 9.417709554625918e-05, "loss": 0.0081, "step": 8593 }, { "epoch": 1.663312693498452, "grad_norm": 0.05704192817211151, "learning_rate": 9.41757455563262e-05, "loss": 0.0076, "step": 8594 }, { "epoch": 1.6635061919504643, "grad_norm": 0.18392132222652435, "learning_rate": 9.417439542074747e-05, "loss": 0.0101, "step": 8595 }, { "epoch": 1.6636996904024768, "grad_norm": 0.037271734327077866, "learning_rate": 9.417304513952797e-05, "loss": 0.008, "step": 8596 }, { "epoch": 1.6638931888544892, "grad_norm": 0.1714758425951004, "learning_rate": 9.417169471267275e-05, "loss": 0.0106, "step": 8597 }, { "epoch": 1.6640866873065017, "grad_norm": 0.06311741471290588, "learning_rate": 9.417034414018682e-05, "loss": 0.0071, "step": 8598 }, { "epoch": 1.6642801857585139, "grad_norm": 0.1336391419172287, "learning_rate": 9.416899342207522e-05, "loss": 0.0067, "step": 8599 }, { "epoch": 1.6644736842105263, "grad_norm": 0.09487518668174744, "learning_rate": 
9.416764255834294e-05, "loss": 0.0085, "step": 8600 }, { "epoch": 1.6646671826625385, "grad_norm": 0.08134099096059799, "learning_rate": 9.416629154899501e-05, "loss": 0.0074, "step": 8601 }, { "epoch": 1.664860681114551, "grad_norm": 0.1227572038769722, "learning_rate": 9.416494039403646e-05, "loss": 0.0072, "step": 8602 }, { "epoch": 1.6650541795665634, "grad_norm": 0.06194669008255005, "learning_rate": 9.416358909347231e-05, "loss": 0.0065, "step": 8603 }, { "epoch": 1.6652476780185759, "grad_norm": 0.11024433374404907, "learning_rate": 9.41622376473076e-05, "loss": 0.0077, "step": 8604 }, { "epoch": 1.6654411764705883, "grad_norm": 0.07092869281768799, "learning_rate": 9.416088605554734e-05, "loss": 0.0073, "step": 8605 }, { "epoch": 1.6656346749226008, "grad_norm": 0.11534130573272705, "learning_rate": 9.415953431819654e-05, "loss": 0.0087, "step": 8606 }, { "epoch": 1.665828173374613, "grad_norm": 0.12010498344898224, "learning_rate": 9.415818243526026e-05, "loss": 0.0103, "step": 8607 }, { "epoch": 1.6660216718266254, "grad_norm": 0.10515346378087997, "learning_rate": 9.41568304067435e-05, "loss": 0.0093, "step": 8608 }, { "epoch": 1.6662151702786376, "grad_norm": 0.14512589573860168, "learning_rate": 9.415547823265128e-05, "loss": 0.0089, "step": 8609 }, { "epoch": 1.66640866873065, "grad_norm": 0.12525197863578796, "learning_rate": 9.415412591298866e-05, "loss": 0.0068, "step": 8610 }, { "epoch": 1.6666021671826625, "grad_norm": 0.0600111298263073, "learning_rate": 9.415277344776062e-05, "loss": 0.0076, "step": 8611 }, { "epoch": 1.666795665634675, "grad_norm": 0.1281513124704361, "learning_rate": 9.415142083697224e-05, "loss": 0.008, "step": 8612 }, { "epoch": 1.6669891640866874, "grad_norm": 0.056684281677007675, "learning_rate": 9.415006808062852e-05, "loss": 0.008, "step": 8613 }, { "epoch": 1.6671826625386998, "grad_norm": 0.10905113816261292, "learning_rate": 9.414871517873449e-05, "loss": 0.0068, "step": 8614 }, { "epoch": 1.667376160990712, "grad_norm": 0.11774375289678574, "learning_rate": 9.414736213129518e-05, "loss": 0.0082, "step": 8615 }, { "epoch": 1.6675696594427245, "grad_norm": 0.1511552780866623, "learning_rate": 9.414600893831564e-05, "loss": 0.0079, "step": 8616 }, { "epoch": 1.6677631578947367, "grad_norm": 0.11815701425075531, "learning_rate": 9.414465559980086e-05, "loss": 0.0078, "step": 8617 }, { "epoch": 1.6679566563467492, "grad_norm": 0.12178581953048706, "learning_rate": 9.414330211575591e-05, "loss": 0.0076, "step": 8618 }, { "epoch": 1.6681501547987616, "grad_norm": 0.15066833794116974, "learning_rate": 9.414194848618581e-05, "loss": 0.0072, "step": 8619 }, { "epoch": 1.668343653250774, "grad_norm": 0.06358233839273453, "learning_rate": 9.414059471109557e-05, "loss": 0.0083, "step": 8620 }, { "epoch": 1.6685371517027865, "grad_norm": 0.1771807223558426, "learning_rate": 9.413924079049026e-05, "loss": 0.0077, "step": 8621 }, { "epoch": 1.6687306501547987, "grad_norm": 0.055164337158203125, "learning_rate": 9.413788672437491e-05, "loss": 0.0082, "step": 8622 }, { "epoch": 1.6689241486068112, "grad_norm": 0.14591215550899506, "learning_rate": 9.413653251275452e-05, "loss": 0.008, "step": 8623 }, { "epoch": 1.6691176470588234, "grad_norm": 0.13427576422691345, "learning_rate": 9.413517815563416e-05, "loss": 0.0063, "step": 8624 }, { "epoch": 1.6693111455108358, "grad_norm": 0.07636242359876633, "learning_rate": 9.413382365301885e-05, "loss": 0.0075, "step": 8625 }, { "epoch": 1.6695046439628483, "grad_norm": 0.17642629146575928, "learning_rate": 
9.413246900491361e-05, "loss": 0.008, "step": 8626 }, { "epoch": 1.6696981424148607, "grad_norm": 0.12282218784093857, "learning_rate": 9.41311142113235e-05, "loss": 0.0071, "step": 8627 }, { "epoch": 1.6698916408668731, "grad_norm": 0.16199487447738647, "learning_rate": 9.412975927225356e-05, "loss": 0.0076, "step": 8628 }, { "epoch": 1.6700851393188856, "grad_norm": 0.16212323307991028, "learning_rate": 9.41284041877088e-05, "loss": 0.0072, "step": 8629 }, { "epoch": 1.6702786377708978, "grad_norm": 0.06688322871923447, "learning_rate": 9.412704895769429e-05, "loss": 0.0078, "step": 8630 }, { "epoch": 1.6704721362229102, "grad_norm": 0.20077332854270935, "learning_rate": 9.412569358221504e-05, "loss": 0.0078, "step": 8631 }, { "epoch": 1.6706656346749225, "grad_norm": 0.06467686593532562, "learning_rate": 9.412433806127611e-05, "loss": 0.0065, "step": 8632 }, { "epoch": 1.670859133126935, "grad_norm": 0.1305485963821411, "learning_rate": 9.412298239488253e-05, "loss": 0.0074, "step": 8633 }, { "epoch": 1.6710526315789473, "grad_norm": 0.12560515105724335, "learning_rate": 9.412162658303936e-05, "loss": 0.0084, "step": 8634 }, { "epoch": 1.6712461300309598, "grad_norm": 0.04480721056461334, "learning_rate": 9.41202706257516e-05, "loss": 0.0074, "step": 8635 }, { "epoch": 1.6714396284829722, "grad_norm": 0.15522755682468414, "learning_rate": 9.411891452302431e-05, "loss": 0.0089, "step": 8636 }, { "epoch": 1.6716331269349847, "grad_norm": 0.024878213182091713, "learning_rate": 9.411755827486254e-05, "loss": 0.0083, "step": 8637 }, { "epoch": 1.671826625386997, "grad_norm": 0.150039941072464, "learning_rate": 9.411620188127132e-05, "loss": 0.0078, "step": 8638 }, { "epoch": 1.6720201238390093, "grad_norm": 0.04711522161960602, "learning_rate": 9.411484534225574e-05, "loss": 0.0088, "step": 8639 }, { "epoch": 1.6722136222910216, "grad_norm": 0.12331824749708176, "learning_rate": 9.411348865782076e-05, "loss": 0.0085, "step": 8640 }, { "epoch": 1.672407120743034, "grad_norm": 0.0807790607213974, "learning_rate": 9.411213182797148e-05, "loss": 0.0091, "step": 8641 }, { "epoch": 1.6726006191950464, "grad_norm": 0.11070098727941513, "learning_rate": 9.411077485271292e-05, "loss": 0.0088, "step": 8642 }, { "epoch": 1.6727941176470589, "grad_norm": 0.08370976150035858, "learning_rate": 9.410941773205015e-05, "loss": 0.0075, "step": 8643 }, { "epoch": 1.6729876160990713, "grad_norm": 0.08796106278896332, "learning_rate": 9.41080604659882e-05, "loss": 0.0095, "step": 8644 }, { "epoch": 1.6731811145510835, "grad_norm": 0.0634441152215004, "learning_rate": 9.410670305453212e-05, "loss": 0.0077, "step": 8645 }, { "epoch": 1.673374613003096, "grad_norm": 0.07418318092823029, "learning_rate": 9.410534549768696e-05, "loss": 0.0093, "step": 8646 }, { "epoch": 1.6735681114551082, "grad_norm": 0.08750368654727936, "learning_rate": 9.410398779545774e-05, "loss": 0.0087, "step": 8647 }, { "epoch": 1.6737616099071206, "grad_norm": 0.09506000578403473, "learning_rate": 9.410262994784954e-05, "loss": 0.0079, "step": 8648 }, { "epoch": 1.673955108359133, "grad_norm": 0.09534891694784164, "learning_rate": 9.41012719548674e-05, "loss": 0.0085, "step": 8649 }, { "epoch": 1.6741486068111455, "grad_norm": 0.05617320165038109, "learning_rate": 9.409991381651636e-05, "loss": 0.0075, "step": 8650 }, { "epoch": 1.674342105263158, "grad_norm": 0.11764168739318848, "learning_rate": 9.409855553280149e-05, "loss": 0.0085, "step": 8651 }, { "epoch": 1.6745356037151704, "grad_norm": 0.06255308538675308, "learning_rate": 
9.409719710372779e-05, "loss": 0.0078, "step": 8652 }, { "epoch": 1.6747291021671826, "grad_norm": 0.10583528876304626, "learning_rate": 9.409583852930038e-05, "loss": 0.0085, "step": 8653 }, { "epoch": 1.674922600619195, "grad_norm": 0.06715661287307739, "learning_rate": 9.409447980952428e-05, "loss": 0.0072, "step": 8654 }, { "epoch": 1.6751160990712073, "grad_norm": 0.09377723187208176, "learning_rate": 9.409312094440451e-05, "loss": 0.0077, "step": 8655 }, { "epoch": 1.6753095975232197, "grad_norm": 0.06085259094834328, "learning_rate": 9.409176193394616e-05, "loss": 0.0066, "step": 8656 }, { "epoch": 1.6755030959752322, "grad_norm": 0.07707568258047104, "learning_rate": 9.409040277815427e-05, "loss": 0.0077, "step": 8657 }, { "epoch": 1.6756965944272446, "grad_norm": 0.045562200248241425, "learning_rate": 9.40890434770339e-05, "loss": 0.0068, "step": 8658 }, { "epoch": 1.675890092879257, "grad_norm": 0.0659448578953743, "learning_rate": 9.408768403059009e-05, "loss": 0.0071, "step": 8659 }, { "epoch": 1.6760835913312695, "grad_norm": 0.08081294596195221, "learning_rate": 9.408632443882792e-05, "loss": 0.0077, "step": 8660 }, { "epoch": 1.6762770897832817, "grad_norm": 0.143538236618042, "learning_rate": 9.40849647017524e-05, "loss": 0.0069, "step": 8661 }, { "epoch": 1.6764705882352942, "grad_norm": 0.08098189532756805, "learning_rate": 9.408360481936863e-05, "loss": 0.007, "step": 8662 }, { "epoch": 1.6766640866873064, "grad_norm": 0.0991780087351799, "learning_rate": 9.408224479168163e-05, "loss": 0.0084, "step": 8663 }, { "epoch": 1.6768575851393188, "grad_norm": 0.13585370779037476, "learning_rate": 9.40808846186965e-05, "loss": 0.0084, "step": 8664 }, { "epoch": 1.6770510835913313, "grad_norm": 0.03722500428557396, "learning_rate": 9.407952430041826e-05, "loss": 0.008, "step": 8665 }, { "epoch": 1.6772445820433437, "grad_norm": 0.12613190710544586, "learning_rate": 9.407816383685198e-05, "loss": 0.0083, "step": 8666 }, { "epoch": 1.6774380804953561, "grad_norm": 0.13622619211673737, "learning_rate": 9.40768032280027e-05, "loss": 0.0087, "step": 8667 }, { "epoch": 1.6776315789473686, "grad_norm": 0.10798567533493042, "learning_rate": 9.407544247387552e-05, "loss": 0.0086, "step": 8668 }, { "epoch": 1.6778250773993808, "grad_norm": 0.2229260951280594, "learning_rate": 9.407408157447545e-05, "loss": 0.0069, "step": 8669 }, { "epoch": 1.678018575851393, "grad_norm": 0.07526818662881851, "learning_rate": 9.407272052980757e-05, "loss": 0.0085, "step": 8670 }, { "epoch": 1.6782120743034055, "grad_norm": 0.1833873689174652, "learning_rate": 9.407135933987696e-05, "loss": 0.0088, "step": 8671 }, { "epoch": 1.678405572755418, "grad_norm": 0.11636720597743988, "learning_rate": 9.406999800468867e-05, "loss": 0.0091, "step": 8672 }, { "epoch": 1.6785990712074303, "grad_norm": 0.1266012340784073, "learning_rate": 9.406863652424775e-05, "loss": 0.0076, "step": 8673 }, { "epoch": 1.6787925696594428, "grad_norm": 0.14636780321598053, "learning_rate": 9.406727489855926e-05, "loss": 0.0103, "step": 8674 }, { "epoch": 1.6789860681114552, "grad_norm": 0.05446169525384903, "learning_rate": 9.406591312762825e-05, "loss": 0.0071, "step": 8675 }, { "epoch": 1.6791795665634675, "grad_norm": 0.16265949606895447, "learning_rate": 9.406455121145982e-05, "loss": 0.0084, "step": 8676 }, { "epoch": 1.67937306501548, "grad_norm": 0.045468904078006744, "learning_rate": 9.406318915005902e-05, "loss": 0.0081, "step": 8677 }, { "epoch": 1.6795665634674921, "grad_norm": 0.15107698738574982, "learning_rate": 
9.406182694343089e-05, "loss": 0.0093, "step": 8678 }, { "epoch": 1.6797600619195046, "grad_norm": 0.048835452646017075, "learning_rate": 9.406046459158052e-05, "loss": 0.0086, "step": 8679 }, { "epoch": 1.679953560371517, "grad_norm": 0.07144588977098465, "learning_rate": 9.405910209451298e-05, "loss": 0.0078, "step": 8680 }, { "epoch": 1.6801470588235294, "grad_norm": 0.12081355601549149, "learning_rate": 9.405773945223332e-05, "loss": 0.0084, "step": 8681 }, { "epoch": 1.6803405572755419, "grad_norm": 0.06970077008008957, "learning_rate": 9.40563766647466e-05, "loss": 0.0083, "step": 8682 }, { "epoch": 1.6805340557275543, "grad_norm": 0.09704681485891342, "learning_rate": 9.40550137320579e-05, "loss": 0.0086, "step": 8683 }, { "epoch": 1.6807275541795665, "grad_norm": 0.1064511239528656, "learning_rate": 9.405365065417228e-05, "loss": 0.0065, "step": 8684 }, { "epoch": 1.680921052631579, "grad_norm": 0.044980160892009735, "learning_rate": 9.405228743109481e-05, "loss": 0.008, "step": 8685 }, { "epoch": 1.6811145510835912, "grad_norm": 0.12393937259912491, "learning_rate": 9.405092406283056e-05, "loss": 0.009, "step": 8686 }, { "epoch": 1.6813080495356036, "grad_norm": 0.048659760504961014, "learning_rate": 9.404956054938459e-05, "loss": 0.0061, "step": 8687 }, { "epoch": 1.681501547987616, "grad_norm": 0.0932469516992569, "learning_rate": 9.404819689076197e-05, "loss": 0.0079, "step": 8688 }, { "epoch": 1.6816950464396285, "grad_norm": 0.05714632570743561, "learning_rate": 9.40468330869678e-05, "loss": 0.0077, "step": 8689 }, { "epoch": 1.681888544891641, "grad_norm": 0.08655934035778046, "learning_rate": 9.404546913800711e-05, "loss": 0.0087, "step": 8690 }, { "epoch": 1.6820820433436534, "grad_norm": 0.04779162257909775, "learning_rate": 9.404410504388499e-05, "loss": 0.0087, "step": 8691 }, { "epoch": 1.6822755417956656, "grad_norm": 0.09766308963298798, "learning_rate": 9.40427408046065e-05, "loss": 0.0085, "step": 8692 }, { "epoch": 1.682469040247678, "grad_norm": 0.08431040495634079, "learning_rate": 9.404137642017673e-05, "loss": 0.0089, "step": 8693 }, { "epoch": 1.6826625386996903, "grad_norm": 0.17195388674736023, "learning_rate": 9.404001189060074e-05, "loss": 0.0085, "step": 8694 }, { "epoch": 1.6828560371517027, "grad_norm": 0.11727190762758255, "learning_rate": 9.403864721588361e-05, "loss": 0.0075, "step": 8695 }, { "epoch": 1.6830495356037152, "grad_norm": 0.19979923963546753, "learning_rate": 9.403728239603041e-05, "loss": 0.0078, "step": 8696 }, { "epoch": 1.6832430340557276, "grad_norm": 0.11261333525180817, "learning_rate": 9.40359174310462e-05, "loss": 0.0087, "step": 8697 }, { "epoch": 1.68343653250774, "grad_norm": 0.13456664979457855, "learning_rate": 9.403455232093607e-05, "loss": 0.0068, "step": 8698 }, { "epoch": 1.6836300309597523, "grad_norm": 0.10449007153511047, "learning_rate": 9.40331870657051e-05, "loss": 0.0086, "step": 8699 }, { "epoch": 1.6838235294117647, "grad_norm": 0.07613769173622131, "learning_rate": 9.403182166535836e-05, "loss": 0.0079, "step": 8700 }, { "epoch": 1.684017027863777, "grad_norm": 0.11107572913169861, "learning_rate": 9.403045611990093e-05, "loss": 0.0068, "step": 8701 }, { "epoch": 1.6842105263157894, "grad_norm": 0.06901383399963379, "learning_rate": 9.402909042933786e-05, "loss": 0.0091, "step": 8702 }, { "epoch": 1.6844040247678018, "grad_norm": 0.11749541014432907, "learning_rate": 9.402772459367426e-05, "loss": 0.0084, "step": 8703 }, { "epoch": 1.6845975232198143, "grad_norm": 0.06347718089818954, "learning_rate": 
9.40263586129152e-05, "loss": 0.0077, "step": 8704 }, { "epoch": 1.6847910216718267, "grad_norm": 0.10742942243814468, "learning_rate": 9.402499248706575e-05, "loss": 0.0094, "step": 8705 }, { "epoch": 1.6849845201238391, "grad_norm": 0.08939328789710999, "learning_rate": 9.402362621613101e-05, "loss": 0.0085, "step": 8706 }, { "epoch": 1.6851780185758514, "grad_norm": 0.13003575801849365, "learning_rate": 9.402225980011603e-05, "loss": 0.0074, "step": 8707 }, { "epoch": 1.6853715170278638, "grad_norm": 0.09411238878965378, "learning_rate": 9.40208932390259e-05, "loss": 0.0083, "step": 8708 }, { "epoch": 1.685565015479876, "grad_norm": 0.10699550062417984, "learning_rate": 9.40195265328657e-05, "loss": 0.0086, "step": 8709 }, { "epoch": 1.6857585139318885, "grad_norm": 0.04693319648504257, "learning_rate": 9.401815968164054e-05, "loss": 0.0092, "step": 8710 }, { "epoch": 1.685952012383901, "grad_norm": 0.08799724280834198, "learning_rate": 9.401679268535545e-05, "loss": 0.0076, "step": 8711 }, { "epoch": 1.6861455108359134, "grad_norm": 0.05128226429224014, "learning_rate": 9.401542554401554e-05, "loss": 0.0071, "step": 8712 }, { "epoch": 1.6863390092879258, "grad_norm": 0.09342116862535477, "learning_rate": 9.401405825762591e-05, "loss": 0.009, "step": 8713 }, { "epoch": 1.6865325077399382, "grad_norm": 0.0848516896367073, "learning_rate": 9.401269082619161e-05, "loss": 0.0073, "step": 8714 }, { "epoch": 1.6867260061919505, "grad_norm": 0.0906137153506279, "learning_rate": 9.401132324971774e-05, "loss": 0.007, "step": 8715 }, { "epoch": 1.686919504643963, "grad_norm": 0.07692476361989975, "learning_rate": 9.400995552820939e-05, "loss": 0.0082, "step": 8716 }, { "epoch": 1.6871130030959751, "grad_norm": 0.07154078036546707, "learning_rate": 9.400858766167163e-05, "loss": 0.0067, "step": 8717 }, { "epoch": 1.6873065015479876, "grad_norm": 0.11547510325908661, "learning_rate": 9.400721965010955e-05, "loss": 0.0092, "step": 8718 }, { "epoch": 1.6875, "grad_norm": 0.05486977845430374, "learning_rate": 9.400585149352825e-05, "loss": 0.0089, "step": 8719 }, { "epoch": 1.6876934984520124, "grad_norm": 0.10924243181943893, "learning_rate": 9.40044831919328e-05, "loss": 0.0075, "step": 8720 }, { "epoch": 1.6878869969040249, "grad_norm": 0.08187323808670044, "learning_rate": 9.40031147453283e-05, "loss": 0.0097, "step": 8721 }, { "epoch": 1.688080495356037, "grad_norm": 0.10836740583181381, "learning_rate": 9.400174615371982e-05, "loss": 0.0076, "step": 8722 }, { "epoch": 1.6882739938080495, "grad_norm": 0.037804827094078064, "learning_rate": 9.400037741711246e-05, "loss": 0.0087, "step": 8723 }, { "epoch": 1.6884674922600618, "grad_norm": 0.08938124775886536, "learning_rate": 9.39990085355113e-05, "loss": 0.0071, "step": 8724 }, { "epoch": 1.6886609907120742, "grad_norm": 0.04534213989973068, "learning_rate": 9.399763950892146e-05, "loss": 0.0087, "step": 8725 }, { "epoch": 1.6888544891640866, "grad_norm": 0.09706127643585205, "learning_rate": 9.399627033734799e-05, "loss": 0.0071, "step": 8726 }, { "epoch": 1.689047987616099, "grad_norm": 0.058909546583890915, "learning_rate": 9.399490102079599e-05, "loss": 0.009, "step": 8727 }, { "epoch": 1.6892414860681115, "grad_norm": 0.08561374992132187, "learning_rate": 9.399353155927056e-05, "loss": 0.0067, "step": 8728 }, { "epoch": 1.689434984520124, "grad_norm": 0.06623104959726334, "learning_rate": 9.399216195277678e-05, "loss": 0.0085, "step": 8729 }, { "epoch": 1.6896284829721362, "grad_norm": 0.045238424092531204, "learning_rate": 
9.399079220131976e-05, "loss": 0.0092, "step": 8730 }, { "epoch": 1.6898219814241486, "grad_norm": 0.07829547673463821, "learning_rate": 9.398942230490459e-05, "loss": 0.0068, "step": 8731 }, { "epoch": 1.6900154798761609, "grad_norm": 0.06925023347139359, "learning_rate": 9.398805226353634e-05, "loss": 0.0088, "step": 8732 }, { "epoch": 1.6902089783281733, "grad_norm": 0.06665365397930145, "learning_rate": 9.398668207722012e-05, "loss": 0.0093, "step": 8733 }, { "epoch": 1.6904024767801857, "grad_norm": 0.12233741581439972, "learning_rate": 9.398531174596104e-05, "loss": 0.0092, "step": 8734 }, { "epoch": 1.6905959752321982, "grad_norm": 0.04830159619450569, "learning_rate": 9.398394126976417e-05, "loss": 0.0095, "step": 8735 }, { "epoch": 1.6907894736842106, "grad_norm": 0.1172216385602951, "learning_rate": 9.39825706486346e-05, "loss": 0.0075, "step": 8736 }, { "epoch": 1.690982972136223, "grad_norm": 0.04423971474170685, "learning_rate": 9.398119988257744e-05, "loss": 0.0087, "step": 8737 }, { "epoch": 1.6911764705882353, "grad_norm": 0.04462878406047821, "learning_rate": 9.397982897159779e-05, "loss": 0.0071, "step": 8738 }, { "epoch": 1.6913699690402477, "grad_norm": 0.04616399109363556, "learning_rate": 9.397845791570073e-05, "loss": 0.0072, "step": 8739 }, { "epoch": 1.69156346749226, "grad_norm": 0.030395330861210823, "learning_rate": 9.39770867148914e-05, "loss": 0.0075, "step": 8740 }, { "epoch": 1.6917569659442724, "grad_norm": 0.03674372658133507, "learning_rate": 9.397571536917485e-05, "loss": 0.0086, "step": 8741 }, { "epoch": 1.6919504643962848, "grad_norm": 0.02977052889764309, "learning_rate": 9.397434387855617e-05, "loss": 0.0081, "step": 8742 }, { "epoch": 1.6921439628482973, "grad_norm": 0.041859015822410583, "learning_rate": 9.39729722430405e-05, "loss": 0.0083, "step": 8743 }, { "epoch": 1.6923374613003097, "grad_norm": 0.036052361130714417, "learning_rate": 9.397160046263293e-05, "loss": 0.007, "step": 8744 }, { "epoch": 1.692530959752322, "grad_norm": 0.034765634685754776, "learning_rate": 9.397022853733855e-05, "loss": 0.0071, "step": 8745 }, { "epoch": 1.6927244582043344, "grad_norm": 0.04681118577718735, "learning_rate": 9.396885646716244e-05, "loss": 0.0085, "step": 8746 }, { "epoch": 1.6929179566563466, "grad_norm": 0.04432785138487816, "learning_rate": 9.396748425210976e-05, "loss": 0.0078, "step": 8747 }, { "epoch": 1.693111455108359, "grad_norm": 0.03571874648332596, "learning_rate": 9.396611189218554e-05, "loss": 0.0061, "step": 8748 }, { "epoch": 1.6933049535603715, "grad_norm": 0.047000717371702194, "learning_rate": 9.396473938739493e-05, "loss": 0.0088, "step": 8749 }, { "epoch": 1.693498452012384, "grad_norm": 0.04729620739817619, "learning_rate": 9.396336673774305e-05, "loss": 0.0081, "step": 8750 }, { "epoch": 1.6936919504643964, "grad_norm": 0.03320714458823204, "learning_rate": 9.396199394323493e-05, "loss": 0.0082, "step": 8751 }, { "epoch": 1.6938854489164088, "grad_norm": 0.047902580350637436, "learning_rate": 9.396062100387575e-05, "loss": 0.0063, "step": 8752 }, { "epoch": 1.694078947368421, "grad_norm": 0.04442540556192398, "learning_rate": 9.395924791967055e-05, "loss": 0.0087, "step": 8753 }, { "epoch": 1.6942724458204335, "grad_norm": 0.04776822030544281, "learning_rate": 9.395787469062448e-05, "loss": 0.0082, "step": 8754 }, { "epoch": 1.6944659442724457, "grad_norm": 0.07417158782482147, "learning_rate": 9.395650131674264e-05, "loss": 0.0086, "step": 8755 }, { "epoch": 1.6946594427244581, "grad_norm": 0.041958000510931015, 
"learning_rate": 9.395512779803012e-05, "loss": 0.0072, "step": 8756 }, { "epoch": 1.6948529411764706, "grad_norm": 0.05313201621174812, "learning_rate": 9.395375413449204e-05, "loss": 0.0084, "step": 8757 }, { "epoch": 1.695046439628483, "grad_norm": 0.050189435482025146, "learning_rate": 9.395238032613349e-05, "loss": 0.0079, "step": 8758 }, { "epoch": 1.6952399380804954, "grad_norm": 0.04538985714316368, "learning_rate": 9.39510063729596e-05, "loss": 0.0066, "step": 8759 }, { "epoch": 1.6954334365325079, "grad_norm": 0.04742993414402008, "learning_rate": 9.394963227497545e-05, "loss": 0.0083, "step": 8760 }, { "epoch": 1.69562693498452, "grad_norm": 0.09648481756448746, "learning_rate": 9.394825803218617e-05, "loss": 0.008, "step": 8761 }, { "epoch": 1.6958204334365325, "grad_norm": 0.032109033316373825, "learning_rate": 9.394688364459686e-05, "loss": 0.008, "step": 8762 }, { "epoch": 1.6960139318885448, "grad_norm": 0.10074413567781448, "learning_rate": 9.394550911221264e-05, "loss": 0.0086, "step": 8763 }, { "epoch": 1.6962074303405572, "grad_norm": 0.07202783972024918, "learning_rate": 9.394413443503863e-05, "loss": 0.0073, "step": 8764 }, { "epoch": 1.6964009287925697, "grad_norm": 0.058579981327056885, "learning_rate": 9.39427596130799e-05, "loss": 0.0077, "step": 8765 }, { "epoch": 1.696594427244582, "grad_norm": 0.1328037977218628, "learning_rate": 9.394138464634161e-05, "loss": 0.0068, "step": 8766 }, { "epoch": 1.6967879256965945, "grad_norm": 0.050526753067970276, "learning_rate": 9.394000953482882e-05, "loss": 0.0087, "step": 8767 }, { "epoch": 1.696981424148607, "grad_norm": 0.13552622497081757, "learning_rate": 9.393863427854668e-05, "loss": 0.009, "step": 8768 }, { "epoch": 1.6971749226006192, "grad_norm": 0.12057290226221085, "learning_rate": 9.39372588775003e-05, "loss": 0.0077, "step": 8769 }, { "epoch": 1.6973684210526314, "grad_norm": 0.05446799844503403, "learning_rate": 9.393588333169478e-05, "loss": 0.0081, "step": 8770 }, { "epoch": 1.6975619195046439, "grad_norm": 0.13331785798072815, "learning_rate": 9.393450764113524e-05, "loss": 0.0066, "step": 8771 }, { "epoch": 1.6977554179566563, "grad_norm": 0.09101204574108124, "learning_rate": 9.393313180582679e-05, "loss": 0.0095, "step": 8772 }, { "epoch": 1.6979489164086687, "grad_norm": 0.12261087447404861, "learning_rate": 9.393175582577455e-05, "loss": 0.009, "step": 8773 }, { "epoch": 1.6981424148606812, "grad_norm": 0.061559390276670456, "learning_rate": 9.393037970098364e-05, "loss": 0.0094, "step": 8774 }, { "epoch": 1.6983359133126936, "grad_norm": 0.18027710914611816, "learning_rate": 9.392900343145917e-05, "loss": 0.0065, "step": 8775 }, { "epoch": 1.6985294117647058, "grad_norm": 0.07454602420330048, "learning_rate": 9.392762701720626e-05, "loss": 0.0089, "step": 8776 }, { "epoch": 1.6987229102167183, "grad_norm": 0.18303418159484863, "learning_rate": 9.392625045823001e-05, "loss": 0.0071, "step": 8777 }, { "epoch": 1.6989164086687305, "grad_norm": 0.19010382890701294, "learning_rate": 9.392487375453557e-05, "loss": 0.0084, "step": 8778 }, { "epoch": 1.699109907120743, "grad_norm": 0.10351800173521042, "learning_rate": 9.392349690612802e-05, "loss": 0.0079, "step": 8779 }, { "epoch": 1.6993034055727554, "grad_norm": 0.23073898255825043, "learning_rate": 9.392211991301252e-05, "loss": 0.0091, "step": 8780 }, { "epoch": 1.6994969040247678, "grad_norm": 0.11308234184980392, "learning_rate": 9.392074277519417e-05, "loss": 0.0103, "step": 8781 }, { "epoch": 1.6996904024767803, "grad_norm": 0.15322574973106384, 
"learning_rate": 9.391936549267808e-05, "loss": 0.0058, "step": 8782 }, { "epoch": 1.6998839009287927, "grad_norm": 0.17883019149303436, "learning_rate": 9.391798806546937e-05, "loss": 0.0084, "step": 8783 }, { "epoch": 1.700077399380805, "grad_norm": 0.037860896438360214, "learning_rate": 9.391661049357319e-05, "loss": 0.0084, "step": 8784 }, { "epoch": 1.7002708978328174, "grad_norm": 0.19275948405265808, "learning_rate": 9.391523277699463e-05, "loss": 0.0089, "step": 8785 }, { "epoch": 1.7004643962848296, "grad_norm": 0.06706199795007706, "learning_rate": 9.391385491573884e-05, "loss": 0.0096, "step": 8786 }, { "epoch": 1.700657894736842, "grad_norm": 0.1191302016377449, "learning_rate": 9.391247690981088e-05, "loss": 0.0074, "step": 8787 }, { "epoch": 1.7008513931888545, "grad_norm": 0.10968776047229767, "learning_rate": 9.391109875921596e-05, "loss": 0.009, "step": 8788 }, { "epoch": 1.701044891640867, "grad_norm": 0.06898756325244904, "learning_rate": 9.390972046395916e-05, "loss": 0.0075, "step": 8789 }, { "epoch": 1.7012383900928794, "grad_norm": 0.09533406049013138, "learning_rate": 9.39083420240456e-05, "loss": 0.0079, "step": 8790 }, { "epoch": 1.7014318885448918, "grad_norm": 0.07892800122499466, "learning_rate": 9.39069634394804e-05, "loss": 0.0079, "step": 8791 }, { "epoch": 1.701625386996904, "grad_norm": 0.16366708278656006, "learning_rate": 9.390558471026872e-05, "loss": 0.0077, "step": 8792 }, { "epoch": 1.7018188854489165, "grad_norm": 0.07235163450241089, "learning_rate": 9.390420583641565e-05, "loss": 0.0077, "step": 8793 }, { "epoch": 1.7020123839009287, "grad_norm": 0.13490112125873566, "learning_rate": 9.390282681792632e-05, "loss": 0.0074, "step": 8794 }, { "epoch": 1.7022058823529411, "grad_norm": 0.09860114753246307, "learning_rate": 9.390144765480589e-05, "loss": 0.0084, "step": 8795 }, { "epoch": 1.7023993808049536, "grad_norm": 0.08989183604717255, "learning_rate": 9.390006834705945e-05, "loss": 0.0076, "step": 8796 }, { "epoch": 1.702592879256966, "grad_norm": 0.1221097782254219, "learning_rate": 9.389868889469213e-05, "loss": 0.0078, "step": 8797 }, { "epoch": 1.7027863777089784, "grad_norm": 0.06636715680360794, "learning_rate": 9.389730929770908e-05, "loss": 0.0079, "step": 8798 }, { "epoch": 1.7029798761609907, "grad_norm": 0.13930656015872955, "learning_rate": 9.389592955611541e-05, "loss": 0.0086, "step": 8799 }, { "epoch": 1.703173374613003, "grad_norm": 0.11377682536840439, "learning_rate": 9.389454966991628e-05, "loss": 0.0081, "step": 8800 }, { "epoch": 1.7033668730650153, "grad_norm": 0.09024505317211151, "learning_rate": 9.389316963911678e-05, "loss": 0.0081, "step": 8801 }, { "epoch": 1.7035603715170278, "grad_norm": 0.13832737505435944, "learning_rate": 9.389178946372207e-05, "loss": 0.0071, "step": 8802 }, { "epoch": 1.7037538699690402, "grad_norm": 0.03491910919547081, "learning_rate": 9.389040914373725e-05, "loss": 0.0081, "step": 8803 }, { "epoch": 1.7039473684210527, "grad_norm": 0.09961872547864914, "learning_rate": 9.388902867916749e-05, "loss": 0.0076, "step": 8804 }, { "epoch": 1.704140866873065, "grad_norm": 0.07356078922748566, "learning_rate": 9.38876480700179e-05, "loss": 0.0073, "step": 8805 }, { "epoch": 1.7043343653250775, "grad_norm": 0.08050348609685898, "learning_rate": 9.388626731629361e-05, "loss": 0.0069, "step": 8806 }, { "epoch": 1.7045278637770898, "grad_norm": 0.12865659594535828, "learning_rate": 9.388488641799976e-05, "loss": 0.0097, "step": 8807 }, { "epoch": 1.7047213622291022, "grad_norm": 0.07718458771705627, 
"learning_rate": 9.388350537514148e-05, "loss": 0.0081, "step": 8808 }, { "epoch": 1.7049148606811144, "grad_norm": 0.0730895847082138, "learning_rate": 9.388212418772394e-05, "loss": 0.0064, "step": 8809 }, { "epoch": 1.7051083591331269, "grad_norm": 0.034790441393852234, "learning_rate": 9.388074285575222e-05, "loss": 0.0078, "step": 8810 }, { "epoch": 1.7053018575851393, "grad_norm": 0.03560972958803177, "learning_rate": 9.387936137923147e-05, "loss": 0.0093, "step": 8811 }, { "epoch": 1.7054953560371517, "grad_norm": 0.05857396125793457, "learning_rate": 9.387797975816685e-05, "loss": 0.0068, "step": 8812 }, { "epoch": 1.7056888544891642, "grad_norm": 0.09064274281263351, "learning_rate": 9.387659799256346e-05, "loss": 0.0084, "step": 8813 }, { "epoch": 1.7058823529411766, "grad_norm": 0.05710757151246071, "learning_rate": 9.387521608242648e-05, "loss": 0.0101, "step": 8814 }, { "epoch": 1.7060758513931888, "grad_norm": 0.1006222665309906, "learning_rate": 9.387383402776103e-05, "loss": 0.0067, "step": 8815 }, { "epoch": 1.7062693498452013, "grad_norm": 0.06366007030010223, "learning_rate": 9.387245182857222e-05, "loss": 0.0085, "step": 8816 }, { "epoch": 1.7064628482972135, "grad_norm": 0.08157865703105927, "learning_rate": 9.387106948486523e-05, "loss": 0.0083, "step": 8817 }, { "epoch": 1.706656346749226, "grad_norm": 0.07802803069353104, "learning_rate": 9.386968699664516e-05, "loss": 0.0071, "step": 8818 }, { "epoch": 1.7068498452012384, "grad_norm": 0.03326811641454697, "learning_rate": 9.38683043639172e-05, "loss": 0.0077, "step": 8819 }, { "epoch": 1.7070433436532508, "grad_norm": 0.08418135344982147, "learning_rate": 9.386692158668645e-05, "loss": 0.0102, "step": 8820 }, { "epoch": 1.7072368421052633, "grad_norm": 0.042830441147089005, "learning_rate": 9.386553866495807e-05, "loss": 0.0066, "step": 8821 }, { "epoch": 1.7074303405572755, "grad_norm": 0.07797616720199585, "learning_rate": 9.386415559873716e-05, "loss": 0.0075, "step": 8822 }, { "epoch": 1.707623839009288, "grad_norm": 0.03933916613459587, "learning_rate": 9.386277238802895e-05, "loss": 0.0088, "step": 8823 }, { "epoch": 1.7078173374613002, "grad_norm": 0.12581434845924377, "learning_rate": 9.386138903283847e-05, "loss": 0.0077, "step": 8824 }, { "epoch": 1.7080108359133126, "grad_norm": 0.06554608047008514, "learning_rate": 9.386000553317097e-05, "loss": 0.0085, "step": 8825 }, { "epoch": 1.708204334365325, "grad_norm": 0.11527074128389359, "learning_rate": 9.385862188903153e-05, "loss": 0.0086, "step": 8826 }, { "epoch": 1.7083978328173375, "grad_norm": 0.03960276395082474, "learning_rate": 9.385723810042529e-05, "loss": 0.0083, "step": 8827 }, { "epoch": 1.70859133126935, "grad_norm": 0.13433025777339935, "learning_rate": 9.385585416735744e-05, "loss": 0.0078, "step": 8828 }, { "epoch": 1.7087848297213624, "grad_norm": 0.057829104363918304, "learning_rate": 9.385447008983308e-05, "loss": 0.0078, "step": 8829 }, { "epoch": 1.7089783281733746, "grad_norm": 0.12817659974098206, "learning_rate": 9.385308586785738e-05, "loss": 0.0083, "step": 8830 }, { "epoch": 1.709171826625387, "grad_norm": 0.09383635967969894, "learning_rate": 9.385170150143548e-05, "loss": 0.0077, "step": 8831 }, { "epoch": 1.7093653250773992, "grad_norm": 0.1464935690164566, "learning_rate": 9.385031699057253e-05, "loss": 0.0072, "step": 8832 }, { "epoch": 1.7095588235294117, "grad_norm": 0.060897424817085266, "learning_rate": 9.384893233527368e-05, "loss": 0.0081, "step": 8833 }, { "epoch": 1.7097523219814241, "grad_norm": 
0.07649475336074829, "learning_rate": 9.384754753554406e-05, "loss": 0.0089, "step": 8834 }, { "epoch": 1.7099458204334366, "grad_norm": 0.05700535327196121, "learning_rate": 9.384616259138883e-05, "loss": 0.0072, "step": 8835 }, { "epoch": 1.710139318885449, "grad_norm": 0.07841053605079651, "learning_rate": 9.384477750281314e-05, "loss": 0.0074, "step": 8836 }, { "epoch": 1.7103328173374615, "grad_norm": 0.06268132477998734, "learning_rate": 9.384339226982215e-05, "loss": 0.0077, "step": 8837 }, { "epoch": 1.7105263157894737, "grad_norm": 0.08755267411470413, "learning_rate": 9.3842006892421e-05, "loss": 0.0089, "step": 8838 }, { "epoch": 1.7107198142414861, "grad_norm": 0.05148722603917122, "learning_rate": 9.384062137061483e-05, "loss": 0.0078, "step": 8839 }, { "epoch": 1.7109133126934983, "grad_norm": 0.120698481798172, "learning_rate": 9.38392357044088e-05, "loss": 0.0081, "step": 8840 }, { "epoch": 1.7111068111455108, "grad_norm": 0.0664062649011612, "learning_rate": 9.383784989380806e-05, "loss": 0.0093, "step": 8841 }, { "epoch": 1.7113003095975232, "grad_norm": 0.10914065688848495, "learning_rate": 9.383646393881777e-05, "loss": 0.0081, "step": 8842 }, { "epoch": 1.7114938080495357, "grad_norm": 0.09911798685789108, "learning_rate": 9.383507783944307e-05, "loss": 0.0072, "step": 8843 }, { "epoch": 1.711687306501548, "grad_norm": 0.06351258605718613, "learning_rate": 9.383369159568913e-05, "loss": 0.0076, "step": 8844 }, { "epoch": 1.7118808049535603, "grad_norm": 0.11415830254554749, "learning_rate": 9.38323052075611e-05, "loss": 0.0075, "step": 8845 }, { "epoch": 1.7120743034055728, "grad_norm": 0.04223280027508736, "learning_rate": 9.38309186750641e-05, "loss": 0.0074, "step": 8846 }, { "epoch": 1.712267801857585, "grad_norm": 0.11868663877248764, "learning_rate": 9.382953199820333e-05, "loss": 0.0097, "step": 8847 }, { "epoch": 1.7124613003095974, "grad_norm": 0.09916957467794418, "learning_rate": 9.382814517698392e-05, "loss": 0.0073, "step": 8848 }, { "epoch": 1.7126547987616099, "grad_norm": 0.11741586774587631, "learning_rate": 9.382675821141103e-05, "loss": 0.0077, "step": 8849 }, { "epoch": 1.7128482972136223, "grad_norm": 0.11816605925559998, "learning_rate": 9.382537110148983e-05, "loss": 0.008, "step": 8850 }, { "epoch": 1.7130417956656347, "grad_norm": 0.06409604847431183, "learning_rate": 9.382398384722548e-05, "loss": 0.0083, "step": 8851 }, { "epoch": 1.7132352941176472, "grad_norm": 0.13533586263656616, "learning_rate": 9.382259644862311e-05, "loss": 0.0075, "step": 8852 }, { "epoch": 1.7134287925696594, "grad_norm": 0.07207098603248596, "learning_rate": 9.382120890568788e-05, "loss": 0.0073, "step": 8853 }, { "epoch": 1.7136222910216719, "grad_norm": 0.124236099421978, "learning_rate": 9.381982121842499e-05, "loss": 0.0085, "step": 8854 }, { "epoch": 1.713815789473684, "grad_norm": 0.1302768886089325, "learning_rate": 9.381843338683955e-05, "loss": 0.0074, "step": 8855 }, { "epoch": 1.7140092879256965, "grad_norm": 0.07079053670167923, "learning_rate": 9.381704541093673e-05, "loss": 0.0082, "step": 8856 }, { "epoch": 1.714202786377709, "grad_norm": 0.15110579133033752, "learning_rate": 9.381565729072174e-05, "loss": 0.0091, "step": 8857 }, { "epoch": 1.7143962848297214, "grad_norm": 0.05757535248994827, "learning_rate": 9.381426902619967e-05, "loss": 0.0059, "step": 8858 }, { "epoch": 1.7145897832817338, "grad_norm": 0.12747137248516083, "learning_rate": 9.381288061737571e-05, "loss": 0.0072, "step": 8859 }, { "epoch": 1.7147832817337463, "grad_norm": 
0.06280072033405304, "learning_rate": 9.381149206425502e-05, "loss": 0.0056, "step": 8860 }, { "epoch": 1.7149767801857585, "grad_norm": 0.11270550638437271, "learning_rate": 9.381010336684278e-05, "loss": 0.0081, "step": 8861 }, { "epoch": 1.715170278637771, "grad_norm": 0.08056693524122238, "learning_rate": 9.380871452514414e-05, "loss": 0.0081, "step": 8862 }, { "epoch": 1.7153637770897832, "grad_norm": 0.09967035055160522, "learning_rate": 9.380732553916427e-05, "loss": 0.0072, "step": 8863 }, { "epoch": 1.7155572755417956, "grad_norm": 0.111885666847229, "learning_rate": 9.38059364089083e-05, "loss": 0.0077, "step": 8864 }, { "epoch": 1.715750773993808, "grad_norm": 0.10887101292610168, "learning_rate": 9.380454713438143e-05, "loss": 0.0072, "step": 8865 }, { "epoch": 1.7159442724458205, "grad_norm": 0.10137434303760529, "learning_rate": 9.380315771558882e-05, "loss": 0.0092, "step": 8866 }, { "epoch": 1.716137770897833, "grad_norm": 0.10076477378606796, "learning_rate": 9.380176815253564e-05, "loss": 0.0093, "step": 8867 }, { "epoch": 1.7163312693498454, "grad_norm": 0.11033301800489426, "learning_rate": 9.380037844522704e-05, "loss": 0.0088, "step": 8868 }, { "epoch": 1.7165247678018576, "grad_norm": 0.07765227556228638, "learning_rate": 9.379898859366819e-05, "loss": 0.0065, "step": 8869 }, { "epoch": 1.7167182662538698, "grad_norm": 0.11259996145963669, "learning_rate": 9.379759859786425e-05, "loss": 0.0074, "step": 8870 }, { "epoch": 1.7169117647058822, "grad_norm": 0.040004901587963104, "learning_rate": 9.379620845782042e-05, "loss": 0.0076, "step": 8871 }, { "epoch": 1.7171052631578947, "grad_norm": 0.12451520562171936, "learning_rate": 9.379481817354183e-05, "loss": 0.0084, "step": 8872 }, { "epoch": 1.7172987616099071, "grad_norm": 0.06097693368792534, "learning_rate": 9.379342774503368e-05, "loss": 0.0069, "step": 8873 }, { "epoch": 1.7174922600619196, "grad_norm": 0.1350616216659546, "learning_rate": 9.379203717230111e-05, "loss": 0.0072, "step": 8874 }, { "epoch": 1.717685758513932, "grad_norm": 0.11319996416568756, "learning_rate": 9.37906464553493e-05, "loss": 0.0082, "step": 8875 }, { "epoch": 1.7178792569659442, "grad_norm": 0.13046705722808838, "learning_rate": 9.378925559418344e-05, "loss": 0.0083, "step": 8876 }, { "epoch": 1.7180727554179567, "grad_norm": 0.13753832876682281, "learning_rate": 9.378786458880868e-05, "loss": 0.0081, "step": 8877 }, { "epoch": 1.718266253869969, "grad_norm": 0.08371826261281967, "learning_rate": 9.37864734392302e-05, "loss": 0.0058, "step": 8878 }, { "epoch": 1.7184597523219813, "grad_norm": 0.11334694176912308, "learning_rate": 9.378508214545317e-05, "loss": 0.0075, "step": 8879 }, { "epoch": 1.7186532507739938, "grad_norm": 0.1330394595861435, "learning_rate": 9.378369070748277e-05, "loss": 0.0085, "step": 8880 }, { "epoch": 1.7188467492260062, "grad_norm": 0.10310950130224228, "learning_rate": 9.378229912532415e-05, "loss": 0.0085, "step": 8881 }, { "epoch": 1.7190402476780187, "grad_norm": 0.13163338601589203, "learning_rate": 9.37809073989825e-05, "loss": 0.0079, "step": 8882 }, { "epoch": 1.719233746130031, "grad_norm": 0.04413874074816704, "learning_rate": 9.377951552846299e-05, "loss": 0.0074, "step": 8883 }, { "epoch": 1.7194272445820433, "grad_norm": 0.14855311810970306, "learning_rate": 9.37781235137708e-05, "loss": 0.008, "step": 8884 }, { "epoch": 1.7196207430340558, "grad_norm": 0.07389620691537857, "learning_rate": 9.37767313549111e-05, "loss": 0.0087, "step": 8885 }, { "epoch": 1.719814241486068, "grad_norm": 
0.1055116280913353, "learning_rate": 9.377533905188908e-05, "loss": 0.0076, "step": 8886 }, { "epoch": 1.7200077399380804, "grad_norm": 0.12071173638105392, "learning_rate": 9.377394660470987e-05, "loss": 0.0083, "step": 8887 }, { "epoch": 1.7202012383900929, "grad_norm": 0.06941566616296768, "learning_rate": 9.377255401337871e-05, "loss": 0.0071, "step": 8888 }, { "epoch": 1.7203947368421053, "grad_norm": 0.15119293332099915, "learning_rate": 9.377116127790074e-05, "loss": 0.0089, "step": 8889 }, { "epoch": 1.7205882352941178, "grad_norm": 0.059181272983551025, "learning_rate": 9.376976839828115e-05, "loss": 0.0062, "step": 8890 }, { "epoch": 1.7207817337461302, "grad_norm": 0.14516492187976837, "learning_rate": 9.376837537452511e-05, "loss": 0.0066, "step": 8891 }, { "epoch": 1.7209752321981424, "grad_norm": 0.08130960911512375, "learning_rate": 9.376698220663781e-05, "loss": 0.0081, "step": 8892 }, { "epoch": 1.7211687306501546, "grad_norm": 0.15128754079341888, "learning_rate": 9.376558889462442e-05, "loss": 0.0072, "step": 8893 }, { "epoch": 1.721362229102167, "grad_norm": 0.13191506266593933, "learning_rate": 9.376419543849012e-05, "loss": 0.0074, "step": 8894 }, { "epoch": 1.7215557275541795, "grad_norm": 0.09821031987667084, "learning_rate": 9.37628018382401e-05, "loss": 0.0085, "step": 8895 }, { "epoch": 1.721749226006192, "grad_norm": 0.1834217607975006, "learning_rate": 9.376140809387952e-05, "loss": 0.0096, "step": 8896 }, { "epoch": 1.7219427244582044, "grad_norm": 0.0669415071606636, "learning_rate": 9.376001420541358e-05, "loss": 0.0076, "step": 8897 }, { "epoch": 1.7221362229102168, "grad_norm": 0.12724903225898743, "learning_rate": 9.375862017284746e-05, "loss": 0.0082, "step": 8898 }, { "epoch": 1.722329721362229, "grad_norm": 0.10280465334653854, "learning_rate": 9.375722599618635e-05, "loss": 0.008, "step": 8899 }, { "epoch": 1.7225232198142415, "grad_norm": 0.06216811761260033, "learning_rate": 9.375583167543541e-05, "loss": 0.0065, "step": 8900 }, { "epoch": 1.7227167182662537, "grad_norm": 0.12936262786388397, "learning_rate": 9.375443721059983e-05, "loss": 0.0075, "step": 8901 }, { "epoch": 1.7229102167182662, "grad_norm": 0.035542044788599014, "learning_rate": 9.375304260168483e-05, "loss": 0.008, "step": 8902 }, { "epoch": 1.7231037151702786, "grad_norm": 0.09530872851610184, "learning_rate": 9.375164784869555e-05, "loss": 0.009, "step": 8903 }, { "epoch": 1.723297213622291, "grad_norm": 0.09951445460319519, "learning_rate": 9.375025295163719e-05, "loss": 0.009, "step": 8904 }, { "epoch": 1.7234907120743035, "grad_norm": 0.06530626118183136, "learning_rate": 9.374885791051494e-05, "loss": 0.0065, "step": 8905 }, { "epoch": 1.723684210526316, "grad_norm": 0.11313673108816147, "learning_rate": 9.374746272533398e-05, "loss": 0.0092, "step": 8906 }, { "epoch": 1.7238777089783281, "grad_norm": 0.07839629054069519, "learning_rate": 9.374606739609952e-05, "loss": 0.0085, "step": 8907 }, { "epoch": 1.7240712074303406, "grad_norm": 0.094295933842659, "learning_rate": 9.374467192281672e-05, "loss": 0.0106, "step": 8908 }, { "epoch": 1.7242647058823528, "grad_norm": 0.07216930389404297, "learning_rate": 9.374327630549076e-05, "loss": 0.0085, "step": 8909 }, { "epoch": 1.7244582043343653, "grad_norm": 0.09546821564435959, "learning_rate": 9.374188054412685e-05, "loss": 0.0079, "step": 8910 }, { "epoch": 1.7246517027863777, "grad_norm": 0.03773167356848717, "learning_rate": 9.374048463873018e-05, "loss": 0.0081, "step": 8911 }, { "epoch": 1.7248452012383901, "grad_norm": 
0.12188680469989777, "learning_rate": 9.373908858930594e-05, "loss": 0.0065, "step": 8912 }, { "epoch": 1.7250386996904026, "grad_norm": 0.04995060712099075, "learning_rate": 9.373769239585932e-05, "loss": 0.0079, "step": 8913 }, { "epoch": 1.725232198142415, "grad_norm": 0.17562618851661682, "learning_rate": 9.373629605839549e-05, "loss": 0.0088, "step": 8914 }, { "epoch": 1.7254256965944272, "grad_norm": 0.09560283273458481, "learning_rate": 9.373489957691965e-05, "loss": 0.0077, "step": 8915 }, { "epoch": 1.7256191950464397, "grad_norm": 0.14758478105068207, "learning_rate": 9.373350295143701e-05, "loss": 0.007, "step": 8916 }, { "epoch": 1.725812693498452, "grad_norm": 0.14829319715499878, "learning_rate": 9.373210618195276e-05, "loss": 0.0092, "step": 8917 }, { "epoch": 1.7260061919504643, "grad_norm": 0.05931711196899414, "learning_rate": 9.373070926847208e-05, "loss": 0.0094, "step": 8918 }, { "epoch": 1.7261996904024768, "grad_norm": 0.18863685429096222, "learning_rate": 9.372931221100015e-05, "loss": 0.0066, "step": 8919 }, { "epoch": 1.7263931888544892, "grad_norm": 0.036115001887083054, "learning_rate": 9.37279150095422e-05, "loss": 0.0083, "step": 8920 }, { "epoch": 1.7265866873065017, "grad_norm": 0.1088092103600502, "learning_rate": 9.372651766410337e-05, "loss": 0.0094, "step": 8921 }, { "epoch": 1.7267801857585139, "grad_norm": 0.15196362137794495, "learning_rate": 9.372512017468891e-05, "loss": 0.0069, "step": 8922 }, { "epoch": 1.7269736842105263, "grad_norm": 0.12388944625854492, "learning_rate": 9.3723722541304e-05, "loss": 0.0074, "step": 8923 }, { "epoch": 1.7271671826625385, "grad_norm": 0.16425208747386932, "learning_rate": 9.372232476395383e-05, "loss": 0.0078, "step": 8924 }, { "epoch": 1.727360681114551, "grad_norm": 0.0426555797457695, "learning_rate": 9.372092684264361e-05, "loss": 0.0072, "step": 8925 }, { "epoch": 1.7275541795665634, "grad_norm": 0.1616923063993454, "learning_rate": 9.371952877737851e-05, "loss": 0.0095, "step": 8926 }, { "epoch": 1.7277476780185759, "grad_norm": 0.03223397210240364, "learning_rate": 9.371813056816374e-05, "loss": 0.008, "step": 8927 }, { "epoch": 1.7279411764705883, "grad_norm": 0.06287575513124466, "learning_rate": 9.371673221500451e-05, "loss": 0.0089, "step": 8928 }, { "epoch": 1.7281346749226008, "grad_norm": 0.145062655210495, "learning_rate": 9.3715333717906e-05, "loss": 0.0076, "step": 8929 }, { "epoch": 1.728328173374613, "grad_norm": 0.10262525826692581, "learning_rate": 9.371393507687344e-05, "loss": 0.0087, "step": 8930 }, { "epoch": 1.7285216718266254, "grad_norm": 0.1277916133403778, "learning_rate": 9.371253629191199e-05, "loss": 0.0091, "step": 8931 }, { "epoch": 1.7287151702786376, "grad_norm": 0.06455441564321518, "learning_rate": 9.371113736302688e-05, "loss": 0.0076, "step": 8932 }, { "epoch": 1.72890866873065, "grad_norm": 0.16460099816322327, "learning_rate": 9.370973829022328e-05, "loss": 0.0085, "step": 8933 }, { "epoch": 1.7291021671826625, "grad_norm": 0.05855298414826393, "learning_rate": 9.370833907350642e-05, "loss": 0.0081, "step": 8934 }, { "epoch": 1.729295665634675, "grad_norm": 0.17333140969276428, "learning_rate": 9.37069397128815e-05, "loss": 0.0077, "step": 8935 }, { "epoch": 1.7294891640866874, "grad_norm": 0.09636273235082626, "learning_rate": 9.370554020835373e-05, "loss": 0.0085, "step": 8936 }, { "epoch": 1.7296826625386998, "grad_norm": 0.16090267896652222, "learning_rate": 9.370414055992826e-05, "loss": 0.0085, "step": 8937 }, { "epoch": 1.729876160990712, "grad_norm": 
0.12939715385437012, "learning_rate": 9.370274076761037e-05, "loss": 0.0081, "step": 8938 }, { "epoch": 1.7300696594427245, "grad_norm": 0.1313934177160263, "learning_rate": 9.37013408314052e-05, "loss": 0.0089, "step": 8939 }, { "epoch": 1.7302631578947367, "grad_norm": 0.1486200988292694, "learning_rate": 9.369994075131799e-05, "loss": 0.0097, "step": 8940 }, { "epoch": 1.7304566563467492, "grad_norm": 0.08858843147754669, "learning_rate": 9.369854052735394e-05, "loss": 0.0072, "step": 8941 }, { "epoch": 1.7306501547987616, "grad_norm": 0.1682778298854828, "learning_rate": 9.369714015951825e-05, "loss": 0.0081, "step": 8942 }, { "epoch": 1.730843653250774, "grad_norm": 0.1332780122756958, "learning_rate": 9.369573964781613e-05, "loss": 0.0072, "step": 8943 }, { "epoch": 1.7310371517027865, "grad_norm": 0.14321987330913544, "learning_rate": 9.369433899225277e-05, "loss": 0.0092, "step": 8944 }, { "epoch": 1.7312306501547987, "grad_norm": 0.122591033577919, "learning_rate": 9.369293819283341e-05, "loss": 0.0077, "step": 8945 }, { "epoch": 1.7314241486068112, "grad_norm": 0.10054007917642593, "learning_rate": 9.369153724956323e-05, "loss": 0.0082, "step": 8946 }, { "epoch": 1.7316176470588234, "grad_norm": 0.10677105188369751, "learning_rate": 9.369013616244747e-05, "loss": 0.0061, "step": 8947 }, { "epoch": 1.7318111455108358, "grad_norm": 0.05542183667421341, "learning_rate": 9.368873493149129e-05, "loss": 0.0083, "step": 8948 }, { "epoch": 1.7320046439628483, "grad_norm": 0.11893085390329361, "learning_rate": 9.368733355669993e-05, "loss": 0.0107, "step": 8949 }, { "epoch": 1.7321981424148607, "grad_norm": 0.05996406450867653, "learning_rate": 9.368593203807861e-05, "loss": 0.0081, "step": 8950 }, { "epoch": 1.7323916408668731, "grad_norm": 0.08848143368959427, "learning_rate": 9.368453037563252e-05, "loss": 0.0066, "step": 8951 }, { "epoch": 1.7325851393188856, "grad_norm": 0.072804756462574, "learning_rate": 9.368312856936688e-05, "loss": 0.01, "step": 8952 }, { "epoch": 1.7327786377708978, "grad_norm": 0.0840286910533905, "learning_rate": 9.36817266192869e-05, "loss": 0.0088, "step": 8953 }, { "epoch": 1.7329721362229102, "grad_norm": 0.08032438904047012, "learning_rate": 9.368032452539779e-05, "loss": 0.0103, "step": 8954 }, { "epoch": 1.7331656346749225, "grad_norm": 0.10153591632843018, "learning_rate": 9.367892228770476e-05, "loss": 0.0067, "step": 8955 }, { "epoch": 1.733359133126935, "grad_norm": 0.063102588057518, "learning_rate": 9.367751990621304e-05, "loss": 0.0082, "step": 8956 }, { "epoch": 1.7335526315789473, "grad_norm": 0.13885758817195892, "learning_rate": 9.367611738092782e-05, "loss": 0.0078, "step": 8957 }, { "epoch": 1.7337461300309598, "grad_norm": 0.11364258825778961, "learning_rate": 9.367471471185433e-05, "loss": 0.0076, "step": 8958 }, { "epoch": 1.7339396284829722, "grad_norm": 0.08250828832387924, "learning_rate": 9.36733118989978e-05, "loss": 0.0082, "step": 8959 }, { "epoch": 1.7341331269349847, "grad_norm": 0.13331885635852814, "learning_rate": 9.367190894236338e-05, "loss": 0.008, "step": 8960 }, { "epoch": 1.734326625386997, "grad_norm": 0.11080065369606018, "learning_rate": 9.367050584195636e-05, "loss": 0.0066, "step": 8961 }, { "epoch": 1.7345201238390093, "grad_norm": 0.11438827216625214, "learning_rate": 9.366910259778193e-05, "loss": 0.009, "step": 8962 }, { "epoch": 1.7347136222910216, "grad_norm": 0.15063609182834625, "learning_rate": 9.36676992098453e-05, "loss": 0.0067, "step": 8963 }, { "epoch": 1.734907120743034, "grad_norm": 
0.06683919578790665, "learning_rate": 9.366629567815169e-05, "loss": 0.007, "step": 8964 }, { "epoch": 1.7351006191950464, "grad_norm": 0.15284468233585358, "learning_rate": 9.366489200270632e-05, "loss": 0.0085, "step": 8965 }, { "epoch": 1.7352941176470589, "grad_norm": 0.10189718753099442, "learning_rate": 9.366348818351442e-05, "loss": 0.0068, "step": 8966 }, { "epoch": 1.7354876160990713, "grad_norm": 0.09204110503196716, "learning_rate": 9.366208422058118e-05, "loss": 0.0081, "step": 8967 }, { "epoch": 1.7356811145510835, "grad_norm": 0.09806673973798752, "learning_rate": 9.366068011391187e-05, "loss": 0.0086, "step": 8968 }, { "epoch": 1.735874613003096, "grad_norm": 0.11503414809703827, "learning_rate": 9.365927586351163e-05, "loss": 0.0084, "step": 8969 }, { "epoch": 1.7360681114551082, "grad_norm": 0.0658322423696518, "learning_rate": 9.365787146938575e-05, "loss": 0.0083, "step": 8970 }, { "epoch": 1.7362616099071206, "grad_norm": 0.08453870564699173, "learning_rate": 9.365646693153944e-05, "loss": 0.008, "step": 8971 }, { "epoch": 1.736455108359133, "grad_norm": 0.09340343624353409, "learning_rate": 9.365506224997792e-05, "loss": 0.0082, "step": 8972 }, { "epoch": 1.7366486068111455, "grad_norm": 0.0760616734623909, "learning_rate": 9.365365742470637e-05, "loss": 0.0073, "step": 8973 }, { "epoch": 1.736842105263158, "grad_norm": 0.08093433082103729, "learning_rate": 9.365225245573006e-05, "loss": 0.0102, "step": 8974 }, { "epoch": 1.7370356037151704, "grad_norm": 0.12438514083623886, "learning_rate": 9.36508473430542e-05, "loss": 0.0089, "step": 8975 }, { "epoch": 1.7372291021671826, "grad_norm": 0.0932759940624237, "learning_rate": 9.364944208668403e-05, "loss": 0.0077, "step": 8976 }, { "epoch": 1.737422600619195, "grad_norm": 0.13638822734355927, "learning_rate": 9.364803668662473e-05, "loss": 0.0081, "step": 8977 }, { "epoch": 1.7376160990712073, "grad_norm": 0.06938090175390244, "learning_rate": 9.364663114288157e-05, "loss": 0.0063, "step": 8978 }, { "epoch": 1.7378095975232197, "grad_norm": 0.13079380989074707, "learning_rate": 9.364522545545975e-05, "loss": 0.0084, "step": 8979 }, { "epoch": 1.7380030959752322, "grad_norm": 0.07038510590791702, "learning_rate": 9.36438196243645e-05, "loss": 0.0078, "step": 8980 }, { "epoch": 1.7381965944272446, "grad_norm": 0.11145850270986557, "learning_rate": 9.364241364960107e-05, "loss": 0.0074, "step": 8981 }, { "epoch": 1.738390092879257, "grad_norm": 0.0883619636297226, "learning_rate": 9.364100753117465e-05, "loss": 0.0078, "step": 8982 }, { "epoch": 1.7385835913312695, "grad_norm": 0.08825631439685822, "learning_rate": 9.36396012690905e-05, "loss": 0.0082, "step": 8983 }, { "epoch": 1.7387770897832817, "grad_norm": 0.0910487174987793, "learning_rate": 9.363819486335382e-05, "loss": 0.0072, "step": 8984 }, { "epoch": 1.7389705882352942, "grad_norm": 0.08666499704122543, "learning_rate": 9.363678831396986e-05, "loss": 0.0089, "step": 8985 }, { "epoch": 1.7391640866873064, "grad_norm": 0.12930378317832947, "learning_rate": 9.363538162094384e-05, "loss": 0.0078, "step": 8986 }, { "epoch": 1.7393575851393188, "grad_norm": 0.10163883119821548, "learning_rate": 9.363397478428099e-05, "loss": 0.0085, "step": 8987 }, { "epoch": 1.7395510835913313, "grad_norm": 0.10870776325464249, "learning_rate": 9.363256780398654e-05, "loss": 0.0069, "step": 8988 }, { "epoch": 1.7397445820433437, "grad_norm": 0.041186027228832245, "learning_rate": 9.363116068006573e-05, "loss": 0.0073, "step": 8989 }, { "epoch": 1.7399380804953561, "grad_norm": 
0.1161457970738411, "learning_rate": 9.362975341252377e-05, "loss": 0.008, "step": 8990 }, { "epoch": 1.7401315789473686, "grad_norm": 0.0412922240793705, "learning_rate": 9.362834600136592e-05, "loss": 0.0086, "step": 8991 }, { "epoch": 1.7403250773993808, "grad_norm": 0.11212947964668274, "learning_rate": 9.362693844659738e-05, "loss": 0.0087, "step": 8992 }, { "epoch": 1.740518575851393, "grad_norm": 0.07530185580253601, "learning_rate": 9.362553074822342e-05, "loss": 0.0075, "step": 8993 }, { "epoch": 1.7407120743034055, "grad_norm": 0.09719480574131012, "learning_rate": 9.362412290624923e-05, "loss": 0.0095, "step": 8994 }, { "epoch": 1.740905572755418, "grad_norm": 0.07837643474340439, "learning_rate": 9.36227149206801e-05, "loss": 0.0087, "step": 8995 }, { "epoch": 1.7410990712074303, "grad_norm": 0.08476915210485458, "learning_rate": 9.362130679152121e-05, "loss": 0.0061, "step": 8996 }, { "epoch": 1.7412925696594428, "grad_norm": 0.09093080461025238, "learning_rate": 9.361989851877783e-05, "loss": 0.0084, "step": 8997 }, { "epoch": 1.7414860681114552, "grad_norm": 0.10381845384836197, "learning_rate": 9.361849010245519e-05, "loss": 0.009, "step": 8998 }, { "epoch": 1.7416795665634675, "grad_norm": 0.09963518381118774, "learning_rate": 9.361708154255851e-05, "loss": 0.0081, "step": 8999 }, { "epoch": 1.74187306501548, "grad_norm": 0.10670413076877594, "learning_rate": 9.361567283909303e-05, "loss": 0.0078, "step": 9000 }, { "epoch": 1.7420665634674921, "grad_norm": 0.07079911977052689, "learning_rate": 9.361426399206398e-05, "loss": 0.0075, "step": 9001 }, { "epoch": 1.7422600619195046, "grad_norm": 0.07260328531265259, "learning_rate": 9.361285500147664e-05, "loss": 0.0082, "step": 9002 }, { "epoch": 1.742453560371517, "grad_norm": 0.11906559020280838, "learning_rate": 9.361144586733621e-05, "loss": 0.0097, "step": 9003 }, { "epoch": 1.7426470588235294, "grad_norm": 0.08731117844581604, "learning_rate": 9.361003658964792e-05, "loss": 0.0088, "step": 9004 }, { "epoch": 1.7428405572755419, "grad_norm": 0.07712551206350327, "learning_rate": 9.360862716841706e-05, "loss": 0.0088, "step": 9005 }, { "epoch": 1.7430340557275543, "grad_norm": 0.044008564203977585, "learning_rate": 9.360721760364882e-05, "loss": 0.0067, "step": 9006 }, { "epoch": 1.7432275541795665, "grad_norm": 0.13460008800029755, "learning_rate": 9.360580789534845e-05, "loss": 0.0091, "step": 9007 }, { "epoch": 1.743421052631579, "grad_norm": 0.07863293588161469, "learning_rate": 9.360439804352122e-05, "loss": 0.0072, "step": 9008 }, { "epoch": 1.7436145510835912, "grad_norm": 0.12514987587928772, "learning_rate": 9.360298804817233e-05, "loss": 0.0075, "step": 9009 }, { "epoch": 1.7438080495356036, "grad_norm": 0.09495967626571655, "learning_rate": 9.360157790930705e-05, "loss": 0.0067, "step": 9010 }, { "epoch": 1.744001547987616, "grad_norm": 0.1132252886891365, "learning_rate": 9.360016762693062e-05, "loss": 0.0076, "step": 9011 }, { "epoch": 1.7441950464396285, "grad_norm": 0.09644313901662827, "learning_rate": 9.359875720104828e-05, "loss": 0.0063, "step": 9012 }, { "epoch": 1.744388544891641, "grad_norm": 0.10869505256414413, "learning_rate": 9.359734663166524e-05, "loss": 0.0085, "step": 9013 }, { "epoch": 1.7445820433436534, "grad_norm": 0.13334600627422333, "learning_rate": 9.359593591878682e-05, "loss": 0.0056, "step": 9014 }, { "epoch": 1.7447755417956656, "grad_norm": 0.11333681643009186, "learning_rate": 9.35945250624182e-05, "loss": 0.0093, "step": 9015 }, { "epoch": 1.744969040247678, "grad_norm": 
0.128074511885643, "learning_rate": 9.359311406256465e-05, "loss": 0.0089, "step": 9016 }, { "epoch": 1.7451625386996903, "grad_norm": 0.12198515236377716, "learning_rate": 9.359170291923141e-05, "loss": 0.0072, "step": 9017 }, { "epoch": 1.7453560371517027, "grad_norm": 0.09030265361070633, "learning_rate": 9.359029163242373e-05, "loss": 0.0065, "step": 9018 }, { "epoch": 1.7455495356037152, "grad_norm": 0.13529369235038757, "learning_rate": 9.358888020214686e-05, "loss": 0.0083, "step": 9019 }, { "epoch": 1.7457430340557276, "grad_norm": 0.08466792106628418, "learning_rate": 9.358746862840603e-05, "loss": 0.0078, "step": 9020 }, { "epoch": 1.74593653250774, "grad_norm": 0.12564779818058014, "learning_rate": 9.35860569112065e-05, "loss": 0.0086, "step": 9021 }, { "epoch": 1.7461300309597523, "grad_norm": 0.0746895894408226, "learning_rate": 9.358464505055353e-05, "loss": 0.008, "step": 9022 }, { "epoch": 1.7463235294117647, "grad_norm": 0.04030367732048035, "learning_rate": 9.358323304645235e-05, "loss": 0.0069, "step": 9023 }, { "epoch": 1.746517027863777, "grad_norm": 0.10573379695415497, "learning_rate": 9.358182089890821e-05, "loss": 0.0086, "step": 9024 }, { "epoch": 1.7467105263157894, "grad_norm": 0.04199904948472977, "learning_rate": 9.358040860792638e-05, "loss": 0.0076, "step": 9025 }, { "epoch": 1.7469040247678018, "grad_norm": 0.1042684018611908, "learning_rate": 9.357899617351208e-05, "loss": 0.0092, "step": 9026 }, { "epoch": 1.7470975232198143, "grad_norm": 0.047709669917821884, "learning_rate": 9.35775835956706e-05, "loss": 0.0085, "step": 9027 }, { "epoch": 1.7472910216718267, "grad_norm": 0.08954137563705444, "learning_rate": 9.357617087440715e-05, "loss": 0.0081, "step": 9028 }, { "epoch": 1.7474845201238391, "grad_norm": 0.06280715763568878, "learning_rate": 9.357475800972702e-05, "loss": 0.0075, "step": 9029 }, { "epoch": 1.7476780185758514, "grad_norm": 0.07662417739629745, "learning_rate": 9.357334500163542e-05, "loss": 0.0069, "step": 9030 }, { "epoch": 1.7478715170278638, "grad_norm": 0.06515716016292572, "learning_rate": 9.357193185013765e-05, "loss": 0.0078, "step": 9031 }, { "epoch": 1.748065015479876, "grad_norm": 0.08087069541215897, "learning_rate": 9.357051855523894e-05, "loss": 0.0069, "step": 9032 }, { "epoch": 1.7482585139318885, "grad_norm": 0.0633034035563469, "learning_rate": 9.356910511694454e-05, "loss": 0.0085, "step": 9033 }, { "epoch": 1.748452012383901, "grad_norm": 0.07879183441400528, "learning_rate": 9.356769153525971e-05, "loss": 0.0078, "step": 9034 }, { "epoch": 1.7486455108359134, "grad_norm": 0.08955280482769012, "learning_rate": 9.356627781018972e-05, "loss": 0.0082, "step": 9035 }, { "epoch": 1.7488390092879258, "grad_norm": 0.08283789455890656, "learning_rate": 9.35648639417398e-05, "loss": 0.0077, "step": 9036 }, { "epoch": 1.7490325077399382, "grad_norm": 0.0782046988606453, "learning_rate": 9.356344992991521e-05, "loss": 0.0086, "step": 9037 }, { "epoch": 1.7492260061919505, "grad_norm": 0.0697217807173729, "learning_rate": 9.356203577472122e-05, "loss": 0.0073, "step": 9038 }, { "epoch": 1.749419504643963, "grad_norm": 0.05372790992259979, "learning_rate": 9.356062147616309e-05, "loss": 0.0092, "step": 9039 }, { "epoch": 1.7496130030959751, "grad_norm": 0.0499885119497776, "learning_rate": 9.355920703424606e-05, "loss": 0.0077, "step": 9040 }, { "epoch": 1.7498065015479876, "grad_norm": 0.04627007246017456, "learning_rate": 9.355779244897543e-05, "loss": 0.0067, "step": 9041 }, { "epoch": 1.75, "grad_norm": 0.05457821115851402, 
"learning_rate": 9.355637772035639e-05, "loss": 0.0092, "step": 9042 }, { "epoch": 1.7501934984520124, "grad_norm": 0.03524172306060791, "learning_rate": 9.355496284839426e-05, "loss": 0.0097, "step": 9043 }, { "epoch": 1.7503869969040249, "grad_norm": 0.040521085262298584, "learning_rate": 9.355354783309427e-05, "loss": 0.0067, "step": 9044 }, { "epoch": 1.750580495356037, "grad_norm": 0.03891994431614876, "learning_rate": 9.35521326744617e-05, "loss": 0.0084, "step": 9045 }, { "epoch": 1.7507739938080495, "grad_norm": 0.03583500534296036, "learning_rate": 9.355071737250177e-05, "loss": 0.0078, "step": 9046 }, { "epoch": 1.7509674922600618, "grad_norm": 0.06621204316616058, "learning_rate": 9.35493019272198e-05, "loss": 0.0073, "step": 9047 }, { "epoch": 1.7511609907120742, "grad_norm": 0.07747359573841095, "learning_rate": 9.354788633862102e-05, "loss": 0.0078, "step": 9048 }, { "epoch": 1.7513544891640866, "grad_norm": 0.05755484849214554, "learning_rate": 9.35464706067107e-05, "loss": 0.0083, "step": 9049 }, { "epoch": 1.751547987616099, "grad_norm": 0.06632915139198303, "learning_rate": 9.354505473149409e-05, "loss": 0.0079, "step": 9050 }, { "epoch": 1.7517414860681115, "grad_norm": 0.060892682522535324, "learning_rate": 9.354363871297647e-05, "loss": 0.0086, "step": 9051 }, { "epoch": 1.751934984520124, "grad_norm": 0.10442512482404709, "learning_rate": 9.35422225511631e-05, "loss": 0.0072, "step": 9052 }, { "epoch": 1.7521284829721362, "grad_norm": 0.06627117097377777, "learning_rate": 9.354080624605924e-05, "loss": 0.0072, "step": 9053 }, { "epoch": 1.7523219814241486, "grad_norm": 0.09223341941833496, "learning_rate": 9.353938979767016e-05, "loss": 0.0085, "step": 9054 }, { "epoch": 1.7525154798761609, "grad_norm": 0.09339515119791031, "learning_rate": 9.353797320600112e-05, "loss": 0.007, "step": 9055 }, { "epoch": 1.7527089783281733, "grad_norm": 0.04613839089870453, "learning_rate": 9.35365564710574e-05, "loss": 0.0087, "step": 9056 }, { "epoch": 1.7529024767801857, "grad_norm": 0.11361808329820633, "learning_rate": 9.353513959284426e-05, "loss": 0.0074, "step": 9057 }, { "epoch": 1.7530959752321982, "grad_norm": 0.060546863824129105, "learning_rate": 9.353372257136697e-05, "loss": 0.0081, "step": 9058 }, { "epoch": 1.7532894736842106, "grad_norm": 0.109473317861557, "learning_rate": 9.353230540663078e-05, "loss": 0.0081, "step": 9059 }, { "epoch": 1.753482972136223, "grad_norm": 0.044936809688806534, "learning_rate": 9.353088809864097e-05, "loss": 0.0081, "step": 9060 }, { "epoch": 1.7536764705882353, "grad_norm": 0.08744752407073975, "learning_rate": 9.352947064740282e-05, "loss": 0.0091, "step": 9061 }, { "epoch": 1.7538699690402477, "grad_norm": 0.05662400647997856, "learning_rate": 9.352805305292161e-05, "loss": 0.0082, "step": 9062 }, { "epoch": 1.75406346749226, "grad_norm": 0.08879833668470383, "learning_rate": 9.352663531520256e-05, "loss": 0.0092, "step": 9063 }, { "epoch": 1.7542569659442724, "grad_norm": 0.05879545956850052, "learning_rate": 9.3525217434251e-05, "loss": 0.0079, "step": 9064 }, { "epoch": 1.7544504643962848, "grad_norm": 0.08851681649684906, "learning_rate": 9.352379941007216e-05, "loss": 0.0075, "step": 9065 }, { "epoch": 1.7546439628482973, "grad_norm": 0.06721804291009903, "learning_rate": 9.352238124267133e-05, "loss": 0.0078, "step": 9066 }, { "epoch": 1.7548374613003097, "grad_norm": 0.06418384611606598, "learning_rate": 9.352096293205378e-05, "loss": 0.0067, "step": 9067 }, { "epoch": 1.755030959752322, "grad_norm": 0.09616943448781967, 
"learning_rate": 9.351954447822478e-05, "loss": 0.007, "step": 9068 }, { "epoch": 1.7552244582043344, "grad_norm": 0.06313047558069229, "learning_rate": 9.351812588118961e-05, "loss": 0.008, "step": 9069 }, { "epoch": 1.7554179566563466, "grad_norm": 0.1543467491865158, "learning_rate": 9.351670714095353e-05, "loss": 0.0099, "step": 9070 }, { "epoch": 1.755611455108359, "grad_norm": 0.08490309864282608, "learning_rate": 9.351528825752183e-05, "loss": 0.0084, "step": 9071 }, { "epoch": 1.7558049535603715, "grad_norm": 0.13389967381954193, "learning_rate": 9.351386923089978e-05, "loss": 0.0084, "step": 9072 }, { "epoch": 1.755998452012384, "grad_norm": 0.11965260654687881, "learning_rate": 9.351245006109265e-05, "loss": 0.0082, "step": 9073 }, { "epoch": 1.7561919504643964, "grad_norm": 0.09671308100223541, "learning_rate": 9.351103074810571e-05, "loss": 0.0064, "step": 9074 }, { "epoch": 1.7563854489164088, "grad_norm": 0.12975060939788818, "learning_rate": 9.350961129194427e-05, "loss": 0.0079, "step": 9075 }, { "epoch": 1.756578947368421, "grad_norm": 0.08851308375597, "learning_rate": 9.350819169261356e-05, "loss": 0.0072, "step": 9076 }, { "epoch": 1.7567724458204335, "grad_norm": 0.12166611850261688, "learning_rate": 9.35067719501189e-05, "loss": 0.0086, "step": 9077 }, { "epoch": 1.7569659442724457, "grad_norm": 0.08459731936454773, "learning_rate": 9.350535206446555e-05, "loss": 0.0077, "step": 9078 }, { "epoch": 1.7571594427244581, "grad_norm": 0.13276734948158264, "learning_rate": 9.350393203565878e-05, "loss": 0.0068, "step": 9079 }, { "epoch": 1.7573529411764706, "grad_norm": 0.05510362237691879, "learning_rate": 9.350251186370388e-05, "loss": 0.0067, "step": 9080 }, { "epoch": 1.757546439628483, "grad_norm": 0.14386321604251862, "learning_rate": 9.350109154860614e-05, "loss": 0.0082, "step": 9081 }, { "epoch": 1.7577399380804954, "grad_norm": 0.03365661948919296, "learning_rate": 9.349967109037081e-05, "loss": 0.0066, "step": 9082 }, { "epoch": 1.7579334365325079, "grad_norm": 0.11355014145374298, "learning_rate": 9.34982504890032e-05, "loss": 0.0082, "step": 9083 }, { "epoch": 1.75812693498452, "grad_norm": 0.07521485537290573, "learning_rate": 9.349682974450859e-05, "loss": 0.0092, "step": 9084 }, { "epoch": 1.7583204334365325, "grad_norm": 0.10696688294410706, "learning_rate": 9.349540885689224e-05, "loss": 0.009, "step": 9085 }, { "epoch": 1.7585139318885448, "grad_norm": 0.0651780366897583, "learning_rate": 9.349398782615945e-05, "loss": 0.0075, "step": 9086 }, { "epoch": 1.7587074303405572, "grad_norm": 0.07371865957975388, "learning_rate": 9.349256665231549e-05, "loss": 0.008, "step": 9087 }, { "epoch": 1.7589009287925697, "grad_norm": 0.0404864177107811, "learning_rate": 9.349114533536565e-05, "loss": 0.0087, "step": 9088 }, { "epoch": 1.759094427244582, "grad_norm": 0.07359402626752853, "learning_rate": 9.348972387531522e-05, "loss": 0.0073, "step": 9089 }, { "epoch": 1.7592879256965945, "grad_norm": 0.04837041720747948, "learning_rate": 9.348830227216949e-05, "loss": 0.0084, "step": 9090 }, { "epoch": 1.759481424148607, "grad_norm": 0.0849715918302536, "learning_rate": 9.348688052593373e-05, "loss": 0.0085, "step": 9091 }, { "epoch": 1.7596749226006192, "grad_norm": 0.05646883323788643, "learning_rate": 9.348545863661323e-05, "loss": 0.0085, "step": 9092 }, { "epoch": 1.7598684210526314, "grad_norm": 0.09668589383363724, "learning_rate": 9.348403660421329e-05, "loss": 0.0068, "step": 9093 }, { "epoch": 1.7600619195046439, "grad_norm": 0.05147574096918106, 
"learning_rate": 9.348261442873918e-05, "loss": 0.0086, "step": 9094 }, { "epoch": 1.7602554179566563, "grad_norm": 0.1126798763871193, "learning_rate": 9.348119211019617e-05, "loss": 0.0089, "step": 9095 }, { "epoch": 1.7604489164086687, "grad_norm": 0.07724152505397797, "learning_rate": 9.347976964858959e-05, "loss": 0.0064, "step": 9096 }, { "epoch": 1.7606424148606812, "grad_norm": 0.10625948756933212, "learning_rate": 9.34783470439247e-05, "loss": 0.0079, "step": 9097 }, { "epoch": 1.7608359133126936, "grad_norm": 0.10251489281654358, "learning_rate": 9.34769242962068e-05, "loss": 0.0084, "step": 9098 }, { "epoch": 1.7610294117647058, "grad_norm": 0.0929722860455513, "learning_rate": 9.34755014054412e-05, "loss": 0.0086, "step": 9099 }, { "epoch": 1.7612229102167183, "grad_norm": 0.10825346410274506, "learning_rate": 9.347407837163314e-05, "loss": 0.0077, "step": 9100 }, { "epoch": 1.7614164086687305, "grad_norm": 0.07768982648849487, "learning_rate": 9.347265519478793e-05, "loss": 0.0083, "step": 9101 }, { "epoch": 1.761609907120743, "grad_norm": 0.14725302159786224, "learning_rate": 9.347123187491089e-05, "loss": 0.0076, "step": 9102 }, { "epoch": 1.7618034055727554, "grad_norm": 0.0823434442281723, "learning_rate": 9.346980841200727e-05, "loss": 0.0078, "step": 9103 }, { "epoch": 1.7619969040247678, "grad_norm": 0.12027385085821152, "learning_rate": 9.346838480608239e-05, "loss": 0.0071, "step": 9104 }, { "epoch": 1.7621904024767803, "grad_norm": 0.044342298060655594, "learning_rate": 9.346696105714153e-05, "loss": 0.008, "step": 9105 }, { "epoch": 1.7623839009287927, "grad_norm": 0.10778414458036423, "learning_rate": 9.346553716518999e-05, "loss": 0.0076, "step": 9106 }, { "epoch": 1.762577399380805, "grad_norm": 0.03955888748168945, "learning_rate": 9.346411313023307e-05, "loss": 0.0072, "step": 9107 }, { "epoch": 1.7627708978328174, "grad_norm": 0.08577337116003036, "learning_rate": 9.346268895227604e-05, "loss": 0.0084, "step": 9108 }, { "epoch": 1.7629643962848296, "grad_norm": 0.03910830244421959, "learning_rate": 9.346126463132422e-05, "loss": 0.0086, "step": 9109 }, { "epoch": 1.763157894736842, "grad_norm": 0.06555704027414322, "learning_rate": 9.345984016738288e-05, "loss": 0.0081, "step": 9110 }, { "epoch": 1.7633513931888545, "grad_norm": 0.04284244030714035, "learning_rate": 9.345841556045735e-05, "loss": 0.008, "step": 9111 }, { "epoch": 1.763544891640867, "grad_norm": 0.07425669580698013, "learning_rate": 9.34569908105529e-05, "loss": 0.0076, "step": 9112 }, { "epoch": 1.7637383900928794, "grad_norm": 0.05293889343738556, "learning_rate": 9.345556591767484e-05, "loss": 0.0079, "step": 9113 }, { "epoch": 1.7639318885448918, "grad_norm": 0.09911596029996872, "learning_rate": 9.345414088182845e-05, "loss": 0.0073, "step": 9114 }, { "epoch": 1.764125386996904, "grad_norm": 0.04695146903395653, "learning_rate": 9.345271570301904e-05, "loss": 0.0075, "step": 9115 }, { "epoch": 1.7643188854489165, "grad_norm": 0.12390143424272537, "learning_rate": 9.345129038125193e-05, "loss": 0.0053, "step": 9116 }, { "epoch": 1.7645123839009287, "grad_norm": 0.04232940450310707, "learning_rate": 9.344986491653238e-05, "loss": 0.007, "step": 9117 }, { "epoch": 1.7647058823529411, "grad_norm": 0.08222822844982147, "learning_rate": 9.34484393088657e-05, "loss": 0.0108, "step": 9118 }, { "epoch": 1.7648993808049536, "grad_norm": 0.036712076514959335, "learning_rate": 9.344701355825721e-05, "loss": 0.0082, "step": 9119 }, { "epoch": 1.765092879256966, "grad_norm": 0.036605916917324066, 
"learning_rate": 9.344558766471219e-05, "loss": 0.0084, "step": 9120 }, { "epoch": 1.7652863777089784, "grad_norm": 0.030971640720963478, "learning_rate": 9.344416162823595e-05, "loss": 0.0075, "step": 9121 }, { "epoch": 1.7654798761609907, "grad_norm": 0.03237232565879822, "learning_rate": 9.344273544883378e-05, "loss": 0.0069, "step": 9122 }, { "epoch": 1.765673374613003, "grad_norm": 0.03286939486861229, "learning_rate": 9.344130912651101e-05, "loss": 0.0083, "step": 9123 }, { "epoch": 1.7658668730650153, "grad_norm": 0.03387470170855522, "learning_rate": 9.343988266127291e-05, "loss": 0.0089, "step": 9124 }, { "epoch": 1.7660603715170278, "grad_norm": 0.06490505486726761, "learning_rate": 9.34384560531248e-05, "loss": 0.0068, "step": 9125 }, { "epoch": 1.7662538699690402, "grad_norm": 0.04232759773731232, "learning_rate": 9.343702930207199e-05, "loss": 0.0082, "step": 9126 }, { "epoch": 1.7664473684210527, "grad_norm": 0.06450462341308594, "learning_rate": 9.343560240811978e-05, "loss": 0.008, "step": 9127 }, { "epoch": 1.766640866873065, "grad_norm": 0.03167640045285225, "learning_rate": 9.343417537127345e-05, "loss": 0.0079, "step": 9128 }, { "epoch": 1.7668343653250775, "grad_norm": 0.0411144457757473, "learning_rate": 9.343274819153834e-05, "loss": 0.0079, "step": 9129 }, { "epoch": 1.7670278637770898, "grad_norm": 0.03343309462070465, "learning_rate": 9.343132086891975e-05, "loss": 0.0078, "step": 9130 }, { "epoch": 1.7672213622291022, "grad_norm": 0.028844701126217842, "learning_rate": 9.342989340342298e-05, "loss": 0.0076, "step": 9131 }, { "epoch": 1.7674148606811144, "grad_norm": 0.03462404012680054, "learning_rate": 9.342846579505333e-05, "loss": 0.0084, "step": 9132 }, { "epoch": 1.7676083591331269, "grad_norm": 0.04815651848912239, "learning_rate": 9.34270380438161e-05, "loss": 0.008, "step": 9133 }, { "epoch": 1.7678018575851393, "grad_norm": 0.1058100163936615, "learning_rate": 9.342561014971662e-05, "loss": 0.0086, "step": 9134 }, { "epoch": 1.7679953560371517, "grad_norm": 0.0374368317425251, "learning_rate": 9.34241821127602e-05, "loss": 0.0067, "step": 9135 }, { "epoch": 1.7681888544891642, "grad_norm": 0.08695320039987564, "learning_rate": 9.342275393295212e-05, "loss": 0.0066, "step": 9136 }, { "epoch": 1.7683823529411766, "grad_norm": 0.0625234991312027, "learning_rate": 9.342132561029772e-05, "loss": 0.0082, "step": 9137 }, { "epoch": 1.7685758513931888, "grad_norm": 0.07818096876144409, "learning_rate": 9.341989714480229e-05, "loss": 0.008, "step": 9138 }, { "epoch": 1.7687693498452013, "grad_norm": 0.07625662535429001, "learning_rate": 9.341846853647116e-05, "loss": 0.0083, "step": 9139 }, { "epoch": 1.7689628482972135, "grad_norm": 0.07845508307218552, "learning_rate": 9.341703978530964e-05, "loss": 0.0069, "step": 9140 }, { "epoch": 1.769156346749226, "grad_norm": 0.05690687894821167, "learning_rate": 9.3415610891323e-05, "loss": 0.0091, "step": 9141 }, { "epoch": 1.7693498452012384, "grad_norm": 0.06174099072813988, "learning_rate": 9.341418185451662e-05, "loss": 0.0066, "step": 9142 }, { "epoch": 1.7695433436532508, "grad_norm": 0.07362616807222366, "learning_rate": 9.341275267489575e-05, "loss": 0.0074, "step": 9143 }, { "epoch": 1.7697368421052633, "grad_norm": 0.06421858817338943, "learning_rate": 9.341132335246575e-05, "loss": 0.007, "step": 9144 }, { "epoch": 1.7699303405572755, "grad_norm": 0.07287311553955078, "learning_rate": 9.340989388723191e-05, "loss": 0.0092, "step": 9145 }, { "epoch": 1.770123839009288, "grad_norm": 0.04071684554219246, 
"learning_rate": 9.340846427919953e-05, "loss": 0.0071, "step": 9146 }, { "epoch": 1.7703173374613002, "grad_norm": 0.0625297874212265, "learning_rate": 9.340703452837396e-05, "loss": 0.0087, "step": 9147 }, { "epoch": 1.7705108359133126, "grad_norm": 0.032520879060029984, "learning_rate": 9.34056046347605e-05, "loss": 0.0085, "step": 9148 }, { "epoch": 1.770704334365325, "grad_norm": 0.06094203516840935, "learning_rate": 9.340417459836447e-05, "loss": 0.0091, "step": 9149 }, { "epoch": 1.7708978328173375, "grad_norm": 0.042104240506887436, "learning_rate": 9.340274441919118e-05, "loss": 0.0076, "step": 9150 }, { "epoch": 1.77109133126935, "grad_norm": 0.07086751610040665, "learning_rate": 9.340131409724593e-05, "loss": 0.0065, "step": 9151 }, { "epoch": 1.7712848297213624, "grad_norm": 0.0752350315451622, "learning_rate": 9.339988363253408e-05, "loss": 0.0092, "step": 9152 }, { "epoch": 1.7714783281733746, "grad_norm": 0.07061953097581863, "learning_rate": 9.339845302506091e-05, "loss": 0.0077, "step": 9153 }, { "epoch": 1.771671826625387, "grad_norm": 0.06470499187707901, "learning_rate": 9.339702227483176e-05, "loss": 0.0066, "step": 9154 }, { "epoch": 1.7718653250773992, "grad_norm": 0.09472563862800598, "learning_rate": 9.339559138185195e-05, "loss": 0.0066, "step": 9155 }, { "epoch": 1.7720588235294117, "grad_norm": 0.04706036299467087, "learning_rate": 9.339416034612676e-05, "loss": 0.0089, "step": 9156 }, { "epoch": 1.7722523219814241, "grad_norm": 0.08486729115247726, "learning_rate": 9.339272916766157e-05, "loss": 0.0076, "step": 9157 }, { "epoch": 1.7724458204334366, "grad_norm": 0.05294545367360115, "learning_rate": 9.339129784646167e-05, "loss": 0.0092, "step": 9158 }, { "epoch": 1.772639318885449, "grad_norm": 0.0784020870923996, "learning_rate": 9.338986638253239e-05, "loss": 0.008, "step": 9159 }, { "epoch": 1.7728328173374615, "grad_norm": 0.07350555062294006, "learning_rate": 9.338843477587903e-05, "loss": 0.0084, "step": 9160 }, { "epoch": 1.7730263157894737, "grad_norm": 0.08495240658521652, "learning_rate": 9.338700302650695e-05, "loss": 0.0077, "step": 9161 }, { "epoch": 1.7732198142414861, "grad_norm": 0.05744810029864311, "learning_rate": 9.338557113442143e-05, "loss": 0.0078, "step": 9162 }, { "epoch": 1.7734133126934983, "grad_norm": 0.060911715030670166, "learning_rate": 9.338413909962782e-05, "loss": 0.0081, "step": 9163 }, { "epoch": 1.7736068111455108, "grad_norm": 0.06408558785915375, "learning_rate": 9.338270692213146e-05, "loss": 0.009, "step": 9164 }, { "epoch": 1.7738003095975232, "grad_norm": 0.048294633626937866, "learning_rate": 9.338127460193764e-05, "loss": 0.0087, "step": 9165 }, { "epoch": 1.7739938080495357, "grad_norm": 0.05503982678055763, "learning_rate": 9.337984213905169e-05, "loss": 0.0062, "step": 9166 }, { "epoch": 1.774187306501548, "grad_norm": 0.044172193855047226, "learning_rate": 9.337840953347895e-05, "loss": 0.0083, "step": 9167 }, { "epoch": 1.7743808049535603, "grad_norm": 0.07443083822727203, "learning_rate": 9.337697678522473e-05, "loss": 0.0077, "step": 9168 }, { "epoch": 1.7745743034055728, "grad_norm": 0.03173499554395676, "learning_rate": 9.337554389429438e-05, "loss": 0.0086, "step": 9169 }, { "epoch": 1.774767801857585, "grad_norm": 0.08165308088064194, "learning_rate": 9.337411086069322e-05, "loss": 0.0089, "step": 9170 }, { "epoch": 1.7749613003095974, "grad_norm": 0.047340091317892075, "learning_rate": 9.337267768442656e-05, "loss": 0.0093, "step": 9171 }, { "epoch": 1.7751547987616099, "grad_norm": 0.04976982995867729, 
"learning_rate": 9.337124436549973e-05, "loss": 0.0068, "step": 9172 }, { "epoch": 1.7753482972136223, "grad_norm": 0.04447908326983452, "learning_rate": 9.336981090391809e-05, "loss": 0.0076, "step": 9173 }, { "epoch": 1.7755417956656347, "grad_norm": 0.04649007320404053, "learning_rate": 9.336837729968693e-05, "loss": 0.0066, "step": 9174 }, { "epoch": 1.7757352941176472, "grad_norm": 0.039206091314554214, "learning_rate": 9.336694355281161e-05, "loss": 0.0087, "step": 9175 }, { "epoch": 1.7759287925696594, "grad_norm": 0.03637313097715378, "learning_rate": 9.336550966329742e-05, "loss": 0.0077, "step": 9176 }, { "epoch": 1.7761222910216719, "grad_norm": 0.04390854388475418, "learning_rate": 9.336407563114976e-05, "loss": 0.0092, "step": 9177 }, { "epoch": 1.776315789473684, "grad_norm": 0.0791010931134224, "learning_rate": 9.33626414563739e-05, "loss": 0.0078, "step": 9178 }, { "epoch": 1.7765092879256965, "grad_norm": 0.07375498861074448, "learning_rate": 9.336120713897519e-05, "loss": 0.0066, "step": 9179 }, { "epoch": 1.776702786377709, "grad_norm": 0.0532676987349987, "learning_rate": 9.335977267895895e-05, "loss": 0.0076, "step": 9180 }, { "epoch": 1.7768962848297214, "grad_norm": 0.03729427978396416, "learning_rate": 9.335833807633056e-05, "loss": 0.0073, "step": 9181 }, { "epoch": 1.7770897832817338, "grad_norm": 0.049838658422231674, "learning_rate": 9.33569033310953e-05, "loss": 0.0072, "step": 9182 }, { "epoch": 1.7772832817337463, "grad_norm": 0.037617508322000504, "learning_rate": 9.335546844325852e-05, "loss": 0.0073, "step": 9183 }, { "epoch": 1.7774767801857585, "grad_norm": 0.04165686294436455, "learning_rate": 9.335403341282558e-05, "loss": 0.0067, "step": 9184 }, { "epoch": 1.777670278637771, "grad_norm": 0.06625290960073471, "learning_rate": 9.335259823980177e-05, "loss": 0.008, "step": 9185 }, { "epoch": 1.7778637770897832, "grad_norm": 0.03374223783612251, "learning_rate": 9.335116292419247e-05, "loss": 0.007, "step": 9186 }, { "epoch": 1.7780572755417956, "grad_norm": 0.07770615071058273, "learning_rate": 9.3349727466003e-05, "loss": 0.0061, "step": 9187 }, { "epoch": 1.778250773993808, "grad_norm": 0.04100207984447479, "learning_rate": 9.334829186523868e-05, "loss": 0.0076, "step": 9188 }, { "epoch": 1.7784442724458205, "grad_norm": 0.0674772560596466, "learning_rate": 9.334685612190487e-05, "loss": 0.0063, "step": 9189 }, { "epoch": 1.778637770897833, "grad_norm": 0.06202884390950203, "learning_rate": 9.334542023600689e-05, "loss": 0.0092, "step": 9190 }, { "epoch": 1.7788312693498454, "grad_norm": 0.05427418649196625, "learning_rate": 9.334398420755007e-05, "loss": 0.0079, "step": 9191 }, { "epoch": 1.7790247678018576, "grad_norm": 0.04995308816432953, "learning_rate": 9.334254803653978e-05, "loss": 0.0077, "step": 9192 }, { "epoch": 1.7792182662538698, "grad_norm": 0.03909391164779663, "learning_rate": 9.334111172298136e-05, "loss": 0.0088, "step": 9193 }, { "epoch": 1.7794117647058822, "grad_norm": 0.06994988769292831, "learning_rate": 9.33396752668801e-05, "loss": 0.0074, "step": 9194 }, { "epoch": 1.7796052631578947, "grad_norm": 0.04457845911383629, "learning_rate": 9.333823866824141e-05, "loss": 0.0081, "step": 9195 }, { "epoch": 1.7797987616099071, "grad_norm": 0.04844571650028229, "learning_rate": 9.333680192707058e-05, "loss": 0.0085, "step": 9196 }, { "epoch": 1.7799922600619196, "grad_norm": 0.03625988960266113, "learning_rate": 9.333536504337296e-05, "loss": 0.007, "step": 9197 }, { "epoch": 1.780185758513932, "grad_norm": 0.042850639671087265, 
"learning_rate": 9.333392801715391e-05, "loss": 0.007, "step": 9198 }, { "epoch": 1.7803792569659442, "grad_norm": 0.048151735216379166, "learning_rate": 9.333249084841875e-05, "loss": 0.0076, "step": 9199 }, { "epoch": 1.7805727554179567, "grad_norm": 0.040568459779024124, "learning_rate": 9.333105353717283e-05, "loss": 0.0062, "step": 9200 }, { "epoch": 1.780766253869969, "grad_norm": 0.05166339501738548, "learning_rate": 9.332961608342153e-05, "loss": 0.0083, "step": 9201 }, { "epoch": 1.7809597523219813, "grad_norm": 0.03340071812272072, "learning_rate": 9.332817848717014e-05, "loss": 0.0086, "step": 9202 }, { "epoch": 1.7811532507739938, "grad_norm": 0.06285714358091354, "learning_rate": 9.332674074842402e-05, "loss": 0.0084, "step": 9203 }, { "epoch": 1.7813467492260062, "grad_norm": 0.05345407873392105, "learning_rate": 9.332530286718853e-05, "loss": 0.0077, "step": 9204 }, { "epoch": 1.7815402476780187, "grad_norm": 0.1099502295255661, "learning_rate": 9.332386484346902e-05, "loss": 0.0081, "step": 9205 }, { "epoch": 1.781733746130031, "grad_norm": 0.053551189601421356, "learning_rate": 9.33224266772708e-05, "loss": 0.0078, "step": 9206 }, { "epoch": 1.7819272445820433, "grad_norm": 0.13457854092121124, "learning_rate": 9.332098836859925e-05, "loss": 0.0056, "step": 9207 }, { "epoch": 1.7821207430340558, "grad_norm": 0.040261756628751755, "learning_rate": 9.331954991745972e-05, "loss": 0.008, "step": 9208 }, { "epoch": 1.782314241486068, "grad_norm": 0.0939384177327156, "learning_rate": 9.331811132385751e-05, "loss": 0.0076, "step": 9209 }, { "epoch": 1.7825077399380804, "grad_norm": 0.08196628093719482, "learning_rate": 9.331667258779804e-05, "loss": 0.0067, "step": 9210 }, { "epoch": 1.7827012383900929, "grad_norm": 0.07054828107357025, "learning_rate": 9.331523370928661e-05, "loss": 0.0078, "step": 9211 }, { "epoch": 1.7828947368421053, "grad_norm": 0.09243495762348175, "learning_rate": 9.331379468832858e-05, "loss": 0.0085, "step": 9212 }, { "epoch": 1.7830882352941178, "grad_norm": 0.050449859350919724, "learning_rate": 9.331235552492931e-05, "loss": 0.008, "step": 9213 }, { "epoch": 1.7832817337461302, "grad_norm": 0.09450121968984604, "learning_rate": 9.331091621909414e-05, "loss": 0.0087, "step": 9214 }, { "epoch": 1.7834752321981424, "grad_norm": 0.10514383763074875, "learning_rate": 9.330947677082841e-05, "loss": 0.0088, "step": 9215 }, { "epoch": 1.7836687306501546, "grad_norm": 0.10657744854688644, "learning_rate": 9.33080371801375e-05, "loss": 0.0078, "step": 9216 }, { "epoch": 1.783862229102167, "grad_norm": 0.10142548382282257, "learning_rate": 9.330659744702675e-05, "loss": 0.0071, "step": 9217 }, { "epoch": 1.7840557275541795, "grad_norm": 0.06199008598923683, "learning_rate": 9.33051575715015e-05, "loss": 0.0082, "step": 9218 }, { "epoch": 1.784249226006192, "grad_norm": 0.11532391607761383, "learning_rate": 9.330371755356711e-05, "loss": 0.0077, "step": 9219 }, { "epoch": 1.7844427244582044, "grad_norm": 0.030919227749109268, "learning_rate": 9.330227739322895e-05, "loss": 0.0072, "step": 9220 }, { "epoch": 1.7846362229102168, "grad_norm": 0.11210577934980392, "learning_rate": 9.330083709049235e-05, "loss": 0.0085, "step": 9221 }, { "epoch": 1.784829721362229, "grad_norm": 0.038136955350637436, "learning_rate": 9.329939664536268e-05, "loss": 0.0074, "step": 9222 }, { "epoch": 1.7850232198142415, "grad_norm": 0.07189347594976425, "learning_rate": 9.329795605784529e-05, "loss": 0.0073, "step": 9223 }, { "epoch": 1.7852167182662537, "grad_norm": 0.06682798266410828, 
"learning_rate": 9.329651532794553e-05, "loss": 0.0068, "step": 9224 }, { "epoch": 1.7854102167182662, "grad_norm": 0.07017219066619873, "learning_rate": 9.329507445566877e-05, "loss": 0.0078, "step": 9225 }, { "epoch": 1.7856037151702786, "grad_norm": 0.05507413670420647, "learning_rate": 9.329363344102034e-05, "loss": 0.0068, "step": 9226 }, { "epoch": 1.785797213622291, "grad_norm": 0.040509335696697235, "learning_rate": 9.329219228400564e-05, "loss": 0.01, "step": 9227 }, { "epoch": 1.7859907120743035, "grad_norm": 0.06660272926092148, "learning_rate": 9.329075098463002e-05, "loss": 0.0087, "step": 9228 }, { "epoch": 1.786184210526316, "grad_norm": 0.03632286190986633, "learning_rate": 9.328930954289878e-05, "loss": 0.0079, "step": 9229 }, { "epoch": 1.7863777089783281, "grad_norm": 0.060426659882068634, "learning_rate": 9.328786795881736e-05, "loss": 0.0079, "step": 9230 }, { "epoch": 1.7865712074303406, "grad_norm": 0.030067071318626404, "learning_rate": 9.328642623239104e-05, "loss": 0.0076, "step": 9231 }, { "epoch": 1.7867647058823528, "grad_norm": 0.0526820570230484, "learning_rate": 9.328498436362526e-05, "loss": 0.0077, "step": 9232 }, { "epoch": 1.7869582043343653, "grad_norm": 0.04980522394180298, "learning_rate": 9.328354235252532e-05, "loss": 0.0073, "step": 9233 }, { "epoch": 1.7871517027863777, "grad_norm": 0.060433726757764816, "learning_rate": 9.328210019909662e-05, "loss": 0.0071, "step": 9234 }, { "epoch": 1.7873452012383901, "grad_norm": 0.05180038511753082, "learning_rate": 9.328065790334449e-05, "loss": 0.0081, "step": 9235 }, { "epoch": 1.7875386996904026, "grad_norm": 0.04473099112510681, "learning_rate": 9.327921546527431e-05, "loss": 0.0064, "step": 9236 }, { "epoch": 1.787732198142415, "grad_norm": 0.05814904347062111, "learning_rate": 9.327777288489144e-05, "loss": 0.0083, "step": 9237 }, { "epoch": 1.7879256965944272, "grad_norm": 0.052500881254673004, "learning_rate": 9.327633016220124e-05, "loss": 0.0079, "step": 9238 }, { "epoch": 1.7881191950464397, "grad_norm": 0.07584837824106216, "learning_rate": 9.327488729720907e-05, "loss": 0.0069, "step": 9239 }, { "epoch": 1.788312693498452, "grad_norm": 0.0394962914288044, "learning_rate": 9.327344428992031e-05, "loss": 0.0074, "step": 9240 }, { "epoch": 1.7885061919504643, "grad_norm": 0.08506155014038086, "learning_rate": 9.327200114034032e-05, "loss": 0.0076, "step": 9241 }, { "epoch": 1.7886996904024768, "grad_norm": 0.040241166949272156, "learning_rate": 9.327055784847445e-05, "loss": 0.0077, "step": 9242 }, { "epoch": 1.7888931888544892, "grad_norm": 0.06682693213224411, "learning_rate": 9.326911441432809e-05, "loss": 0.0073, "step": 9243 }, { "epoch": 1.7890866873065017, "grad_norm": 0.04448238015174866, "learning_rate": 9.326767083790657e-05, "loss": 0.0086, "step": 9244 }, { "epoch": 1.7892801857585139, "grad_norm": 0.06944440305233002, "learning_rate": 9.326622711921529e-05, "loss": 0.009, "step": 9245 }, { "epoch": 1.7894736842105263, "grad_norm": 0.035376403480768204, "learning_rate": 9.326478325825961e-05, "loss": 0.0094, "step": 9246 }, { "epoch": 1.7896671826625385, "grad_norm": 0.058407459408044815, "learning_rate": 9.326333925504489e-05, "loss": 0.0091, "step": 9247 }, { "epoch": 1.789860681114551, "grad_norm": 0.046980682760477066, "learning_rate": 9.326189510957651e-05, "loss": 0.0092, "step": 9248 }, { "epoch": 1.7900541795665634, "grad_norm": 0.061889875680208206, "learning_rate": 9.326045082185982e-05, "loss": 0.0084, "step": 9249 }, { "epoch": 1.7902476780185759, "grad_norm": 
0.0334005206823349, "learning_rate": 9.325900639190022e-05, "loss": 0.0072, "step": 9250 }, { "epoch": 1.7904411764705883, "grad_norm": 0.059620149433612823, "learning_rate": 9.325756181970304e-05, "loss": 0.0088, "step": 9251 }, { "epoch": 1.7906346749226008, "grad_norm": 0.03658619523048401, "learning_rate": 9.325611710527368e-05, "loss": 0.0085, "step": 9252 }, { "epoch": 1.790828173374613, "grad_norm": 0.08799053728580475, "learning_rate": 9.32546722486175e-05, "loss": 0.0087, "step": 9253 }, { "epoch": 1.7910216718266254, "grad_norm": 0.036394115537405014, "learning_rate": 9.32532272497399e-05, "loss": 0.0073, "step": 9254 }, { "epoch": 1.7912151702786376, "grad_norm": 0.08918153494596481, "learning_rate": 9.32517821086462e-05, "loss": 0.008, "step": 9255 }, { "epoch": 1.79140866873065, "grad_norm": 0.054097216576337814, "learning_rate": 9.32503368253418e-05, "loss": 0.0096, "step": 9256 }, { "epoch": 1.7916021671826625, "grad_norm": 0.08971463888883591, "learning_rate": 9.324889139983208e-05, "loss": 0.0089, "step": 9257 }, { "epoch": 1.791795665634675, "grad_norm": 0.0755169540643692, "learning_rate": 9.324744583212241e-05, "loss": 0.0085, "step": 9258 }, { "epoch": 1.7919891640866874, "grad_norm": 0.07475478947162628, "learning_rate": 9.324600012221816e-05, "loss": 0.0069, "step": 9259 }, { "epoch": 1.7921826625386998, "grad_norm": 0.12686072289943695, "learning_rate": 9.32445542701247e-05, "loss": 0.0075, "step": 9260 }, { "epoch": 1.792376160990712, "grad_norm": 0.06896855682134628, "learning_rate": 9.324310827584741e-05, "loss": 0.0084, "step": 9261 }, { "epoch": 1.7925696594427245, "grad_norm": 0.15914084017276764, "learning_rate": 9.324166213939167e-05, "loss": 0.0078, "step": 9262 }, { "epoch": 1.7927631578947367, "grad_norm": 0.06899300217628479, "learning_rate": 9.324021586076285e-05, "loss": 0.0085, "step": 9263 }, { "epoch": 1.7929566563467492, "grad_norm": 0.15357443690299988, "learning_rate": 9.323876943996634e-05, "loss": 0.0081, "step": 9264 }, { "epoch": 1.7931501547987616, "grad_norm": 0.04420125111937523, "learning_rate": 9.32373228770075e-05, "loss": 0.008, "step": 9265 }, { "epoch": 1.793343653250774, "grad_norm": 0.11729753017425537, "learning_rate": 9.323587617189171e-05, "loss": 0.0103, "step": 9266 }, { "epoch": 1.7935371517027865, "grad_norm": 0.1430460512638092, "learning_rate": 9.323442932462435e-05, "loss": 0.0068, "step": 9267 }, { "epoch": 1.7937306501547987, "grad_norm": 0.16284498572349548, "learning_rate": 9.323298233521083e-05, "loss": 0.0071, "step": 9268 }, { "epoch": 1.7939241486068112, "grad_norm": 0.14919008314609528, "learning_rate": 9.323153520365647e-05, "loss": 0.009, "step": 9269 }, { "epoch": 1.7941176470588234, "grad_norm": 0.08583283424377441, "learning_rate": 9.323008792996671e-05, "loss": 0.0077, "step": 9270 }, { "epoch": 1.7943111455108358, "grad_norm": 0.16279460489749908, "learning_rate": 9.322864051414688e-05, "loss": 0.0079, "step": 9271 }, { "epoch": 1.7945046439628483, "grad_norm": 0.08600988984107971, "learning_rate": 9.32271929562024e-05, "loss": 0.0077, "step": 9272 }, { "epoch": 1.7946981424148607, "grad_norm": 0.17906653881072998, "learning_rate": 9.322574525613862e-05, "loss": 0.0089, "step": 9273 }, { "epoch": 1.7948916408668731, "grad_norm": 0.16893650591373444, "learning_rate": 9.322429741396097e-05, "loss": 0.008, "step": 9274 }, { "epoch": 1.7950851393188856, "grad_norm": 0.10982337594032288, "learning_rate": 9.322284942967479e-05, "loss": 0.0091, "step": 9275 }, { "epoch": 1.7952786377708978, "grad_norm": 
0.22956766188144684, "learning_rate": 9.322140130328545e-05, "loss": 0.0089, "step": 9276 }, { "epoch": 1.7954721362229102, "grad_norm": 0.055851273238658905, "learning_rate": 9.321995303479837e-05, "loss": 0.008, "step": 9277 }, { "epoch": 1.7956656346749225, "grad_norm": 0.204532191157341, "learning_rate": 9.321850462421893e-05, "loss": 0.0079, "step": 9278 }, { "epoch": 1.795859133126935, "grad_norm": 0.12456635385751724, "learning_rate": 9.321705607155248e-05, "loss": 0.0091, "step": 9279 }, { "epoch": 1.7960526315789473, "grad_norm": 0.1522350013256073, "learning_rate": 9.321560737680447e-05, "loss": 0.0088, "step": 9280 }, { "epoch": 1.7962461300309598, "grad_norm": 0.15834742784500122, "learning_rate": 9.321415853998024e-05, "loss": 0.0084, "step": 9281 }, { "epoch": 1.7964396284829722, "grad_norm": 0.049672745168209076, "learning_rate": 9.321270956108518e-05, "loss": 0.0081, "step": 9282 }, { "epoch": 1.7966331269349847, "grad_norm": 0.18830879032611847, "learning_rate": 9.321126044012468e-05, "loss": 0.0086, "step": 9283 }, { "epoch": 1.796826625386997, "grad_norm": 0.1357068419456482, "learning_rate": 9.320981117710412e-05, "loss": 0.0067, "step": 9284 }, { "epoch": 1.7970201238390093, "grad_norm": 0.1339716613292694, "learning_rate": 9.320836177202891e-05, "loss": 0.0091, "step": 9285 }, { "epoch": 1.7972136222910216, "grad_norm": 0.16985966265201569, "learning_rate": 9.320691222490442e-05, "loss": 0.0089, "step": 9286 }, { "epoch": 1.797407120743034, "grad_norm": 0.05727818235754967, "learning_rate": 9.320546253573604e-05, "loss": 0.0074, "step": 9287 }, { "epoch": 1.7976006191950464, "grad_norm": 0.1802343726158142, "learning_rate": 9.320401270452917e-05, "loss": 0.0084, "step": 9288 }, { "epoch": 1.7977941176470589, "grad_norm": 0.06553859263658524, "learning_rate": 9.32025627312892e-05, "loss": 0.0068, "step": 9289 }, { "epoch": 1.7979876160990713, "grad_norm": 0.11646780371665955, "learning_rate": 9.32011126160215e-05, "loss": 0.0073, "step": 9290 }, { "epoch": 1.7981811145510835, "grad_norm": 0.11129608750343323, "learning_rate": 9.31996623587315e-05, "loss": 0.0074, "step": 9291 }, { "epoch": 1.798374613003096, "grad_norm": 0.08997514098882675, "learning_rate": 9.319821195942455e-05, "loss": 0.0089, "step": 9292 }, { "epoch": 1.7985681114551082, "grad_norm": 0.1181131899356842, "learning_rate": 9.319676141810606e-05, "loss": 0.0092, "step": 9293 }, { "epoch": 1.7987616099071206, "grad_norm": 0.08357509970664978, "learning_rate": 9.319531073478144e-05, "loss": 0.0083, "step": 9294 }, { "epoch": 1.798955108359133, "grad_norm": 0.07895857095718384, "learning_rate": 9.319385990945603e-05, "loss": 0.008, "step": 9295 }, { "epoch": 1.7991486068111455, "grad_norm": 0.07434042543172836, "learning_rate": 9.319240894213531e-05, "loss": 0.0077, "step": 9296 }, { "epoch": 1.799342105263158, "grad_norm": 0.035910628736019135, "learning_rate": 9.319095783282459e-05, "loss": 0.007, "step": 9297 }, { "epoch": 1.7995356037151704, "grad_norm": 0.13525702059268951, "learning_rate": 9.318950658152931e-05, "loss": 0.008, "step": 9298 }, { "epoch": 1.7997291021671826, "grad_norm": 0.03834801912307739, "learning_rate": 9.318805518825485e-05, "loss": 0.0087, "step": 9299 }, { "epoch": 1.799922600619195, "grad_norm": 0.12557430565357208, "learning_rate": 9.318660365300661e-05, "loss": 0.0076, "step": 9300 }, { "epoch": 1.8001160990712073, "grad_norm": 0.0491446889936924, "learning_rate": 9.318515197578998e-05, "loss": 0.008, "step": 9301 }, { "epoch": 1.8003095975232197, "grad_norm": 
0.10151313990354538, "learning_rate": 9.318370015661037e-05, "loss": 0.0088, "step": 9302 }, { "epoch": 1.8005030959752322, "grad_norm": 0.07345975190401077, "learning_rate": 9.318224819547318e-05, "loss": 0.0066, "step": 9303 }, { "epoch": 1.8006965944272446, "grad_norm": 0.0843176320195198, "learning_rate": 9.31807960923838e-05, "loss": 0.0086, "step": 9304 }, { "epoch": 1.800890092879257, "grad_norm": 0.0970454216003418, "learning_rate": 9.317934384734762e-05, "loss": 0.0081, "step": 9305 }, { "epoch": 1.8010835913312695, "grad_norm": 0.09013096243143082, "learning_rate": 9.317789146037005e-05, "loss": 0.0079, "step": 9306 }, { "epoch": 1.8012770897832817, "grad_norm": 0.06274912506341934, "learning_rate": 9.317643893145649e-05, "loss": 0.0068, "step": 9307 }, { "epoch": 1.8014705882352942, "grad_norm": 0.05790187418460846, "learning_rate": 9.317498626061234e-05, "loss": 0.006, "step": 9308 }, { "epoch": 1.8016640866873064, "grad_norm": 0.03934548795223236, "learning_rate": 9.317353344784298e-05, "loss": 0.007, "step": 9309 }, { "epoch": 1.8018575851393188, "grad_norm": 0.0673842802643776, "learning_rate": 9.317208049315383e-05, "loss": 0.011, "step": 9310 }, { "epoch": 1.8020510835913313, "grad_norm": 0.1261289268732071, "learning_rate": 9.317062739655031e-05, "loss": 0.0075, "step": 9311 }, { "epoch": 1.8022445820433437, "grad_norm": 0.04775868356227875, "learning_rate": 9.316917415803779e-05, "loss": 0.0079, "step": 9312 }, { "epoch": 1.8024380804953561, "grad_norm": 0.07253622263669968, "learning_rate": 9.31677207776217e-05, "loss": 0.0084, "step": 9313 }, { "epoch": 1.8026315789473686, "grad_norm": 0.06709126383066177, "learning_rate": 9.316626725530741e-05, "loss": 0.0083, "step": 9314 }, { "epoch": 1.8028250773993808, "grad_norm": 0.03838442265987396, "learning_rate": 9.316481359110035e-05, "loss": 0.0083, "step": 9315 }, { "epoch": 1.803018575851393, "grad_norm": 0.05551040917634964, "learning_rate": 9.316335978500592e-05, "loss": 0.008, "step": 9316 }, { "epoch": 1.8032120743034055, "grad_norm": 0.03263905271887779, "learning_rate": 9.316190583702952e-05, "loss": 0.0077, "step": 9317 }, { "epoch": 1.803405572755418, "grad_norm": 0.05838670954108238, "learning_rate": 9.316045174717655e-05, "loss": 0.0063, "step": 9318 }, { "epoch": 1.8035990712074303, "grad_norm": 0.04613248631358147, "learning_rate": 9.315899751545245e-05, "loss": 0.0081, "step": 9319 }, { "epoch": 1.8037925696594428, "grad_norm": 0.05695691704750061, "learning_rate": 9.315754314186257e-05, "loss": 0.0075, "step": 9320 }, { "epoch": 1.8039860681114552, "grad_norm": 0.06223934516310692, "learning_rate": 9.315608862641236e-05, "loss": 0.0066, "step": 9321 }, { "epoch": 1.8041795665634675, "grad_norm": 0.05696158483624458, "learning_rate": 9.31546339691072e-05, "loss": 0.009, "step": 9322 }, { "epoch": 1.80437306501548, "grad_norm": 0.0802229791879654, "learning_rate": 9.315317916995254e-05, "loss": 0.0075, "step": 9323 }, { "epoch": 1.8045665634674921, "grad_norm": 0.09416021406650543, "learning_rate": 9.315172422895373e-05, "loss": 0.0086, "step": 9324 }, { "epoch": 1.8047600619195046, "grad_norm": 0.13868021965026855, "learning_rate": 9.315026914611623e-05, "loss": 0.0082, "step": 9325 }, { "epoch": 1.804953560371517, "grad_norm": 0.12221307307481766, "learning_rate": 9.314881392144542e-05, "loss": 0.0078, "step": 9326 }, { "epoch": 1.8051470588235294, "grad_norm": 0.11995509266853333, "learning_rate": 9.314735855494673e-05, "loss": 0.0082, "step": 9327 }, { "epoch": 1.8053405572755419, "grad_norm": 
0.15445691347122192, "learning_rate": 9.314590304662556e-05, "loss": 0.0085, "step": 9328 }, { "epoch": 1.8055340557275543, "grad_norm": 0.0747440904378891, "learning_rate": 9.314444739648732e-05, "loss": 0.0074, "step": 9329 }, { "epoch": 1.8057275541795665, "grad_norm": 0.16585132479667664, "learning_rate": 9.314299160453741e-05, "loss": 0.0067, "step": 9330 }, { "epoch": 1.805921052631579, "grad_norm": 0.06270959973335266, "learning_rate": 9.314153567078126e-05, "loss": 0.0072, "step": 9331 }, { "epoch": 1.8061145510835912, "grad_norm": 0.14587418735027313, "learning_rate": 9.31400795952243e-05, "loss": 0.0073, "step": 9332 }, { "epoch": 1.8063080495356036, "grad_norm": 0.12017685920000076, "learning_rate": 9.31386233778719e-05, "loss": 0.0073, "step": 9333 }, { "epoch": 1.806501547987616, "grad_norm": 0.10884631425142288, "learning_rate": 9.31371670187295e-05, "loss": 0.0089, "step": 9334 }, { "epoch": 1.8066950464396285, "grad_norm": 0.15490934252738953, "learning_rate": 9.313571051780252e-05, "loss": 0.0079, "step": 9335 }, { "epoch": 1.806888544891641, "grad_norm": 0.06737970560789108, "learning_rate": 9.313425387509635e-05, "loss": 0.0089, "step": 9336 }, { "epoch": 1.8070820433436534, "grad_norm": 0.12726262211799622, "learning_rate": 9.313279709061643e-05, "loss": 0.0087, "step": 9337 }, { "epoch": 1.8072755417956656, "grad_norm": 0.0985795184969902, "learning_rate": 9.313134016436818e-05, "loss": 0.0093, "step": 9338 }, { "epoch": 1.807469040247678, "grad_norm": 0.06589026749134064, "learning_rate": 9.312988309635697e-05, "loss": 0.0065, "step": 9339 }, { "epoch": 1.8076625386996903, "grad_norm": 0.13986803591251373, "learning_rate": 9.312842588658829e-05, "loss": 0.0066, "step": 9340 }, { "epoch": 1.8078560371517027, "grad_norm": 0.056056756526231766, "learning_rate": 9.312696853506748e-05, "loss": 0.0074, "step": 9341 }, { "epoch": 1.8080495356037152, "grad_norm": 0.11506256461143494, "learning_rate": 9.312551104180002e-05, "loss": 0.007, "step": 9342 }, { "epoch": 1.8082430340557276, "grad_norm": 0.0880085751414299, "learning_rate": 9.312405340679129e-05, "loss": 0.0083, "step": 9343 }, { "epoch": 1.80843653250774, "grad_norm": 0.15320329368114471, "learning_rate": 9.312259563004674e-05, "loss": 0.0081, "step": 9344 }, { "epoch": 1.8086300309597523, "grad_norm": 0.0949559360742569, "learning_rate": 9.312113771157177e-05, "loss": 0.0083, "step": 9345 }, { "epoch": 1.8088235294117647, "grad_norm": 0.08802110701799393, "learning_rate": 9.311967965137179e-05, "loss": 0.0078, "step": 9346 }, { "epoch": 1.809017027863777, "grad_norm": 0.13039404153823853, "learning_rate": 9.311822144945224e-05, "loss": 0.0083, "step": 9347 }, { "epoch": 1.8092105263157894, "grad_norm": 0.06298580765724182, "learning_rate": 9.311676310581853e-05, "loss": 0.0065, "step": 9348 }, { "epoch": 1.8094040247678018, "grad_norm": 0.1618567705154419, "learning_rate": 9.311530462047609e-05, "loss": 0.0073, "step": 9349 }, { "epoch": 1.8095975232198143, "grad_norm": 0.06445198506116867, "learning_rate": 9.311384599343034e-05, "loss": 0.0082, "step": 9350 }, { "epoch": 1.8097910216718267, "grad_norm": 0.11944282799959183, "learning_rate": 9.311238722468671e-05, "loss": 0.0083, "step": 9351 }, { "epoch": 1.8099845201238391, "grad_norm": 0.07475032657384872, "learning_rate": 9.31109283142506e-05, "loss": 0.009, "step": 9352 }, { "epoch": 1.8101780185758514, "grad_norm": 0.06642451882362366, "learning_rate": 9.310946926212747e-05, "loss": 0.0093, "step": 9353 }, { "epoch": 1.8103715170278638, "grad_norm": 
0.0718289241194725, "learning_rate": 9.310801006832271e-05, "loss": 0.009, "step": 9354 }, { "epoch": 1.810565015479876, "grad_norm": 0.09397897869348526, "learning_rate": 9.310655073284176e-05, "loss": 0.0068, "step": 9355 }, { "epoch": 1.8107585139318885, "grad_norm": 0.06237172707915306, "learning_rate": 9.310509125569004e-05, "loss": 0.009, "step": 9356 }, { "epoch": 1.810952012383901, "grad_norm": 0.07664546370506287, "learning_rate": 9.310363163687299e-05, "loss": 0.0071, "step": 9357 }, { "epoch": 1.8111455108359134, "grad_norm": 0.04797350615262985, "learning_rate": 9.310217187639603e-05, "loss": 0.0071, "step": 9358 }, { "epoch": 1.8113390092879258, "grad_norm": 0.09029897302389145, "learning_rate": 9.310071197426456e-05, "loss": 0.0098, "step": 9359 }, { "epoch": 1.8115325077399382, "grad_norm": 0.04210388660430908, "learning_rate": 9.309925193048405e-05, "loss": 0.008, "step": 9360 }, { "epoch": 1.8117260061919505, "grad_norm": 0.08104292303323746, "learning_rate": 9.309779174505991e-05, "loss": 0.0097, "step": 9361 }, { "epoch": 1.811919504643963, "grad_norm": 0.05005611106753349, "learning_rate": 9.309633141799756e-05, "loss": 0.0086, "step": 9362 }, { "epoch": 1.8121130030959751, "grad_norm": 0.09615173935890198, "learning_rate": 9.309487094930245e-05, "loss": 0.0081, "step": 9363 }, { "epoch": 1.8123065015479876, "grad_norm": 0.04799512028694153, "learning_rate": 9.309341033897998e-05, "loss": 0.0074, "step": 9364 }, { "epoch": 1.8125, "grad_norm": 0.10457022488117218, "learning_rate": 9.309194958703561e-05, "loss": 0.0091, "step": 9365 }, { "epoch": 1.8126934984520124, "grad_norm": 0.0812993124127388, "learning_rate": 9.309048869347474e-05, "loss": 0.009, "step": 9366 }, { "epoch": 1.8128869969040249, "grad_norm": 0.07775601744651794, "learning_rate": 9.308902765830284e-05, "loss": 0.0076, "step": 9367 }, { "epoch": 1.813080495356037, "grad_norm": 0.14758050441741943, "learning_rate": 9.308756648152531e-05, "loss": 0.0096, "step": 9368 }, { "epoch": 1.8132739938080495, "grad_norm": 0.16568219661712646, "learning_rate": 9.308610516314758e-05, "loss": 0.009, "step": 9369 }, { "epoch": 1.8134674922600618, "grad_norm": 0.14914648234844208, "learning_rate": 9.308464370317512e-05, "loss": 0.0073, "step": 9370 }, { "epoch": 1.8136609907120742, "grad_norm": 0.11983390897512436, "learning_rate": 9.308318210161332e-05, "loss": 0.0085, "step": 9371 }, { "epoch": 1.8138544891640866, "grad_norm": 0.1559048742055893, "learning_rate": 9.308172035846764e-05, "loss": 0.0066, "step": 9372 }, { "epoch": 1.814047987616099, "grad_norm": 0.09585367888212204, "learning_rate": 9.308025847374352e-05, "loss": 0.0089, "step": 9373 }, { "epoch": 1.8142414860681115, "grad_norm": 0.13167044520378113, "learning_rate": 9.307879644744636e-05, "loss": 0.0073, "step": 9374 }, { "epoch": 1.814434984520124, "grad_norm": 0.10738489776849747, "learning_rate": 9.307733427958163e-05, "loss": 0.0076, "step": 9375 }, { "epoch": 1.8146284829721362, "grad_norm": 0.09570683538913727, "learning_rate": 9.307587197015476e-05, "loss": 0.0076, "step": 9376 }, { "epoch": 1.8148219814241486, "grad_norm": 0.09208398312330246, "learning_rate": 9.307440951917118e-05, "loss": 0.0063, "step": 9377 }, { "epoch": 1.8150154798761609, "grad_norm": 0.0480203852057457, "learning_rate": 9.307294692663631e-05, "loss": 0.0079, "step": 9378 }, { "epoch": 1.8152089783281733, "grad_norm": 0.08357929438352585, "learning_rate": 9.30714841925556e-05, "loss": 0.0085, "step": 9379 }, { "epoch": 1.8154024767801857, "grad_norm": 
0.027272861450910568, "learning_rate": 9.307002131693452e-05, "loss": 0.0064, "step": 9380 }, { "epoch": 1.8155959752321982, "grad_norm": 0.07625113427639008, "learning_rate": 9.306855829977846e-05, "loss": 0.0082, "step": 9381 }, { "epoch": 1.8157894736842106, "grad_norm": 0.049831319600343704, "learning_rate": 9.30670951410929e-05, "loss": 0.0075, "step": 9382 }, { "epoch": 1.815982972136223, "grad_norm": 0.066335029900074, "learning_rate": 9.306563184088323e-05, "loss": 0.0079, "step": 9383 }, { "epoch": 1.8161764705882353, "grad_norm": 0.044010426849126816, "learning_rate": 9.306416839915495e-05, "loss": 0.0073, "step": 9384 }, { "epoch": 1.8163699690402477, "grad_norm": 0.0864773765206337, "learning_rate": 9.306270481591346e-05, "loss": 0.0073, "step": 9385 }, { "epoch": 1.81656346749226, "grad_norm": 0.058124840259552, "learning_rate": 9.306124109116421e-05, "loss": 0.0058, "step": 9386 }, { "epoch": 1.8167569659442724, "grad_norm": 0.10372573882341385, "learning_rate": 9.305977722491265e-05, "loss": 0.0082, "step": 9387 }, { "epoch": 1.8169504643962848, "grad_norm": 0.09219197928905487, "learning_rate": 9.305831321716422e-05, "loss": 0.0076, "step": 9388 }, { "epoch": 1.8171439628482973, "grad_norm": 0.08810316026210785, "learning_rate": 9.305684906792435e-05, "loss": 0.0079, "step": 9389 }, { "epoch": 1.8173374613003097, "grad_norm": 0.11590638011693954, "learning_rate": 9.305538477719849e-05, "loss": 0.009, "step": 9390 }, { "epoch": 1.817530959752322, "grad_norm": 0.12106568366289139, "learning_rate": 9.305392034499209e-05, "loss": 0.0082, "step": 9391 }, { "epoch": 1.8177244582043344, "grad_norm": 0.13826416432857513, "learning_rate": 9.305245577131059e-05, "loss": 0.0089, "step": 9392 }, { "epoch": 1.8179179566563466, "grad_norm": 0.2165689319372177, "learning_rate": 9.305099105615945e-05, "loss": 0.0079, "step": 9393 }, { "epoch": 1.818111455108359, "grad_norm": 0.04824960231781006, "learning_rate": 9.304952619954408e-05, "loss": 0.0071, "step": 9394 }, { "epoch": 1.8183049535603715, "grad_norm": 0.22836971282958984, "learning_rate": 9.304806120146997e-05, "loss": 0.0086, "step": 9395 }, { "epoch": 1.818498452012384, "grad_norm": 0.14756400883197784, "learning_rate": 9.304659606194251e-05, "loss": 0.0072, "step": 9396 }, { "epoch": 1.8186919504643964, "grad_norm": 0.11435551196336746, "learning_rate": 9.30451307809672e-05, "loss": 0.0074, "step": 9397 }, { "epoch": 1.8188854489164088, "grad_norm": 0.1910497546195984, "learning_rate": 9.304366535854948e-05, "loss": 0.0077, "step": 9398 }, { "epoch": 1.819078947368421, "grad_norm": 0.07857231795787811, "learning_rate": 9.304219979469477e-05, "loss": 0.0081, "step": 9399 }, { "epoch": 1.8192724458204335, "grad_norm": 0.1383514255285263, "learning_rate": 9.304073408940855e-05, "loss": 0.0094, "step": 9400 }, { "epoch": 1.8194659442724457, "grad_norm": 0.10579533129930496, "learning_rate": 9.303926824269625e-05, "loss": 0.01, "step": 9401 }, { "epoch": 1.8196594427244581, "grad_norm": 0.04554249346256256, "learning_rate": 9.303780225456332e-05, "loss": 0.0068, "step": 9402 }, { "epoch": 1.8198529411764706, "grad_norm": 0.12094343453645706, "learning_rate": 9.303633612501521e-05, "loss": 0.0079, "step": 9403 }, { "epoch": 1.820046439628483, "grad_norm": 0.05365149304270744, "learning_rate": 9.303486985405736e-05, "loss": 0.0068, "step": 9404 }, { "epoch": 1.8202399380804954, "grad_norm": 0.07363354414701462, "learning_rate": 9.303340344169526e-05, "loss": 0.0076, "step": 9405 }, { "epoch": 1.8204334365325079, "grad_norm": 
0.05753978714346886, "learning_rate": 9.303193688793432e-05, "loss": 0.0071, "step": 9406 }, { "epoch": 1.82062693498452, "grad_norm": 0.07223743945360184, "learning_rate": 9.303047019278004e-05, "loss": 0.0088, "step": 9407 }, { "epoch": 1.8208204334365325, "grad_norm": 0.11531156301498413, "learning_rate": 9.302900335623782e-05, "loss": 0.0071, "step": 9408 }, { "epoch": 1.8210139318885448, "grad_norm": 0.07805797457695007, "learning_rate": 9.302753637831313e-05, "loss": 0.0094, "step": 9409 }, { "epoch": 1.8212074303405572, "grad_norm": 0.08274254947900772, "learning_rate": 9.302606925901145e-05, "loss": 0.0087, "step": 9410 }, { "epoch": 1.8214009287925697, "grad_norm": 0.060520511120557785, "learning_rate": 9.30246019983382e-05, "loss": 0.007, "step": 9411 }, { "epoch": 1.821594427244582, "grad_norm": 0.08515723049640656, "learning_rate": 9.302313459629885e-05, "loss": 0.0087, "step": 9412 }, { "epoch": 1.8217879256965945, "grad_norm": 0.043619874864816666, "learning_rate": 9.302166705289887e-05, "loss": 0.0086, "step": 9413 }, { "epoch": 1.821981424148607, "grad_norm": 0.1149928942322731, "learning_rate": 9.302019936814369e-05, "loss": 0.0071, "step": 9414 }, { "epoch": 1.8221749226006192, "grad_norm": 0.06521338224411011, "learning_rate": 9.301873154203877e-05, "loss": 0.0073, "step": 9415 }, { "epoch": 1.8223684210526314, "grad_norm": 0.08011464029550552, "learning_rate": 9.301726357458957e-05, "loss": 0.0081, "step": 9416 }, { "epoch": 1.8225619195046439, "grad_norm": 0.06367512792348862, "learning_rate": 9.301579546580158e-05, "loss": 0.0078, "step": 9417 }, { "epoch": 1.8227554179566563, "grad_norm": 0.04847195744514465, "learning_rate": 9.30143272156802e-05, "loss": 0.0096, "step": 9418 }, { "epoch": 1.8229489164086687, "grad_norm": 0.05284985899925232, "learning_rate": 9.301285882423093e-05, "loss": 0.0077, "step": 9419 }, { "epoch": 1.8231424148606812, "grad_norm": 0.03295431658625603, "learning_rate": 9.301139029145921e-05, "loss": 0.007, "step": 9420 }, { "epoch": 1.8233359133126936, "grad_norm": 0.053167976438999176, "learning_rate": 9.300992161737052e-05, "loss": 0.0088, "step": 9421 }, { "epoch": 1.8235294117647058, "grad_norm": 0.04284973442554474, "learning_rate": 9.30084528019703e-05, "loss": 0.0084, "step": 9422 }, { "epoch": 1.8237229102167183, "grad_norm": 0.07160700112581253, "learning_rate": 9.300698384526403e-05, "loss": 0.0084, "step": 9423 }, { "epoch": 1.8239164086687305, "grad_norm": 0.05626462772488594, "learning_rate": 9.300551474725714e-05, "loss": 0.007, "step": 9424 }, { "epoch": 1.824109907120743, "grad_norm": 0.049777403473854065, "learning_rate": 9.300404550795512e-05, "loss": 0.0094, "step": 9425 }, { "epoch": 1.8243034055727554, "grad_norm": 0.06601220369338989, "learning_rate": 9.300257612736342e-05, "loss": 0.0081, "step": 9426 }, { "epoch": 1.8244969040247678, "grad_norm": 0.04157809540629387, "learning_rate": 9.300110660548751e-05, "loss": 0.0081, "step": 9427 }, { "epoch": 1.8246904024767803, "grad_norm": 0.052745841443538666, "learning_rate": 9.299963694233284e-05, "loss": 0.0078, "step": 9428 }, { "epoch": 1.8248839009287927, "grad_norm": 0.03709680959582329, "learning_rate": 9.29981671379049e-05, "loss": 0.008, "step": 9429 }, { "epoch": 1.825077399380805, "grad_norm": 0.056749992072582245, "learning_rate": 9.299669719220913e-05, "loss": 0.006, "step": 9430 }, { "epoch": 1.8252708978328174, "grad_norm": 0.05610894039273262, "learning_rate": 9.2995227105251e-05, "loss": 0.0078, "step": 9431 }, { "epoch": 1.8254643962848296, "grad_norm": 
0.07302945107221603, "learning_rate": 9.299375687703598e-05, "loss": 0.0075, "step": 9432 }, { "epoch": 1.825657894736842, "grad_norm": 0.04535406455397606, "learning_rate": 9.299228650756953e-05, "loss": 0.0072, "step": 9433 }, { "epoch": 1.8258513931888545, "grad_norm": 0.0541456937789917, "learning_rate": 9.299081599685712e-05, "loss": 0.0095, "step": 9434 }, { "epoch": 1.826044891640867, "grad_norm": 0.05173558369278908, "learning_rate": 9.298934534490423e-05, "loss": 0.0077, "step": 9435 }, { "epoch": 1.8262383900928794, "grad_norm": 0.05229293182492256, "learning_rate": 9.298787455171632e-05, "loss": 0.0075, "step": 9436 }, { "epoch": 1.8264318885448918, "grad_norm": 0.039308980107307434, "learning_rate": 9.298640361729882e-05, "loss": 0.0071, "step": 9437 }, { "epoch": 1.826625386996904, "grad_norm": 0.04202880337834358, "learning_rate": 9.298493254165726e-05, "loss": 0.0094, "step": 9438 }, { "epoch": 1.8268188854489165, "grad_norm": 0.028195327147841454, "learning_rate": 9.298346132479706e-05, "loss": 0.0073, "step": 9439 }, { "epoch": 1.8270123839009287, "grad_norm": 0.05405030399560928, "learning_rate": 9.298198996672373e-05, "loss": 0.0081, "step": 9440 }, { "epoch": 1.8272058823529411, "grad_norm": 0.0701511949300766, "learning_rate": 9.29805184674427e-05, "loss": 0.0063, "step": 9441 }, { "epoch": 1.8273993808049536, "grad_norm": 0.06281793862581253, "learning_rate": 9.297904682695948e-05, "loss": 0.0081, "step": 9442 }, { "epoch": 1.827592879256966, "grad_norm": 0.06520416587591171, "learning_rate": 9.29775750452795e-05, "loss": 0.0071, "step": 9443 }, { "epoch": 1.8277863777089784, "grad_norm": 0.07236131280660629, "learning_rate": 9.297610312240829e-05, "loss": 0.0058, "step": 9444 }, { "epoch": 1.8279798761609907, "grad_norm": 0.04440772533416748, "learning_rate": 9.297463105835125e-05, "loss": 0.0089, "step": 9445 }, { "epoch": 1.828173374613003, "grad_norm": 0.08712335675954819, "learning_rate": 9.297315885311391e-05, "loss": 0.0086, "step": 9446 }, { "epoch": 1.8283668730650153, "grad_norm": 0.035393547266721725, "learning_rate": 9.297168650670171e-05, "loss": 0.0073, "step": 9447 }, { "epoch": 1.8285603715170278, "grad_norm": 0.06068822741508484, "learning_rate": 9.297021401912013e-05, "loss": 0.0082, "step": 9448 }, { "epoch": 1.8287538699690402, "grad_norm": 0.06711184233427048, "learning_rate": 9.296874139037466e-05, "loss": 0.0075, "step": 9449 }, { "epoch": 1.8289473684210527, "grad_norm": 0.035421766340732574, "learning_rate": 9.296726862047078e-05, "loss": 0.0093, "step": 9450 }, { "epoch": 1.829140866873065, "grad_norm": 0.07602183520793915, "learning_rate": 9.296579570941394e-05, "loss": 0.0073, "step": 9451 }, { "epoch": 1.8293343653250775, "grad_norm": 0.04094989597797394, "learning_rate": 9.296432265720963e-05, "loss": 0.0074, "step": 9452 }, { "epoch": 1.8295278637770898, "grad_norm": 0.04352473467588425, "learning_rate": 9.296284946386331e-05, "loss": 0.0105, "step": 9453 }, { "epoch": 1.8297213622291022, "grad_norm": 0.06335924565792084, "learning_rate": 9.296137612938047e-05, "loss": 0.0069, "step": 9454 }, { "epoch": 1.8299148606811144, "grad_norm": 0.052284132689237595, "learning_rate": 9.29599026537666e-05, "loss": 0.0086, "step": 9455 }, { "epoch": 1.8301083591331269, "grad_norm": 0.07525429129600525, "learning_rate": 9.295842903702716e-05, "loss": 0.009, "step": 9456 }, { "epoch": 1.8303018575851393, "grad_norm": 0.05473775044083595, "learning_rate": 9.295695527916764e-05, "loss": 0.0052, "step": 9457 }, { "epoch": 1.8304953560371517, "grad_norm": 
0.07543186098337173, "learning_rate": 9.29554813801935e-05, "loss": 0.0072, "step": 9458 }, { "epoch": 1.8306888544891642, "grad_norm": 0.05218321084976196, "learning_rate": 9.295400734011025e-05, "loss": 0.0078, "step": 9459 }, { "epoch": 1.8308823529411766, "grad_norm": 0.028277544304728508, "learning_rate": 9.295253315892334e-05, "loss": 0.0087, "step": 9460 }, { "epoch": 1.8310758513931888, "grad_norm": 0.04198329895734787, "learning_rate": 9.295105883663826e-05, "loss": 0.0084, "step": 9461 }, { "epoch": 1.8312693498452013, "grad_norm": 0.045204468071460724, "learning_rate": 9.29495843732605e-05, "loss": 0.0095, "step": 9462 }, { "epoch": 1.8314628482972135, "grad_norm": 0.02925080992281437, "learning_rate": 9.294810976879554e-05, "loss": 0.0075, "step": 9463 }, { "epoch": 1.831656346749226, "grad_norm": 0.048724740743637085, "learning_rate": 9.294663502324886e-05, "loss": 0.006, "step": 9464 }, { "epoch": 1.8318498452012384, "grad_norm": 0.0342915803194046, "learning_rate": 9.294516013662594e-05, "loss": 0.0073, "step": 9465 }, { "epoch": 1.8320433436532508, "grad_norm": 0.034213028848171234, "learning_rate": 9.294368510893225e-05, "loss": 0.0069, "step": 9466 }, { "epoch": 1.8322368421052633, "grad_norm": 0.061069510877132416, "learning_rate": 9.29422099401733e-05, "loss": 0.0086, "step": 9467 }, { "epoch": 1.8324303405572755, "grad_norm": 0.03836718201637268, "learning_rate": 9.294073463035457e-05, "loss": 0.0087, "step": 9468 }, { "epoch": 1.832623839009288, "grad_norm": 0.05064532533288002, "learning_rate": 9.293925917948153e-05, "loss": 0.0083, "step": 9469 }, { "epoch": 1.8328173374613002, "grad_norm": 0.062123123556375504, "learning_rate": 9.293778358755967e-05, "loss": 0.0078, "step": 9470 }, { "epoch": 1.8330108359133126, "grad_norm": 0.04118392616510391, "learning_rate": 9.293630785459447e-05, "loss": 0.0081, "step": 9471 }, { "epoch": 1.833204334365325, "grad_norm": 0.05023519694805145, "learning_rate": 9.293483198059144e-05, "loss": 0.0098, "step": 9472 }, { "epoch": 1.8333978328173375, "grad_norm": 0.028217284008860588, "learning_rate": 9.293335596555605e-05, "loss": 0.0078, "step": 9473 }, { "epoch": 1.83359133126935, "grad_norm": 0.06306952983140945, "learning_rate": 9.293187980949378e-05, "loss": 0.0073, "step": 9474 }, { "epoch": 1.8337848297213624, "grad_norm": 0.05077425390481949, "learning_rate": 9.293040351241014e-05, "loss": 0.0103, "step": 9475 }, { "epoch": 1.8339783281733746, "grad_norm": 0.04862285032868385, "learning_rate": 9.29289270743106e-05, "loss": 0.007, "step": 9476 }, { "epoch": 1.834171826625387, "grad_norm": 0.06158015504479408, "learning_rate": 9.292745049520065e-05, "loss": 0.01, "step": 9477 }, { "epoch": 1.8343653250773992, "grad_norm": 0.06011722609400749, "learning_rate": 9.29259737750858e-05, "loss": 0.0083, "step": 9478 }, { "epoch": 1.8345588235294117, "grad_norm": 0.0586986243724823, "learning_rate": 9.292449691397153e-05, "loss": 0.0073, "step": 9479 }, { "epoch": 1.8347523219814241, "grad_norm": 0.07854758948087692, "learning_rate": 9.29230199118633e-05, "loss": 0.0074, "step": 9480 }, { "epoch": 1.8349458204334366, "grad_norm": 0.04656532034277916, "learning_rate": 9.292154276876663e-05, "loss": 0.0063, "step": 9481 }, { "epoch": 1.835139318885449, "grad_norm": 0.08268757909536362, "learning_rate": 9.292006548468701e-05, "loss": 0.0069, "step": 9482 }, { "epoch": 1.8353328173374615, "grad_norm": 0.04633510485291481, "learning_rate": 9.291858805962994e-05, "loss": 0.0077, "step": 9483 }, { "epoch": 1.8355263157894737, "grad_norm": 
0.06950163096189499, "learning_rate": 9.29171104936009e-05, "loss": 0.0078, "step": 9484 }, { "epoch": 1.8357198142414861, "grad_norm": 0.04840051010251045, "learning_rate": 9.291563278660538e-05, "loss": 0.0065, "step": 9485 }, { "epoch": 1.8359133126934983, "grad_norm": 0.053486645221710205, "learning_rate": 9.29141549386489e-05, "loss": 0.0084, "step": 9486 }, { "epoch": 1.8361068111455108, "grad_norm": 0.08204132318496704, "learning_rate": 9.291267694973692e-05, "loss": 0.0092, "step": 9487 }, { "epoch": 1.8363003095975232, "grad_norm": 0.059608858078718185, "learning_rate": 9.291119881987494e-05, "loss": 0.0092, "step": 9488 }, { "epoch": 1.8364938080495357, "grad_norm": 0.08966552466154099, "learning_rate": 9.290972054906846e-05, "loss": 0.0075, "step": 9489 }, { "epoch": 1.836687306501548, "grad_norm": 0.06302008777856827, "learning_rate": 9.290824213732301e-05, "loss": 0.0068, "step": 9490 }, { "epoch": 1.8368808049535603, "grad_norm": 0.05791248753666878, "learning_rate": 9.290676358464404e-05, "loss": 0.0082, "step": 9491 }, { "epoch": 1.8370743034055728, "grad_norm": 0.06211832910776138, "learning_rate": 9.290528489103706e-05, "loss": 0.0096, "step": 9492 }, { "epoch": 1.837267801857585, "grad_norm": 0.06959707289934158, "learning_rate": 9.290380605650757e-05, "loss": 0.0072, "step": 9493 }, { "epoch": 1.8374613003095974, "grad_norm": 0.08332356810569763, "learning_rate": 9.290232708106107e-05, "loss": 0.0075, "step": 9494 }, { "epoch": 1.8376547987616099, "grad_norm": 0.06522876769304276, "learning_rate": 9.290084796470306e-05, "loss": 0.0067, "step": 9495 }, { "epoch": 1.8378482972136223, "grad_norm": 0.05598009005188942, "learning_rate": 9.289936870743904e-05, "loss": 0.0089, "step": 9496 }, { "epoch": 1.8380417956656347, "grad_norm": 0.09769447892904282, "learning_rate": 9.28978893092745e-05, "loss": 0.0068, "step": 9497 }, { "epoch": 1.8382352941176472, "grad_norm": 0.033770520240068436, "learning_rate": 9.289640977021494e-05, "loss": 0.0062, "step": 9498 }, { "epoch": 1.8384287925696594, "grad_norm": 0.10037574917078018, "learning_rate": 9.289493009026588e-05, "loss": 0.0083, "step": 9499 }, { "epoch": 1.8386222910216719, "grad_norm": 0.049992553889751434, "learning_rate": 9.28934502694328e-05, "loss": 0.0088, "step": 9500 }, { "epoch": 1.838815789473684, "grad_norm": 0.11666043102741241, "learning_rate": 9.28919703077212e-05, "loss": 0.0084, "step": 9501 }, { "epoch": 1.8390092879256965, "grad_norm": 0.07082908600568771, "learning_rate": 9.289049020513661e-05, "loss": 0.007, "step": 9502 }, { "epoch": 1.839202786377709, "grad_norm": 0.1460699737071991, "learning_rate": 9.28890099616845e-05, "loss": 0.007, "step": 9503 }, { "epoch": 1.8393962848297214, "grad_norm": 0.08727866411209106, "learning_rate": 9.288752957737041e-05, "loss": 0.0088, "step": 9504 }, { "epoch": 1.8395897832817338, "grad_norm": 0.10598251223564148, "learning_rate": 9.28860490521998e-05, "loss": 0.0073, "step": 9505 }, { "epoch": 1.8397832817337463, "grad_norm": 0.09631914645433426, "learning_rate": 9.28845683861782e-05, "loss": 0.009, "step": 9506 }, { "epoch": 1.8399767801857585, "grad_norm": 0.10069262981414795, "learning_rate": 9.28830875793111e-05, "loss": 0.008, "step": 9507 }, { "epoch": 1.840170278637771, "grad_norm": 0.0750235766172409, "learning_rate": 9.288160663160402e-05, "loss": 0.0076, "step": 9508 }, { "epoch": 1.8403637770897832, "grad_norm": 0.07904481887817383, "learning_rate": 9.288012554306247e-05, "loss": 0.0088, "step": 9509 }, { "epoch": 1.8405572755417956, "grad_norm": 
0.10216280072927475, "learning_rate": 9.287864431369195e-05, "loss": 0.0074, "step": 9510 }, { "epoch": 1.840750773993808, "grad_norm": 0.08710086345672607, "learning_rate": 9.287716294349794e-05, "loss": 0.0072, "step": 9511 }, { "epoch": 1.8409442724458205, "grad_norm": 0.11823850870132446, "learning_rate": 9.287568143248598e-05, "loss": 0.0067, "step": 9512 }, { "epoch": 1.841137770897833, "grad_norm": 0.075333371758461, "learning_rate": 9.287419978066158e-05, "loss": 0.0066, "step": 9513 }, { "epoch": 1.8413312693498454, "grad_norm": 0.10366761684417725, "learning_rate": 9.287271798803025e-05, "loss": 0.0066, "step": 9514 }, { "epoch": 1.8415247678018576, "grad_norm": 0.13600610196590424, "learning_rate": 9.287123605459746e-05, "loss": 0.0102, "step": 9515 }, { "epoch": 1.8417182662538698, "grad_norm": 0.10115253180265427, "learning_rate": 9.286975398036875e-05, "loss": 0.0079, "step": 9516 }, { "epoch": 1.8419117647058822, "grad_norm": 0.10472285747528076, "learning_rate": 9.286827176534964e-05, "loss": 0.0076, "step": 9517 }, { "epoch": 1.8421052631578947, "grad_norm": 0.08970260620117188, "learning_rate": 9.286678940954561e-05, "loss": 0.0091, "step": 9518 }, { "epoch": 1.8422987616099071, "grad_norm": 0.11103451997041702, "learning_rate": 9.286530691296221e-05, "loss": 0.0088, "step": 9519 }, { "epoch": 1.8424922600619196, "grad_norm": 0.13691756129264832, "learning_rate": 9.28638242756049e-05, "loss": 0.0078, "step": 9520 }, { "epoch": 1.842685758513932, "grad_norm": 0.14703963696956635, "learning_rate": 9.286234149747924e-05, "loss": 0.0082, "step": 9521 }, { "epoch": 1.8428792569659442, "grad_norm": 0.14173105359077454, "learning_rate": 9.286085857859071e-05, "loss": 0.0076, "step": 9522 }, { "epoch": 1.8430727554179567, "grad_norm": 0.10162577778100967, "learning_rate": 9.285937551894486e-05, "loss": 0.0085, "step": 9523 }, { "epoch": 1.843266253869969, "grad_norm": 0.15988720953464508, "learning_rate": 9.285789231854717e-05, "loss": 0.0079, "step": 9524 }, { "epoch": 1.8434597523219813, "grad_norm": 0.050540097057819366, "learning_rate": 9.285640897740315e-05, "loss": 0.0083, "step": 9525 }, { "epoch": 1.8436532507739938, "grad_norm": 0.15134993195533752, "learning_rate": 9.285492549551835e-05, "loss": 0.0079, "step": 9526 }, { "epoch": 1.8438467492260062, "grad_norm": 0.05081719160079956, "learning_rate": 9.285344187289826e-05, "loss": 0.0071, "step": 9527 }, { "epoch": 1.8440402476780187, "grad_norm": 0.13167104125022888, "learning_rate": 9.285195810954839e-05, "loss": 0.0072, "step": 9528 }, { "epoch": 1.844233746130031, "grad_norm": 0.08632251620292664, "learning_rate": 9.285047420547427e-05, "loss": 0.0078, "step": 9529 }, { "epoch": 1.8444272445820433, "grad_norm": 0.09089554101228714, "learning_rate": 9.284899016068141e-05, "loss": 0.0079, "step": 9530 }, { "epoch": 1.8446207430340558, "grad_norm": 0.11824765801429749, "learning_rate": 9.284750597517535e-05, "loss": 0.0076, "step": 9531 }, { "epoch": 1.844814241486068, "grad_norm": 0.07640399783849716, "learning_rate": 9.284602164896158e-05, "loss": 0.0077, "step": 9532 }, { "epoch": 1.8450077399380804, "grad_norm": 0.07502732425928116, "learning_rate": 9.284453718204563e-05, "loss": 0.0075, "step": 9533 }, { "epoch": 1.8452012383900929, "grad_norm": 0.06406529247760773, "learning_rate": 9.284305257443301e-05, "loss": 0.0078, "step": 9534 }, { "epoch": 1.8453947368421053, "grad_norm": 0.08990079909563065, "learning_rate": 9.284156782612923e-05, "loss": 0.0075, "step": 9535 }, { "epoch": 1.8455882352941178, "grad_norm": 
0.12205404788255692, "learning_rate": 9.284008293713983e-05, "loss": 0.0094, "step": 9536 }, { "epoch": 1.8457817337461302, "grad_norm": 0.04226106405258179, "learning_rate": 9.283859790747035e-05, "loss": 0.0063, "step": 9537 }, { "epoch": 1.8459752321981424, "grad_norm": 0.09265509247779846, "learning_rate": 9.283711273712628e-05, "loss": 0.0078, "step": 9538 }, { "epoch": 1.8461687306501546, "grad_norm": 0.0685477927327156, "learning_rate": 9.283562742611314e-05, "loss": 0.0078, "step": 9539 }, { "epoch": 1.846362229102167, "grad_norm": 0.06836309283971786, "learning_rate": 9.283414197443645e-05, "loss": 0.007, "step": 9540 }, { "epoch": 1.8465557275541795, "grad_norm": 0.08540544658899307, "learning_rate": 9.283265638210176e-05, "loss": 0.007, "step": 9541 }, { "epoch": 1.846749226006192, "grad_norm": 0.05673641711473465, "learning_rate": 9.283117064911457e-05, "loss": 0.0077, "step": 9542 }, { "epoch": 1.8469427244582044, "grad_norm": 0.07913970202207565, "learning_rate": 9.282968477548041e-05, "loss": 0.0073, "step": 9543 }, { "epoch": 1.8471362229102168, "grad_norm": 0.058845557272434235, "learning_rate": 9.28281987612048e-05, "loss": 0.0071, "step": 9544 }, { "epoch": 1.847329721362229, "grad_norm": 0.08534244447946548, "learning_rate": 9.282671260629326e-05, "loss": 0.0065, "step": 9545 }, { "epoch": 1.8475232198142415, "grad_norm": 0.06669889390468597, "learning_rate": 9.282522631075134e-05, "loss": 0.0066, "step": 9546 }, { "epoch": 1.8477167182662537, "grad_norm": 0.06924144178628922, "learning_rate": 9.282373987458453e-05, "loss": 0.0073, "step": 9547 }, { "epoch": 1.8479102167182662, "grad_norm": 0.07620498538017273, "learning_rate": 9.28222532977984e-05, "loss": 0.0084, "step": 9548 }, { "epoch": 1.8481037151702786, "grad_norm": 0.04962655156850815, "learning_rate": 9.282076658039844e-05, "loss": 0.0076, "step": 9549 }, { "epoch": 1.848297213622291, "grad_norm": 0.05656588450074196, "learning_rate": 9.281927972239018e-05, "loss": 0.0073, "step": 9550 }, { "epoch": 1.8484907120743035, "grad_norm": 0.06081763282418251, "learning_rate": 9.281779272377917e-05, "loss": 0.0069, "step": 9551 }, { "epoch": 1.848684210526316, "grad_norm": 0.05498208850622177, "learning_rate": 9.28163055845709e-05, "loss": 0.0078, "step": 9552 }, { "epoch": 1.8488777089783281, "grad_norm": 0.05697466805577278, "learning_rate": 9.281481830477097e-05, "loss": 0.0075, "step": 9553 }, { "epoch": 1.8490712074303406, "grad_norm": 0.056082818657159805, "learning_rate": 9.281333088438483e-05, "loss": 0.0078, "step": 9554 }, { "epoch": 1.8492647058823528, "grad_norm": 0.07940027862787247, "learning_rate": 9.281184332341805e-05, "loss": 0.009, "step": 9555 }, { "epoch": 1.8494582043343653, "grad_norm": 0.10492058098316193, "learning_rate": 9.281035562187616e-05, "loss": 0.0084, "step": 9556 }, { "epoch": 1.8496517027863777, "grad_norm": 0.21708831191062927, "learning_rate": 9.280886777976467e-05, "loss": 0.0097, "step": 9557 }, { "epoch": 1.8498452012383901, "grad_norm": 0.08780110627412796, "learning_rate": 9.280737979708914e-05, "loss": 0.0102, "step": 9558 }, { "epoch": 1.8500386996904026, "grad_norm": 0.19161714613437653, "learning_rate": 9.280589167385509e-05, "loss": 0.0083, "step": 9559 }, { "epoch": 1.850232198142415, "grad_norm": 0.10651867836713791, "learning_rate": 9.280440341006803e-05, "loss": 0.0078, "step": 9560 }, { "epoch": 1.8504256965944272, "grad_norm": 0.15900444984436035, "learning_rate": 9.280291500573353e-05, "loss": 0.008, "step": 9561 }, { "epoch": 1.8506191950464397, "grad_norm": 
0.14932173490524292, "learning_rate": 9.28014264608571e-05, "loss": 0.0093, "step": 9562 }, { "epoch": 1.850812693498452, "grad_norm": 0.1050439178943634, "learning_rate": 9.279993777544428e-05, "loss": 0.0089, "step": 9563 }, { "epoch": 1.8510061919504643, "grad_norm": 0.16739031672477722, "learning_rate": 9.279844894950061e-05, "loss": 0.0081, "step": 9564 }, { "epoch": 1.8511996904024768, "grad_norm": 0.07577946782112122, "learning_rate": 9.279695998303162e-05, "loss": 0.0074, "step": 9565 }, { "epoch": 1.8513931888544892, "grad_norm": 0.1470431685447693, "learning_rate": 9.279547087604286e-05, "loss": 0.006, "step": 9566 }, { "epoch": 1.8515866873065017, "grad_norm": 0.10583622753620148, "learning_rate": 9.279398162853984e-05, "loss": 0.0077, "step": 9567 }, { "epoch": 1.8517801857585139, "grad_norm": 0.1154770776629448, "learning_rate": 9.279249224052811e-05, "loss": 0.0089, "step": 9568 }, { "epoch": 1.8519736842105263, "grad_norm": 0.13166463375091553, "learning_rate": 9.27910027120132e-05, "loss": 0.0063, "step": 9569 }, { "epoch": 1.8521671826625385, "grad_norm": 0.048253223299980164, "learning_rate": 9.278951304300066e-05, "loss": 0.0092, "step": 9570 }, { "epoch": 1.852360681114551, "grad_norm": 0.15095394849777222, "learning_rate": 9.2788023233496e-05, "loss": 0.0083, "step": 9571 }, { "epoch": 1.8525541795665634, "grad_norm": 0.0982913225889206, "learning_rate": 9.27865332835048e-05, "loss": 0.0084, "step": 9572 }, { "epoch": 1.8527476780185759, "grad_norm": 0.14005306363105774, "learning_rate": 9.278504319303259e-05, "loss": 0.0074, "step": 9573 }, { "epoch": 1.8529411764705883, "grad_norm": 0.1384126842021942, "learning_rate": 9.278355296208489e-05, "loss": 0.0094, "step": 9574 }, { "epoch": 1.8531346749226008, "grad_norm": 0.0935298427939415, "learning_rate": 9.278206259066725e-05, "loss": 0.0085, "step": 9575 }, { "epoch": 1.853328173374613, "grad_norm": 0.132039412856102, "learning_rate": 9.278057207878521e-05, "loss": 0.0087, "step": 9576 }, { "epoch": 1.8535216718266254, "grad_norm": 0.08790445327758789, "learning_rate": 9.277908142644431e-05, "loss": 0.0074, "step": 9577 }, { "epoch": 1.8537151702786376, "grad_norm": 0.10190349072217941, "learning_rate": 9.277759063365011e-05, "loss": 0.0066, "step": 9578 }, { "epoch": 1.85390866873065, "grad_norm": 0.10651558637619019, "learning_rate": 9.277609970040812e-05, "loss": 0.008, "step": 9579 }, { "epoch": 1.8541021671826625, "grad_norm": 0.0651802122592926, "learning_rate": 9.27746086267239e-05, "loss": 0.0075, "step": 9580 }, { "epoch": 1.854295665634675, "grad_norm": 0.08070976287126541, "learning_rate": 9.277311741260301e-05, "loss": 0.0083, "step": 9581 }, { "epoch": 1.8544891640866874, "grad_norm": 0.09872553497552872, "learning_rate": 9.277162605805095e-05, "loss": 0.0068, "step": 9582 }, { "epoch": 1.8546826625386998, "grad_norm": 0.06989190727472305, "learning_rate": 9.277013456307332e-05, "loss": 0.0069, "step": 9583 }, { "epoch": 1.854876160990712, "grad_norm": 0.07299560308456421, "learning_rate": 9.276864292767562e-05, "loss": 0.0078, "step": 9584 }, { "epoch": 1.8550696594427245, "grad_norm": 0.040129486471414566, "learning_rate": 9.27671511518634e-05, "loss": 0.0084, "step": 9585 }, { "epoch": 1.8552631578947367, "grad_norm": 0.05632176622748375, "learning_rate": 9.276565923564224e-05, "loss": 0.0079, "step": 9586 }, { "epoch": 1.8554566563467492, "grad_norm": 0.057727962732315063, "learning_rate": 9.276416717901765e-05, "loss": 0.0075, "step": 9587 }, { "epoch": 1.8556501547987616, "grad_norm": 
0.06622936576604843, "learning_rate": 9.27626749819952e-05, "loss": 0.0075, "step": 9588 }, { "epoch": 1.855843653250774, "grad_norm": 0.04836445674300194, "learning_rate": 9.276118264458045e-05, "loss": 0.0075, "step": 9589 }, { "epoch": 1.8560371517027865, "grad_norm": 0.056119561195373535, "learning_rate": 9.27596901667789e-05, "loss": 0.0069, "step": 9590 }, { "epoch": 1.8562306501547987, "grad_norm": 0.029992666095495224, "learning_rate": 9.275819754859613e-05, "loss": 0.0075, "step": 9591 }, { "epoch": 1.8564241486068112, "grad_norm": 0.05149388313293457, "learning_rate": 9.275670479003768e-05, "loss": 0.0083, "step": 9592 }, { "epoch": 1.8566176470588234, "grad_norm": 0.04897821322083473, "learning_rate": 9.275521189110911e-05, "loss": 0.0051, "step": 9593 }, { "epoch": 1.8568111455108358, "grad_norm": 0.04298384115099907, "learning_rate": 9.275371885181597e-05, "loss": 0.0056, "step": 9594 }, { "epoch": 1.8570046439628483, "grad_norm": 0.06382707506418228, "learning_rate": 9.27522256721638e-05, "loss": 0.008, "step": 9595 }, { "epoch": 1.8571981424148607, "grad_norm": 0.06438221037387848, "learning_rate": 9.275073235215817e-05, "loss": 0.008, "step": 9596 }, { "epoch": 1.8573916408668731, "grad_norm": 0.04690249264240265, "learning_rate": 9.27492388918046e-05, "loss": 0.0075, "step": 9597 }, { "epoch": 1.8575851393188856, "grad_norm": 0.054335612803697586, "learning_rate": 9.274774529110868e-05, "loss": 0.007, "step": 9598 }, { "epoch": 1.8577786377708978, "grad_norm": 0.11059851199388504, "learning_rate": 9.274625155007593e-05, "loss": 0.0076, "step": 9599 }, { "epoch": 1.8579721362229102, "grad_norm": 0.0511336587369442, "learning_rate": 9.274475766871193e-05, "loss": 0.007, "step": 9600 }, { "epoch": 1.8581656346749225, "grad_norm": 0.11805079132318497, "learning_rate": 9.274326364702221e-05, "loss": 0.0086, "step": 9601 }, { "epoch": 1.858359133126935, "grad_norm": 0.07728169858455658, "learning_rate": 9.274176948501236e-05, "loss": 0.0082, "step": 9602 }, { "epoch": 1.8585526315789473, "grad_norm": 0.07484659552574158, "learning_rate": 9.274027518268789e-05, "loss": 0.0072, "step": 9603 }, { "epoch": 1.8587461300309598, "grad_norm": 0.10104618966579437, "learning_rate": 9.273878074005436e-05, "loss": 0.0074, "step": 9604 }, { "epoch": 1.8589396284829722, "grad_norm": 0.06995091587305069, "learning_rate": 9.273728615711736e-05, "loss": 0.0077, "step": 9605 }, { "epoch": 1.8591331269349847, "grad_norm": 0.11479946225881577, "learning_rate": 9.273579143388245e-05, "loss": 0.0076, "step": 9606 }, { "epoch": 1.859326625386997, "grad_norm": 0.06144202500581741, "learning_rate": 9.273429657035513e-05, "loss": 0.0089, "step": 9607 }, { "epoch": 1.8595201238390093, "grad_norm": 0.08857011049985886, "learning_rate": 9.273280156654101e-05, "loss": 0.0073, "step": 9608 }, { "epoch": 1.8597136222910216, "grad_norm": 0.10216782987117767, "learning_rate": 9.273130642244562e-05, "loss": 0.0084, "step": 9609 }, { "epoch": 1.859907120743034, "grad_norm": 0.07172255963087082, "learning_rate": 9.272981113807454e-05, "loss": 0.0064, "step": 9610 }, { "epoch": 1.8601006191950464, "grad_norm": 0.07916931807994843, "learning_rate": 9.272831571343333e-05, "loss": 0.0082, "step": 9611 }, { "epoch": 1.8602941176470589, "grad_norm": 0.06864923238754272, "learning_rate": 9.272682014852752e-05, "loss": 0.0081, "step": 9612 }, { "epoch": 1.8604876160990713, "grad_norm": 0.09000764787197113, "learning_rate": 9.272532444336271e-05, "loss": 0.0079, "step": 9613 }, { "epoch": 1.8606811145510835, "grad_norm": 
0.08006925880908966, "learning_rate": 9.272382859794441e-05, "loss": 0.009, "step": 9614 }, { "epoch": 1.860874613003096, "grad_norm": 0.06030122563242912, "learning_rate": 9.272233261227823e-05, "loss": 0.006, "step": 9615 }, { "epoch": 1.8610681114551082, "grad_norm": 0.09452666342258453, "learning_rate": 9.27208364863697e-05, "loss": 0.007, "step": 9616 }, { "epoch": 1.8612616099071206, "grad_norm": 0.04257604107260704, "learning_rate": 9.271934022022442e-05, "loss": 0.0077, "step": 9617 }, { "epoch": 1.861455108359133, "grad_norm": 0.11161022633314133, "learning_rate": 9.271784381384791e-05, "loss": 0.0076, "step": 9618 }, { "epoch": 1.8616486068111455, "grad_norm": 0.03324759006500244, "learning_rate": 9.271634726724575e-05, "loss": 0.0069, "step": 9619 }, { "epoch": 1.861842105263158, "grad_norm": 0.0966041088104248, "learning_rate": 9.27148505804235e-05, "loss": 0.0086, "step": 9620 }, { "epoch": 1.8620356037151704, "grad_norm": 0.06820687651634216, "learning_rate": 9.271335375338674e-05, "loss": 0.0083, "step": 9621 }, { "epoch": 1.8622291021671826, "grad_norm": 0.08761821687221527, "learning_rate": 9.2711856786141e-05, "loss": 0.0084, "step": 9622 }, { "epoch": 1.862422600619195, "grad_norm": 0.07713437080383301, "learning_rate": 9.27103596786919e-05, "loss": 0.0076, "step": 9623 }, { "epoch": 1.8626160990712073, "grad_norm": 0.06799793988466263, "learning_rate": 9.270886243104496e-05, "loss": 0.0085, "step": 9624 }, { "epoch": 1.8628095975232197, "grad_norm": 0.058020129799842834, "learning_rate": 9.270736504320577e-05, "loss": 0.0095, "step": 9625 }, { "epoch": 1.8630030959752322, "grad_norm": 0.057193607091903687, "learning_rate": 9.270586751517986e-05, "loss": 0.008, "step": 9626 }, { "epoch": 1.8631965944272446, "grad_norm": 0.06451474875211716, "learning_rate": 9.270436984697286e-05, "loss": 0.0084, "step": 9627 }, { "epoch": 1.863390092879257, "grad_norm": 0.06986778229475021, "learning_rate": 9.27028720385903e-05, "loss": 0.0066, "step": 9628 }, { "epoch": 1.8635835913312695, "grad_norm": 0.058584656566381454, "learning_rate": 9.270137409003773e-05, "loss": 0.0072, "step": 9629 }, { "epoch": 1.8637770897832817, "grad_norm": 0.05742267891764641, "learning_rate": 9.269987600132076e-05, "loss": 0.0079, "step": 9630 }, { "epoch": 1.8639705882352942, "grad_norm": 0.05621430277824402, "learning_rate": 9.269837777244492e-05, "loss": 0.0101, "step": 9631 }, { "epoch": 1.8641640866873064, "grad_norm": 0.04527517408132553, "learning_rate": 9.269687940341583e-05, "loss": 0.008, "step": 9632 }, { "epoch": 1.8643575851393188, "grad_norm": 0.04978114366531372, "learning_rate": 9.269538089423901e-05, "loss": 0.0067, "step": 9633 }, { "epoch": 1.8645510835913313, "grad_norm": 0.06464225798845291, "learning_rate": 9.269388224492007e-05, "loss": 0.0068, "step": 9634 }, { "epoch": 1.8647445820433437, "grad_norm": 0.06157558783888817, "learning_rate": 9.269238345546452e-05, "loss": 0.0072, "step": 9635 }, { "epoch": 1.8649380804953561, "grad_norm": 0.06376224756240845, "learning_rate": 9.269088452587802e-05, "loss": 0.0068, "step": 9636 }, { "epoch": 1.8651315789473686, "grad_norm": 0.06214495375752449, "learning_rate": 9.268938545616608e-05, "loss": 0.0072, "step": 9637 }, { "epoch": 1.8653250773993808, "grad_norm": 0.06158828362822533, "learning_rate": 9.26878862463343e-05, "loss": 0.008, "step": 9638 }, { "epoch": 1.865518575851393, "grad_norm": 0.054128408432006836, "learning_rate": 9.268638689638825e-05, "loss": 0.0089, "step": 9639 }, { "epoch": 1.8657120743034055, "grad_norm": 
0.045348696410655975, "learning_rate": 9.26848874063335e-05, "loss": 0.0078, "step": 9640 }, { "epoch": 1.865905572755418, "grad_norm": 0.07915057986974716, "learning_rate": 9.26833877761756e-05, "loss": 0.0086, "step": 9641 }, { "epoch": 1.8660990712074303, "grad_norm": 0.05314701795578003, "learning_rate": 9.268188800592017e-05, "loss": 0.0059, "step": 9642 }, { "epoch": 1.8662925696594428, "grad_norm": 0.09815198928117752, "learning_rate": 9.268038809557276e-05, "loss": 0.008, "step": 9643 }, { "epoch": 1.8664860681114552, "grad_norm": 0.06294161081314087, "learning_rate": 9.267888804513895e-05, "loss": 0.0072, "step": 9644 }, { "epoch": 1.8666795665634675, "grad_norm": 0.07363877445459366, "learning_rate": 9.267738785462433e-05, "loss": 0.0081, "step": 9645 }, { "epoch": 1.86687306501548, "grad_norm": 0.06652571260929108, "learning_rate": 9.267588752403445e-05, "loss": 0.008, "step": 9646 }, { "epoch": 1.8670665634674921, "grad_norm": 0.09567069262266159, "learning_rate": 9.267438705337492e-05, "loss": 0.0068, "step": 9647 }, { "epoch": 1.8672600619195046, "grad_norm": 0.057396575808525085, "learning_rate": 9.267288644265128e-05, "loss": 0.0072, "step": 9648 }, { "epoch": 1.867453560371517, "grad_norm": 0.0726374015212059, "learning_rate": 9.267138569186916e-05, "loss": 0.0077, "step": 9649 }, { "epoch": 1.8676470588235294, "grad_norm": 0.04268817976117134, "learning_rate": 9.266988480103409e-05, "loss": 0.0078, "step": 9650 }, { "epoch": 1.8678405572755419, "grad_norm": 0.06480488181114197, "learning_rate": 9.266838377015167e-05, "loss": 0.0084, "step": 9651 }, { "epoch": 1.8680340557275543, "grad_norm": 0.06715959310531616, "learning_rate": 9.266688259922749e-05, "loss": 0.008, "step": 9652 }, { "epoch": 1.8682275541795665, "grad_norm": 0.08052261173725128, "learning_rate": 9.266538128826712e-05, "loss": 0.008, "step": 9653 }, { "epoch": 1.868421052631579, "grad_norm": 0.0766863003373146, "learning_rate": 9.266387983727613e-05, "loss": 0.0089, "step": 9654 }, { "epoch": 1.8686145510835912, "grad_norm": 0.11727060377597809, "learning_rate": 9.266237824626015e-05, "loss": 0.0092, "step": 9655 }, { "epoch": 1.8688080495356036, "grad_norm": 0.07366285473108292, "learning_rate": 9.26608765152247e-05, "loss": 0.0084, "step": 9656 }, { "epoch": 1.869001547987616, "grad_norm": 0.11054553091526031, "learning_rate": 9.265937464417539e-05, "loss": 0.0063, "step": 9657 }, { "epoch": 1.8691950464396285, "grad_norm": 0.09022876620292664, "learning_rate": 9.26578726331178e-05, "loss": 0.007, "step": 9658 }, { "epoch": 1.869388544891641, "grad_norm": 0.10778705775737762, "learning_rate": 9.265637048205754e-05, "loss": 0.0078, "step": 9659 }, { "epoch": 1.8695820433436534, "grad_norm": 0.1133074089884758, "learning_rate": 9.265486819100017e-05, "loss": 0.0073, "step": 9660 }, { "epoch": 1.8697755417956656, "grad_norm": 0.07940878719091415, "learning_rate": 9.265336575995127e-05, "loss": 0.008, "step": 9661 }, { "epoch": 1.869969040247678, "grad_norm": 0.1290713995695114, "learning_rate": 9.265186318891643e-05, "loss": 0.0087, "step": 9662 }, { "epoch": 1.8701625386996903, "grad_norm": 0.07376021146774292, "learning_rate": 9.265036047790125e-05, "loss": 0.0067, "step": 9663 }, { "epoch": 1.8703560371517027, "grad_norm": 0.11183375120162964, "learning_rate": 9.264885762691129e-05, "loss": 0.0084, "step": 9664 }, { "epoch": 1.8705495356037152, "grad_norm": 0.06755921989679337, "learning_rate": 9.264735463595218e-05, "loss": 0.0067, "step": 9665 }, { "epoch": 1.8707430340557276, "grad_norm": 
0.09052169322967529, "learning_rate": 9.264585150502946e-05, "loss": 0.0078, "step": 9666 }, { "epoch": 1.87093653250774, "grad_norm": 0.06261121481657028, "learning_rate": 9.264434823414875e-05, "loss": 0.0064, "step": 9667 }, { "epoch": 1.8711300309597523, "grad_norm": 0.050523098558187485, "learning_rate": 9.264284482331562e-05, "loss": 0.0073, "step": 9668 }, { "epoch": 1.8713235294117647, "grad_norm": 0.06514734029769897, "learning_rate": 9.264134127253569e-05, "loss": 0.0082, "step": 9669 }, { "epoch": 1.871517027863777, "grad_norm": 0.030388839542865753, "learning_rate": 9.26398375818145e-05, "loss": 0.0065, "step": 9670 }, { "epoch": 1.8717105263157894, "grad_norm": 0.0598866268992424, "learning_rate": 9.26383337511577e-05, "loss": 0.009, "step": 9671 }, { "epoch": 1.8719040247678018, "grad_norm": 0.03539906069636345, "learning_rate": 9.263682978057082e-05, "loss": 0.0087, "step": 9672 }, { "epoch": 1.8720975232198143, "grad_norm": 0.03835446015000343, "learning_rate": 9.263532567005949e-05, "loss": 0.0071, "step": 9673 }, { "epoch": 1.8722910216718267, "grad_norm": 0.0615234337747097, "learning_rate": 9.26338214196293e-05, "loss": 0.008, "step": 9674 }, { "epoch": 1.8724845201238391, "grad_norm": 0.04120999947190285, "learning_rate": 9.263231702928584e-05, "loss": 0.0079, "step": 9675 }, { "epoch": 1.8726780185758514, "grad_norm": 0.04719210043549538, "learning_rate": 9.263081249903469e-05, "loss": 0.0069, "step": 9676 }, { "epoch": 1.8728715170278638, "grad_norm": 0.05471547693014145, "learning_rate": 9.262930782888144e-05, "loss": 0.0072, "step": 9677 }, { "epoch": 1.873065015479876, "grad_norm": 0.05051110312342644, "learning_rate": 9.262780301883172e-05, "loss": 0.0082, "step": 9678 }, { "epoch": 1.8732585139318885, "grad_norm": 0.048359647393226624, "learning_rate": 9.262629806889107e-05, "loss": 0.0087, "step": 9679 }, { "epoch": 1.873452012383901, "grad_norm": 0.0712725818157196, "learning_rate": 9.262479297906513e-05, "loss": 0.0069, "step": 9680 }, { "epoch": 1.8736455108359134, "grad_norm": 0.05361293628811836, "learning_rate": 9.26232877493595e-05, "loss": 0.0063, "step": 9681 }, { "epoch": 1.8738390092879258, "grad_norm": 0.08280543237924576, "learning_rate": 9.262178237977973e-05, "loss": 0.0067, "step": 9682 }, { "epoch": 1.8740325077399382, "grad_norm": 0.05024543032050133, "learning_rate": 9.262027687033146e-05, "loss": 0.0076, "step": 9683 }, { "epoch": 1.8742260061919505, "grad_norm": 0.04365149512887001, "learning_rate": 9.261877122102026e-05, "loss": 0.0068, "step": 9684 }, { "epoch": 1.874419504643963, "grad_norm": 0.05921240895986557, "learning_rate": 9.261726543185173e-05, "loss": 0.0077, "step": 9685 }, { "epoch": 1.8746130030959751, "grad_norm": 0.022930076345801353, "learning_rate": 9.261575950283148e-05, "loss": 0.0062, "step": 9686 }, { "epoch": 1.8748065015479876, "grad_norm": 0.0479591004550457, "learning_rate": 9.261425343396514e-05, "loss": 0.0061, "step": 9687 }, { "epoch": 1.875, "grad_norm": 0.04719050973653793, "learning_rate": 9.261274722525823e-05, "loss": 0.0086, "step": 9688 }, { "epoch": 1.8751934984520124, "grad_norm": 0.08969158679246902, "learning_rate": 9.261124087671641e-05, "loss": 0.0066, "step": 9689 }, { "epoch": 1.8753869969040249, "grad_norm": 0.05134458839893341, "learning_rate": 9.260973438834526e-05, "loss": 0.0078, "step": 9690 }, { "epoch": 1.875580495356037, "grad_norm": 0.10822129249572754, "learning_rate": 9.26082277601504e-05, "loss": 0.0061, "step": 9691 }, { "epoch": 1.8757739938080495, "grad_norm": 
0.05369715765118599, "learning_rate": 9.26067209921374e-05, "loss": 0.0074, "step": 9692 }, { "epoch": 1.8759674922600618, "grad_norm": 0.073126420378685, "learning_rate": 9.260521408431188e-05, "loss": 0.0077, "step": 9693 }, { "epoch": 1.8761609907120742, "grad_norm": 0.054568856954574585, "learning_rate": 9.260370703667946e-05, "loss": 0.0076, "step": 9694 }, { "epoch": 1.8763544891640866, "grad_norm": 0.0639084205031395, "learning_rate": 9.260219984924569e-05, "loss": 0.0078, "step": 9695 }, { "epoch": 1.876547987616099, "grad_norm": 0.050751686096191406, "learning_rate": 9.260069252201623e-05, "loss": 0.0098, "step": 9696 }, { "epoch": 1.8767414860681115, "grad_norm": 0.061771001666784286, "learning_rate": 9.259918505499665e-05, "loss": 0.0069, "step": 9697 }, { "epoch": 1.876934984520124, "grad_norm": 0.08284758776426315, "learning_rate": 9.259767744819256e-05, "loss": 0.0082, "step": 9698 }, { "epoch": 1.8771284829721362, "grad_norm": 0.048067715018987656, "learning_rate": 9.259616970160958e-05, "loss": 0.0083, "step": 9699 }, { "epoch": 1.8773219814241486, "grad_norm": 0.06937137246131897, "learning_rate": 9.25946618152533e-05, "loss": 0.0072, "step": 9700 }, { "epoch": 1.8775154798761609, "grad_norm": 0.056102119386196136, "learning_rate": 9.259315378912933e-05, "loss": 0.0078, "step": 9701 }, { "epoch": 1.8777089783281733, "grad_norm": 0.04169614240527153, "learning_rate": 9.25916456232433e-05, "loss": 0.0081, "step": 9702 }, { "epoch": 1.8779024767801857, "grad_norm": 0.0655389279127121, "learning_rate": 9.259013731760076e-05, "loss": 0.0081, "step": 9703 }, { "epoch": 1.8780959752321982, "grad_norm": 0.03584178537130356, "learning_rate": 9.258862887220736e-05, "loss": 0.0073, "step": 9704 }, { "epoch": 1.8782894736842106, "grad_norm": 0.07752058655023575, "learning_rate": 9.258712028706872e-05, "loss": 0.0079, "step": 9705 }, { "epoch": 1.878482972136223, "grad_norm": 0.0425322949886322, "learning_rate": 9.258561156219042e-05, "loss": 0.0087, "step": 9706 }, { "epoch": 1.8786764705882353, "grad_norm": 0.12550464272499084, "learning_rate": 9.258410269757807e-05, "loss": 0.0065, "step": 9707 }, { "epoch": 1.8788699690402477, "grad_norm": 0.05267998203635216, "learning_rate": 9.258259369323729e-05, "loss": 0.0067, "step": 9708 }, { "epoch": 1.87906346749226, "grad_norm": 0.10552439838647842, "learning_rate": 9.258108454917368e-05, "loss": 0.0064, "step": 9709 }, { "epoch": 1.8792569659442724, "grad_norm": 0.108310267329216, "learning_rate": 9.257957526539287e-05, "loss": 0.0075, "step": 9710 }, { "epoch": 1.8794504643962848, "grad_norm": 0.0846320167183876, "learning_rate": 9.257806584190045e-05, "loss": 0.0076, "step": 9711 }, { "epoch": 1.8796439628482973, "grad_norm": 0.09082005172967911, "learning_rate": 9.257655627870205e-05, "loss": 0.0073, "step": 9712 }, { "epoch": 1.8798374613003097, "grad_norm": 0.08088862150907516, "learning_rate": 9.257504657580326e-05, "loss": 0.0064, "step": 9713 }, { "epoch": 1.880030959752322, "grad_norm": 0.05805972218513489, "learning_rate": 9.257353673320971e-05, "loss": 0.0083, "step": 9714 }, { "epoch": 1.8802244582043344, "grad_norm": 0.11039262264966965, "learning_rate": 9.257202675092702e-05, "loss": 0.0069, "step": 9715 }, { "epoch": 1.8804179566563466, "grad_norm": 0.050389934331178665, "learning_rate": 9.257051662896078e-05, "loss": 0.005, "step": 9716 }, { "epoch": 1.880611455108359, "grad_norm": 0.06925872713327408, "learning_rate": 9.256900636731661e-05, "loss": 0.0077, "step": 9717 }, { "epoch": 1.8808049535603715, "grad_norm": 
0.04165276885032654, "learning_rate": 9.256749596600016e-05, "loss": 0.0078, "step": 9718 }, { "epoch": 1.880998452012384, "grad_norm": 0.044761911034584045, "learning_rate": 9.256598542501698e-05, "loss": 0.0089, "step": 9719 }, { "epoch": 1.8811919504643964, "grad_norm": 0.060543060302734375, "learning_rate": 9.256447474437274e-05, "loss": 0.0079, "step": 9720 }, { "epoch": 1.8813854489164088, "grad_norm": 0.03186739236116409, "learning_rate": 9.256296392407306e-05, "loss": 0.0087, "step": 9721 }, { "epoch": 1.881578947368421, "grad_norm": 0.07593511790037155, "learning_rate": 9.256145296412351e-05, "loss": 0.0074, "step": 9722 }, { "epoch": 1.8817724458204335, "grad_norm": 0.03102961741387844, "learning_rate": 9.255994186452973e-05, "loss": 0.0069, "step": 9723 }, { "epoch": 1.8819659442724457, "grad_norm": 0.08504176139831543, "learning_rate": 9.255843062529736e-05, "loss": 0.0093, "step": 9724 }, { "epoch": 1.8821594427244581, "grad_norm": 0.030602049082517624, "learning_rate": 9.2556919246432e-05, "loss": 0.0074, "step": 9725 }, { "epoch": 1.8823529411764706, "grad_norm": 0.07867849618196487, "learning_rate": 9.255540772793924e-05, "loss": 0.0072, "step": 9726 }, { "epoch": 1.882546439628483, "grad_norm": 0.0588349774479866, "learning_rate": 9.255389606982477e-05, "loss": 0.0084, "step": 9727 }, { "epoch": 1.8827399380804954, "grad_norm": 0.08191917091608047, "learning_rate": 9.255238427209414e-05, "loss": 0.0093, "step": 9728 }, { "epoch": 1.8829334365325079, "grad_norm": 0.09089306741952896, "learning_rate": 9.255087233475299e-05, "loss": 0.0087, "step": 9729 }, { "epoch": 1.88312693498452, "grad_norm": 0.07471442222595215, "learning_rate": 9.254936025780697e-05, "loss": 0.0076, "step": 9730 }, { "epoch": 1.8833204334365325, "grad_norm": 0.053642984479665756, "learning_rate": 9.254784804126168e-05, "loss": 0.0082, "step": 9731 }, { "epoch": 1.8835139318885448, "grad_norm": 0.0670342817902565, "learning_rate": 9.254633568512273e-05, "loss": 0.0075, "step": 9732 }, { "epoch": 1.8837074303405572, "grad_norm": 0.06995932757854462, "learning_rate": 9.254482318939576e-05, "loss": 0.007, "step": 9733 }, { "epoch": 1.8839009287925697, "grad_norm": 0.026838401332497597, "learning_rate": 9.254331055408639e-05, "loss": 0.0079, "step": 9734 }, { "epoch": 1.884094427244582, "grad_norm": 0.06311141699552536, "learning_rate": 9.254179777920024e-05, "loss": 0.007, "step": 9735 }, { "epoch": 1.8842879256965945, "grad_norm": 0.05780133605003357, "learning_rate": 9.254028486474294e-05, "loss": 0.0077, "step": 9736 }, { "epoch": 1.884481424148607, "grad_norm": 0.04454318434000015, "learning_rate": 9.253877181072011e-05, "loss": 0.0076, "step": 9737 }, { "epoch": 1.8846749226006192, "grad_norm": 0.051968999207019806, "learning_rate": 9.253725861713737e-05, "loss": 0.0078, "step": 9738 }, { "epoch": 1.8848684210526314, "grad_norm": 0.09331176429986954, "learning_rate": 9.253574528400037e-05, "loss": 0.0093, "step": 9739 }, { "epoch": 1.8850619195046439, "grad_norm": 0.04563980549573898, "learning_rate": 9.253423181131471e-05, "loss": 0.0068, "step": 9740 }, { "epoch": 1.8852554179566563, "grad_norm": 0.10962852090597153, "learning_rate": 9.253271819908602e-05, "loss": 0.0091, "step": 9741 }, { "epoch": 1.8854489164086687, "grad_norm": 0.05873312056064606, "learning_rate": 9.253120444731993e-05, "loss": 0.0066, "step": 9742 }, { "epoch": 1.8856424148606812, "grad_norm": 0.10050319135189056, "learning_rate": 9.252969055602208e-05, "loss": 0.0088, "step": 9743 }, { "epoch": 1.8858359133126936, "grad_norm": 
0.06156589835882187, "learning_rate": 9.252817652519807e-05, "loss": 0.0073, "step": 9744 }, { "epoch": 1.8860294117647058, "grad_norm": 0.0784592404961586, "learning_rate": 9.252666235485357e-05, "loss": 0.0063, "step": 9745 }, { "epoch": 1.8862229102167183, "grad_norm": 0.10435014218091965, "learning_rate": 9.252514804499416e-05, "loss": 0.0095, "step": 9746 }, { "epoch": 1.8864164086687305, "grad_norm": 0.07607351988554001, "learning_rate": 9.252363359562553e-05, "loss": 0.0086, "step": 9747 }, { "epoch": 1.886609907120743, "grad_norm": 0.0839817225933075, "learning_rate": 9.252211900675324e-05, "loss": 0.0068, "step": 9748 }, { "epoch": 1.8868034055727554, "grad_norm": 0.07821333408355713, "learning_rate": 9.252060427838298e-05, "loss": 0.0092, "step": 9749 }, { "epoch": 1.8869969040247678, "grad_norm": 0.09327947348356247, "learning_rate": 9.251908941052036e-05, "loss": 0.0064, "step": 9750 }, { "epoch": 1.8871904024767803, "grad_norm": 0.0852680653333664, "learning_rate": 9.251757440317099e-05, "loss": 0.0074, "step": 9751 }, { "epoch": 1.8873839009287927, "grad_norm": 0.05665796995162964, "learning_rate": 9.251605925634054e-05, "loss": 0.0067, "step": 9752 }, { "epoch": 1.887577399380805, "grad_norm": 0.09635748714208603, "learning_rate": 9.251454397003461e-05, "loss": 0.0075, "step": 9753 }, { "epoch": 1.8877708978328174, "grad_norm": 0.032856181263923645, "learning_rate": 9.251302854425888e-05, "loss": 0.0079, "step": 9754 }, { "epoch": 1.8879643962848296, "grad_norm": 0.09032329171895981, "learning_rate": 9.251151297901892e-05, "loss": 0.0065, "step": 9755 }, { "epoch": 1.888157894736842, "grad_norm": 0.0752594992518425, "learning_rate": 9.250999727432042e-05, "loss": 0.009, "step": 9756 }, { "epoch": 1.8883513931888545, "grad_norm": 0.06738334149122238, "learning_rate": 9.2508481430169e-05, "loss": 0.0078, "step": 9757 }, { "epoch": 1.888544891640867, "grad_norm": 0.08172135055065155, "learning_rate": 9.250696544657026e-05, "loss": 0.0077, "step": 9758 }, { "epoch": 1.8887383900928794, "grad_norm": 0.037918370217084885, "learning_rate": 9.250544932352988e-05, "loss": 0.0097, "step": 9759 }, { "epoch": 1.8889318885448918, "grad_norm": 0.03702186420559883, "learning_rate": 9.250393306105347e-05, "loss": 0.0076, "step": 9760 }, { "epoch": 1.889125386996904, "grad_norm": 0.057789385318756104, "learning_rate": 9.250241665914668e-05, "loss": 0.0071, "step": 9761 }, { "epoch": 1.8893188854489165, "grad_norm": 0.03545104339718819, "learning_rate": 9.250090011781516e-05, "loss": 0.0084, "step": 9762 }, { "epoch": 1.8895123839009287, "grad_norm": 0.04545164108276367, "learning_rate": 9.249938343706453e-05, "loss": 0.009, "step": 9763 }, { "epoch": 1.8897058823529411, "grad_norm": 0.07356583327054977, "learning_rate": 9.249786661690043e-05, "loss": 0.0076, "step": 9764 }, { "epoch": 1.8898993808049536, "grad_norm": 0.06311564892530441, "learning_rate": 9.24963496573285e-05, "loss": 0.0075, "step": 9765 }, { "epoch": 1.890092879256966, "grad_norm": 0.08383078873157501, "learning_rate": 9.24948325583544e-05, "loss": 0.007, "step": 9766 }, { "epoch": 1.8902863777089784, "grad_norm": 0.052543915808200836, "learning_rate": 9.24933153199837e-05, "loss": 0.0085, "step": 9767 }, { "epoch": 1.8904798761609907, "grad_norm": 0.0695180669426918, "learning_rate": 9.249179794222214e-05, "loss": 0.0079, "step": 9768 }, { "epoch": 1.890673374613003, "grad_norm": 0.05576188117265701, "learning_rate": 9.249028042507531e-05, "loss": 0.0069, "step": 9769 }, { "epoch": 1.8908668730650153, "grad_norm": 
0.06731889396905899, "learning_rate": 9.248876276854883e-05, "loss": 0.0069, "step": 9770 }, { "epoch": 1.8910603715170278, "grad_norm": 0.05134331062436104, "learning_rate": 9.248724497264839e-05, "loss": 0.007, "step": 9771 }, { "epoch": 1.8912538699690402, "grad_norm": 0.08979891240596771, "learning_rate": 9.248572703737959e-05, "loss": 0.0062, "step": 9772 }, { "epoch": 1.8914473684210527, "grad_norm": 0.08909596502780914, "learning_rate": 9.248420896274811e-05, "loss": 0.0067, "step": 9773 }, { "epoch": 1.891640866873065, "grad_norm": 0.09287546575069427, "learning_rate": 9.248269074875957e-05, "loss": 0.007, "step": 9774 }, { "epoch": 1.8918343653250775, "grad_norm": 0.059039097279310226, "learning_rate": 9.248117239541962e-05, "loss": 0.0084, "step": 9775 }, { "epoch": 1.8920278637770898, "grad_norm": 0.153209388256073, "learning_rate": 9.247965390273391e-05, "loss": 0.0071, "step": 9776 }, { "epoch": 1.8922213622291022, "grad_norm": 0.041149068623781204, "learning_rate": 9.24781352707081e-05, "loss": 0.0079, "step": 9777 }, { "epoch": 1.8924148606811144, "grad_norm": 0.16493044793605804, "learning_rate": 9.247661649934779e-05, "loss": 0.0079, "step": 9778 }, { "epoch": 1.8926083591331269, "grad_norm": 0.05527105927467346, "learning_rate": 9.247509758865867e-05, "loss": 0.0079, "step": 9779 }, { "epoch": 1.8928018575851393, "grad_norm": 0.1633143126964569, "learning_rate": 9.247357853864637e-05, "loss": 0.008, "step": 9780 }, { "epoch": 1.8929953560371517, "grad_norm": 0.0863749161362648, "learning_rate": 9.247205934931654e-05, "loss": 0.0093, "step": 9781 }, { "epoch": 1.8931888544891642, "grad_norm": 0.1172981783747673, "learning_rate": 9.247054002067481e-05, "loss": 0.0089, "step": 9782 }, { "epoch": 1.8933823529411766, "grad_norm": 0.10838054865598679, "learning_rate": 9.246902055272686e-05, "loss": 0.0085, "step": 9783 }, { "epoch": 1.8935758513931888, "grad_norm": 0.11819660663604736, "learning_rate": 9.246750094547833e-05, "loss": 0.0061, "step": 9784 }, { "epoch": 1.8937693498452013, "grad_norm": 0.11292658001184464, "learning_rate": 9.246598119893485e-05, "loss": 0.0071, "step": 9785 }, { "epoch": 1.8939628482972135, "grad_norm": 0.11081406474113464, "learning_rate": 9.246446131310209e-05, "loss": 0.0072, "step": 9786 }, { "epoch": 1.894156346749226, "grad_norm": 0.067703478038311, "learning_rate": 9.246294128798568e-05, "loss": 0.0083, "step": 9787 }, { "epoch": 1.8943498452012384, "grad_norm": 0.07892071455717087, "learning_rate": 9.246142112359132e-05, "loss": 0.0074, "step": 9788 }, { "epoch": 1.8945433436532508, "grad_norm": 0.055415503680706024, "learning_rate": 9.24599008199246e-05, "loss": 0.0077, "step": 9789 }, { "epoch": 1.8947368421052633, "grad_norm": 0.06127026304602623, "learning_rate": 9.24583803769912e-05, "loss": 0.0079, "step": 9790 }, { "epoch": 1.8949303405572755, "grad_norm": 0.06386278569698334, "learning_rate": 9.245685979479678e-05, "loss": 0.0077, "step": 9791 }, { "epoch": 1.895123839009288, "grad_norm": 0.055072445422410965, "learning_rate": 9.245533907334698e-05, "loss": 0.0091, "step": 9792 }, { "epoch": 1.8953173374613002, "grad_norm": 0.0774226114153862, "learning_rate": 9.245381821264746e-05, "loss": 0.0069, "step": 9793 }, { "epoch": 1.8955108359133126, "grad_norm": 0.04169921949505806, "learning_rate": 9.245229721270386e-05, "loss": 0.0077, "step": 9794 }, { "epoch": 1.895704334365325, "grad_norm": 0.08118126541376114, "learning_rate": 9.245077607352188e-05, "loss": 0.0094, "step": 9795 }, { "epoch": 1.8958978328173375, "grad_norm": 
0.03241255134344101, "learning_rate": 9.244925479510711e-05, "loss": 0.008, "step": 9796 }, { "epoch": 1.89609133126935, "grad_norm": 0.07224775850772858, "learning_rate": 9.244773337746526e-05, "loss": 0.0079, "step": 9797 }, { "epoch": 1.8962848297213624, "grad_norm": 0.04048129916191101, "learning_rate": 9.244621182060195e-05, "loss": 0.0081, "step": 9798 }, { "epoch": 1.8964783281733746, "grad_norm": 0.05394433066248894, "learning_rate": 9.244469012452285e-05, "loss": 0.0073, "step": 9799 }, { "epoch": 1.896671826625387, "grad_norm": 0.03947438672184944, "learning_rate": 9.244316828923363e-05, "loss": 0.0067, "step": 9800 }, { "epoch": 1.8968653250773992, "grad_norm": 0.05962475389242172, "learning_rate": 9.244164631473993e-05, "loss": 0.0068, "step": 9801 }, { "epoch": 1.8970588235294117, "grad_norm": 0.03641393035650253, "learning_rate": 9.244012420104743e-05, "loss": 0.0076, "step": 9802 }, { "epoch": 1.8972523219814241, "grad_norm": 0.06655926257371902, "learning_rate": 9.243860194816176e-05, "loss": 0.0077, "step": 9803 }, { "epoch": 1.8974458204334366, "grad_norm": 0.034794729202985764, "learning_rate": 9.243707955608859e-05, "loss": 0.0072, "step": 9804 }, { "epoch": 1.897639318885449, "grad_norm": 0.07704425603151321, "learning_rate": 9.243555702483359e-05, "loss": 0.0074, "step": 9805 }, { "epoch": 1.8978328173374615, "grad_norm": 0.04763326421380043, "learning_rate": 9.24340343544024e-05, "loss": 0.0083, "step": 9806 }, { "epoch": 1.8980263157894737, "grad_norm": 0.0854286476969719, "learning_rate": 9.24325115448007e-05, "loss": 0.0085, "step": 9807 }, { "epoch": 1.8982198142414861, "grad_norm": 0.06283935159444809, "learning_rate": 9.243098859603416e-05, "loss": 0.0068, "step": 9808 }, { "epoch": 1.8984133126934983, "grad_norm": 0.05883738398551941, "learning_rate": 9.242946550810841e-05, "loss": 0.0072, "step": 9809 }, { "epoch": 1.8986068111455108, "grad_norm": 0.06618649512529373, "learning_rate": 9.242794228102914e-05, "loss": 0.0065, "step": 9810 }, { "epoch": 1.8988003095975232, "grad_norm": 0.057860150933265686, "learning_rate": 9.2426418914802e-05, "loss": 0.0077, "step": 9811 }, { "epoch": 1.8989938080495357, "grad_norm": 0.09917958080768585, "learning_rate": 9.242489540943267e-05, "loss": 0.0064, "step": 9812 }, { "epoch": 1.899187306501548, "grad_norm": 0.05931966379284859, "learning_rate": 9.242337176492678e-05, "loss": 0.008, "step": 9813 }, { "epoch": 1.8993808049535603, "grad_norm": 0.09113986045122147, "learning_rate": 9.242184798129002e-05, "loss": 0.0071, "step": 9814 }, { "epoch": 1.8995743034055728, "grad_norm": 0.07605576515197754, "learning_rate": 9.242032405852806e-05, "loss": 0.0079, "step": 9815 }, { "epoch": 1.899767801857585, "grad_norm": 0.08288082480430603, "learning_rate": 9.241879999664655e-05, "loss": 0.0072, "step": 9816 }, { "epoch": 1.8999613003095974, "grad_norm": 0.06687111407518387, "learning_rate": 9.241727579565116e-05, "loss": 0.008, "step": 9817 }, { "epoch": 1.9001547987616099, "grad_norm": 0.06569445133209229, "learning_rate": 9.241575145554756e-05, "loss": 0.0081, "step": 9818 }, { "epoch": 1.9003482972136223, "grad_norm": 0.04930400103330612, "learning_rate": 9.241422697634141e-05, "loss": 0.0084, "step": 9819 }, { "epoch": 1.9005417956656347, "grad_norm": 0.10485659539699554, "learning_rate": 9.241270235803838e-05, "loss": 0.007, "step": 9820 }, { "epoch": 1.9007352941176472, "grad_norm": 0.07515503466129303, "learning_rate": 9.241117760064416e-05, "loss": 0.01, "step": 9821 }, { "epoch": 1.9009287925696594, "grad_norm": 
0.09787978231906891, "learning_rate": 9.24096527041644e-05, "loss": 0.0058, "step": 9822 }, { "epoch": 1.9011222910216719, "grad_norm": 0.09315866231918335, "learning_rate": 9.240812766860475e-05, "loss": 0.0068, "step": 9823 }, { "epoch": 1.901315789473684, "grad_norm": 0.04701628535985947, "learning_rate": 9.24066024939709e-05, "loss": 0.0072, "step": 9824 }, { "epoch": 1.9015092879256965, "grad_norm": 0.12056058645248413, "learning_rate": 9.240507718026853e-05, "loss": 0.0089, "step": 9825 }, { "epoch": 1.901702786377709, "grad_norm": 0.04282967746257782, "learning_rate": 9.24035517275033e-05, "loss": 0.0079, "step": 9826 }, { "epoch": 1.9018962848297214, "grad_norm": 0.10709251463413239, "learning_rate": 9.240202613568087e-05, "loss": 0.0086, "step": 9827 }, { "epoch": 1.9020897832817338, "grad_norm": 0.07509490102529526, "learning_rate": 9.240050040480693e-05, "loss": 0.0092, "step": 9828 }, { "epoch": 1.9022832817337463, "grad_norm": 0.07665500789880753, "learning_rate": 9.239897453488715e-05, "loss": 0.0079, "step": 9829 }, { "epoch": 1.9024767801857585, "grad_norm": 0.09219817817211151, "learning_rate": 9.23974485259272e-05, "loss": 0.0085, "step": 9830 }, { "epoch": 1.902670278637771, "grad_norm": 0.11007282137870789, "learning_rate": 9.239592237793273e-05, "loss": 0.0073, "step": 9831 }, { "epoch": 1.9028637770897832, "grad_norm": 0.08316017687320709, "learning_rate": 9.239439609090946e-05, "loss": 0.0074, "step": 9832 }, { "epoch": 1.9030572755417956, "grad_norm": 0.1360103338956833, "learning_rate": 9.239286966486302e-05, "loss": 0.007, "step": 9833 }, { "epoch": 1.903250773993808, "grad_norm": 0.04926067590713501, "learning_rate": 9.239134309979913e-05, "loss": 0.0063, "step": 9834 }, { "epoch": 1.9034442724458205, "grad_norm": 0.09086889773607254, "learning_rate": 9.238981639572342e-05, "loss": 0.009, "step": 9835 }, { "epoch": 1.903637770897833, "grad_norm": 0.09817720204591751, "learning_rate": 9.23882895526416e-05, "loss": 0.0092, "step": 9836 }, { "epoch": 1.9038312693498454, "grad_norm": 0.07746236026287079, "learning_rate": 9.238676257055932e-05, "loss": 0.006, "step": 9837 }, { "epoch": 1.9040247678018576, "grad_norm": 0.08980143070220947, "learning_rate": 9.238523544948226e-05, "loss": 0.0076, "step": 9838 }, { "epoch": 1.9042182662538698, "grad_norm": 0.06361111253499985, "learning_rate": 9.238370818941611e-05, "loss": 0.0059, "step": 9839 }, { "epoch": 1.9044117647058822, "grad_norm": 0.05736282840371132, "learning_rate": 9.238218079036656e-05, "loss": 0.0075, "step": 9840 }, { "epoch": 1.9046052631578947, "grad_norm": 0.04049757495522499, "learning_rate": 9.238065325233927e-05, "loss": 0.0068, "step": 9841 }, { "epoch": 1.9047987616099071, "grad_norm": 0.06395097821950912, "learning_rate": 9.237912557533991e-05, "loss": 0.0072, "step": 9842 }, { "epoch": 1.9049922600619196, "grad_norm": 0.05819059908390045, "learning_rate": 9.237759775937418e-05, "loss": 0.0067, "step": 9843 }, { "epoch": 1.905185758513932, "grad_norm": 0.08145106583833694, "learning_rate": 9.237606980444776e-05, "loss": 0.0077, "step": 9844 }, { "epoch": 1.9053792569659442, "grad_norm": 0.05768127739429474, "learning_rate": 9.23745417105663e-05, "loss": 0.0073, "step": 9845 }, { "epoch": 1.9055727554179567, "grad_norm": 0.090041883289814, "learning_rate": 9.237301347773552e-05, "loss": 0.0088, "step": 9846 }, { "epoch": 1.905766253869969, "grad_norm": 0.11356937140226364, "learning_rate": 9.237148510596107e-05, "loss": 0.0081, "step": 9847 }, { "epoch": 1.9059597523219813, "grad_norm": 
0.06657147407531738, "learning_rate": 9.236995659524866e-05, "loss": 0.0077, "step": 9848 }, { "epoch": 1.9061532507739938, "grad_norm": 0.08257561177015305, "learning_rate": 9.236842794560397e-05, "loss": 0.0088, "step": 9849 }, { "epoch": 1.9063467492260062, "grad_norm": 0.03541019186377525, "learning_rate": 9.236689915703265e-05, "loss": 0.0081, "step": 9850 }, { "epoch": 1.9065402476780187, "grad_norm": 0.11604005098342896, "learning_rate": 9.236537022954041e-05, "loss": 0.0073, "step": 9851 }, { "epoch": 1.906733746130031, "grad_norm": 0.0428781658411026, "learning_rate": 9.236384116313295e-05, "loss": 0.008, "step": 9852 }, { "epoch": 1.9069272445820433, "grad_norm": 0.11887028813362122, "learning_rate": 9.236231195781591e-05, "loss": 0.0077, "step": 9853 }, { "epoch": 1.9071207430340558, "grad_norm": 0.041211795061826706, "learning_rate": 9.236078261359503e-05, "loss": 0.0087, "step": 9854 }, { "epoch": 1.907314241486068, "grad_norm": 0.08405594527721405, "learning_rate": 9.235925313047595e-05, "loss": 0.0083, "step": 9855 }, { "epoch": 1.9075077399380804, "grad_norm": 0.07148054242134094, "learning_rate": 9.235772350846436e-05, "loss": 0.0076, "step": 9856 }, { "epoch": 1.9077012383900929, "grad_norm": 0.11496680229902267, "learning_rate": 9.235619374756597e-05, "loss": 0.0068, "step": 9857 }, { "epoch": 1.9078947368421053, "grad_norm": 0.044460479170084, "learning_rate": 9.235466384778647e-05, "loss": 0.0074, "step": 9858 }, { "epoch": 1.9080882352941178, "grad_norm": 0.134372279047966, "learning_rate": 9.23531338091315e-05, "loss": 0.0074, "step": 9859 }, { "epoch": 1.9082817337461302, "grad_norm": 0.08293771743774414, "learning_rate": 9.235160363160681e-05, "loss": 0.0077, "step": 9860 }, { "epoch": 1.9084752321981424, "grad_norm": 0.10230779647827148, "learning_rate": 9.235007331521805e-05, "loss": 0.009, "step": 9861 }, { "epoch": 1.9086687306501546, "grad_norm": 0.15630210936069489, "learning_rate": 9.234854285997093e-05, "loss": 0.0083, "step": 9862 }, { "epoch": 1.908862229102167, "grad_norm": 0.07504168897867203, "learning_rate": 9.234701226587113e-05, "loss": 0.0085, "step": 9863 }, { "epoch": 1.9090557275541795, "grad_norm": 0.15814563632011414, "learning_rate": 9.234548153292433e-05, "loss": 0.0086, "step": 9864 }, { "epoch": 1.909249226006192, "grad_norm": 0.07901409268379211, "learning_rate": 9.234395066113624e-05, "loss": 0.0064, "step": 9865 }, { "epoch": 1.9094427244582044, "grad_norm": 0.14375527203083038, "learning_rate": 9.234241965051253e-05, "loss": 0.0094, "step": 9866 }, { "epoch": 1.9096362229102168, "grad_norm": 0.07675280421972275, "learning_rate": 9.234088850105893e-05, "loss": 0.0082, "step": 9867 }, { "epoch": 1.909829721362229, "grad_norm": 0.13344381749629974, "learning_rate": 9.233935721278108e-05, "loss": 0.0077, "step": 9868 }, { "epoch": 1.9100232198142415, "grad_norm": 0.05237671360373497, "learning_rate": 9.233782578568473e-05, "loss": 0.011, "step": 9869 }, { "epoch": 1.9102167182662537, "grad_norm": 0.17796555161476135, "learning_rate": 9.233629421977553e-05, "loss": 0.0073, "step": 9870 }, { "epoch": 1.9104102167182662, "grad_norm": 0.05871327221393585, "learning_rate": 9.233476251505918e-05, "loss": 0.0075, "step": 9871 }, { "epoch": 1.9106037151702786, "grad_norm": 0.1736706644296646, "learning_rate": 9.23332306715414e-05, "loss": 0.0059, "step": 9872 }, { "epoch": 1.910797213622291, "grad_norm": 0.1311449557542801, "learning_rate": 9.233169868922785e-05, "loss": 0.0093, "step": 9873 }, { "epoch": 1.9109907120743035, "grad_norm": 
0.16272670030593872, "learning_rate": 9.233016656812425e-05, "loss": 0.0086, "step": 9874 }, { "epoch": 1.911184210526316, "grad_norm": 0.1783047467470169, "learning_rate": 9.232863430823628e-05, "loss": 0.0086, "step": 9875 }, { "epoch": 1.9113777089783281, "grad_norm": 0.09541236609220505, "learning_rate": 9.232710190956966e-05, "loss": 0.0083, "step": 9876 }, { "epoch": 1.9115712074303406, "grad_norm": 0.20145167410373688, "learning_rate": 9.232556937213007e-05, "loss": 0.0065, "step": 9877 }, { "epoch": 1.9117647058823528, "grad_norm": 0.046369630843400955, "learning_rate": 9.232403669592322e-05, "loss": 0.0085, "step": 9878 }, { "epoch": 1.9119582043343653, "grad_norm": 0.15403024852275848, "learning_rate": 9.232250388095477e-05, "loss": 0.0089, "step": 9879 }, { "epoch": 1.9121517027863777, "grad_norm": 0.127999410033226, "learning_rate": 9.232097092723047e-05, "loss": 0.0097, "step": 9880 }, { "epoch": 1.9123452012383901, "grad_norm": 0.10803240537643433, "learning_rate": 9.2319437834756e-05, "loss": 0.0074, "step": 9881 }, { "epoch": 1.9125386996904026, "grad_norm": 0.1516903042793274, "learning_rate": 9.231790460353703e-05, "loss": 0.0076, "step": 9882 }, { "epoch": 1.912732198142415, "grad_norm": 0.10137809813022614, "learning_rate": 9.23163712335793e-05, "loss": 0.0083, "step": 9883 }, { "epoch": 1.9129256965944272, "grad_norm": 0.1161397248506546, "learning_rate": 9.231483772488852e-05, "loss": 0.007, "step": 9884 }, { "epoch": 1.9131191950464397, "grad_norm": 0.10554692894220352, "learning_rate": 9.231330407747032e-05, "loss": 0.009, "step": 9885 }, { "epoch": 1.913312693498452, "grad_norm": 0.1038665771484375, "learning_rate": 9.231177029133048e-05, "loss": 0.0077, "step": 9886 }, { "epoch": 1.9135061919504643, "grad_norm": 0.09757977724075317, "learning_rate": 9.231023636647468e-05, "loss": 0.0085, "step": 9887 }, { "epoch": 1.9136996904024768, "grad_norm": 0.03965272381901741, "learning_rate": 9.230870230290859e-05, "loss": 0.0074, "step": 9888 }, { "epoch": 1.9138931888544892, "grad_norm": 0.11848237365484238, "learning_rate": 9.230716810063795e-05, "loss": 0.0077, "step": 9889 }, { "epoch": 1.9140866873065017, "grad_norm": 0.02978905662894249, "learning_rate": 9.230563375966845e-05, "loss": 0.0069, "step": 9890 }, { "epoch": 1.9142801857585139, "grad_norm": 0.09787901490926743, "learning_rate": 9.230409928000581e-05, "loss": 0.0078, "step": 9891 }, { "epoch": 1.9144736842105263, "grad_norm": 0.06679437309503555, "learning_rate": 9.230256466165572e-05, "loss": 0.0078, "step": 9892 }, { "epoch": 1.9146671826625385, "grad_norm": 0.07553509622812271, "learning_rate": 9.230102990462386e-05, "loss": 0.007, "step": 9893 }, { "epoch": 1.914860681114551, "grad_norm": 0.05085860565304756, "learning_rate": 9.229949500891597e-05, "loss": 0.0074, "step": 9894 }, { "epoch": 1.9150541795665634, "grad_norm": 0.053683869540691376, "learning_rate": 9.229795997453776e-05, "loss": 0.0057, "step": 9895 }, { "epoch": 1.9152476780185759, "grad_norm": 0.06838082522153854, "learning_rate": 9.229642480149492e-05, "loss": 0.0076, "step": 9896 }, { "epoch": 1.9154411764705883, "grad_norm": 0.09172613173723221, "learning_rate": 9.229488948979316e-05, "loss": 0.0075, "step": 9897 }, { "epoch": 1.9156346749226008, "grad_norm": 0.041509732604026794, "learning_rate": 9.22933540394382e-05, "loss": 0.0073, "step": 9898 }, { "epoch": 1.915828173374613, "grad_norm": 0.06798483431339264, "learning_rate": 9.229181845043574e-05, "loss": 0.0082, "step": 9899 }, { "epoch": 1.9160216718266254, "grad_norm": 
0.04489586874842644, "learning_rate": 9.229028272279148e-05, "loss": 0.0061, "step": 9900 }, { "epoch": 1.9162151702786376, "grad_norm": 0.10614262521266937, "learning_rate": 9.228874685651112e-05, "loss": 0.0078, "step": 9901 }, { "epoch": 1.91640866873065, "grad_norm": 0.06331118196249008, "learning_rate": 9.228721085160041e-05, "loss": 0.0063, "step": 9902 }, { "epoch": 1.9166021671826625, "grad_norm": 0.09312495589256287, "learning_rate": 9.228567470806504e-05, "loss": 0.009, "step": 9903 }, { "epoch": 1.916795665634675, "grad_norm": 0.07579902559518814, "learning_rate": 9.228413842591071e-05, "loss": 0.0073, "step": 9904 }, { "epoch": 1.9169891640866874, "grad_norm": 0.09848780184984207, "learning_rate": 9.228260200514313e-05, "loss": 0.0078, "step": 9905 }, { "epoch": 1.9171826625386998, "grad_norm": 0.09515324234962463, "learning_rate": 9.228106544576805e-05, "loss": 0.0084, "step": 9906 }, { "epoch": 1.917376160990712, "grad_norm": 0.10424857586622238, "learning_rate": 9.227952874779113e-05, "loss": 0.0075, "step": 9907 }, { "epoch": 1.9175696594427245, "grad_norm": 0.09719565510749817, "learning_rate": 9.227799191121812e-05, "loss": 0.0073, "step": 9908 }, { "epoch": 1.9177631578947367, "grad_norm": 0.12241324782371521, "learning_rate": 9.227645493605474e-05, "loss": 0.0073, "step": 9909 }, { "epoch": 1.9179566563467492, "grad_norm": 0.07264117151498795, "learning_rate": 9.227491782230665e-05, "loss": 0.0086, "step": 9910 }, { "epoch": 1.9181501547987616, "grad_norm": 0.14682680368423462, "learning_rate": 9.227338056997961e-05, "loss": 0.0073, "step": 9911 }, { "epoch": 1.918343653250774, "grad_norm": 0.08221990615129471, "learning_rate": 9.227184317907933e-05, "loss": 0.0083, "step": 9912 }, { "epoch": 1.9185371517027865, "grad_norm": 0.14585626125335693, "learning_rate": 9.227030564961152e-05, "loss": 0.0069, "step": 9913 }, { "epoch": 1.9187306501547987, "grad_norm": 0.11537835001945496, "learning_rate": 9.22687679815819e-05, "loss": 0.0064, "step": 9914 }, { "epoch": 1.9189241486068112, "grad_norm": 0.08619330078363419, "learning_rate": 9.226723017499619e-05, "loss": 0.0083, "step": 9915 }, { "epoch": 1.9191176470588234, "grad_norm": 0.13286162912845612, "learning_rate": 9.226569222986009e-05, "loss": 0.0086, "step": 9916 }, { "epoch": 1.9193111455108358, "grad_norm": 0.0907253697514534, "learning_rate": 9.226415414617932e-05, "loss": 0.0078, "step": 9917 }, { "epoch": 1.9195046439628483, "grad_norm": 0.1788376122713089, "learning_rate": 9.226261592395963e-05, "loss": 0.0076, "step": 9918 }, { "epoch": 1.9196981424148607, "grad_norm": 0.09461984783411026, "learning_rate": 9.22610775632067e-05, "loss": 0.0094, "step": 9919 }, { "epoch": 1.9198916408668731, "grad_norm": 0.13960613310337067, "learning_rate": 9.225953906392628e-05, "loss": 0.0057, "step": 9920 }, { "epoch": 1.9200851393188856, "grad_norm": 0.09787406772375107, "learning_rate": 9.225800042612406e-05, "loss": 0.0085, "step": 9921 }, { "epoch": 1.9202786377708978, "grad_norm": 0.1107088178396225, "learning_rate": 9.225646164980578e-05, "loss": 0.0091, "step": 9922 }, { "epoch": 1.9204721362229102, "grad_norm": 0.15403249859809875, "learning_rate": 9.225492273497717e-05, "loss": 0.0077, "step": 9923 }, { "epoch": 1.9206656346749225, "grad_norm": 0.1427149474620819, "learning_rate": 9.225338368164391e-05, "loss": 0.0083, "step": 9924 }, { "epoch": 1.920859133126935, "grad_norm": 0.10602183640003204, "learning_rate": 9.225184448981176e-05, "loss": 0.0097, "step": 9925 }, { "epoch": 1.9210526315789473, "grad_norm": 
0.09517742693424225, "learning_rate": 9.225030515948643e-05, "loss": 0.0076, "step": 9926 }, { "epoch": 1.9212461300309598, "grad_norm": 0.09381616860628128, "learning_rate": 9.224876569067365e-05, "loss": 0.0089, "step": 9927 }, { "epoch": 1.9214396284829722, "grad_norm": 0.08712776750326157, "learning_rate": 9.224722608337914e-05, "loss": 0.007, "step": 9928 }, { "epoch": 1.9216331269349847, "grad_norm": 0.08558247238397598, "learning_rate": 9.224568633760862e-05, "loss": 0.0076, "step": 9929 }, { "epoch": 1.921826625386997, "grad_norm": 0.07596058398485184, "learning_rate": 9.22441464533678e-05, "loss": 0.0074, "step": 9930 }, { "epoch": 1.9220201238390093, "grad_norm": 0.11795434355735779, "learning_rate": 9.224260643066244e-05, "loss": 0.0096, "step": 9931 }, { "epoch": 1.9222136222910216, "grad_norm": 0.05962929129600525, "learning_rate": 9.224106626949824e-05, "loss": 0.0071, "step": 9932 }, { "epoch": 1.922407120743034, "grad_norm": 0.12878671288490295, "learning_rate": 9.223952596988093e-05, "loss": 0.0078, "step": 9933 }, { "epoch": 1.9226006191950464, "grad_norm": 0.08049939572811127, "learning_rate": 9.223798553181624e-05, "loss": 0.0088, "step": 9934 }, { "epoch": 1.9227941176470589, "grad_norm": 0.09255818277597427, "learning_rate": 9.223644495530989e-05, "loss": 0.0088, "step": 9935 }, { "epoch": 1.9229876160990713, "grad_norm": 0.11063779890537262, "learning_rate": 9.223490424036763e-05, "loss": 0.0078, "step": 9936 }, { "epoch": 1.9231811145510835, "grad_norm": 0.09200789779424667, "learning_rate": 9.223336338699516e-05, "loss": 0.0088, "step": 9937 }, { "epoch": 1.923374613003096, "grad_norm": 0.09699562937021255, "learning_rate": 9.223182239519821e-05, "loss": 0.0086, "step": 9938 }, { "epoch": 1.9235681114551082, "grad_norm": 0.06912661343812943, "learning_rate": 9.223028126498253e-05, "loss": 0.0078, "step": 9939 }, { "epoch": 1.9237616099071206, "grad_norm": 0.08709552884101868, "learning_rate": 9.222873999635382e-05, "loss": 0.0082, "step": 9940 }, { "epoch": 1.923955108359133, "grad_norm": 0.0629769042134285, "learning_rate": 9.222719858931785e-05, "loss": 0.0073, "step": 9941 }, { "epoch": 1.9241486068111455, "grad_norm": 0.08968161046504974, "learning_rate": 9.222565704388033e-05, "loss": 0.0082, "step": 9942 }, { "epoch": 1.924342105263158, "grad_norm": 0.07192634791135788, "learning_rate": 9.222411536004698e-05, "loss": 0.0087, "step": 9943 }, { "epoch": 1.9245356037151704, "grad_norm": 0.10538365691900253, "learning_rate": 9.222257353782354e-05, "loss": 0.0094, "step": 9944 }, { "epoch": 1.9247291021671826, "grad_norm": 0.10181902348995209, "learning_rate": 9.222103157721575e-05, "loss": 0.0077, "step": 9945 }, { "epoch": 1.924922600619195, "grad_norm": 0.09315769374370575, "learning_rate": 9.221948947822933e-05, "loss": 0.0075, "step": 9946 }, { "epoch": 1.9251160990712073, "grad_norm": 0.10843908041715622, "learning_rate": 9.221794724087001e-05, "loss": 0.0071, "step": 9947 }, { "epoch": 1.9253095975232197, "grad_norm": 0.07095062732696533, "learning_rate": 9.221640486514355e-05, "loss": 0.0075, "step": 9948 }, { "epoch": 1.9255030959752322, "grad_norm": 0.13651332259178162, "learning_rate": 9.221486235105566e-05, "loss": 0.0087, "step": 9949 }, { "epoch": 1.9256965944272446, "grad_norm": 0.0324568971991539, "learning_rate": 9.221331969861209e-05, "loss": 0.0088, "step": 9950 }, { "epoch": 1.925890092879257, "grad_norm": 0.1337594836950302, "learning_rate": 9.221177690781856e-05, "loss": 0.0068, "step": 9951 }, { "epoch": 1.9260835913312695, "grad_norm": 
0.07292404025793076, "learning_rate": 9.22102339786808e-05, "loss": 0.0086, "step": 9952 }, { "epoch": 1.9262770897832817, "grad_norm": 0.12748457491397858, "learning_rate": 9.220869091120459e-05, "loss": 0.0083, "step": 9953 }, { "epoch": 1.9264705882352942, "grad_norm": 0.07569501549005508, "learning_rate": 9.22071477053956e-05, "loss": 0.009, "step": 9954 }, { "epoch": 1.9266640866873064, "grad_norm": 0.08034566044807434, "learning_rate": 9.220560436125963e-05, "loss": 0.0076, "step": 9955 }, { "epoch": 1.9268575851393188, "grad_norm": 0.09357867389917374, "learning_rate": 9.220406087880238e-05, "loss": 0.0065, "step": 9956 }, { "epoch": 1.9270510835913313, "grad_norm": 0.08342967182397842, "learning_rate": 9.22025172580296e-05, "loss": 0.0079, "step": 9957 }, { "epoch": 1.9272445820433437, "grad_norm": 0.04095721244812012, "learning_rate": 9.220097349894701e-05, "loss": 0.0067, "step": 9958 }, { "epoch": 1.9274380804953561, "grad_norm": 0.08890151232481003, "learning_rate": 9.219942960156041e-05, "loss": 0.007, "step": 9959 }, { "epoch": 1.9276315789473686, "grad_norm": 0.07253457605838776, "learning_rate": 9.219788556587546e-05, "loss": 0.0078, "step": 9960 }, { "epoch": 1.9278250773993808, "grad_norm": 0.07957683503627777, "learning_rate": 9.219634139189794e-05, "loss": 0.0074, "step": 9961 }, { "epoch": 1.928018575851393, "grad_norm": 0.06866299360990524, "learning_rate": 9.21947970796336e-05, "loss": 0.0081, "step": 9962 }, { "epoch": 1.9282120743034055, "grad_norm": 0.08439157903194427, "learning_rate": 9.219325262908817e-05, "loss": 0.0079, "step": 9963 }, { "epoch": 1.928405572755418, "grad_norm": 0.0868620052933693, "learning_rate": 9.219170804026738e-05, "loss": 0.0076, "step": 9964 }, { "epoch": 1.9285990712074303, "grad_norm": 0.06352540850639343, "learning_rate": 9.2190163313177e-05, "loss": 0.008, "step": 9965 }, { "epoch": 1.9287925696594428, "grad_norm": 0.06048274412751198, "learning_rate": 9.218861844782274e-05, "loss": 0.0087, "step": 9966 }, { "epoch": 1.9289860681114552, "grad_norm": 0.06563610583543777, "learning_rate": 9.218707344421037e-05, "loss": 0.0068, "step": 9967 }, { "epoch": 1.9291795665634675, "grad_norm": 0.048905786126852036, "learning_rate": 9.218552830234562e-05, "loss": 0.0068, "step": 9968 }, { "epoch": 1.92937306501548, "grad_norm": 0.05778807774186134, "learning_rate": 9.218398302223425e-05, "loss": 0.0073, "step": 9969 }, { "epoch": 1.9295665634674921, "grad_norm": 0.0665178969502449, "learning_rate": 9.218243760388198e-05, "loss": 0.0075, "step": 9970 }, { "epoch": 1.9297600619195046, "grad_norm": 0.03950268030166626, "learning_rate": 9.218089204729458e-05, "loss": 0.0073, "step": 9971 }, { "epoch": 1.929953560371517, "grad_norm": 0.0888739749789238, "learning_rate": 9.217934635247777e-05, "loss": 0.0076, "step": 9972 }, { "epoch": 1.9301470588235294, "grad_norm": 0.039364464581012726, "learning_rate": 9.217780051943731e-05, "loss": 0.0082, "step": 9973 }, { "epoch": 1.9303405572755419, "grad_norm": 0.05009640008211136, "learning_rate": 9.217625454817897e-05, "loss": 0.0073, "step": 9974 }, { "epoch": 1.9305340557275543, "grad_norm": 0.07360121607780457, "learning_rate": 9.217470843870847e-05, "loss": 0.0088, "step": 9975 }, { "epoch": 1.9307275541795665, "grad_norm": 0.05211585760116577, "learning_rate": 9.217316219103155e-05, "loss": 0.0092, "step": 9976 }, { "epoch": 1.930921052631579, "grad_norm": 0.04546559602022171, "learning_rate": 9.217161580515399e-05, "loss": 0.0071, "step": 9977 }, { "epoch": 1.9311145510835912, "grad_norm": 
0.06196216121315956, "learning_rate": 9.21700692810815e-05, "loss": 0.0077, "step": 9978 }, { "epoch": 1.9313080495356036, "grad_norm": 0.06968385726213455, "learning_rate": 9.216852261881987e-05, "loss": 0.0072, "step": 9979 }, { "epoch": 1.931501547987616, "grad_norm": 0.043887630105018616, "learning_rate": 9.216697581837482e-05, "loss": 0.0083, "step": 9980 }, { "epoch": 1.9316950464396285, "grad_norm": 0.05641523003578186, "learning_rate": 9.216542887975213e-05, "loss": 0.0079, "step": 9981 }, { "epoch": 1.931888544891641, "grad_norm": 0.060114629566669464, "learning_rate": 9.216388180295752e-05, "loss": 0.0075, "step": 9982 }, { "epoch": 1.9320820433436534, "grad_norm": 0.04660806432366371, "learning_rate": 9.216233458799674e-05, "loss": 0.0082, "step": 9983 }, { "epoch": 1.9322755417956656, "grad_norm": 0.06767958402633667, "learning_rate": 9.216078723487558e-05, "loss": 0.0075, "step": 9984 }, { "epoch": 1.932469040247678, "grad_norm": 0.04558919742703438, "learning_rate": 9.215923974359977e-05, "loss": 0.0098, "step": 9985 }, { "epoch": 1.9326625386996903, "grad_norm": 0.08249672502279282, "learning_rate": 9.215769211417505e-05, "loss": 0.0079, "step": 9986 }, { "epoch": 1.9328560371517027, "grad_norm": 0.049458932131528854, "learning_rate": 9.21561443466072e-05, "loss": 0.0087, "step": 9987 }, { "epoch": 1.9330495356037152, "grad_norm": 0.06487122178077698, "learning_rate": 9.215459644090194e-05, "loss": 0.0081, "step": 9988 }, { "epoch": 1.9332430340557276, "grad_norm": 0.08475343137979507, "learning_rate": 9.215304839706505e-05, "loss": 0.0081, "step": 9989 }, { "epoch": 1.93343653250774, "grad_norm": 0.07274527102708817, "learning_rate": 9.215150021510231e-05, "loss": 0.0086, "step": 9990 }, { "epoch": 1.9336300309597523, "grad_norm": 0.08137670904397964, "learning_rate": 9.214995189501942e-05, "loss": 0.0079, "step": 9991 }, { "epoch": 1.9338235294117647, "grad_norm": 0.05212290585041046, "learning_rate": 9.214840343682216e-05, "loss": 0.0068, "step": 9992 }, { "epoch": 1.934017027863777, "grad_norm": 0.0855373740196228, "learning_rate": 9.214685484051631e-05, "loss": 0.0085, "step": 9993 }, { "epoch": 1.9342105263157894, "grad_norm": 0.06754053384065628, "learning_rate": 9.214530610610759e-05, "loss": 0.007, "step": 9994 }, { "epoch": 1.9344040247678018, "grad_norm": 0.05112232640385628, "learning_rate": 9.214375723360177e-05, "loss": 0.008, "step": 9995 }, { "epoch": 1.9345975232198143, "grad_norm": 0.059741485863924026, "learning_rate": 9.214220822300462e-05, "loss": 0.0065, "step": 9996 }, { "epoch": 1.9347910216718267, "grad_norm": 0.04794887453317642, "learning_rate": 9.21406590743219e-05, "loss": 0.0085, "step": 9997 }, { "epoch": 1.9349845201238391, "grad_norm": 0.04471070319414139, "learning_rate": 9.213910978755935e-05, "loss": 0.0088, "step": 9998 }, { "epoch": 1.9351780185758514, "grad_norm": 0.04635067656636238, "learning_rate": 9.213756036272275e-05, "loss": 0.0071, "step": 9999 }, { "epoch": 1.9353715170278638, "grad_norm": 0.06716295331716537, "learning_rate": 9.213601079981784e-05, "loss": 0.0073, "step": 10000 }, { "epoch": 1.935565015479876, "grad_norm": 0.04530651494860649, "learning_rate": 9.213446109885041e-05, "loss": 0.0071, "step": 10001 }, { "epoch": 1.9357585139318885, "grad_norm": 0.07223577797412872, "learning_rate": 9.213291125982619e-05, "loss": 0.0074, "step": 10002 }, { "epoch": 1.935952012383901, "grad_norm": 0.04413353279232979, "learning_rate": 9.213136128275098e-05, "loss": 0.0074, "step": 10003 }, { "epoch": 1.9361455108359134, 
"grad_norm": 0.0545554980635643, "learning_rate": 9.212981116763049e-05, "loss": 0.007, "step": 10004 }, { "epoch": 1.9363390092879258, "grad_norm": 0.12388378381729126, "learning_rate": 9.212826091447051e-05, "loss": 0.0072, "step": 10005 }, { "epoch": 1.9365325077399382, "grad_norm": 0.043448954820632935, "learning_rate": 9.212671052327682e-05, "loss": 0.008, "step": 10006 }, { "epoch": 1.9367260061919505, "grad_norm": 0.10250350087881088, "learning_rate": 9.212515999405516e-05, "loss": 0.0075, "step": 10007 }, { "epoch": 1.936919504643963, "grad_norm": 0.03222307190299034, "learning_rate": 9.212360932681131e-05, "loss": 0.0076, "step": 10008 }, { "epoch": 1.9371130030959751, "grad_norm": 0.09974540024995804, "learning_rate": 9.212205852155102e-05, "loss": 0.0067, "step": 10009 }, { "epoch": 1.9373065015479876, "grad_norm": 0.06809209287166595, "learning_rate": 9.212050757828007e-05, "loss": 0.0075, "step": 10010 }, { "epoch": 1.9375, "grad_norm": 0.1271866112947464, "learning_rate": 9.21189564970042e-05, "loss": 0.0075, "step": 10011 }, { "epoch": 1.9376934984520124, "grad_norm": 0.0713377520442009, "learning_rate": 9.211740527772922e-05, "loss": 0.0084, "step": 10012 }, { "epoch": 1.9378869969040249, "grad_norm": 0.10932993143796921, "learning_rate": 9.211585392046087e-05, "loss": 0.0092, "step": 10013 }, { "epoch": 1.938080495356037, "grad_norm": 0.0793139785528183, "learning_rate": 9.211430242520492e-05, "loss": 0.007, "step": 10014 }, { "epoch": 1.9382739938080495, "grad_norm": 0.09859669953584671, "learning_rate": 9.211275079196713e-05, "loss": 0.0082, "step": 10015 }, { "epoch": 1.9384674922600618, "grad_norm": 0.07841035723686218, "learning_rate": 9.211119902075329e-05, "loss": 0.0077, "step": 10016 }, { "epoch": 1.9386609907120742, "grad_norm": 0.06053837016224861, "learning_rate": 9.210964711156915e-05, "loss": 0.0072, "step": 10017 }, { "epoch": 1.9388544891640866, "grad_norm": 0.09319194406270981, "learning_rate": 9.210809506442048e-05, "loss": 0.007, "step": 10018 }, { "epoch": 1.939047987616099, "grad_norm": 0.03796505182981491, "learning_rate": 9.210654287931305e-05, "loss": 0.0066, "step": 10019 }, { "epoch": 1.9392414860681115, "grad_norm": 0.09429039061069489, "learning_rate": 9.210499055625266e-05, "loss": 0.0076, "step": 10020 }, { "epoch": 1.939434984520124, "grad_norm": 0.04106242209672928, "learning_rate": 9.210343809524504e-05, "loss": 0.0086, "step": 10021 }, { "epoch": 1.9396284829721362, "grad_norm": 0.0789610743522644, "learning_rate": 9.210188549629598e-05, "loss": 0.0084, "step": 10022 }, { "epoch": 1.9398219814241486, "grad_norm": 0.046788524836301804, "learning_rate": 9.210033275941127e-05, "loss": 0.0096, "step": 10023 }, { "epoch": 1.9400154798761609, "grad_norm": 0.08063998073339462, "learning_rate": 9.209877988459664e-05, "loss": 0.0068, "step": 10024 }, { "epoch": 1.9402089783281733, "grad_norm": 0.0662464052438736, "learning_rate": 9.209722687185791e-05, "loss": 0.0098, "step": 10025 }, { "epoch": 1.9404024767801857, "grad_norm": 0.09652754664421082, "learning_rate": 9.209567372120082e-05, "loss": 0.0074, "step": 10026 }, { "epoch": 1.9405959752321982, "grad_norm": 0.05748510733246803, "learning_rate": 9.209412043263116e-05, "loss": 0.0054, "step": 10027 }, { "epoch": 1.9407894736842106, "grad_norm": 0.05075681209564209, "learning_rate": 9.20925670061547e-05, "loss": 0.0071, "step": 10028 }, { "epoch": 1.940982972136223, "grad_norm": 0.09015302360057831, "learning_rate": 9.20910134417772e-05, "loss": 0.0073, "step": 10029 }, { "epoch": 
1.9411764705882353, "grad_norm": 0.056322574615478516, "learning_rate": 9.208945973950447e-05, "loss": 0.0075, "step": 10030 }, { "epoch": 1.9413699690402477, "grad_norm": 0.058701835572719574, "learning_rate": 9.208790589934226e-05, "loss": 0.0088, "step": 10031 }, { "epoch": 1.94156346749226, "grad_norm": 0.14865462481975555, "learning_rate": 9.208635192129637e-05, "loss": 0.0072, "step": 10032 }, { "epoch": 1.9417569659442724, "grad_norm": 0.06462650001049042, "learning_rate": 9.208479780537254e-05, "loss": 0.0087, "step": 10033 }, { "epoch": 1.9419504643962848, "grad_norm": 0.15975557267665863, "learning_rate": 9.208324355157658e-05, "loss": 0.0089, "step": 10034 }, { "epoch": 1.9421439628482973, "grad_norm": 0.032398149371147156, "learning_rate": 9.208168915991426e-05, "loss": 0.0072, "step": 10035 }, { "epoch": 1.9423374613003097, "grad_norm": 0.14492228627204895, "learning_rate": 9.208013463039136e-05, "loss": 0.0075, "step": 10036 }, { "epoch": 1.942530959752322, "grad_norm": 0.0730268806219101, "learning_rate": 9.207857996301363e-05, "loss": 0.0079, "step": 10037 }, { "epoch": 1.9427244582043344, "grad_norm": 0.10455305874347687, "learning_rate": 9.20770251577869e-05, "loss": 0.0084, "step": 10038 }, { "epoch": 1.9429179566563466, "grad_norm": 0.15526212751865387, "learning_rate": 9.207547021471692e-05, "loss": 0.0065, "step": 10039 }, { "epoch": 1.943111455108359, "grad_norm": 0.08445532619953156, "learning_rate": 9.207391513380948e-05, "loss": 0.0068, "step": 10040 }, { "epoch": 1.9433049535603715, "grad_norm": 0.1810372918844223, "learning_rate": 9.207235991507036e-05, "loss": 0.0073, "step": 10041 }, { "epoch": 1.943498452012384, "grad_norm": 0.09156931191682816, "learning_rate": 9.207080455850534e-05, "loss": 0.0075, "step": 10042 }, { "epoch": 1.9436919504643964, "grad_norm": 0.15765728056430817, "learning_rate": 9.20692490641202e-05, "loss": 0.0074, "step": 10043 }, { "epoch": 1.9438854489164088, "grad_norm": 0.1393376737833023, "learning_rate": 9.206769343192073e-05, "loss": 0.0064, "step": 10044 }, { "epoch": 1.944078947368421, "grad_norm": 0.08827491104602814, "learning_rate": 9.206613766191271e-05, "loss": 0.0068, "step": 10045 }, { "epoch": 1.9442724458204335, "grad_norm": 0.2247689962387085, "learning_rate": 9.206458175410191e-05, "loss": 0.0092, "step": 10046 }, { "epoch": 1.9444659442724457, "grad_norm": 0.06447858363389969, "learning_rate": 9.206302570849415e-05, "loss": 0.0071, "step": 10047 }, { "epoch": 1.9446594427244581, "grad_norm": 0.20762626826763153, "learning_rate": 9.206146952509518e-05, "loss": 0.007, "step": 10048 }, { "epoch": 1.9448529411764706, "grad_norm": 0.08164145052433014, "learning_rate": 9.20599132039108e-05, "loss": 0.0068, "step": 10049 }, { "epoch": 1.945046439628483, "grad_norm": 0.1430378407239914, "learning_rate": 9.20583567449468e-05, "loss": 0.0084, "step": 10050 }, { "epoch": 1.9452399380804954, "grad_norm": 0.14644870162010193, "learning_rate": 9.205680014820895e-05, "loss": 0.0073, "step": 10051 }, { "epoch": 1.9454334365325079, "grad_norm": 0.06772297620773315, "learning_rate": 9.205524341370305e-05, "loss": 0.0076, "step": 10052 }, { "epoch": 1.94562693498452, "grad_norm": 0.12907519936561584, "learning_rate": 9.20536865414349e-05, "loss": 0.0087, "step": 10053 }, { "epoch": 1.9458204334365325, "grad_norm": 0.045413777232170105, "learning_rate": 9.205212953141026e-05, "loss": 0.0072, "step": 10054 }, { "epoch": 1.9460139318885448, "grad_norm": 0.09817727655172348, "learning_rate": 9.205057238363493e-05, "loss": 0.0082, "step": 
10055 }, { "epoch": 1.9462074303405572, "grad_norm": 0.055533360689878464, "learning_rate": 9.20490150981147e-05, "loss": 0.0097, "step": 10056 }, { "epoch": 1.9464009287925697, "grad_norm": 0.058981962502002716, "learning_rate": 9.204745767485538e-05, "loss": 0.0078, "step": 10057 }, { "epoch": 1.946594427244582, "grad_norm": 0.07047212868928909, "learning_rate": 9.204590011386274e-05, "loss": 0.009, "step": 10058 }, { "epoch": 1.9467879256965945, "grad_norm": 0.07110288739204407, "learning_rate": 9.204434241514253e-05, "loss": 0.009, "step": 10059 }, { "epoch": 1.946981424148607, "grad_norm": 0.04722655192017555, "learning_rate": 9.204278457870063e-05, "loss": 0.0084, "step": 10060 }, { "epoch": 1.9471749226006192, "grad_norm": 0.07578182965517044, "learning_rate": 9.204122660454277e-05, "loss": 0.0078, "step": 10061 }, { "epoch": 1.9473684210526314, "grad_norm": 0.062148116528987885, "learning_rate": 9.203966849267475e-05, "loss": 0.0095, "step": 10062 }, { "epoch": 1.9475619195046439, "grad_norm": 0.08377688378095627, "learning_rate": 9.203811024310238e-05, "loss": 0.0073, "step": 10063 }, { "epoch": 1.9477554179566563, "grad_norm": 0.04696408286690712, "learning_rate": 9.203655185583144e-05, "loss": 0.0076, "step": 10064 }, { "epoch": 1.9479489164086687, "grad_norm": 0.08249539881944656, "learning_rate": 9.203499333086771e-05, "loss": 0.008, "step": 10065 }, { "epoch": 1.9481424148606812, "grad_norm": 0.030821431428194046, "learning_rate": 9.203343466821703e-05, "loss": 0.0082, "step": 10066 }, { "epoch": 1.9483359133126936, "grad_norm": 0.06014779210090637, "learning_rate": 9.203187586788514e-05, "loss": 0.0087, "step": 10067 }, { "epoch": 1.9485294117647058, "grad_norm": 0.04245266690850258, "learning_rate": 9.203031692987787e-05, "loss": 0.0068, "step": 10068 }, { "epoch": 1.9487229102167183, "grad_norm": 0.10714104771614075, "learning_rate": 9.2028757854201e-05, "loss": 0.0064, "step": 10069 }, { "epoch": 1.9489164086687305, "grad_norm": 0.048302147537469864, "learning_rate": 9.202719864086034e-05, "loss": 0.0068, "step": 10070 }, { "epoch": 1.949109907120743, "grad_norm": 0.11454737186431885, "learning_rate": 9.202563928986169e-05, "loss": 0.0073, "step": 10071 }, { "epoch": 1.9493034055727554, "grad_norm": 0.04074455425143242, "learning_rate": 9.20240798012108e-05, "loss": 0.0065, "step": 10072 }, { "epoch": 1.9494969040247678, "grad_norm": 0.12738841772079468, "learning_rate": 9.202252017491354e-05, "loss": 0.008, "step": 10073 }, { "epoch": 1.9496904024767803, "grad_norm": 0.05961056426167488, "learning_rate": 9.202096041097566e-05, "loss": 0.0077, "step": 10074 }, { "epoch": 1.9498839009287927, "grad_norm": 0.11870616674423218, "learning_rate": 9.201940050940296e-05, "loss": 0.0066, "step": 10075 }, { "epoch": 1.950077399380805, "grad_norm": 0.07320092618465424, "learning_rate": 9.201784047020127e-05, "loss": 0.0089, "step": 10076 }, { "epoch": 1.9502708978328174, "grad_norm": 0.09288652241230011, "learning_rate": 9.201628029337637e-05, "loss": 0.007, "step": 10077 }, { "epoch": 1.9504643962848296, "grad_norm": 0.10664766281843185, "learning_rate": 9.201471997893405e-05, "loss": 0.0068, "step": 10078 }, { "epoch": 1.950657894736842, "grad_norm": 0.06832355260848999, "learning_rate": 9.201315952688012e-05, "loss": 0.0089, "step": 10079 }, { "epoch": 1.9508513931888545, "grad_norm": 0.1448291540145874, "learning_rate": 9.201159893722039e-05, "loss": 0.0077, "step": 10080 }, { "epoch": 1.951044891640867, "grad_norm": 0.09179273247718811, "learning_rate": 9.201003820996066e-05, 
"loss": 0.0066, "step": 10081 }, { "epoch": 1.9512383900928794, "grad_norm": 0.10384179651737213, "learning_rate": 9.200847734510671e-05, "loss": 0.0069, "step": 10082 }, { "epoch": 1.9514318885448918, "grad_norm": 0.06160491332411766, "learning_rate": 9.200691634266439e-05, "loss": 0.0076, "step": 10083 }, { "epoch": 1.951625386996904, "grad_norm": 0.10542155802249908, "learning_rate": 9.200535520263945e-05, "loss": 0.0096, "step": 10084 }, { "epoch": 1.9518188854489165, "grad_norm": 0.04854162037372589, "learning_rate": 9.200379392503771e-05, "loss": 0.0077, "step": 10085 }, { "epoch": 1.9520123839009287, "grad_norm": 0.1022769957780838, "learning_rate": 9.2002232509865e-05, "loss": 0.0077, "step": 10086 }, { "epoch": 1.9522058823529411, "grad_norm": 0.08552778512239456, "learning_rate": 9.20006709571271e-05, "loss": 0.0067, "step": 10087 }, { "epoch": 1.9523993808049536, "grad_norm": 0.0852905735373497, "learning_rate": 9.199910926682983e-05, "loss": 0.0071, "step": 10088 }, { "epoch": 1.952592879256966, "grad_norm": 0.0795748382806778, "learning_rate": 9.199754743897898e-05, "loss": 0.0067, "step": 10089 }, { "epoch": 1.9527863777089784, "grad_norm": 0.08611658215522766, "learning_rate": 9.199598547358037e-05, "loss": 0.0077, "step": 10090 }, { "epoch": 1.9529798761609907, "grad_norm": 0.07616817951202393, "learning_rate": 9.19944233706398e-05, "loss": 0.006, "step": 10091 }, { "epoch": 1.953173374613003, "grad_norm": 0.08054152131080627, "learning_rate": 9.199286113016308e-05, "loss": 0.008, "step": 10092 }, { "epoch": 1.9533668730650153, "grad_norm": 0.11245157569646835, "learning_rate": 9.199129875215602e-05, "loss": 0.0067, "step": 10093 }, { "epoch": 1.9535603715170278, "grad_norm": 0.06354371458292007, "learning_rate": 9.198973623662442e-05, "loss": 0.008, "step": 10094 }, { "epoch": 1.9537538699690402, "grad_norm": 0.13887035846710205, "learning_rate": 9.19881735835741e-05, "loss": 0.0054, "step": 10095 }, { "epoch": 1.9539473684210527, "grad_norm": 0.03472846373915672, "learning_rate": 9.198661079301086e-05, "loss": 0.0074, "step": 10096 }, { "epoch": 1.954140866873065, "grad_norm": 0.13268913328647614, "learning_rate": 9.198504786494052e-05, "loss": 0.0076, "step": 10097 }, { "epoch": 1.9543343653250775, "grad_norm": 0.07636591792106628, "learning_rate": 9.198348479936888e-05, "loss": 0.0071, "step": 10098 }, { "epoch": 1.9545278637770898, "grad_norm": 0.12315680831670761, "learning_rate": 9.198192159630175e-05, "loss": 0.0083, "step": 10099 }, { "epoch": 1.9547213622291022, "grad_norm": 0.09387510269880295, "learning_rate": 9.198035825574497e-05, "loss": 0.0067, "step": 10100 }, { "epoch": 1.9549148606811144, "grad_norm": 0.06020501255989075, "learning_rate": 9.197879477770431e-05, "loss": 0.0086, "step": 10101 }, { "epoch": 1.9551083591331269, "grad_norm": 0.11896707862615585, "learning_rate": 9.197723116218562e-05, "loss": 0.008, "step": 10102 }, { "epoch": 1.9553018575851393, "grad_norm": 0.06652318686246872, "learning_rate": 9.197566740919468e-05, "loss": 0.0084, "step": 10103 }, { "epoch": 1.9554953560371517, "grad_norm": 0.14250244200229645, "learning_rate": 9.197410351873732e-05, "loss": 0.0082, "step": 10104 }, { "epoch": 1.9556888544891642, "grad_norm": 0.08556164801120758, "learning_rate": 9.197253949081936e-05, "loss": 0.0075, "step": 10105 }, { "epoch": 1.9558823529411766, "grad_norm": 0.09638914465904236, "learning_rate": 9.197097532544662e-05, "loss": 0.0062, "step": 10106 }, { "epoch": 1.9560758513931888, "grad_norm": 0.13225789368152618, "learning_rate": 
9.19694110226249e-05, "loss": 0.0076, "step": 10107 }, { "epoch": 1.9562693498452013, "grad_norm": 0.06082480400800705, "learning_rate": 9.196784658236001e-05, "loss": 0.0072, "step": 10108 }, { "epoch": 1.9564628482972135, "grad_norm": 0.14050056040287018, "learning_rate": 9.196628200465778e-05, "loss": 0.0083, "step": 10109 }, { "epoch": 1.956656346749226, "grad_norm": 0.02901366911828518, "learning_rate": 9.196471728952402e-05, "loss": 0.0063, "step": 10110 }, { "epoch": 1.9568498452012384, "grad_norm": 0.10533750057220459, "learning_rate": 9.196315243696454e-05, "loss": 0.008, "step": 10111 }, { "epoch": 1.9570433436532508, "grad_norm": 0.042378347367048264, "learning_rate": 9.196158744698518e-05, "loss": 0.0065, "step": 10112 }, { "epoch": 1.9572368421052633, "grad_norm": 0.06345615535974503, "learning_rate": 9.196002231959174e-05, "loss": 0.0073, "step": 10113 }, { "epoch": 1.9574303405572755, "grad_norm": 0.09284552931785583, "learning_rate": 9.195845705479004e-05, "loss": 0.0085, "step": 10114 }, { "epoch": 1.957623839009288, "grad_norm": 0.04590907320380211, "learning_rate": 9.195689165258591e-05, "loss": 0.0079, "step": 10115 }, { "epoch": 1.9578173374613002, "grad_norm": 0.08421938121318817, "learning_rate": 9.195532611298516e-05, "loss": 0.0082, "step": 10116 }, { "epoch": 1.9580108359133126, "grad_norm": 0.048184167593717575, "learning_rate": 9.195376043599361e-05, "loss": 0.0065, "step": 10117 }, { "epoch": 1.958204334365325, "grad_norm": 0.07091128826141357, "learning_rate": 9.195219462161711e-05, "loss": 0.0083, "step": 10118 }, { "epoch": 1.9583978328173375, "grad_norm": 0.08170383423566818, "learning_rate": 9.195062866986142e-05, "loss": 0.008, "step": 10119 }, { "epoch": 1.95859133126935, "grad_norm": 0.06243753060698509, "learning_rate": 9.194906258073241e-05, "loss": 0.0069, "step": 10120 }, { "epoch": 1.9587848297213624, "grad_norm": 0.12147018313407898, "learning_rate": 9.194749635423589e-05, "loss": 0.0078, "step": 10121 }, { "epoch": 1.9589783281733746, "grad_norm": 0.12257128208875656, "learning_rate": 9.19459299903777e-05, "loss": 0.0068, "step": 10122 }, { "epoch": 1.959171826625387, "grad_norm": 0.07585900276899338, "learning_rate": 9.194436348916363e-05, "loss": 0.0078, "step": 10123 }, { "epoch": 1.9593653250773992, "grad_norm": 0.08894070237874985, "learning_rate": 9.194279685059952e-05, "loss": 0.007, "step": 10124 }, { "epoch": 1.9595588235294117, "grad_norm": 0.08396906405687332, "learning_rate": 9.19412300746912e-05, "loss": 0.0078, "step": 10125 }, { "epoch": 1.9597523219814241, "grad_norm": 0.08904898911714554, "learning_rate": 9.193966316144448e-05, "loss": 0.0068, "step": 10126 }, { "epoch": 1.9599458204334366, "grad_norm": 0.07792290300130844, "learning_rate": 9.19380961108652e-05, "loss": 0.0069, "step": 10127 }, { "epoch": 1.960139318885449, "grad_norm": 0.0435824953019619, "learning_rate": 9.193652892295919e-05, "loss": 0.0063, "step": 10128 }, { "epoch": 1.9603328173374615, "grad_norm": 0.09581442922353745, "learning_rate": 9.193496159773224e-05, "loss": 0.008, "step": 10129 }, { "epoch": 1.9605263157894737, "grad_norm": 0.036516327410936356, "learning_rate": 9.193339413519023e-05, "loss": 0.0093, "step": 10130 }, { "epoch": 1.9607198142414861, "grad_norm": 0.10601886361837387, "learning_rate": 9.193182653533895e-05, "loss": 0.0084, "step": 10131 }, { "epoch": 1.9609133126934983, "grad_norm": 0.05455074459314346, "learning_rate": 9.193025879818423e-05, "loss": 0.0064, "step": 10132 }, { "epoch": 1.9611068111455108, "grad_norm": 
0.07251527160406113, "learning_rate": 9.192869092373193e-05, "loss": 0.0083, "step": 10133 }, { "epoch": 1.9613003095975232, "grad_norm": 0.08498363196849823, "learning_rate": 9.192712291198784e-05, "loss": 0.0086, "step": 10134 }, { "epoch": 1.9614938080495357, "grad_norm": 0.09274856746196747, "learning_rate": 9.19255547629578e-05, "loss": 0.0069, "step": 10135 }, { "epoch": 1.961687306501548, "grad_norm": 0.0421680323779583, "learning_rate": 9.192398647664768e-05, "loss": 0.0076, "step": 10136 }, { "epoch": 1.9618808049535603, "grad_norm": 0.09815926849842072, "learning_rate": 9.192241805306326e-05, "loss": 0.0084, "step": 10137 }, { "epoch": 1.9620743034055728, "grad_norm": 0.0696914941072464, "learning_rate": 9.192084949221035e-05, "loss": 0.0085, "step": 10138 }, { "epoch": 1.962267801857585, "grad_norm": 0.11233522742986679, "learning_rate": 9.191928079409486e-05, "loss": 0.0105, "step": 10139 }, { "epoch": 1.9624613003095974, "grad_norm": 0.08319781720638275, "learning_rate": 9.191771195872258e-05, "loss": 0.0063, "step": 10140 }, { "epoch": 1.9626547987616099, "grad_norm": 0.10160823911428452, "learning_rate": 9.191614298609933e-05, "loss": 0.0084, "step": 10141 }, { "epoch": 1.9628482972136223, "grad_norm": 0.0545211024582386, "learning_rate": 9.191457387623097e-05, "loss": 0.0078, "step": 10142 }, { "epoch": 1.9630417956656347, "grad_norm": 0.1164892390370369, "learning_rate": 9.191300462912331e-05, "loss": 0.0087, "step": 10143 }, { "epoch": 1.9632352941176472, "grad_norm": 0.05049901828169823, "learning_rate": 9.19114352447822e-05, "loss": 0.0073, "step": 10144 }, { "epoch": 1.9634287925696594, "grad_norm": 0.1340409517288208, "learning_rate": 9.190986572321346e-05, "loss": 0.0084, "step": 10145 }, { "epoch": 1.9636222910216719, "grad_norm": 0.07992853224277496, "learning_rate": 9.190829606442294e-05, "loss": 0.0089, "step": 10146 }, { "epoch": 1.963815789473684, "grad_norm": 0.11948247998952866, "learning_rate": 9.190672626841647e-05, "loss": 0.0077, "step": 10147 }, { "epoch": 1.9640092879256965, "grad_norm": 0.10844144970178604, "learning_rate": 9.190515633519987e-05, "loss": 0.0078, "step": 10148 }, { "epoch": 1.964202786377709, "grad_norm": 0.08837677538394928, "learning_rate": 9.190358626477902e-05, "loss": 0.0069, "step": 10149 }, { "epoch": 1.9643962848297214, "grad_norm": 0.13104137778282166, "learning_rate": 9.190201605715971e-05, "loss": 0.0074, "step": 10150 }, { "epoch": 1.9645897832817338, "grad_norm": 0.0524248331785202, "learning_rate": 9.19004457123478e-05, "loss": 0.0091, "step": 10151 }, { "epoch": 1.9647832817337463, "grad_norm": 0.13006553053855896, "learning_rate": 9.189887523034914e-05, "loss": 0.0081, "step": 10152 }, { "epoch": 1.9649767801857585, "grad_norm": 0.07122434675693512, "learning_rate": 9.189730461116954e-05, "loss": 0.0072, "step": 10153 }, { "epoch": 1.965170278637771, "grad_norm": 0.08989112824201584, "learning_rate": 9.189573385481486e-05, "loss": 0.0064, "step": 10154 }, { "epoch": 1.9653637770897832, "grad_norm": 0.07818002253770828, "learning_rate": 9.189416296129093e-05, "loss": 0.0078, "step": 10155 }, { "epoch": 1.9655572755417956, "grad_norm": 0.08029981702566147, "learning_rate": 9.18925919306036e-05, "loss": 0.0081, "step": 10156 }, { "epoch": 1.965750773993808, "grad_norm": 0.09921672195196152, "learning_rate": 9.189102076275869e-05, "loss": 0.0074, "step": 10157 }, { "epoch": 1.9659442724458205, "grad_norm": 0.09135811030864716, "learning_rate": 9.188944945776205e-05, "loss": 0.0073, "step": 10158 }, { "epoch": 
1.966137770897833, "grad_norm": 0.08718637377023697, "learning_rate": 9.188787801561956e-05, "loss": 0.0069, "step": 10159 }, { "epoch": 1.9663312693498454, "grad_norm": 0.08609245717525482, "learning_rate": 9.188630643633699e-05, "loss": 0.0065, "step": 10160 }, { "epoch": 1.9665247678018576, "grad_norm": 0.09082094579935074, "learning_rate": 9.188473471992024e-05, "loss": 0.0087, "step": 10161 }, { "epoch": 1.9667182662538698, "grad_norm": 0.08140698820352554, "learning_rate": 9.188316286637513e-05, "loss": 0.009, "step": 10162 }, { "epoch": 1.9669117647058822, "grad_norm": 0.056762780994176865, "learning_rate": 9.188159087570752e-05, "loss": 0.0098, "step": 10163 }, { "epoch": 1.9671052631578947, "grad_norm": 0.09175495058298111, "learning_rate": 9.188001874792324e-05, "loss": 0.0068, "step": 10164 }, { "epoch": 1.9672987616099071, "grad_norm": 0.04493524506688118, "learning_rate": 9.187844648302812e-05, "loss": 0.0077, "step": 10165 }, { "epoch": 1.9674922600619196, "grad_norm": 0.07341939210891724, "learning_rate": 9.187687408102804e-05, "loss": 0.0079, "step": 10166 }, { "epoch": 1.967685758513932, "grad_norm": 0.06831242889165878, "learning_rate": 9.187530154192882e-05, "loss": 0.0081, "step": 10167 }, { "epoch": 1.9678792569659442, "grad_norm": 0.06633095443248749, "learning_rate": 9.187372886573633e-05, "loss": 0.0066, "step": 10168 }, { "epoch": 1.9680727554179567, "grad_norm": 0.052834562957286835, "learning_rate": 9.187215605245639e-05, "loss": 0.0078, "step": 10169 }, { "epoch": 1.968266253869969, "grad_norm": 0.11030080169439316, "learning_rate": 9.187058310209485e-05, "loss": 0.0074, "step": 10170 }, { "epoch": 1.9684597523219813, "grad_norm": 0.07846891134977341, "learning_rate": 9.18690100146576e-05, "loss": 0.0091, "step": 10171 }, { "epoch": 1.9686532507739938, "grad_norm": 0.1064966693520546, "learning_rate": 9.186743679015043e-05, "loss": 0.0056, "step": 10172 }, { "epoch": 1.9688467492260062, "grad_norm": 0.06837065517902374, "learning_rate": 9.186586342857922e-05, "loss": 0.0082, "step": 10173 }, { "epoch": 1.9690402476780187, "grad_norm": 0.06938643753528595, "learning_rate": 9.186428992994981e-05, "loss": 0.0073, "step": 10174 }, { "epoch": 1.969233746130031, "grad_norm": 0.05973319336771965, "learning_rate": 9.186271629426805e-05, "loss": 0.0062, "step": 10175 }, { "epoch": 1.9694272445820433, "grad_norm": 0.08747904747724533, "learning_rate": 9.186114252153978e-05, "loss": 0.0087, "step": 10176 }, { "epoch": 1.9696207430340558, "grad_norm": 0.06726804375648499, "learning_rate": 9.18595686117709e-05, "loss": 0.0075, "step": 10177 }, { "epoch": 1.969814241486068, "grad_norm": 0.09255915880203247, "learning_rate": 9.185799456496722e-05, "loss": 0.0082, "step": 10178 }, { "epoch": 1.9700077399380804, "grad_norm": 0.07649792730808258, "learning_rate": 9.185642038113459e-05, "loss": 0.0092, "step": 10179 }, { "epoch": 1.9702012383900929, "grad_norm": 0.08319097757339478, "learning_rate": 9.185484606027888e-05, "loss": 0.0064, "step": 10180 }, { "epoch": 1.9703947368421053, "grad_norm": 0.10405591875314713, "learning_rate": 9.18532716024059e-05, "loss": 0.0058, "step": 10181 }, { "epoch": 1.9705882352941178, "grad_norm": 0.06048450991511345, "learning_rate": 9.185169700752158e-05, "loss": 0.0074, "step": 10182 }, { "epoch": 1.9707817337461302, "grad_norm": 0.06093677505850792, "learning_rate": 9.18501222756317e-05, "loss": 0.008, "step": 10183 }, { "epoch": 1.9709752321981424, "grad_norm": 0.06085100769996643, "learning_rate": 9.184854740674215e-05, "loss": 0.008, 
"step": 10184 }, { "epoch": 1.9711687306501546, "grad_norm": 0.04756353422999382, "learning_rate": 9.18469724008588e-05, "loss": 0.009, "step": 10185 }, { "epoch": 1.971362229102167, "grad_norm": 0.057546887546777725, "learning_rate": 9.184539725798746e-05, "loss": 0.007, "step": 10186 }, { "epoch": 1.9715557275541795, "grad_norm": 0.0629303976893425, "learning_rate": 9.184382197813403e-05, "loss": 0.0076, "step": 10187 }, { "epoch": 1.971749226006192, "grad_norm": 0.08154331892728806, "learning_rate": 9.184224656130435e-05, "loss": 0.0079, "step": 10188 }, { "epoch": 1.9719427244582044, "grad_norm": 0.09161285310983658, "learning_rate": 9.184067100750427e-05, "loss": 0.0068, "step": 10189 }, { "epoch": 1.9721362229102168, "grad_norm": 0.09107412397861481, "learning_rate": 9.183909531673965e-05, "loss": 0.0093, "step": 10190 }, { "epoch": 1.972329721362229, "grad_norm": 0.12262381613254547, "learning_rate": 9.183751948901635e-05, "loss": 0.0056, "step": 10191 }, { "epoch": 1.9725232198142415, "grad_norm": 0.09654319286346436, "learning_rate": 9.183594352434023e-05, "loss": 0.0083, "step": 10192 }, { "epoch": 1.9727167182662537, "grad_norm": 0.09509178251028061, "learning_rate": 9.183436742271716e-05, "loss": 0.0079, "step": 10193 }, { "epoch": 1.9729102167182662, "grad_norm": 0.12101562321186066, "learning_rate": 9.183279118415298e-05, "loss": 0.0086, "step": 10194 }, { "epoch": 1.9731037151702786, "grad_norm": 0.060927506536245346, "learning_rate": 9.183121480865355e-05, "loss": 0.0079, "step": 10195 }, { "epoch": 1.973297213622291, "grad_norm": 0.09780971705913544, "learning_rate": 9.182963829622474e-05, "loss": 0.0071, "step": 10196 }, { "epoch": 1.9734907120743035, "grad_norm": 0.07374880462884903, "learning_rate": 9.182806164687244e-05, "loss": 0.0097, "step": 10197 }, { "epoch": 1.973684210526316, "grad_norm": 0.10113728791475296, "learning_rate": 9.182648486060243e-05, "loss": 0.0064, "step": 10198 }, { "epoch": 1.9738777089783281, "grad_norm": 0.08636284619569778, "learning_rate": 9.182490793742066e-05, "loss": 0.0078, "step": 10199 }, { "epoch": 1.9740712074303406, "grad_norm": 0.11729425191879272, "learning_rate": 9.182333087733294e-05, "loss": 0.0069, "step": 10200 }, { "epoch": 1.9742647058823528, "grad_norm": 0.058038342744112015, "learning_rate": 9.182175368034517e-05, "loss": 0.007, "step": 10201 }, { "epoch": 1.9744582043343653, "grad_norm": 0.08550604432821274, "learning_rate": 9.182017634646319e-05, "loss": 0.0099, "step": 10202 }, { "epoch": 1.9746517027863777, "grad_norm": 0.09082666784524918, "learning_rate": 9.181859887569285e-05, "loss": 0.0056, "step": 10203 }, { "epoch": 1.9748452012383901, "grad_norm": 0.08273477107286453, "learning_rate": 9.181702126804003e-05, "loss": 0.0073, "step": 10204 }, { "epoch": 1.9750386996904026, "grad_norm": 0.062433794140815735, "learning_rate": 9.18154435235106e-05, "loss": 0.0075, "step": 10205 }, { "epoch": 1.975232198142415, "grad_norm": 0.08561964333057404, "learning_rate": 9.181386564211044e-05, "loss": 0.008, "step": 10206 }, { "epoch": 1.9754256965944272, "grad_norm": 0.0672508105635643, "learning_rate": 9.181228762384539e-05, "loss": 0.0083, "step": 10207 }, { "epoch": 1.9756191950464397, "grad_norm": 0.08207409083843231, "learning_rate": 9.181070946872132e-05, "loss": 0.0074, "step": 10208 }, { "epoch": 1.975812693498452, "grad_norm": 0.06392159312963486, "learning_rate": 9.18091311767441e-05, "loss": 0.0089, "step": 10209 }, { "epoch": 1.9760061919504643, "grad_norm": 0.09987399727106094, "learning_rate": 
9.18075527479196e-05, "loss": 0.0072, "step": 10210 }, { "epoch": 1.9761996904024768, "grad_norm": 0.06952029466629028, "learning_rate": 9.18059741822537e-05, "loss": 0.0081, "step": 10211 }, { "epoch": 1.9763931888544892, "grad_norm": 0.09570114314556122, "learning_rate": 9.180439547975225e-05, "loss": 0.0087, "step": 10212 }, { "epoch": 1.9765866873065017, "grad_norm": 0.10986243188381195, "learning_rate": 9.180281664042113e-05, "loss": 0.0076, "step": 10213 }, { "epoch": 1.9767801857585139, "grad_norm": 0.09636490792036057, "learning_rate": 9.18012376642662e-05, "loss": 0.007, "step": 10214 }, { "epoch": 1.9769736842105263, "grad_norm": 0.14002007246017456, "learning_rate": 9.179965855129334e-05, "loss": 0.0077, "step": 10215 }, { "epoch": 1.9771671826625385, "grad_norm": 0.0952318012714386, "learning_rate": 9.179807930150841e-05, "loss": 0.0066, "step": 10216 }, { "epoch": 1.977360681114551, "grad_norm": 0.08314518630504608, "learning_rate": 9.17964999149173e-05, "loss": 0.0088, "step": 10217 }, { "epoch": 1.9775541795665634, "grad_norm": 0.10223344713449478, "learning_rate": 9.179492039152585e-05, "loss": 0.0075, "step": 10218 }, { "epoch": 1.9777476780185759, "grad_norm": 0.03735470771789551, "learning_rate": 9.179334073133999e-05, "loss": 0.0063, "step": 10219 }, { "epoch": 1.9779411764705883, "grad_norm": 0.07893500477075577, "learning_rate": 9.179176093436553e-05, "loss": 0.0094, "step": 10220 }, { "epoch": 1.9781346749226008, "grad_norm": 0.037506069988012314, "learning_rate": 9.179018100060835e-05, "loss": 0.0064, "step": 10221 }, { "epoch": 1.978328173374613, "grad_norm": 0.07199760526418686, "learning_rate": 9.178860093007437e-05, "loss": 0.0066, "step": 10222 }, { "epoch": 1.9785216718266254, "grad_norm": 0.04201699048280716, "learning_rate": 9.178702072276943e-05, "loss": 0.0076, "step": 10223 }, { "epoch": 1.9787151702786376, "grad_norm": 0.04176664724946022, "learning_rate": 9.178544037869941e-05, "loss": 0.0073, "step": 10224 }, { "epoch": 1.97890866873065, "grad_norm": 0.08333536982536316, "learning_rate": 9.178385989787019e-05, "loss": 0.0084, "step": 10225 }, { "epoch": 1.9791021671826625, "grad_norm": 0.05429453030228615, "learning_rate": 9.178227928028765e-05, "loss": 0.0086, "step": 10226 }, { "epoch": 1.979295665634675, "grad_norm": 0.05372169241309166, "learning_rate": 9.178069852595764e-05, "loss": 0.0072, "step": 10227 }, { "epoch": 1.9794891640866874, "grad_norm": 0.08000660687685013, "learning_rate": 9.177911763488607e-05, "loss": 0.0096, "step": 10228 }, { "epoch": 1.9796826625386998, "grad_norm": 0.05314984545111656, "learning_rate": 9.177753660707881e-05, "loss": 0.0074, "step": 10229 }, { "epoch": 1.979876160990712, "grad_norm": 0.060884878039360046, "learning_rate": 9.177595544254171e-05, "loss": 0.0077, "step": 10230 }, { "epoch": 1.9800696594427245, "grad_norm": 0.06257938593626022, "learning_rate": 9.177437414128068e-05, "loss": 0.0081, "step": 10231 }, { "epoch": 1.9802631578947367, "grad_norm": 0.0623970590531826, "learning_rate": 9.17727927033016e-05, "loss": 0.008, "step": 10232 }, { "epoch": 1.9804566563467492, "grad_norm": 0.051263198256492615, "learning_rate": 9.177121112861031e-05, "loss": 0.0081, "step": 10233 }, { "epoch": 1.9806501547987616, "grad_norm": 0.053716424852609634, "learning_rate": 9.176962941721275e-05, "loss": 0.0095, "step": 10234 }, { "epoch": 1.980843653250774, "grad_norm": 0.07453977316617966, "learning_rate": 9.176804756911474e-05, "loss": 0.0077, "step": 10235 }, { "epoch": 1.9810371517027865, "grad_norm": 
0.04623923823237419, "learning_rate": 9.176646558432221e-05, "loss": 0.0078, "step": 10236 }, { "epoch": 1.9812306501547987, "grad_norm": 0.06431151926517487, "learning_rate": 9.176488346284101e-05, "loss": 0.0067, "step": 10237 }, { "epoch": 1.9814241486068112, "grad_norm": 0.052044738084077835, "learning_rate": 9.176330120467704e-05, "loss": 0.0081, "step": 10238 }, { "epoch": 1.9816176470588234, "grad_norm": 0.05138557031750679, "learning_rate": 9.176171880983616e-05, "loss": 0.0068, "step": 10239 }, { "epoch": 1.9818111455108358, "grad_norm": 0.055952370166778564, "learning_rate": 9.176013627832427e-05, "loss": 0.0072, "step": 10240 }, { "epoch": 1.9820046439628483, "grad_norm": 0.030145535245537758, "learning_rate": 9.175855361014727e-05, "loss": 0.0075, "step": 10241 }, { "epoch": 1.9821981424148607, "grad_norm": 0.05963903293013573, "learning_rate": 9.175697080531101e-05, "loss": 0.0079, "step": 10242 }, { "epoch": 1.9823916408668731, "grad_norm": 0.05093375965952873, "learning_rate": 9.175538786382139e-05, "loss": 0.0067, "step": 10243 }, { "epoch": 1.9825851393188856, "grad_norm": 0.08645572513341904, "learning_rate": 9.175380478568429e-05, "loss": 0.0076, "step": 10244 }, { "epoch": 1.9827786377708978, "grad_norm": 0.0714929848909378, "learning_rate": 9.175222157090561e-05, "loss": 0.0078, "step": 10245 }, { "epoch": 1.9829721362229102, "grad_norm": 0.10806746780872345, "learning_rate": 9.175063821949122e-05, "loss": 0.0085, "step": 10246 }, { "epoch": 1.9831656346749225, "grad_norm": 0.10043096542358398, "learning_rate": 9.174905473144703e-05, "loss": 0.0069, "step": 10247 }, { "epoch": 1.983359133126935, "grad_norm": 0.0834503322839737, "learning_rate": 9.174747110677889e-05, "loss": 0.0074, "step": 10248 }, { "epoch": 1.9835526315789473, "grad_norm": 0.11067093163728714, "learning_rate": 9.174588734549271e-05, "loss": 0.0065, "step": 10249 }, { "epoch": 1.9837461300309598, "grad_norm": 0.08207432180643082, "learning_rate": 9.174430344759438e-05, "loss": 0.0075, "step": 10250 }, { "epoch": 1.9839396284829722, "grad_norm": 0.05282280594110489, "learning_rate": 9.174271941308979e-05, "loss": 0.0082, "step": 10251 }, { "epoch": 1.9841331269349847, "grad_norm": 0.1593472808599472, "learning_rate": 9.174113524198481e-05, "loss": 0.0082, "step": 10252 }, { "epoch": 1.984326625386997, "grad_norm": 0.04009499028325081, "learning_rate": 9.173955093428535e-05, "loss": 0.0069, "step": 10253 }, { "epoch": 1.9845201238390093, "grad_norm": 0.14911146461963654, "learning_rate": 9.173796648999728e-05, "loss": 0.0083, "step": 10254 }, { "epoch": 1.9847136222910216, "grad_norm": 0.11703604459762573, "learning_rate": 9.17363819091265e-05, "loss": 0.0075, "step": 10255 }, { "epoch": 1.984907120743034, "grad_norm": 0.08323154598474503, "learning_rate": 9.173479719167893e-05, "loss": 0.007, "step": 10256 }, { "epoch": 1.9851006191950464, "grad_norm": 0.2413846254348755, "learning_rate": 9.173321233766042e-05, "loss": 0.0081, "step": 10257 }, { "epoch": 1.9852941176470589, "grad_norm": 0.09320976585149765, "learning_rate": 9.17316273470769e-05, "loss": 0.0078, "step": 10258 }, { "epoch": 1.9854876160990713, "grad_norm": 0.22134967148303986, "learning_rate": 9.17300422199342e-05, "loss": 0.0083, "step": 10259 }, { "epoch": 1.9856811145510835, "grad_norm": 0.15437883138656616, "learning_rate": 9.172845695623827e-05, "loss": 0.0071, "step": 10260 }, { "epoch": 1.985874613003096, "grad_norm": 0.16941529512405396, "learning_rate": 9.172687155599501e-05, "loss": 0.009, "step": 10261 }, { "epoch": 
1.9860681114551082, "grad_norm": 0.1814010888338089, "learning_rate": 9.172528601921026e-05, "loss": 0.0067, "step": 10262 }, { "epoch": 1.9862616099071206, "grad_norm": 0.10777197778224945, "learning_rate": 9.172370034588997e-05, "loss": 0.0094, "step": 10263 }, { "epoch": 1.986455108359133, "grad_norm": 0.16283679008483887, "learning_rate": 9.172211453604001e-05, "loss": 0.0074, "step": 10264 }, { "epoch": 1.9866486068111455, "grad_norm": 0.05162012204527855, "learning_rate": 9.172052858966626e-05, "loss": 0.0086, "step": 10265 }, { "epoch": 1.986842105263158, "grad_norm": 0.1214592456817627, "learning_rate": 9.171894250677463e-05, "loss": 0.007, "step": 10266 }, { "epoch": 1.9870356037151704, "grad_norm": 0.05619777739048004, "learning_rate": 9.171735628737104e-05, "loss": 0.0079, "step": 10267 }, { "epoch": 1.9872291021671826, "grad_norm": 0.07965589314699173, "learning_rate": 9.171576993146134e-05, "loss": 0.0077, "step": 10268 }, { "epoch": 1.987422600619195, "grad_norm": 0.05446876585483551, "learning_rate": 9.171418343905147e-05, "loss": 0.0055, "step": 10269 }, { "epoch": 1.9876160990712073, "grad_norm": 0.09365037828683853, "learning_rate": 9.171259681014733e-05, "loss": 0.0095, "step": 10270 }, { "epoch": 1.9878095975232197, "grad_norm": 0.11810436099767685, "learning_rate": 9.171101004475477e-05, "loss": 0.0083, "step": 10271 }, { "epoch": 1.9880030959752322, "grad_norm": 0.06385596841573715, "learning_rate": 9.170942314287973e-05, "loss": 0.0101, "step": 10272 }, { "epoch": 1.9881965944272446, "grad_norm": 0.15418583154678345, "learning_rate": 9.170783610452811e-05, "loss": 0.0056, "step": 10273 }, { "epoch": 1.988390092879257, "grad_norm": 0.059066981077194214, "learning_rate": 9.170624892970579e-05, "loss": 0.0062, "step": 10274 }, { "epoch": 1.9885835913312695, "grad_norm": 0.13033942878246307, "learning_rate": 9.170466161841869e-05, "loss": 0.0076, "step": 10275 }, { "epoch": 1.9887770897832817, "grad_norm": 0.06847665458917618, "learning_rate": 9.170307417067271e-05, "loss": 0.0086, "step": 10276 }, { "epoch": 1.9889705882352942, "grad_norm": 0.06681403517723083, "learning_rate": 9.170148658647373e-05, "loss": 0.0078, "step": 10277 }, { "epoch": 1.9891640866873064, "grad_norm": 0.08770208805799484, "learning_rate": 9.169989886582767e-05, "loss": 0.0078, "step": 10278 }, { "epoch": 1.9893575851393188, "grad_norm": 0.04095250740647316, "learning_rate": 9.169831100874042e-05, "loss": 0.0081, "step": 10279 }, { "epoch": 1.9895510835913313, "grad_norm": 0.08518271893262863, "learning_rate": 9.16967230152179e-05, "loss": 0.006, "step": 10280 }, { "epoch": 1.9897445820433437, "grad_norm": 0.061262257397174835, "learning_rate": 9.169513488526601e-05, "loss": 0.0068, "step": 10281 }, { "epoch": 1.9899380804953561, "grad_norm": 0.05532769113779068, "learning_rate": 9.169354661889065e-05, "loss": 0.0093, "step": 10282 }, { "epoch": 1.9901315789473686, "grad_norm": 0.06948499381542206, "learning_rate": 9.169195821609772e-05, "loss": 0.0096, "step": 10283 }, { "epoch": 1.9903250773993808, "grad_norm": 0.034434255212545395, "learning_rate": 9.169036967689314e-05, "loss": 0.0073, "step": 10284 }, { "epoch": 1.990518575851393, "grad_norm": 0.07346586883068085, "learning_rate": 9.168878100128279e-05, "loss": 0.0074, "step": 10285 }, { "epoch": 1.9907120743034055, "grad_norm": 0.04283277690410614, "learning_rate": 9.16871921892726e-05, "loss": 0.0084, "step": 10286 }, { "epoch": 1.990905572755418, "grad_norm": 0.10320013761520386, "learning_rate": 9.168560324086847e-05, "loss": 0.007, 
"step": 10287 }, { "epoch": 1.9910990712074303, "grad_norm": 0.03814170882105827, "learning_rate": 9.168401415607632e-05, "loss": 0.0085, "step": 10288 }, { "epoch": 1.9912925696594428, "grad_norm": 0.11142019927501678, "learning_rate": 9.168242493490201e-05, "loss": 0.0059, "step": 10289 }, { "epoch": 1.9914860681114552, "grad_norm": 0.0663892850279808, "learning_rate": 9.168083557735151e-05, "loss": 0.0065, "step": 10290 }, { "epoch": 1.9916795665634675, "grad_norm": 0.10960248857736588, "learning_rate": 9.16792460834307e-05, "loss": 0.007, "step": 10291 }, { "epoch": 1.99187306501548, "grad_norm": 0.07974907010793686, "learning_rate": 9.167765645314549e-05, "loss": 0.0077, "step": 10292 }, { "epoch": 1.9920665634674921, "grad_norm": 0.08301357179880142, "learning_rate": 9.167606668650179e-05, "loss": 0.0073, "step": 10293 }, { "epoch": 1.9922600619195046, "grad_norm": 0.0534292608499527, "learning_rate": 9.167447678350552e-05, "loss": 0.0069, "step": 10294 }, { "epoch": 1.992453560371517, "grad_norm": 0.07300961762666702, "learning_rate": 9.167288674416255e-05, "loss": 0.0078, "step": 10295 }, { "epoch": 1.9926470588235294, "grad_norm": 0.05230800062417984, "learning_rate": 9.167129656847885e-05, "loss": 0.0079, "step": 10296 }, { "epoch": 1.9928405572755419, "grad_norm": 0.09248081594705582, "learning_rate": 9.16697062564603e-05, "loss": 0.0078, "step": 10297 }, { "epoch": 1.9930340557275543, "grad_norm": 0.035029828548431396, "learning_rate": 9.166811580811282e-05, "loss": 0.0075, "step": 10298 }, { "epoch": 1.9932275541795665, "grad_norm": 0.091535784304142, "learning_rate": 9.166652522344232e-05, "loss": 0.0073, "step": 10299 }, { "epoch": 1.993421052631579, "grad_norm": 0.05615110322833061, "learning_rate": 9.166493450245472e-05, "loss": 0.0055, "step": 10300 }, { "epoch": 1.9936145510835912, "grad_norm": 0.08065138012170792, "learning_rate": 9.166334364515592e-05, "loss": 0.007, "step": 10301 }, { "epoch": 1.9938080495356036, "grad_norm": 0.07055529952049255, "learning_rate": 9.166175265155184e-05, "loss": 0.0059, "step": 10302 }, { "epoch": 1.994001547987616, "grad_norm": 0.07550665736198425, "learning_rate": 9.16601615216484e-05, "loss": 0.0074, "step": 10303 }, { "epoch": 1.9941950464396285, "grad_norm": 0.08501244336366653, "learning_rate": 9.165857025545151e-05, "loss": 0.0071, "step": 10304 }, { "epoch": 1.994388544891641, "grad_norm": 0.05393785610795021, "learning_rate": 9.16569788529671e-05, "loss": 0.0074, "step": 10305 }, { "epoch": 1.9945820433436534, "grad_norm": 0.0952010527253151, "learning_rate": 9.165538731420106e-05, "loss": 0.0067, "step": 10306 }, { "epoch": 1.9947755417956656, "grad_norm": 0.05013398453593254, "learning_rate": 9.165379563915933e-05, "loss": 0.0077, "step": 10307 }, { "epoch": 1.994969040247678, "grad_norm": 0.09307316690683365, "learning_rate": 9.165220382784783e-05, "loss": 0.0068, "step": 10308 }, { "epoch": 1.9951625386996903, "grad_norm": 0.04013287276029587, "learning_rate": 9.165061188027245e-05, "loss": 0.0075, "step": 10309 }, { "epoch": 1.9953560371517027, "grad_norm": 0.1215231716632843, "learning_rate": 9.164901979643914e-05, "loss": 0.0079, "step": 10310 }, { "epoch": 1.9955495356037152, "grad_norm": 0.06671430170536041, "learning_rate": 9.16474275763538e-05, "loss": 0.0077, "step": 10311 }, { "epoch": 1.9957430340557276, "grad_norm": 0.06307391077280045, "learning_rate": 9.164583522002233e-05, "loss": 0.0086, "step": 10312 }, { "epoch": 1.99593653250774, "grad_norm": 0.06710986793041229, "learning_rate": 9.16442427274507e-05, 
"loss": 0.0064, "step": 10313 }, { "epoch": 1.9961300309597523, "grad_norm": 0.055448200553655624, "learning_rate": 9.16426500986448e-05, "loss": 0.0084, "step": 10314 }, { "epoch": 1.9963235294117647, "grad_norm": 0.07904545217752457, "learning_rate": 9.164105733361057e-05, "loss": 0.0063, "step": 10315 }, { "epoch": 1.996517027863777, "grad_norm": 0.06212637946009636, "learning_rate": 9.163946443235391e-05, "loss": 0.0058, "step": 10316 }, { "epoch": 1.9967105263157894, "grad_norm": 0.06424564123153687, "learning_rate": 9.163787139488074e-05, "loss": 0.0056, "step": 10317 }, { "epoch": 1.9969040247678018, "grad_norm": 0.08140187710523605, "learning_rate": 9.163627822119699e-05, "loss": 0.0072, "step": 10318 }, { "epoch": 1.9970975232198143, "grad_norm": 0.042198557406663895, "learning_rate": 9.163468491130857e-05, "loss": 0.0078, "step": 10319 }, { "epoch": 1.9972910216718267, "grad_norm": 0.1077825203537941, "learning_rate": 9.163309146522146e-05, "loss": 0.007, "step": 10320 }, { "epoch": 1.9974845201238391, "grad_norm": 0.05300090089440346, "learning_rate": 9.163149788294153e-05, "loss": 0.007, "step": 10321 }, { "epoch": 1.9976780185758514, "grad_norm": 0.11363508552312851, "learning_rate": 9.16299041644747e-05, "loss": 0.0076, "step": 10322 }, { "epoch": 1.9978715170278638, "grad_norm": 0.08125985413789749, "learning_rate": 9.162831030982693e-05, "loss": 0.008, "step": 10323 }, { "epoch": 1.998065015479876, "grad_norm": 0.11331044137477875, "learning_rate": 9.162671631900412e-05, "loss": 0.0084, "step": 10324 }, { "epoch": 1.9982585139318885, "grad_norm": 0.07782752066850662, "learning_rate": 9.16251221920122e-05, "loss": 0.0089, "step": 10325 }, { "epoch": 1.998452012383901, "grad_norm": 0.10373590886592865, "learning_rate": 9.162352792885708e-05, "loss": 0.0068, "step": 10326 }, { "epoch": 1.9986455108359134, "grad_norm": 0.067158542573452, "learning_rate": 9.162193352954475e-05, "loss": 0.0086, "step": 10327 }, { "epoch": 1.9988390092879258, "grad_norm": 0.117259182035923, "learning_rate": 9.162033899408106e-05, "loss": 0.0078, "step": 10328 }, { "epoch": 1.9990325077399382, "grad_norm": 0.06823649257421494, "learning_rate": 9.1618744322472e-05, "loss": 0.0077, "step": 10329 }, { "epoch": 1.9992260061919505, "grad_norm": 0.08094101399183273, "learning_rate": 9.161714951472346e-05, "loss": 0.0063, "step": 10330 }, { "epoch": 1.999419504643963, "grad_norm": 0.05032993480563164, "learning_rate": 9.161555457084138e-05, "loss": 0.0065, "step": 10331 }, { "epoch": 1.9996130030959751, "grad_norm": 0.08277333527803421, "learning_rate": 9.16139594908317e-05, "loss": 0.0091, "step": 10332 }, { "epoch": 2.0001934984520124, "grad_norm": 0.05333207547664642, "learning_rate": 9.161236427470033e-05, "loss": 0.0072, "step": 10333 }, { "epoch": 2.000386996904025, "grad_norm": 0.07814231514930725, "learning_rate": 9.16107689224532e-05, "loss": 0.0081, "step": 10334 }, { "epoch": 2.0005804953560373, "grad_norm": 0.036186445504426956, "learning_rate": 9.160917343409628e-05, "loss": 0.0069, "step": 10335 }, { "epoch": 2.0007739938080493, "grad_norm": 0.06175631657242775, "learning_rate": 9.160757780963546e-05, "loss": 0.0072, "step": 10336 }, { "epoch": 2.0009674922600618, "grad_norm": 0.04615668207406998, "learning_rate": 9.160598204907668e-05, "loss": 0.0093, "step": 10337 }, { "epoch": 2.001160990712074, "grad_norm": 0.03180021792650223, "learning_rate": 9.160438615242588e-05, "loss": 0.009, "step": 10338 }, { "epoch": 2.0013544891640866, "grad_norm": 0.06526075303554535, "learning_rate": 
9.160279011968899e-05, "loss": 0.0066, "step": 10339 }, { "epoch": 2.001547987616099, "grad_norm": 0.03542206063866615, "learning_rate": 9.160119395087196e-05, "loss": 0.0078, "step": 10340 }, { "epoch": 2.0017414860681115, "grad_norm": 0.06910867244005203, "learning_rate": 9.15995976459807e-05, "loss": 0.0083, "step": 10341 }, { "epoch": 2.001934984520124, "grad_norm": 0.05160512402653694, "learning_rate": 9.159800120502115e-05, "loss": 0.0072, "step": 10342 }, { "epoch": 2.0021284829721364, "grad_norm": 0.04426470398902893, "learning_rate": 9.159640462799926e-05, "loss": 0.0083, "step": 10343 }, { "epoch": 2.0023219814241484, "grad_norm": 0.10538161545991898, "learning_rate": 9.159480791492095e-05, "loss": 0.0056, "step": 10344 }, { "epoch": 2.002515479876161, "grad_norm": 0.06095190718770027, "learning_rate": 9.159321106579217e-05, "loss": 0.0079, "step": 10345 }, { "epoch": 2.0027089783281733, "grad_norm": 0.10712521523237228, "learning_rate": 9.159161408061883e-05, "loss": 0.0057, "step": 10346 }, { "epoch": 2.0029024767801857, "grad_norm": 0.052452802658081055, "learning_rate": 9.159001695940688e-05, "loss": 0.0082, "step": 10347 }, { "epoch": 2.003095975232198, "grad_norm": 0.12698879837989807, "learning_rate": 9.15884197021623e-05, "loss": 0.008, "step": 10348 }, { "epoch": 2.0032894736842106, "grad_norm": 0.0440528579056263, "learning_rate": 9.158682230889096e-05, "loss": 0.0076, "step": 10349 }, { "epoch": 2.003482972136223, "grad_norm": 0.12023962289094925, "learning_rate": 9.158522477959884e-05, "loss": 0.0072, "step": 10350 }, { "epoch": 2.0036764705882355, "grad_norm": 0.05504545569419861, "learning_rate": 9.158362711429186e-05, "loss": 0.0081, "step": 10351 }, { "epoch": 2.0038699690402475, "grad_norm": 0.09373129904270172, "learning_rate": 9.158202931297597e-05, "loss": 0.0083, "step": 10352 }, { "epoch": 2.00406346749226, "grad_norm": 0.05319974198937416, "learning_rate": 9.158043137565713e-05, "loss": 0.0081, "step": 10353 }, { "epoch": 2.0042569659442724, "grad_norm": 0.06087076663970947, "learning_rate": 9.157883330234124e-05, "loss": 0.0069, "step": 10354 }, { "epoch": 2.004450464396285, "grad_norm": 0.07834997028112411, "learning_rate": 9.157723509303427e-05, "loss": 0.009, "step": 10355 }, { "epoch": 2.0046439628482973, "grad_norm": 0.05834690108895302, "learning_rate": 9.157563674774214e-05, "loss": 0.008, "step": 10356 }, { "epoch": 2.0048374613003097, "grad_norm": 0.10397379845380783, "learning_rate": 9.15740382664708e-05, "loss": 0.0088, "step": 10357 }, { "epoch": 2.005030959752322, "grad_norm": 0.07854532450437546, "learning_rate": 9.157243964922623e-05, "loss": 0.0066, "step": 10358 }, { "epoch": 2.005224458204334, "grad_norm": 0.101774662733078, "learning_rate": 9.157084089601433e-05, "loss": 0.0068, "step": 10359 }, { "epoch": 2.0054179566563466, "grad_norm": 0.053121767938137054, "learning_rate": 9.156924200684104e-05, "loss": 0.0065, "step": 10360 }, { "epoch": 2.005611455108359, "grad_norm": 0.09963773936033249, "learning_rate": 9.156764298171233e-05, "loss": 0.0077, "step": 10361 }, { "epoch": 2.0058049535603715, "grad_norm": 0.04573127254843712, "learning_rate": 9.156604382063413e-05, "loss": 0.0093, "step": 10362 }, { "epoch": 2.005998452012384, "grad_norm": 0.1429285854101181, "learning_rate": 9.156444452361238e-05, "loss": 0.0081, "step": 10363 }, { "epoch": 2.0061919504643964, "grad_norm": 0.06754834204912186, "learning_rate": 9.156284509065306e-05, "loss": 0.0085, "step": 10364 }, { "epoch": 2.006385448916409, "grad_norm": 0.13374657928943634, 
"learning_rate": 9.156124552176206e-05, "loss": 0.007, "step": 10365 }, { "epoch": 2.0065789473684212, "grad_norm": 0.07747108489274979, "learning_rate": 9.155964581694538e-05, "loss": 0.0079, "step": 10366 }, { "epoch": 2.0067724458204332, "grad_norm": 0.11712256073951721, "learning_rate": 9.155804597620894e-05, "loss": 0.0061, "step": 10367 }, { "epoch": 2.0069659442724457, "grad_norm": 0.10061904788017273, "learning_rate": 9.15564459995587e-05, "loss": 0.0071, "step": 10368 }, { "epoch": 2.007159442724458, "grad_norm": 0.06910718232393265, "learning_rate": 9.155484588700059e-05, "loss": 0.0069, "step": 10369 }, { "epoch": 2.0073529411764706, "grad_norm": 0.1415584534406662, "learning_rate": 9.155324563854058e-05, "loss": 0.0086, "step": 10370 }, { "epoch": 2.007546439628483, "grad_norm": 0.0718689039349556, "learning_rate": 9.155164525418459e-05, "loss": 0.0068, "step": 10371 }, { "epoch": 2.0077399380804954, "grad_norm": 0.12184730917215347, "learning_rate": 9.155004473393862e-05, "loss": 0.0091, "step": 10372 }, { "epoch": 2.007933436532508, "grad_norm": 0.05590043216943741, "learning_rate": 9.154844407780855e-05, "loss": 0.0078, "step": 10373 }, { "epoch": 2.0081269349845203, "grad_norm": 0.0822809711098671, "learning_rate": 9.154684328580041e-05, "loss": 0.0082, "step": 10374 }, { "epoch": 2.0083204334365323, "grad_norm": 0.04481613636016846, "learning_rate": 9.154524235792009e-05, "loss": 0.0088, "step": 10375 }, { "epoch": 2.0085139318885448, "grad_norm": 0.06287360191345215, "learning_rate": 9.154364129417357e-05, "loss": 0.0067, "step": 10376 }, { "epoch": 2.008707430340557, "grad_norm": 0.050548940896987915, "learning_rate": 9.15420400945668e-05, "loss": 0.007, "step": 10377 }, { "epoch": 2.0089009287925697, "grad_norm": 0.07541078329086304, "learning_rate": 9.154043875910573e-05, "loss": 0.0073, "step": 10378 }, { "epoch": 2.009094427244582, "grad_norm": 0.06158292293548584, "learning_rate": 9.15388372877963e-05, "loss": 0.0073, "step": 10379 }, { "epoch": 2.0092879256965945, "grad_norm": 0.10556917637586594, "learning_rate": 9.153723568064449e-05, "loss": 0.0079, "step": 10380 }, { "epoch": 2.009481424148607, "grad_norm": 0.06595475971698761, "learning_rate": 9.153563393765623e-05, "loss": 0.0072, "step": 10381 }, { "epoch": 2.0096749226006194, "grad_norm": 0.05510588362812996, "learning_rate": 9.15340320588375e-05, "loss": 0.007, "step": 10382 }, { "epoch": 2.0098684210526314, "grad_norm": 0.031653087586164474, "learning_rate": 9.153243004419422e-05, "loss": 0.0075, "step": 10383 }, { "epoch": 2.010061919504644, "grad_norm": 0.05416363477706909, "learning_rate": 9.153082789373238e-05, "loss": 0.0067, "step": 10384 }, { "epoch": 2.0102554179566563, "grad_norm": 0.07706035673618317, "learning_rate": 9.152922560745792e-05, "loss": 0.0073, "step": 10385 }, { "epoch": 2.0104489164086687, "grad_norm": 0.049234308302402496, "learning_rate": 9.15276231853768e-05, "loss": 0.0078, "step": 10386 }, { "epoch": 2.010642414860681, "grad_norm": 0.07094292342662811, "learning_rate": 9.152602062749498e-05, "loss": 0.0058, "step": 10387 }, { "epoch": 2.0108359133126936, "grad_norm": 0.08311697095632553, "learning_rate": 9.152441793381843e-05, "loss": 0.0077, "step": 10388 }, { "epoch": 2.011029411764706, "grad_norm": 0.0655435174703598, "learning_rate": 9.152281510435308e-05, "loss": 0.0076, "step": 10389 }, { "epoch": 2.011222910216718, "grad_norm": 0.08959747105836868, "learning_rate": 9.15212121391049e-05, "loss": 0.0103, "step": 10390 }, { "epoch": 2.0114164086687305, "grad_norm": 
0.12083952873945236, "learning_rate": 9.151960903807985e-05, "loss": 0.0079, "step": 10391 }, { "epoch": 2.011609907120743, "grad_norm": 0.09466467797756195, "learning_rate": 9.151800580128389e-05, "loss": 0.0078, "step": 10392 }, { "epoch": 2.0118034055727554, "grad_norm": 0.13101699948310852, "learning_rate": 9.151640242872298e-05, "loss": 0.0074, "step": 10393 }, { "epoch": 2.011996904024768, "grad_norm": 0.08980970084667206, "learning_rate": 9.15147989204031e-05, "loss": 0.0072, "step": 10394 }, { "epoch": 2.0121904024767803, "grad_norm": 0.12999394536018372, "learning_rate": 9.151319527633018e-05, "loss": 0.0081, "step": 10395 }, { "epoch": 2.0123839009287927, "grad_norm": 0.0711049884557724, "learning_rate": 9.151159149651022e-05, "loss": 0.0075, "step": 10396 }, { "epoch": 2.012577399380805, "grad_norm": 0.11333833634853363, "learning_rate": 9.150998758094913e-05, "loss": 0.0076, "step": 10397 }, { "epoch": 2.012770897832817, "grad_norm": 0.06850633770227432, "learning_rate": 9.150838352965292e-05, "loss": 0.0086, "step": 10398 }, { "epoch": 2.0129643962848296, "grad_norm": 0.09496835619211197, "learning_rate": 9.150677934262753e-05, "loss": 0.0075, "step": 10399 }, { "epoch": 2.013157894736842, "grad_norm": 0.05449401214718819, "learning_rate": 9.150517501987895e-05, "loss": 0.0067, "step": 10400 }, { "epoch": 2.0133513931888545, "grad_norm": 0.07925598323345184, "learning_rate": 9.150357056141309e-05, "loss": 0.0091, "step": 10401 }, { "epoch": 2.013544891640867, "grad_norm": 0.04638401418924332, "learning_rate": 9.150196596723599e-05, "loss": 0.0081, "step": 10402 }, { "epoch": 2.0137383900928794, "grad_norm": 0.0629415214061737, "learning_rate": 9.150036123735354e-05, "loss": 0.0096, "step": 10403 }, { "epoch": 2.013931888544892, "grad_norm": 0.04730440676212311, "learning_rate": 9.149875637177174e-05, "loss": 0.0059, "step": 10404 }, { "epoch": 2.0141253869969042, "grad_norm": 0.060882747173309326, "learning_rate": 9.149715137049658e-05, "loss": 0.0079, "step": 10405 }, { "epoch": 2.0143188854489162, "grad_norm": 0.08937162160873413, "learning_rate": 9.1495546233534e-05, "loss": 0.0075, "step": 10406 }, { "epoch": 2.0145123839009287, "grad_norm": 0.07727698981761932, "learning_rate": 9.149394096088996e-05, "loss": 0.0092, "step": 10407 }, { "epoch": 2.014705882352941, "grad_norm": 0.08566281944513321, "learning_rate": 9.149233555257044e-05, "loss": 0.0073, "step": 10408 }, { "epoch": 2.0148993808049536, "grad_norm": 0.05701790750026703, "learning_rate": 9.149073000858141e-05, "loss": 0.0073, "step": 10409 }, { "epoch": 2.015092879256966, "grad_norm": 0.0684284195303917, "learning_rate": 9.148912432892884e-05, "loss": 0.0068, "step": 10410 }, { "epoch": 2.0152863777089784, "grad_norm": 0.061497777700424194, "learning_rate": 9.148751851361869e-05, "loss": 0.0059, "step": 10411 }, { "epoch": 2.015479876160991, "grad_norm": 0.05061795189976692, "learning_rate": 9.148591256265696e-05, "loss": 0.0068, "step": 10412 }, { "epoch": 2.015673374613003, "grad_norm": 0.08790037781000137, "learning_rate": 9.148430647604957e-05, "loss": 0.0075, "step": 10413 }, { "epoch": 2.0158668730650153, "grad_norm": 0.07139082252979279, "learning_rate": 9.148270025380255e-05, "loss": 0.007, "step": 10414 }, { "epoch": 2.0160603715170278, "grad_norm": 0.07189058512449265, "learning_rate": 9.148109389592181e-05, "loss": 0.0077, "step": 10415 }, { "epoch": 2.01625386996904, "grad_norm": 0.10166069865226746, "learning_rate": 9.147948740241336e-05, "loss": 0.008, "step": 10416 }, { "epoch": 
2.0164473684210527, "grad_norm": 0.05122082680463791, "learning_rate": 9.147788077328317e-05, "loss": 0.0062, "step": 10417 }, { "epoch": 2.016640866873065, "grad_norm": 0.1239485964179039, "learning_rate": 9.147627400853722e-05, "loss": 0.0068, "step": 10418 }, { "epoch": 2.0168343653250775, "grad_norm": 0.0721592828631401, "learning_rate": 9.147466710818144e-05, "loss": 0.0067, "step": 10419 }, { "epoch": 2.01702786377709, "grad_norm": 0.0944024845957756, "learning_rate": 9.147306007222185e-05, "loss": 0.0067, "step": 10420 }, { "epoch": 2.017221362229102, "grad_norm": 0.08780232816934586, "learning_rate": 9.14714529006644e-05, "loss": 0.0066, "step": 10421 }, { "epoch": 2.0174148606811144, "grad_norm": 0.08986058086156845, "learning_rate": 9.14698455935151e-05, "loss": 0.0081, "step": 10422 }, { "epoch": 2.017608359133127, "grad_norm": 0.1469004601240158, "learning_rate": 9.146823815077988e-05, "loss": 0.0073, "step": 10423 }, { "epoch": 2.0178018575851393, "grad_norm": 0.07173221558332443, "learning_rate": 9.146663057246475e-05, "loss": 0.0067, "step": 10424 }, { "epoch": 2.0179953560371517, "grad_norm": 0.11737705767154694, "learning_rate": 9.146502285857566e-05, "loss": 0.008, "step": 10425 }, { "epoch": 2.018188854489164, "grad_norm": 0.07275974750518799, "learning_rate": 9.146341500911861e-05, "loss": 0.006, "step": 10426 }, { "epoch": 2.0183823529411766, "grad_norm": 0.08831613510847092, "learning_rate": 9.146180702409956e-05, "loss": 0.0071, "step": 10427 }, { "epoch": 2.018575851393189, "grad_norm": 0.09460633993148804, "learning_rate": 9.14601989035245e-05, "loss": 0.0077, "step": 10428 }, { "epoch": 2.018769349845201, "grad_norm": 0.1292971670627594, "learning_rate": 9.145859064739942e-05, "loss": 0.0077, "step": 10429 }, { "epoch": 2.0189628482972135, "grad_norm": 0.0678519606590271, "learning_rate": 9.145698225573026e-05, "loss": 0.0063, "step": 10430 }, { "epoch": 2.019156346749226, "grad_norm": 0.07204272598028183, "learning_rate": 9.145537372852303e-05, "loss": 0.007, "step": 10431 }, { "epoch": 2.0193498452012384, "grad_norm": 0.05706588551402092, "learning_rate": 9.145376506578371e-05, "loss": 0.0072, "step": 10432 }, { "epoch": 2.019543343653251, "grad_norm": 0.05955813080072403, "learning_rate": 9.145215626751826e-05, "loss": 0.008, "step": 10433 }, { "epoch": 2.0197368421052633, "grad_norm": 0.04579278826713562, "learning_rate": 9.145054733373269e-05, "loss": 0.0059, "step": 10434 }, { "epoch": 2.0199303405572757, "grad_norm": 0.03432711958885193, "learning_rate": 9.144893826443295e-05, "loss": 0.0078, "step": 10435 }, { "epoch": 2.0201238390092877, "grad_norm": 0.06501058489084244, "learning_rate": 9.144732905962506e-05, "loss": 0.007, "step": 10436 }, { "epoch": 2.0203173374613, "grad_norm": 0.07250164449214935, "learning_rate": 9.144571971931496e-05, "loss": 0.0072, "step": 10437 }, { "epoch": 2.0205108359133126, "grad_norm": 0.032132837921381, "learning_rate": 9.144411024350867e-05, "loss": 0.007, "step": 10438 }, { "epoch": 2.020704334365325, "grad_norm": 0.09677664190530777, "learning_rate": 9.144250063221216e-05, "loss": 0.0081, "step": 10439 }, { "epoch": 2.0208978328173375, "grad_norm": 0.028748709708452225, "learning_rate": 9.14408908854314e-05, "loss": 0.0071, "step": 10440 }, { "epoch": 2.02109133126935, "grad_norm": 0.09132290631532669, "learning_rate": 9.14392810031724e-05, "loss": 0.0083, "step": 10441 }, { "epoch": 2.0212848297213624, "grad_norm": 0.04161597043275833, "learning_rate": 9.143767098544112e-05, "loss": 0.008, "step": 10442 }, { "epoch": 
2.021478328173375, "grad_norm": 0.04353459179401398, "learning_rate": 9.143606083224355e-05, "loss": 0.0081, "step": 10443 }, { "epoch": 2.021671826625387, "grad_norm": 0.13939407467842102, "learning_rate": 9.143445054358571e-05, "loss": 0.0085, "step": 10444 }, { "epoch": 2.0218653250773992, "grad_norm": 0.08734442293643951, "learning_rate": 9.143284011947356e-05, "loss": 0.0062, "step": 10445 }, { "epoch": 2.0220588235294117, "grad_norm": 0.17088262736797333, "learning_rate": 9.143122955991306e-05, "loss": 0.0079, "step": 10446 }, { "epoch": 2.022252321981424, "grad_norm": 0.10360097140073776, "learning_rate": 9.142961886491025e-05, "loss": 0.007, "step": 10447 }, { "epoch": 2.0224458204334366, "grad_norm": 0.17529599368572235, "learning_rate": 9.142800803447107e-05, "loss": 0.0076, "step": 10448 }, { "epoch": 2.022639318885449, "grad_norm": 0.08691716194152832, "learning_rate": 9.142639706860154e-05, "loss": 0.0078, "step": 10449 }, { "epoch": 2.0228328173374615, "grad_norm": 0.16229112446308136, "learning_rate": 9.142478596730766e-05, "loss": 0.009, "step": 10450 }, { "epoch": 2.023026315789474, "grad_norm": 0.11244851350784302, "learning_rate": 9.142317473059538e-05, "loss": 0.0072, "step": 10451 }, { "epoch": 2.023219814241486, "grad_norm": 0.13650326430797577, "learning_rate": 9.142156335847073e-05, "loss": 0.0074, "step": 10452 }, { "epoch": 2.0234133126934983, "grad_norm": 0.13743646442890167, "learning_rate": 9.141995185093966e-05, "loss": 0.0077, "step": 10453 }, { "epoch": 2.0236068111455108, "grad_norm": 0.07370878010988235, "learning_rate": 9.14183402080082e-05, "loss": 0.0084, "step": 10454 }, { "epoch": 2.023800309597523, "grad_norm": 0.11287945508956909, "learning_rate": 9.141672842968232e-05, "loss": 0.0064, "step": 10455 }, { "epoch": 2.0239938080495357, "grad_norm": 0.04086604714393616, "learning_rate": 9.141511651596801e-05, "loss": 0.0062, "step": 10456 }, { "epoch": 2.024187306501548, "grad_norm": 0.08445536345243454, "learning_rate": 9.141350446687128e-05, "loss": 0.0082, "step": 10457 }, { "epoch": 2.0243808049535605, "grad_norm": 0.037091612815856934, "learning_rate": 9.14118922823981e-05, "loss": 0.008, "step": 10458 }, { "epoch": 2.0245743034055725, "grad_norm": 0.03798774629831314, "learning_rate": 9.141027996255449e-05, "loss": 0.0078, "step": 10459 }, { "epoch": 2.024767801857585, "grad_norm": 0.0869208499789238, "learning_rate": 9.140866750734642e-05, "loss": 0.0088, "step": 10460 }, { "epoch": 2.0249613003095974, "grad_norm": 0.1622685045003891, "learning_rate": 9.14070549167799e-05, "loss": 0.0089, "step": 10461 }, { "epoch": 2.02515479876161, "grad_norm": 0.06345569342374802, "learning_rate": 9.140544219086094e-05, "loss": 0.0076, "step": 10462 }, { "epoch": 2.0253482972136223, "grad_norm": 0.14759081602096558, "learning_rate": 9.14038293295955e-05, "loss": 0.009, "step": 10463 }, { "epoch": 2.0255417956656347, "grad_norm": 0.1295165866613388, "learning_rate": 9.140221633298958e-05, "loss": 0.0066, "step": 10464 }, { "epoch": 2.025735294117647, "grad_norm": 0.16150431334972382, "learning_rate": 9.140060320104921e-05, "loss": 0.0082, "step": 10465 }, { "epoch": 2.0259287925696596, "grad_norm": 0.13347527384757996, "learning_rate": 9.139898993378036e-05, "loss": 0.0085, "step": 10466 }, { "epoch": 2.0261222910216716, "grad_norm": 0.11276019364595413, "learning_rate": 9.139737653118903e-05, "loss": 0.0077, "step": 10467 }, { "epoch": 2.026315789473684, "grad_norm": 0.15223746001720428, "learning_rate": 9.139576299328121e-05, "loss": 0.0078, "step": 10468 }, 
{ "epoch": 2.0265092879256965, "grad_norm": 0.08796322345733643, "learning_rate": 9.139414932006293e-05, "loss": 0.0065, "step": 10469 }, { "epoch": 2.026702786377709, "grad_norm": 0.13776250183582306, "learning_rate": 9.139253551154018e-05, "loss": 0.0074, "step": 10470 }, { "epoch": 2.0268962848297214, "grad_norm": 0.06746246665716171, "learning_rate": 9.139092156771894e-05, "loss": 0.0064, "step": 10471 }, { "epoch": 2.027089783281734, "grad_norm": 0.12178141623735428, "learning_rate": 9.13893074886052e-05, "loss": 0.0068, "step": 10472 }, { "epoch": 2.0272832817337463, "grad_norm": 0.07980862259864807, "learning_rate": 9.138769327420501e-05, "loss": 0.0067, "step": 10473 }, { "epoch": 2.0274767801857587, "grad_norm": 0.0533013716340065, "learning_rate": 9.138607892452431e-05, "loss": 0.0068, "step": 10474 }, { "epoch": 2.0276702786377707, "grad_norm": 0.15200933814048767, "learning_rate": 9.138446443956918e-05, "loss": 0.0097, "step": 10475 }, { "epoch": 2.027863777089783, "grad_norm": 0.04824869707226753, "learning_rate": 9.138284981934554e-05, "loss": 0.0086, "step": 10476 }, { "epoch": 2.0280572755417956, "grad_norm": 0.13020522892475128, "learning_rate": 9.138123506385943e-05, "loss": 0.0071, "step": 10477 }, { "epoch": 2.028250773993808, "grad_norm": 0.08324270695447922, "learning_rate": 9.137962017311689e-05, "loss": 0.0078, "step": 10478 }, { "epoch": 2.0284442724458205, "grad_norm": 0.08672107011079788, "learning_rate": 9.137800514712385e-05, "loss": 0.0086, "step": 10479 }, { "epoch": 2.028637770897833, "grad_norm": 0.1316012442111969, "learning_rate": 9.137638998588636e-05, "loss": 0.0063, "step": 10480 }, { "epoch": 2.0288312693498454, "grad_norm": 0.05688666179776192, "learning_rate": 9.137477468941041e-05, "loss": 0.0062, "step": 10481 }, { "epoch": 2.0290247678018574, "grad_norm": 0.12996423244476318, "learning_rate": 9.137315925770202e-05, "loss": 0.0091, "step": 10482 }, { "epoch": 2.02921826625387, "grad_norm": 0.08865156024694443, "learning_rate": 9.137154369076717e-05, "loss": 0.0076, "step": 10483 }, { "epoch": 2.0294117647058822, "grad_norm": 0.09794619679450989, "learning_rate": 9.136992798861191e-05, "loss": 0.0067, "step": 10484 }, { "epoch": 2.0296052631578947, "grad_norm": 0.07207522541284561, "learning_rate": 9.13683121512422e-05, "loss": 0.0066, "step": 10485 }, { "epoch": 2.029798761609907, "grad_norm": 0.08298706263303757, "learning_rate": 9.136669617866407e-05, "loss": 0.0074, "step": 10486 }, { "epoch": 2.0299922600619196, "grad_norm": 0.05935594066977501, "learning_rate": 9.136508007088353e-05, "loss": 0.0068, "step": 10487 }, { "epoch": 2.030185758513932, "grad_norm": 0.07779685407876968, "learning_rate": 9.136346382790658e-05, "loss": 0.0074, "step": 10488 }, { "epoch": 2.0303792569659445, "grad_norm": 0.05988746136426926, "learning_rate": 9.136184744973923e-05, "loss": 0.0063, "step": 10489 }, { "epoch": 2.0305727554179565, "grad_norm": 0.10221365094184875, "learning_rate": 9.13602309363875e-05, "loss": 0.0078, "step": 10490 }, { "epoch": 2.030766253869969, "grad_norm": 0.04713049903512001, "learning_rate": 9.135861428785739e-05, "loss": 0.0069, "step": 10491 }, { "epoch": 2.0309597523219813, "grad_norm": 0.10537765175104141, "learning_rate": 9.135699750415489e-05, "loss": 0.0087, "step": 10492 }, { "epoch": 2.031153250773994, "grad_norm": 0.046949539333581924, "learning_rate": 9.135538058528606e-05, "loss": 0.0064, "step": 10493 }, { "epoch": 2.031346749226006, "grad_norm": 0.0921347588300705, "learning_rate": 9.135376353125686e-05, "loss": 0.0072, 
"step": 10494 }, { "epoch": 2.0315402476780187, "grad_norm": 0.06956400722265244, "learning_rate": 9.135214634207334e-05, "loss": 0.0073, "step": 10495 }, { "epoch": 2.031733746130031, "grad_norm": 0.09539717435836792, "learning_rate": 9.135052901774148e-05, "loss": 0.006, "step": 10496 }, { "epoch": 2.0319272445820435, "grad_norm": 0.0661977082490921, "learning_rate": 9.134891155826733e-05, "loss": 0.0079, "step": 10497 }, { "epoch": 2.0321207430340555, "grad_norm": 0.11746961623430252, "learning_rate": 9.134729396365687e-05, "loss": 0.0075, "step": 10498 }, { "epoch": 2.032314241486068, "grad_norm": 0.038289669901132584, "learning_rate": 9.134567623391614e-05, "loss": 0.0082, "step": 10499 }, { "epoch": 2.0325077399380804, "grad_norm": 0.14518874883651733, "learning_rate": 9.134405836905113e-05, "loss": 0.0084, "step": 10500 }, { "epoch": 2.032701238390093, "grad_norm": 0.07555775344371796, "learning_rate": 9.134244036906788e-05, "loss": 0.0099, "step": 10501 }, { "epoch": 2.0328947368421053, "grad_norm": 0.13582992553710938, "learning_rate": 9.134082223397237e-05, "loss": 0.0073, "step": 10502 }, { "epoch": 2.0330882352941178, "grad_norm": 0.06557620316743851, "learning_rate": 9.133920396377065e-05, "loss": 0.0076, "step": 10503 }, { "epoch": 2.03328173374613, "grad_norm": 0.08508982509374619, "learning_rate": 9.133758555846872e-05, "loss": 0.008, "step": 10504 }, { "epoch": 2.0334752321981426, "grad_norm": 0.07726358622312546, "learning_rate": 9.133596701807261e-05, "loss": 0.0069, "step": 10505 }, { "epoch": 2.0336687306501546, "grad_norm": 0.032572951167821884, "learning_rate": 9.133434834258831e-05, "loss": 0.0073, "step": 10506 }, { "epoch": 2.033862229102167, "grad_norm": 0.1056390032172203, "learning_rate": 9.133272953202188e-05, "loss": 0.0072, "step": 10507 }, { "epoch": 2.0340557275541795, "grad_norm": 0.062423355877399445, "learning_rate": 9.13311105863793e-05, "loss": 0.0061, "step": 10508 }, { "epoch": 2.034249226006192, "grad_norm": 0.06962569057941437, "learning_rate": 9.13294915056666e-05, "loss": 0.0084, "step": 10509 }, { "epoch": 2.0344427244582044, "grad_norm": 0.07373019307851791, "learning_rate": 9.132787228988982e-05, "loss": 0.0074, "step": 10510 }, { "epoch": 2.034636222910217, "grad_norm": 0.05283842980861664, "learning_rate": 9.132625293905494e-05, "loss": 0.0072, "step": 10511 }, { "epoch": 2.0348297213622293, "grad_norm": 0.0653466135263443, "learning_rate": 9.132463345316802e-05, "loss": 0.0064, "step": 10512 }, { "epoch": 2.0350232198142413, "grad_norm": 0.04660427197813988, "learning_rate": 9.132301383223505e-05, "loss": 0.0079, "step": 10513 }, { "epoch": 2.0352167182662537, "grad_norm": 0.06741002202033997, "learning_rate": 9.132139407626206e-05, "loss": 0.0078, "step": 10514 }, { "epoch": 2.035410216718266, "grad_norm": 0.060441937297582626, "learning_rate": 9.131977418525509e-05, "loss": 0.0073, "step": 10515 }, { "epoch": 2.0356037151702786, "grad_norm": 0.04499369487166405, "learning_rate": 9.131815415922015e-05, "loss": 0.0055, "step": 10516 }, { "epoch": 2.035797213622291, "grad_norm": 0.06387386471033096, "learning_rate": 9.131653399816325e-05, "loss": 0.0066, "step": 10517 }, { "epoch": 2.0359907120743035, "grad_norm": 0.06115711107850075, "learning_rate": 9.131491370209044e-05, "loss": 0.0074, "step": 10518 }, { "epoch": 2.036184210526316, "grad_norm": 0.03984272480010986, "learning_rate": 9.131329327100772e-05, "loss": 0.0079, "step": 10519 }, { "epoch": 2.0363777089783284, "grad_norm": 0.055276039987802505, "learning_rate": 
9.131167270492115e-05, "loss": 0.0077, "step": 10520 }, { "epoch": 2.0365712074303404, "grad_norm": 0.05902230367064476, "learning_rate": 9.13100520038367e-05, "loss": 0.0077, "step": 10521 }, { "epoch": 2.036764705882353, "grad_norm": 0.07093527913093567, "learning_rate": 9.13084311677604e-05, "loss": 0.008, "step": 10522 }, { "epoch": 2.0369582043343653, "grad_norm": 0.06777907907962799, "learning_rate": 9.130681019669834e-05, "loss": 0.0075, "step": 10523 }, { "epoch": 2.0371517027863777, "grad_norm": 0.07200782746076584, "learning_rate": 9.13051890906565e-05, "loss": 0.0074, "step": 10524 }, { "epoch": 2.03734520123839, "grad_norm": 0.06685257703065872, "learning_rate": 9.13035678496409e-05, "loss": 0.0066, "step": 10525 }, { "epoch": 2.0375386996904026, "grad_norm": 0.05976317450404167, "learning_rate": 9.130194647365758e-05, "loss": 0.0067, "step": 10526 }, { "epoch": 2.037732198142415, "grad_norm": 0.06858784705400467, "learning_rate": 9.130032496271258e-05, "loss": 0.009, "step": 10527 }, { "epoch": 2.0379256965944275, "grad_norm": 0.05491537228226662, "learning_rate": 9.12987033168119e-05, "loss": 0.0075, "step": 10528 }, { "epoch": 2.0381191950464395, "grad_norm": 0.0756908655166626, "learning_rate": 9.129708153596161e-05, "loss": 0.0055, "step": 10529 }, { "epoch": 2.038312693498452, "grad_norm": 0.07914075255393982, "learning_rate": 9.12954596201677e-05, "loss": 0.0069, "step": 10530 }, { "epoch": 2.0385061919504643, "grad_norm": 0.11174323409795761, "learning_rate": 9.12938375694362e-05, "loss": 0.0076, "step": 10531 }, { "epoch": 2.038699690402477, "grad_norm": 0.04931361973285675, "learning_rate": 9.129221538377319e-05, "loss": 0.0081, "step": 10532 }, { "epoch": 2.0388931888544892, "grad_norm": 0.07656268030405045, "learning_rate": 9.129059306318464e-05, "loss": 0.007, "step": 10533 }, { "epoch": 2.0390866873065017, "grad_norm": 0.07385387271642685, "learning_rate": 9.128897060767661e-05, "loss": 0.0074, "step": 10534 }, { "epoch": 2.039280185758514, "grad_norm": 0.06483182311058044, "learning_rate": 9.128734801725513e-05, "loss": 0.0077, "step": 10535 }, { "epoch": 2.039473684210526, "grad_norm": 0.07199906557798386, "learning_rate": 9.128572529192624e-05, "loss": 0.0072, "step": 10536 }, { "epoch": 2.0396671826625385, "grad_norm": 0.06767673790454865, "learning_rate": 9.128410243169595e-05, "loss": 0.0099, "step": 10537 }, { "epoch": 2.039860681114551, "grad_norm": 0.059442389756441116, "learning_rate": 9.128247943657032e-05, "loss": 0.0085, "step": 10538 }, { "epoch": 2.0400541795665634, "grad_norm": 0.051910314708948135, "learning_rate": 9.128085630655537e-05, "loss": 0.0082, "step": 10539 }, { "epoch": 2.040247678018576, "grad_norm": 0.03204558044672012, "learning_rate": 9.127923304165714e-05, "loss": 0.0088, "step": 10540 }, { "epoch": 2.0404411764705883, "grad_norm": 0.05579734593629837, "learning_rate": 9.127760964188164e-05, "loss": 0.0078, "step": 10541 }, { "epoch": 2.0406346749226008, "grad_norm": 0.02943556010723114, "learning_rate": 9.127598610723494e-05, "loss": 0.0069, "step": 10542 }, { "epoch": 2.040828173374613, "grad_norm": 0.05726472660899162, "learning_rate": 9.127436243772307e-05, "loss": 0.0083, "step": 10543 }, { "epoch": 2.041021671826625, "grad_norm": 0.06348226219415665, "learning_rate": 9.127273863335205e-05, "loss": 0.0072, "step": 10544 }, { "epoch": 2.0412151702786376, "grad_norm": 0.057199038565158844, "learning_rate": 9.127111469412792e-05, "loss": 0.0068, "step": 10545 }, { "epoch": 2.04140866873065, "grad_norm": 0.0659763514995575, 
"learning_rate": 9.126949062005674e-05, "loss": 0.0081, "step": 10546 }, { "epoch": 2.0416021671826625, "grad_norm": 0.06184115260839462, "learning_rate": 9.126786641114452e-05, "loss": 0.0075, "step": 10547 }, { "epoch": 2.041795665634675, "grad_norm": 0.0672585740685463, "learning_rate": 9.126624206739731e-05, "loss": 0.0097, "step": 10548 }, { "epoch": 2.0419891640866874, "grad_norm": 0.07408053427934647, "learning_rate": 9.126461758882116e-05, "loss": 0.0076, "step": 10549 }, { "epoch": 2.0421826625387, "grad_norm": 0.0713500902056694, "learning_rate": 9.126299297542208e-05, "loss": 0.0071, "step": 10550 }, { "epoch": 2.0423761609907123, "grad_norm": 0.08761132508516312, "learning_rate": 9.126136822720614e-05, "loss": 0.008, "step": 10551 }, { "epoch": 2.0425696594427243, "grad_norm": 0.1100916787981987, "learning_rate": 9.125974334417934e-05, "loss": 0.0072, "step": 10552 }, { "epoch": 2.0427631578947367, "grad_norm": 0.10189684480428696, "learning_rate": 9.125811832634778e-05, "loss": 0.0089, "step": 10553 }, { "epoch": 2.042956656346749, "grad_norm": 0.08789476007223129, "learning_rate": 9.125649317371746e-05, "loss": 0.0087, "step": 10554 }, { "epoch": 2.0431501547987616, "grad_norm": 0.07665982842445374, "learning_rate": 9.125486788629444e-05, "loss": 0.0079, "step": 10555 }, { "epoch": 2.043343653250774, "grad_norm": 0.10071875900030136, "learning_rate": 9.125324246408474e-05, "loss": 0.0067, "step": 10556 }, { "epoch": 2.0435371517027865, "grad_norm": 0.0665353387594223, "learning_rate": 9.125161690709443e-05, "loss": 0.0081, "step": 10557 }, { "epoch": 2.043730650154799, "grad_norm": 0.11207877844572067, "learning_rate": 9.124999121532955e-05, "loss": 0.0075, "step": 10558 }, { "epoch": 2.043924148606811, "grad_norm": 0.036643754690885544, "learning_rate": 9.12483653887961e-05, "loss": 0.0088, "step": 10559 }, { "epoch": 2.0441176470588234, "grad_norm": 0.13143986463546753, "learning_rate": 9.124673942750019e-05, "loss": 0.0083, "step": 10560 }, { "epoch": 2.044311145510836, "grad_norm": 0.06022686883807182, "learning_rate": 9.124511333144783e-05, "loss": 0.0063, "step": 10561 }, { "epoch": 2.0445046439628483, "grad_norm": 0.11136866360902786, "learning_rate": 9.124348710064505e-05, "loss": 0.007, "step": 10562 }, { "epoch": 2.0446981424148607, "grad_norm": 0.1073303297162056, "learning_rate": 9.124186073509793e-05, "loss": 0.0083, "step": 10563 }, { "epoch": 2.044891640866873, "grad_norm": 0.07079502195119858, "learning_rate": 9.124023423481251e-05, "loss": 0.0071, "step": 10564 }, { "epoch": 2.0450851393188856, "grad_norm": 0.09597903490066528, "learning_rate": 9.123860759979481e-05, "loss": 0.0076, "step": 10565 }, { "epoch": 2.045278637770898, "grad_norm": 0.051150791347026825, "learning_rate": 9.12369808300509e-05, "loss": 0.0062, "step": 10566 }, { "epoch": 2.04547213622291, "grad_norm": 0.0696256160736084, "learning_rate": 9.123535392558685e-05, "loss": 0.0074, "step": 10567 }, { "epoch": 2.0456656346749225, "grad_norm": 0.0639854297041893, "learning_rate": 9.123372688640866e-05, "loss": 0.008, "step": 10568 }, { "epoch": 2.045859133126935, "grad_norm": 0.06462793052196503, "learning_rate": 9.12320997125224e-05, "loss": 0.0077, "step": 10569 }, { "epoch": 2.0460526315789473, "grad_norm": 0.08811908960342407, "learning_rate": 9.123047240393413e-05, "loss": 0.0071, "step": 10570 }, { "epoch": 2.04624613003096, "grad_norm": 0.07347185909748077, "learning_rate": 9.12288449606499e-05, "loss": 0.0075, "step": 10571 }, { "epoch": 2.0464396284829722, "grad_norm": 
0.05582096055150032, "learning_rate": 9.122721738267573e-05, "loss": 0.0082, "step": 10572 }, { "epoch": 2.0466331269349847, "grad_norm": 0.07058893889188766, "learning_rate": 9.122558967001771e-05, "loss": 0.0083, "step": 10573 }, { "epoch": 2.046826625386997, "grad_norm": 0.04776901379227638, "learning_rate": 9.122396182268186e-05, "loss": 0.0076, "step": 10574 }, { "epoch": 2.047020123839009, "grad_norm": 0.07844838500022888, "learning_rate": 9.122233384067426e-05, "loss": 0.0106, "step": 10575 }, { "epoch": 2.0472136222910216, "grad_norm": 0.05841020494699478, "learning_rate": 9.122070572400092e-05, "loss": 0.0082, "step": 10576 }, { "epoch": 2.047407120743034, "grad_norm": 0.061174169182777405, "learning_rate": 9.121907747266795e-05, "loss": 0.0086, "step": 10577 }, { "epoch": 2.0476006191950464, "grad_norm": 0.0906706228852272, "learning_rate": 9.121744908668137e-05, "loss": 0.0073, "step": 10578 }, { "epoch": 2.047794117647059, "grad_norm": 0.04728557541966438, "learning_rate": 9.121582056604724e-05, "loss": 0.0064, "step": 10579 }, { "epoch": 2.0479876160990713, "grad_norm": 0.0765686109662056, "learning_rate": 9.12141919107716e-05, "loss": 0.0066, "step": 10580 }, { "epoch": 2.0481811145510838, "grad_norm": 0.06348040699958801, "learning_rate": 9.121256312086052e-05, "loss": 0.0065, "step": 10581 }, { "epoch": 2.048374613003096, "grad_norm": 0.06685563176870346, "learning_rate": 9.121093419632005e-05, "loss": 0.007, "step": 10582 }, { "epoch": 2.048568111455108, "grad_norm": 0.060432303696870804, "learning_rate": 9.120930513715627e-05, "loss": 0.0087, "step": 10583 }, { "epoch": 2.0487616099071206, "grad_norm": 0.03610781207680702, "learning_rate": 9.120767594337519e-05, "loss": 0.0063, "step": 10584 }, { "epoch": 2.048955108359133, "grad_norm": 0.10901124030351639, "learning_rate": 9.12060466149829e-05, "loss": 0.0075, "step": 10585 }, { "epoch": 2.0491486068111455, "grad_norm": 0.03856517747044563, "learning_rate": 9.120441715198547e-05, "loss": 0.0074, "step": 10586 }, { "epoch": 2.049342105263158, "grad_norm": 0.062141302973032, "learning_rate": 9.120278755438893e-05, "loss": 0.0078, "step": 10587 }, { "epoch": 2.0495356037151704, "grad_norm": 0.04209108278155327, "learning_rate": 9.120115782219933e-05, "loss": 0.0086, "step": 10588 }, { "epoch": 2.049729102167183, "grad_norm": 0.05348511412739754, "learning_rate": 9.119952795542275e-05, "loss": 0.0071, "step": 10589 }, { "epoch": 2.049922600619195, "grad_norm": 0.05062698572874069, "learning_rate": 9.119789795406524e-05, "loss": 0.0086, "step": 10590 }, { "epoch": 2.0501160990712073, "grad_norm": 0.07953661680221558, "learning_rate": 9.119626781813289e-05, "loss": 0.0065, "step": 10591 }, { "epoch": 2.0503095975232197, "grad_norm": 0.05780719965696335, "learning_rate": 9.119463754763171e-05, "loss": 0.0065, "step": 10592 }, { "epoch": 2.050503095975232, "grad_norm": 0.08023886382579803, "learning_rate": 9.11930071425678e-05, "loss": 0.0083, "step": 10593 }, { "epoch": 2.0506965944272446, "grad_norm": 0.07474980503320694, "learning_rate": 9.119137660294722e-05, "loss": 0.008, "step": 10594 }, { "epoch": 2.050890092879257, "grad_norm": 0.098291777074337, "learning_rate": 9.118974592877599e-05, "loss": 0.007, "step": 10595 }, { "epoch": 2.0510835913312695, "grad_norm": 0.07686963677406311, "learning_rate": 9.118811512006021e-05, "loss": 0.0082, "step": 10596 }, { "epoch": 2.051277089783282, "grad_norm": 0.09802678972482681, "learning_rate": 9.118648417680594e-05, "loss": 0.0073, "step": 10597 }, { "epoch": 2.051470588235294, 
"grad_norm": 0.11005789041519165, "learning_rate": 9.118485309901926e-05, "loss": 0.0078, "step": 10598 }, { "epoch": 2.0516640866873064, "grad_norm": 0.0905848890542984, "learning_rate": 9.118322188670618e-05, "loss": 0.0062, "step": 10599 }, { "epoch": 2.051857585139319, "grad_norm": 0.09361842274665833, "learning_rate": 9.118159053987281e-05, "loss": 0.006, "step": 10600 }, { "epoch": 2.0520510835913313, "grad_norm": 0.036563027650117874, "learning_rate": 9.11799590585252e-05, "loss": 0.0075, "step": 10601 }, { "epoch": 2.0522445820433437, "grad_norm": 0.0980067253112793, "learning_rate": 9.117832744266941e-05, "loss": 0.0068, "step": 10602 }, { "epoch": 2.052438080495356, "grad_norm": 0.05540990084409714, "learning_rate": 9.117669569231153e-05, "loss": 0.0064, "step": 10603 }, { "epoch": 2.0526315789473686, "grad_norm": 0.08990228176116943, "learning_rate": 9.11750638074576e-05, "loss": 0.0073, "step": 10604 }, { "epoch": 2.0528250773993806, "grad_norm": 0.11142203956842422, "learning_rate": 9.117343178811369e-05, "loss": 0.0071, "step": 10605 }, { "epoch": 2.053018575851393, "grad_norm": 0.10102715343236923, "learning_rate": 9.117179963428588e-05, "loss": 0.0084, "step": 10606 }, { "epoch": 2.0532120743034055, "grad_norm": 0.06442436575889587, "learning_rate": 9.117016734598022e-05, "loss": 0.0075, "step": 10607 }, { "epoch": 2.053405572755418, "grad_norm": 0.05002020671963692, "learning_rate": 9.116853492320281e-05, "loss": 0.0073, "step": 10608 }, { "epoch": 2.0535990712074303, "grad_norm": 0.04771725833415985, "learning_rate": 9.116690236595969e-05, "loss": 0.0072, "step": 10609 }, { "epoch": 2.053792569659443, "grad_norm": 0.03373739868402481, "learning_rate": 9.116526967425695e-05, "loss": 0.0066, "step": 10610 }, { "epoch": 2.0539860681114552, "grad_norm": 0.03878588601946831, "learning_rate": 9.116363684810064e-05, "loss": 0.0069, "step": 10611 }, { "epoch": 2.0541795665634677, "grad_norm": 0.047609344124794006, "learning_rate": 9.116200388749683e-05, "loss": 0.0076, "step": 10612 }, { "epoch": 2.0543730650154797, "grad_norm": 0.05290835350751877, "learning_rate": 9.116037079245161e-05, "loss": 0.0093, "step": 10613 }, { "epoch": 2.054566563467492, "grad_norm": 0.04336028918623924, "learning_rate": 9.115873756297104e-05, "loss": 0.0064, "step": 10614 }, { "epoch": 2.0547600619195046, "grad_norm": 0.0651930421590805, "learning_rate": 9.11571041990612e-05, "loss": 0.0099, "step": 10615 }, { "epoch": 2.054953560371517, "grad_norm": 0.06825646013021469, "learning_rate": 9.115547070072815e-05, "loss": 0.008, "step": 10616 }, { "epoch": 2.0551470588235294, "grad_norm": 0.08509396761655807, "learning_rate": 9.115383706797796e-05, "loss": 0.008, "step": 10617 }, { "epoch": 2.055340557275542, "grad_norm": 0.06972216814756393, "learning_rate": 9.115220330081672e-05, "loss": 0.0061, "step": 10618 }, { "epoch": 2.0555340557275543, "grad_norm": 0.1013200432062149, "learning_rate": 9.11505693992505e-05, "loss": 0.0092, "step": 10619 }, { "epoch": 2.0557275541795668, "grad_norm": 0.06181617081165314, "learning_rate": 9.114893536328536e-05, "loss": 0.006, "step": 10620 }, { "epoch": 2.0559210526315788, "grad_norm": 0.08184211701154709, "learning_rate": 9.11473011929274e-05, "loss": 0.0077, "step": 10621 }, { "epoch": 2.056114551083591, "grad_norm": 0.04947593808174133, "learning_rate": 9.114566688818267e-05, "loss": 0.0064, "step": 10622 }, { "epoch": 2.0563080495356036, "grad_norm": 0.05578083172440529, "learning_rate": 9.114403244905726e-05, "loss": 0.0071, "step": 10623 }, { "epoch": 
2.056501547987616, "grad_norm": 0.08442258834838867, "learning_rate": 9.114239787555725e-05, "loss": 0.0067, "step": 10624 }, { "epoch": 2.0566950464396285, "grad_norm": 0.07943593710660934, "learning_rate": 9.11407631676887e-05, "loss": 0.0081, "step": 10625 }, { "epoch": 2.056888544891641, "grad_norm": 0.06559396535158157, "learning_rate": 9.113912832545772e-05, "loss": 0.0061, "step": 10626 }, { "epoch": 2.0570820433436534, "grad_norm": 0.13351315259933472, "learning_rate": 9.113749334887033e-05, "loss": 0.0076, "step": 10627 }, { "epoch": 2.057275541795666, "grad_norm": 0.05595134198665619, "learning_rate": 9.113585823793265e-05, "loss": 0.0082, "step": 10628 }, { "epoch": 2.057469040247678, "grad_norm": 0.13980120420455933, "learning_rate": 9.113422299265078e-05, "loss": 0.0066, "step": 10629 }, { "epoch": 2.0576625386996903, "grad_norm": 0.08932948857545853, "learning_rate": 9.113258761303075e-05, "loss": 0.0085, "step": 10630 }, { "epoch": 2.0578560371517027, "grad_norm": 0.11564666777849197, "learning_rate": 9.113095209907868e-05, "loss": 0.0056, "step": 10631 }, { "epoch": 2.058049535603715, "grad_norm": 0.12101396918296814, "learning_rate": 9.112931645080062e-05, "loss": 0.0074, "step": 10632 }, { "epoch": 2.0582430340557276, "grad_norm": 0.06658568978309631, "learning_rate": 9.112768066820268e-05, "loss": 0.0077, "step": 10633 }, { "epoch": 2.05843653250774, "grad_norm": 0.14036953449249268, "learning_rate": 9.112604475129091e-05, "loss": 0.0078, "step": 10634 }, { "epoch": 2.0586300309597525, "grad_norm": 0.0661991760134697, "learning_rate": 9.11244087000714e-05, "loss": 0.0069, "step": 10635 }, { "epoch": 2.0588235294117645, "grad_norm": 0.08905144035816193, "learning_rate": 9.112277251455025e-05, "loss": 0.007, "step": 10636 }, { "epoch": 2.059017027863777, "grad_norm": 0.09906969219446182, "learning_rate": 9.112113619473353e-05, "loss": 0.0072, "step": 10637 }, { "epoch": 2.0592105263157894, "grad_norm": 0.057043351233005524, "learning_rate": 9.111949974062733e-05, "loss": 0.0075, "step": 10638 }, { "epoch": 2.059404024767802, "grad_norm": 0.09640537202358246, "learning_rate": 9.111786315223774e-05, "loss": 0.0067, "step": 10639 }, { "epoch": 2.0595975232198143, "grad_norm": 0.04369659349322319, "learning_rate": 9.111622642957081e-05, "loss": 0.0076, "step": 10640 }, { "epoch": 2.0597910216718267, "grad_norm": 0.09817982465028763, "learning_rate": 9.111458957263268e-05, "loss": 0.0067, "step": 10641 }, { "epoch": 2.059984520123839, "grad_norm": 0.06408584117889404, "learning_rate": 9.111295258142939e-05, "loss": 0.0074, "step": 10642 }, { "epoch": 2.0601780185758516, "grad_norm": 0.05829407647252083, "learning_rate": 9.111131545596703e-05, "loss": 0.006, "step": 10643 }, { "epoch": 2.0603715170278636, "grad_norm": 0.09743990004062653, "learning_rate": 9.110967819625172e-05, "loss": 0.0073, "step": 10644 }, { "epoch": 2.060565015479876, "grad_norm": 0.029598385095596313, "learning_rate": 9.11080408022895e-05, "loss": 0.0065, "step": 10645 }, { "epoch": 2.0607585139318885, "grad_norm": 0.09989026933908463, "learning_rate": 9.110640327408648e-05, "loss": 0.0083, "step": 10646 }, { "epoch": 2.060952012383901, "grad_norm": 0.04359870404005051, "learning_rate": 9.110476561164877e-05, "loss": 0.006, "step": 10647 }, { "epoch": 2.0611455108359134, "grad_norm": 0.05177367851138115, "learning_rate": 9.110312781498242e-05, "loss": 0.0075, "step": 10648 }, { "epoch": 2.061339009287926, "grad_norm": 0.07327651977539062, "learning_rate": 9.110148988409354e-05, "loss": 0.0074, "step": 
10649 }, { "epoch": 2.0615325077399382, "grad_norm": 0.03389512747526169, "learning_rate": 9.109985181898823e-05, "loss": 0.0076, "step": 10650 }, { "epoch": 2.0617260061919507, "grad_norm": 0.05967951938509941, "learning_rate": 9.109821361967256e-05, "loss": 0.0079, "step": 10651 }, { "epoch": 2.0619195046439627, "grad_norm": 0.031683553010225296, "learning_rate": 9.109657528615261e-05, "loss": 0.0065, "step": 10652 }, { "epoch": 2.062113003095975, "grad_norm": 0.06578846275806427, "learning_rate": 9.109493681843452e-05, "loss": 0.0085, "step": 10653 }, { "epoch": 2.0623065015479876, "grad_norm": 0.030459245666861534, "learning_rate": 9.109329821652433e-05, "loss": 0.0071, "step": 10654 }, { "epoch": 2.0625, "grad_norm": 0.06839096546173096, "learning_rate": 9.109165948042815e-05, "loss": 0.0087, "step": 10655 }, { "epoch": 2.0626934984520124, "grad_norm": 0.039152685552835464, "learning_rate": 9.109002061015208e-05, "loss": 0.0067, "step": 10656 }, { "epoch": 2.062886996904025, "grad_norm": 0.058363210409879684, "learning_rate": 9.108838160570219e-05, "loss": 0.0062, "step": 10657 }, { "epoch": 2.0630804953560373, "grad_norm": 0.04463045671582222, "learning_rate": 9.10867424670846e-05, "loss": 0.0064, "step": 10658 }, { "epoch": 2.0632739938080493, "grad_norm": 0.05375517159700394, "learning_rate": 9.10851031943054e-05, "loss": 0.0073, "step": 10659 }, { "epoch": 2.0634674922600618, "grad_norm": 0.07504244148731232, "learning_rate": 9.108346378737067e-05, "loss": 0.0082, "step": 10660 }, { "epoch": 2.063660990712074, "grad_norm": 0.11203771084547043, "learning_rate": 9.10818242462865e-05, "loss": 0.0069, "step": 10661 }, { "epoch": 2.0638544891640866, "grad_norm": 0.04635682329535484, "learning_rate": 9.108018457105904e-05, "loss": 0.0076, "step": 10662 }, { "epoch": 2.064047987616099, "grad_norm": 0.12796612083911896, "learning_rate": 9.10785447616943e-05, "loss": 0.0071, "step": 10663 }, { "epoch": 2.0642414860681115, "grad_norm": 0.06528569757938385, "learning_rate": 9.107690481819844e-05, "loss": 0.0063, "step": 10664 }, { "epoch": 2.064434984520124, "grad_norm": 0.11879462003707886, "learning_rate": 9.107526474057754e-05, "loss": 0.0069, "step": 10665 }, { "epoch": 2.0646284829721364, "grad_norm": 0.08673668652772903, "learning_rate": 9.107362452883769e-05, "loss": 0.0076, "step": 10666 }, { "epoch": 2.0648219814241484, "grad_norm": 0.08635861426591873, "learning_rate": 9.1071984182985e-05, "loss": 0.0081, "step": 10667 }, { "epoch": 2.065015479876161, "grad_norm": 0.11155630648136139, "learning_rate": 9.107034370302553e-05, "loss": 0.0066, "step": 10668 }, { "epoch": 2.0652089783281733, "grad_norm": 0.04844324663281441, "learning_rate": 9.106870308896544e-05, "loss": 0.0074, "step": 10669 }, { "epoch": 2.0654024767801857, "grad_norm": 0.12473966181278229, "learning_rate": 9.10670623408108e-05, "loss": 0.0077, "step": 10670 }, { "epoch": 2.065595975232198, "grad_norm": 0.04140456020832062, "learning_rate": 9.106542145856771e-05, "loss": 0.0077, "step": 10671 }, { "epoch": 2.0657894736842106, "grad_norm": 0.11348486691713333, "learning_rate": 9.106378044224226e-05, "loss": 0.0082, "step": 10672 }, { "epoch": 2.065982972136223, "grad_norm": 0.07427427172660828, "learning_rate": 9.106213929184057e-05, "loss": 0.0094, "step": 10673 }, { "epoch": 2.0661764705882355, "grad_norm": 0.0968848168849945, "learning_rate": 9.106049800736871e-05, "loss": 0.0073, "step": 10674 }, { "epoch": 2.0663699690402475, "grad_norm": 0.1030413880944252, "learning_rate": 9.105885658883282e-05, "loss": 0.0086, 
"step": 10675 }, { "epoch": 2.06656346749226, "grad_norm": 0.08679131418466568, "learning_rate": 9.1057215036239e-05, "loss": 0.0066, "step": 10676 }, { "epoch": 2.0667569659442724, "grad_norm": 0.09187090396881104, "learning_rate": 9.105557334959331e-05, "loss": 0.0076, "step": 10677 }, { "epoch": 2.066950464396285, "grad_norm": 0.04614805430173874, "learning_rate": 9.10539315289019e-05, "loss": 0.0064, "step": 10678 }, { "epoch": 2.0671439628482973, "grad_norm": 0.08962567150592804, "learning_rate": 9.105228957417086e-05, "loss": 0.0085, "step": 10679 }, { "epoch": 2.0673374613003097, "grad_norm": 0.03525229170918465, "learning_rate": 9.105064748540626e-05, "loss": 0.0072, "step": 10680 }, { "epoch": 2.067530959752322, "grad_norm": 0.07135497033596039, "learning_rate": 9.104900526261428e-05, "loss": 0.0061, "step": 10681 }, { "epoch": 2.067724458204334, "grad_norm": 0.04372555762529373, "learning_rate": 9.104736290580096e-05, "loss": 0.0083, "step": 10682 }, { "epoch": 2.0679179566563466, "grad_norm": 0.07843002676963806, "learning_rate": 9.104572041497242e-05, "loss": 0.0061, "step": 10683 }, { "epoch": 2.068111455108359, "grad_norm": 0.04239920526742935, "learning_rate": 9.10440777901348e-05, "loss": 0.006, "step": 10684 }, { "epoch": 2.0683049535603715, "grad_norm": 0.07832345366477966, "learning_rate": 9.104243503129416e-05, "loss": 0.0062, "step": 10685 }, { "epoch": 2.068498452012384, "grad_norm": 0.047815095633268356, "learning_rate": 9.104079213845663e-05, "loss": 0.0063, "step": 10686 }, { "epoch": 2.0686919504643964, "grad_norm": 0.06348393112421036, "learning_rate": 9.103914911162833e-05, "loss": 0.0067, "step": 10687 }, { "epoch": 2.068885448916409, "grad_norm": 0.03930272161960602, "learning_rate": 9.103750595081533e-05, "loss": 0.0091, "step": 10688 }, { "epoch": 2.0690789473684212, "grad_norm": 0.05486065521836281, "learning_rate": 9.103586265602376e-05, "loss": 0.006, "step": 10689 }, { "epoch": 2.0692724458204332, "grad_norm": 0.042248815298080444, "learning_rate": 9.103421922725974e-05, "loss": 0.0065, "step": 10690 }, { "epoch": 2.0694659442724457, "grad_norm": 0.0721910297870636, "learning_rate": 9.10325756645294e-05, "loss": 0.0062, "step": 10691 }, { "epoch": 2.069659442724458, "grad_norm": 0.06586319208145142, "learning_rate": 9.103093196783879e-05, "loss": 0.0068, "step": 10692 }, { "epoch": 2.0698529411764706, "grad_norm": 0.06712901592254639, "learning_rate": 9.102928813719405e-05, "loss": 0.0086, "step": 10693 }, { "epoch": 2.070046439628483, "grad_norm": 0.0381912924349308, "learning_rate": 9.102764417260131e-05, "loss": 0.0081, "step": 10694 }, { "epoch": 2.0702399380804954, "grad_norm": 0.08793003112077713, "learning_rate": 9.102600007406667e-05, "loss": 0.0085, "step": 10695 }, { "epoch": 2.070433436532508, "grad_norm": 0.07858463376760483, "learning_rate": 9.102435584159622e-05, "loss": 0.0069, "step": 10696 }, { "epoch": 2.0706269349845203, "grad_norm": 0.10577207058668137, "learning_rate": 9.102271147519611e-05, "loss": 0.0087, "step": 10697 }, { "epoch": 2.0708204334365323, "grad_norm": 0.112598717212677, "learning_rate": 9.102106697487242e-05, "loss": 0.0065, "step": 10698 }, { "epoch": 2.0710139318885448, "grad_norm": 0.0768025666475296, "learning_rate": 9.101942234063129e-05, "loss": 0.0089, "step": 10699 }, { "epoch": 2.071207430340557, "grad_norm": 0.12969602644443512, "learning_rate": 9.101777757247882e-05, "loss": 0.0075, "step": 10700 }, { "epoch": 2.0714009287925697, "grad_norm": 0.09010517597198486, "learning_rate": 9.101613267042113e-05, 
"loss": 0.0092, "step": 10701 }, { "epoch": 2.071594427244582, "grad_norm": 0.11120164394378662, "learning_rate": 9.101448763446434e-05, "loss": 0.0074, "step": 10702 }, { "epoch": 2.0717879256965945, "grad_norm": 0.09107846021652222, "learning_rate": 9.101284246461455e-05, "loss": 0.0097, "step": 10703 }, { "epoch": 2.071981424148607, "grad_norm": 0.09822022914886475, "learning_rate": 9.101119716087787e-05, "loss": 0.0066, "step": 10704 }, { "epoch": 2.0721749226006194, "grad_norm": 0.09561119973659515, "learning_rate": 9.100955172326046e-05, "loss": 0.0087, "step": 10705 }, { "epoch": 2.0723684210526314, "grad_norm": 0.12963812053203583, "learning_rate": 9.100790615176838e-05, "loss": 0.0072, "step": 10706 }, { "epoch": 2.072561919504644, "grad_norm": 0.08581326901912689, "learning_rate": 9.100626044640781e-05, "loss": 0.0093, "step": 10707 }, { "epoch": 2.0727554179566563, "grad_norm": 0.15034723281860352, "learning_rate": 9.100461460718482e-05, "loss": 0.0068, "step": 10708 }, { "epoch": 2.0729489164086687, "grad_norm": 0.15227101743221283, "learning_rate": 9.100296863410554e-05, "loss": 0.0068, "step": 10709 }, { "epoch": 2.073142414860681, "grad_norm": 0.1770058423280716, "learning_rate": 9.100132252717611e-05, "loss": 0.0086, "step": 10710 }, { "epoch": 2.0733359133126936, "grad_norm": 0.09767665714025497, "learning_rate": 9.099967628640261e-05, "loss": 0.0082, "step": 10711 }, { "epoch": 2.073529411764706, "grad_norm": 0.13260389864444733, "learning_rate": 9.099802991179121e-05, "loss": 0.0062, "step": 10712 }, { "epoch": 2.073722910216718, "grad_norm": 0.10941912978887558, "learning_rate": 9.099638340334799e-05, "loss": 0.0062, "step": 10713 }, { "epoch": 2.0739164086687305, "grad_norm": 0.09612511843442917, "learning_rate": 9.099473676107908e-05, "loss": 0.008, "step": 10714 }, { "epoch": 2.074109907120743, "grad_norm": 0.12789469957351685, "learning_rate": 9.099308998499062e-05, "loss": 0.0067, "step": 10715 }, { "epoch": 2.0743034055727554, "grad_norm": 0.06618691235780716, "learning_rate": 9.099144307508871e-05, "loss": 0.007, "step": 10716 }, { "epoch": 2.074496904024768, "grad_norm": 0.12965761125087738, "learning_rate": 9.098979603137949e-05, "loss": 0.0088, "step": 10717 }, { "epoch": 2.0746904024767803, "grad_norm": 0.09122252464294434, "learning_rate": 9.098814885386908e-05, "loss": 0.0084, "step": 10718 }, { "epoch": 2.0748839009287927, "grad_norm": 0.09191472828388214, "learning_rate": 9.098650154256361e-05, "loss": 0.0074, "step": 10719 }, { "epoch": 2.075077399380805, "grad_norm": 0.0878332108259201, "learning_rate": 9.098485409746918e-05, "loss": 0.0068, "step": 10720 }, { "epoch": 2.075270897832817, "grad_norm": 0.12349986284971237, "learning_rate": 9.098320651859192e-05, "loss": 0.0078, "step": 10721 }, { "epoch": 2.0754643962848296, "grad_norm": 0.08999848365783691, "learning_rate": 9.098155880593797e-05, "loss": 0.0055, "step": 10722 }, { "epoch": 2.075657894736842, "grad_norm": 0.09358151257038116, "learning_rate": 9.097991095951345e-05, "loss": 0.0064, "step": 10723 }, { "epoch": 2.0758513931888545, "grad_norm": 0.0840778723359108, "learning_rate": 9.097826297932451e-05, "loss": 0.0072, "step": 10724 }, { "epoch": 2.076044891640867, "grad_norm": 0.07794590294361115, "learning_rate": 9.097661486537723e-05, "loss": 0.0081, "step": 10725 }, { "epoch": 2.0762383900928794, "grad_norm": 0.1209757998585701, "learning_rate": 9.097496661767777e-05, "loss": 0.0068, "step": 10726 }, { "epoch": 2.076431888544892, "grad_norm": 0.06995218247175217, "learning_rate": 
9.097331823623225e-05, "loss": 0.0079, "step": 10727 }, { "epoch": 2.076625386996904, "grad_norm": 0.1481991708278656, "learning_rate": 9.097166972104679e-05, "loss": 0.0062, "step": 10728 }, { "epoch": 2.0768188854489162, "grad_norm": 0.07076270878314972, "learning_rate": 9.097002107212751e-05, "loss": 0.0064, "step": 10729 }, { "epoch": 2.0770123839009287, "grad_norm": 0.11312491446733475, "learning_rate": 9.096837228948059e-05, "loss": 0.0069, "step": 10730 }, { "epoch": 2.077205882352941, "grad_norm": 0.10243047773838043, "learning_rate": 9.096672337311211e-05, "loss": 0.0071, "step": 10731 }, { "epoch": 2.0773993808049536, "grad_norm": 0.08658270537853241, "learning_rate": 9.096507432302821e-05, "loss": 0.0065, "step": 10732 }, { "epoch": 2.077592879256966, "grad_norm": 0.11489764600992203, "learning_rate": 9.096342513923503e-05, "loss": 0.0067, "step": 10733 }, { "epoch": 2.0777863777089784, "grad_norm": 0.034846849739551544, "learning_rate": 9.096177582173869e-05, "loss": 0.0085, "step": 10734 }, { "epoch": 2.077979876160991, "grad_norm": 0.11190428584814072, "learning_rate": 9.096012637054533e-05, "loss": 0.0051, "step": 10735 }, { "epoch": 2.078173374613003, "grad_norm": 0.07059787958860397, "learning_rate": 9.09584767856611e-05, "loss": 0.0073, "step": 10736 }, { "epoch": 2.0783668730650153, "grad_norm": 0.08665793389081955, "learning_rate": 9.095682706709207e-05, "loss": 0.0067, "step": 10737 }, { "epoch": 2.0785603715170278, "grad_norm": 0.10173623263835907, "learning_rate": 9.095517721484446e-05, "loss": 0.007, "step": 10738 }, { "epoch": 2.07875386996904, "grad_norm": 0.07017098367214203, "learning_rate": 9.095352722892435e-05, "loss": 0.0065, "step": 10739 }, { "epoch": 2.0789473684210527, "grad_norm": 0.09698083251714706, "learning_rate": 9.095187710933788e-05, "loss": 0.009, "step": 10740 }, { "epoch": 2.079140866873065, "grad_norm": 0.07488100975751877, "learning_rate": 9.095022685609118e-05, "loss": 0.007, "step": 10741 }, { "epoch": 2.0793343653250775, "grad_norm": 0.04763054847717285, "learning_rate": 9.09485764691904e-05, "loss": 0.0064, "step": 10742 }, { "epoch": 2.07952786377709, "grad_norm": 0.05015529692173004, "learning_rate": 9.094692594864169e-05, "loss": 0.0071, "step": 10743 }, { "epoch": 2.079721362229102, "grad_norm": 0.03238946571946144, "learning_rate": 9.094527529445115e-05, "loss": 0.0073, "step": 10744 }, { "epoch": 2.0799148606811144, "grad_norm": 0.034448832273483276, "learning_rate": 9.094362450662493e-05, "loss": 0.0085, "step": 10745 }, { "epoch": 2.080108359133127, "grad_norm": 0.046234939247369766, "learning_rate": 9.094197358516919e-05, "loss": 0.0065, "step": 10746 }, { "epoch": 2.0803018575851393, "grad_norm": 0.0535091906785965, "learning_rate": 9.094032253009003e-05, "loss": 0.0087, "step": 10747 }, { "epoch": 2.0804953560371517, "grad_norm": 0.07783665508031845, "learning_rate": 9.093867134139361e-05, "loss": 0.0075, "step": 10748 }, { "epoch": 2.080688854489164, "grad_norm": 0.07593033462762833, "learning_rate": 9.093702001908607e-05, "loss": 0.0076, "step": 10749 }, { "epoch": 2.0808823529411766, "grad_norm": 0.07697049528360367, "learning_rate": 9.093536856317353e-05, "loss": 0.0076, "step": 10750 }, { "epoch": 2.081075851393189, "grad_norm": 0.11310774087905884, "learning_rate": 9.093371697366217e-05, "loss": 0.0076, "step": 10751 }, { "epoch": 2.081269349845201, "grad_norm": 0.0958874523639679, "learning_rate": 9.093206525055808e-05, "loss": 0.0071, "step": 10752 }, { "epoch": 2.0814628482972135, "grad_norm": 0.1252707540988922, 
"learning_rate": 9.093041339386744e-05, "loss": 0.0069, "step": 10753 }, { "epoch": 2.081656346749226, "grad_norm": 0.07401866465806961, "learning_rate": 9.092876140359638e-05, "loss": 0.0088, "step": 10754 }, { "epoch": 2.0818498452012384, "grad_norm": 0.1715371161699295, "learning_rate": 9.092710927975103e-05, "loss": 0.0088, "step": 10755 }, { "epoch": 2.082043343653251, "grad_norm": 0.07590008527040482, "learning_rate": 9.092545702233754e-05, "loss": 0.0063, "step": 10756 }, { "epoch": 2.0822368421052633, "grad_norm": 0.18437276780605316, "learning_rate": 9.092380463136206e-05, "loss": 0.006, "step": 10757 }, { "epoch": 2.0824303405572757, "grad_norm": 0.11325359344482422, "learning_rate": 9.092215210683072e-05, "loss": 0.0074, "step": 10758 }, { "epoch": 2.0826238390092877, "grad_norm": 0.1469622701406479, "learning_rate": 9.092049944874967e-05, "loss": 0.0074, "step": 10759 }, { "epoch": 2.0828173374613, "grad_norm": 0.14381542801856995, "learning_rate": 9.091884665712505e-05, "loss": 0.0084, "step": 10760 }, { "epoch": 2.0830108359133126, "grad_norm": 0.10174600034952164, "learning_rate": 9.091719373196302e-05, "loss": 0.0077, "step": 10761 }, { "epoch": 2.083204334365325, "grad_norm": 0.14459668099880219, "learning_rate": 9.091554067326971e-05, "loss": 0.0069, "step": 10762 }, { "epoch": 2.0833978328173375, "grad_norm": 0.08582882583141327, "learning_rate": 9.091388748105127e-05, "loss": 0.006, "step": 10763 }, { "epoch": 2.08359133126935, "grad_norm": 0.11785311996936798, "learning_rate": 9.091223415531383e-05, "loss": 0.007, "step": 10764 }, { "epoch": 2.0837848297213624, "grad_norm": 0.09750804305076599, "learning_rate": 9.091058069606358e-05, "loss": 0.0079, "step": 10765 }, { "epoch": 2.083978328173375, "grad_norm": 0.11733252555131912, "learning_rate": 9.090892710330662e-05, "loss": 0.0073, "step": 10766 }, { "epoch": 2.084171826625387, "grad_norm": 0.10532572865486145, "learning_rate": 9.090727337704912e-05, "loss": 0.0073, "step": 10767 }, { "epoch": 2.0843653250773992, "grad_norm": 0.1010155901312828, "learning_rate": 9.090561951729723e-05, "loss": 0.0069, "step": 10768 }, { "epoch": 2.0845588235294117, "grad_norm": 0.12993073463439941, "learning_rate": 9.090396552405709e-05, "loss": 0.0079, "step": 10769 }, { "epoch": 2.084752321981424, "grad_norm": 0.05379222705960274, "learning_rate": 9.090231139733484e-05, "loss": 0.0076, "step": 10770 }, { "epoch": 2.0849458204334366, "grad_norm": 0.12385726720094681, "learning_rate": 9.090065713713667e-05, "loss": 0.0082, "step": 10771 }, { "epoch": 2.085139318885449, "grad_norm": 0.08231523633003235, "learning_rate": 9.089900274346869e-05, "loss": 0.0085, "step": 10772 }, { "epoch": 2.0853328173374615, "grad_norm": 0.15543332695960999, "learning_rate": 9.089734821633707e-05, "loss": 0.0083, "step": 10773 }, { "epoch": 2.085526315789474, "grad_norm": 0.0692211389541626, "learning_rate": 9.089569355574794e-05, "loss": 0.0082, "step": 10774 }, { "epoch": 2.085719814241486, "grad_norm": 0.036279670894145966, "learning_rate": 9.089403876170748e-05, "loss": 0.007, "step": 10775 }, { "epoch": 2.0859133126934983, "grad_norm": 0.061364829540252686, "learning_rate": 9.089238383422182e-05, "loss": 0.0089, "step": 10776 }, { "epoch": 2.0861068111455108, "grad_norm": 0.05632028728723526, "learning_rate": 9.089072877329713e-05, "loss": 0.0065, "step": 10777 }, { "epoch": 2.086300309597523, "grad_norm": 0.044826239347457886, "learning_rate": 9.088907357893955e-05, "loss": 0.0079, "step": 10778 }, { "epoch": 2.0864938080495357, "grad_norm": 
0.04873053729534149, "learning_rate": 9.088741825115523e-05, "loss": 0.0064, "step": 10779 }, { "epoch": 2.086687306501548, "grad_norm": 0.05581695958971977, "learning_rate": 9.088576278995035e-05, "loss": 0.0068, "step": 10780 }, { "epoch": 2.0868808049535605, "grad_norm": 0.044991184026002884, "learning_rate": 9.088410719533103e-05, "loss": 0.0065, "step": 10781 }, { "epoch": 2.087074303405573, "grad_norm": 0.05840127170085907, "learning_rate": 9.088245146730344e-05, "loss": 0.0072, "step": 10782 }, { "epoch": 2.087267801857585, "grad_norm": 0.03092566691339016, "learning_rate": 9.088079560587374e-05, "loss": 0.007, "step": 10783 }, { "epoch": 2.0874613003095974, "grad_norm": 0.05310194194316864, "learning_rate": 9.087913961104809e-05, "loss": 0.0088, "step": 10784 }, { "epoch": 2.08765479876161, "grad_norm": 0.042791496962308884, "learning_rate": 9.087748348283263e-05, "loss": 0.0088, "step": 10785 }, { "epoch": 2.0878482972136223, "grad_norm": 0.04114188998937607, "learning_rate": 9.087582722123353e-05, "loss": 0.0065, "step": 10786 }, { "epoch": 2.0880417956656347, "grad_norm": 0.03743478283286095, "learning_rate": 9.087417082625696e-05, "loss": 0.0087, "step": 10787 }, { "epoch": 2.088235294117647, "grad_norm": 0.06587325036525726, "learning_rate": 9.087251429790904e-05, "loss": 0.0074, "step": 10788 }, { "epoch": 2.0884287925696596, "grad_norm": 0.03713512420654297, "learning_rate": 9.087085763619596e-05, "loss": 0.0078, "step": 10789 }, { "epoch": 2.0886222910216716, "grad_norm": 0.06114136427640915, "learning_rate": 9.086920084112387e-05, "loss": 0.0079, "step": 10790 }, { "epoch": 2.088815789473684, "grad_norm": 0.03886472433805466, "learning_rate": 9.086754391269893e-05, "loss": 0.0081, "step": 10791 }, { "epoch": 2.0890092879256965, "grad_norm": 0.0733465626835823, "learning_rate": 9.086588685092731e-05, "loss": 0.0068, "step": 10792 }, { "epoch": 2.089202786377709, "grad_norm": 0.042199358344078064, "learning_rate": 9.086422965581515e-05, "loss": 0.0066, "step": 10793 }, { "epoch": 2.0893962848297214, "grad_norm": 0.05463041737675667, "learning_rate": 9.086257232736862e-05, "loss": 0.0063, "step": 10794 }, { "epoch": 2.089589783281734, "grad_norm": 0.04246184229850769, "learning_rate": 9.086091486559388e-05, "loss": 0.0077, "step": 10795 }, { "epoch": 2.0897832817337463, "grad_norm": 0.07936390489339828, "learning_rate": 9.08592572704971e-05, "loss": 0.0072, "step": 10796 }, { "epoch": 2.0899767801857587, "grad_norm": 0.0536954328417778, "learning_rate": 9.085759954208444e-05, "loss": 0.0069, "step": 10797 }, { "epoch": 2.0901702786377707, "grad_norm": 0.09733342379331589, "learning_rate": 9.085594168036206e-05, "loss": 0.0072, "step": 10798 }, { "epoch": 2.090363777089783, "grad_norm": 0.06377134472131729, "learning_rate": 9.085428368533611e-05, "loss": 0.008, "step": 10799 }, { "epoch": 2.0905572755417956, "grad_norm": 0.12010457366704941, "learning_rate": 9.085262555701277e-05, "loss": 0.009, "step": 10800 }, { "epoch": 2.090750773993808, "grad_norm": 0.05204759165644646, "learning_rate": 9.085096729539823e-05, "loss": 0.007, "step": 10801 }, { "epoch": 2.0909442724458205, "grad_norm": 0.12196608632802963, "learning_rate": 9.084930890049862e-05, "loss": 0.0075, "step": 10802 }, { "epoch": 2.091137770897833, "grad_norm": 0.0656522661447525, "learning_rate": 9.084765037232008e-05, "loss": 0.0087, "step": 10803 }, { "epoch": 2.0913312693498454, "grad_norm": 0.13914917409420013, "learning_rate": 9.084599171086884e-05, "loss": 0.0064, "step": 10804 }, { "epoch": 
2.0915247678018574, "grad_norm": 0.1188705563545227, "learning_rate": 9.084433291615103e-05, "loss": 0.0083, "step": 10805 }, { "epoch": 2.09171826625387, "grad_norm": 0.1643502414226532, "learning_rate": 9.084267398817282e-05, "loss": 0.0073, "step": 10806 }, { "epoch": 2.0919117647058822, "grad_norm": 0.16521461308002472, "learning_rate": 9.084101492694037e-05, "loss": 0.0085, "step": 10807 }, { "epoch": 2.0921052631578947, "grad_norm": 0.1544101983308792, "learning_rate": 9.083935573245988e-05, "loss": 0.01, "step": 10808 }, { "epoch": 2.092298761609907, "grad_norm": 0.1946323812007904, "learning_rate": 9.083769640473746e-05, "loss": 0.0086, "step": 10809 }, { "epoch": 2.0924922600619196, "grad_norm": 0.12978069484233856, "learning_rate": 9.083603694377934e-05, "loss": 0.0081, "step": 10810 }, { "epoch": 2.092685758513932, "grad_norm": 0.2305610030889511, "learning_rate": 9.083437734959167e-05, "loss": 0.0063, "step": 10811 }, { "epoch": 2.0928792569659445, "grad_norm": 0.11676352471113205, "learning_rate": 9.083271762218062e-05, "loss": 0.008, "step": 10812 }, { "epoch": 2.0930727554179565, "grad_norm": 0.235991969704628, "learning_rate": 9.083105776155233e-05, "loss": 0.0094, "step": 10813 }, { "epoch": 2.093266253869969, "grad_norm": 0.1215418130159378, "learning_rate": 9.082939776771301e-05, "loss": 0.0088, "step": 10814 }, { "epoch": 2.0934597523219813, "grad_norm": 0.18578946590423584, "learning_rate": 9.082773764066883e-05, "loss": 0.0077, "step": 10815 }, { "epoch": 2.093653250773994, "grad_norm": 0.14142875373363495, "learning_rate": 9.082607738042592e-05, "loss": 0.007, "step": 10816 }, { "epoch": 2.093846749226006, "grad_norm": 0.11416766792535782, "learning_rate": 9.08244169869905e-05, "loss": 0.0065, "step": 10817 }, { "epoch": 2.0940402476780187, "grad_norm": 0.20046107470989227, "learning_rate": 9.082275646036873e-05, "loss": 0.0076, "step": 10818 }, { "epoch": 2.094233746130031, "grad_norm": 0.08708006143569946, "learning_rate": 9.082109580056677e-05, "loss": 0.0072, "step": 10819 }, { "epoch": 2.0944272445820435, "grad_norm": 0.1887165904045105, "learning_rate": 9.08194350075908e-05, "loss": 0.0092, "step": 10820 }, { "epoch": 2.0946207430340555, "grad_norm": 0.1704983413219452, "learning_rate": 9.081777408144702e-05, "loss": 0.0082, "step": 10821 }, { "epoch": 2.094814241486068, "grad_norm": 0.13557283580303192, "learning_rate": 9.081611302214155e-05, "loss": 0.0069, "step": 10822 }, { "epoch": 2.0950077399380804, "grad_norm": 0.19367311894893646, "learning_rate": 9.08144518296806e-05, "loss": 0.0079, "step": 10823 }, { "epoch": 2.095201238390093, "grad_norm": 0.05333330109715462, "learning_rate": 9.081279050407036e-05, "loss": 0.0075, "step": 10824 }, { "epoch": 2.0953947368421053, "grad_norm": 0.21054282784461975, "learning_rate": 9.081112904531699e-05, "loss": 0.0082, "step": 10825 }, { "epoch": 2.0955882352941178, "grad_norm": 0.15686801075935364, "learning_rate": 9.080946745342665e-05, "loss": 0.0081, "step": 10826 }, { "epoch": 2.09578173374613, "grad_norm": 0.21606718003749847, "learning_rate": 9.080780572840556e-05, "loss": 0.0073, "step": 10827 }, { "epoch": 2.0959752321981426, "grad_norm": 0.19349318742752075, "learning_rate": 9.080614387025985e-05, "loss": 0.0075, "step": 10828 }, { "epoch": 2.0961687306501546, "grad_norm": 0.1332848072052002, "learning_rate": 9.080448187899571e-05, "loss": 0.0068, "step": 10829 }, { "epoch": 2.096362229102167, "grad_norm": 0.1535988748073578, "learning_rate": 9.080281975461934e-05, "loss": 0.0069, "step": 10830 }, { 
"epoch": 2.0965557275541795, "grad_norm": 0.04176526144146919, "learning_rate": 9.080115749713693e-05, "loss": 0.0063, "step": 10831 }, { "epoch": 2.096749226006192, "grad_norm": 0.13309986889362335, "learning_rate": 9.079949510655461e-05, "loss": 0.0069, "step": 10832 }, { "epoch": 2.0969427244582044, "grad_norm": 0.06354284286499023, "learning_rate": 9.079783258287861e-05, "loss": 0.0074, "step": 10833 }, { "epoch": 2.097136222910217, "grad_norm": 0.08256290853023529, "learning_rate": 9.079616992611508e-05, "loss": 0.0079, "step": 10834 }, { "epoch": 2.0973297213622293, "grad_norm": 0.10349228978157043, "learning_rate": 9.07945071362702e-05, "loss": 0.0079, "step": 10835 }, { "epoch": 2.0975232198142413, "grad_norm": 0.09300203621387482, "learning_rate": 9.079284421335017e-05, "loss": 0.0084, "step": 10836 }, { "epoch": 2.0977167182662537, "grad_norm": 0.07525719702243805, "learning_rate": 9.079118115736116e-05, "loss": 0.0068, "step": 10837 }, { "epoch": 2.097910216718266, "grad_norm": 0.06554687768220901, "learning_rate": 9.078951796830937e-05, "loss": 0.0081, "step": 10838 }, { "epoch": 2.0981037151702786, "grad_norm": 0.058999091386795044, "learning_rate": 9.078785464620096e-05, "loss": 0.0082, "step": 10839 }, { "epoch": 2.098297213622291, "grad_norm": 0.06388090550899506, "learning_rate": 9.078619119104213e-05, "loss": 0.008, "step": 10840 }, { "epoch": 2.0984907120743035, "grad_norm": 0.06779695302248001, "learning_rate": 9.078452760283905e-05, "loss": 0.0069, "step": 10841 }, { "epoch": 2.098684210526316, "grad_norm": 0.05377786606550217, "learning_rate": 9.078286388159793e-05, "loss": 0.006, "step": 10842 }, { "epoch": 2.0988777089783284, "grad_norm": 0.08042702823877335, "learning_rate": 9.078120002732491e-05, "loss": 0.0078, "step": 10843 }, { "epoch": 2.0990712074303404, "grad_norm": 0.039134908467531204, "learning_rate": 9.077953604002624e-05, "loss": 0.0078, "step": 10844 }, { "epoch": 2.099264705882353, "grad_norm": 0.06174524128437042, "learning_rate": 9.077787191970805e-05, "loss": 0.0069, "step": 10845 }, { "epoch": 2.0994582043343653, "grad_norm": 0.05637696757912636, "learning_rate": 9.077620766637657e-05, "loss": 0.0074, "step": 10846 }, { "epoch": 2.0996517027863777, "grad_norm": 0.04663743078708649, "learning_rate": 9.077454328003795e-05, "loss": 0.0084, "step": 10847 }, { "epoch": 2.09984520123839, "grad_norm": 0.0551653727889061, "learning_rate": 9.077287876069838e-05, "loss": 0.0061, "step": 10848 }, { "epoch": 2.1000386996904026, "grad_norm": 0.02756522409617901, "learning_rate": 9.077121410836408e-05, "loss": 0.0088, "step": 10849 }, { "epoch": 2.100232198142415, "grad_norm": 0.05731557309627533, "learning_rate": 9.076954932304121e-05, "loss": 0.007, "step": 10850 }, { "epoch": 2.100425696594427, "grad_norm": 0.03234375640749931, "learning_rate": 9.076788440473598e-05, "loss": 0.0085, "step": 10851 }, { "epoch": 2.1006191950464395, "grad_norm": 0.05058014392852783, "learning_rate": 9.076621935345457e-05, "loss": 0.008, "step": 10852 }, { "epoch": 2.100812693498452, "grad_norm": 0.04032333940267563, "learning_rate": 9.076455416920315e-05, "loss": 0.0079, "step": 10853 }, { "epoch": 2.1010061919504643, "grad_norm": 0.03255622833967209, "learning_rate": 9.076288885198795e-05, "loss": 0.0066, "step": 10854 }, { "epoch": 2.101199690402477, "grad_norm": 0.05632142722606659, "learning_rate": 9.076122340181513e-05, "loss": 0.0068, "step": 10855 }, { "epoch": 2.1013931888544892, "grad_norm": 0.06497262418270111, "learning_rate": 9.07595578186909e-05, "loss": 0.0091, 
"step": 10856 }, { "epoch": 2.1015866873065017, "grad_norm": 0.1008840799331665, "learning_rate": 9.075789210262143e-05, "loss": 0.0089, "step": 10857 }, { "epoch": 2.101780185758514, "grad_norm": 0.0512373149394989, "learning_rate": 9.075622625361297e-05, "loss": 0.0076, "step": 10858 }, { "epoch": 2.101973684210526, "grad_norm": 0.06959296762943268, "learning_rate": 9.075456027167163e-05, "loss": 0.0066, "step": 10859 }, { "epoch": 2.1021671826625385, "grad_norm": 0.04247857630252838, "learning_rate": 9.075289415680365e-05, "loss": 0.0064, "step": 10860 }, { "epoch": 2.102360681114551, "grad_norm": 0.04290665686130524, "learning_rate": 9.075122790901524e-05, "loss": 0.0072, "step": 10861 }, { "epoch": 2.1025541795665634, "grad_norm": 0.07056590169668198, "learning_rate": 9.074956152831256e-05, "loss": 0.008, "step": 10862 }, { "epoch": 2.102747678018576, "grad_norm": 0.032749079167842865, "learning_rate": 9.074789501470181e-05, "loss": 0.0065, "step": 10863 }, { "epoch": 2.1029411764705883, "grad_norm": 0.04921368509531021, "learning_rate": 9.074622836818922e-05, "loss": 0.0073, "step": 10864 }, { "epoch": 2.1031346749226008, "grad_norm": 0.03740368410944939, "learning_rate": 9.074456158878094e-05, "loss": 0.0089, "step": 10865 }, { "epoch": 2.103328173374613, "grad_norm": 0.053326431661844254, "learning_rate": 9.074289467648319e-05, "loss": 0.0087, "step": 10866 }, { "epoch": 2.103521671826625, "grad_norm": 0.037666432559490204, "learning_rate": 9.074122763130216e-05, "loss": 0.0074, "step": 10867 }, { "epoch": 2.1037151702786376, "grad_norm": 0.042105965316295624, "learning_rate": 9.073956045324406e-05, "loss": 0.0072, "step": 10868 }, { "epoch": 2.10390866873065, "grad_norm": 0.08123339712619781, "learning_rate": 9.07378931423151e-05, "loss": 0.0081, "step": 10869 }, { "epoch": 2.1041021671826625, "grad_norm": 0.04215206578373909, "learning_rate": 9.073622569852144e-05, "loss": 0.0072, "step": 10870 }, { "epoch": 2.104295665634675, "grad_norm": 0.08331511169672012, "learning_rate": 9.07345581218693e-05, "loss": 0.0077, "step": 10871 }, { "epoch": 2.1044891640866874, "grad_norm": 0.03874150663614273, "learning_rate": 9.073289041236486e-05, "loss": 0.0063, "step": 10872 }, { "epoch": 2.1046826625387, "grad_norm": 0.11392658203840256, "learning_rate": 9.073122257001436e-05, "loss": 0.0071, "step": 10873 }, { "epoch": 2.1048761609907123, "grad_norm": 0.04929181933403015, "learning_rate": 9.072955459482398e-05, "loss": 0.007, "step": 10874 }, { "epoch": 2.1050696594427243, "grad_norm": 0.04942939430475235, "learning_rate": 9.072788648679992e-05, "loss": 0.0078, "step": 10875 }, { "epoch": 2.1052631578947367, "grad_norm": 0.12750811874866486, "learning_rate": 9.072621824594837e-05, "loss": 0.0081, "step": 10876 }, { "epoch": 2.105456656346749, "grad_norm": 0.07419164478778839, "learning_rate": 9.072454987227554e-05, "loss": 0.0091, "step": 10877 }, { "epoch": 2.1056501547987616, "grad_norm": 0.15981173515319824, "learning_rate": 9.072288136578766e-05, "loss": 0.0069, "step": 10878 }, { "epoch": 2.105843653250774, "grad_norm": 0.13549351692199707, "learning_rate": 9.072121272649088e-05, "loss": 0.0091, "step": 10879 }, { "epoch": 2.1060371517027865, "grad_norm": 0.14775963127613068, "learning_rate": 9.071954395439147e-05, "loss": 0.0078, "step": 10880 }, { "epoch": 2.106230650154799, "grad_norm": 0.12693017721176147, "learning_rate": 9.071787504949556e-05, "loss": 0.0064, "step": 10881 }, { "epoch": 2.106424148606811, "grad_norm": 0.12295589596033096, "learning_rate": 9.071620601180941e-05, 
"loss": 0.0089, "step": 10882 }, { "epoch": 2.1066176470588234, "grad_norm": 0.13857808709144592, "learning_rate": 9.07145368413392e-05, "loss": 0.0089, "step": 10883 }, { "epoch": 2.106811145510836, "grad_norm": 0.2118217647075653, "learning_rate": 9.071286753809115e-05, "loss": 0.0064, "step": 10884 }, { "epoch": 2.1070046439628483, "grad_norm": 0.08910243213176727, "learning_rate": 9.071119810207146e-05, "loss": 0.0081, "step": 10885 }, { "epoch": 2.1071981424148607, "grad_norm": 0.21128590404987335, "learning_rate": 9.070952853328633e-05, "loss": 0.0086, "step": 10886 }, { "epoch": 2.107391640866873, "grad_norm": 0.09422237426042557, "learning_rate": 9.070785883174198e-05, "loss": 0.0064, "step": 10887 }, { "epoch": 2.1075851393188856, "grad_norm": 0.19572614133358002, "learning_rate": 9.07061889974446e-05, "loss": 0.0061, "step": 10888 }, { "epoch": 2.107778637770898, "grad_norm": 0.12894786894321442, "learning_rate": 9.07045190304004e-05, "loss": 0.0076, "step": 10889 }, { "epoch": 2.10797213622291, "grad_norm": 0.13623769581317902, "learning_rate": 9.07028489306156e-05, "loss": 0.0077, "step": 10890 }, { "epoch": 2.1081656346749225, "grad_norm": 0.31015393137931824, "learning_rate": 9.070117869809641e-05, "loss": 0.0084, "step": 10891 }, { "epoch": 2.108359133126935, "grad_norm": 0.08551616966724396, "learning_rate": 9.069950833284903e-05, "loss": 0.0052, "step": 10892 }, { "epoch": 2.1085526315789473, "grad_norm": 0.26638516783714294, "learning_rate": 9.069783783487968e-05, "loss": 0.0076, "step": 10893 }, { "epoch": 2.10874613003096, "grad_norm": 0.14775961637496948, "learning_rate": 9.069616720419457e-05, "loss": 0.0072, "step": 10894 }, { "epoch": 2.1089396284829722, "grad_norm": 0.12032803148031235, "learning_rate": 9.069449644079991e-05, "loss": 0.0075, "step": 10895 }, { "epoch": 2.1091331269349847, "grad_norm": 0.2336263209581375, "learning_rate": 9.06928255447019e-05, "loss": 0.0083, "step": 10896 }, { "epoch": 2.109326625386997, "grad_norm": 0.044584497809410095, "learning_rate": 9.069115451590675e-05, "loss": 0.0095, "step": 10897 }, { "epoch": 2.109520123839009, "grad_norm": 0.22039289772510529, "learning_rate": 9.06894833544207e-05, "loss": 0.0082, "step": 10898 }, { "epoch": 2.1097136222910216, "grad_norm": 0.13527770340442657, "learning_rate": 9.068781206024993e-05, "loss": 0.0079, "step": 10899 }, { "epoch": 2.109907120743034, "grad_norm": 0.12579120695590973, "learning_rate": 9.068614063340067e-05, "loss": 0.0078, "step": 10900 }, { "epoch": 2.1101006191950464, "grad_norm": 0.1594058871269226, "learning_rate": 9.068446907387913e-05, "loss": 0.0072, "step": 10901 }, { "epoch": 2.110294117647059, "grad_norm": 0.05418943986296654, "learning_rate": 9.068279738169153e-05, "loss": 0.0083, "step": 10902 }, { "epoch": 2.1104876160990713, "grad_norm": 0.1351304054260254, "learning_rate": 9.068112555684408e-05, "loss": 0.0085, "step": 10903 }, { "epoch": 2.1106811145510838, "grad_norm": 0.04996190220117569, "learning_rate": 9.067945359934301e-05, "loss": 0.0078, "step": 10904 }, { "epoch": 2.110874613003096, "grad_norm": 0.09520556777715683, "learning_rate": 9.067778150919452e-05, "loss": 0.0066, "step": 10905 }, { "epoch": 2.111068111455108, "grad_norm": 0.09432116150856018, "learning_rate": 9.067610928640482e-05, "loss": 0.0082, "step": 10906 }, { "epoch": 2.1112616099071206, "grad_norm": 0.09772875159978867, "learning_rate": 9.067443693098013e-05, "loss": 0.0075, "step": 10907 }, { "epoch": 2.111455108359133, "grad_norm": 0.10615135729312897, "learning_rate": 
9.067276444292669e-05, "loss": 0.007, "step": 10908 }, { "epoch": 2.1116486068111455, "grad_norm": 0.07174581289291382, "learning_rate": 9.06710918222507e-05, "loss": 0.0078, "step": 10909 }, { "epoch": 2.111842105263158, "grad_norm": 0.10780499130487442, "learning_rate": 9.066941906895838e-05, "loss": 0.0066, "step": 10910 }, { "epoch": 2.1120356037151704, "grad_norm": 0.06449539959430695, "learning_rate": 9.066774618305594e-05, "loss": 0.0076, "step": 10911 }, { "epoch": 2.112229102167183, "grad_norm": 0.1392977088689804, "learning_rate": 9.06660731645496e-05, "loss": 0.0083, "step": 10912 }, { "epoch": 2.112422600619195, "grad_norm": 0.06271884590387344, "learning_rate": 9.066440001344561e-05, "loss": 0.0086, "step": 10913 }, { "epoch": 2.1126160990712073, "grad_norm": 0.09926525503396988, "learning_rate": 9.066272672975016e-05, "loss": 0.0063, "step": 10914 }, { "epoch": 2.1128095975232197, "grad_norm": 0.09522731602191925, "learning_rate": 9.066105331346948e-05, "loss": 0.0069, "step": 10915 }, { "epoch": 2.113003095975232, "grad_norm": 0.08249882608652115, "learning_rate": 9.06593797646098e-05, "loss": 0.0078, "step": 10916 }, { "epoch": 2.1131965944272446, "grad_norm": 0.16567815840244293, "learning_rate": 9.065770608317731e-05, "loss": 0.0089, "step": 10917 }, { "epoch": 2.113390092879257, "grad_norm": 0.09993476420640945, "learning_rate": 9.065603226917828e-05, "loss": 0.0091, "step": 10918 }, { "epoch": 2.1135835913312695, "grad_norm": 0.10805106163024902, "learning_rate": 9.06543583226189e-05, "loss": 0.0077, "step": 10919 }, { "epoch": 2.113777089783282, "grad_norm": 0.10924544930458069, "learning_rate": 9.065268424350539e-05, "loss": 0.0084, "step": 10920 }, { "epoch": 2.113970588235294, "grad_norm": 0.057102546095848083, "learning_rate": 9.065101003184399e-05, "loss": 0.008, "step": 10921 }, { "epoch": 2.1141640866873064, "grad_norm": 0.0888185054063797, "learning_rate": 9.064933568764093e-05, "loss": 0.0059, "step": 10922 }, { "epoch": 2.114357585139319, "grad_norm": 0.03003736212849617, "learning_rate": 9.064766121090241e-05, "loss": 0.0071, "step": 10923 }, { "epoch": 2.1145510835913313, "grad_norm": 0.09114374220371246, "learning_rate": 9.064598660163468e-05, "loss": 0.0078, "step": 10924 }, { "epoch": 2.1147445820433437, "grad_norm": 0.045872997492551804, "learning_rate": 9.064431185984396e-05, "loss": 0.0069, "step": 10925 }, { "epoch": 2.114938080495356, "grad_norm": 0.07706268876791, "learning_rate": 9.064263698553644e-05, "loss": 0.0076, "step": 10926 }, { "epoch": 2.1151315789473686, "grad_norm": 0.0668623298406601, "learning_rate": 9.06409619787184e-05, "loss": 0.008, "step": 10927 }, { "epoch": 2.1153250773993806, "grad_norm": 0.06299012154340744, "learning_rate": 9.063928683939606e-05, "loss": 0.0064, "step": 10928 }, { "epoch": 2.115518575851393, "grad_norm": 0.12515783309936523, "learning_rate": 9.063761156757561e-05, "loss": 0.008, "step": 10929 }, { "epoch": 2.1157120743034055, "grad_norm": 0.0651937797665596, "learning_rate": 9.063593616326331e-05, "loss": 0.0082, "step": 10930 }, { "epoch": 2.115905572755418, "grad_norm": 0.14732147753238678, "learning_rate": 9.063426062646538e-05, "loss": 0.0078, "step": 10931 }, { "epoch": 2.1160990712074303, "grad_norm": 0.0628056526184082, "learning_rate": 9.063258495718806e-05, "loss": 0.0065, "step": 10932 }, { "epoch": 2.116292569659443, "grad_norm": 0.10724121332168579, "learning_rate": 9.063090915543756e-05, "loss": 0.007, "step": 10933 }, { "epoch": 2.1164860681114552, "grad_norm": 0.09570278227329254, 
"learning_rate": 9.062923322122012e-05, "loss": 0.0071, "step": 10934 }, { "epoch": 2.1166795665634677, "grad_norm": 0.04481671378016472, "learning_rate": 9.062755715454197e-05, "loss": 0.0073, "step": 10935 }, { "epoch": 2.1168730650154797, "grad_norm": 0.07455439120531082, "learning_rate": 9.062588095540934e-05, "loss": 0.0081, "step": 10936 }, { "epoch": 2.117066563467492, "grad_norm": 0.05265229195356369, "learning_rate": 9.062420462382845e-05, "loss": 0.0071, "step": 10937 }, { "epoch": 2.1172600619195046, "grad_norm": 0.06163192167878151, "learning_rate": 9.062252815980556e-05, "loss": 0.0076, "step": 10938 }, { "epoch": 2.117453560371517, "grad_norm": 0.07540200650691986, "learning_rate": 9.062085156334689e-05, "loss": 0.008, "step": 10939 }, { "epoch": 2.1176470588235294, "grad_norm": 0.051708828657865524, "learning_rate": 9.061917483445867e-05, "loss": 0.0087, "step": 10940 }, { "epoch": 2.117840557275542, "grad_norm": 0.09605996310710907, "learning_rate": 9.061749797314715e-05, "loss": 0.0076, "step": 10941 }, { "epoch": 2.1180340557275543, "grad_norm": 0.050178032368421555, "learning_rate": 9.061582097941852e-05, "loss": 0.0076, "step": 10942 }, { "epoch": 2.1182275541795668, "grad_norm": 0.06440360099077225, "learning_rate": 9.061414385327906e-05, "loss": 0.0088, "step": 10943 }, { "epoch": 2.1184210526315788, "grad_norm": 0.08945334702730179, "learning_rate": 9.061246659473498e-05, "loss": 0.0075, "step": 10944 }, { "epoch": 2.118614551083591, "grad_norm": 0.04558000713586807, "learning_rate": 9.061078920379253e-05, "loss": 0.0076, "step": 10945 }, { "epoch": 2.1188080495356036, "grad_norm": 0.07820425182580948, "learning_rate": 9.060911168045793e-05, "loss": 0.0079, "step": 10946 }, { "epoch": 2.119001547987616, "grad_norm": 0.034666042774915695, "learning_rate": 9.060743402473745e-05, "loss": 0.0074, "step": 10947 }, { "epoch": 2.1191950464396285, "grad_norm": 0.0339231863617897, "learning_rate": 9.060575623663728e-05, "loss": 0.0068, "step": 10948 }, { "epoch": 2.119388544891641, "grad_norm": 0.07831957191228867, "learning_rate": 9.06040783161637e-05, "loss": 0.0078, "step": 10949 }, { "epoch": 2.1195820433436534, "grad_norm": 0.07493539899587631, "learning_rate": 9.060240026332292e-05, "loss": 0.0082, "step": 10950 }, { "epoch": 2.119775541795666, "grad_norm": 0.052131444215774536, "learning_rate": 9.060072207812119e-05, "loss": 0.0071, "step": 10951 }, { "epoch": 2.119969040247678, "grad_norm": 0.03285351023077965, "learning_rate": 9.059904376056474e-05, "loss": 0.0081, "step": 10952 }, { "epoch": 2.1201625386996903, "grad_norm": 0.07380571216344833, "learning_rate": 9.059736531065983e-05, "loss": 0.007, "step": 10953 }, { "epoch": 2.1203560371517027, "grad_norm": 0.040502648800611496, "learning_rate": 9.059568672841267e-05, "loss": 0.0089, "step": 10954 }, { "epoch": 2.120549535603715, "grad_norm": 0.0618303082883358, "learning_rate": 9.059400801382954e-05, "loss": 0.0073, "step": 10955 }, { "epoch": 2.1207430340557276, "grad_norm": 0.05086608976125717, "learning_rate": 9.059232916691664e-05, "loss": 0.0075, "step": 10956 }, { "epoch": 2.12093653250774, "grad_norm": 0.03979877755045891, "learning_rate": 9.059065018768025e-05, "loss": 0.0064, "step": 10957 }, { "epoch": 2.1211300309597525, "grad_norm": 0.05191713944077492, "learning_rate": 9.058897107612658e-05, "loss": 0.008, "step": 10958 }, { "epoch": 2.1213235294117645, "grad_norm": 0.049546729773283005, "learning_rate": 9.05872918322619e-05, "loss": 0.0072, "step": 10959 }, { "epoch": 2.121517027863777, "grad_norm": 
0.058806367218494415, "learning_rate": 9.058561245609242e-05, "loss": 0.0064, "step": 10960 }, { "epoch": 2.1217105263157894, "grad_norm": 0.040643151849508286, "learning_rate": 9.05839329476244e-05, "loss": 0.0064, "step": 10961 }, { "epoch": 2.121904024767802, "grad_norm": 0.05374468117952347, "learning_rate": 9.058225330686409e-05, "loss": 0.0071, "step": 10962 }, { "epoch": 2.1220975232198143, "grad_norm": 0.04136384278535843, "learning_rate": 9.058057353381773e-05, "loss": 0.007, "step": 10963 }, { "epoch": 2.1222910216718267, "grad_norm": 0.043409932404756546, "learning_rate": 9.057889362849157e-05, "loss": 0.0073, "step": 10964 }, { "epoch": 2.122484520123839, "grad_norm": 0.0527985580265522, "learning_rate": 9.057721359089184e-05, "loss": 0.0075, "step": 10965 }, { "epoch": 2.1226780185758516, "grad_norm": 0.04188147187232971, "learning_rate": 9.05755334210248e-05, "loss": 0.0086, "step": 10966 }, { "epoch": 2.1228715170278636, "grad_norm": 0.05490031838417053, "learning_rate": 9.05738531188967e-05, "loss": 0.0071, "step": 10967 }, { "epoch": 2.123065015479876, "grad_norm": 0.04092252999544144, "learning_rate": 9.057217268451377e-05, "loss": 0.008, "step": 10968 }, { "epoch": 2.1232585139318885, "grad_norm": 0.030148349702358246, "learning_rate": 9.057049211788226e-05, "loss": 0.0085, "step": 10969 }, { "epoch": 2.123452012383901, "grad_norm": 0.03723965212702751, "learning_rate": 9.056881141900845e-05, "loss": 0.0068, "step": 10970 }, { "epoch": 2.1236455108359134, "grad_norm": 0.04182544723153114, "learning_rate": 9.056713058789856e-05, "loss": 0.0077, "step": 10971 }, { "epoch": 2.123839009287926, "grad_norm": 0.02952658198773861, "learning_rate": 9.056544962455881e-05, "loss": 0.0065, "step": 10972 }, { "epoch": 2.1240325077399382, "grad_norm": 0.05851905792951584, "learning_rate": 9.056376852899552e-05, "loss": 0.0082, "step": 10973 }, { "epoch": 2.1242260061919507, "grad_norm": 0.05263553559780121, "learning_rate": 9.056208730121489e-05, "loss": 0.0083, "step": 10974 }, { "epoch": 2.1244195046439627, "grad_norm": 0.05961737781763077, "learning_rate": 9.056040594122317e-05, "loss": 0.0076, "step": 10975 }, { "epoch": 2.124613003095975, "grad_norm": 0.02783789671957493, "learning_rate": 9.055872444902662e-05, "loss": 0.0087, "step": 10976 }, { "epoch": 2.1248065015479876, "grad_norm": 0.08346587419509888, "learning_rate": 9.055704282463152e-05, "loss": 0.0074, "step": 10977 }, { "epoch": 2.125, "grad_norm": 0.0518111027777195, "learning_rate": 9.055536106804409e-05, "loss": 0.0076, "step": 10978 }, { "epoch": 2.1251934984520124, "grad_norm": 0.046846404671669006, "learning_rate": 9.055367917927056e-05, "loss": 0.0095, "step": 10979 }, { "epoch": 2.125386996904025, "grad_norm": 0.11534987390041351, "learning_rate": 9.055199715831724e-05, "loss": 0.0075, "step": 10980 }, { "epoch": 2.1255804953560373, "grad_norm": 0.03721965104341507, "learning_rate": 9.055031500519034e-05, "loss": 0.0078, "step": 10981 }, { "epoch": 2.1257739938080498, "grad_norm": 0.09581900388002396, "learning_rate": 9.054863271989616e-05, "loss": 0.0067, "step": 10982 }, { "epoch": 2.1259674922600618, "grad_norm": 0.1181233674287796, "learning_rate": 9.054695030244087e-05, "loss": 0.0082, "step": 10983 }, { "epoch": 2.126160990712074, "grad_norm": 0.11165618896484375, "learning_rate": 9.054526775283082e-05, "loss": 0.0059, "step": 10984 }, { "epoch": 2.1263544891640866, "grad_norm": 0.1457156389951706, "learning_rate": 9.054358507107221e-05, "loss": 0.0103, "step": 10985 }, { "epoch": 2.126547987616099, 
"grad_norm": 0.0891871452331543, "learning_rate": 9.05419022571713e-05, "loss": 0.0079, "step": 10986 }, { "epoch": 2.1267414860681115, "grad_norm": 0.13682927191257477, "learning_rate": 9.054021931113438e-05, "loss": 0.0068, "step": 10987 }, { "epoch": 2.126934984520124, "grad_norm": 0.1275022327899933, "learning_rate": 9.053853623296765e-05, "loss": 0.0081, "step": 10988 }, { "epoch": 2.1271284829721364, "grad_norm": 0.14517422020435333, "learning_rate": 9.053685302267741e-05, "loss": 0.0079, "step": 10989 }, { "epoch": 2.1273219814241484, "grad_norm": 0.1795179396867752, "learning_rate": 9.053516968026993e-05, "loss": 0.009, "step": 10990 }, { "epoch": 2.127515479876161, "grad_norm": 0.10640563815832138, "learning_rate": 9.053348620575142e-05, "loss": 0.0072, "step": 10991 }, { "epoch": 2.1277089783281733, "grad_norm": 0.19797387719154358, "learning_rate": 9.053180259912817e-05, "loss": 0.0057, "step": 10992 }, { "epoch": 2.1279024767801857, "grad_norm": 0.1231616735458374, "learning_rate": 9.053011886040644e-05, "loss": 0.0061, "step": 10993 }, { "epoch": 2.128095975232198, "grad_norm": 0.13357794284820557, "learning_rate": 9.052843498959247e-05, "loss": 0.0088, "step": 10994 }, { "epoch": 2.1282894736842106, "grad_norm": 0.1692144125699997, "learning_rate": 9.052675098669254e-05, "loss": 0.0082, "step": 10995 }, { "epoch": 2.128482972136223, "grad_norm": 0.03464702516794205, "learning_rate": 9.05250668517129e-05, "loss": 0.0075, "step": 10996 }, { "epoch": 2.1286764705882355, "grad_norm": 0.1675976663827896, "learning_rate": 9.052338258465982e-05, "loss": 0.0062, "step": 10997 }, { "epoch": 2.1288699690402475, "grad_norm": 0.09531823545694351, "learning_rate": 9.052169818553956e-05, "loss": 0.0082, "step": 10998 }, { "epoch": 2.12906346749226, "grad_norm": 0.12214755266904831, "learning_rate": 9.052001365435837e-05, "loss": 0.0078, "step": 10999 }, { "epoch": 2.1292569659442724, "grad_norm": 0.12965062260627747, "learning_rate": 9.051832899112253e-05, "loss": 0.0082, "step": 11000 }, { "epoch": 2.129450464396285, "grad_norm": 0.10324980318546295, "learning_rate": 9.051664419583829e-05, "loss": 0.0074, "step": 11001 }, { "epoch": 2.1296439628482973, "grad_norm": 0.09754252433776855, "learning_rate": 9.051495926851191e-05, "loss": 0.0062, "step": 11002 }, { "epoch": 2.1298374613003097, "grad_norm": 0.10041437298059464, "learning_rate": 9.051327420914967e-05, "loss": 0.0062, "step": 11003 }, { "epoch": 2.130030959752322, "grad_norm": 0.07796671241521835, "learning_rate": 9.051158901775784e-05, "loss": 0.0059, "step": 11004 }, { "epoch": 2.130224458204334, "grad_norm": 0.04986414313316345, "learning_rate": 9.050990369434266e-05, "loss": 0.0076, "step": 11005 }, { "epoch": 2.1304179566563466, "grad_norm": 0.09189987927675247, "learning_rate": 9.05082182389104e-05, "loss": 0.0076, "step": 11006 }, { "epoch": 2.130611455108359, "grad_norm": 0.041065338999032974, "learning_rate": 9.050653265146734e-05, "loss": 0.007, "step": 11007 }, { "epoch": 2.1308049535603715, "grad_norm": 0.0925513505935669, "learning_rate": 9.050484693201975e-05, "loss": 0.0079, "step": 11008 }, { "epoch": 2.130998452012384, "grad_norm": 0.04818763583898544, "learning_rate": 9.050316108057387e-05, "loss": 0.0085, "step": 11009 }, { "epoch": 2.1311919504643964, "grad_norm": 0.07055798918008804, "learning_rate": 9.050147509713601e-05, "loss": 0.008, "step": 11010 }, { "epoch": 2.131385448916409, "grad_norm": 0.058250848203897476, "learning_rate": 9.04997889817124e-05, "loss": 0.0068, "step": 11011 }, { "epoch": 
2.1315789473684212, "grad_norm": 0.10618562996387482, "learning_rate": 9.049810273430931e-05, "loss": 0.0068, "step": 11012 }, { "epoch": 2.1317724458204332, "grad_norm": 0.08845997601747513, "learning_rate": 9.049641635493304e-05, "loss": 0.007, "step": 11013 }, { "epoch": 2.1319659442724457, "grad_norm": 0.08727515488862991, "learning_rate": 9.049472984358984e-05, "loss": 0.0073, "step": 11014 }, { "epoch": 2.132159442724458, "grad_norm": 0.07660143077373505, "learning_rate": 9.049304320028596e-05, "loss": 0.0082, "step": 11015 }, { "epoch": 2.1323529411764706, "grad_norm": 0.07882267236709595, "learning_rate": 9.04913564250277e-05, "loss": 0.0067, "step": 11016 }, { "epoch": 2.132546439628483, "grad_norm": 0.05702502280473709, "learning_rate": 9.048966951782136e-05, "loss": 0.0075, "step": 11017 }, { "epoch": 2.1327399380804954, "grad_norm": 0.06511713564395905, "learning_rate": 9.048798247867313e-05, "loss": 0.0062, "step": 11018 }, { "epoch": 2.132933436532508, "grad_norm": 0.06988277286291122, "learning_rate": 9.048629530758934e-05, "loss": 0.0081, "step": 11019 }, { "epoch": 2.1331269349845203, "grad_norm": 0.06511019170284271, "learning_rate": 9.048460800457625e-05, "loss": 0.0072, "step": 11020 }, { "epoch": 2.1333204334365323, "grad_norm": 0.09071670472621918, "learning_rate": 9.048292056964012e-05, "loss": 0.0071, "step": 11021 }, { "epoch": 2.1335139318885448, "grad_norm": 0.07211241871118546, "learning_rate": 9.048123300278722e-05, "loss": 0.0083, "step": 11022 }, { "epoch": 2.133707430340557, "grad_norm": 0.08041338622570038, "learning_rate": 9.047954530402387e-05, "loss": 0.0085, "step": 11023 }, { "epoch": 2.1339009287925697, "grad_norm": 0.0615900419652462, "learning_rate": 9.047785747335631e-05, "loss": 0.0077, "step": 11024 }, { "epoch": 2.134094427244582, "grad_norm": 0.06105718016624451, "learning_rate": 9.047616951079081e-05, "loss": 0.0066, "step": 11025 }, { "epoch": 2.1342879256965945, "grad_norm": 0.06463867425918579, "learning_rate": 9.047448141633365e-05, "loss": 0.0068, "step": 11026 }, { "epoch": 2.134481424148607, "grad_norm": 0.04298628121614456, "learning_rate": 9.04727931899911e-05, "loss": 0.0079, "step": 11027 }, { "epoch": 2.1346749226006194, "grad_norm": 0.039559755474328995, "learning_rate": 9.047110483176945e-05, "loss": 0.0071, "step": 11028 }, { "epoch": 2.1348684210526314, "grad_norm": 0.07765673100948334, "learning_rate": 9.0469416341675e-05, "loss": 0.007, "step": 11029 }, { "epoch": 2.135061919504644, "grad_norm": 0.041274767369031906, "learning_rate": 9.046772771971397e-05, "loss": 0.0061, "step": 11030 }, { "epoch": 2.1352554179566563, "grad_norm": 0.07270457595586777, "learning_rate": 9.046603896589267e-05, "loss": 0.0075, "step": 11031 }, { "epoch": 2.1354489164086687, "grad_norm": 0.04719395563006401, "learning_rate": 9.04643500802174e-05, "loss": 0.008, "step": 11032 }, { "epoch": 2.135642414860681, "grad_norm": 0.056103724986314774, "learning_rate": 9.04626610626944e-05, "loss": 0.0066, "step": 11033 }, { "epoch": 2.1358359133126936, "grad_norm": 0.047684282064437866, "learning_rate": 9.046097191332995e-05, "loss": 0.006, "step": 11034 }, { "epoch": 2.136029411764706, "grad_norm": 0.04414062947034836, "learning_rate": 9.045928263213036e-05, "loss": 0.0066, "step": 11035 }, { "epoch": 2.136222910216718, "grad_norm": 0.03303241357207298, "learning_rate": 9.04575932191019e-05, "loss": 0.0069, "step": 11036 }, { "epoch": 2.1364164086687305, "grad_norm": 0.07723645865917206, "learning_rate": 9.045590367425083e-05, "loss": 0.0058, "step": 
11037 }, { "epoch": 2.136609907120743, "grad_norm": 0.03474810719490051, "learning_rate": 9.045421399758345e-05, "loss": 0.0066, "step": 11038 }, { "epoch": 2.1368034055727554, "grad_norm": 0.05529704689979553, "learning_rate": 9.045252418910604e-05, "loss": 0.0073, "step": 11039 }, { "epoch": 2.136996904024768, "grad_norm": 0.04833004251122475, "learning_rate": 9.045083424882489e-05, "loss": 0.0078, "step": 11040 }, { "epoch": 2.1371904024767803, "grad_norm": 0.05948048084974289, "learning_rate": 9.044914417674627e-05, "loss": 0.007, "step": 11041 }, { "epoch": 2.1373839009287927, "grad_norm": 0.05103829875588417, "learning_rate": 9.044745397287646e-05, "loss": 0.0078, "step": 11042 }, { "epoch": 2.137577399380805, "grad_norm": 0.06047305464744568, "learning_rate": 9.044576363722175e-05, "loss": 0.0077, "step": 11043 }, { "epoch": 2.137770897832817, "grad_norm": 0.06717553734779358, "learning_rate": 9.044407316978843e-05, "loss": 0.0067, "step": 11044 }, { "epoch": 2.1379643962848296, "grad_norm": 0.052411556243896484, "learning_rate": 9.04423825705828e-05, "loss": 0.0074, "step": 11045 }, { "epoch": 2.138157894736842, "grad_norm": 0.04376920312643051, "learning_rate": 9.044069183961108e-05, "loss": 0.0072, "step": 11046 }, { "epoch": 2.1383513931888545, "grad_norm": 0.06689897179603577, "learning_rate": 9.043900097687963e-05, "loss": 0.0074, "step": 11047 }, { "epoch": 2.138544891640867, "grad_norm": 0.0429970808327198, "learning_rate": 9.043730998239471e-05, "loss": 0.0063, "step": 11048 }, { "epoch": 2.1387383900928794, "grad_norm": 0.06140593811869621, "learning_rate": 9.043561885616259e-05, "loss": 0.0072, "step": 11049 }, { "epoch": 2.138931888544892, "grad_norm": 0.058619607239961624, "learning_rate": 9.043392759818958e-05, "loss": 0.006, "step": 11050 }, { "epoch": 2.139125386996904, "grad_norm": 0.05414985120296478, "learning_rate": 9.043223620848195e-05, "loss": 0.0083, "step": 11051 }, { "epoch": 2.1393188854489162, "grad_norm": 0.044764842838048935, "learning_rate": 9.0430544687046e-05, "loss": 0.0081, "step": 11052 }, { "epoch": 2.1395123839009287, "grad_norm": 0.0691407322883606, "learning_rate": 9.042885303388802e-05, "loss": 0.0067, "step": 11053 }, { "epoch": 2.139705882352941, "grad_norm": 0.0426349900662899, "learning_rate": 9.042716124901428e-05, "loss": 0.0065, "step": 11054 }, { "epoch": 2.1398993808049536, "grad_norm": 0.05573604628443718, "learning_rate": 9.042546933243109e-05, "loss": 0.0061, "step": 11055 }, { "epoch": 2.140092879256966, "grad_norm": 0.041927844285964966, "learning_rate": 9.042377728414473e-05, "loss": 0.0075, "step": 11056 }, { "epoch": 2.1402863777089784, "grad_norm": 0.05272667109966278, "learning_rate": 9.042208510416151e-05, "loss": 0.0071, "step": 11057 }, { "epoch": 2.140479876160991, "grad_norm": 0.030457016080617905, "learning_rate": 9.04203927924877e-05, "loss": 0.0076, "step": 11058 }, { "epoch": 2.140673374613003, "grad_norm": 0.05293356999754906, "learning_rate": 9.041870034912959e-05, "loss": 0.0064, "step": 11059 }, { "epoch": 2.1408668730650153, "grad_norm": 0.04308256506919861, "learning_rate": 9.041700777409349e-05, "loss": 0.0072, "step": 11060 }, { "epoch": 2.1410603715170278, "grad_norm": 0.06473337858915329, "learning_rate": 9.041531506738568e-05, "loss": 0.0079, "step": 11061 }, { "epoch": 2.14125386996904, "grad_norm": 0.06420167535543442, "learning_rate": 9.041362222901244e-05, "loss": 0.0076, "step": 11062 }, { "epoch": 2.1414473684210527, "grad_norm": 0.07014729082584381, "learning_rate": 9.04119292589801e-05, "loss": 
0.006, "step": 11063 }, { "epoch": 2.141640866873065, "grad_norm": 0.08129695057868958, "learning_rate": 9.041023615729493e-05, "loss": 0.0087, "step": 11064 }, { "epoch": 2.1418343653250775, "grad_norm": 0.08407984673976898, "learning_rate": 9.040854292396321e-05, "loss": 0.0076, "step": 11065 }, { "epoch": 2.14202786377709, "grad_norm": 0.07878221571445465, "learning_rate": 9.040684955899126e-05, "loss": 0.0071, "step": 11066 }, { "epoch": 2.142221362229102, "grad_norm": 0.07605158537626266, "learning_rate": 9.040515606238537e-05, "loss": 0.0082, "step": 11067 }, { "epoch": 2.1424148606811144, "grad_norm": 0.09179242700338364, "learning_rate": 9.040346243415182e-05, "loss": 0.0075, "step": 11068 }, { "epoch": 2.142608359133127, "grad_norm": 0.0535055547952652, "learning_rate": 9.040176867429695e-05, "loss": 0.007, "step": 11069 }, { "epoch": 2.1428018575851393, "grad_norm": 0.09391043335199356, "learning_rate": 9.040007478282701e-05, "loss": 0.0063, "step": 11070 }, { "epoch": 2.1429953560371517, "grad_norm": 0.032410021871328354, "learning_rate": 9.039838075974833e-05, "loss": 0.0063, "step": 11071 }, { "epoch": 2.143188854489164, "grad_norm": 0.06569831073284149, "learning_rate": 9.039668660506719e-05, "loss": 0.0076, "step": 11072 }, { "epoch": 2.1433823529411766, "grad_norm": 0.05775260180234909, "learning_rate": 9.039499231878986e-05, "loss": 0.0045, "step": 11073 }, { "epoch": 2.143575851393189, "grad_norm": 0.06649795919656754, "learning_rate": 9.03932979009227e-05, "loss": 0.0066, "step": 11074 }, { "epoch": 2.143769349845201, "grad_norm": 0.05952007323503494, "learning_rate": 9.039160335147197e-05, "loss": 0.0071, "step": 11075 }, { "epoch": 2.1439628482972135, "grad_norm": 0.08038383722305298, "learning_rate": 9.038990867044397e-05, "loss": 0.0071, "step": 11076 }, { "epoch": 2.144156346749226, "grad_norm": 0.06464855372905731, "learning_rate": 9.038821385784502e-05, "loss": 0.0086, "step": 11077 }, { "epoch": 2.1443498452012384, "grad_norm": 0.06726353615522385, "learning_rate": 9.038651891368142e-05, "loss": 0.0081, "step": 11078 }, { "epoch": 2.144543343653251, "grad_norm": 0.05050566792488098, "learning_rate": 9.038482383795945e-05, "loss": 0.0068, "step": 11079 }, { "epoch": 2.1447368421052633, "grad_norm": 0.058766692876815796, "learning_rate": 9.038312863068541e-05, "loss": 0.0066, "step": 11080 }, { "epoch": 2.1449303405572757, "grad_norm": 0.05744456127285957, "learning_rate": 9.038143329186564e-05, "loss": 0.0059, "step": 11081 }, { "epoch": 2.1451238390092877, "grad_norm": 0.04887230321764946, "learning_rate": 9.037973782150642e-05, "loss": 0.0069, "step": 11082 }, { "epoch": 2.1453173374613, "grad_norm": 0.07061286270618439, "learning_rate": 9.037804221961404e-05, "loss": 0.0073, "step": 11083 }, { "epoch": 2.1455108359133126, "grad_norm": 0.06211603060364723, "learning_rate": 9.037634648619481e-05, "loss": 0.0073, "step": 11084 }, { "epoch": 2.145704334365325, "grad_norm": 0.05052979663014412, "learning_rate": 9.037465062125506e-05, "loss": 0.0078, "step": 11085 }, { "epoch": 2.1458978328173375, "grad_norm": 0.03913120925426483, "learning_rate": 9.037295462480106e-05, "loss": 0.0071, "step": 11086 }, { "epoch": 2.14609133126935, "grad_norm": 0.05583120509982109, "learning_rate": 9.037125849683915e-05, "loss": 0.006, "step": 11087 }, { "epoch": 2.1462848297213624, "grad_norm": 0.04002746194601059, "learning_rate": 9.03695622373756e-05, "loss": 0.0082, "step": 11088 }, { "epoch": 2.146478328173375, "grad_norm": 0.03517436608672142, "learning_rate": 
9.036786584641674e-05, "loss": 0.0063, "step": 11089 }, { "epoch": 2.146671826625387, "grad_norm": 0.059069957584142685, "learning_rate": 9.036616932396886e-05, "loss": 0.0083, "step": 11090 }, { "epoch": 2.1468653250773992, "grad_norm": 0.05059090629220009, "learning_rate": 9.036447267003829e-05, "loss": 0.0068, "step": 11091 }, { "epoch": 2.1470588235294117, "grad_norm": 0.038350824266672134, "learning_rate": 9.036277588463131e-05, "loss": 0.0072, "step": 11092 }, { "epoch": 2.147252321981424, "grad_norm": 0.04704013094305992, "learning_rate": 9.036107896775426e-05, "loss": 0.0067, "step": 11093 }, { "epoch": 2.1474458204334366, "grad_norm": 0.029863912612199783, "learning_rate": 9.035938191941343e-05, "loss": 0.0072, "step": 11094 }, { "epoch": 2.147639318885449, "grad_norm": 0.06415462493896484, "learning_rate": 9.035768473961511e-05, "loss": 0.0067, "step": 11095 }, { "epoch": 2.1478328173374615, "grad_norm": 0.03253978118300438, "learning_rate": 9.035598742836565e-05, "loss": 0.0086, "step": 11096 }, { "epoch": 2.1480263157894735, "grad_norm": 0.05750765651464462, "learning_rate": 9.035428998567134e-05, "loss": 0.0078, "step": 11097 }, { "epoch": 2.148219814241486, "grad_norm": 0.03517153114080429, "learning_rate": 9.035259241153849e-05, "loss": 0.0078, "step": 11098 }, { "epoch": 2.1484133126934983, "grad_norm": 0.06618960201740265, "learning_rate": 9.03508947059734e-05, "loss": 0.0062, "step": 11099 }, { "epoch": 2.1486068111455108, "grad_norm": 0.049490492790937424, "learning_rate": 9.03491968689824e-05, "loss": 0.0054, "step": 11100 }, { "epoch": 2.148800309597523, "grad_norm": 0.055024564266204834, "learning_rate": 9.034749890057181e-05, "loss": 0.0079, "step": 11101 }, { "epoch": 2.1489938080495357, "grad_norm": 0.07305197417736053, "learning_rate": 9.034580080074791e-05, "loss": 0.0083, "step": 11102 }, { "epoch": 2.149187306501548, "grad_norm": 0.04898009076714516, "learning_rate": 9.034410256951704e-05, "loss": 0.0069, "step": 11103 }, { "epoch": 2.1493808049535605, "grad_norm": 0.05126086249947548, "learning_rate": 9.03424042068855e-05, "loss": 0.0073, "step": 11104 }, { "epoch": 2.149574303405573, "grad_norm": 0.05234960839152336, "learning_rate": 9.034070571285962e-05, "loss": 0.0066, "step": 11105 }, { "epoch": 2.149767801857585, "grad_norm": 0.03955329954624176, "learning_rate": 9.033900708744569e-05, "loss": 0.008, "step": 11106 }, { "epoch": 2.1499613003095974, "grad_norm": 0.05581245943903923, "learning_rate": 9.033730833065006e-05, "loss": 0.0065, "step": 11107 }, { "epoch": 2.15015479876161, "grad_norm": 0.03887109085917473, "learning_rate": 9.033560944247901e-05, "loss": 0.0078, "step": 11108 }, { "epoch": 2.1503482972136223, "grad_norm": 0.029102642089128494, "learning_rate": 9.033391042293888e-05, "loss": 0.0094, "step": 11109 }, { "epoch": 2.1505417956656347, "grad_norm": 0.06426291912794113, "learning_rate": 9.033221127203597e-05, "loss": 0.0083, "step": 11110 }, { "epoch": 2.150735294117647, "grad_norm": 0.0403386726975441, "learning_rate": 9.033051198977661e-05, "loss": 0.0074, "step": 11111 }, { "epoch": 2.1509287925696596, "grad_norm": 0.04813352972269058, "learning_rate": 9.03288125761671e-05, "loss": 0.0057, "step": 11112 }, { "epoch": 2.1511222910216716, "grad_norm": 0.04185817390680313, "learning_rate": 9.03271130312138e-05, "loss": 0.007, "step": 11113 }, { "epoch": 2.151315789473684, "grad_norm": 0.0854085236787796, "learning_rate": 9.032541335492296e-05, "loss": 0.0073, "step": 11114 }, { "epoch": 2.1515092879256965, "grad_norm": 
0.05053172633051872, "learning_rate": 9.032371354730095e-05, "loss": 0.0056, "step": 11115 }, { "epoch": 2.151702786377709, "grad_norm": 0.06627967953681946, "learning_rate": 9.032201360835409e-05, "loss": 0.0075, "step": 11116 }, { "epoch": 2.1518962848297214, "grad_norm": 0.05531294643878937, "learning_rate": 9.032031353808867e-05, "loss": 0.0079, "step": 11117 }, { "epoch": 2.152089783281734, "grad_norm": 0.04975298419594765, "learning_rate": 9.031861333651105e-05, "loss": 0.0087, "step": 11118 }, { "epoch": 2.1522832817337463, "grad_norm": 0.0765172690153122, "learning_rate": 9.031691300362751e-05, "loss": 0.0054, "step": 11119 }, { "epoch": 2.1524767801857587, "grad_norm": 0.04889377951622009, "learning_rate": 9.03152125394444e-05, "loss": 0.0075, "step": 11120 }, { "epoch": 2.1526702786377707, "grad_norm": 0.05146361514925957, "learning_rate": 9.031351194396803e-05, "loss": 0.0064, "step": 11121 }, { "epoch": 2.152863777089783, "grad_norm": 0.024457864463329315, "learning_rate": 9.031181121720472e-05, "loss": 0.0072, "step": 11122 }, { "epoch": 2.1530572755417956, "grad_norm": 0.049476493149995804, "learning_rate": 9.03101103591608e-05, "loss": 0.0068, "step": 11123 }, { "epoch": 2.153250773993808, "grad_norm": 0.044622018933296204, "learning_rate": 9.030840936984258e-05, "loss": 0.0075, "step": 11124 }, { "epoch": 2.1534442724458205, "grad_norm": 0.04692354425787926, "learning_rate": 9.030670824925641e-05, "loss": 0.0077, "step": 11125 }, { "epoch": 2.153637770897833, "grad_norm": 0.07370781153440475, "learning_rate": 9.030500699740859e-05, "loss": 0.0082, "step": 11126 }, { "epoch": 2.1538312693498454, "grad_norm": 0.04798925668001175, "learning_rate": 9.030330561430545e-05, "loss": 0.006, "step": 11127 }, { "epoch": 2.1540247678018574, "grad_norm": 0.0947909951210022, "learning_rate": 9.030160409995332e-05, "loss": 0.0073, "step": 11128 }, { "epoch": 2.15421826625387, "grad_norm": 0.06328140199184418, "learning_rate": 9.029990245435853e-05, "loss": 0.0074, "step": 11129 }, { "epoch": 2.1544117647058822, "grad_norm": 0.054164737462997437, "learning_rate": 9.029820067752739e-05, "loss": 0.0079, "step": 11130 }, { "epoch": 2.1546052631578947, "grad_norm": 0.07888179272413254, "learning_rate": 9.029649876946626e-05, "loss": 0.008, "step": 11131 }, { "epoch": 2.154798761609907, "grad_norm": 0.04741908237338066, "learning_rate": 9.029479673018142e-05, "loss": 0.007, "step": 11132 }, { "epoch": 2.1549922600619196, "grad_norm": 0.07936951518058777, "learning_rate": 9.029309455967923e-05, "loss": 0.0081, "step": 11133 }, { "epoch": 2.155185758513932, "grad_norm": 0.0840882733464241, "learning_rate": 9.029139225796601e-05, "loss": 0.0075, "step": 11134 }, { "epoch": 2.1553792569659445, "grad_norm": 0.05518609657883644, "learning_rate": 9.028968982504809e-05, "loss": 0.0067, "step": 11135 }, { "epoch": 2.1555727554179565, "grad_norm": 0.08883023262023926, "learning_rate": 9.02879872609318e-05, "loss": 0.0081, "step": 11136 }, { "epoch": 2.155766253869969, "grad_norm": 0.028580062091350555, "learning_rate": 9.028628456562348e-05, "loss": 0.0075, "step": 11137 }, { "epoch": 2.1559597523219813, "grad_norm": 0.0716424435377121, "learning_rate": 9.028458173912945e-05, "loss": 0.007, "step": 11138 }, { "epoch": 2.156153250773994, "grad_norm": 0.043234072625637054, "learning_rate": 9.028287878145602e-05, "loss": 0.0069, "step": 11139 }, { "epoch": 2.156346749226006, "grad_norm": 0.0760122612118721, "learning_rate": 9.028117569260955e-05, "loss": 0.007, "step": 11140 }, { "epoch": 
2.1565402476780187, "grad_norm": 0.04669749364256859, "learning_rate": 9.027947247259636e-05, "loss": 0.0071, "step": 11141 }, { "epoch": 2.156733746130031, "grad_norm": 0.03960442170500755, "learning_rate": 9.027776912142279e-05, "loss": 0.0057, "step": 11142 }, { "epoch": 2.1569272445820435, "grad_norm": 0.05233794078230858, "learning_rate": 9.027606563909517e-05, "loss": 0.0095, "step": 11143 }, { "epoch": 2.1571207430340555, "grad_norm": 0.060186706483364105, "learning_rate": 9.027436202561982e-05, "loss": 0.008, "step": 11144 }, { "epoch": 2.157314241486068, "grad_norm": 0.050489943474531174, "learning_rate": 9.027265828100309e-05, "loss": 0.0052, "step": 11145 }, { "epoch": 2.1575077399380804, "grad_norm": 0.0619753859937191, "learning_rate": 9.02709544052513e-05, "loss": 0.0074, "step": 11146 }, { "epoch": 2.157701238390093, "grad_norm": 0.027190987020730972, "learning_rate": 9.026925039837079e-05, "loss": 0.0074, "step": 11147 }, { "epoch": 2.1578947368421053, "grad_norm": 0.06087689474225044, "learning_rate": 9.026754626036791e-05, "loss": 0.0089, "step": 11148 }, { "epoch": 2.1580882352941178, "grad_norm": 0.05169316381216049, "learning_rate": 9.026584199124898e-05, "loss": 0.0084, "step": 11149 }, { "epoch": 2.15828173374613, "grad_norm": 0.06353463977575302, "learning_rate": 9.026413759102034e-05, "loss": 0.0062, "step": 11150 }, { "epoch": 2.1584752321981426, "grad_norm": 0.03723379224538803, "learning_rate": 9.026243305968833e-05, "loss": 0.0062, "step": 11151 }, { "epoch": 2.1586687306501546, "grad_norm": 0.05209282785654068, "learning_rate": 9.026072839725926e-05, "loss": 0.0074, "step": 11152 }, { "epoch": 2.158862229102167, "grad_norm": 0.052671097218990326, "learning_rate": 9.025902360373952e-05, "loss": 0.0068, "step": 11153 }, { "epoch": 2.1590557275541795, "grad_norm": 0.0419214591383934, "learning_rate": 9.025731867913541e-05, "loss": 0.0081, "step": 11154 }, { "epoch": 2.159249226006192, "grad_norm": 0.05194327235221863, "learning_rate": 9.025561362345326e-05, "loss": 0.0077, "step": 11155 }, { "epoch": 2.1594427244582044, "grad_norm": 0.054652947932481766, "learning_rate": 9.025390843669943e-05, "loss": 0.0066, "step": 11156 }, { "epoch": 2.159636222910217, "grad_norm": 0.03314143419265747, "learning_rate": 9.025220311888027e-05, "loss": 0.0065, "step": 11157 }, { "epoch": 2.1598297213622293, "grad_norm": 0.07430794090032578, "learning_rate": 9.02504976700021e-05, "loss": 0.0072, "step": 11158 }, { "epoch": 2.1600232198142413, "grad_norm": 0.03663863614201546, "learning_rate": 9.024879209007126e-05, "loss": 0.0077, "step": 11159 }, { "epoch": 2.1602167182662537, "grad_norm": 0.06489621102809906, "learning_rate": 9.02470863790941e-05, "loss": 0.0074, "step": 11160 }, { "epoch": 2.160410216718266, "grad_norm": 0.03696658834815025, "learning_rate": 9.024538053707695e-05, "loss": 0.0077, "step": 11161 }, { "epoch": 2.1606037151702786, "grad_norm": 0.08356950432062149, "learning_rate": 9.024367456402618e-05, "loss": 0.0073, "step": 11162 }, { "epoch": 2.160797213622291, "grad_norm": 0.036499373614788055, "learning_rate": 9.02419684599481e-05, "loss": 0.0066, "step": 11163 }, { "epoch": 2.1609907120743035, "grad_norm": 0.05759652704000473, "learning_rate": 9.024026222484906e-05, "loss": 0.0069, "step": 11164 }, { "epoch": 2.161184210526316, "grad_norm": 0.04667782410979271, "learning_rate": 9.023855585873541e-05, "loss": 0.0063, "step": 11165 }, { "epoch": 2.1613777089783284, "grad_norm": 0.04349174350500107, "learning_rate": 9.023684936161348e-05, "loss": 0.0087, 
"step": 11166 }, { "epoch": 2.1615712074303404, "grad_norm": 0.057449162006378174, "learning_rate": 9.023514273348965e-05, "loss": 0.0067, "step": 11167 }, { "epoch": 2.161764705882353, "grad_norm": 0.039532363414764404, "learning_rate": 9.023343597437023e-05, "loss": 0.0075, "step": 11168 }, { "epoch": 2.1619582043343653, "grad_norm": 0.06618042290210724, "learning_rate": 9.02317290842616e-05, "loss": 0.0077, "step": 11169 }, { "epoch": 2.1621517027863777, "grad_norm": 0.06321775913238525, "learning_rate": 9.023002206317007e-05, "loss": 0.0073, "step": 11170 }, { "epoch": 2.16234520123839, "grad_norm": 0.04192870110273361, "learning_rate": 9.022831491110197e-05, "loss": 0.0072, "step": 11171 }, { "epoch": 2.1625386996904026, "grad_norm": 0.09638769179582596, "learning_rate": 9.022660762806371e-05, "loss": 0.0086, "step": 11172 }, { "epoch": 2.162732198142415, "grad_norm": 0.048061221837997437, "learning_rate": 9.022490021406157e-05, "loss": 0.007, "step": 11173 }, { "epoch": 2.162925696594427, "grad_norm": 0.08536335080862045, "learning_rate": 9.022319266910197e-05, "loss": 0.0069, "step": 11174 }, { "epoch": 2.1631191950464395, "grad_norm": 0.05647661164402962, "learning_rate": 9.02214849931912e-05, "loss": 0.0081, "step": 11175 }, { "epoch": 2.163312693498452, "grad_norm": 0.06937845051288605, "learning_rate": 9.021977718633562e-05, "loss": 0.0075, "step": 11176 }, { "epoch": 2.1635061919504643, "grad_norm": 0.06591220200061798, "learning_rate": 9.021806924854159e-05, "loss": 0.0074, "step": 11177 }, { "epoch": 2.163699690402477, "grad_norm": 0.11052721738815308, "learning_rate": 9.021636117981547e-05, "loss": 0.0066, "step": 11178 }, { "epoch": 2.1638931888544892, "grad_norm": 0.07961416244506836, "learning_rate": 9.021465298016358e-05, "loss": 0.0075, "step": 11179 }, { "epoch": 2.1640866873065017, "grad_norm": 0.10342039912939072, "learning_rate": 9.02129446495923e-05, "loss": 0.0068, "step": 11180 }, { "epoch": 2.164280185758514, "grad_norm": 0.08544953167438507, "learning_rate": 9.021123618810795e-05, "loss": 0.0068, "step": 11181 }, { "epoch": 2.1644736842105265, "grad_norm": 0.06865543872117996, "learning_rate": 9.020952759571693e-05, "loss": 0.0067, "step": 11182 }, { "epoch": 2.1646671826625385, "grad_norm": 0.12374512851238251, "learning_rate": 9.020781887242554e-05, "loss": 0.0061, "step": 11183 }, { "epoch": 2.164860681114551, "grad_norm": 0.029935669153928757, "learning_rate": 9.020611001824015e-05, "loss": 0.0082, "step": 11184 }, { "epoch": 2.1650541795665634, "grad_norm": 0.15170887112617493, "learning_rate": 9.020440103316713e-05, "loss": 0.0063, "step": 11185 }, { "epoch": 2.165247678018576, "grad_norm": 0.03279031068086624, "learning_rate": 9.020269191721283e-05, "loss": 0.0063, "step": 11186 }, { "epoch": 2.1654411764705883, "grad_norm": 0.04545535892248154, "learning_rate": 9.020098267038357e-05, "loss": 0.0093, "step": 11187 }, { "epoch": 2.1656346749226008, "grad_norm": 0.10813169181346893, "learning_rate": 9.019927329268576e-05, "loss": 0.007, "step": 11188 }, { "epoch": 2.165828173374613, "grad_norm": 0.03254036605358124, "learning_rate": 9.019756378412571e-05, "loss": 0.0053, "step": 11189 }, { "epoch": 2.166021671826625, "grad_norm": 0.09025026857852936, "learning_rate": 9.01958541447098e-05, "loss": 0.0081, "step": 11190 }, { "epoch": 2.1662151702786376, "grad_norm": 0.09647070616483688, "learning_rate": 9.019414437444435e-05, "loss": 0.0078, "step": 11191 }, { "epoch": 2.16640866873065, "grad_norm": 0.05802524834871292, "learning_rate": 
9.019243447333576e-05, "loss": 0.0075, "step": 11192 }, { "epoch": 2.1666021671826625, "grad_norm": 0.10045847296714783, "learning_rate": 9.019072444139038e-05, "loss": 0.0079, "step": 11193 }, { "epoch": 2.166795665634675, "grad_norm": 0.05298657715320587, "learning_rate": 9.018901427861455e-05, "loss": 0.0093, "step": 11194 }, { "epoch": 2.1669891640866874, "grad_norm": 0.06127942353487015, "learning_rate": 9.018730398501463e-05, "loss": 0.0085, "step": 11195 }, { "epoch": 2.1671826625387, "grad_norm": 0.0786532536149025, "learning_rate": 9.018559356059699e-05, "loss": 0.0073, "step": 11196 }, { "epoch": 2.1673761609907123, "grad_norm": 0.04210154712200165, "learning_rate": 9.018388300536799e-05, "loss": 0.0086, "step": 11197 }, { "epoch": 2.1675696594427243, "grad_norm": 0.05884435400366783, "learning_rate": 9.018217231933398e-05, "loss": 0.007, "step": 11198 }, { "epoch": 2.1677631578947367, "grad_norm": 0.06512859463691711, "learning_rate": 9.018046150250132e-05, "loss": 0.0074, "step": 11199 }, { "epoch": 2.167956656346749, "grad_norm": 0.04864604398608208, "learning_rate": 9.017875055487635e-05, "loss": 0.0079, "step": 11200 }, { "epoch": 2.1681501547987616, "grad_norm": 0.08199871331453323, "learning_rate": 9.017703947646547e-05, "loss": 0.0069, "step": 11201 }, { "epoch": 2.168343653250774, "grad_norm": 0.045356594026088715, "learning_rate": 9.017532826727505e-05, "loss": 0.0064, "step": 11202 }, { "epoch": 2.1685371517027865, "grad_norm": 0.053660448640584946, "learning_rate": 9.017361692731139e-05, "loss": 0.0066, "step": 11203 }, { "epoch": 2.168730650154799, "grad_norm": 0.022275907918810844, "learning_rate": 9.017190545658092e-05, "loss": 0.0074, "step": 11204 }, { "epoch": 2.168924148606811, "grad_norm": 0.06505218893289566, "learning_rate": 9.017019385508995e-05, "loss": 0.0063, "step": 11205 }, { "epoch": 2.1691176470588234, "grad_norm": 0.04052772745490074, "learning_rate": 9.016848212284487e-05, "loss": 0.0074, "step": 11206 }, { "epoch": 2.169311145510836, "grad_norm": 0.042318567633628845, "learning_rate": 9.016677025985205e-05, "loss": 0.007, "step": 11207 }, { "epoch": 2.1695046439628483, "grad_norm": 0.030230415984988213, "learning_rate": 9.016505826611783e-05, "loss": 0.0066, "step": 11208 }, { "epoch": 2.1696981424148607, "grad_norm": 0.05239086598157883, "learning_rate": 9.01633461416486e-05, "loss": 0.0076, "step": 11209 }, { "epoch": 2.169891640866873, "grad_norm": 0.061251118779182434, "learning_rate": 9.01616338864507e-05, "loss": 0.0079, "step": 11210 }, { "epoch": 2.1700851393188856, "grad_norm": 0.05683550611138344, "learning_rate": 9.01599215005305e-05, "loss": 0.0099, "step": 11211 }, { "epoch": 2.170278637770898, "grad_norm": 0.08508024364709854, "learning_rate": 9.015820898389441e-05, "loss": 0.0072, "step": 11212 }, { "epoch": 2.17047213622291, "grad_norm": 0.059598661959171295, "learning_rate": 9.015649633654875e-05, "loss": 0.0074, "step": 11213 }, { "epoch": 2.1706656346749225, "grad_norm": 0.08178387582302094, "learning_rate": 9.015478355849988e-05, "loss": 0.007, "step": 11214 }, { "epoch": 2.170859133126935, "grad_norm": 0.06120051443576813, "learning_rate": 9.015307064975421e-05, "loss": 0.0069, "step": 11215 }, { "epoch": 2.1710526315789473, "grad_norm": 0.078671894967556, "learning_rate": 9.015135761031806e-05, "loss": 0.0074, "step": 11216 }, { "epoch": 2.17124613003096, "grad_norm": 0.07448604702949524, "learning_rate": 9.014964444019785e-05, "loss": 0.0077, "step": 11217 }, { "epoch": 2.1714396284829722, "grad_norm": 0.08164441585540771, 
"learning_rate": 9.014793113939991e-05, "loss": 0.0075, "step": 11218 }, { "epoch": 2.1716331269349847, "grad_norm": 0.05856606736779213, "learning_rate": 9.014621770793062e-05, "loss": 0.0056, "step": 11219 }, { "epoch": 2.171826625386997, "grad_norm": 0.07163868099451065, "learning_rate": 9.014450414579637e-05, "loss": 0.0062, "step": 11220 }, { "epoch": 2.172020123839009, "grad_norm": 0.0467524379491806, "learning_rate": 9.01427904530035e-05, "loss": 0.0077, "step": 11221 }, { "epoch": 2.1722136222910216, "grad_norm": 0.08246947079896927, "learning_rate": 9.01410766295584e-05, "loss": 0.0076, "step": 11222 }, { "epoch": 2.172407120743034, "grad_norm": 0.05302814766764641, "learning_rate": 9.013936267546743e-05, "loss": 0.0063, "step": 11223 }, { "epoch": 2.1726006191950464, "grad_norm": 0.0742369294166565, "learning_rate": 9.013764859073697e-05, "loss": 0.0068, "step": 11224 }, { "epoch": 2.172794117647059, "grad_norm": 0.10985719412565231, "learning_rate": 9.013593437537338e-05, "loss": 0.0073, "step": 11225 }, { "epoch": 2.1729876160990713, "grad_norm": 0.10984636098146439, "learning_rate": 9.013422002938305e-05, "loss": 0.0084, "step": 11226 }, { "epoch": 2.1731811145510838, "grad_norm": 0.15323467552661896, "learning_rate": 9.013250555277236e-05, "loss": 0.0079, "step": 11227 }, { "epoch": 2.173374613003096, "grad_norm": 0.142430379986763, "learning_rate": 9.013079094554767e-05, "loss": 0.006, "step": 11228 }, { "epoch": 2.173568111455108, "grad_norm": 0.16461637616157532, "learning_rate": 9.012907620771534e-05, "loss": 0.007, "step": 11229 }, { "epoch": 2.1737616099071206, "grad_norm": 0.0995408296585083, "learning_rate": 9.012736133928176e-05, "loss": 0.0081, "step": 11230 }, { "epoch": 2.173955108359133, "grad_norm": 0.11372873932123184, "learning_rate": 9.012564634025333e-05, "loss": 0.0068, "step": 11231 }, { "epoch": 2.1741486068111455, "grad_norm": 0.18021591007709503, "learning_rate": 9.01239312106364e-05, "loss": 0.0082, "step": 11232 }, { "epoch": 2.174342105263158, "grad_norm": 0.19144508242607117, "learning_rate": 9.012221595043732e-05, "loss": 0.0079, "step": 11233 }, { "epoch": 2.1745356037151704, "grad_norm": 0.21189257502555847, "learning_rate": 9.01205005596625e-05, "loss": 0.0085, "step": 11234 }, { "epoch": 2.174729102167183, "grad_norm": 0.19808779656887054, "learning_rate": 9.011878503831833e-05, "loss": 0.008, "step": 11235 }, { "epoch": 2.174922600619195, "grad_norm": 0.2118055373430252, "learning_rate": 9.011706938641117e-05, "loss": 0.0065, "step": 11236 }, { "epoch": 2.1751160990712073, "grad_norm": 0.12854458391666412, "learning_rate": 9.011535360394739e-05, "loss": 0.0085, "step": 11237 }, { "epoch": 2.1753095975232197, "grad_norm": 0.2274707555770874, "learning_rate": 9.011363769093338e-05, "loss": 0.0088, "step": 11238 }, { "epoch": 2.175503095975232, "grad_norm": 0.15616488456726074, "learning_rate": 9.011192164737553e-05, "loss": 0.0073, "step": 11239 }, { "epoch": 2.1756965944272446, "grad_norm": 0.14008952677249908, "learning_rate": 9.01102054732802e-05, "loss": 0.0077, "step": 11240 }, { "epoch": 2.175890092879257, "grad_norm": 0.1994226574897766, "learning_rate": 9.010848916865377e-05, "loss": 0.009, "step": 11241 }, { "epoch": 2.1760835913312695, "grad_norm": 0.07484941929578781, "learning_rate": 9.010677273350263e-05, "loss": 0.0071, "step": 11242 }, { "epoch": 2.176277089783282, "grad_norm": 0.22128014266490936, "learning_rate": 9.010505616783315e-05, "loss": 0.0091, "step": 11243 }, { "epoch": 2.176470588235294, "grad_norm": 
0.09998620301485062, "learning_rate": 9.010333947165175e-05, "loss": 0.008, "step": 11244 }, { "epoch": 2.1766640866873064, "grad_norm": 0.13343073427677155, "learning_rate": 9.010162264496476e-05, "loss": 0.0071, "step": 11245 }, { "epoch": 2.176857585139319, "grad_norm": 0.11043117195367813, "learning_rate": 9.009990568777859e-05, "loss": 0.0077, "step": 11246 }, { "epoch": 2.1770510835913313, "grad_norm": 0.09689541906118393, "learning_rate": 9.009818860009963e-05, "loss": 0.0091, "step": 11247 }, { "epoch": 2.1772445820433437, "grad_norm": 0.07107948511838913, "learning_rate": 9.009647138193423e-05, "loss": 0.0067, "step": 11248 }, { "epoch": 2.177438080495356, "grad_norm": 0.07086324691772461, "learning_rate": 9.009475403328882e-05, "loss": 0.0073, "step": 11249 }, { "epoch": 2.1776315789473686, "grad_norm": 0.07412231713533401, "learning_rate": 9.009303655416975e-05, "loss": 0.0088, "step": 11250 }, { "epoch": 2.1778250773993806, "grad_norm": 0.07471446692943573, "learning_rate": 9.009131894458342e-05, "loss": 0.0084, "step": 11251 }, { "epoch": 2.178018575851393, "grad_norm": 0.057840775698423386, "learning_rate": 9.008960120453623e-05, "loss": 0.0069, "step": 11252 }, { "epoch": 2.1782120743034055, "grad_norm": 0.054217878729104996, "learning_rate": 9.008788333403453e-05, "loss": 0.006, "step": 11253 }, { "epoch": 2.178405572755418, "grad_norm": 0.11281068623065948, "learning_rate": 9.008616533308472e-05, "loss": 0.0076, "step": 11254 }, { "epoch": 2.1785990712074303, "grad_norm": 0.12422601133584976, "learning_rate": 9.008444720169321e-05, "loss": 0.0065, "step": 11255 }, { "epoch": 2.178792569659443, "grad_norm": 0.13830368220806122, "learning_rate": 9.008272893986636e-05, "loss": 0.0101, "step": 11256 }, { "epoch": 2.1789860681114552, "grad_norm": 0.11676381528377533, "learning_rate": 9.008101054761058e-05, "loss": 0.0085, "step": 11257 }, { "epoch": 2.1791795665634677, "grad_norm": 0.19197487831115723, "learning_rate": 9.007929202493223e-05, "loss": 0.0082, "step": 11258 }, { "epoch": 2.1793730650154797, "grad_norm": 0.07682666927576065, "learning_rate": 9.007757337183772e-05, "loss": 0.0087, "step": 11259 }, { "epoch": 2.179566563467492, "grad_norm": 0.1992921382188797, "learning_rate": 9.007585458833344e-05, "loss": 0.0085, "step": 11260 }, { "epoch": 2.1797600619195046, "grad_norm": 0.11498374491930008, "learning_rate": 9.007413567442577e-05, "loss": 0.0102, "step": 11261 }, { "epoch": 2.179953560371517, "grad_norm": 0.12042506039142609, "learning_rate": 9.007241663012111e-05, "loss": 0.0083, "step": 11262 }, { "epoch": 2.1801470588235294, "grad_norm": 0.15846021473407745, "learning_rate": 9.007069745542586e-05, "loss": 0.0074, "step": 11263 }, { "epoch": 2.180340557275542, "grad_norm": 0.05326756462454796, "learning_rate": 9.006897815034638e-05, "loss": 0.0069, "step": 11264 }, { "epoch": 2.1805340557275543, "grad_norm": 0.16518142819404602, "learning_rate": 9.006725871488909e-05, "loss": 0.0091, "step": 11265 }, { "epoch": 2.1807275541795668, "grad_norm": 0.09982657432556152, "learning_rate": 9.006553914906036e-05, "loss": 0.0081, "step": 11266 }, { "epoch": 2.1809210526315788, "grad_norm": 0.09075430780649185, "learning_rate": 9.006381945286661e-05, "loss": 0.0071, "step": 11267 }, { "epoch": 2.181114551083591, "grad_norm": 0.11941725015640259, "learning_rate": 9.006209962631422e-05, "loss": 0.0082, "step": 11268 }, { "epoch": 2.1813080495356036, "grad_norm": 0.05693470686674118, "learning_rate": 9.006037966940957e-05, "loss": 0.008, "step": 11269 }, { "epoch": 
2.181501547987616, "grad_norm": 0.11388491839170456, "learning_rate": 9.005865958215908e-05, "loss": 0.0073, "step": 11270 }, { "epoch": 2.1816950464396285, "grad_norm": 0.09566328674554825, "learning_rate": 9.005693936456913e-05, "loss": 0.0074, "step": 11271 }, { "epoch": 2.181888544891641, "grad_norm": 0.035353031009435654, "learning_rate": 9.005521901664612e-05, "loss": 0.0071, "step": 11272 }, { "epoch": 2.1820820433436534, "grad_norm": 0.11766781657934189, "learning_rate": 9.005349853839642e-05, "loss": 0.0083, "step": 11273 }, { "epoch": 2.182275541795666, "grad_norm": 0.04498666897416115, "learning_rate": 9.005177792982648e-05, "loss": 0.0075, "step": 11274 }, { "epoch": 2.182469040247678, "grad_norm": 0.0585586242377758, "learning_rate": 9.005005719094265e-05, "loss": 0.0058, "step": 11275 }, { "epoch": 2.1826625386996903, "grad_norm": 0.11288539320230484, "learning_rate": 9.004833632175135e-05, "loss": 0.0078, "step": 11276 }, { "epoch": 2.1828560371517027, "grad_norm": 0.0681406632065773, "learning_rate": 9.004661532225898e-05, "loss": 0.0059, "step": 11277 }, { "epoch": 2.183049535603715, "grad_norm": 0.10079185664653778, "learning_rate": 9.004489419247192e-05, "loss": 0.0071, "step": 11278 }, { "epoch": 2.1832430340557276, "grad_norm": 0.06675395369529724, "learning_rate": 9.004317293239657e-05, "loss": 0.0093, "step": 11279 }, { "epoch": 2.18343653250774, "grad_norm": 0.15909042954444885, "learning_rate": 9.004145154203934e-05, "loss": 0.0084, "step": 11280 }, { "epoch": 2.1836300309597525, "grad_norm": 0.0766630470752716, "learning_rate": 9.003973002140664e-05, "loss": 0.007, "step": 11281 }, { "epoch": 2.1838235294117645, "grad_norm": 0.1343040019273758, "learning_rate": 9.003800837050486e-05, "loss": 0.0068, "step": 11282 }, { "epoch": 2.184017027863777, "grad_norm": 0.11041434854269028, "learning_rate": 9.00362865893404e-05, "loss": 0.0083, "step": 11283 }, { "epoch": 2.1842105263157894, "grad_norm": 0.11460047215223312, "learning_rate": 9.003456467791964e-05, "loss": 0.0096, "step": 11284 }, { "epoch": 2.184404024767802, "grad_norm": 0.16784851253032684, "learning_rate": 9.0032842636249e-05, "loss": 0.0089, "step": 11285 }, { "epoch": 2.1845975232198143, "grad_norm": 0.13044753670692444, "learning_rate": 9.003112046433491e-05, "loss": 0.0079, "step": 11286 }, { "epoch": 2.1847910216718267, "grad_norm": 0.15456213057041168, "learning_rate": 9.002939816218373e-05, "loss": 0.0066, "step": 11287 }, { "epoch": 2.184984520123839, "grad_norm": 0.11518829315900803, "learning_rate": 9.002767572980188e-05, "loss": 0.009, "step": 11288 }, { "epoch": 2.1851780185758516, "grad_norm": 0.12816192209720612, "learning_rate": 9.002595316719576e-05, "loss": 0.0083, "step": 11289 }, { "epoch": 2.1853715170278636, "grad_norm": 0.07962312549352646, "learning_rate": 9.002423047437178e-05, "loss": 0.0083, "step": 11290 }, { "epoch": 2.185565015479876, "grad_norm": 0.11234643310308456, "learning_rate": 9.002250765133635e-05, "loss": 0.0069, "step": 11291 }, { "epoch": 2.1857585139318885, "grad_norm": 0.05807432904839516, "learning_rate": 9.002078469809584e-05, "loss": 0.0068, "step": 11292 }, { "epoch": 2.185952012383901, "grad_norm": 0.1260974407196045, "learning_rate": 9.00190616146567e-05, "loss": 0.0078, "step": 11293 }, { "epoch": 2.1861455108359134, "grad_norm": 0.060899171978235245, "learning_rate": 9.001733840102533e-05, "loss": 0.007, "step": 11294 }, { "epoch": 2.186339009287926, "grad_norm": 0.122889444231987, "learning_rate": 9.00156150572081e-05, "loss": 0.0081, "step": 11295 }, { 
"epoch": 2.1865325077399382, "grad_norm": 0.06958457082509995, "learning_rate": 9.001389158321147e-05, "loss": 0.0064, "step": 11296 }, { "epoch": 2.1867260061919502, "grad_norm": 0.10221949964761734, "learning_rate": 9.00121679790418e-05, "loss": 0.0076, "step": 11297 }, { "epoch": 2.1869195046439627, "grad_norm": 0.08371419459581375, "learning_rate": 9.00104442447055e-05, "loss": 0.0081, "step": 11298 }, { "epoch": 2.187113003095975, "grad_norm": 0.0896788015961647, "learning_rate": 9.000872038020903e-05, "loss": 0.0074, "step": 11299 }, { "epoch": 2.1873065015479876, "grad_norm": 0.0964186042547226, "learning_rate": 9.000699638555874e-05, "loss": 0.0069, "step": 11300 }, { "epoch": 2.1875, "grad_norm": 0.08571532368659973, "learning_rate": 9.000527226076106e-05, "loss": 0.0091, "step": 11301 }, { "epoch": 2.1876934984520124, "grad_norm": 0.12280336022377014, "learning_rate": 9.000354800582243e-05, "loss": 0.0064, "step": 11302 }, { "epoch": 2.187886996904025, "grad_norm": 0.0703219622373581, "learning_rate": 9.00018236207492e-05, "loss": 0.0064, "step": 11303 }, { "epoch": 2.1880804953560373, "grad_norm": 0.14785166084766388, "learning_rate": 9.000009910554786e-05, "loss": 0.007, "step": 11304 }, { "epoch": 2.1882739938080498, "grad_norm": 0.0628303736448288, "learning_rate": 8.999837446022474e-05, "loss": 0.0064, "step": 11305 }, { "epoch": 2.1884674922600618, "grad_norm": 0.13622801005840302, "learning_rate": 8.999664968478629e-05, "loss": 0.0063, "step": 11306 }, { "epoch": 2.188660990712074, "grad_norm": 0.07376023381948471, "learning_rate": 8.999492477923892e-05, "loss": 0.0058, "step": 11307 }, { "epoch": 2.1888544891640866, "grad_norm": 0.11621836572885513, "learning_rate": 8.999319974358904e-05, "loss": 0.0071, "step": 11308 }, { "epoch": 2.189047987616099, "grad_norm": 0.09462813287973404, "learning_rate": 8.999147457784309e-05, "loss": 0.0057, "step": 11309 }, { "epoch": 2.1892414860681115, "grad_norm": 0.10485180467367172, "learning_rate": 8.998974928200745e-05, "loss": 0.0079, "step": 11310 }, { "epoch": 2.189434984520124, "grad_norm": 0.07602955400943756, "learning_rate": 8.998802385608852e-05, "loss": 0.007, "step": 11311 }, { "epoch": 2.1896284829721364, "grad_norm": 0.08222600817680359, "learning_rate": 8.998629830009276e-05, "loss": 0.0071, "step": 11312 }, { "epoch": 2.1898219814241484, "grad_norm": 0.06949897855520248, "learning_rate": 8.998457261402655e-05, "loss": 0.007, "step": 11313 }, { "epoch": 2.190015479876161, "grad_norm": 0.07644850015640259, "learning_rate": 8.998284679789634e-05, "loss": 0.0066, "step": 11314 }, { "epoch": 2.1902089783281733, "grad_norm": 0.06416864693164825, "learning_rate": 8.998112085170851e-05, "loss": 0.0083, "step": 11315 }, { "epoch": 2.1904024767801857, "grad_norm": 0.049099791795015335, "learning_rate": 8.997939477546949e-05, "loss": 0.0074, "step": 11316 }, { "epoch": 2.190595975232198, "grad_norm": 0.05853021889925003, "learning_rate": 8.997766856918571e-05, "loss": 0.0059, "step": 11317 }, { "epoch": 2.1907894736842106, "grad_norm": 0.05395672470331192, "learning_rate": 8.997594223286357e-05, "loss": 0.0079, "step": 11318 }, { "epoch": 2.190982972136223, "grad_norm": 0.05056116729974747, "learning_rate": 8.997421576650951e-05, "loss": 0.0068, "step": 11319 }, { "epoch": 2.1911764705882355, "grad_norm": 0.053557705134153366, "learning_rate": 8.997248917012991e-05, "loss": 0.0059, "step": 11320 }, { "epoch": 2.1913699690402475, "grad_norm": 0.040758829563856125, "learning_rate": 8.997076244373123e-05, "loss": 0.008, "step": 11321 
}, { "epoch": 2.19156346749226, "grad_norm": 0.0678199902176857, "learning_rate": 8.996903558731987e-05, "loss": 0.0068, "step": 11322 }, { "epoch": 2.1917569659442724, "grad_norm": 0.05434779077768326, "learning_rate": 8.996730860090224e-05, "loss": 0.008, "step": 11323 }, { "epoch": 2.191950464396285, "grad_norm": 0.07253772765398026, "learning_rate": 8.99655814844848e-05, "loss": 0.0075, "step": 11324 }, { "epoch": 2.1921439628482973, "grad_norm": 0.058556631207466125, "learning_rate": 8.996385423807392e-05, "loss": 0.0058, "step": 11325 }, { "epoch": 2.1923374613003097, "grad_norm": 0.043174684047698975, "learning_rate": 8.996212686167603e-05, "loss": 0.0084, "step": 11326 }, { "epoch": 2.192530959752322, "grad_norm": 0.08626372367143631, "learning_rate": 8.99603993552976e-05, "loss": 0.0084, "step": 11327 }, { "epoch": 2.192724458204334, "grad_norm": 0.03902127593755722, "learning_rate": 8.995867171894502e-05, "loss": 0.0061, "step": 11328 }, { "epoch": 2.1929179566563466, "grad_norm": 0.07653819769620895, "learning_rate": 8.995694395262467e-05, "loss": 0.0067, "step": 11329 }, { "epoch": 2.193111455108359, "grad_norm": 0.04478324204683304, "learning_rate": 8.995521605634307e-05, "loss": 0.0066, "step": 11330 }, { "epoch": 2.1933049535603715, "grad_norm": 0.03514040261507034, "learning_rate": 8.995348803010656e-05, "loss": 0.0075, "step": 11331 }, { "epoch": 2.193498452012384, "grad_norm": 0.06933320313692093, "learning_rate": 8.99517598739216e-05, "loss": 0.0074, "step": 11332 }, { "epoch": 2.1936919504643964, "grad_norm": 0.05566501244902611, "learning_rate": 8.995003158779461e-05, "loss": 0.0067, "step": 11333 }, { "epoch": 2.193885448916409, "grad_norm": 0.059986382722854614, "learning_rate": 8.994830317173202e-05, "loss": 0.0083, "step": 11334 }, { "epoch": 2.1940789473684212, "grad_norm": 0.07887177914381027, "learning_rate": 8.994657462574023e-05, "loss": 0.0073, "step": 11335 }, { "epoch": 2.1942724458204332, "grad_norm": 0.03137782961130142, "learning_rate": 8.994484594982571e-05, "loss": 0.0069, "step": 11336 }, { "epoch": 2.1944659442724457, "grad_norm": 0.07342015206813812, "learning_rate": 8.994311714399485e-05, "loss": 0.0059, "step": 11337 }, { "epoch": 2.194659442724458, "grad_norm": 0.048583101481199265, "learning_rate": 8.99413882082541e-05, "loss": 0.0074, "step": 11338 }, { "epoch": 2.1948529411764706, "grad_norm": 0.062429383397102356, "learning_rate": 8.993965914260987e-05, "loss": 0.0064, "step": 11339 }, { "epoch": 2.195046439628483, "grad_norm": 0.061429720371961594, "learning_rate": 8.99379299470686e-05, "loss": 0.0068, "step": 11340 }, { "epoch": 2.1952399380804954, "grad_norm": 0.06450236588716507, "learning_rate": 8.99362006216367e-05, "loss": 0.0067, "step": 11341 }, { "epoch": 2.195433436532508, "grad_norm": 0.054448097944259644, "learning_rate": 8.993447116632064e-05, "loss": 0.0085, "step": 11342 }, { "epoch": 2.1956269349845203, "grad_norm": 0.07517939805984497, "learning_rate": 8.993274158112681e-05, "loss": 0.0062, "step": 11343 }, { "epoch": 2.1958204334365323, "grad_norm": 0.05636381357908249, "learning_rate": 8.993101186606167e-05, "loss": 0.0068, "step": 11344 }, { "epoch": 2.1960139318885448, "grad_norm": 0.0850786492228508, "learning_rate": 8.992928202113163e-05, "loss": 0.0075, "step": 11345 }, { "epoch": 2.196207430340557, "grad_norm": 0.0767926424741745, "learning_rate": 8.99275520463431e-05, "loss": 0.0069, "step": 11346 }, { "epoch": 2.1964009287925697, "grad_norm": 0.06501340866088867, "learning_rate": 8.992582194170257e-05, "loss": 
0.0074, "step": 11347 }, { "epoch": 2.196594427244582, "grad_norm": 0.10707405209541321, "learning_rate": 8.992409170721642e-05, "loss": 0.0084, "step": 11348 }, { "epoch": 2.1967879256965945, "grad_norm": 0.05835513398051262, "learning_rate": 8.99223613428911e-05, "loss": 0.0073, "step": 11349 }, { "epoch": 2.196981424148607, "grad_norm": 0.10206020623445511, "learning_rate": 8.992063084873307e-05, "loss": 0.0072, "step": 11350 }, { "epoch": 2.1971749226006194, "grad_norm": 0.07121903449296951, "learning_rate": 8.991890022474872e-05, "loss": 0.0076, "step": 11351 }, { "epoch": 2.1973684210526314, "grad_norm": 0.0648738443851471, "learning_rate": 8.991716947094452e-05, "loss": 0.0073, "step": 11352 }, { "epoch": 2.197561919504644, "grad_norm": 0.06729083508253098, "learning_rate": 8.991543858732687e-05, "loss": 0.008, "step": 11353 }, { "epoch": 2.1977554179566563, "grad_norm": 0.06584338843822479, "learning_rate": 8.991370757390221e-05, "loss": 0.0075, "step": 11354 }, { "epoch": 2.1979489164086687, "grad_norm": 0.10985402017831802, "learning_rate": 8.991197643067701e-05, "loss": 0.0074, "step": 11355 }, { "epoch": 2.198142414860681, "grad_norm": 0.06990538537502289, "learning_rate": 8.991024515765768e-05, "loss": 0.007, "step": 11356 }, { "epoch": 2.1983359133126936, "grad_norm": 0.08618656545877457, "learning_rate": 8.990851375485067e-05, "loss": 0.0078, "step": 11357 }, { "epoch": 2.198529411764706, "grad_norm": 0.05680143088102341, "learning_rate": 8.99067822222624e-05, "loss": 0.0076, "step": 11358 }, { "epoch": 2.198722910216718, "grad_norm": 0.08088276535272598, "learning_rate": 8.990505055989931e-05, "loss": 0.007, "step": 11359 }, { "epoch": 2.1989164086687305, "grad_norm": 0.052107080817222595, "learning_rate": 8.990331876776785e-05, "loss": 0.008, "step": 11360 }, { "epoch": 2.199109907120743, "grad_norm": 0.06749039143323898, "learning_rate": 8.990158684587445e-05, "loss": 0.0062, "step": 11361 }, { "epoch": 2.1993034055727554, "grad_norm": 0.07783278077840805, "learning_rate": 8.989985479422553e-05, "loss": 0.0073, "step": 11362 }, { "epoch": 2.199496904024768, "grad_norm": 0.06089998409152031, "learning_rate": 8.989812261282758e-05, "loss": 0.0068, "step": 11363 }, { "epoch": 2.1996904024767803, "grad_norm": 0.08533774316310883, "learning_rate": 8.989639030168699e-05, "loss": 0.0079, "step": 11364 }, { "epoch": 2.1998839009287927, "grad_norm": 0.07469253987073898, "learning_rate": 8.989465786081022e-05, "loss": 0.0072, "step": 11365 }, { "epoch": 2.200077399380805, "grad_norm": 0.07075811922550201, "learning_rate": 8.989292529020374e-05, "loss": 0.0078, "step": 11366 }, { "epoch": 2.200270897832817, "grad_norm": 0.10247904807329178, "learning_rate": 8.989119258987392e-05, "loss": 0.0071, "step": 11367 }, { "epoch": 2.2004643962848296, "grad_norm": 0.06763307750225067, "learning_rate": 8.988945975982726e-05, "loss": 0.0073, "step": 11368 }, { "epoch": 2.200657894736842, "grad_norm": 0.08263963460922241, "learning_rate": 8.988772680007019e-05, "loss": 0.0084, "step": 11369 }, { "epoch": 2.2008513931888545, "grad_norm": 0.0824618861079216, "learning_rate": 8.988599371060914e-05, "loss": 0.0099, "step": 11370 }, { "epoch": 2.201044891640867, "grad_norm": 0.09724219143390656, "learning_rate": 8.988426049145056e-05, "loss": 0.0075, "step": 11371 }, { "epoch": 2.2012383900928794, "grad_norm": 0.08548096567392349, "learning_rate": 8.988252714260091e-05, "loss": 0.0064, "step": 11372 }, { "epoch": 2.201431888544892, "grad_norm": 0.06189766526222229, "learning_rate": 
8.988079366406662e-05, "loss": 0.0077, "step": 11373 }, { "epoch": 2.201625386996904, "grad_norm": 0.09957544505596161, "learning_rate": 8.987906005585411e-05, "loss": 0.0064, "step": 11374 }, { "epoch": 2.2018188854489162, "grad_norm": 0.03745089843869209, "learning_rate": 8.987732631796989e-05, "loss": 0.0086, "step": 11375 }, { "epoch": 2.2020123839009287, "grad_norm": 0.09658583253622055, "learning_rate": 8.987559245042032e-05, "loss": 0.0065, "step": 11376 }, { "epoch": 2.202205882352941, "grad_norm": 0.026093820109963417, "learning_rate": 8.987385845321192e-05, "loss": 0.006, "step": 11377 }, { "epoch": 2.2023993808049536, "grad_norm": 0.10001066327095032, "learning_rate": 8.987212432635109e-05, "loss": 0.0077, "step": 11378 }, { "epoch": 2.202592879256966, "grad_norm": 0.04724699258804321, "learning_rate": 8.987039006984431e-05, "loss": 0.008, "step": 11379 }, { "epoch": 2.2027863777089784, "grad_norm": 0.07945774495601654, "learning_rate": 8.986865568369798e-05, "loss": 0.0063, "step": 11380 }, { "epoch": 2.202979876160991, "grad_norm": 0.10052437335252762, "learning_rate": 8.986692116791863e-05, "loss": 0.0076, "step": 11381 }, { "epoch": 2.203173374613003, "grad_norm": 0.04262392967939377, "learning_rate": 8.986518652251264e-05, "loss": 0.008, "step": 11382 }, { "epoch": 2.2033668730650153, "grad_norm": 0.12929372489452362, "learning_rate": 8.986345174748645e-05, "loss": 0.0065, "step": 11383 }, { "epoch": 2.2035603715170278, "grad_norm": 0.04928405210375786, "learning_rate": 8.986171684284656e-05, "loss": 0.0076, "step": 11384 }, { "epoch": 2.20375386996904, "grad_norm": 0.08412440121173859, "learning_rate": 8.98599818085994e-05, "loss": 0.0073, "step": 11385 }, { "epoch": 2.2039473684210527, "grad_norm": 0.04804588109254837, "learning_rate": 8.985824664475141e-05, "loss": 0.0085, "step": 11386 }, { "epoch": 2.204140866873065, "grad_norm": 0.04685770720243454, "learning_rate": 8.985651135130904e-05, "loss": 0.0064, "step": 11387 }, { "epoch": 2.2043343653250775, "grad_norm": 0.04250685125589371, "learning_rate": 8.985477592827875e-05, "loss": 0.0076, "step": 11388 }, { "epoch": 2.20452786377709, "grad_norm": 0.03363249823451042, "learning_rate": 8.985304037566702e-05, "loss": 0.007, "step": 11389 }, { "epoch": 2.204721362229102, "grad_norm": 0.03458487242460251, "learning_rate": 8.985130469348025e-05, "loss": 0.008, "step": 11390 }, { "epoch": 2.2049148606811144, "grad_norm": 0.04424705356359482, "learning_rate": 8.984956888172492e-05, "loss": 0.0057, "step": 11391 }, { "epoch": 2.205108359133127, "grad_norm": 0.04254757985472679, "learning_rate": 8.984783294040748e-05, "loss": 0.0076, "step": 11392 }, { "epoch": 2.2053018575851393, "grad_norm": 0.04201832786202431, "learning_rate": 8.984609686953437e-05, "loss": 0.0071, "step": 11393 }, { "epoch": 2.2054953560371517, "grad_norm": 0.04154789820313454, "learning_rate": 8.984436066911209e-05, "loss": 0.0068, "step": 11394 }, { "epoch": 2.205688854489164, "grad_norm": 0.0548202283680439, "learning_rate": 8.984262433914704e-05, "loss": 0.0072, "step": 11395 }, { "epoch": 2.2058823529411766, "grad_norm": 0.07166899740695953, "learning_rate": 8.984088787964571e-05, "loss": 0.008, "step": 11396 }, { "epoch": 2.206075851393189, "grad_norm": 0.057191330939531326, "learning_rate": 8.983915129061455e-05, "loss": 0.0077, "step": 11397 }, { "epoch": 2.206269349845201, "grad_norm": 0.09262462705373764, "learning_rate": 8.983741457205999e-05, "loss": 0.0074, "step": 11398 }, { "epoch": 2.2064628482972135, "grad_norm": 0.0707152932882309, 
"learning_rate": 8.983567772398851e-05, "loss": 0.0082, "step": 11399 }, { "epoch": 2.206656346749226, "grad_norm": 0.12797655165195465, "learning_rate": 8.983394074640656e-05, "loss": 0.0059, "step": 11400 }, { "epoch": 2.2068498452012384, "grad_norm": 0.07214601337909698, "learning_rate": 8.98322036393206e-05, "loss": 0.0075, "step": 11401 }, { "epoch": 2.207043343653251, "grad_norm": 0.057993706315755844, "learning_rate": 8.983046640273711e-05, "loss": 0.0098, "step": 11402 }, { "epoch": 2.2072368421052633, "grad_norm": 0.15319697558879852, "learning_rate": 8.982872903666252e-05, "loss": 0.0086, "step": 11403 }, { "epoch": 2.2074303405572757, "grad_norm": 0.04398294538259506, "learning_rate": 8.98269915411033e-05, "loss": 0.0092, "step": 11404 }, { "epoch": 2.2076238390092877, "grad_norm": 0.10592087358236313, "learning_rate": 8.982525391606591e-05, "loss": 0.0085, "step": 11405 }, { "epoch": 2.2078173374613, "grad_norm": 0.1043490469455719, "learning_rate": 8.98235161615568e-05, "loss": 0.0087, "step": 11406 }, { "epoch": 2.2080108359133126, "grad_norm": 0.07891425490379333, "learning_rate": 8.982177827758243e-05, "loss": 0.0081, "step": 11407 }, { "epoch": 2.208204334365325, "grad_norm": 0.13296611607074738, "learning_rate": 8.982004026414928e-05, "loss": 0.0071, "step": 11408 }, { "epoch": 2.2083978328173375, "grad_norm": 0.083563894033432, "learning_rate": 8.98183021212638e-05, "loss": 0.0085, "step": 11409 }, { "epoch": 2.20859133126935, "grad_norm": 0.1052069142460823, "learning_rate": 8.981656384893244e-05, "loss": 0.0067, "step": 11410 }, { "epoch": 2.2087848297213624, "grad_norm": 0.13111743330955505, "learning_rate": 8.981482544716169e-05, "loss": 0.0073, "step": 11411 }, { "epoch": 2.208978328173375, "grad_norm": 0.10455211251974106, "learning_rate": 8.9813086915958e-05, "loss": 0.0061, "step": 11412 }, { "epoch": 2.209171826625387, "grad_norm": 0.1175435334444046, "learning_rate": 8.981134825532781e-05, "loss": 0.0096, "step": 11413 }, { "epoch": 2.2093653250773992, "grad_norm": 0.1221795603632927, "learning_rate": 8.980960946527761e-05, "loss": 0.0072, "step": 11414 }, { "epoch": 2.2095588235294117, "grad_norm": 0.09237170964479446, "learning_rate": 8.980787054581386e-05, "loss": 0.0067, "step": 11415 }, { "epoch": 2.209752321981424, "grad_norm": 0.20470260083675385, "learning_rate": 8.980613149694304e-05, "loss": 0.0081, "step": 11416 }, { "epoch": 2.2099458204334366, "grad_norm": 0.09849552810192108, "learning_rate": 8.980439231867158e-05, "loss": 0.0085, "step": 11417 }, { "epoch": 2.210139318885449, "grad_norm": 0.1953674554824829, "learning_rate": 8.980265301100599e-05, "loss": 0.008, "step": 11418 }, { "epoch": 2.2103328173374615, "grad_norm": 0.11449633538722992, "learning_rate": 8.98009135739527e-05, "loss": 0.0094, "step": 11419 }, { "epoch": 2.2105263157894735, "grad_norm": 0.17475004494190216, "learning_rate": 8.979917400751818e-05, "loss": 0.007, "step": 11420 }, { "epoch": 2.210719814241486, "grad_norm": 0.1540174037218094, "learning_rate": 8.97974343117089e-05, "loss": 0.0074, "step": 11421 }, { "epoch": 2.2109133126934983, "grad_norm": 0.12979000806808472, "learning_rate": 8.979569448653134e-05, "loss": 0.0079, "step": 11422 }, { "epoch": 2.2111068111455108, "grad_norm": 0.16231681406497955, "learning_rate": 8.979395453199198e-05, "loss": 0.0072, "step": 11423 }, { "epoch": 2.211300309597523, "grad_norm": 0.1021457239985466, "learning_rate": 8.979221444809726e-05, "loss": 0.0077, "step": 11424 }, { "epoch": 2.2114938080495357, "grad_norm": 
0.17067064344882965, "learning_rate": 8.979047423485365e-05, "loss": 0.0086, "step": 11425 }, { "epoch": 2.211687306501548, "grad_norm": 0.08549363166093826, "learning_rate": 8.978873389226764e-05, "loss": 0.0082, "step": 11426 }, { "epoch": 2.2118808049535605, "grad_norm": 0.20584847033023834, "learning_rate": 8.978699342034568e-05, "loss": 0.0069, "step": 11427 }, { "epoch": 2.212074303405573, "grad_norm": 0.08132242411375046, "learning_rate": 8.978525281909424e-05, "loss": 0.0069, "step": 11428 }, { "epoch": 2.212267801857585, "grad_norm": 0.14399199187755585, "learning_rate": 8.978351208851984e-05, "loss": 0.0064, "step": 11429 }, { "epoch": 2.2124613003095974, "grad_norm": 0.12465649098157883, "learning_rate": 8.978177122862889e-05, "loss": 0.008, "step": 11430 }, { "epoch": 2.21265479876161, "grad_norm": 0.09824515879154205, "learning_rate": 8.978003023942788e-05, "loss": 0.0061, "step": 11431 }, { "epoch": 2.2128482972136223, "grad_norm": 0.13972437381744385, "learning_rate": 8.977828912092328e-05, "loss": 0.0068, "step": 11432 }, { "epoch": 2.2130417956656347, "grad_norm": 0.07085785269737244, "learning_rate": 8.977654787312159e-05, "loss": 0.0092, "step": 11433 }, { "epoch": 2.213235294117647, "grad_norm": 0.18010880053043365, "learning_rate": 8.977480649602926e-05, "loss": 0.0066, "step": 11434 }, { "epoch": 2.2134287925696596, "grad_norm": 0.08785189688205719, "learning_rate": 8.977306498965276e-05, "loss": 0.0065, "step": 11435 }, { "epoch": 2.2136222910216716, "grad_norm": 0.12914571166038513, "learning_rate": 8.977132335399857e-05, "loss": 0.0083, "step": 11436 }, { "epoch": 2.213815789473684, "grad_norm": 0.14207814633846283, "learning_rate": 8.976958158907318e-05, "loss": 0.008, "step": 11437 }, { "epoch": 2.2140092879256965, "grad_norm": 0.0717276781797409, "learning_rate": 8.976783969488307e-05, "loss": 0.0088, "step": 11438 }, { "epoch": 2.214202786377709, "grad_norm": 0.14229713380336761, "learning_rate": 8.976609767143467e-05, "loss": 0.0079, "step": 11439 }, { "epoch": 2.2143962848297214, "grad_norm": 0.09256850928068161, "learning_rate": 8.976435551873449e-05, "loss": 0.0066, "step": 11440 }, { "epoch": 2.214589783281734, "grad_norm": 0.2066931426525116, "learning_rate": 8.976261323678901e-05, "loss": 0.0096, "step": 11441 }, { "epoch": 2.2147832817337463, "grad_norm": 0.07906816899776459, "learning_rate": 8.97608708256047e-05, "loss": 0.0092, "step": 11442 }, { "epoch": 2.2149767801857587, "grad_norm": 0.14672334492206573, "learning_rate": 8.9759128285188e-05, "loss": 0.0079, "step": 11443 }, { "epoch": 2.2151702786377707, "grad_norm": 0.11371416598558426, "learning_rate": 8.975738561554549e-05, "loss": 0.0078, "step": 11444 }, { "epoch": 2.215363777089783, "grad_norm": 0.08224964141845703, "learning_rate": 8.975564281668353e-05, "loss": 0.006, "step": 11445 }, { "epoch": 2.2155572755417956, "grad_norm": 0.17325380444526672, "learning_rate": 8.975389988860869e-05, "loss": 0.0067, "step": 11446 }, { "epoch": 2.215750773993808, "grad_norm": 0.03756226226687431, "learning_rate": 8.975215683132739e-05, "loss": 0.007, "step": 11447 }, { "epoch": 2.2159442724458205, "grad_norm": 0.16503645479679108, "learning_rate": 8.975041364484616e-05, "loss": 0.0061, "step": 11448 }, { "epoch": 2.216137770897833, "grad_norm": 0.12580130994319916, "learning_rate": 8.974867032917144e-05, "loss": 0.0078, "step": 11449 }, { "epoch": 2.2163312693498454, "grad_norm": 0.11294309049844742, "learning_rate": 8.974692688430972e-05, "loss": 0.0076, "step": 11450 }, { "epoch": 2.2165247678018574, 
"grad_norm": 0.19034643471240997, "learning_rate": 8.97451833102675e-05, "loss": 0.0079, "step": 11451 }, { "epoch": 2.21671826625387, "grad_norm": 0.0650244802236557, "learning_rate": 8.974343960705124e-05, "loss": 0.0073, "step": 11452 }, { "epoch": 2.2169117647058822, "grad_norm": 0.18108133971691132, "learning_rate": 8.974169577466743e-05, "loss": 0.007, "step": 11453 }, { "epoch": 2.2171052631578947, "grad_norm": 0.08811677247285843, "learning_rate": 8.973995181312257e-05, "loss": 0.0078, "step": 11454 }, { "epoch": 2.217298761609907, "grad_norm": 0.08762910217046738, "learning_rate": 8.973820772242312e-05, "loss": 0.0065, "step": 11455 }, { "epoch": 2.2174922600619196, "grad_norm": 0.11496712267398834, "learning_rate": 8.973646350257558e-05, "loss": 0.0059, "step": 11456 }, { "epoch": 2.217685758513932, "grad_norm": 0.03651395067572594, "learning_rate": 8.973471915358642e-05, "loss": 0.008, "step": 11457 }, { "epoch": 2.2178792569659445, "grad_norm": 0.08394251763820648, "learning_rate": 8.973297467546213e-05, "loss": 0.0068, "step": 11458 }, { "epoch": 2.2180727554179565, "grad_norm": 0.06755969673395157, "learning_rate": 8.97312300682092e-05, "loss": 0.0067, "step": 11459 }, { "epoch": 2.218266253869969, "grad_norm": 0.05588719993829727, "learning_rate": 8.972948533183412e-05, "loss": 0.0065, "step": 11460 }, { "epoch": 2.2184597523219813, "grad_norm": 0.07213655114173889, "learning_rate": 8.972774046634337e-05, "loss": 0.0076, "step": 11461 }, { "epoch": 2.218653250773994, "grad_norm": 0.06713379174470901, "learning_rate": 8.972599547174343e-05, "loss": 0.0071, "step": 11462 }, { "epoch": 2.218846749226006, "grad_norm": 0.05347537249326706, "learning_rate": 8.972425034804081e-05, "loss": 0.0076, "step": 11463 }, { "epoch": 2.2190402476780187, "grad_norm": 0.09263908118009567, "learning_rate": 8.972250509524198e-05, "loss": 0.0064, "step": 11464 }, { "epoch": 2.219233746130031, "grad_norm": 0.054717909544706345, "learning_rate": 8.972075971335343e-05, "loss": 0.0077, "step": 11465 }, { "epoch": 2.2194272445820435, "grad_norm": 0.09302569925785065, "learning_rate": 8.971901420238165e-05, "loss": 0.0076, "step": 11466 }, { "epoch": 2.2196207430340555, "grad_norm": 0.03375052660703659, "learning_rate": 8.971726856233312e-05, "loss": 0.0062, "step": 11467 }, { "epoch": 2.219814241486068, "grad_norm": 0.110092394053936, "learning_rate": 8.971552279321435e-05, "loss": 0.0065, "step": 11468 }, { "epoch": 2.2200077399380804, "grad_norm": 0.04302137345075607, "learning_rate": 8.971377689503183e-05, "loss": 0.0073, "step": 11469 }, { "epoch": 2.220201238390093, "grad_norm": 0.069670669734478, "learning_rate": 8.971203086779202e-05, "loss": 0.0066, "step": 11470 }, { "epoch": 2.2203947368421053, "grad_norm": 0.03796982020139694, "learning_rate": 8.971028471150145e-05, "loss": 0.0072, "step": 11471 }, { "epoch": 2.2205882352941178, "grad_norm": 0.07862554490566254, "learning_rate": 8.970853842616658e-05, "loss": 0.0068, "step": 11472 }, { "epoch": 2.22078173374613, "grad_norm": 0.06234905123710632, "learning_rate": 8.970679201179393e-05, "loss": 0.0069, "step": 11473 }, { "epoch": 2.2209752321981426, "grad_norm": 0.050237026065588, "learning_rate": 8.970504546838998e-05, "loss": 0.0078, "step": 11474 }, { "epoch": 2.2211687306501546, "grad_norm": 0.0482335090637207, "learning_rate": 8.97032987959612e-05, "loss": 0.0067, "step": 11475 }, { "epoch": 2.221362229102167, "grad_norm": 0.0764918327331543, "learning_rate": 8.970155199451413e-05, "loss": 0.0065, "step": 11476 }, { "epoch": 
2.2215557275541795, "grad_norm": 0.09059633314609528, "learning_rate": 8.969980506405523e-05, "loss": 0.0069, "step": 11477 }, { "epoch": 2.221749226006192, "grad_norm": 0.0670713484287262, "learning_rate": 8.9698058004591e-05, "loss": 0.0071, "step": 11478 }, { "epoch": 2.2219427244582044, "grad_norm": 0.07532372325658798, "learning_rate": 8.969631081612797e-05, "loss": 0.007, "step": 11479 }, { "epoch": 2.222136222910217, "grad_norm": 0.049256570637226105, "learning_rate": 8.969456349867258e-05, "loss": 0.0068, "step": 11480 }, { "epoch": 2.2223297213622293, "grad_norm": 0.0568540059030056, "learning_rate": 8.969281605223136e-05, "loss": 0.006, "step": 11481 }, { "epoch": 2.2225232198142413, "grad_norm": 0.06614764779806137, "learning_rate": 8.969106847681078e-05, "loss": 0.0073, "step": 11482 }, { "epoch": 2.2227167182662537, "grad_norm": 0.04292828217148781, "learning_rate": 8.968932077241738e-05, "loss": 0.0063, "step": 11483 }, { "epoch": 2.222910216718266, "grad_norm": 0.08031365275382996, "learning_rate": 8.968757293905762e-05, "loss": 0.0077, "step": 11484 }, { "epoch": 2.2231037151702786, "grad_norm": 0.036635976284742355, "learning_rate": 8.968582497673801e-05, "loss": 0.0068, "step": 11485 }, { "epoch": 2.223297213622291, "grad_norm": 0.06589723378419876, "learning_rate": 8.968407688546504e-05, "loss": 0.0069, "step": 11486 }, { "epoch": 2.2234907120743035, "grad_norm": 0.060419656336307526, "learning_rate": 8.968232866524525e-05, "loss": 0.0076, "step": 11487 }, { "epoch": 2.223684210526316, "grad_norm": 0.031186819076538086, "learning_rate": 8.968058031608508e-05, "loss": 0.0073, "step": 11488 }, { "epoch": 2.2238777089783284, "grad_norm": 0.05510587990283966, "learning_rate": 8.967883183799107e-05, "loss": 0.007, "step": 11489 }, { "epoch": 2.2240712074303404, "grad_norm": 0.027172386646270752, "learning_rate": 8.96770832309697e-05, "loss": 0.0077, "step": 11490 }, { "epoch": 2.224264705882353, "grad_norm": 0.07404810935258865, "learning_rate": 8.967533449502749e-05, "loss": 0.0073, "step": 11491 }, { "epoch": 2.2244582043343653, "grad_norm": 0.02972603216767311, "learning_rate": 8.967358563017092e-05, "loss": 0.0066, "step": 11492 }, { "epoch": 2.2246517027863777, "grad_norm": 0.06121017038822174, "learning_rate": 8.967183663640652e-05, "loss": 0.0079, "step": 11493 }, { "epoch": 2.22484520123839, "grad_norm": 0.05685978755354881, "learning_rate": 8.967008751374075e-05, "loss": 0.0072, "step": 11494 }, { "epoch": 2.2250386996904026, "grad_norm": 0.048207711428403854, "learning_rate": 8.966833826218016e-05, "loss": 0.0066, "step": 11495 }, { "epoch": 2.225232198142415, "grad_norm": 0.0667518675327301, "learning_rate": 8.966658888173121e-05, "loss": 0.0077, "step": 11496 }, { "epoch": 2.225425696594427, "grad_norm": 0.036320652812719345, "learning_rate": 8.966483937240044e-05, "loss": 0.0072, "step": 11497 }, { "epoch": 2.2256191950464395, "grad_norm": 0.11507776379585266, "learning_rate": 8.966308973419434e-05, "loss": 0.0064, "step": 11498 }, { "epoch": 2.225812693498452, "grad_norm": 0.0592055581510067, "learning_rate": 8.96613399671194e-05, "loss": 0.0087, "step": 11499 }, { "epoch": 2.2260061919504643, "grad_norm": 0.11812034249305725, "learning_rate": 8.965959007118216e-05, "loss": 0.0072, "step": 11500 }, { "epoch": 2.226199690402477, "grad_norm": 0.05466322973370552, "learning_rate": 8.965784004638908e-05, "loss": 0.0058, "step": 11501 }, { "epoch": 2.2263931888544892, "grad_norm": 0.09959591180086136, "learning_rate": 8.96560898927467e-05, "loss": 0.0075, "step": 
11502 }, { "epoch": 2.2265866873065017, "grad_norm": 0.08983415365219116, "learning_rate": 8.965433961026152e-05, "loss": 0.0061, "step": 11503 }, { "epoch": 2.226780185758514, "grad_norm": 0.06020250916481018, "learning_rate": 8.965258919894005e-05, "loss": 0.008, "step": 11504 }, { "epoch": 2.2269736842105265, "grad_norm": 0.11748459935188293, "learning_rate": 8.965083865878879e-05, "loss": 0.0072, "step": 11505 }, { "epoch": 2.2271671826625385, "grad_norm": 0.04884699359536171, "learning_rate": 8.964908798981423e-05, "loss": 0.0082, "step": 11506 }, { "epoch": 2.227360681114551, "grad_norm": 0.10676009207963943, "learning_rate": 8.964733719202292e-05, "loss": 0.0068, "step": 11507 }, { "epoch": 2.2275541795665634, "grad_norm": 0.10473016649484634, "learning_rate": 8.964558626542134e-05, "loss": 0.0068, "step": 11508 }, { "epoch": 2.227747678018576, "grad_norm": 0.08442436903715134, "learning_rate": 8.9643835210016e-05, "loss": 0.0079, "step": 11509 }, { "epoch": 2.2279411764705883, "grad_norm": 0.16509565711021423, "learning_rate": 8.964208402581343e-05, "loss": 0.0069, "step": 11510 }, { "epoch": 2.2281346749226008, "grad_norm": 0.05869303643703461, "learning_rate": 8.964033271282013e-05, "loss": 0.0063, "step": 11511 }, { "epoch": 2.228328173374613, "grad_norm": 0.16169826686382294, "learning_rate": 8.963858127104261e-05, "loss": 0.0076, "step": 11512 }, { "epoch": 2.228521671826625, "grad_norm": 0.03999979794025421, "learning_rate": 8.963682970048737e-05, "loss": 0.0072, "step": 11513 }, { "epoch": 2.2287151702786376, "grad_norm": 0.14096930623054504, "learning_rate": 8.963507800116093e-05, "loss": 0.0071, "step": 11514 }, { "epoch": 2.22890866873065, "grad_norm": 0.0789215937256813, "learning_rate": 8.963332617306981e-05, "loss": 0.0083, "step": 11515 }, { "epoch": 2.2291021671826625, "grad_norm": 0.12681236863136292, "learning_rate": 8.963157421622052e-05, "loss": 0.006, "step": 11516 }, { "epoch": 2.229295665634675, "grad_norm": 0.11696800589561462, "learning_rate": 8.962982213061954e-05, "loss": 0.0064, "step": 11517 }, { "epoch": 2.2294891640866874, "grad_norm": 0.10960618406534195, "learning_rate": 8.962806991627344e-05, "loss": 0.0076, "step": 11518 }, { "epoch": 2.2296826625387, "grad_norm": 0.14610454440116882, "learning_rate": 8.962631757318872e-05, "loss": 0.0082, "step": 11519 }, { "epoch": 2.2298761609907123, "grad_norm": 0.09696223586797714, "learning_rate": 8.962456510137187e-05, "loss": 0.0064, "step": 11520 }, { "epoch": 2.2300696594427243, "grad_norm": 0.1308172196149826, "learning_rate": 8.962281250082942e-05, "loss": 0.0069, "step": 11521 }, { "epoch": 2.2302631578947367, "grad_norm": 0.09518597275018692, "learning_rate": 8.962105977156789e-05, "loss": 0.0065, "step": 11522 }, { "epoch": 2.230456656346749, "grad_norm": 0.12975861132144928, "learning_rate": 8.961930691359378e-05, "loss": 0.0071, "step": 11523 }, { "epoch": 2.2306501547987616, "grad_norm": 0.08223807066679001, "learning_rate": 8.961755392691362e-05, "loss": 0.007, "step": 11524 }, { "epoch": 2.230843653250774, "grad_norm": 0.10804127156734467, "learning_rate": 8.961580081153395e-05, "loss": 0.0063, "step": 11525 }, { "epoch": 2.2310371517027865, "grad_norm": 0.08309195190668106, "learning_rate": 8.961404756746123e-05, "loss": 0.008, "step": 11526 }, { "epoch": 2.231230650154799, "grad_norm": 0.0761384591460228, "learning_rate": 8.961229419470201e-05, "loss": 0.009, "step": 11527 }, { "epoch": 2.231424148606811, "grad_norm": 0.058770157396793365, "learning_rate": 8.961054069326282e-05, "loss": 
0.0084, "step": 11528 }, { "epoch": 2.2316176470588234, "grad_norm": 0.05638968572020531, "learning_rate": 8.960878706315017e-05, "loss": 0.0065, "step": 11529 }, { "epoch": 2.231811145510836, "grad_norm": 0.05500847473740578, "learning_rate": 8.960703330437057e-05, "loss": 0.0065, "step": 11530 }, { "epoch": 2.2320046439628483, "grad_norm": 0.055861543864011765, "learning_rate": 8.960527941693056e-05, "loss": 0.0086, "step": 11531 }, { "epoch": 2.2321981424148607, "grad_norm": 0.06118110567331314, "learning_rate": 8.960352540083664e-05, "loss": 0.0074, "step": 11532 }, { "epoch": 2.232391640866873, "grad_norm": 0.059289637953042984, "learning_rate": 8.960177125609534e-05, "loss": 0.0069, "step": 11533 }, { "epoch": 2.2325851393188856, "grad_norm": 0.05033387988805771, "learning_rate": 8.960001698271318e-05, "loss": 0.007, "step": 11534 }, { "epoch": 2.232778637770898, "grad_norm": 0.09248685091733932, "learning_rate": 8.959826258069667e-05, "loss": 0.0066, "step": 11535 }, { "epoch": 2.23297213622291, "grad_norm": 0.02958551049232483, "learning_rate": 8.959650805005236e-05, "loss": 0.0065, "step": 11536 }, { "epoch": 2.2331656346749225, "grad_norm": 0.07440545409917831, "learning_rate": 8.959475339078675e-05, "loss": 0.0072, "step": 11537 }, { "epoch": 2.233359133126935, "grad_norm": 0.051496829837560654, "learning_rate": 8.959299860290637e-05, "loss": 0.0071, "step": 11538 }, { "epoch": 2.2335526315789473, "grad_norm": 0.04191264882683754, "learning_rate": 8.959124368641775e-05, "loss": 0.008, "step": 11539 }, { "epoch": 2.23374613003096, "grad_norm": 0.04902955889701843, "learning_rate": 8.958948864132742e-05, "loss": 0.0088, "step": 11540 }, { "epoch": 2.2339396284829722, "grad_norm": 0.04107789695262909, "learning_rate": 8.95877334676419e-05, "loss": 0.0072, "step": 11541 }, { "epoch": 2.2341331269349847, "grad_norm": 0.05015050992369652, "learning_rate": 8.958597816536768e-05, "loss": 0.0076, "step": 11542 }, { "epoch": 2.234326625386997, "grad_norm": 0.06392256915569305, "learning_rate": 8.958422273451132e-05, "loss": 0.006, "step": 11543 }, { "epoch": 2.234520123839009, "grad_norm": 0.04018174484372139, "learning_rate": 8.958246717507937e-05, "loss": 0.0074, "step": 11544 }, { "epoch": 2.2347136222910216, "grad_norm": 0.07922198623418808, "learning_rate": 8.95807114870783e-05, "loss": 0.0094, "step": 11545 }, { "epoch": 2.234907120743034, "grad_norm": 0.038812603801488876, "learning_rate": 8.957895567051468e-05, "loss": 0.0069, "step": 11546 }, { "epoch": 2.2351006191950464, "grad_norm": 0.057842496782541275, "learning_rate": 8.957719972539504e-05, "loss": 0.0065, "step": 11547 }, { "epoch": 2.235294117647059, "grad_norm": 0.04823347181081772, "learning_rate": 8.957544365172585e-05, "loss": 0.0074, "step": 11548 }, { "epoch": 2.2354876160990713, "grad_norm": 0.05648735165596008, "learning_rate": 8.95736874495137e-05, "loss": 0.0083, "step": 11549 }, { "epoch": 2.2356811145510838, "grad_norm": 0.05584803223609924, "learning_rate": 8.95719311187651e-05, "loss": 0.0056, "step": 11550 }, { "epoch": 2.235874613003096, "grad_norm": 0.04820903018116951, "learning_rate": 8.957017465948658e-05, "loss": 0.0056, "step": 11551 }, { "epoch": 2.236068111455108, "grad_norm": 0.05834230035543442, "learning_rate": 8.956841807168466e-05, "loss": 0.0083, "step": 11552 }, { "epoch": 2.2362616099071206, "grad_norm": 0.05977584421634674, "learning_rate": 8.956666135536589e-05, "loss": 0.0064, "step": 11553 }, { "epoch": 2.236455108359133, "grad_norm": 0.050832267850637436, "learning_rate": 
8.95649045105368e-05, "loss": 0.0066, "step": 11554 }, { "epoch": 2.2366486068111455, "grad_norm": 0.07454097270965576, "learning_rate": 8.956314753720389e-05, "loss": 0.0064, "step": 11555 }, { "epoch": 2.236842105263158, "grad_norm": 0.046954426914453506, "learning_rate": 8.956139043537372e-05, "loss": 0.0066, "step": 11556 }, { "epoch": 2.2370356037151704, "grad_norm": 0.05984830856323242, "learning_rate": 8.955963320505282e-05, "loss": 0.0061, "step": 11557 }, { "epoch": 2.237229102167183, "grad_norm": 0.0709155946969986, "learning_rate": 8.955787584624772e-05, "loss": 0.0072, "step": 11558 }, { "epoch": 2.237422600619195, "grad_norm": 0.04749803617596626, "learning_rate": 8.955611835896496e-05, "loss": 0.0078, "step": 11559 }, { "epoch": 2.2376160990712073, "grad_norm": 0.08106354624032974, "learning_rate": 8.955436074321105e-05, "loss": 0.0062, "step": 11560 }, { "epoch": 2.2378095975232197, "grad_norm": 0.06320151686668396, "learning_rate": 8.955260299899255e-05, "loss": 0.0084, "step": 11561 }, { "epoch": 2.238003095975232, "grad_norm": 0.06278648972511292, "learning_rate": 8.955084512631598e-05, "loss": 0.0075, "step": 11562 }, { "epoch": 2.2381965944272446, "grad_norm": 0.056979481130838394, "learning_rate": 8.954908712518789e-05, "loss": 0.0073, "step": 11563 }, { "epoch": 2.238390092879257, "grad_norm": 0.04626109078526497, "learning_rate": 8.95473289956148e-05, "loss": 0.008, "step": 11564 }, { "epoch": 2.2385835913312695, "grad_norm": 0.04988056793808937, "learning_rate": 8.954557073760327e-05, "loss": 0.0064, "step": 11565 }, { "epoch": 2.238777089783282, "grad_norm": 0.03501631319522858, "learning_rate": 8.95438123511598e-05, "loss": 0.0066, "step": 11566 }, { "epoch": 2.238970588235294, "grad_norm": 0.051621001213788986, "learning_rate": 8.954205383629097e-05, "loss": 0.0061, "step": 11567 }, { "epoch": 2.2391640866873064, "grad_norm": 0.046477850526571274, "learning_rate": 8.954029519300326e-05, "loss": 0.0097, "step": 11568 }, { "epoch": 2.239357585139319, "grad_norm": 0.05369573459029198, "learning_rate": 8.953853642130327e-05, "loss": 0.0073, "step": 11569 }, { "epoch": 2.2395510835913313, "grad_norm": 0.04773155599832535, "learning_rate": 8.953677752119751e-05, "loss": 0.0065, "step": 11570 }, { "epoch": 2.2397445820433437, "grad_norm": 0.044510118663311005, "learning_rate": 8.953501849269251e-05, "loss": 0.0054, "step": 11571 }, { "epoch": 2.239938080495356, "grad_norm": 0.03464917838573456, "learning_rate": 8.953325933579483e-05, "loss": 0.008, "step": 11572 }, { "epoch": 2.2401315789473686, "grad_norm": 0.03171064704656601, "learning_rate": 8.9531500050511e-05, "loss": 0.008, "step": 11573 }, { "epoch": 2.2403250773993806, "grad_norm": 0.02450229972600937, "learning_rate": 8.952974063684756e-05, "loss": 0.0058, "step": 11574 }, { "epoch": 2.240518575851393, "grad_norm": 0.047227390110492706, "learning_rate": 8.952798109481106e-05, "loss": 0.0077, "step": 11575 }, { "epoch": 2.2407120743034055, "grad_norm": 0.03479906916618347, "learning_rate": 8.952622142440803e-05, "loss": 0.0064, "step": 11576 }, { "epoch": 2.240905572755418, "grad_norm": 0.0639682486653328, "learning_rate": 8.952446162564501e-05, "loss": 0.0079, "step": 11577 }, { "epoch": 2.2410990712074303, "grad_norm": 0.05514460802078247, "learning_rate": 8.952270169852856e-05, "loss": 0.0074, "step": 11578 }, { "epoch": 2.241292569659443, "grad_norm": 0.05005914717912674, "learning_rate": 8.952094164306521e-05, "loss": 0.0068, "step": 11579 }, { "epoch": 2.2414860681114552, "grad_norm": 0.06842386722564697, 
"learning_rate": 8.951918145926148e-05, "loss": 0.007, "step": 11580 }, { "epoch": 2.2416795665634677, "grad_norm": 0.03909055516123772, "learning_rate": 8.951742114712397e-05, "loss": 0.0066, "step": 11581 }, { "epoch": 2.2418730650154797, "grad_norm": 0.07495111972093582, "learning_rate": 8.951566070665918e-05, "loss": 0.007, "step": 11582 }, { "epoch": 2.242066563467492, "grad_norm": 0.04292604327201843, "learning_rate": 8.951390013787367e-05, "loss": 0.0068, "step": 11583 }, { "epoch": 2.2422600619195046, "grad_norm": 0.07729621231555939, "learning_rate": 8.951213944077398e-05, "loss": 0.0079, "step": 11584 }, { "epoch": 2.242453560371517, "grad_norm": 0.06102701276540756, "learning_rate": 8.951037861536667e-05, "loss": 0.0073, "step": 11585 }, { "epoch": 2.2426470588235294, "grad_norm": 0.07093870639801025, "learning_rate": 8.950861766165828e-05, "loss": 0.0066, "step": 11586 }, { "epoch": 2.242840557275542, "grad_norm": 0.0910383090376854, "learning_rate": 8.950685657965533e-05, "loss": 0.0067, "step": 11587 }, { "epoch": 2.2430340557275543, "grad_norm": 0.03199225664138794, "learning_rate": 8.95050953693644e-05, "loss": 0.0057, "step": 11588 }, { "epoch": 2.2432275541795668, "grad_norm": 0.09266728162765503, "learning_rate": 8.950333403079203e-05, "loss": 0.0081, "step": 11589 }, { "epoch": 2.2434210526315788, "grad_norm": 0.025536859408020973, "learning_rate": 8.950157256394477e-05, "loss": 0.0055, "step": 11590 }, { "epoch": 2.243614551083591, "grad_norm": 0.08355066180229187, "learning_rate": 8.949981096882916e-05, "loss": 0.0076, "step": 11591 }, { "epoch": 2.2438080495356036, "grad_norm": 0.041307490319013596, "learning_rate": 8.949804924545175e-05, "loss": 0.0081, "step": 11592 }, { "epoch": 2.244001547987616, "grad_norm": 0.08161243051290512, "learning_rate": 8.94962873938191e-05, "loss": 0.0088, "step": 11593 }, { "epoch": 2.2441950464396285, "grad_norm": 0.0537060871720314, "learning_rate": 8.949452541393776e-05, "loss": 0.008, "step": 11594 }, { "epoch": 2.244388544891641, "grad_norm": 0.05092592164874077, "learning_rate": 8.949276330581427e-05, "loss": 0.0069, "step": 11595 }, { "epoch": 2.2445820433436534, "grad_norm": 0.0849243775010109, "learning_rate": 8.949100106945517e-05, "loss": 0.0069, "step": 11596 }, { "epoch": 2.244775541795666, "grad_norm": 0.05398360267281532, "learning_rate": 8.948923870486704e-05, "loss": 0.0065, "step": 11597 }, { "epoch": 2.244969040247678, "grad_norm": 0.12758220732212067, "learning_rate": 8.948747621205641e-05, "loss": 0.0078, "step": 11598 }, { "epoch": 2.2451625386996903, "grad_norm": 0.04503253847360611, "learning_rate": 8.948571359102985e-05, "loss": 0.0069, "step": 11599 }, { "epoch": 2.2453560371517027, "grad_norm": 0.08124000579118729, "learning_rate": 8.94839508417939e-05, "loss": 0.0065, "step": 11600 }, { "epoch": 2.245549535603715, "grad_norm": 0.08224878460168839, "learning_rate": 8.948218796435512e-05, "loss": 0.0072, "step": 11601 }, { "epoch": 2.2457430340557276, "grad_norm": 0.04981506988406181, "learning_rate": 8.948042495872007e-05, "loss": 0.0061, "step": 11602 }, { "epoch": 2.24593653250774, "grad_norm": 0.09655129164457321, "learning_rate": 8.947866182489529e-05, "loss": 0.0083, "step": 11603 }, { "epoch": 2.2461300309597525, "grad_norm": 0.03208170458674431, "learning_rate": 8.947689856288734e-05, "loss": 0.0073, "step": 11604 }, { "epoch": 2.2463235294117645, "grad_norm": 0.06670471280813217, "learning_rate": 8.947513517270278e-05, "loss": 0.0071, "step": 11605 }, { "epoch": 2.246517027863777, "grad_norm": 
0.09183592349290848, "learning_rate": 8.947337165434814e-05, "loss": 0.0078, "step": 11606 }, { "epoch": 2.2467105263157894, "grad_norm": 0.06910037249326706, "learning_rate": 8.947160800783002e-05, "loss": 0.0071, "step": 11607 }, { "epoch": 2.246904024767802, "grad_norm": 0.12686574459075928, "learning_rate": 8.946984423315495e-05, "loss": 0.0068, "step": 11608 }, { "epoch": 2.2470975232198143, "grad_norm": 0.08592087775468826, "learning_rate": 8.94680803303295e-05, "loss": 0.0077, "step": 11609 }, { "epoch": 2.2472910216718267, "grad_norm": 0.10902762413024902, "learning_rate": 8.946631629936021e-05, "loss": 0.0068, "step": 11610 }, { "epoch": 2.247484520123839, "grad_norm": 0.11629428714513779, "learning_rate": 8.946455214025365e-05, "loss": 0.0067, "step": 11611 }, { "epoch": 2.2476780185758516, "grad_norm": 0.06933245062828064, "learning_rate": 8.946278785301638e-05, "loss": 0.0062, "step": 11612 }, { "epoch": 2.2478715170278636, "grad_norm": 0.15029430389404297, "learning_rate": 8.946102343765496e-05, "loss": 0.0068, "step": 11613 }, { "epoch": 2.248065015479876, "grad_norm": 0.07933899760246277, "learning_rate": 8.945925889417594e-05, "loss": 0.008, "step": 11614 }, { "epoch": 2.2482585139318885, "grad_norm": 0.12887203693389893, "learning_rate": 8.945749422258589e-05, "loss": 0.0058, "step": 11615 }, { "epoch": 2.248452012383901, "grad_norm": 0.11481662839651108, "learning_rate": 8.945572942289138e-05, "loss": 0.0063, "step": 11616 }, { "epoch": 2.2486455108359134, "grad_norm": 0.10901401191949844, "learning_rate": 8.945396449509894e-05, "loss": 0.0084, "step": 11617 }, { "epoch": 2.248839009287926, "grad_norm": 0.08584319800138474, "learning_rate": 8.945219943921515e-05, "loss": 0.0076, "step": 11618 }, { "epoch": 2.2490325077399382, "grad_norm": 0.08290291577577591, "learning_rate": 8.945043425524658e-05, "loss": 0.0083, "step": 11619 }, { "epoch": 2.2492260061919502, "grad_norm": 0.08895797282457352, "learning_rate": 8.94486689431998e-05, "loss": 0.0069, "step": 11620 }, { "epoch": 2.2494195046439627, "grad_norm": 0.058955080807209015, "learning_rate": 8.944690350308131e-05, "loss": 0.0066, "step": 11621 }, { "epoch": 2.249613003095975, "grad_norm": 0.11506422609090805, "learning_rate": 8.944513793489777e-05, "loss": 0.0077, "step": 11622 }, { "epoch": 2.2498065015479876, "grad_norm": 0.02281489595770836, "learning_rate": 8.944337223865566e-05, "loss": 0.0058, "step": 11623 }, { "epoch": 2.25, "grad_norm": 0.10158150643110275, "learning_rate": 8.94416064143616e-05, "loss": 0.0082, "step": 11624 }, { "epoch": 2.2501934984520124, "grad_norm": 0.037522751837968826, "learning_rate": 8.943984046202212e-05, "loss": 0.0074, "step": 11625 }, { "epoch": 2.250386996904025, "grad_norm": 0.1103314608335495, "learning_rate": 8.943807438164382e-05, "loss": 0.007, "step": 11626 }, { "epoch": 2.2505804953560373, "grad_norm": 0.051576558500528336, "learning_rate": 8.943630817323322e-05, "loss": 0.0072, "step": 11627 }, { "epoch": 2.2507739938080498, "grad_norm": 0.1421392560005188, "learning_rate": 8.943454183679692e-05, "loss": 0.0081, "step": 11628 }, { "epoch": 2.2509674922600618, "grad_norm": 0.06133582815527916, "learning_rate": 8.94327753723415e-05, "loss": 0.007, "step": 11629 }, { "epoch": 2.251160990712074, "grad_norm": 0.16083034873008728, "learning_rate": 8.943100877987348e-05, "loss": 0.0072, "step": 11630 }, { "epoch": 2.2513544891640866, "grad_norm": 0.04393462836742401, "learning_rate": 8.942924205939946e-05, "loss": 0.0084, "step": 11631 }, { "epoch": 2.251547987616099, 
"grad_norm": 0.1538080871105194, "learning_rate": 8.9427475210926e-05, "loss": 0.0073, "step": 11632 }, { "epoch": 2.2517414860681115, "grad_norm": 0.07506129145622253, "learning_rate": 8.942570823445968e-05, "loss": 0.0067, "step": 11633 }, { "epoch": 2.251934984520124, "grad_norm": 0.1383805125951767, "learning_rate": 8.942394113000704e-05, "loss": 0.0079, "step": 11634 }, { "epoch": 2.2521284829721364, "grad_norm": 0.1053212583065033, "learning_rate": 8.942217389757468e-05, "loss": 0.0078, "step": 11635 }, { "epoch": 2.2523219814241484, "grad_norm": 0.09888654202222824, "learning_rate": 8.942040653716917e-05, "loss": 0.007, "step": 11636 }, { "epoch": 2.252515479876161, "grad_norm": 0.11409510672092438, "learning_rate": 8.941863904879705e-05, "loss": 0.0072, "step": 11637 }, { "epoch": 2.2527089783281733, "grad_norm": 0.0849517285823822, "learning_rate": 8.941687143246492e-05, "loss": 0.0073, "step": 11638 }, { "epoch": 2.2529024767801857, "grad_norm": 0.12408040463924408, "learning_rate": 8.941510368817935e-05, "loss": 0.0067, "step": 11639 }, { "epoch": 2.253095975232198, "grad_norm": 0.11919053643941879, "learning_rate": 8.941333581594689e-05, "loss": 0.0068, "step": 11640 }, { "epoch": 2.2532894736842106, "grad_norm": 0.10760853439569473, "learning_rate": 8.941156781577414e-05, "loss": 0.0075, "step": 11641 }, { "epoch": 2.253482972136223, "grad_norm": 0.15012596547603607, "learning_rate": 8.940979968766765e-05, "loss": 0.008, "step": 11642 }, { "epoch": 2.2536764705882355, "grad_norm": 0.06858168542385101, "learning_rate": 8.940803143163401e-05, "loss": 0.0067, "step": 11643 }, { "epoch": 2.2538699690402475, "grad_norm": 0.14442256093025208, "learning_rate": 8.940626304767978e-05, "loss": 0.0076, "step": 11644 }, { "epoch": 2.25406346749226, "grad_norm": 0.04922552779316902, "learning_rate": 8.940449453581155e-05, "loss": 0.0071, "step": 11645 }, { "epoch": 2.2542569659442724, "grad_norm": 0.0810270756483078, "learning_rate": 8.940272589603588e-05, "loss": 0.0064, "step": 11646 }, { "epoch": 2.254450464396285, "grad_norm": 0.09751661121845245, "learning_rate": 8.940095712835936e-05, "loss": 0.0081, "step": 11647 }, { "epoch": 2.2546439628482973, "grad_norm": 0.06580118834972382, "learning_rate": 8.939918823278856e-05, "loss": 0.0059, "step": 11648 }, { "epoch": 2.2548374613003097, "grad_norm": 0.10476844012737274, "learning_rate": 8.939741920933006e-05, "loss": 0.0074, "step": 11649 }, { "epoch": 2.255030959752322, "grad_norm": 0.0332033671438694, "learning_rate": 8.939565005799041e-05, "loss": 0.0082, "step": 11650 }, { "epoch": 2.255224458204334, "grad_norm": 0.11014322191476822, "learning_rate": 8.939388077877622e-05, "loss": 0.0087, "step": 11651 }, { "epoch": 2.2554179566563466, "grad_norm": 0.06634091585874557, "learning_rate": 8.939211137169405e-05, "loss": 0.0069, "step": 11652 }, { "epoch": 2.255611455108359, "grad_norm": 0.07498561590909958, "learning_rate": 8.939034183675048e-05, "loss": 0.0075, "step": 11653 }, { "epoch": 2.2558049535603715, "grad_norm": 0.06438500434160233, "learning_rate": 8.938857217395212e-05, "loss": 0.0074, "step": 11654 }, { "epoch": 2.255998452012384, "grad_norm": 0.03965993970632553, "learning_rate": 8.93868023833055e-05, "loss": 0.0081, "step": 11655 }, { "epoch": 2.2561919504643964, "grad_norm": 0.06555084139108658, "learning_rate": 8.938503246481722e-05, "loss": 0.0084, "step": 11656 }, { "epoch": 2.256385448916409, "grad_norm": 0.043360449373722076, "learning_rate": 8.938326241849387e-05, "loss": 0.0074, "step": 11657 }, { "epoch": 
2.2565789473684212, "grad_norm": 0.05402588099241257, "learning_rate": 8.938149224434203e-05, "loss": 0.0062, "step": 11658 }, { "epoch": 2.2567724458204337, "grad_norm": 0.03827632963657379, "learning_rate": 8.937972194236826e-05, "loss": 0.006, "step": 11659 }, { "epoch": 2.2569659442724457, "grad_norm": 0.04086169973015785, "learning_rate": 8.937795151257916e-05, "loss": 0.0083, "step": 11660 }, { "epoch": 2.257159442724458, "grad_norm": 0.035941317677497864, "learning_rate": 8.93761809549813e-05, "loss": 0.0078, "step": 11661 }, { "epoch": 2.2573529411764706, "grad_norm": 0.034805189818143845, "learning_rate": 8.937441026958129e-05, "loss": 0.0068, "step": 11662 }, { "epoch": 2.257546439628483, "grad_norm": 0.033795349299907684, "learning_rate": 8.937263945638567e-05, "loss": 0.0081, "step": 11663 }, { "epoch": 2.2577399380804954, "grad_norm": 0.04410603642463684, "learning_rate": 8.937086851540106e-05, "loss": 0.0084, "step": 11664 }, { "epoch": 2.257933436532508, "grad_norm": 0.03732924163341522, "learning_rate": 8.936909744663402e-05, "loss": 0.007, "step": 11665 }, { "epoch": 2.25812693498452, "grad_norm": 0.05853822082281113, "learning_rate": 8.936732625009115e-05, "loss": 0.0075, "step": 11666 }, { "epoch": 2.2583204334365323, "grad_norm": 0.09680918604135513, "learning_rate": 8.936555492577903e-05, "loss": 0.0074, "step": 11667 }, { "epoch": 2.2585139318885448, "grad_norm": 0.04959736764431, "learning_rate": 8.936378347370423e-05, "loss": 0.0068, "step": 11668 }, { "epoch": 2.258707430340557, "grad_norm": 0.092161163687706, "learning_rate": 8.936201189387338e-05, "loss": 0.0081, "step": 11669 }, { "epoch": 2.2589009287925697, "grad_norm": 0.0867418572306633, "learning_rate": 8.936024018629303e-05, "loss": 0.0073, "step": 11670 }, { "epoch": 2.259094427244582, "grad_norm": 0.05149141699075699, "learning_rate": 8.935846835096976e-05, "loss": 0.0061, "step": 11671 }, { "epoch": 2.2592879256965945, "grad_norm": 0.08810770511627197, "learning_rate": 8.935669638791016e-05, "loss": 0.0086, "step": 11672 }, { "epoch": 2.259481424148607, "grad_norm": 0.040749482810497284, "learning_rate": 8.935492429712084e-05, "loss": 0.0076, "step": 11673 }, { "epoch": 2.2596749226006194, "grad_norm": 0.08474738895893097, "learning_rate": 8.93531520786084e-05, "loss": 0.0063, "step": 11674 }, { "epoch": 2.2598684210526314, "grad_norm": 0.028978338465094566, "learning_rate": 8.935137973237938e-05, "loss": 0.0057, "step": 11675 }, { "epoch": 2.260061919504644, "grad_norm": 0.04785209894180298, "learning_rate": 8.93496072584404e-05, "loss": 0.0075, "step": 11676 }, { "epoch": 2.2602554179566563, "grad_norm": 0.08253102749586105, "learning_rate": 8.934783465679805e-05, "loss": 0.0062, "step": 11677 }, { "epoch": 2.2604489164086687, "grad_norm": 0.04498065635561943, "learning_rate": 8.93460619274589e-05, "loss": 0.0081, "step": 11678 }, { "epoch": 2.260642414860681, "grad_norm": 0.0911392867565155, "learning_rate": 8.934428907042957e-05, "loss": 0.0062, "step": 11679 }, { "epoch": 2.2608359133126936, "grad_norm": 0.06254597008228302, "learning_rate": 8.934251608571663e-05, "loss": 0.0058, "step": 11680 }, { "epoch": 2.261029411764706, "grad_norm": 0.06740348786115646, "learning_rate": 8.934074297332667e-05, "loss": 0.0088, "step": 11681 }, { "epoch": 2.261222910216718, "grad_norm": 0.10663005709648132, "learning_rate": 8.933896973326631e-05, "loss": 0.0081, "step": 11682 }, { "epoch": 2.2614164086687305, "grad_norm": 0.10757357627153397, "learning_rate": 8.93371963655421e-05, "loss": 0.009, "step": 11683 }, 
{ "epoch": 2.261609907120743, "grad_norm": 0.1033930703997612, "learning_rate": 8.933542287016065e-05, "loss": 0.0073, "step": 11684 }, { "epoch": 2.2618034055727554, "grad_norm": 0.09600517898797989, "learning_rate": 8.933364924712859e-05, "loss": 0.0061, "step": 11685 }, { "epoch": 2.261996904024768, "grad_norm": 0.08076589554548264, "learning_rate": 8.933187549645245e-05, "loss": 0.0076, "step": 11686 }, { "epoch": 2.2621904024767803, "grad_norm": 0.1304628998041153, "learning_rate": 8.933010161813887e-05, "loss": 0.0066, "step": 11687 }, { "epoch": 2.2623839009287927, "grad_norm": 0.08535682410001755, "learning_rate": 8.932832761219444e-05, "loss": 0.0072, "step": 11688 }, { "epoch": 2.262577399380805, "grad_norm": 0.13019399344921112, "learning_rate": 8.932655347862573e-05, "loss": 0.0079, "step": 11689 }, { "epoch": 2.262770897832817, "grad_norm": 0.13199764490127563, "learning_rate": 8.932477921743935e-05, "loss": 0.0084, "step": 11690 }, { "epoch": 2.2629643962848296, "grad_norm": 0.13010920584201813, "learning_rate": 8.932300482864193e-05, "loss": 0.0066, "step": 11691 }, { "epoch": 2.263157894736842, "grad_norm": 0.1352701336145401, "learning_rate": 8.932123031224002e-05, "loss": 0.0075, "step": 11692 }, { "epoch": 2.2633513931888545, "grad_norm": 0.11909178644418716, "learning_rate": 8.931945566824022e-05, "loss": 0.0077, "step": 11693 }, { "epoch": 2.263544891640867, "grad_norm": 0.1303517371416092, "learning_rate": 8.931768089664915e-05, "loss": 0.0069, "step": 11694 }, { "epoch": 2.2637383900928794, "grad_norm": 0.16367962956428528, "learning_rate": 8.93159059974734e-05, "loss": 0.0075, "step": 11695 }, { "epoch": 2.263931888544892, "grad_norm": 0.2171037793159485, "learning_rate": 8.931413097071956e-05, "loss": 0.0091, "step": 11696 }, { "epoch": 2.264125386996904, "grad_norm": 0.1637764424085617, "learning_rate": 8.931235581639423e-05, "loss": 0.0086, "step": 11697 }, { "epoch": 2.2643188854489162, "grad_norm": 0.1791100651025772, "learning_rate": 8.931058053450402e-05, "loss": 0.0065, "step": 11698 }, { "epoch": 2.2645123839009287, "grad_norm": 0.13833817839622498, "learning_rate": 8.930880512505554e-05, "loss": 0.0096, "step": 11699 }, { "epoch": 2.264705882352941, "grad_norm": 0.1695767045021057, "learning_rate": 8.930702958805536e-05, "loss": 0.0071, "step": 11700 }, { "epoch": 2.2648993808049536, "grad_norm": 0.15416735410690308, "learning_rate": 8.930525392351011e-05, "loss": 0.0071, "step": 11701 }, { "epoch": 2.265092879256966, "grad_norm": 0.1633516401052475, "learning_rate": 8.930347813142637e-05, "loss": 0.0071, "step": 11702 }, { "epoch": 2.2652863777089784, "grad_norm": 0.12859709560871124, "learning_rate": 8.930170221181076e-05, "loss": 0.007, "step": 11703 }, { "epoch": 2.265479876160991, "grad_norm": 0.14160273969173431, "learning_rate": 8.929992616466987e-05, "loss": 0.0075, "step": 11704 }, { "epoch": 2.2656733746130033, "grad_norm": 0.12328287214040756, "learning_rate": 8.92981499900103e-05, "loss": 0.0074, "step": 11705 }, { "epoch": 2.2658668730650153, "grad_norm": 0.15738865733146667, "learning_rate": 8.929637368783867e-05, "loss": 0.0068, "step": 11706 }, { "epoch": 2.2660603715170278, "grad_norm": 0.07371380180120468, "learning_rate": 8.929459725816156e-05, "loss": 0.0078, "step": 11707 }, { "epoch": 2.26625386996904, "grad_norm": 0.16590289771556854, "learning_rate": 8.92928207009856e-05, "loss": 0.0052, "step": 11708 }, { "epoch": 2.2664473684210527, "grad_norm": 0.10348815470933914, "learning_rate": 8.929104401631739e-05, "loss": 0.009, "step": 
11709 }, { "epoch": 2.266640866873065, "grad_norm": 0.16901686787605286, "learning_rate": 8.92892672041635e-05, "loss": 0.0071, "step": 11710 }, { "epoch": 2.2668343653250775, "grad_norm": 0.08152551203966141, "learning_rate": 8.928749026453058e-05, "loss": 0.0076, "step": 11711 }, { "epoch": 2.2670278637770895, "grad_norm": 0.12797121703624725, "learning_rate": 8.928571319742523e-05, "loss": 0.0068, "step": 11712 }, { "epoch": 2.267221362229102, "grad_norm": 0.1413613110780716, "learning_rate": 8.928393600285402e-05, "loss": 0.007, "step": 11713 }, { "epoch": 2.2674148606811144, "grad_norm": 0.1582627147436142, "learning_rate": 8.928215868082361e-05, "loss": 0.0076, "step": 11714 }, { "epoch": 2.267608359133127, "grad_norm": 0.12301088869571686, "learning_rate": 8.928038123134058e-05, "loss": 0.0065, "step": 11715 }, { "epoch": 2.2678018575851393, "grad_norm": 0.10063344985246658, "learning_rate": 8.927860365441151e-05, "loss": 0.0067, "step": 11716 }, { "epoch": 2.2679953560371517, "grad_norm": 0.13711045682430267, "learning_rate": 8.927682595004308e-05, "loss": 0.0081, "step": 11717 }, { "epoch": 2.268188854489164, "grad_norm": 0.079793781042099, "learning_rate": 8.927504811824183e-05, "loss": 0.0065, "step": 11718 }, { "epoch": 2.2683823529411766, "grad_norm": 0.14477412402629852, "learning_rate": 8.927327015901439e-05, "loss": 0.0065, "step": 11719 }, { "epoch": 2.268575851393189, "grad_norm": 0.06832653284072876, "learning_rate": 8.92714920723674e-05, "loss": 0.0085, "step": 11720 }, { "epoch": 2.268769349845201, "grad_norm": 0.12486269325017929, "learning_rate": 8.926971385830743e-05, "loss": 0.0064, "step": 11721 }, { "epoch": 2.2689628482972135, "grad_norm": 0.07397014647722244, "learning_rate": 8.92679355168411e-05, "loss": 0.0083, "step": 11722 }, { "epoch": 2.269156346749226, "grad_norm": 0.11064285784959793, "learning_rate": 8.926615704797506e-05, "loss": 0.0067, "step": 11723 }, { "epoch": 2.2693498452012384, "grad_norm": 0.07086368650197983, "learning_rate": 8.926437845171588e-05, "loss": 0.0071, "step": 11724 }, { "epoch": 2.269543343653251, "grad_norm": 0.07832328975200653, "learning_rate": 8.926259972807016e-05, "loss": 0.0086, "step": 11725 }, { "epoch": 2.2697368421052633, "grad_norm": 0.13707546889781952, "learning_rate": 8.926082087704457e-05, "loss": 0.0088, "step": 11726 }, { "epoch": 2.2699303405572757, "grad_norm": 0.07161065191030502, "learning_rate": 8.925904189864568e-05, "loss": 0.0081, "step": 11727 }, { "epoch": 2.2701238390092877, "grad_norm": 0.13911160826683044, "learning_rate": 8.92572627928801e-05, "loss": 0.0077, "step": 11728 }, { "epoch": 2.2703173374613, "grad_norm": 0.07697361707687378, "learning_rate": 8.925548355975447e-05, "loss": 0.0073, "step": 11729 }, { "epoch": 2.2705108359133126, "grad_norm": 0.09761203825473785, "learning_rate": 8.925370419927538e-05, "loss": 0.0078, "step": 11730 }, { "epoch": 2.270704334365325, "grad_norm": 0.08769391477108002, "learning_rate": 8.925192471144949e-05, "loss": 0.0081, "step": 11731 }, { "epoch": 2.2708978328173375, "grad_norm": 0.06567086279392242, "learning_rate": 8.925014509628335e-05, "loss": 0.0077, "step": 11732 }, { "epoch": 2.27109133126935, "grad_norm": 0.059624943882226944, "learning_rate": 8.924836535378363e-05, "loss": 0.008, "step": 11733 }, { "epoch": 2.2712848297213624, "grad_norm": 0.0476866140961647, "learning_rate": 8.924658548395691e-05, "loss": 0.0087, "step": 11734 }, { "epoch": 2.271478328173375, "grad_norm": 0.04119234159588814, "learning_rate": 8.924480548680984e-05, "loss": 0.0069, 
"step": 11735 }, { "epoch": 2.271671826625387, "grad_norm": 0.05107643082737923, "learning_rate": 8.924302536234902e-05, "loss": 0.0073, "step": 11736 }, { "epoch": 2.2718653250773992, "grad_norm": 0.036156363785266876, "learning_rate": 8.924124511058105e-05, "loss": 0.0072, "step": 11737 }, { "epoch": 2.2720588235294117, "grad_norm": 0.04258369281888008, "learning_rate": 8.923946473151258e-05, "loss": 0.0087, "step": 11738 }, { "epoch": 2.272252321981424, "grad_norm": 0.05605185404419899, "learning_rate": 8.923768422515022e-05, "loss": 0.0074, "step": 11739 }, { "epoch": 2.2724458204334366, "grad_norm": 0.04603336378931999, "learning_rate": 8.92359035915006e-05, "loss": 0.0068, "step": 11740 }, { "epoch": 2.272639318885449, "grad_norm": 0.06687600165605545, "learning_rate": 8.923412283057029e-05, "loss": 0.0081, "step": 11741 }, { "epoch": 2.2728328173374615, "grad_norm": 0.03346430882811546, "learning_rate": 8.923234194236599e-05, "loss": 0.0074, "step": 11742 }, { "epoch": 2.2730263157894735, "grad_norm": 0.06698141992092133, "learning_rate": 8.923056092689425e-05, "loss": 0.0074, "step": 11743 }, { "epoch": 2.273219814241486, "grad_norm": 0.048165883868932724, "learning_rate": 8.922877978416171e-05, "loss": 0.0066, "step": 11744 }, { "epoch": 2.2734133126934983, "grad_norm": 0.05226599797606468, "learning_rate": 8.922699851417503e-05, "loss": 0.0091, "step": 11745 }, { "epoch": 2.2736068111455108, "grad_norm": 0.03920602425932884, "learning_rate": 8.922521711694079e-05, "loss": 0.0064, "step": 11746 }, { "epoch": 2.273800309597523, "grad_norm": 0.04413118213415146, "learning_rate": 8.922343559246562e-05, "loss": 0.0056, "step": 11747 }, { "epoch": 2.2739938080495357, "grad_norm": 0.029792344197630882, "learning_rate": 8.922165394075615e-05, "loss": 0.007, "step": 11748 }, { "epoch": 2.274187306501548, "grad_norm": 0.08833564817905426, "learning_rate": 8.921987216181902e-05, "loss": 0.006, "step": 11749 }, { "epoch": 2.2743808049535605, "grad_norm": 0.06583963334560394, "learning_rate": 8.921809025566081e-05, "loss": 0.0071, "step": 11750 }, { "epoch": 2.274574303405573, "grad_norm": 0.053280699998140335, "learning_rate": 8.92163082222882e-05, "loss": 0.0063, "step": 11751 }, { "epoch": 2.274767801857585, "grad_norm": 0.047376058995723724, "learning_rate": 8.921452606170778e-05, "loss": 0.0086, "step": 11752 }, { "epoch": 2.2749613003095974, "grad_norm": 0.0510905459523201, "learning_rate": 8.921274377392617e-05, "loss": 0.008, "step": 11753 }, { "epoch": 2.27515479876161, "grad_norm": 0.05845530703663826, "learning_rate": 8.921096135895e-05, "loss": 0.0068, "step": 11754 }, { "epoch": 2.2753482972136223, "grad_norm": 0.05342772603034973, "learning_rate": 8.920917881678593e-05, "loss": 0.0074, "step": 11755 }, { "epoch": 2.2755417956656347, "grad_norm": 0.06812169402837753, "learning_rate": 8.920739614744055e-05, "loss": 0.0065, "step": 11756 }, { "epoch": 2.275735294117647, "grad_norm": 0.07111406326293945, "learning_rate": 8.920561335092052e-05, "loss": 0.0079, "step": 11757 }, { "epoch": 2.2759287925696596, "grad_norm": 0.05026806890964508, "learning_rate": 8.920383042723241e-05, "loss": 0.0065, "step": 11758 }, { "epoch": 2.2761222910216716, "grad_norm": 0.08414766937494278, "learning_rate": 8.920204737638292e-05, "loss": 0.007, "step": 11759 }, { "epoch": 2.276315789473684, "grad_norm": 0.042798224836587906, "learning_rate": 8.920026419837863e-05, "loss": 0.0062, "step": 11760 }, { "epoch": 2.2765092879256965, "grad_norm": 0.060878071933984756, "learning_rate": 
8.919848089322618e-05, "loss": 0.0076, "step": 11761 }, { "epoch": 2.276702786377709, "grad_norm": 0.0644795373082161, "learning_rate": 8.919669746093222e-05, "loss": 0.0078, "step": 11762 }, { "epoch": 2.2768962848297214, "grad_norm": 0.046497657895088196, "learning_rate": 8.919491390150337e-05, "loss": 0.0068, "step": 11763 }, { "epoch": 2.277089783281734, "grad_norm": 0.04287261515855789, "learning_rate": 8.919313021494623e-05, "loss": 0.0064, "step": 11764 }, { "epoch": 2.2772832817337463, "grad_norm": 0.0677858516573906, "learning_rate": 8.919134640126749e-05, "loss": 0.0068, "step": 11765 }, { "epoch": 2.2774767801857587, "grad_norm": 0.040897004306316376, "learning_rate": 8.918956246047372e-05, "loss": 0.0086, "step": 11766 }, { "epoch": 2.2776702786377707, "grad_norm": 0.06734418123960495, "learning_rate": 8.918777839257159e-05, "loss": 0.008, "step": 11767 }, { "epoch": 2.277863777089783, "grad_norm": 0.052052468061447144, "learning_rate": 8.918599419756772e-05, "loss": 0.0066, "step": 11768 }, { "epoch": 2.2780572755417956, "grad_norm": 0.07431326806545258, "learning_rate": 8.918420987546875e-05, "loss": 0.0073, "step": 11769 }, { "epoch": 2.278250773993808, "grad_norm": 0.058735135942697525, "learning_rate": 8.918242542628132e-05, "loss": 0.008, "step": 11770 }, { "epoch": 2.2784442724458205, "grad_norm": 0.06661835312843323, "learning_rate": 8.918064085001206e-05, "loss": 0.0088, "step": 11771 }, { "epoch": 2.278637770897833, "grad_norm": 0.0724208876490593, "learning_rate": 8.917885614666759e-05, "loss": 0.0077, "step": 11772 }, { "epoch": 2.2788312693498454, "grad_norm": 0.033591270446777344, "learning_rate": 8.917707131625454e-05, "loss": 0.0085, "step": 11773 }, { "epoch": 2.2790247678018574, "grad_norm": 0.06597388535737991, "learning_rate": 8.917528635877957e-05, "loss": 0.0068, "step": 11774 }, { "epoch": 2.27921826625387, "grad_norm": 0.03688950836658478, "learning_rate": 8.917350127424933e-05, "loss": 0.0074, "step": 11775 }, { "epoch": 2.2794117647058822, "grad_norm": 0.07508386671543121, "learning_rate": 8.917171606267041e-05, "loss": 0.0061, "step": 11776 }, { "epoch": 2.2796052631578947, "grad_norm": 0.04858192428946495, "learning_rate": 8.916993072404947e-05, "loss": 0.0065, "step": 11777 }, { "epoch": 2.279798761609907, "grad_norm": 0.05924093350768089, "learning_rate": 8.916814525839315e-05, "loss": 0.0057, "step": 11778 }, { "epoch": 2.2799922600619196, "grad_norm": 0.047367796301841736, "learning_rate": 8.916635966570807e-05, "loss": 0.0068, "step": 11779 }, { "epoch": 2.280185758513932, "grad_norm": 0.05163127928972244, "learning_rate": 8.91645739460009e-05, "loss": 0.0077, "step": 11780 }, { "epoch": 2.2803792569659445, "grad_norm": 0.07664812356233597, "learning_rate": 8.916278809927826e-05, "loss": 0.0077, "step": 11781 }, { "epoch": 2.280572755417957, "grad_norm": 0.06101396679878235, "learning_rate": 8.91610021255468e-05, "loss": 0.0055, "step": 11782 }, { "epoch": 2.280766253869969, "grad_norm": 0.07462117075920105, "learning_rate": 8.915921602481314e-05, "loss": 0.0065, "step": 11783 }, { "epoch": 2.2809597523219813, "grad_norm": 0.0533638671040535, "learning_rate": 8.915742979708394e-05, "loss": 0.0077, "step": 11784 }, { "epoch": 2.281153250773994, "grad_norm": 0.0745922103524208, "learning_rate": 8.915564344236583e-05, "loss": 0.006, "step": 11785 }, { "epoch": 2.281346749226006, "grad_norm": 0.0740191638469696, "learning_rate": 8.915385696066547e-05, "loss": 0.0077, "step": 11786 }, { "epoch": 2.2815402476780187, "grad_norm": 0.10793408751487732, 
"learning_rate": 8.915207035198946e-05, "loss": 0.0055, "step": 11787 }, { "epoch": 2.281733746130031, "grad_norm": 0.07149409502744675, "learning_rate": 8.915028361634447e-05, "loss": 0.0079, "step": 11788 }, { "epoch": 2.281927244582043, "grad_norm": 0.08682172000408173, "learning_rate": 8.914849675373714e-05, "loss": 0.0072, "step": 11789 }, { "epoch": 2.2821207430340555, "grad_norm": 0.1051584780216217, "learning_rate": 8.914670976417411e-05, "loss": 0.0071, "step": 11790 }, { "epoch": 2.282314241486068, "grad_norm": 0.07074225693941116, "learning_rate": 8.914492264766205e-05, "loss": 0.0069, "step": 11791 }, { "epoch": 2.2825077399380804, "grad_norm": 0.12124433368444443, "learning_rate": 8.914313540420756e-05, "loss": 0.0073, "step": 11792 }, { "epoch": 2.282701238390093, "grad_norm": 0.09109316021203995, "learning_rate": 8.914134803381732e-05, "loss": 0.0069, "step": 11793 }, { "epoch": 2.2828947368421053, "grad_norm": 0.07839670777320862, "learning_rate": 8.913956053649796e-05, "loss": 0.008, "step": 11794 }, { "epoch": 2.2830882352941178, "grad_norm": 0.09756094217300415, "learning_rate": 8.913777291225613e-05, "loss": 0.008, "step": 11795 }, { "epoch": 2.28328173374613, "grad_norm": 0.05050418898463249, "learning_rate": 8.913598516109844e-05, "loss": 0.0073, "step": 11796 }, { "epoch": 2.2834752321981426, "grad_norm": 0.0821048840880394, "learning_rate": 8.913419728303159e-05, "loss": 0.0072, "step": 11797 }, { "epoch": 2.2836687306501546, "grad_norm": 0.06316356360912323, "learning_rate": 8.91324092780622e-05, "loss": 0.0064, "step": 11798 }, { "epoch": 2.283862229102167, "grad_norm": 0.06329172104597092, "learning_rate": 8.913062114619695e-05, "loss": 0.0063, "step": 11799 }, { "epoch": 2.2840557275541795, "grad_norm": 0.085577592253685, "learning_rate": 8.912883288744244e-05, "loss": 0.0076, "step": 11800 }, { "epoch": 2.284249226006192, "grad_norm": 0.03400666266679764, "learning_rate": 8.912704450180532e-05, "loss": 0.0066, "step": 11801 }, { "epoch": 2.2844427244582044, "grad_norm": 0.0847199410200119, "learning_rate": 8.912525598929229e-05, "loss": 0.008, "step": 11802 }, { "epoch": 2.284636222910217, "grad_norm": 0.06956823170185089, "learning_rate": 8.912346734990995e-05, "loss": 0.0066, "step": 11803 }, { "epoch": 2.2848297213622293, "grad_norm": 0.0672420859336853, "learning_rate": 8.912167858366497e-05, "loss": 0.0074, "step": 11804 }, { "epoch": 2.2850232198142413, "grad_norm": 0.09781268984079361, "learning_rate": 8.911988969056399e-05, "loss": 0.0078, "step": 11805 }, { "epoch": 2.2852167182662537, "grad_norm": 0.06886980682611465, "learning_rate": 8.911810067061367e-05, "loss": 0.0061, "step": 11806 }, { "epoch": 2.285410216718266, "grad_norm": 0.09977114200592041, "learning_rate": 8.911631152382066e-05, "loss": 0.0061, "step": 11807 }, { "epoch": 2.2856037151702786, "grad_norm": 0.1256897896528244, "learning_rate": 8.911452225019162e-05, "loss": 0.0084, "step": 11808 }, { "epoch": 2.285797213622291, "grad_norm": 0.0760129988193512, "learning_rate": 8.911273284973318e-05, "loss": 0.0073, "step": 11809 }, { "epoch": 2.2859907120743035, "grad_norm": 0.16358622908592224, "learning_rate": 8.911094332245202e-05, "loss": 0.007, "step": 11810 }, { "epoch": 2.286184210526316, "grad_norm": 0.04463740438222885, "learning_rate": 8.910915366835476e-05, "loss": 0.0071, "step": 11811 }, { "epoch": 2.2863777089783284, "grad_norm": 0.1491500735282898, "learning_rate": 8.910736388744809e-05, "loss": 0.0065, "step": 11812 }, { "epoch": 2.2865712074303404, "grad_norm": 
0.07762517780065536, "learning_rate": 8.910557397973864e-05, "loss": 0.0076, "step": 11813 }, { "epoch": 2.286764705882353, "grad_norm": 0.1136619970202446, "learning_rate": 8.910378394523305e-05, "loss": 0.0075, "step": 11814 }, { "epoch": 2.2869582043343653, "grad_norm": 0.14205731451511383, "learning_rate": 8.910199378393801e-05, "loss": 0.0054, "step": 11815 }, { "epoch": 2.2871517027863777, "grad_norm": 0.06941433250904083, "learning_rate": 8.910020349586016e-05, "loss": 0.009, "step": 11816 }, { "epoch": 2.28734520123839, "grad_norm": 0.16940011084079742, "learning_rate": 8.909841308100615e-05, "loss": 0.0065, "step": 11817 }, { "epoch": 2.2875386996904026, "grad_norm": 0.08316835761070251, "learning_rate": 8.909662253938266e-05, "loss": 0.0072, "step": 11818 }, { "epoch": 2.287732198142415, "grad_norm": 0.11071209609508514, "learning_rate": 8.90948318709963e-05, "loss": 0.0069, "step": 11819 }, { "epoch": 2.287925696594427, "grad_norm": 0.15436537563800812, "learning_rate": 8.909304107585377e-05, "loss": 0.0086, "step": 11820 }, { "epoch": 2.2881191950464395, "grad_norm": 0.04707902669906616, "learning_rate": 8.909125015396173e-05, "loss": 0.0073, "step": 11821 }, { "epoch": 2.288312693498452, "grad_norm": 0.15797802805900574, "learning_rate": 8.90894591053268e-05, "loss": 0.0067, "step": 11822 }, { "epoch": 2.2885061919504643, "grad_norm": 0.031459759920835495, "learning_rate": 8.908766792995567e-05, "loss": 0.0069, "step": 11823 }, { "epoch": 2.288699690402477, "grad_norm": 0.11184822767972946, "learning_rate": 8.908587662785499e-05, "loss": 0.007, "step": 11824 }, { "epoch": 2.2888931888544892, "grad_norm": 0.05426229536533356, "learning_rate": 8.908408519903141e-05, "loss": 0.0063, "step": 11825 }, { "epoch": 2.2890866873065017, "grad_norm": 0.06264278292655945, "learning_rate": 8.908229364349161e-05, "loss": 0.0077, "step": 11826 }, { "epoch": 2.289280185758514, "grad_norm": 0.06150428578257561, "learning_rate": 8.908050196124223e-05, "loss": 0.0065, "step": 11827 }, { "epoch": 2.2894736842105265, "grad_norm": 0.03769787400960922, "learning_rate": 8.907871015228996e-05, "loss": 0.0062, "step": 11828 }, { "epoch": 2.2896671826625385, "grad_norm": 0.061890169978141785, "learning_rate": 8.907691821664145e-05, "loss": 0.006, "step": 11829 }, { "epoch": 2.289860681114551, "grad_norm": 0.022328002378344536, "learning_rate": 8.907512615430332e-05, "loss": 0.0068, "step": 11830 }, { "epoch": 2.2900541795665634, "grad_norm": 0.067928746342659, "learning_rate": 8.907333396528229e-05, "loss": 0.0069, "step": 11831 }, { "epoch": 2.290247678018576, "grad_norm": 0.05189807340502739, "learning_rate": 8.9071541649585e-05, "loss": 0.0077, "step": 11832 }, { "epoch": 2.2904411764705883, "grad_norm": 0.041022058576345444, "learning_rate": 8.90697492072181e-05, "loss": 0.0073, "step": 11833 }, { "epoch": 2.2906346749226008, "grad_norm": 0.03574578836560249, "learning_rate": 8.906795663818828e-05, "loss": 0.008, "step": 11834 }, { "epoch": 2.290828173374613, "grad_norm": 0.06353820115327835, "learning_rate": 8.906616394250218e-05, "loss": 0.007, "step": 11835 }, { "epoch": 2.291021671826625, "grad_norm": 0.035882771015167236, "learning_rate": 8.90643711201665e-05, "loss": 0.0072, "step": 11836 }, { "epoch": 2.2912151702786376, "grad_norm": 0.0653415098786354, "learning_rate": 8.906257817118786e-05, "loss": 0.0072, "step": 11837 }, { "epoch": 2.29140866873065, "grad_norm": 0.06321042031049728, "learning_rate": 8.906078509557296e-05, "loss": 0.0085, "step": 11838 }, { "epoch": 2.2916021671826625, 
"grad_norm": 0.06268323212862015, "learning_rate": 8.905899189332845e-05, "loss": 0.0068, "step": 11839 }, { "epoch": 2.291795665634675, "grad_norm": 0.06473496556282043, "learning_rate": 8.905719856446099e-05, "loss": 0.0057, "step": 11840 }, { "epoch": 2.2919891640866874, "grad_norm": 0.05166139453649521, "learning_rate": 8.905540510897727e-05, "loss": 0.0062, "step": 11841 }, { "epoch": 2.2921826625387, "grad_norm": 0.0765959769487381, "learning_rate": 8.905361152688394e-05, "loss": 0.0076, "step": 11842 }, { "epoch": 2.2923761609907123, "grad_norm": 0.04464583098888397, "learning_rate": 8.905181781818768e-05, "loss": 0.0072, "step": 11843 }, { "epoch": 2.2925696594427243, "grad_norm": 0.10147443413734436, "learning_rate": 8.905002398289513e-05, "loss": 0.008, "step": 11844 }, { "epoch": 2.2927631578947367, "grad_norm": 0.07002575695514679, "learning_rate": 8.904823002101301e-05, "loss": 0.009, "step": 11845 }, { "epoch": 2.292956656346749, "grad_norm": 0.0883704274892807, "learning_rate": 8.904643593254794e-05, "loss": 0.007, "step": 11846 }, { "epoch": 2.2931501547987616, "grad_norm": 0.058509279042482376, "learning_rate": 8.904464171750663e-05, "loss": 0.0064, "step": 11847 }, { "epoch": 2.293343653250774, "grad_norm": 0.0755779966711998, "learning_rate": 8.904284737589572e-05, "loss": 0.0064, "step": 11848 }, { "epoch": 2.2935371517027865, "grad_norm": 0.0478028729557991, "learning_rate": 8.90410529077219e-05, "loss": 0.0068, "step": 11849 }, { "epoch": 2.293730650154799, "grad_norm": 0.055805787444114685, "learning_rate": 8.903925831299182e-05, "loss": 0.0065, "step": 11850 }, { "epoch": 2.293924148606811, "grad_norm": 0.09287546575069427, "learning_rate": 8.903746359171217e-05, "loss": 0.008, "step": 11851 }, { "epoch": 2.2941176470588234, "grad_norm": 0.05833795294165611, "learning_rate": 8.903566874388961e-05, "loss": 0.0082, "step": 11852 }, { "epoch": 2.294311145510836, "grad_norm": 0.05865155905485153, "learning_rate": 8.903387376953082e-05, "loss": 0.0084, "step": 11853 }, { "epoch": 2.2945046439628483, "grad_norm": 0.06438768655061722, "learning_rate": 8.90320786686425e-05, "loss": 0.0069, "step": 11854 }, { "epoch": 2.2946981424148607, "grad_norm": 0.044546373188495636, "learning_rate": 8.903028344123127e-05, "loss": 0.006, "step": 11855 }, { "epoch": 2.294891640866873, "grad_norm": 0.056892286986112595, "learning_rate": 8.902848808730384e-05, "loss": 0.0091, "step": 11856 }, { "epoch": 2.2950851393188856, "grad_norm": 0.05044370889663696, "learning_rate": 8.902669260686686e-05, "loss": 0.0072, "step": 11857 }, { "epoch": 2.295278637770898, "grad_norm": 0.033444810658693314, "learning_rate": 8.902489699992705e-05, "loss": 0.0059, "step": 11858 }, { "epoch": 2.2954721362229105, "grad_norm": 0.047004085034132004, "learning_rate": 8.902310126649105e-05, "loss": 0.0063, "step": 11859 }, { "epoch": 2.2956656346749225, "grad_norm": 0.029024958610534668, "learning_rate": 8.902130540656553e-05, "loss": 0.0069, "step": 11860 }, { "epoch": 2.295859133126935, "grad_norm": 0.0402008481323719, "learning_rate": 8.901950942015719e-05, "loss": 0.0068, "step": 11861 }, { "epoch": 2.2960526315789473, "grad_norm": 0.036619577556848526, "learning_rate": 8.901771330727268e-05, "loss": 0.006, "step": 11862 }, { "epoch": 2.29624613003096, "grad_norm": 0.028307538479566574, "learning_rate": 8.90159170679187e-05, "loss": 0.0071, "step": 11863 }, { "epoch": 2.2964396284829722, "grad_norm": 0.0441068671643734, "learning_rate": 8.901412070210194e-05, "loss": 0.0072, "step": 11864 }, { "epoch": 
2.2966331269349847, "grad_norm": 0.036546796560287476, "learning_rate": 8.901232420982905e-05, "loss": 0.0066, "step": 11865 }, { "epoch": 2.2968266253869967, "grad_norm": 0.046823423355817795, "learning_rate": 8.901052759110672e-05, "loss": 0.0088, "step": 11866 }, { "epoch": 2.297020123839009, "grad_norm": 0.030124114826321602, "learning_rate": 8.900873084594164e-05, "loss": 0.0065, "step": 11867 }, { "epoch": 2.2972136222910216, "grad_norm": 0.07486805319786072, "learning_rate": 8.900693397434045e-05, "loss": 0.0072, "step": 11868 }, { "epoch": 2.297407120743034, "grad_norm": 0.031656160950660706, "learning_rate": 8.900513697630987e-05, "loss": 0.0074, "step": 11869 }, { "epoch": 2.2976006191950464, "grad_norm": 0.0551023855805397, "learning_rate": 8.900333985185658e-05, "loss": 0.0067, "step": 11870 }, { "epoch": 2.297794117647059, "grad_norm": 0.04324987158179283, "learning_rate": 8.900154260098725e-05, "loss": 0.0066, "step": 11871 }, { "epoch": 2.2979876160990713, "grad_norm": 0.04282623529434204, "learning_rate": 8.899974522370857e-05, "loss": 0.0067, "step": 11872 }, { "epoch": 2.2981811145510838, "grad_norm": 0.07595429569482803, "learning_rate": 8.89979477200272e-05, "loss": 0.0081, "step": 11873 }, { "epoch": 2.298374613003096, "grad_norm": 0.06062956899404526, "learning_rate": 8.899615008994983e-05, "loss": 0.0062, "step": 11874 }, { "epoch": 2.298568111455108, "grad_norm": 0.06255032867193222, "learning_rate": 8.899435233348317e-05, "loss": 0.0072, "step": 11875 }, { "epoch": 2.2987616099071206, "grad_norm": 0.06697459518909454, "learning_rate": 8.899255445063387e-05, "loss": 0.0077, "step": 11876 }, { "epoch": 2.298955108359133, "grad_norm": 0.041398048400878906, "learning_rate": 8.899075644140864e-05, "loss": 0.0093, "step": 11877 }, { "epoch": 2.2991486068111455, "grad_norm": 0.07283835858106613, "learning_rate": 8.898895830581415e-05, "loss": 0.0073, "step": 11878 }, { "epoch": 2.299342105263158, "grad_norm": 0.08747182786464691, "learning_rate": 8.89871600438571e-05, "loss": 0.0091, "step": 11879 }, { "epoch": 2.2995356037151704, "grad_norm": 0.07955838739871979, "learning_rate": 8.898536165554414e-05, "loss": 0.0067, "step": 11880 }, { "epoch": 2.299729102167183, "grad_norm": 0.09096921235322952, "learning_rate": 8.898356314088197e-05, "loss": 0.0078, "step": 11881 }, { "epoch": 2.299922600619195, "grad_norm": 0.06635493040084839, "learning_rate": 8.898176449987732e-05, "loss": 0.0071, "step": 11882 }, { "epoch": 2.3001160990712073, "grad_norm": 0.0987718477845192, "learning_rate": 8.897996573253683e-05, "loss": 0.0069, "step": 11883 }, { "epoch": 2.3003095975232197, "grad_norm": 0.03849704563617706, "learning_rate": 8.89781668388672e-05, "loss": 0.0061, "step": 11884 }, { "epoch": 2.300503095975232, "grad_norm": 0.09708704799413681, "learning_rate": 8.897636781887511e-05, "loss": 0.0068, "step": 11885 }, { "epoch": 2.3006965944272446, "grad_norm": 0.06791221350431442, "learning_rate": 8.897456867256727e-05, "loss": 0.0073, "step": 11886 }, { "epoch": 2.300890092879257, "grad_norm": 0.0742148905992508, "learning_rate": 8.897276939995036e-05, "loss": 0.0063, "step": 11887 }, { "epoch": 2.3010835913312695, "grad_norm": 0.0928356945514679, "learning_rate": 8.897097000103105e-05, "loss": 0.008, "step": 11888 }, { "epoch": 2.301277089783282, "grad_norm": 0.029197953641414642, "learning_rate": 8.896917047581605e-05, "loss": 0.0072, "step": 11889 }, { "epoch": 2.301470588235294, "grad_norm": 0.08753691613674164, "learning_rate": 8.896737082431203e-05, "loss": 0.0057, "step": 
11890 }, { "epoch": 2.3016640866873064, "grad_norm": 0.06531475484371185, "learning_rate": 8.89655710465257e-05, "loss": 0.0073, "step": 11891 }, { "epoch": 2.301857585139319, "grad_norm": 0.033257026225328445, "learning_rate": 8.896377114246376e-05, "loss": 0.0061, "step": 11892 }, { "epoch": 2.3020510835913313, "grad_norm": 0.0730983167886734, "learning_rate": 8.896197111213288e-05, "loss": 0.006, "step": 11893 }, { "epoch": 2.3022445820433437, "grad_norm": 0.047556132078170776, "learning_rate": 8.896017095553977e-05, "loss": 0.0078, "step": 11894 }, { "epoch": 2.302438080495356, "grad_norm": 0.06537069380283356, "learning_rate": 8.895837067269109e-05, "loss": 0.0077, "step": 11895 }, { "epoch": 2.3026315789473686, "grad_norm": 0.09209702908992767, "learning_rate": 8.895657026359357e-05, "loss": 0.0083, "step": 11896 }, { "epoch": 2.3028250773993806, "grad_norm": 0.050343383103609085, "learning_rate": 8.895476972825389e-05, "loss": 0.0078, "step": 11897 }, { "epoch": 2.303018575851393, "grad_norm": 0.09255100041627884, "learning_rate": 8.895296906667872e-05, "loss": 0.008, "step": 11898 }, { "epoch": 2.3032120743034055, "grad_norm": 0.07550907135009766, "learning_rate": 8.895116827887483e-05, "loss": 0.0066, "step": 11899 }, { "epoch": 2.303405572755418, "grad_norm": 0.09081022441387177, "learning_rate": 8.894936736484882e-05, "loss": 0.0077, "step": 11900 }, { "epoch": 2.3035990712074303, "grad_norm": 0.10916785895824432, "learning_rate": 8.894756632460742e-05, "loss": 0.0064, "step": 11901 }, { "epoch": 2.303792569659443, "grad_norm": 0.03808267042040825, "learning_rate": 8.894576515815736e-05, "loss": 0.0065, "step": 11902 }, { "epoch": 2.3039860681114552, "grad_norm": 0.11515335738658905, "learning_rate": 8.894396386550529e-05, "loss": 0.0081, "step": 11903 }, { "epoch": 2.3041795665634677, "grad_norm": 0.040339767932891846, "learning_rate": 8.894216244665795e-05, "loss": 0.0081, "step": 11904 }, { "epoch": 2.30437306501548, "grad_norm": 0.07624813914299011, "learning_rate": 8.8940360901622e-05, "loss": 0.0066, "step": 11905 }, { "epoch": 2.304566563467492, "grad_norm": 0.054470740258693695, "learning_rate": 8.893855923040414e-05, "loss": 0.0065, "step": 11906 }, { "epoch": 2.3047600619195046, "grad_norm": 0.06598064303398132, "learning_rate": 8.89367574330111e-05, "loss": 0.0062, "step": 11907 }, { "epoch": 2.304953560371517, "grad_norm": 0.05439426749944687, "learning_rate": 8.893495550944954e-05, "loss": 0.0063, "step": 11908 }, { "epoch": 2.3051470588235294, "grad_norm": 0.05779218673706055, "learning_rate": 8.893315345972619e-05, "loss": 0.0069, "step": 11909 }, { "epoch": 2.305340557275542, "grad_norm": 0.058440037071704865, "learning_rate": 8.893135128384773e-05, "loss": 0.0077, "step": 11910 }, { "epoch": 2.3055340557275543, "grad_norm": 0.041772812604904175, "learning_rate": 8.892954898182086e-05, "loss": 0.007, "step": 11911 }, { "epoch": 2.3057275541795663, "grad_norm": 0.06522699445486069, "learning_rate": 8.89277465536523e-05, "loss": 0.0068, "step": 11912 }, { "epoch": 2.3059210526315788, "grad_norm": 0.05391658842563629, "learning_rate": 8.892594399934874e-05, "loss": 0.0071, "step": 11913 }, { "epoch": 2.306114551083591, "grad_norm": 0.03358729928731918, "learning_rate": 8.892414131891687e-05, "loss": 0.0063, "step": 11914 }, { "epoch": 2.3063080495356036, "grad_norm": 0.059773243963718414, "learning_rate": 8.892233851236342e-05, "loss": 0.0083, "step": 11915 }, { "epoch": 2.306501547987616, "grad_norm": 0.02682127244770527, "learning_rate": 8.892053557969505e-05, 
"loss": 0.0087, "step": 11916 }, { "epoch": 2.3066950464396285, "grad_norm": 0.05257248878479004, "learning_rate": 8.891873252091851e-05, "loss": 0.007, "step": 11917 }, { "epoch": 2.306888544891641, "grad_norm": 0.0363587886095047, "learning_rate": 8.891692933604046e-05, "loss": 0.0064, "step": 11918 }, { "epoch": 2.3070820433436534, "grad_norm": 0.05236010253429413, "learning_rate": 8.891512602506763e-05, "loss": 0.008, "step": 11919 }, { "epoch": 2.307275541795666, "grad_norm": 0.039554890245199203, "learning_rate": 8.891332258800672e-05, "loss": 0.0062, "step": 11920 }, { "epoch": 2.307469040247678, "grad_norm": 0.06174299120903015, "learning_rate": 8.891151902486443e-05, "loss": 0.0085, "step": 11921 }, { "epoch": 2.3076625386996903, "grad_norm": 0.06495698541402817, "learning_rate": 8.890971533564747e-05, "loss": 0.0061, "step": 11922 }, { "epoch": 2.3078560371517027, "grad_norm": 0.04897761344909668, "learning_rate": 8.890791152036255e-05, "loss": 0.0062, "step": 11923 }, { "epoch": 2.308049535603715, "grad_norm": 0.04630941152572632, "learning_rate": 8.890610757901637e-05, "loss": 0.0078, "step": 11924 }, { "epoch": 2.3082430340557276, "grad_norm": 0.06517359614372253, "learning_rate": 8.890430351161562e-05, "loss": 0.006, "step": 11925 }, { "epoch": 2.30843653250774, "grad_norm": 0.047102123498916626, "learning_rate": 8.890249931816704e-05, "loss": 0.0074, "step": 11926 }, { "epoch": 2.3086300309597525, "grad_norm": 0.061580266803503036, "learning_rate": 8.890069499867731e-05, "loss": 0.0059, "step": 11927 }, { "epoch": 2.3088235294117645, "grad_norm": 0.06763166934251785, "learning_rate": 8.889889055315317e-05, "loss": 0.0081, "step": 11928 }, { "epoch": 2.309017027863777, "grad_norm": 0.06927863508462906, "learning_rate": 8.889708598160129e-05, "loss": 0.0078, "step": 11929 }, { "epoch": 2.3092105263157894, "grad_norm": 0.10941280424594879, "learning_rate": 8.889528128402841e-05, "loss": 0.0083, "step": 11930 }, { "epoch": 2.309404024767802, "grad_norm": 0.06641270965337753, "learning_rate": 8.88934764604412e-05, "loss": 0.0066, "step": 11931 }, { "epoch": 2.3095975232198143, "grad_norm": 0.11772824823856354, "learning_rate": 8.889167151084641e-05, "loss": 0.0073, "step": 11932 }, { "epoch": 2.3097910216718267, "grad_norm": 0.04680664464831352, "learning_rate": 8.888986643525074e-05, "loss": 0.0066, "step": 11933 }, { "epoch": 2.309984520123839, "grad_norm": 0.11619389802217484, "learning_rate": 8.888806123366092e-05, "loss": 0.0071, "step": 11934 }, { "epoch": 2.3101780185758516, "grad_norm": 0.05467046797275543, "learning_rate": 8.88862559060836e-05, "loss": 0.0077, "step": 11935 }, { "epoch": 2.3103715170278636, "grad_norm": 0.10354510694742203, "learning_rate": 8.888445045252555e-05, "loss": 0.0096, "step": 11936 }, { "epoch": 2.310565015479876, "grad_norm": 0.06284841150045395, "learning_rate": 8.888264487299346e-05, "loss": 0.0067, "step": 11937 }, { "epoch": 2.3107585139318885, "grad_norm": 0.07362710684537888, "learning_rate": 8.888083916749404e-05, "loss": 0.007, "step": 11938 }, { "epoch": 2.310952012383901, "grad_norm": 0.06622345745563507, "learning_rate": 8.887903333603401e-05, "loss": 0.0077, "step": 11939 }, { "epoch": 2.3111455108359134, "grad_norm": 0.03694785386323929, "learning_rate": 8.887722737862009e-05, "loss": 0.0072, "step": 11940 }, { "epoch": 2.311339009287926, "grad_norm": 0.09380952268838882, "learning_rate": 8.887542129525898e-05, "loss": 0.0067, "step": 11941 }, { "epoch": 2.3115325077399382, "grad_norm": 0.04452527314424515, "learning_rate": 
8.88736150859574e-05, "loss": 0.0078, "step": 11942 }, { "epoch": 2.3117260061919502, "grad_norm": 0.07033158093690872, "learning_rate": 8.887180875072207e-05, "loss": 0.0062, "step": 11943 }, { "epoch": 2.3119195046439627, "grad_norm": 0.059979602694511414, "learning_rate": 8.887000228955969e-05, "loss": 0.0058, "step": 11944 }, { "epoch": 2.312113003095975, "grad_norm": 0.045889612287282944, "learning_rate": 8.8868195702477e-05, "loss": 0.0071, "step": 11945 }, { "epoch": 2.3123065015479876, "grad_norm": 0.09429546445608139, "learning_rate": 8.886638898948068e-05, "loss": 0.0074, "step": 11946 }, { "epoch": 2.3125, "grad_norm": 0.02389644831418991, "learning_rate": 8.886458215057751e-05, "loss": 0.0065, "step": 11947 }, { "epoch": 2.3126934984520124, "grad_norm": 0.10059543699026108, "learning_rate": 8.886277518577415e-05, "loss": 0.006, "step": 11948 }, { "epoch": 2.312886996904025, "grad_norm": 0.07399865984916687, "learning_rate": 8.886096809507733e-05, "loss": 0.0063, "step": 11949 }, { "epoch": 2.3130804953560373, "grad_norm": 0.08641108125448227, "learning_rate": 8.885916087849377e-05, "loss": 0.0074, "step": 11950 }, { "epoch": 2.3132739938080498, "grad_norm": 0.09726964682340622, "learning_rate": 8.88573535360302e-05, "loss": 0.006, "step": 11951 }, { "epoch": 2.3134674922600618, "grad_norm": 0.07258626073598862, "learning_rate": 8.885554606769334e-05, "loss": 0.0068, "step": 11952 }, { "epoch": 2.313660990712074, "grad_norm": 0.08728652447462082, "learning_rate": 8.885373847348989e-05, "loss": 0.0069, "step": 11953 }, { "epoch": 2.3138544891640866, "grad_norm": 0.10070399194955826, "learning_rate": 8.885193075342658e-05, "loss": 0.0047, "step": 11954 }, { "epoch": 2.314047987616099, "grad_norm": 0.0738447979092598, "learning_rate": 8.885012290751014e-05, "loss": 0.0071, "step": 11955 }, { "epoch": 2.3142414860681115, "grad_norm": 0.11878974735736847, "learning_rate": 8.88483149357473e-05, "loss": 0.0068, "step": 11956 }, { "epoch": 2.314434984520124, "grad_norm": 0.06426942348480225, "learning_rate": 8.884650683814473e-05, "loss": 0.007, "step": 11957 }, { "epoch": 2.3146284829721364, "grad_norm": 0.11244002729654312, "learning_rate": 8.88446986147092e-05, "loss": 0.0065, "step": 11958 }, { "epoch": 2.3148219814241484, "grad_norm": 0.04452965781092644, "learning_rate": 8.884289026544745e-05, "loss": 0.0077, "step": 11959 }, { "epoch": 2.315015479876161, "grad_norm": 0.10490008443593979, "learning_rate": 8.884108179036615e-05, "loss": 0.0077, "step": 11960 }, { "epoch": 2.3152089783281733, "grad_norm": 0.028437113389372826, "learning_rate": 8.883927318947203e-05, "loss": 0.0077, "step": 11961 }, { "epoch": 2.3154024767801857, "grad_norm": 0.06849948316812515, "learning_rate": 8.883746446277186e-05, "loss": 0.0088, "step": 11962 }, { "epoch": 2.315595975232198, "grad_norm": 0.045347973704338074, "learning_rate": 8.883565561027231e-05, "loss": 0.0056, "step": 11963 }, { "epoch": 2.3157894736842106, "grad_norm": 0.03857176750898361, "learning_rate": 8.883384663198016e-05, "loss": 0.0067, "step": 11964 }, { "epoch": 2.315982972136223, "grad_norm": 0.05377379059791565, "learning_rate": 8.883203752790207e-05, "loss": 0.0064, "step": 11965 }, { "epoch": 2.3161764705882355, "grad_norm": 0.0392938032746315, "learning_rate": 8.883022829804482e-05, "loss": 0.0069, "step": 11966 }, { "epoch": 2.3163699690402475, "grad_norm": 0.048375312238931656, "learning_rate": 8.882841894241511e-05, "loss": 0.007, "step": 11967 }, { "epoch": 2.31656346749226, "grad_norm": 0.06695222109556198, 
"learning_rate": 8.882660946101969e-05, "loss": 0.0081, "step": 11968 }, { "epoch": 2.3167569659442724, "grad_norm": 0.08901980519294739, "learning_rate": 8.882479985386524e-05, "loss": 0.008, "step": 11969 }, { "epoch": 2.316950464396285, "grad_norm": 0.057328734546899796, "learning_rate": 8.882299012095855e-05, "loss": 0.0069, "step": 11970 }, { "epoch": 2.3171439628482973, "grad_norm": 0.0912228599190712, "learning_rate": 8.882118026230631e-05, "loss": 0.0062, "step": 11971 }, { "epoch": 2.3173374613003097, "grad_norm": 0.06364675611257553, "learning_rate": 8.881937027791525e-05, "loss": 0.008, "step": 11972 }, { "epoch": 2.317530959752322, "grad_norm": 0.08760466426610947, "learning_rate": 8.881756016779209e-05, "loss": 0.0081, "step": 11973 }, { "epoch": 2.317724458204334, "grad_norm": 0.05121335759758949, "learning_rate": 8.881574993194361e-05, "loss": 0.0054, "step": 11974 }, { "epoch": 2.3179179566563466, "grad_norm": 0.10127384215593338, "learning_rate": 8.881393957037648e-05, "loss": 0.0069, "step": 11975 }, { "epoch": 2.318111455108359, "grad_norm": 0.04328420013189316, "learning_rate": 8.881212908309744e-05, "loss": 0.0077, "step": 11976 }, { "epoch": 2.3183049535603715, "grad_norm": 0.05788001790642738, "learning_rate": 8.881031847011326e-05, "loss": 0.0074, "step": 11977 }, { "epoch": 2.318498452012384, "grad_norm": 0.05198394134640694, "learning_rate": 8.880850773143065e-05, "loss": 0.0056, "step": 11978 }, { "epoch": 2.3186919504643964, "grad_norm": 0.04305766895413399, "learning_rate": 8.880669686705633e-05, "loss": 0.0073, "step": 11979 }, { "epoch": 2.318885448916409, "grad_norm": 0.04423897713422775, "learning_rate": 8.880488587699703e-05, "loss": 0.0073, "step": 11980 }, { "epoch": 2.3190789473684212, "grad_norm": 0.038924697786569595, "learning_rate": 8.880307476125952e-05, "loss": 0.0068, "step": 11981 }, { "epoch": 2.3192724458204337, "grad_norm": 0.06057029962539673, "learning_rate": 8.880126351985048e-05, "loss": 0.0069, "step": 11982 }, { "epoch": 2.3194659442724457, "grad_norm": 0.03455748036503792, "learning_rate": 8.879945215277669e-05, "loss": 0.0064, "step": 11983 }, { "epoch": 2.319659442724458, "grad_norm": 0.04703814536333084, "learning_rate": 8.879764066004486e-05, "loss": 0.0073, "step": 11984 }, { "epoch": 2.3198529411764706, "grad_norm": 0.036154378205537796, "learning_rate": 8.879582904166172e-05, "loss": 0.0073, "step": 11985 }, { "epoch": 2.320046439628483, "grad_norm": 0.03009652905166149, "learning_rate": 8.879401729763402e-05, "loss": 0.0091, "step": 11986 }, { "epoch": 2.3202399380804954, "grad_norm": 0.06493260711431503, "learning_rate": 8.87922054279685e-05, "loss": 0.0057, "step": 11987 }, { "epoch": 2.320433436532508, "grad_norm": 0.027439581230282784, "learning_rate": 8.87903934326719e-05, "loss": 0.0058, "step": 11988 }, { "epoch": 2.32062693498452, "grad_norm": 0.03982502967119217, "learning_rate": 8.878858131175091e-05, "loss": 0.008, "step": 11989 }, { "epoch": 2.3208204334365323, "grad_norm": 0.0533442460000515, "learning_rate": 8.878676906521231e-05, "loss": 0.008, "step": 11990 }, { "epoch": 2.3210139318885448, "grad_norm": 0.07274044305086136, "learning_rate": 8.878495669306283e-05, "loss": 0.0055, "step": 11991 }, { "epoch": 2.321207430340557, "grad_norm": 0.05226200073957443, "learning_rate": 8.878314419530921e-05, "loss": 0.0065, "step": 11992 }, { "epoch": 2.3214009287925697, "grad_norm": 0.1043701246380806, "learning_rate": 8.878133157195818e-05, "loss": 0.0085, "step": 11993 }, { "epoch": 2.321594427244582, "grad_norm": 
0.09508392214775085, "learning_rate": 8.877951882301649e-05, "loss": 0.0076, "step": 11994 }, { "epoch": 2.3217879256965945, "grad_norm": 0.08119190484285355, "learning_rate": 8.877770594849087e-05, "loss": 0.0063, "step": 11995 }, { "epoch": 2.321981424148607, "grad_norm": 0.061950333416461945, "learning_rate": 8.877589294838806e-05, "loss": 0.0076, "step": 11996 }, { "epoch": 2.3221749226006194, "grad_norm": 0.09730584919452667, "learning_rate": 8.87740798227148e-05, "loss": 0.0071, "step": 11997 }, { "epoch": 2.3223684210526314, "grad_norm": 0.05901245400309563, "learning_rate": 8.877226657147783e-05, "loss": 0.0073, "step": 11998 }, { "epoch": 2.322561919504644, "grad_norm": 0.13124801218509674, "learning_rate": 8.877045319468391e-05, "loss": 0.0075, "step": 11999 }, { "epoch": 2.3227554179566563, "grad_norm": 0.0461917519569397, "learning_rate": 8.876863969233976e-05, "loss": 0.0077, "step": 12000 }, { "epoch": 2.3229489164086687, "grad_norm": 0.13077010214328766, "learning_rate": 8.876682606445214e-05, "loss": 0.0084, "step": 12001 }, { "epoch": 2.323142414860681, "grad_norm": 0.04789111018180847, "learning_rate": 8.876501231102776e-05, "loss": 0.008, "step": 12002 }, { "epoch": 2.3233359133126936, "grad_norm": 0.15143924951553345, "learning_rate": 8.876319843207338e-05, "loss": 0.0077, "step": 12003 }, { "epoch": 2.323529411764706, "grad_norm": 0.0812230184674263, "learning_rate": 8.876138442759577e-05, "loss": 0.0072, "step": 12004 }, { "epoch": 2.323722910216718, "grad_norm": 0.13914746046066284, "learning_rate": 8.875957029760163e-05, "loss": 0.0079, "step": 12005 }, { "epoch": 2.3239164086687305, "grad_norm": 0.09393162280321121, "learning_rate": 8.875775604209775e-05, "loss": 0.0059, "step": 12006 }, { "epoch": 2.324109907120743, "grad_norm": 0.09321589022874832, "learning_rate": 8.875594166109082e-05, "loss": 0.007, "step": 12007 }, { "epoch": 2.3243034055727554, "grad_norm": 0.14479367434978485, "learning_rate": 8.875412715458765e-05, "loss": 0.0073, "step": 12008 }, { "epoch": 2.324496904024768, "grad_norm": 0.13566596806049347, "learning_rate": 8.875231252259494e-05, "loss": 0.0081, "step": 12009 }, { "epoch": 2.3246904024767803, "grad_norm": 0.10972476005554199, "learning_rate": 8.875049776511944e-05, "loss": 0.0069, "step": 12010 }, { "epoch": 2.3248839009287927, "grad_norm": 0.1371418982744217, "learning_rate": 8.87486828821679e-05, "loss": 0.0072, "step": 12011 }, { "epoch": 2.325077399380805, "grad_norm": 0.07587720453739166, "learning_rate": 8.874686787374709e-05, "loss": 0.0053, "step": 12012 }, { "epoch": 2.325270897832817, "grad_norm": 0.12265301495790482, "learning_rate": 8.874505273986374e-05, "loss": 0.0081, "step": 12013 }, { "epoch": 2.3254643962848296, "grad_norm": 0.08668877929449081, "learning_rate": 8.874323748052459e-05, "loss": 0.0072, "step": 12014 }, { "epoch": 2.325657894736842, "grad_norm": 0.10723619163036346, "learning_rate": 8.87414220957364e-05, "loss": 0.0077, "step": 12015 }, { "epoch": 2.3258513931888545, "grad_norm": 0.09163526445627213, "learning_rate": 8.873960658550592e-05, "loss": 0.0063, "step": 12016 }, { "epoch": 2.326044891640867, "grad_norm": 0.09818138927221298, "learning_rate": 8.87377909498399e-05, "loss": 0.0075, "step": 12017 }, { "epoch": 2.3262383900928794, "grad_norm": 0.10974587500095367, "learning_rate": 8.873597518874507e-05, "loss": 0.0073, "step": 12018 }, { "epoch": 2.326431888544892, "grad_norm": 0.07388102263212204, "learning_rate": 8.873415930222822e-05, "loss": 0.0072, "step": 12019 }, { "epoch": 2.326625386996904, 
"grad_norm": 0.14244459569454193, "learning_rate": 8.873234329029606e-05, "loss": 0.0058, "step": 12020 }, { "epoch": 2.3268188854489162, "grad_norm": 0.13839039206504822, "learning_rate": 8.873052715295536e-05, "loss": 0.0092, "step": 12021 }, { "epoch": 2.3270123839009287, "grad_norm": 0.19036297500133514, "learning_rate": 8.872871089021289e-05, "loss": 0.0085, "step": 12022 }, { "epoch": 2.327205882352941, "grad_norm": 0.14472144842147827, "learning_rate": 8.872689450207536e-05, "loss": 0.0086, "step": 12023 }, { "epoch": 2.3273993808049536, "grad_norm": 0.09591417759656906, "learning_rate": 8.872507798854955e-05, "loss": 0.0082, "step": 12024 }, { "epoch": 2.327592879256966, "grad_norm": 0.19838978350162506, "learning_rate": 8.872326134964224e-05, "loss": 0.0073, "step": 12025 }, { "epoch": 2.3277863777089784, "grad_norm": 0.06221164017915726, "learning_rate": 8.872144458536012e-05, "loss": 0.0082, "step": 12026 }, { "epoch": 2.327979876160991, "grad_norm": 0.09088291227817535, "learning_rate": 8.871962769571e-05, "loss": 0.0106, "step": 12027 }, { "epoch": 2.3281733746130033, "grad_norm": 0.40149086713790894, "learning_rate": 8.87178106806986e-05, "loss": 0.0092, "step": 12028 }, { "epoch": 2.3283668730650153, "grad_norm": 0.12743112444877625, "learning_rate": 8.871599354033268e-05, "loss": 0.0094, "step": 12029 }, { "epoch": 2.3285603715170278, "grad_norm": 0.5082271695137024, "learning_rate": 8.871417627461904e-05, "loss": 0.0074, "step": 12030 }, { "epoch": 2.32875386996904, "grad_norm": 0.1892387568950653, "learning_rate": 8.871235888356436e-05, "loss": 0.0078, "step": 12031 }, { "epoch": 2.3289473684210527, "grad_norm": 0.3883424699306488, "learning_rate": 8.871054136717545e-05, "loss": 0.0072, "step": 12032 }, { "epoch": 2.329140866873065, "grad_norm": 0.3125215470790863, "learning_rate": 8.870872372545906e-05, "loss": 0.0067, "step": 12033 }, { "epoch": 2.3293343653250775, "grad_norm": 0.1821030229330063, "learning_rate": 8.870690595842194e-05, "loss": 0.0079, "step": 12034 }, { "epoch": 2.3295278637770895, "grad_norm": 0.4659639298915863, "learning_rate": 8.870508806607083e-05, "loss": 0.0073, "step": 12035 }, { "epoch": 2.329721362229102, "grad_norm": 0.058591101318597794, "learning_rate": 8.870327004841253e-05, "loss": 0.0086, "step": 12036 }, { "epoch": 2.3299148606811144, "grad_norm": 0.31457462906837463, "learning_rate": 8.870145190545378e-05, "loss": 0.0092, "step": 12037 }, { "epoch": 2.330108359133127, "grad_norm": 0.24203240871429443, "learning_rate": 8.869963363720131e-05, "loss": 0.0077, "step": 12038 }, { "epoch": 2.3303018575851393, "grad_norm": 0.28293487429618835, "learning_rate": 8.869781524366192e-05, "loss": 0.0068, "step": 12039 }, { "epoch": 2.3304953560371517, "grad_norm": 0.26812589168548584, "learning_rate": 8.869599672484237e-05, "loss": 0.0092, "step": 12040 }, { "epoch": 2.330688854489164, "grad_norm": 0.1380649358034134, "learning_rate": 8.869417808074938e-05, "loss": 0.0096, "step": 12041 }, { "epoch": 2.3308823529411766, "grad_norm": 0.2352360188961029, "learning_rate": 8.869235931138976e-05, "loss": 0.0081, "step": 12042 }, { "epoch": 2.331075851393189, "grad_norm": 0.06558160483837128, "learning_rate": 8.869054041677025e-05, "loss": 0.0083, "step": 12043 }, { "epoch": 2.331269349845201, "grad_norm": 0.2274392992258072, "learning_rate": 8.86887213968976e-05, "loss": 0.0077, "step": 12044 }, { "epoch": 2.3314628482972135, "grad_norm": 0.11236704140901566, "learning_rate": 8.868690225177859e-05, "loss": 0.0074, "step": 12045 }, { "epoch": 
2.331656346749226, "grad_norm": 0.13596561551094055, "learning_rate": 8.868508298141999e-05, "loss": 0.0083, "step": 12046 }, { "epoch": 2.3318498452012384, "grad_norm": 0.20043860375881195, "learning_rate": 8.868326358582853e-05, "loss": 0.0079, "step": 12047 }, { "epoch": 2.332043343653251, "grad_norm": 0.07750985026359558, "learning_rate": 8.868144406501101e-05, "loss": 0.0068, "step": 12048 }, { "epoch": 2.3322368421052633, "grad_norm": 0.1717788577079773, "learning_rate": 8.867962441897418e-05, "loss": 0.0066, "step": 12049 }, { "epoch": 2.3324303405572757, "grad_norm": 0.13572868704795837, "learning_rate": 8.86778046477248e-05, "loss": 0.0087, "step": 12050 }, { "epoch": 2.3326238390092877, "grad_norm": 0.09307827055454254, "learning_rate": 8.867598475126964e-05, "loss": 0.0072, "step": 12051 }, { "epoch": 2.3328173374613, "grad_norm": 0.13302850723266602, "learning_rate": 8.867416472961547e-05, "loss": 0.0068, "step": 12052 }, { "epoch": 2.3330108359133126, "grad_norm": 0.06344746798276901, "learning_rate": 8.867234458276905e-05, "loss": 0.0075, "step": 12053 }, { "epoch": 2.333204334365325, "grad_norm": 0.11143302917480469, "learning_rate": 8.867052431073716e-05, "loss": 0.0083, "step": 12054 }, { "epoch": 2.3333978328173375, "grad_norm": 0.09715446084737778, "learning_rate": 8.866870391352655e-05, "loss": 0.007, "step": 12055 }, { "epoch": 2.33359133126935, "grad_norm": 0.06630714982748032, "learning_rate": 8.866688339114399e-05, "loss": 0.006, "step": 12056 }, { "epoch": 2.3337848297213624, "grad_norm": 0.1304643303155899, "learning_rate": 8.866506274359625e-05, "loss": 0.0078, "step": 12057 }, { "epoch": 2.333978328173375, "grad_norm": 0.032239872962236404, "learning_rate": 8.866324197089012e-05, "loss": 0.0081, "step": 12058 }, { "epoch": 2.334171826625387, "grad_norm": 0.11611732095479965, "learning_rate": 8.866142107303234e-05, "loss": 0.0065, "step": 12059 }, { "epoch": 2.3343653250773992, "grad_norm": 0.04239567741751671, "learning_rate": 8.865960005002969e-05, "loss": 0.0069, "step": 12060 }, { "epoch": 2.3345588235294117, "grad_norm": 0.09411433339118958, "learning_rate": 8.865777890188894e-05, "loss": 0.0077, "step": 12061 }, { "epoch": 2.334752321981424, "grad_norm": 0.1110488772392273, "learning_rate": 8.865595762861686e-05, "loss": 0.0078, "step": 12062 }, { "epoch": 2.3349458204334366, "grad_norm": 0.07160276174545288, "learning_rate": 8.865413623022024e-05, "loss": 0.0086, "step": 12063 }, { "epoch": 2.335139318885449, "grad_norm": 0.07544519007205963, "learning_rate": 8.865231470670582e-05, "loss": 0.0086, "step": 12064 }, { "epoch": 2.3353328173374615, "grad_norm": 0.07589119672775269, "learning_rate": 8.865049305808038e-05, "loss": 0.0068, "step": 12065 }, { "epoch": 2.3355263157894735, "grad_norm": 0.06273863464593887, "learning_rate": 8.86486712843507e-05, "loss": 0.0076, "step": 12066 }, { "epoch": 2.335719814241486, "grad_norm": 0.07352856546640396, "learning_rate": 8.864684938552355e-05, "loss": 0.0078, "step": 12067 }, { "epoch": 2.3359133126934983, "grad_norm": 0.10180677473545074, "learning_rate": 8.864502736160571e-05, "loss": 0.0075, "step": 12068 }, { "epoch": 2.3361068111455108, "grad_norm": 0.05432697758078575, "learning_rate": 8.864320521260397e-05, "loss": 0.0068, "step": 12069 }, { "epoch": 2.336300309597523, "grad_norm": 0.09860803186893463, "learning_rate": 8.864138293852507e-05, "loss": 0.0091, "step": 12070 }, { "epoch": 2.3364938080495357, "grad_norm": 0.07366777211427689, "learning_rate": 8.863956053937578e-05, "loss": 0.0059, "step": 12071 
}, { "epoch": 2.336687306501548, "grad_norm": 0.10002682358026505, "learning_rate": 8.863773801516291e-05, "loss": 0.0076, "step": 12072 }, { "epoch": 2.3368808049535605, "grad_norm": 0.07245620340108871, "learning_rate": 8.863591536589321e-05, "loss": 0.0055, "step": 12073 }, { "epoch": 2.337074303405573, "grad_norm": 0.061147067695856094, "learning_rate": 8.863409259157345e-05, "loss": 0.0073, "step": 12074 }, { "epoch": 2.337267801857585, "grad_norm": 0.0881192535161972, "learning_rate": 8.863226969221046e-05, "loss": 0.008, "step": 12075 }, { "epoch": 2.3374613003095974, "grad_norm": 0.042857591062784195, "learning_rate": 8.863044666781094e-05, "loss": 0.0079, "step": 12076 }, { "epoch": 2.33765479876161, "grad_norm": 0.07810694724321365, "learning_rate": 8.862862351838172e-05, "loss": 0.0079, "step": 12077 }, { "epoch": 2.3378482972136223, "grad_norm": 0.06440430879592896, "learning_rate": 8.862680024392958e-05, "loss": 0.0061, "step": 12078 }, { "epoch": 2.3380417956656347, "grad_norm": 0.051256950944662094, "learning_rate": 8.862497684446127e-05, "loss": 0.0085, "step": 12079 }, { "epoch": 2.338235294117647, "grad_norm": 0.12379339337348938, "learning_rate": 8.862315331998358e-05, "loss": 0.0071, "step": 12080 }, { "epoch": 2.3384287925696596, "grad_norm": 0.050675682723522186, "learning_rate": 8.862132967050329e-05, "loss": 0.0077, "step": 12081 }, { "epoch": 2.3386222910216716, "grad_norm": 0.11536518484354019, "learning_rate": 8.861950589602718e-05, "loss": 0.0065, "step": 12082 }, { "epoch": 2.338815789473684, "grad_norm": 0.09373931586742401, "learning_rate": 8.861768199656204e-05, "loss": 0.0079, "step": 12083 }, { "epoch": 2.3390092879256965, "grad_norm": 0.06458155810832977, "learning_rate": 8.861585797211464e-05, "loss": 0.0061, "step": 12084 }, { "epoch": 2.339202786377709, "grad_norm": 0.1346144676208496, "learning_rate": 8.861403382269175e-05, "loss": 0.0069, "step": 12085 }, { "epoch": 2.3393962848297214, "grad_norm": 0.07429265975952148, "learning_rate": 8.861220954830018e-05, "loss": 0.0067, "step": 12086 }, { "epoch": 2.339589783281734, "grad_norm": 0.09605976194143295, "learning_rate": 8.861038514894668e-05, "loss": 0.0081, "step": 12087 }, { "epoch": 2.3397832817337463, "grad_norm": 0.15042316913604736, "learning_rate": 8.860856062463806e-05, "loss": 0.009, "step": 12088 }, { "epoch": 2.3399767801857587, "grad_norm": 0.0653618648648262, "learning_rate": 8.86067359753811e-05, "loss": 0.0058, "step": 12089 }, { "epoch": 2.3401702786377707, "grad_norm": 0.12126678228378296, "learning_rate": 8.860491120118258e-05, "loss": 0.0065, "step": 12090 }, { "epoch": 2.340363777089783, "grad_norm": 0.07179994881153107, "learning_rate": 8.860308630204927e-05, "loss": 0.0069, "step": 12091 }, { "epoch": 2.3405572755417956, "grad_norm": 0.09007991850376129, "learning_rate": 8.860126127798796e-05, "loss": 0.0072, "step": 12092 }, { "epoch": 2.340750773993808, "grad_norm": 0.09928242117166519, "learning_rate": 8.859943612900544e-05, "loss": 0.009, "step": 12093 }, { "epoch": 2.3409442724458205, "grad_norm": 0.09482098370790482, "learning_rate": 8.85976108551085e-05, "loss": 0.0084, "step": 12094 }, { "epoch": 2.341137770897833, "grad_norm": 0.10206848382949829, "learning_rate": 8.859578545630392e-05, "loss": 0.0082, "step": 12095 }, { "epoch": 2.3413312693498454, "grad_norm": 0.0889802798628807, "learning_rate": 8.859395993259851e-05, "loss": 0.0073, "step": 12096 }, { "epoch": 2.3415247678018574, "grad_norm": 0.057283006608486176, "learning_rate": 8.859213428399901e-05, "loss": 
0.0074, "step": 12097 }, { "epoch": 2.34171826625387, "grad_norm": 0.09794566035270691, "learning_rate": 8.859030851051223e-05, "loss": 0.0075, "step": 12098 }, { "epoch": 2.3419117647058822, "grad_norm": 0.04827797785401344, "learning_rate": 8.858848261214498e-05, "loss": 0.0068, "step": 12099 }, { "epoch": 2.3421052631578947, "grad_norm": 0.09466015547513962, "learning_rate": 8.858665658890401e-05, "loss": 0.0082, "step": 12100 }, { "epoch": 2.342298761609907, "grad_norm": 0.054270919412374496, "learning_rate": 8.858483044079613e-05, "loss": 0.0079, "step": 12101 }, { "epoch": 2.3424922600619196, "grad_norm": 0.08077429234981537, "learning_rate": 8.858300416782813e-05, "loss": 0.0073, "step": 12102 }, { "epoch": 2.342685758513932, "grad_norm": 0.066738560795784, "learning_rate": 8.85811777700068e-05, "loss": 0.0069, "step": 12103 }, { "epoch": 2.3428792569659445, "grad_norm": 0.0555586963891983, "learning_rate": 8.857935124733892e-05, "loss": 0.0081, "step": 12104 }, { "epoch": 2.343072755417957, "grad_norm": 0.08109856396913528, "learning_rate": 8.857752459983127e-05, "loss": 0.006, "step": 12105 }, { "epoch": 2.343266253869969, "grad_norm": 0.05081064626574516, "learning_rate": 8.857569782749068e-05, "loss": 0.0057, "step": 12106 }, { "epoch": 2.3434597523219813, "grad_norm": 0.05800464749336243, "learning_rate": 8.85738709303239e-05, "loss": 0.0074, "step": 12107 }, { "epoch": 2.343653250773994, "grad_norm": 0.09656113386154175, "learning_rate": 8.857204390833776e-05, "loss": 0.0064, "step": 12108 }, { "epoch": 2.343846749226006, "grad_norm": 0.04270905628800392, "learning_rate": 8.857021676153903e-05, "loss": 0.0061, "step": 12109 }, { "epoch": 2.3440402476780187, "grad_norm": 0.094060018658638, "learning_rate": 8.85683894899345e-05, "loss": 0.0061, "step": 12110 }, { "epoch": 2.344233746130031, "grad_norm": 0.04046180844306946, "learning_rate": 8.856656209353099e-05, "loss": 0.0081, "step": 12111 }, { "epoch": 2.344427244582043, "grad_norm": 0.09323317557573318, "learning_rate": 8.856473457233522e-05, "loss": 0.0069, "step": 12112 }, { "epoch": 2.3446207430340555, "grad_norm": 0.09177681803703308, "learning_rate": 8.856290692635408e-05, "loss": 0.0068, "step": 12113 }, { "epoch": 2.344814241486068, "grad_norm": 0.07183200120925903, "learning_rate": 8.856107915559433e-05, "loss": 0.007, "step": 12114 }, { "epoch": 2.3450077399380804, "grad_norm": 0.10705707967281342, "learning_rate": 8.855925126006273e-05, "loss": 0.0084, "step": 12115 }, { "epoch": 2.345201238390093, "grad_norm": 0.04395166039466858, "learning_rate": 8.855742323976611e-05, "loss": 0.007, "step": 12116 }, { "epoch": 2.3453947368421053, "grad_norm": 0.11188642680644989, "learning_rate": 8.855559509471127e-05, "loss": 0.0073, "step": 12117 }, { "epoch": 2.3455882352941178, "grad_norm": 0.09726176410913467, "learning_rate": 8.855376682490498e-05, "loss": 0.0079, "step": 12118 }, { "epoch": 2.34578173374613, "grad_norm": 0.06858990341424942, "learning_rate": 8.855193843035407e-05, "loss": 0.0074, "step": 12119 }, { "epoch": 2.3459752321981426, "grad_norm": 0.09029294550418854, "learning_rate": 8.855010991106531e-05, "loss": 0.0076, "step": 12120 }, { "epoch": 2.3461687306501546, "grad_norm": 0.046214908361434937, "learning_rate": 8.854828126704552e-05, "loss": 0.0077, "step": 12121 }, { "epoch": 2.346362229102167, "grad_norm": 0.06752128154039383, "learning_rate": 8.854645249830149e-05, "loss": 0.0066, "step": 12122 }, { "epoch": 2.3465557275541795, "grad_norm": 0.052691925317049026, "learning_rate": 
8.854462360483998e-05, "loss": 0.0063, "step": 12123 }, { "epoch": 2.346749226006192, "grad_norm": 0.056399013847112656, "learning_rate": 8.854279458666785e-05, "loss": 0.0074, "step": 12124 }, { "epoch": 2.3469427244582044, "grad_norm": 0.0604473352432251, "learning_rate": 8.854096544379188e-05, "loss": 0.0091, "step": 12125 }, { "epoch": 2.347136222910217, "grad_norm": 0.07286333292722702, "learning_rate": 8.853913617621885e-05, "loss": 0.0063, "step": 12126 }, { "epoch": 2.3473297213622293, "grad_norm": 0.09714728593826294, "learning_rate": 8.853730678395559e-05, "loss": 0.0081, "step": 12127 }, { "epoch": 2.3475232198142413, "grad_norm": 0.09803804755210876, "learning_rate": 8.853547726700886e-05, "loss": 0.0056, "step": 12128 }, { "epoch": 2.3477167182662537, "grad_norm": 0.09703809767961502, "learning_rate": 8.853364762538552e-05, "loss": 0.0063, "step": 12129 }, { "epoch": 2.347910216718266, "grad_norm": 0.07370792329311371, "learning_rate": 8.853181785909234e-05, "loss": 0.0082, "step": 12130 }, { "epoch": 2.3481037151702786, "grad_norm": 0.12598837912082672, "learning_rate": 8.852998796813611e-05, "loss": 0.0072, "step": 12131 }, { "epoch": 2.348297213622291, "grad_norm": 0.06903243064880371, "learning_rate": 8.852815795252365e-05, "loss": 0.0079, "step": 12132 }, { "epoch": 2.3484907120743035, "grad_norm": 0.11865042895078659, "learning_rate": 8.852632781226175e-05, "loss": 0.0066, "step": 12133 }, { "epoch": 2.348684210526316, "grad_norm": 0.08426133543252945, "learning_rate": 8.852449754735725e-05, "loss": 0.0064, "step": 12134 }, { "epoch": 2.3488777089783284, "grad_norm": 0.07816441357135773, "learning_rate": 8.852266715781691e-05, "loss": 0.0087, "step": 12135 }, { "epoch": 2.3490712074303404, "grad_norm": 0.14134398102760315, "learning_rate": 8.852083664364755e-05, "loss": 0.0084, "step": 12136 }, { "epoch": 2.349264705882353, "grad_norm": 0.05846814811229706, "learning_rate": 8.851900600485599e-05, "loss": 0.0086, "step": 12137 }, { "epoch": 2.3494582043343653, "grad_norm": 0.155465766787529, "learning_rate": 8.851717524144903e-05, "loss": 0.0075, "step": 12138 }, { "epoch": 2.3496517027863777, "grad_norm": 0.06096462160348892, "learning_rate": 8.851534435343347e-05, "loss": 0.0072, "step": 12139 }, { "epoch": 2.34984520123839, "grad_norm": 0.13388817012310028, "learning_rate": 8.85135133408161e-05, "loss": 0.0081, "step": 12140 }, { "epoch": 2.3500386996904026, "grad_norm": 0.07360633462667465, "learning_rate": 8.851168220360377e-05, "loss": 0.006, "step": 12141 }, { "epoch": 2.350232198142415, "grad_norm": 0.0767514631152153, "learning_rate": 8.850985094180324e-05, "loss": 0.0081, "step": 12142 }, { "epoch": 2.350425696594427, "grad_norm": 0.10926072299480438, "learning_rate": 8.850801955542136e-05, "loss": 0.0058, "step": 12143 }, { "epoch": 2.3506191950464395, "grad_norm": 0.12573805451393127, "learning_rate": 8.85061880444649e-05, "loss": 0.0078, "step": 12144 }, { "epoch": 2.350812693498452, "grad_norm": 0.09443216770887375, "learning_rate": 8.850435640894071e-05, "loss": 0.0082, "step": 12145 }, { "epoch": 2.3510061919504643, "grad_norm": 0.14286549389362335, "learning_rate": 8.850252464885556e-05, "loss": 0.0066, "step": 12146 }, { "epoch": 2.351199690402477, "grad_norm": 0.04105937108397484, "learning_rate": 8.850069276421628e-05, "loss": 0.009, "step": 12147 }, { "epoch": 2.3513931888544892, "grad_norm": 0.16748374700546265, "learning_rate": 8.84988607550297e-05, "loss": 0.0071, "step": 12148 }, { "epoch": 2.3515866873065017, "grad_norm": 0.08751238137483597, 
"learning_rate": 8.84970286213026e-05, "loss": 0.007, "step": 12149 }, { "epoch": 2.351780185758514, "grad_norm": 0.13438214361667633, "learning_rate": 8.849519636304179e-05, "loss": 0.0089, "step": 12150 }, { "epoch": 2.3519736842105265, "grad_norm": 0.1069614440202713, "learning_rate": 8.84933639802541e-05, "loss": 0.008, "step": 12151 }, { "epoch": 2.3521671826625385, "grad_norm": 0.07383346557617188, "learning_rate": 8.849153147294634e-05, "loss": 0.0079, "step": 12152 }, { "epoch": 2.352360681114551, "grad_norm": 0.08125007152557373, "learning_rate": 8.848969884112531e-05, "loss": 0.0066, "step": 12153 }, { "epoch": 2.3525541795665634, "grad_norm": 0.0628601610660553, "learning_rate": 8.848786608479784e-05, "loss": 0.0079, "step": 12154 }, { "epoch": 2.352747678018576, "grad_norm": 0.05162057653069496, "learning_rate": 8.848603320397073e-05, "loss": 0.0063, "step": 12155 }, { "epoch": 2.3529411764705883, "grad_norm": 0.05923933535814285, "learning_rate": 8.84842001986508e-05, "loss": 0.006, "step": 12156 }, { "epoch": 2.3531346749226008, "grad_norm": 0.03214014321565628, "learning_rate": 8.848236706884487e-05, "loss": 0.0073, "step": 12157 }, { "epoch": 2.353328173374613, "grad_norm": 0.05148535221815109, "learning_rate": 8.848053381455976e-05, "loss": 0.0063, "step": 12158 }, { "epoch": 2.353521671826625, "grad_norm": 0.04266021400690079, "learning_rate": 8.847870043580226e-05, "loss": 0.0077, "step": 12159 }, { "epoch": 2.3537151702786376, "grad_norm": 0.050240691751241684, "learning_rate": 8.847686693257919e-05, "loss": 0.0068, "step": 12160 }, { "epoch": 2.35390866873065, "grad_norm": 0.03978132829070091, "learning_rate": 8.84750333048974e-05, "loss": 0.0073, "step": 12161 }, { "epoch": 2.3541021671826625, "grad_norm": 0.06661392748355865, "learning_rate": 8.847319955276367e-05, "loss": 0.0066, "step": 12162 }, { "epoch": 2.354295665634675, "grad_norm": 0.04845372959971428, "learning_rate": 8.847136567618482e-05, "loss": 0.0064, "step": 12163 }, { "epoch": 2.3544891640866874, "grad_norm": 0.05855538323521614, "learning_rate": 8.846953167516771e-05, "loss": 0.0073, "step": 12164 }, { "epoch": 2.3546826625387, "grad_norm": 0.07821667194366455, "learning_rate": 8.846769754971912e-05, "loss": 0.0077, "step": 12165 }, { "epoch": 2.3548761609907123, "grad_norm": 0.07257946580648422, "learning_rate": 8.846586329984586e-05, "loss": 0.0077, "step": 12166 }, { "epoch": 2.3550696594427243, "grad_norm": 0.07881052047014236, "learning_rate": 8.846402892555478e-05, "loss": 0.0074, "step": 12167 }, { "epoch": 2.3552631578947367, "grad_norm": 0.10459300130605698, "learning_rate": 8.846219442685269e-05, "loss": 0.0051, "step": 12168 }, { "epoch": 2.355456656346749, "grad_norm": 0.03871067240834236, "learning_rate": 8.84603598037464e-05, "loss": 0.0079, "step": 12169 }, { "epoch": 2.3556501547987616, "grad_norm": 0.08776965737342834, "learning_rate": 8.845852505624273e-05, "loss": 0.007, "step": 12170 }, { "epoch": 2.355843653250774, "grad_norm": 0.04873554781079292, "learning_rate": 8.845669018434854e-05, "loss": 0.0074, "step": 12171 }, { "epoch": 2.3560371517027865, "grad_norm": 0.06841208040714264, "learning_rate": 8.845485518807059e-05, "loss": 0.0065, "step": 12172 }, { "epoch": 2.356230650154799, "grad_norm": 0.05353245884180069, "learning_rate": 8.845302006741572e-05, "loss": 0.0061, "step": 12173 }, { "epoch": 2.356424148606811, "grad_norm": 0.04983757436275482, "learning_rate": 8.84511848223908e-05, "loss": 0.0075, "step": 12174 }, { "epoch": 2.3566176470588234, "grad_norm": 
0.07702285796403885, "learning_rate": 8.844934945300258e-05, "loss": 0.0066, "step": 12175 }, { "epoch": 2.356811145510836, "grad_norm": 0.03718947246670723, "learning_rate": 8.844751395925795e-05, "loss": 0.0075, "step": 12176 }, { "epoch": 2.3570046439628483, "grad_norm": 0.06853106617927551, "learning_rate": 8.84456783411637e-05, "loss": 0.0086, "step": 12177 }, { "epoch": 2.3571981424148607, "grad_norm": 0.0514405332505703, "learning_rate": 8.844384259872665e-05, "loss": 0.0059, "step": 12178 }, { "epoch": 2.357391640866873, "grad_norm": 0.06944148242473602, "learning_rate": 8.844200673195364e-05, "loss": 0.008, "step": 12179 }, { "epoch": 2.3575851393188856, "grad_norm": 0.057727113366127014, "learning_rate": 8.84401707408515e-05, "loss": 0.0065, "step": 12180 }, { "epoch": 2.357778637770898, "grad_norm": 0.05925267934799194, "learning_rate": 8.843833462542703e-05, "loss": 0.0068, "step": 12181 }, { "epoch": 2.3579721362229105, "grad_norm": 0.06414493918418884, "learning_rate": 8.843649838568705e-05, "loss": 0.0069, "step": 12182 }, { "epoch": 2.3581656346749225, "grad_norm": 0.06744375079870224, "learning_rate": 8.843466202163845e-05, "loss": 0.0064, "step": 12183 }, { "epoch": 2.358359133126935, "grad_norm": 0.08786401897668839, "learning_rate": 8.843282553328799e-05, "loss": 0.0076, "step": 12184 }, { "epoch": 2.3585526315789473, "grad_norm": 0.05930031090974808, "learning_rate": 8.843098892064254e-05, "loss": 0.008, "step": 12185 }, { "epoch": 2.35874613003096, "grad_norm": 0.08483726531267166, "learning_rate": 8.84291521837089e-05, "loss": 0.0062, "step": 12186 }, { "epoch": 2.3589396284829722, "grad_norm": 0.0494270846247673, "learning_rate": 8.84273153224939e-05, "loss": 0.008, "step": 12187 }, { "epoch": 2.3591331269349847, "grad_norm": 0.06846529990434647, "learning_rate": 8.84254783370044e-05, "loss": 0.007, "step": 12188 }, { "epoch": 2.3593266253869967, "grad_norm": 0.0786777138710022, "learning_rate": 8.84236412272472e-05, "loss": 0.0091, "step": 12189 }, { "epoch": 2.359520123839009, "grad_norm": 0.034934330731630325, "learning_rate": 8.842180399322913e-05, "loss": 0.0072, "step": 12190 }, { "epoch": 2.3597136222910216, "grad_norm": 0.11534964293241501, "learning_rate": 8.841996663495704e-05, "loss": 0.0077, "step": 12191 }, { "epoch": 2.359907120743034, "grad_norm": 0.056253693997859955, "learning_rate": 8.841812915243773e-05, "loss": 0.0079, "step": 12192 }, { "epoch": 2.3601006191950464, "grad_norm": 0.08632271736860275, "learning_rate": 8.841629154567805e-05, "loss": 0.0072, "step": 12193 }, { "epoch": 2.360294117647059, "grad_norm": 0.08832719177007675, "learning_rate": 8.841445381468484e-05, "loss": 0.0082, "step": 12194 }, { "epoch": 2.3604876160990713, "grad_norm": 0.0469341017305851, "learning_rate": 8.841261595946495e-05, "loss": 0.0077, "step": 12195 }, { "epoch": 2.3606811145510838, "grad_norm": 0.12193705141544342, "learning_rate": 8.841077798002516e-05, "loss": 0.0066, "step": 12196 }, { "epoch": 2.360874613003096, "grad_norm": 0.06026066094636917, "learning_rate": 8.840893987637233e-05, "loss": 0.0083, "step": 12197 }, { "epoch": 2.361068111455108, "grad_norm": 0.12170394510030746, "learning_rate": 8.84071016485133e-05, "loss": 0.007, "step": 12198 }, { "epoch": 2.3612616099071206, "grad_norm": 0.13291779160499573, "learning_rate": 8.84052632964549e-05, "loss": 0.0069, "step": 12199 }, { "epoch": 2.361455108359133, "grad_norm": 0.0690777450799942, "learning_rate": 8.840342482020394e-05, "loss": 0.0075, "step": 12200 }, { "epoch": 2.3616486068111455, 
"grad_norm": 0.21412219107151031, "learning_rate": 8.84015862197673e-05, "loss": 0.0083, "step": 12201 }, { "epoch": 2.361842105263158, "grad_norm": 0.04289031773805618, "learning_rate": 8.83997474951518e-05, "loss": 0.0063, "step": 12202 }, { "epoch": 2.3620356037151704, "grad_norm": 0.17427808046340942, "learning_rate": 8.839790864636426e-05, "loss": 0.0082, "step": 12203 }, { "epoch": 2.362229102167183, "grad_norm": 0.12356719374656677, "learning_rate": 8.83960696734115e-05, "loss": 0.006, "step": 12204 }, { "epoch": 2.362422600619195, "grad_norm": 0.09803543239831924, "learning_rate": 8.839423057630042e-05, "loss": 0.006, "step": 12205 }, { "epoch": 2.3626160990712073, "grad_norm": 0.20070767402648926, "learning_rate": 8.83923913550378e-05, "loss": 0.0061, "step": 12206 }, { "epoch": 2.3628095975232197, "grad_norm": 0.029079465195536613, "learning_rate": 8.839055200963051e-05, "loss": 0.0077, "step": 12207 }, { "epoch": 2.363003095975232, "grad_norm": 0.19482474029064178, "learning_rate": 8.838871254008536e-05, "loss": 0.0073, "step": 12208 }, { "epoch": 2.3631965944272446, "grad_norm": 0.13815440237522125, "learning_rate": 8.838687294640921e-05, "loss": 0.0087, "step": 12209 }, { "epoch": 2.363390092879257, "grad_norm": 0.10611242055892944, "learning_rate": 8.83850332286089e-05, "loss": 0.0064, "step": 12210 }, { "epoch": 2.3635835913312695, "grad_norm": 0.19895826280117035, "learning_rate": 8.838319338669125e-05, "loss": 0.0081, "step": 12211 }, { "epoch": 2.363777089783282, "grad_norm": 0.04105197265744209, "learning_rate": 8.838135342066312e-05, "loss": 0.0069, "step": 12212 }, { "epoch": 2.363970588235294, "grad_norm": 0.1683667004108429, "learning_rate": 8.837951333053134e-05, "loss": 0.0086, "step": 12213 }, { "epoch": 2.3641640866873064, "grad_norm": 0.12331221997737885, "learning_rate": 8.837767311630275e-05, "loss": 0.0086, "step": 12214 }, { "epoch": 2.364357585139319, "grad_norm": 0.07906407117843628, "learning_rate": 8.83758327779842e-05, "loss": 0.0067, "step": 12215 }, { "epoch": 2.3645510835913313, "grad_norm": 0.18574358522891998, "learning_rate": 8.837399231558253e-05, "loss": 0.0081, "step": 12216 }, { "epoch": 2.3647445820433437, "grad_norm": 0.06503335386514664, "learning_rate": 8.83721517291046e-05, "loss": 0.0077, "step": 12217 }, { "epoch": 2.364938080495356, "grad_norm": 0.18635345995426178, "learning_rate": 8.83703110185572e-05, "loss": 0.0077, "step": 12218 }, { "epoch": 2.3651315789473686, "grad_norm": 0.11472516506910324, "learning_rate": 8.836847018394722e-05, "loss": 0.008, "step": 12219 }, { "epoch": 2.3653250773993806, "grad_norm": 0.11895402520895004, "learning_rate": 8.836662922528149e-05, "loss": 0.0086, "step": 12220 }, { "epoch": 2.365518575851393, "grad_norm": 0.1525161862373352, "learning_rate": 8.836478814256684e-05, "loss": 0.0076, "step": 12221 }, { "epoch": 2.3657120743034055, "grad_norm": 0.0769161581993103, "learning_rate": 8.836294693581014e-05, "loss": 0.0081, "step": 12222 }, { "epoch": 2.365905572755418, "grad_norm": 0.13213889300823212, "learning_rate": 8.836110560501824e-05, "loss": 0.0073, "step": 12223 }, { "epoch": 2.3660990712074303, "grad_norm": 0.09319533407688141, "learning_rate": 8.835926415019796e-05, "loss": 0.0088, "step": 12224 }, { "epoch": 2.366292569659443, "grad_norm": 0.11462554335594177, "learning_rate": 8.835742257135614e-05, "loss": 0.0066, "step": 12225 }, { "epoch": 2.3664860681114552, "grad_norm": 0.09950319677591324, "learning_rate": 8.835558086849965e-05, "loss": 0.0072, "step": 12226 }, { "epoch": 
2.3666795665634677, "grad_norm": 0.08747698366641998, "learning_rate": 8.835373904163534e-05, "loss": 0.0079, "step": 12227 }, { "epoch": 2.36687306501548, "grad_norm": 0.07427938282489777, "learning_rate": 8.835189709077004e-05, "loss": 0.0091, "step": 12228 }, { "epoch": 2.367066563467492, "grad_norm": 0.08201242238283157, "learning_rate": 8.83500550159106e-05, "loss": 0.0069, "step": 12229 }, { "epoch": 2.3672600619195046, "grad_norm": 0.06140216067433357, "learning_rate": 8.834821281706389e-05, "loss": 0.0058, "step": 12230 }, { "epoch": 2.367453560371517, "grad_norm": 0.054714806377887726, "learning_rate": 8.834637049423673e-05, "loss": 0.0075, "step": 12231 }, { "epoch": 2.3676470588235294, "grad_norm": 0.06216324120759964, "learning_rate": 8.834452804743598e-05, "loss": 0.0073, "step": 12232 }, { "epoch": 2.367840557275542, "grad_norm": 0.04226239398121834, "learning_rate": 8.834268547666848e-05, "loss": 0.0085, "step": 12233 }, { "epoch": 2.3680340557275543, "grad_norm": 0.05138035863637924, "learning_rate": 8.834084278194112e-05, "loss": 0.007, "step": 12234 }, { "epoch": 2.3682275541795663, "grad_norm": 0.06682328134775162, "learning_rate": 8.83389999632607e-05, "loss": 0.006, "step": 12235 }, { "epoch": 2.3684210526315788, "grad_norm": 0.061041396111249924, "learning_rate": 8.833715702063409e-05, "loss": 0.0078, "step": 12236 }, { "epoch": 2.368614551083591, "grad_norm": 0.07481725513935089, "learning_rate": 8.833531395406816e-05, "loss": 0.0079, "step": 12237 }, { "epoch": 2.3688080495356036, "grad_norm": 0.05087690427899361, "learning_rate": 8.833347076356974e-05, "loss": 0.0083, "step": 12238 }, { "epoch": 2.369001547987616, "grad_norm": 0.06720481067895889, "learning_rate": 8.833162744914568e-05, "loss": 0.0076, "step": 12239 }, { "epoch": 2.3691950464396285, "grad_norm": 0.036151912063360214, "learning_rate": 8.832978401080287e-05, "loss": 0.0066, "step": 12240 }, { "epoch": 2.369388544891641, "grad_norm": 0.05681924894452095, "learning_rate": 8.832794044854811e-05, "loss": 0.0084, "step": 12241 }, { "epoch": 2.3695820433436534, "grad_norm": 0.055250249803066254, "learning_rate": 8.832609676238828e-05, "loss": 0.0069, "step": 12242 }, { "epoch": 2.369775541795666, "grad_norm": 0.06616710871458054, "learning_rate": 8.832425295233026e-05, "loss": 0.0057, "step": 12243 }, { "epoch": 2.369969040247678, "grad_norm": 0.04887692257761955, "learning_rate": 8.832240901838085e-05, "loss": 0.0073, "step": 12244 }, { "epoch": 2.3701625386996903, "grad_norm": 0.06214949116110802, "learning_rate": 8.832056496054694e-05, "loss": 0.0065, "step": 12245 }, { "epoch": 2.3703560371517027, "grad_norm": 0.06238996982574463, "learning_rate": 8.831872077883538e-05, "loss": 0.0085, "step": 12246 }, { "epoch": 2.370549535603715, "grad_norm": 0.06326935440301895, "learning_rate": 8.831687647325302e-05, "loss": 0.0076, "step": 12247 }, { "epoch": 2.3707430340557276, "grad_norm": 0.06323402374982834, "learning_rate": 8.831503204380675e-05, "loss": 0.0076, "step": 12248 }, { "epoch": 2.37093653250774, "grad_norm": 0.07728347927331924, "learning_rate": 8.831318749050338e-05, "loss": 0.0074, "step": 12249 }, { "epoch": 2.3711300309597525, "grad_norm": 0.06043677031993866, "learning_rate": 8.83113428133498e-05, "loss": 0.0073, "step": 12250 }, { "epoch": 2.3713235294117645, "grad_norm": 0.08641237765550613, "learning_rate": 8.830949801235283e-05, "loss": 0.0065, "step": 12251 }, { "epoch": 2.371517027863777, "grad_norm": 0.05400272458791733, "learning_rate": 8.830765308751936e-05, "loss": 0.0067, "step": 
12252 }, { "epoch": 2.3717105263157894, "grad_norm": 0.06572531163692474, "learning_rate": 8.830580803885626e-05, "loss": 0.0065, "step": 12253 }, { "epoch": 2.371904024767802, "grad_norm": 0.08104090392589569, "learning_rate": 8.830396286637037e-05, "loss": 0.0065, "step": 12254 }, { "epoch": 2.3720975232198143, "grad_norm": 0.048125896602869034, "learning_rate": 8.830211757006854e-05, "loss": 0.0076, "step": 12255 }, { "epoch": 2.3722910216718267, "grad_norm": 0.06541003286838531, "learning_rate": 8.830027214995766e-05, "loss": 0.0065, "step": 12256 }, { "epoch": 2.372484520123839, "grad_norm": 0.0396432988345623, "learning_rate": 8.829842660604454e-05, "loss": 0.0065, "step": 12257 }, { "epoch": 2.3726780185758516, "grad_norm": 0.04623278230428696, "learning_rate": 8.829658093833613e-05, "loss": 0.0066, "step": 12258 }, { "epoch": 2.3728715170278636, "grad_norm": 0.08747962862253189, "learning_rate": 8.82947351468392e-05, "loss": 0.0073, "step": 12259 }, { "epoch": 2.373065015479876, "grad_norm": 0.018889060243964195, "learning_rate": 8.829288923156065e-05, "loss": 0.0056, "step": 12260 }, { "epoch": 2.3732585139318885, "grad_norm": 0.07891292124986649, "learning_rate": 8.829104319250735e-05, "loss": 0.0074, "step": 12261 }, { "epoch": 2.373452012383901, "grad_norm": 0.05743790790438652, "learning_rate": 8.828919702968613e-05, "loss": 0.0068, "step": 12262 }, { "epoch": 2.3736455108359134, "grad_norm": 0.0453944206237793, "learning_rate": 8.828735074310392e-05, "loss": 0.0075, "step": 12263 }, { "epoch": 2.373839009287926, "grad_norm": 0.1085319072008133, "learning_rate": 8.82855043327675e-05, "loss": 0.0071, "step": 12264 }, { "epoch": 2.3740325077399382, "grad_norm": 0.04518385976552963, "learning_rate": 8.82836577986838e-05, "loss": 0.0052, "step": 12265 }, { "epoch": 2.3742260061919502, "grad_norm": 0.12485428899526596, "learning_rate": 8.828181114085967e-05, "loss": 0.0069, "step": 12266 }, { "epoch": 2.3744195046439627, "grad_norm": 0.06198902055621147, "learning_rate": 8.827996435930195e-05, "loss": 0.0081, "step": 12267 }, { "epoch": 2.374613003095975, "grad_norm": 0.12278240919113159, "learning_rate": 8.827811745401751e-05, "loss": 0.0062, "step": 12268 }, { "epoch": 2.3748065015479876, "grad_norm": 0.10875800997018814, "learning_rate": 8.827627042501327e-05, "loss": 0.0076, "step": 12269 }, { "epoch": 2.375, "grad_norm": 0.08351029455661774, "learning_rate": 8.827442327229603e-05, "loss": 0.0056, "step": 12270 }, { "epoch": 2.3751934984520124, "grad_norm": 0.13420000672340393, "learning_rate": 8.827257599587269e-05, "loss": 0.0074, "step": 12271 }, { "epoch": 2.375386996904025, "grad_norm": 0.06070995330810547, "learning_rate": 8.827072859575009e-05, "loss": 0.0074, "step": 12272 }, { "epoch": 2.3755804953560373, "grad_norm": 0.10816431045532227, "learning_rate": 8.826888107193515e-05, "loss": 0.0068, "step": 12273 }, { "epoch": 2.3757739938080498, "grad_norm": 0.0909353569149971, "learning_rate": 8.826703342443469e-05, "loss": 0.0086, "step": 12274 }, { "epoch": 2.3759674922600618, "grad_norm": 0.09166315943002701, "learning_rate": 8.82651856532556e-05, "loss": 0.008, "step": 12275 }, { "epoch": 2.376160990712074, "grad_norm": 0.09342579543590546, "learning_rate": 8.826333775840475e-05, "loss": 0.0066, "step": 12276 }, { "epoch": 2.3763544891640866, "grad_norm": 0.035076212137937546, "learning_rate": 8.8261489739889e-05, "loss": 0.0057, "step": 12277 }, { "epoch": 2.376547987616099, "grad_norm": 0.07622487843036652, "learning_rate": 8.825964159771523e-05, "loss": 0.0068, 
"step": 12278 }, { "epoch": 2.3767414860681115, "grad_norm": 0.09599179029464722, "learning_rate": 8.825779333189031e-05, "loss": 0.0077, "step": 12279 }, { "epoch": 2.376934984520124, "grad_norm": 0.04871855303645134, "learning_rate": 8.82559449424211e-05, "loss": 0.0081, "step": 12280 }, { "epoch": 2.3771284829721364, "grad_norm": 0.13627798855304718, "learning_rate": 8.82540964293145e-05, "loss": 0.0068, "step": 12281 }, { "epoch": 2.3773219814241484, "grad_norm": 0.052802033722400665, "learning_rate": 8.825224779257736e-05, "loss": 0.0076, "step": 12282 }, { "epoch": 2.377515479876161, "grad_norm": 0.10751596838235855, "learning_rate": 8.825039903221655e-05, "loss": 0.0061, "step": 12283 }, { "epoch": 2.3777089783281733, "grad_norm": 0.09444631636142731, "learning_rate": 8.824855014823894e-05, "loss": 0.0084, "step": 12284 }, { "epoch": 2.3779024767801857, "grad_norm": 0.06327353417873383, "learning_rate": 8.824670114065142e-05, "loss": 0.0085, "step": 12285 }, { "epoch": 2.378095975232198, "grad_norm": 0.13568896055221558, "learning_rate": 8.824485200946087e-05, "loss": 0.0065, "step": 12286 }, { "epoch": 2.3782894736842106, "grad_norm": 0.05607634037733078, "learning_rate": 8.824300275467414e-05, "loss": 0.0077, "step": 12287 }, { "epoch": 2.378482972136223, "grad_norm": 0.11592847853899002, "learning_rate": 8.824115337629811e-05, "loss": 0.0072, "step": 12288 }, { "epoch": 2.3786764705882355, "grad_norm": 0.06244152411818504, "learning_rate": 8.823930387433968e-05, "loss": 0.0067, "step": 12289 }, { "epoch": 2.3788699690402475, "grad_norm": 0.08139632642269135, "learning_rate": 8.82374542488057e-05, "loss": 0.0089, "step": 12290 }, { "epoch": 2.37906346749226, "grad_norm": 0.06580932438373566, "learning_rate": 8.823560449970305e-05, "loss": 0.0071, "step": 12291 }, { "epoch": 2.3792569659442724, "grad_norm": 0.06396045535802841, "learning_rate": 8.823375462703861e-05, "loss": 0.0077, "step": 12292 }, { "epoch": 2.379450464396285, "grad_norm": 0.08256231993436813, "learning_rate": 8.823190463081927e-05, "loss": 0.0062, "step": 12293 }, { "epoch": 2.3796439628482973, "grad_norm": 0.05416454002261162, "learning_rate": 8.82300545110519e-05, "loss": 0.0072, "step": 12294 }, { "epoch": 2.3798374613003097, "grad_norm": 0.08177968859672546, "learning_rate": 8.822820426774337e-05, "loss": 0.007, "step": 12295 }, { "epoch": 2.380030959752322, "grad_norm": 0.06447269022464752, "learning_rate": 8.822635390090055e-05, "loss": 0.007, "step": 12296 }, { "epoch": 2.380224458204334, "grad_norm": 0.06847414374351501, "learning_rate": 8.822450341053036e-05, "loss": 0.0067, "step": 12297 }, { "epoch": 2.3804179566563466, "grad_norm": 0.09637191891670227, "learning_rate": 8.822265279663964e-05, "loss": 0.0073, "step": 12298 }, { "epoch": 2.380611455108359, "grad_norm": 0.07016658037900925, "learning_rate": 8.822080205923528e-05, "loss": 0.0077, "step": 12299 }, { "epoch": 2.3808049535603715, "grad_norm": 0.08634940534830093, "learning_rate": 8.821895119832417e-05, "loss": 0.0068, "step": 12300 }, { "epoch": 2.380998452012384, "grad_norm": 0.07712777704000473, "learning_rate": 8.821710021391318e-05, "loss": 0.0076, "step": 12301 }, { "epoch": 2.3811919504643964, "grad_norm": 0.07876093685626984, "learning_rate": 8.82152491060092e-05, "loss": 0.0081, "step": 12302 }, { "epoch": 2.381385448916409, "grad_norm": 0.05978458747267723, "learning_rate": 8.821339787461909e-05, "loss": 0.0078, "step": 12303 }, { "epoch": 2.3815789473684212, "grad_norm": 0.10528303682804108, "learning_rate": 8.821154651974977e-05, 
"loss": 0.0082, "step": 12304 }, { "epoch": 2.3817724458204337, "grad_norm": 0.07067156583070755, "learning_rate": 8.82096950414081e-05, "loss": 0.0073, "step": 12305 }, { "epoch": 2.3819659442724457, "grad_norm": 0.10602452605962753, "learning_rate": 8.820784343960096e-05, "loss": 0.0055, "step": 12306 }, { "epoch": 2.382159442724458, "grad_norm": 0.057972051203250885, "learning_rate": 8.820599171433526e-05, "loss": 0.0076, "step": 12307 }, { "epoch": 2.3823529411764706, "grad_norm": 0.050907209515571594, "learning_rate": 8.820413986561784e-05, "loss": 0.0077, "step": 12308 }, { "epoch": 2.382546439628483, "grad_norm": 0.07473590224981308, "learning_rate": 8.820228789345562e-05, "loss": 0.0082, "step": 12309 }, { "epoch": 2.3827399380804954, "grad_norm": 0.02897462248802185, "learning_rate": 8.820043579785549e-05, "loss": 0.0071, "step": 12310 }, { "epoch": 2.382933436532508, "grad_norm": 0.08054758608341217, "learning_rate": 8.81985835788243e-05, "loss": 0.0062, "step": 12311 }, { "epoch": 2.38312693498452, "grad_norm": 0.0500534251332283, "learning_rate": 8.819673123636896e-05, "loss": 0.0078, "step": 12312 }, { "epoch": 2.3833204334365323, "grad_norm": 0.06901863217353821, "learning_rate": 8.819487877049637e-05, "loss": 0.0066, "step": 12313 }, { "epoch": 2.3835139318885448, "grad_norm": 0.0412907674908638, "learning_rate": 8.819302618121338e-05, "loss": 0.0068, "step": 12314 }, { "epoch": 2.383707430340557, "grad_norm": 0.053824931383132935, "learning_rate": 8.81911734685269e-05, "loss": 0.0069, "step": 12315 }, { "epoch": 2.3839009287925697, "grad_norm": 0.07008542865514755, "learning_rate": 8.818932063244382e-05, "loss": 0.0079, "step": 12316 }, { "epoch": 2.384094427244582, "grad_norm": 0.037818003445863724, "learning_rate": 8.818746767297103e-05, "loss": 0.0074, "step": 12317 }, { "epoch": 2.3842879256965945, "grad_norm": 0.06286454945802689, "learning_rate": 8.81856145901154e-05, "loss": 0.0078, "step": 12318 }, { "epoch": 2.384481424148607, "grad_norm": 0.09350397437810898, "learning_rate": 8.818376138388384e-05, "loss": 0.0085, "step": 12319 }, { "epoch": 2.3846749226006194, "grad_norm": 0.05636252090334892, "learning_rate": 8.818190805428323e-05, "loss": 0.007, "step": 12320 }, { "epoch": 2.3848684210526314, "grad_norm": 0.12218731641769409, "learning_rate": 8.818005460132047e-05, "loss": 0.0063, "step": 12321 }, { "epoch": 2.385061919504644, "grad_norm": 0.038995981216430664, "learning_rate": 8.817820102500244e-05, "loss": 0.0076, "step": 12322 }, { "epoch": 2.3852554179566563, "grad_norm": 0.1300221085548401, "learning_rate": 8.817634732533602e-05, "loss": 0.0064, "step": 12323 }, { "epoch": 2.3854489164086687, "grad_norm": 0.0651562288403511, "learning_rate": 8.817449350232813e-05, "loss": 0.0079, "step": 12324 }, { "epoch": 2.385642414860681, "grad_norm": 0.10630830377340317, "learning_rate": 8.817263955598565e-05, "loss": 0.0069, "step": 12325 }, { "epoch": 2.3858359133126936, "grad_norm": 0.0994807779788971, "learning_rate": 8.817078548631545e-05, "loss": 0.0061, "step": 12326 }, { "epoch": 2.386029411764706, "grad_norm": 0.07337962090969086, "learning_rate": 8.816893129332445e-05, "loss": 0.0077, "step": 12327 }, { "epoch": 2.386222910216718, "grad_norm": 0.11512835323810577, "learning_rate": 8.816707697701955e-05, "loss": 0.0063, "step": 12328 }, { "epoch": 2.3864164086687305, "grad_norm": 0.04968941584229469, "learning_rate": 8.816522253740763e-05, "loss": 0.0078, "step": 12329 }, { "epoch": 2.386609907120743, "grad_norm": 0.11416079103946686, "learning_rate": 
8.816336797449558e-05, "loss": 0.0058, "step": 12330 }, { "epoch": 2.3868034055727554, "grad_norm": 0.06958616524934769, "learning_rate": 8.816151328829029e-05, "loss": 0.0073, "step": 12331 }, { "epoch": 2.386996904024768, "grad_norm": 0.0913148894906044, "learning_rate": 8.815965847879866e-05, "loss": 0.0069, "step": 12332 }, { "epoch": 2.3871904024767803, "grad_norm": 0.06453444063663483, "learning_rate": 8.815780354602761e-05, "loss": 0.0081, "step": 12333 }, { "epoch": 2.3873839009287927, "grad_norm": 0.07402622699737549, "learning_rate": 8.815594848998401e-05, "loss": 0.0066, "step": 12334 }, { "epoch": 2.387577399380805, "grad_norm": 0.09603775292634964, "learning_rate": 8.815409331067476e-05, "loss": 0.0065, "step": 12335 }, { "epoch": 2.387770897832817, "grad_norm": 0.09350483119487762, "learning_rate": 8.815223800810677e-05, "loss": 0.007, "step": 12336 }, { "epoch": 2.3879643962848296, "grad_norm": 0.10365781933069229, "learning_rate": 8.815038258228691e-05, "loss": 0.0069, "step": 12337 }, { "epoch": 2.388157894736842, "grad_norm": 0.06047738343477249, "learning_rate": 8.81485270332221e-05, "loss": 0.007, "step": 12338 }, { "epoch": 2.3883513931888545, "grad_norm": 0.12061583250761032, "learning_rate": 8.814667136091926e-05, "loss": 0.0056, "step": 12339 }, { "epoch": 2.388544891640867, "grad_norm": 0.02685730531811714, "learning_rate": 8.814481556538525e-05, "loss": 0.0066, "step": 12340 }, { "epoch": 2.3887383900928794, "grad_norm": 0.12305781245231628, "learning_rate": 8.814295964662697e-05, "loss": 0.0073, "step": 12341 }, { "epoch": 2.388931888544892, "grad_norm": 0.07699806988239288, "learning_rate": 8.814110360465133e-05, "loss": 0.0062, "step": 12342 }, { "epoch": 2.389125386996904, "grad_norm": 0.060614004731178284, "learning_rate": 8.813924743946524e-05, "loss": 0.0068, "step": 12343 }, { "epoch": 2.3893188854489162, "grad_norm": 0.11637945473194122, "learning_rate": 8.81373911510756e-05, "loss": 0.0085, "step": 12344 }, { "epoch": 2.3895123839009287, "grad_norm": 0.03232314810156822, "learning_rate": 8.81355347394893e-05, "loss": 0.0084, "step": 12345 }, { "epoch": 2.389705882352941, "grad_norm": 0.06646016240119934, "learning_rate": 8.813367820471325e-05, "loss": 0.0068, "step": 12346 }, { "epoch": 2.3898993808049536, "grad_norm": 0.07909586280584335, "learning_rate": 8.813182154675436e-05, "loss": 0.0079, "step": 12347 }, { "epoch": 2.390092879256966, "grad_norm": 0.061282265931367874, "learning_rate": 8.812996476561951e-05, "loss": 0.0079, "step": 12348 }, { "epoch": 2.3902863777089784, "grad_norm": 0.08037558197975159, "learning_rate": 8.81281078613156e-05, "loss": 0.0065, "step": 12349 }, { "epoch": 2.390479876160991, "grad_norm": 0.03610825166106224, "learning_rate": 8.812625083384957e-05, "loss": 0.0075, "step": 12350 }, { "epoch": 2.3906733746130033, "grad_norm": 0.0727684274315834, "learning_rate": 8.81243936832283e-05, "loss": 0.0057, "step": 12351 }, { "epoch": 2.3908668730650153, "grad_norm": 0.049919161945581436, "learning_rate": 8.812253640945867e-05, "loss": 0.0066, "step": 12352 }, { "epoch": 2.3910603715170278, "grad_norm": 0.0413241907954216, "learning_rate": 8.812067901254764e-05, "loss": 0.0068, "step": 12353 }, { "epoch": 2.39125386996904, "grad_norm": 0.1412692815065384, "learning_rate": 8.811882149250207e-05, "loss": 0.0074, "step": 12354 }, { "epoch": 2.3914473684210527, "grad_norm": 0.05643729120492935, "learning_rate": 8.811696384932889e-05, "loss": 0.0075, "step": 12355 }, { "epoch": 2.391640866873065, "grad_norm": 0.13195617496967316, 
"learning_rate": 8.8115106083035e-05, "loss": 0.0066, "step": 12356 }, { "epoch": 2.3918343653250775, "grad_norm": 0.08564246445894241, "learning_rate": 8.81132481936273e-05, "loss": 0.0057, "step": 12357 }, { "epoch": 2.3920278637770895, "grad_norm": 0.04841529205441475, "learning_rate": 8.81113901811127e-05, "loss": 0.0067, "step": 12358 }, { "epoch": 2.392221362229102, "grad_norm": 0.20492614805698395, "learning_rate": 8.810953204549812e-05, "loss": 0.0071, "step": 12359 }, { "epoch": 2.3924148606811144, "grad_norm": 0.06396416574716568, "learning_rate": 8.810767378679044e-05, "loss": 0.0072, "step": 12360 }, { "epoch": 2.392608359133127, "grad_norm": 0.1290612816810608, "learning_rate": 8.810581540499661e-05, "loss": 0.0053, "step": 12361 }, { "epoch": 2.3928018575851393, "grad_norm": 0.12059379369020462, "learning_rate": 8.81039569001235e-05, "loss": 0.0072, "step": 12362 }, { "epoch": 2.3929953560371517, "grad_norm": 0.08482629805803299, "learning_rate": 8.810209827217805e-05, "loss": 0.0083, "step": 12363 }, { "epoch": 2.393188854489164, "grad_norm": 0.10977520048618317, "learning_rate": 8.810023952116716e-05, "loss": 0.0079, "step": 12364 }, { "epoch": 2.3933823529411766, "grad_norm": 0.045660536736249924, "learning_rate": 8.809838064709771e-05, "loss": 0.0065, "step": 12365 }, { "epoch": 2.393575851393189, "grad_norm": 0.07681754976511002, "learning_rate": 8.809652164997665e-05, "loss": 0.006, "step": 12366 }, { "epoch": 2.393769349845201, "grad_norm": 0.08800586313009262, "learning_rate": 8.809466252981088e-05, "loss": 0.0065, "step": 12367 }, { "epoch": 2.3939628482972135, "grad_norm": 0.031241996213793755, "learning_rate": 8.809280328660731e-05, "loss": 0.0069, "step": 12368 }, { "epoch": 2.394156346749226, "grad_norm": 0.08773436397314072, "learning_rate": 8.809094392037283e-05, "loss": 0.0066, "step": 12369 }, { "epoch": 2.3943498452012384, "grad_norm": 0.0471884161233902, "learning_rate": 8.80890844311144e-05, "loss": 0.0076, "step": 12370 }, { "epoch": 2.394543343653251, "grad_norm": 0.06859033554792404, "learning_rate": 8.80872248188389e-05, "loss": 0.0067, "step": 12371 }, { "epoch": 2.3947368421052633, "grad_norm": 0.03982043266296387, "learning_rate": 8.808536508355325e-05, "loss": 0.0076, "step": 12372 }, { "epoch": 2.3949303405572757, "grad_norm": 0.06936199963092804, "learning_rate": 8.808350522526438e-05, "loss": 0.0079, "step": 12373 }, { "epoch": 2.3951238390092877, "grad_norm": 0.04441891238093376, "learning_rate": 8.808164524397916e-05, "loss": 0.007, "step": 12374 }, { "epoch": 2.3953173374613, "grad_norm": 0.0661642923951149, "learning_rate": 8.807978513970455e-05, "loss": 0.0082, "step": 12375 }, { "epoch": 2.3955108359133126, "grad_norm": 0.06305291503667831, "learning_rate": 8.807792491244746e-05, "loss": 0.0075, "step": 12376 }, { "epoch": 2.395704334365325, "grad_norm": 0.047537270933389664, "learning_rate": 8.807606456221477e-05, "loss": 0.0068, "step": 12377 }, { "epoch": 2.3958978328173375, "grad_norm": 0.06492634117603302, "learning_rate": 8.807420408901344e-05, "loss": 0.0068, "step": 12378 }, { "epoch": 2.39609133126935, "grad_norm": 0.02748951129615307, "learning_rate": 8.807234349285037e-05, "loss": 0.0058, "step": 12379 }, { "epoch": 2.3962848297213624, "grad_norm": 0.06358236819505692, "learning_rate": 8.807048277373248e-05, "loss": 0.007, "step": 12380 }, { "epoch": 2.396478328173375, "grad_norm": 0.06442726403474808, "learning_rate": 8.806862193166667e-05, "loss": 0.0086, "step": 12381 }, { "epoch": 2.396671826625387, "grad_norm": 
0.08657373487949371, "learning_rate": 8.806676096665989e-05, "loss": 0.0066, "step": 12382 }, { "epoch": 2.3968653250773992, "grad_norm": 0.048973485827445984, "learning_rate": 8.806489987871901e-05, "loss": 0.0065, "step": 12383 }, { "epoch": 2.3970588235294117, "grad_norm": 0.07690861821174622, "learning_rate": 8.8063038667851e-05, "loss": 0.0054, "step": 12384 }, { "epoch": 2.397252321981424, "grad_norm": 0.04130677133798599, "learning_rate": 8.806117733406277e-05, "loss": 0.0075, "step": 12385 }, { "epoch": 2.3974458204334366, "grad_norm": 0.05734534561634064, "learning_rate": 8.805931587736122e-05, "loss": 0.0087, "step": 12386 }, { "epoch": 2.397639318885449, "grad_norm": 0.04449719190597534, "learning_rate": 8.805745429775327e-05, "loss": 0.008, "step": 12387 }, { "epoch": 2.3978328173374615, "grad_norm": 0.056097421795129776, "learning_rate": 8.805559259524585e-05, "loss": 0.0086, "step": 12388 }, { "epoch": 2.3980263157894735, "grad_norm": 0.06958400458097458, "learning_rate": 8.805373076984589e-05, "loss": 0.007, "step": 12389 }, { "epoch": 2.398219814241486, "grad_norm": 0.06283947825431824, "learning_rate": 8.80518688215603e-05, "loss": 0.0083, "step": 12390 }, { "epoch": 2.3984133126934983, "grad_norm": 0.07771102339029312, "learning_rate": 8.805000675039602e-05, "loss": 0.0074, "step": 12391 }, { "epoch": 2.3986068111455108, "grad_norm": 0.06208766996860504, "learning_rate": 8.804814455635995e-05, "loss": 0.0084, "step": 12392 }, { "epoch": 2.398800309597523, "grad_norm": 0.09814832359552383, "learning_rate": 8.804628223945901e-05, "loss": 0.0075, "step": 12393 }, { "epoch": 2.3989938080495357, "grad_norm": 0.08932669460773468, "learning_rate": 8.804441979970014e-05, "loss": 0.0082, "step": 12394 }, { "epoch": 2.399187306501548, "grad_norm": 0.07751605659723282, "learning_rate": 8.804255723709026e-05, "loss": 0.0058, "step": 12395 }, { "epoch": 2.3993808049535605, "grad_norm": 0.11102118343114853, "learning_rate": 8.80406945516363e-05, "loss": 0.0066, "step": 12396 }, { "epoch": 2.399574303405573, "grad_norm": 0.06735134869813919, "learning_rate": 8.803883174334518e-05, "loss": 0.0091, "step": 12397 }, { "epoch": 2.399767801857585, "grad_norm": 0.1220562532544136, "learning_rate": 8.803696881222383e-05, "loss": 0.0073, "step": 12398 }, { "epoch": 2.3999613003095974, "grad_norm": 0.08989403396844864, "learning_rate": 8.803510575827915e-05, "loss": 0.0071, "step": 12399 }, { "epoch": 2.40015479876161, "grad_norm": 0.08973491191864014, "learning_rate": 8.80332425815181e-05, "loss": 0.0075, "step": 12400 }, { "epoch": 2.4003482972136223, "grad_norm": 0.11245621740818024, "learning_rate": 8.80313792819476e-05, "loss": 0.0061, "step": 12401 }, { "epoch": 2.4005417956656347, "grad_norm": 0.0800066664814949, "learning_rate": 8.802951585957455e-05, "loss": 0.0068, "step": 12402 }, { "epoch": 2.400735294117647, "grad_norm": 0.10042498260736465, "learning_rate": 8.802765231440592e-05, "loss": 0.0084, "step": 12403 }, { "epoch": 2.4009287925696596, "grad_norm": 0.05852516368031502, "learning_rate": 8.80257886464486e-05, "loss": 0.0064, "step": 12404 }, { "epoch": 2.4011222910216716, "grad_norm": 0.10169674456119537, "learning_rate": 8.802392485570955e-05, "loss": 0.0071, "step": 12405 }, { "epoch": 2.401315789473684, "grad_norm": 0.06544376909732819, "learning_rate": 8.802206094219569e-05, "loss": 0.0075, "step": 12406 }, { "epoch": 2.4015092879256965, "grad_norm": 0.049766939133405685, "learning_rate": 8.802019690591392e-05, "loss": 0.0072, "step": 12407 }, { "epoch": 2.401702786377709, 
"grad_norm": 0.07441861927509308, "learning_rate": 8.801833274687122e-05, "loss": 0.0084, "step": 12408 }, { "epoch": 2.4018962848297214, "grad_norm": 0.03133264556527138, "learning_rate": 8.801646846507446e-05, "loss": 0.0073, "step": 12409 }, { "epoch": 2.402089783281734, "grad_norm": 0.07620552182197571, "learning_rate": 8.801460406053064e-05, "loss": 0.0068, "step": 12410 }, { "epoch": 2.4022832817337463, "grad_norm": 0.03872702643275261, "learning_rate": 8.801273953324665e-05, "loss": 0.007, "step": 12411 }, { "epoch": 2.4024767801857587, "grad_norm": 0.07029169052839279, "learning_rate": 8.801087488322941e-05, "loss": 0.0068, "step": 12412 }, { "epoch": 2.4026702786377707, "grad_norm": 0.0382515974342823, "learning_rate": 8.800901011048588e-05, "loss": 0.0068, "step": 12413 }, { "epoch": 2.402863777089783, "grad_norm": 0.06607620418071747, "learning_rate": 8.8007145215023e-05, "loss": 0.0069, "step": 12414 }, { "epoch": 2.4030572755417956, "grad_norm": 0.04354467615485191, "learning_rate": 8.800528019684768e-05, "loss": 0.0061, "step": 12415 }, { "epoch": 2.403250773993808, "grad_norm": 0.05460481345653534, "learning_rate": 8.800341505596687e-05, "loss": 0.0064, "step": 12416 }, { "epoch": 2.4034442724458205, "grad_norm": 0.05553014576435089, "learning_rate": 8.800154979238747e-05, "loss": 0.0081, "step": 12417 }, { "epoch": 2.403637770897833, "grad_norm": 0.04403351619839668, "learning_rate": 8.799968440611644e-05, "loss": 0.0083, "step": 12418 }, { "epoch": 2.4038312693498454, "grad_norm": 0.06422799080610275, "learning_rate": 8.799781889716074e-05, "loss": 0.0074, "step": 12419 }, { "epoch": 2.4040247678018574, "grad_norm": 0.03414693847298622, "learning_rate": 8.799595326552727e-05, "loss": 0.0071, "step": 12420 }, { "epoch": 2.40421826625387, "grad_norm": 0.07793442904949188, "learning_rate": 8.799408751122298e-05, "loss": 0.0075, "step": 12421 }, { "epoch": 2.4044117647058822, "grad_norm": 0.0671706274151802, "learning_rate": 8.799222163425478e-05, "loss": 0.0084, "step": 12422 }, { "epoch": 2.4046052631578947, "grad_norm": 0.06150360405445099, "learning_rate": 8.799035563462964e-05, "loss": 0.008, "step": 12423 }, { "epoch": 2.404798761609907, "grad_norm": 0.07171522825956345, "learning_rate": 8.79884895123545e-05, "loss": 0.0067, "step": 12424 }, { "epoch": 2.4049922600619196, "grad_norm": 0.06401397287845612, "learning_rate": 8.798662326743627e-05, "loss": 0.0064, "step": 12425 }, { "epoch": 2.405185758513932, "grad_norm": 0.07224816828966141, "learning_rate": 8.798475689988191e-05, "loss": 0.0073, "step": 12426 }, { "epoch": 2.4053792569659445, "grad_norm": 0.06679642200469971, "learning_rate": 8.798289040969836e-05, "loss": 0.0057, "step": 12427 }, { "epoch": 2.405572755417957, "grad_norm": 0.051542676985263824, "learning_rate": 8.798102379689254e-05, "loss": 0.0059, "step": 12428 }, { "epoch": 2.405766253869969, "grad_norm": 0.07373319566249847, "learning_rate": 8.797915706147139e-05, "loss": 0.0069, "step": 12429 }, { "epoch": 2.4059597523219813, "grad_norm": 0.05849955603480339, "learning_rate": 8.797729020344189e-05, "loss": 0.0063, "step": 12430 }, { "epoch": 2.406153250773994, "grad_norm": 0.06836937367916107, "learning_rate": 8.797542322281092e-05, "loss": 0.007, "step": 12431 }, { "epoch": 2.406346749226006, "grad_norm": 0.057330213487148285, "learning_rate": 8.797355611958547e-05, "loss": 0.0067, "step": 12432 }, { "epoch": 2.4065402476780187, "grad_norm": 0.05619954317808151, "learning_rate": 8.797168889377246e-05, "loss": 0.0085, "step": 12433 }, { "epoch": 
2.406733746130031, "grad_norm": 0.05555460602045059, "learning_rate": 8.796982154537883e-05, "loss": 0.0067, "step": 12434 }, { "epoch": 2.406927244582043, "grad_norm": 0.03490548953413963, "learning_rate": 8.796795407441154e-05, "loss": 0.0071, "step": 12435 }, { "epoch": 2.4071207430340555, "grad_norm": 0.05874503031373024, "learning_rate": 8.79660864808775e-05, "loss": 0.0072, "step": 12436 }, { "epoch": 2.407314241486068, "grad_norm": 0.037762537598609924, "learning_rate": 8.796421876478369e-05, "loss": 0.0076, "step": 12437 }, { "epoch": 2.4075077399380804, "grad_norm": 0.04460678622126579, "learning_rate": 8.796235092613702e-05, "loss": 0.007, "step": 12438 }, { "epoch": 2.407701238390093, "grad_norm": 0.035603880882263184, "learning_rate": 8.796048296494446e-05, "loss": 0.0072, "step": 12439 }, { "epoch": 2.4078947368421053, "grad_norm": 0.060924824327230453, "learning_rate": 8.795861488121295e-05, "loss": 0.0072, "step": 12440 }, { "epoch": 2.4080882352941178, "grad_norm": 0.03189678117632866, "learning_rate": 8.795674667494946e-05, "loss": 0.0067, "step": 12441 }, { "epoch": 2.40828173374613, "grad_norm": 0.05580885708332062, "learning_rate": 8.795487834616085e-05, "loss": 0.0061, "step": 12442 }, { "epoch": 2.4084752321981426, "grad_norm": 0.02420174703001976, "learning_rate": 8.795300989485416e-05, "loss": 0.0063, "step": 12443 }, { "epoch": 2.4086687306501546, "grad_norm": 0.061308860778808594, "learning_rate": 8.795114132103628e-05, "loss": 0.007, "step": 12444 }, { "epoch": 2.408862229102167, "grad_norm": 0.03384145349264145, "learning_rate": 8.794927262471418e-05, "loss": 0.0076, "step": 12445 }, { "epoch": 2.4090557275541795, "grad_norm": 0.0509631521999836, "learning_rate": 8.794740380589483e-05, "loss": 0.0074, "step": 12446 }, { "epoch": 2.409249226006192, "grad_norm": 0.057396918535232544, "learning_rate": 8.794553486458512e-05, "loss": 0.0059, "step": 12447 }, { "epoch": 2.4094427244582044, "grad_norm": 0.04521980136632919, "learning_rate": 8.794366580079203e-05, "loss": 0.0055, "step": 12448 }, { "epoch": 2.409636222910217, "grad_norm": 0.04610958695411682, "learning_rate": 8.794179661452252e-05, "loss": 0.0057, "step": 12449 }, { "epoch": 2.4098297213622293, "grad_norm": 0.05265658721327782, "learning_rate": 8.793992730578354e-05, "loss": 0.0068, "step": 12450 }, { "epoch": 2.4100232198142413, "grad_norm": 0.05137886479496956, "learning_rate": 8.7938057874582e-05, "loss": 0.0069, "step": 12451 }, { "epoch": 2.4102167182662537, "grad_norm": 0.08019188791513443, "learning_rate": 8.793618832092489e-05, "loss": 0.0077, "step": 12452 }, { "epoch": 2.410410216718266, "grad_norm": 0.050119951367378235, "learning_rate": 8.793431864481912e-05, "loss": 0.0069, "step": 12453 }, { "epoch": 2.4106037151702786, "grad_norm": 0.12478458881378174, "learning_rate": 8.79324488462717e-05, "loss": 0.0061, "step": 12454 }, { "epoch": 2.410797213622291, "grad_norm": 0.05136432871222496, "learning_rate": 8.793057892528955e-05, "loss": 0.0064, "step": 12455 }, { "epoch": 2.4109907120743035, "grad_norm": 0.10556064546108246, "learning_rate": 8.79287088818796e-05, "loss": 0.0072, "step": 12456 }, { "epoch": 2.411184210526316, "grad_norm": 0.09624336659908295, "learning_rate": 8.792683871604885e-05, "loss": 0.0065, "step": 12457 }, { "epoch": 2.4113777089783284, "grad_norm": 0.061688877642154694, "learning_rate": 8.792496842780422e-05, "loss": 0.0079, "step": 12458 }, { "epoch": 2.4115712074303404, "grad_norm": 0.10113586485385895, "learning_rate": 8.792309801715265e-05, "loss": 0.007, "step": 
12459 }, { "epoch": 2.411764705882353, "grad_norm": 0.047784384340047836, "learning_rate": 8.792122748410114e-05, "loss": 0.0081, "step": 12460 }, { "epoch": 2.4119582043343653, "grad_norm": 0.08424422889947891, "learning_rate": 8.791935682865661e-05, "loss": 0.0088, "step": 12461 }, { "epoch": 2.4121517027863777, "grad_norm": 0.06878138333559036, "learning_rate": 8.791748605082603e-05, "loss": 0.0084, "step": 12462 }, { "epoch": 2.41234520123839, "grad_norm": 0.12443887442350388, "learning_rate": 8.791561515061633e-05, "loss": 0.0076, "step": 12463 }, { "epoch": 2.4125386996904026, "grad_norm": 0.05694425851106644, "learning_rate": 8.79137441280345e-05, "loss": 0.0072, "step": 12464 }, { "epoch": 2.412732198142415, "grad_norm": 0.13650865852832794, "learning_rate": 8.791187298308746e-05, "loss": 0.0088, "step": 12465 }, { "epoch": 2.412925696594427, "grad_norm": 0.07717198878526688, "learning_rate": 8.79100017157822e-05, "loss": 0.0079, "step": 12466 }, { "epoch": 2.4131191950464395, "grad_norm": 0.10252586752176285, "learning_rate": 8.790813032612566e-05, "loss": 0.0073, "step": 12467 }, { "epoch": 2.413312693498452, "grad_norm": 0.13184507191181183, "learning_rate": 8.79062588141248e-05, "loss": 0.0075, "step": 12468 }, { "epoch": 2.4135061919504643, "grad_norm": 0.12105913460254669, "learning_rate": 8.790438717978658e-05, "loss": 0.008, "step": 12469 }, { "epoch": 2.413699690402477, "grad_norm": 0.09937749803066254, "learning_rate": 8.790251542311796e-05, "loss": 0.0065, "step": 12470 }, { "epoch": 2.4138931888544892, "grad_norm": 0.09213843196630478, "learning_rate": 8.790064354412589e-05, "loss": 0.0075, "step": 12471 }, { "epoch": 2.4140866873065017, "grad_norm": 0.10677170008420944, "learning_rate": 8.789877154281734e-05, "loss": 0.0069, "step": 12472 }, { "epoch": 2.414280185758514, "grad_norm": 0.07335101068019867, "learning_rate": 8.789689941919925e-05, "loss": 0.0067, "step": 12473 }, { "epoch": 2.4144736842105265, "grad_norm": 0.10801543295383453, "learning_rate": 8.789502717327861e-05, "loss": 0.0069, "step": 12474 }, { "epoch": 2.4146671826625385, "grad_norm": 0.04444852098822594, "learning_rate": 8.789315480506234e-05, "loss": 0.0068, "step": 12475 }, { "epoch": 2.414860681114551, "grad_norm": 0.11294344067573547, "learning_rate": 8.789128231455745e-05, "loss": 0.0062, "step": 12476 }, { "epoch": 2.4150541795665634, "grad_norm": 0.08640315383672714, "learning_rate": 8.788940970177087e-05, "loss": 0.0088, "step": 12477 }, { "epoch": 2.415247678018576, "grad_norm": 0.10103553533554077, "learning_rate": 8.788753696670959e-05, "loss": 0.0088, "step": 12478 }, { "epoch": 2.4154411764705883, "grad_norm": 0.09982174634933472, "learning_rate": 8.788566410938051e-05, "loss": 0.0105, "step": 12479 }, { "epoch": 2.4156346749226008, "grad_norm": 0.05383070930838585, "learning_rate": 8.788379112979066e-05, "loss": 0.0058, "step": 12480 }, { "epoch": 2.415828173374613, "grad_norm": 0.11364903301000595, "learning_rate": 8.788191802794696e-05, "loss": 0.0094, "step": 12481 }, { "epoch": 2.416021671826625, "grad_norm": 0.06322026997804642, "learning_rate": 8.78800448038564e-05, "loss": 0.0064, "step": 12482 }, { "epoch": 2.4162151702786376, "grad_norm": 0.10699562728404999, "learning_rate": 8.787817145752595e-05, "loss": 0.0074, "step": 12483 }, { "epoch": 2.41640866873065, "grad_norm": 0.06562285125255585, "learning_rate": 8.787629798896254e-05, "loss": 0.0054, "step": 12484 }, { "epoch": 2.4166021671826625, "grad_norm": 0.09474682807922363, "learning_rate": 8.787442439817316e-05, "loss": 
0.008, "step": 12485 }, { "epoch": 2.416795665634675, "grad_norm": 0.06683559715747833, "learning_rate": 8.787255068516477e-05, "loss": 0.0087, "step": 12486 }, { "epoch": 2.4169891640866874, "grad_norm": 0.075080506503582, "learning_rate": 8.787067684994435e-05, "loss": 0.0056, "step": 12487 }, { "epoch": 2.4171826625387, "grad_norm": 0.10189096629619598, "learning_rate": 8.786880289251885e-05, "loss": 0.0067, "step": 12488 }, { "epoch": 2.4173761609907123, "grad_norm": 0.09835579991340637, "learning_rate": 8.786692881289522e-05, "loss": 0.0067, "step": 12489 }, { "epoch": 2.4175696594427243, "grad_norm": 0.11471941322088242, "learning_rate": 8.786505461108046e-05, "loss": 0.0062, "step": 12490 }, { "epoch": 2.4177631578947367, "grad_norm": 0.0963226929306984, "learning_rate": 8.786318028708154e-05, "loss": 0.0072, "step": 12491 }, { "epoch": 2.417956656346749, "grad_norm": 0.12374009937047958, "learning_rate": 8.786130584090538e-05, "loss": 0.0065, "step": 12492 }, { "epoch": 2.4181501547987616, "grad_norm": 0.0703846737742424, "learning_rate": 8.7859431272559e-05, "loss": 0.0062, "step": 12493 }, { "epoch": 2.418343653250774, "grad_norm": 0.11794494837522507, "learning_rate": 8.785755658204937e-05, "loss": 0.0077, "step": 12494 }, { "epoch": 2.4185371517027865, "grad_norm": 0.0860137864947319, "learning_rate": 8.78556817693834e-05, "loss": 0.0066, "step": 12495 }, { "epoch": 2.418730650154799, "grad_norm": 0.09121827781200409, "learning_rate": 8.785380683456814e-05, "loss": 0.0079, "step": 12496 }, { "epoch": 2.418924148606811, "grad_norm": 0.10153733193874359, "learning_rate": 8.78519317776105e-05, "loss": 0.0069, "step": 12497 }, { "epoch": 2.4191176470588234, "grad_norm": 0.06535166501998901, "learning_rate": 8.785005659851747e-05, "loss": 0.0072, "step": 12498 }, { "epoch": 2.419311145510836, "grad_norm": 0.07475941628217697, "learning_rate": 8.784818129729604e-05, "loss": 0.0059, "step": 12499 }, { "epoch": 2.4195046439628483, "grad_norm": 0.07322774082422256, "learning_rate": 8.784630587395314e-05, "loss": 0.0078, "step": 12500 }, { "epoch": 2.4196981424148607, "grad_norm": 0.06536946445703506, "learning_rate": 8.78444303284958e-05, "loss": 0.0061, "step": 12501 }, { "epoch": 2.419891640866873, "grad_norm": 0.07658199220895767, "learning_rate": 8.784255466093094e-05, "loss": 0.0059, "step": 12502 }, { "epoch": 2.4200851393188856, "grad_norm": 0.07910432666540146, "learning_rate": 8.784067887126557e-05, "loss": 0.0065, "step": 12503 }, { "epoch": 2.420278637770898, "grad_norm": 0.05813656374812126, "learning_rate": 8.783880295950663e-05, "loss": 0.0067, "step": 12504 }, { "epoch": 2.4204721362229105, "grad_norm": 0.06497996300458908, "learning_rate": 8.783692692566112e-05, "loss": 0.0068, "step": 12505 }, { "epoch": 2.4206656346749225, "grad_norm": 0.056837327778339386, "learning_rate": 8.783505076973602e-05, "loss": 0.0064, "step": 12506 }, { "epoch": 2.420859133126935, "grad_norm": 0.04987050220370293, "learning_rate": 8.783317449173828e-05, "loss": 0.0082, "step": 12507 }, { "epoch": 2.4210526315789473, "grad_norm": 0.0775705873966217, "learning_rate": 8.783129809167489e-05, "loss": 0.0061, "step": 12508 }, { "epoch": 2.42124613003096, "grad_norm": 0.04881509765982628, "learning_rate": 8.782942156955281e-05, "loss": 0.0066, "step": 12509 }, { "epoch": 2.4214396284829722, "grad_norm": 0.06642332673072815, "learning_rate": 8.782754492537906e-05, "loss": 0.0069, "step": 12510 }, { "epoch": 2.4216331269349847, "grad_norm": 0.03829794377088547, "learning_rate": 8.782566815916056e-05, 
"loss": 0.0063, "step": 12511 }, { "epoch": 2.4218266253869967, "grad_norm": 0.05141960829496384, "learning_rate": 8.782379127090432e-05, "loss": 0.0069, "step": 12512 }, { "epoch": 2.422020123839009, "grad_norm": 0.053390294313430786, "learning_rate": 8.782191426061731e-05, "loss": 0.0065, "step": 12513 }, { "epoch": 2.4222136222910216, "grad_norm": 0.05073653161525726, "learning_rate": 8.782003712830652e-05, "loss": 0.0065, "step": 12514 }, { "epoch": 2.422407120743034, "grad_norm": 0.034051671624183655, "learning_rate": 8.781815987397892e-05, "loss": 0.0067, "step": 12515 }, { "epoch": 2.4226006191950464, "grad_norm": 0.06776779890060425, "learning_rate": 8.781628249764148e-05, "loss": 0.0068, "step": 12516 }, { "epoch": 2.422794117647059, "grad_norm": 0.04930603876709938, "learning_rate": 8.781440499930118e-05, "loss": 0.0064, "step": 12517 }, { "epoch": 2.4229876160990713, "grad_norm": 0.0714590921998024, "learning_rate": 8.781252737896502e-05, "loss": 0.0074, "step": 12518 }, { "epoch": 2.4231811145510838, "grad_norm": 0.056049734354019165, "learning_rate": 8.781064963663996e-05, "loss": 0.0076, "step": 12519 }, { "epoch": 2.423374613003096, "grad_norm": 0.061484429985284805, "learning_rate": 8.780877177233301e-05, "loss": 0.007, "step": 12520 }, { "epoch": 2.423568111455108, "grad_norm": 0.06368540972471237, "learning_rate": 8.78068937860511e-05, "loss": 0.0083, "step": 12521 }, { "epoch": 2.4237616099071206, "grad_norm": 0.07323099672794342, "learning_rate": 8.780501567780126e-05, "loss": 0.007, "step": 12522 }, { "epoch": 2.423955108359133, "grad_norm": 0.06782842427492142, "learning_rate": 8.780313744759047e-05, "loss": 0.0075, "step": 12523 }, { "epoch": 2.4241486068111455, "grad_norm": 0.08941460400819778, "learning_rate": 8.780125909542568e-05, "loss": 0.0073, "step": 12524 }, { "epoch": 2.424342105263158, "grad_norm": 0.04445980116724968, "learning_rate": 8.77993806213139e-05, "loss": 0.0084, "step": 12525 }, { "epoch": 2.4245356037151704, "grad_norm": 0.11781777441501617, "learning_rate": 8.779750202526209e-05, "loss": 0.0072, "step": 12526 }, { "epoch": 2.424729102167183, "grad_norm": 0.02374713495373726, "learning_rate": 8.779562330727725e-05, "loss": 0.008, "step": 12527 }, { "epoch": 2.424922600619195, "grad_norm": 0.11160033941268921, "learning_rate": 8.779374446736637e-05, "loss": 0.0065, "step": 12528 }, { "epoch": 2.4251160990712073, "grad_norm": 0.04342580959200859, "learning_rate": 8.779186550553644e-05, "loss": 0.0071, "step": 12529 }, { "epoch": 2.4253095975232197, "grad_norm": 0.09314553439617157, "learning_rate": 8.778998642179441e-05, "loss": 0.0059, "step": 12530 }, { "epoch": 2.425503095975232, "grad_norm": 0.03783273324370384, "learning_rate": 8.778810721614732e-05, "loss": 0.0071, "step": 12531 }, { "epoch": 2.4256965944272446, "grad_norm": 0.07093378901481628, "learning_rate": 8.77862278886021e-05, "loss": 0.0066, "step": 12532 }, { "epoch": 2.425890092879257, "grad_norm": 0.052752427756786346, "learning_rate": 8.778434843916577e-05, "loss": 0.0072, "step": 12533 }, { "epoch": 2.4260835913312695, "grad_norm": 0.06561873108148575, "learning_rate": 8.778246886784531e-05, "loss": 0.0079, "step": 12534 }, { "epoch": 2.426277089783282, "grad_norm": 0.05143190920352936, "learning_rate": 8.778058917464771e-05, "loss": 0.0063, "step": 12535 }, { "epoch": 2.426470588235294, "grad_norm": 0.047148920595645905, "learning_rate": 8.777870935957997e-05, "loss": 0.0095, "step": 12536 }, { "epoch": 2.4266640866873064, "grad_norm": 0.10070333629846573, "learning_rate": 
8.777682942264906e-05, "loss": 0.0062, "step": 12537 }, { "epoch": 2.426857585139319, "grad_norm": 0.04708515852689743, "learning_rate": 8.777494936386197e-05, "loss": 0.0055, "step": 12538 }, { "epoch": 2.4270510835913313, "grad_norm": 0.08685744553804398, "learning_rate": 8.777306918322569e-05, "loss": 0.0067, "step": 12539 }, { "epoch": 2.4272445820433437, "grad_norm": 0.05460350215435028, "learning_rate": 8.777118888074721e-05, "loss": 0.0056, "step": 12540 }, { "epoch": 2.427438080495356, "grad_norm": 0.06675580143928528, "learning_rate": 8.776930845643354e-05, "loss": 0.0072, "step": 12541 }, { "epoch": 2.4276315789473686, "grad_norm": 0.07101183384656906, "learning_rate": 8.776742791029165e-05, "loss": 0.0064, "step": 12542 }, { "epoch": 2.4278250773993806, "grad_norm": 0.057662900537252426, "learning_rate": 8.776554724232854e-05, "loss": 0.0081, "step": 12543 }, { "epoch": 2.428018575851393, "grad_norm": 0.09035048633813858, "learning_rate": 8.776366645255118e-05, "loss": 0.0065, "step": 12544 }, { "epoch": 2.4282120743034055, "grad_norm": 0.033093634992837906, "learning_rate": 8.77617855409666e-05, "loss": 0.0086, "step": 12545 }, { "epoch": 2.428405572755418, "grad_norm": 0.08696715533733368, "learning_rate": 8.775990450758177e-05, "loss": 0.0075, "step": 12546 }, { "epoch": 2.4285990712074303, "grad_norm": 0.04258587211370468, "learning_rate": 8.775802335240369e-05, "loss": 0.0075, "step": 12547 }, { "epoch": 2.428792569659443, "grad_norm": 0.04413749650120735, "learning_rate": 8.775614207543935e-05, "loss": 0.0081, "step": 12548 }, { "epoch": 2.4289860681114552, "grad_norm": 0.029611794278025627, "learning_rate": 8.775426067669574e-05, "loss": 0.0067, "step": 12549 }, { "epoch": 2.4291795665634677, "grad_norm": 0.05922165885567665, "learning_rate": 8.775237915617988e-05, "loss": 0.0083, "step": 12550 }, { "epoch": 2.42937306501548, "grad_norm": 0.05244598165154457, "learning_rate": 8.775049751389871e-05, "loss": 0.0059, "step": 12551 }, { "epoch": 2.429566563467492, "grad_norm": 0.057395536452531815, "learning_rate": 8.774861574985928e-05, "loss": 0.0067, "step": 12552 }, { "epoch": 2.4297600619195046, "grad_norm": 0.046899259090423584, "learning_rate": 8.774673386406856e-05, "loss": 0.0063, "step": 12553 }, { "epoch": 2.429953560371517, "grad_norm": 0.08069805055856705, "learning_rate": 8.774485185653356e-05, "loss": 0.0059, "step": 12554 }, { "epoch": 2.4301470588235294, "grad_norm": 0.05196727439761162, "learning_rate": 8.774296972726127e-05, "loss": 0.0072, "step": 12555 }, { "epoch": 2.430340557275542, "grad_norm": 0.0744376927614212, "learning_rate": 8.774108747625868e-05, "loss": 0.0063, "step": 12556 }, { "epoch": 2.4305340557275543, "grad_norm": 0.1431836187839508, "learning_rate": 8.77392051035328e-05, "loss": 0.0067, "step": 12557 }, { "epoch": 2.4307275541795663, "grad_norm": 0.06549681723117828, "learning_rate": 8.773732260909061e-05, "loss": 0.0069, "step": 12558 }, { "epoch": 2.4309210526315788, "grad_norm": 0.13394732773303986, "learning_rate": 8.773543999293912e-05, "loss": 0.0087, "step": 12559 }, { "epoch": 2.431114551083591, "grad_norm": 0.06761763244867325, "learning_rate": 8.773355725508535e-05, "loss": 0.0076, "step": 12560 }, { "epoch": 2.4313080495356036, "grad_norm": 0.11743268370628357, "learning_rate": 8.773167439553627e-05, "loss": 0.0068, "step": 12561 }, { "epoch": 2.431501547987616, "grad_norm": 0.13012167811393738, "learning_rate": 8.77297914142989e-05, "loss": 0.0084, "step": 12562 }, { "epoch": 2.4316950464396285, "grad_norm": 
0.07953320443630219, "learning_rate": 8.772790831138021e-05, "loss": 0.0062, "step": 12563 }, { "epoch": 2.431888544891641, "grad_norm": 0.1408635526895523, "learning_rate": 8.772602508678723e-05, "loss": 0.0073, "step": 12564 }, { "epoch": 2.4320820433436534, "grad_norm": 0.06935686618089676, "learning_rate": 8.772414174052696e-05, "loss": 0.007, "step": 12565 }, { "epoch": 2.432275541795666, "grad_norm": 0.1357344686985016, "learning_rate": 8.77222582726064e-05, "loss": 0.0075, "step": 12566 }, { "epoch": 2.432469040247678, "grad_norm": 0.08412830531597137, "learning_rate": 8.772037468303253e-05, "loss": 0.0064, "step": 12567 }, { "epoch": 2.4326625386996903, "grad_norm": 0.11924083530902863, "learning_rate": 8.771849097181238e-05, "loss": 0.0076, "step": 12568 }, { "epoch": 2.4328560371517027, "grad_norm": 0.09319625794887543, "learning_rate": 8.771660713895293e-05, "loss": 0.0079, "step": 12569 }, { "epoch": 2.433049535603715, "grad_norm": 0.09859920293092728, "learning_rate": 8.771472318446122e-05, "loss": 0.0067, "step": 12570 }, { "epoch": 2.4332430340557276, "grad_norm": 0.10967005044221878, "learning_rate": 8.77128391083442e-05, "loss": 0.0082, "step": 12571 }, { "epoch": 2.43343653250774, "grad_norm": 0.07165169715881348, "learning_rate": 8.771095491060892e-05, "loss": 0.0101, "step": 12572 }, { "epoch": 2.4336300309597525, "grad_norm": 0.09457601606845856, "learning_rate": 8.770907059126238e-05, "loss": 0.0074, "step": 12573 }, { "epoch": 2.4338235294117645, "grad_norm": 0.05686769634485245, "learning_rate": 8.770718615031155e-05, "loss": 0.0067, "step": 12574 }, { "epoch": 2.434017027863777, "grad_norm": 0.0974818617105484, "learning_rate": 8.770530158776347e-05, "loss": 0.008, "step": 12575 }, { "epoch": 2.4342105263157894, "grad_norm": 0.04872611165046692, "learning_rate": 8.770341690362514e-05, "loss": 0.0071, "step": 12576 }, { "epoch": 2.434404024767802, "grad_norm": 0.08680944889783859, "learning_rate": 8.770153209790359e-05, "loss": 0.0072, "step": 12577 }, { "epoch": 2.4345975232198143, "grad_norm": 0.06050538271665573, "learning_rate": 8.769964717060578e-05, "loss": 0.0071, "step": 12578 }, { "epoch": 2.4347910216718267, "grad_norm": 0.0894596129655838, "learning_rate": 8.769776212173872e-05, "loss": 0.0075, "step": 12579 }, { "epoch": 2.434984520123839, "grad_norm": 0.06125437840819359, "learning_rate": 8.769587695130946e-05, "loss": 0.006, "step": 12580 }, { "epoch": 2.4351780185758516, "grad_norm": 0.056984517723321915, "learning_rate": 8.769399165932498e-05, "loss": 0.0081, "step": 12581 }, { "epoch": 2.4353715170278636, "grad_norm": 0.08465749770402908, "learning_rate": 8.769210624579229e-05, "loss": 0.0069, "step": 12582 }, { "epoch": 2.435565015479876, "grad_norm": 0.03987405449151993, "learning_rate": 8.769022071071841e-05, "loss": 0.0064, "step": 12583 }, { "epoch": 2.4357585139318885, "grad_norm": 0.09323988854885101, "learning_rate": 8.768833505411036e-05, "loss": 0.0071, "step": 12584 }, { "epoch": 2.435952012383901, "grad_norm": 0.045035526156425476, "learning_rate": 8.76864492759751e-05, "loss": 0.0071, "step": 12585 }, { "epoch": 2.4361455108359134, "grad_norm": 0.10351884365081787, "learning_rate": 8.768456337631971e-05, "loss": 0.0063, "step": 12586 }, { "epoch": 2.436339009287926, "grad_norm": 0.08164646476507187, "learning_rate": 8.768267735515115e-05, "loss": 0.0083, "step": 12587 }, { "epoch": 2.4365325077399382, "grad_norm": 0.07867797464132309, "learning_rate": 8.768079121247645e-05, "loss": 0.0065, "step": 12588 }, { "epoch": 2.4367260061919502, 
"grad_norm": 0.11184379458427429, "learning_rate": 8.767890494830264e-05, "loss": 0.0063, "step": 12589 }, { "epoch": 2.4369195046439627, "grad_norm": 0.05280598998069763, "learning_rate": 8.767701856263668e-05, "loss": 0.0074, "step": 12590 }, { "epoch": 2.437113003095975, "grad_norm": 0.11664963513612747, "learning_rate": 8.767513205548564e-05, "loss": 0.0074, "step": 12591 }, { "epoch": 2.4373065015479876, "grad_norm": 0.06597291678190231, "learning_rate": 8.76732454268565e-05, "loss": 0.0079, "step": 12592 }, { "epoch": 2.4375, "grad_norm": 0.11813029646873474, "learning_rate": 8.767135867675629e-05, "loss": 0.0069, "step": 12593 }, { "epoch": 2.4376934984520124, "grad_norm": 0.06904938817024231, "learning_rate": 8.766947180519202e-05, "loss": 0.0065, "step": 12594 }, { "epoch": 2.437886996904025, "grad_norm": 0.07391807436943054, "learning_rate": 8.76675848121707e-05, "loss": 0.0082, "step": 12595 }, { "epoch": 2.4380804953560373, "grad_norm": 0.09446264803409576, "learning_rate": 8.766569769769935e-05, "loss": 0.0069, "step": 12596 }, { "epoch": 2.4382739938080498, "grad_norm": 0.037787504494190216, "learning_rate": 8.766381046178498e-05, "loss": 0.0074, "step": 12597 }, { "epoch": 2.4384674922600618, "grad_norm": 0.12602274119853973, "learning_rate": 8.766192310443462e-05, "loss": 0.007, "step": 12598 }, { "epoch": 2.438660990712074, "grad_norm": 0.025681940838694572, "learning_rate": 8.766003562565527e-05, "loss": 0.0058, "step": 12599 }, { "epoch": 2.4388544891640866, "grad_norm": 0.11361044645309448, "learning_rate": 8.765814802545396e-05, "loss": 0.0075, "step": 12600 }, { "epoch": 2.439047987616099, "grad_norm": 0.04650924727320671, "learning_rate": 8.76562603038377e-05, "loss": 0.0084, "step": 12601 }, { "epoch": 2.4392414860681115, "grad_norm": 0.06659270823001862, "learning_rate": 8.76543724608135e-05, "loss": 0.0067, "step": 12602 }, { "epoch": 2.439434984520124, "grad_norm": 0.051869455724954605, "learning_rate": 8.765248449638841e-05, "loss": 0.0071, "step": 12603 }, { "epoch": 2.4396284829721364, "grad_norm": 0.05408848077058792, "learning_rate": 8.765059641056941e-05, "loss": 0.0061, "step": 12604 }, { "epoch": 2.4398219814241484, "grad_norm": 0.04460520297288895, "learning_rate": 8.764870820336357e-05, "loss": 0.0062, "step": 12605 }, { "epoch": 2.440015479876161, "grad_norm": 0.03316323459148407, "learning_rate": 8.764681987477786e-05, "loss": 0.0078, "step": 12606 }, { "epoch": 2.4402089783281733, "grad_norm": 0.06489518284797668, "learning_rate": 8.764493142481932e-05, "loss": 0.0071, "step": 12607 }, { "epoch": 2.4404024767801857, "grad_norm": 0.03752891346812248, "learning_rate": 8.764304285349495e-05, "loss": 0.007, "step": 12608 }, { "epoch": 2.440595975232198, "grad_norm": 0.06522263586521149, "learning_rate": 8.76411541608118e-05, "loss": 0.008, "step": 12609 }, { "epoch": 2.4407894736842106, "grad_norm": 0.042305707931518555, "learning_rate": 8.763926534677689e-05, "loss": 0.0056, "step": 12610 }, { "epoch": 2.440982972136223, "grad_norm": 0.0891103595495224, "learning_rate": 8.763737641139724e-05, "loss": 0.0063, "step": 12611 }, { "epoch": 2.4411764705882355, "grad_norm": 0.06103471294045448, "learning_rate": 8.763548735467984e-05, "loss": 0.005, "step": 12612 }, { "epoch": 2.4413699690402475, "grad_norm": 0.09009630233049393, "learning_rate": 8.763359817663176e-05, "loss": 0.0068, "step": 12613 }, { "epoch": 2.44156346749226, "grad_norm": 0.08939304202795029, "learning_rate": 8.763170887726e-05, "loss": 0.0079, "step": 12614 }, { "epoch": 
2.4417569659442724, "grad_norm": 0.08613674342632294, "learning_rate": 8.762981945657158e-05, "loss": 0.0096, "step": 12615 }, { "epoch": 2.441950464396285, "grad_norm": 0.09065138548612595, "learning_rate": 8.762792991457353e-05, "loss": 0.006, "step": 12616 }, { "epoch": 2.4421439628482973, "grad_norm": 0.08045566827058792, "learning_rate": 8.762604025127289e-05, "loss": 0.0074, "step": 12617 }, { "epoch": 2.4423374613003097, "grad_norm": 0.07643098384141922, "learning_rate": 8.762415046667666e-05, "loss": 0.0068, "step": 12618 }, { "epoch": 2.442530959752322, "grad_norm": 0.08434311300516129, "learning_rate": 8.762226056079189e-05, "loss": 0.0066, "step": 12619 }, { "epoch": 2.442724458204334, "grad_norm": 0.0740840956568718, "learning_rate": 8.762037053362557e-05, "loss": 0.0075, "step": 12620 }, { "epoch": 2.4429179566563466, "grad_norm": 0.09649886190891266, "learning_rate": 8.761848038518477e-05, "loss": 0.0071, "step": 12621 }, { "epoch": 2.443111455108359, "grad_norm": 0.05958867073059082, "learning_rate": 8.76165901154765e-05, "loss": 0.0059, "step": 12622 }, { "epoch": 2.4433049535603715, "grad_norm": 0.05219507962465286, "learning_rate": 8.761469972450778e-05, "loss": 0.0058, "step": 12623 }, { "epoch": 2.443498452012384, "grad_norm": 0.06974366307258606, "learning_rate": 8.761280921228564e-05, "loss": 0.0075, "step": 12624 }, { "epoch": 2.4436919504643964, "grad_norm": 0.05718241631984711, "learning_rate": 8.761091857881712e-05, "loss": 0.0079, "step": 12625 }, { "epoch": 2.443885448916409, "grad_norm": 0.059777673333883286, "learning_rate": 8.760902782410922e-05, "loss": 0.0064, "step": 12626 }, { "epoch": 2.4440789473684212, "grad_norm": 0.08020026236772537, "learning_rate": 8.7607136948169e-05, "loss": 0.0067, "step": 12627 }, { "epoch": 2.4442724458204337, "grad_norm": 0.05031384527683258, "learning_rate": 8.760524595100347e-05, "loss": 0.0062, "step": 12628 }, { "epoch": 2.4444659442724457, "grad_norm": 0.10098808258771896, "learning_rate": 8.760335483261969e-05, "loss": 0.0067, "step": 12629 }, { "epoch": 2.444659442724458, "grad_norm": 0.0993543341755867, "learning_rate": 8.760146359302466e-05, "loss": 0.007, "step": 12630 }, { "epoch": 2.4448529411764706, "grad_norm": 0.06295992434024811, "learning_rate": 8.759957223222543e-05, "loss": 0.0071, "step": 12631 }, { "epoch": 2.445046439628483, "grad_norm": 0.14094530045986176, "learning_rate": 8.759768075022902e-05, "loss": 0.0067, "step": 12632 }, { "epoch": 2.4452399380804954, "grad_norm": 0.03671251982450485, "learning_rate": 8.759578914704246e-05, "loss": 0.0074, "step": 12633 }, { "epoch": 2.445433436532508, "grad_norm": 0.1359235644340515, "learning_rate": 8.759389742267278e-05, "loss": 0.0072, "step": 12634 }, { "epoch": 2.44562693498452, "grad_norm": 0.07912447303533554, "learning_rate": 8.759200557712704e-05, "loss": 0.0073, "step": 12635 }, { "epoch": 2.4458204334365323, "grad_norm": 0.049534332007169724, "learning_rate": 8.759011361041225e-05, "loss": 0.0071, "step": 12636 }, { "epoch": 2.4460139318885448, "grad_norm": 0.1816049963235855, "learning_rate": 8.758822152253544e-05, "loss": 0.0079, "step": 12637 }, { "epoch": 2.446207430340557, "grad_norm": 0.052461106330156326, "learning_rate": 8.758632931350364e-05, "loss": 0.006, "step": 12638 }, { "epoch": 2.4464009287925697, "grad_norm": 0.19575901329517365, "learning_rate": 8.758443698332392e-05, "loss": 0.0068, "step": 12639 }, { "epoch": 2.446594427244582, "grad_norm": 0.045008376240730286, "learning_rate": 8.758254453200329e-05, "loss": 0.0055, "step": 12640 
}, { "epoch": 2.4467879256965945, "grad_norm": 0.18244177103042603, "learning_rate": 8.758065195954878e-05, "loss": 0.0073, "step": 12641 }, { "epoch": 2.446981424148607, "grad_norm": 0.06566983461380005, "learning_rate": 8.757875926596743e-05, "loss": 0.0065, "step": 12642 }, { "epoch": 2.4471749226006194, "grad_norm": 0.12237673997879028, "learning_rate": 8.757686645126629e-05, "loss": 0.0073, "step": 12643 }, { "epoch": 2.4473684210526314, "grad_norm": 0.12390664964914322, "learning_rate": 8.757497351545239e-05, "loss": 0.0084, "step": 12644 }, { "epoch": 2.447561919504644, "grad_norm": 0.06813935190439224, "learning_rate": 8.757308045853275e-05, "loss": 0.0058, "step": 12645 }, { "epoch": 2.4477554179566563, "grad_norm": 0.13781288266181946, "learning_rate": 8.757118728051444e-05, "loss": 0.0077, "step": 12646 }, { "epoch": 2.4479489164086687, "grad_norm": 0.06282957643270493, "learning_rate": 8.756929398140446e-05, "loss": 0.0084, "step": 12647 }, { "epoch": 2.448142414860681, "grad_norm": 0.12141992896795273, "learning_rate": 8.756740056120989e-05, "loss": 0.0065, "step": 12648 }, { "epoch": 2.4483359133126936, "grad_norm": 0.10381557792425156, "learning_rate": 8.756550701993776e-05, "loss": 0.0083, "step": 12649 }, { "epoch": 2.448529411764706, "grad_norm": 0.11770200729370117, "learning_rate": 8.756361335759506e-05, "loss": 0.0065, "step": 12650 }, { "epoch": 2.448722910216718, "grad_norm": 0.1081673726439476, "learning_rate": 8.75617195741889e-05, "loss": 0.007, "step": 12651 }, { "epoch": 2.4489164086687305, "grad_norm": 0.06667179614305496, "learning_rate": 8.755982566972628e-05, "loss": 0.0082, "step": 12652 }, { "epoch": 2.449109907120743, "grad_norm": 0.12421314418315887, "learning_rate": 8.755793164421425e-05, "loss": 0.0088, "step": 12653 }, { "epoch": 2.4493034055727554, "grad_norm": 0.09632324427366257, "learning_rate": 8.755603749765984e-05, "loss": 0.0075, "step": 12654 }, { "epoch": 2.449496904024768, "grad_norm": 0.09158153086900711, "learning_rate": 8.755414323007015e-05, "loss": 0.0067, "step": 12655 }, { "epoch": 2.4496904024767803, "grad_norm": 0.11558675020933151, "learning_rate": 8.755224884145213e-05, "loss": 0.0086, "step": 12656 }, { "epoch": 2.4498839009287927, "grad_norm": 0.10750492662191391, "learning_rate": 8.75503543318129e-05, "loss": 0.0066, "step": 12657 }, { "epoch": 2.450077399380805, "grad_norm": 0.07964865118265152, "learning_rate": 8.754845970115946e-05, "loss": 0.0085, "step": 12658 }, { "epoch": 2.450270897832817, "grad_norm": 0.09829550236463547, "learning_rate": 8.754656494949888e-05, "loss": 0.0078, "step": 12659 }, { "epoch": 2.4504643962848296, "grad_norm": 0.09051261842250824, "learning_rate": 8.754467007683816e-05, "loss": 0.0065, "step": 12660 }, { "epoch": 2.450657894736842, "grad_norm": 0.057593636214733124, "learning_rate": 8.75427750831844e-05, "loss": 0.0071, "step": 12661 }, { "epoch": 2.4508513931888545, "grad_norm": 0.09828251600265503, "learning_rate": 8.754087996854462e-05, "loss": 0.0075, "step": 12662 }, { "epoch": 2.451044891640867, "grad_norm": 0.0730709582567215, "learning_rate": 8.753898473292586e-05, "loss": 0.0074, "step": 12663 }, { "epoch": 2.4512383900928794, "grad_norm": 0.07361666113138199, "learning_rate": 8.753708937633516e-05, "loss": 0.0075, "step": 12664 }, { "epoch": 2.451431888544892, "grad_norm": 0.07400059700012207, "learning_rate": 8.753519389877961e-05, "loss": 0.0089, "step": 12665 }, { "epoch": 2.451625386996904, "grad_norm": 0.05720933899283409, "learning_rate": 8.753329830026619e-05, "loss": 0.008, 
"step": 12666 }, { "epoch": 2.4518188854489162, "grad_norm": 0.07255717366933823, "learning_rate": 8.753140258080201e-05, "loss": 0.0068, "step": 12667 }, { "epoch": 2.4520123839009287, "grad_norm": 0.04899219051003456, "learning_rate": 8.75295067403941e-05, "loss": 0.0072, "step": 12668 }, { "epoch": 2.452205882352941, "grad_norm": 0.07918205112218857, "learning_rate": 8.752761077904948e-05, "loss": 0.0074, "step": 12669 }, { "epoch": 2.4523993808049536, "grad_norm": 0.07373844087123871, "learning_rate": 8.752571469677522e-05, "loss": 0.0092, "step": 12670 }, { "epoch": 2.452592879256966, "grad_norm": 0.09032983332872391, "learning_rate": 8.752381849357837e-05, "loss": 0.0078, "step": 12671 }, { "epoch": 2.4527863777089784, "grad_norm": 0.06412788480520248, "learning_rate": 8.752192216946597e-05, "loss": 0.0069, "step": 12672 }, { "epoch": 2.452979876160991, "grad_norm": 0.09490922093391418, "learning_rate": 8.752002572444509e-05, "loss": 0.0078, "step": 12673 }, { "epoch": 2.4531733746130033, "grad_norm": 0.05942218005657196, "learning_rate": 8.751812915852276e-05, "loss": 0.0058, "step": 12674 }, { "epoch": 2.4533668730650153, "grad_norm": 0.08924385905265808, "learning_rate": 8.751623247170604e-05, "loss": 0.0079, "step": 12675 }, { "epoch": 2.4535603715170278, "grad_norm": 0.04464607685804367, "learning_rate": 8.751433566400198e-05, "loss": 0.0074, "step": 12676 }, { "epoch": 2.45375386996904, "grad_norm": 0.046004075556993484, "learning_rate": 8.751243873541762e-05, "loss": 0.007, "step": 12677 }, { "epoch": 2.4539473684210527, "grad_norm": 0.11180900037288666, "learning_rate": 8.751054168596005e-05, "loss": 0.0071, "step": 12678 }, { "epoch": 2.454140866873065, "grad_norm": 0.0384509302675724, "learning_rate": 8.750864451563626e-05, "loss": 0.0076, "step": 12679 }, { "epoch": 2.4543343653250775, "grad_norm": 0.0878266915678978, "learning_rate": 8.750674722445337e-05, "loss": 0.0078, "step": 12680 }, { "epoch": 2.4545278637770895, "grad_norm": 0.04201232269406319, "learning_rate": 8.750484981241839e-05, "loss": 0.0072, "step": 12681 }, { "epoch": 2.454721362229102, "grad_norm": 0.05042639002203941, "learning_rate": 8.75029522795384e-05, "loss": 0.0074, "step": 12682 }, { "epoch": 2.4549148606811144, "grad_norm": 0.046445515006780624, "learning_rate": 8.750105462582043e-05, "loss": 0.0055, "step": 12683 }, { "epoch": 2.455108359133127, "grad_norm": 0.06809981912374496, "learning_rate": 8.749915685127155e-05, "loss": 0.0087, "step": 12684 }, { "epoch": 2.4553018575851393, "grad_norm": 0.028009695932269096, "learning_rate": 8.74972589558988e-05, "loss": 0.0057, "step": 12685 }, { "epoch": 2.4554953560371517, "grad_norm": 0.07559145241975784, "learning_rate": 8.749536093970927e-05, "loss": 0.0071, "step": 12686 }, { "epoch": 2.455688854489164, "grad_norm": 0.035835910588502884, "learning_rate": 8.749346280271e-05, "loss": 0.0064, "step": 12687 }, { "epoch": 2.4558823529411766, "grad_norm": 0.05091902241110802, "learning_rate": 8.749156454490803e-05, "loss": 0.0049, "step": 12688 }, { "epoch": 2.456075851393189, "grad_norm": 0.05867236852645874, "learning_rate": 8.748966616631043e-05, "loss": 0.0066, "step": 12689 }, { "epoch": 2.456269349845201, "grad_norm": 0.04788276180624962, "learning_rate": 8.748776766692425e-05, "loss": 0.008, "step": 12690 }, { "epoch": 2.4564628482972135, "grad_norm": 0.06599248945713043, "learning_rate": 8.748586904675656e-05, "loss": 0.0079, "step": 12691 }, { "epoch": 2.456656346749226, "grad_norm": 0.03852149471640587, "learning_rate": 8.748397030581443e-05, 
"loss": 0.0076, "step": 12692 }, { "epoch": 2.4568498452012384, "grad_norm": 0.0508708655834198, "learning_rate": 8.748207144410489e-05, "loss": 0.0061, "step": 12693 }, { "epoch": 2.457043343653251, "grad_norm": 0.0641825869679451, "learning_rate": 8.748017246163502e-05, "loss": 0.0062, "step": 12694 }, { "epoch": 2.4572368421052633, "grad_norm": 0.037143006920814514, "learning_rate": 8.747827335841185e-05, "loss": 0.0071, "step": 12695 }, { "epoch": 2.4574303405572757, "grad_norm": 0.07593164592981339, "learning_rate": 8.747637413444248e-05, "loss": 0.007, "step": 12696 }, { "epoch": 2.4576238390092877, "grad_norm": 0.059326548129320145, "learning_rate": 8.747447478973395e-05, "loss": 0.0082, "step": 12697 }, { "epoch": 2.4578173374613, "grad_norm": 0.04563894122838974, "learning_rate": 8.747257532429331e-05, "loss": 0.0054, "step": 12698 }, { "epoch": 2.4580108359133126, "grad_norm": 0.09122532606124878, "learning_rate": 8.747067573812765e-05, "loss": 0.0074, "step": 12699 }, { "epoch": 2.458204334365325, "grad_norm": 0.04188362509012222, "learning_rate": 8.746877603124402e-05, "loss": 0.0064, "step": 12700 }, { "epoch": 2.4583978328173375, "grad_norm": 0.07724153995513916, "learning_rate": 8.746687620364948e-05, "loss": 0.0074, "step": 12701 }, { "epoch": 2.45859133126935, "grad_norm": 0.0360611155629158, "learning_rate": 8.74649762553511e-05, "loss": 0.0072, "step": 12702 }, { "epoch": 2.4587848297213624, "grad_norm": 0.06689660996198654, "learning_rate": 8.746307618635593e-05, "loss": 0.0066, "step": 12703 }, { "epoch": 2.458978328173375, "grad_norm": 0.030135730281472206, "learning_rate": 8.746117599667103e-05, "loss": 0.0063, "step": 12704 }, { "epoch": 2.459171826625387, "grad_norm": 0.07138393819332123, "learning_rate": 8.74592756863035e-05, "loss": 0.0059, "step": 12705 }, { "epoch": 2.4593653250773992, "grad_norm": 0.04890071973204613, "learning_rate": 8.745737525526038e-05, "loss": 0.0061, "step": 12706 }, { "epoch": 2.4595588235294117, "grad_norm": 0.06739756464958191, "learning_rate": 8.74554747035487e-05, "loss": 0.0051, "step": 12707 }, { "epoch": 2.459752321981424, "grad_norm": 0.10102000832557678, "learning_rate": 8.745357403117558e-05, "loss": 0.0062, "step": 12708 }, { "epoch": 2.4599458204334366, "grad_norm": 0.08644664287567139, "learning_rate": 8.74516732381481e-05, "loss": 0.0066, "step": 12709 }, { "epoch": 2.460139318885449, "grad_norm": 0.10748733580112457, "learning_rate": 8.744977232447326e-05, "loss": 0.0058, "step": 12710 }, { "epoch": 2.4603328173374615, "grad_norm": 0.07849365472793579, "learning_rate": 8.744787129015818e-05, "loss": 0.0073, "step": 12711 }, { "epoch": 2.4605263157894735, "grad_norm": 0.09287095814943314, "learning_rate": 8.74459701352099e-05, "loss": 0.0073, "step": 12712 }, { "epoch": 2.460719814241486, "grad_norm": 0.03230292722582817, "learning_rate": 8.744406885963552e-05, "loss": 0.0066, "step": 12713 }, { "epoch": 2.4609133126934983, "grad_norm": 0.08953200280666351, "learning_rate": 8.744216746344206e-05, "loss": 0.0072, "step": 12714 }, { "epoch": 2.4611068111455108, "grad_norm": 0.04211459681391716, "learning_rate": 8.744026594663662e-05, "loss": 0.0071, "step": 12715 }, { "epoch": 2.461300309597523, "grad_norm": 0.0742531567811966, "learning_rate": 8.743836430922627e-05, "loss": 0.0076, "step": 12716 }, { "epoch": 2.4614938080495357, "grad_norm": 0.03415023162961006, "learning_rate": 8.743646255121808e-05, "loss": 0.0073, "step": 12717 }, { "epoch": 2.461687306501548, "grad_norm": 0.06194097921252251, "learning_rate": 
8.743456067261911e-05, "loss": 0.0063, "step": 12718 }, { "epoch": 2.4618808049535605, "grad_norm": 0.037515994161367416, "learning_rate": 8.743265867343644e-05, "loss": 0.0065, "step": 12719 }, { "epoch": 2.462074303405573, "grad_norm": 0.08154765516519547, "learning_rate": 8.743075655367712e-05, "loss": 0.0079, "step": 12720 }, { "epoch": 2.462267801857585, "grad_norm": 0.09541495144367218, "learning_rate": 8.742885431334826e-05, "loss": 0.0074, "step": 12721 }, { "epoch": 2.4624613003095974, "grad_norm": 0.07085766643285751, "learning_rate": 8.742695195245688e-05, "loss": 0.0071, "step": 12722 }, { "epoch": 2.46265479876161, "grad_norm": 0.09225040674209595, "learning_rate": 8.74250494710101e-05, "loss": 0.0073, "step": 12723 }, { "epoch": 2.4628482972136223, "grad_norm": 0.05582304671406746, "learning_rate": 8.742314686901499e-05, "loss": 0.0066, "step": 12724 }, { "epoch": 2.4630417956656347, "grad_norm": 0.09284380823373795, "learning_rate": 8.74212441464786e-05, "loss": 0.0067, "step": 12725 }, { "epoch": 2.463235294117647, "grad_norm": 0.06766847521066666, "learning_rate": 8.7419341303408e-05, "loss": 0.0065, "step": 12726 }, { "epoch": 2.4634287925696596, "grad_norm": 0.08789518475532532, "learning_rate": 8.74174383398103e-05, "loss": 0.0068, "step": 12727 }, { "epoch": 2.4636222910216716, "grad_norm": 0.06325136870145798, "learning_rate": 8.741553525569254e-05, "loss": 0.0073, "step": 12728 }, { "epoch": 2.463815789473684, "grad_norm": 0.06906700134277344, "learning_rate": 8.741363205106181e-05, "loss": 0.0079, "step": 12729 }, { "epoch": 2.4640092879256965, "grad_norm": 0.07818011194467545, "learning_rate": 8.741172872592516e-05, "loss": 0.0081, "step": 12730 }, { "epoch": 2.464202786377709, "grad_norm": 0.05249158665537834, "learning_rate": 8.740982528028972e-05, "loss": 0.0072, "step": 12731 }, { "epoch": 2.4643962848297214, "grad_norm": 0.0803389698266983, "learning_rate": 8.740792171416254e-05, "loss": 0.0074, "step": 12732 }, { "epoch": 2.464589783281734, "grad_norm": 0.03971035033464432, "learning_rate": 8.740601802755067e-05, "loss": 0.0068, "step": 12733 }, { "epoch": 2.4647832817337463, "grad_norm": 0.10032813996076584, "learning_rate": 8.74041142204612e-05, "loss": 0.0086, "step": 12734 }, { "epoch": 2.4649767801857587, "grad_norm": 0.03793780133128166, "learning_rate": 8.740221029290124e-05, "loss": 0.0067, "step": 12735 }, { "epoch": 2.4651702786377707, "grad_norm": 0.10807256400585175, "learning_rate": 8.740030624487786e-05, "loss": 0.0083, "step": 12736 }, { "epoch": 2.465363777089783, "grad_norm": 0.04434765502810478, "learning_rate": 8.73984020763981e-05, "loss": 0.0075, "step": 12737 }, { "epoch": 2.4655572755417956, "grad_norm": 0.12518127262592316, "learning_rate": 8.739649778746907e-05, "loss": 0.0063, "step": 12738 }, { "epoch": 2.465750773993808, "grad_norm": 0.07252948731184006, "learning_rate": 8.739459337809785e-05, "loss": 0.007, "step": 12739 }, { "epoch": 2.4659442724458205, "grad_norm": 0.11635278910398483, "learning_rate": 8.73926888482915e-05, "loss": 0.0077, "step": 12740 }, { "epoch": 2.466137770897833, "grad_norm": 0.05083518847823143, "learning_rate": 8.739078419805712e-05, "loss": 0.007, "step": 12741 }, { "epoch": 2.4663312693498454, "grad_norm": 0.09996425360441208, "learning_rate": 8.738887942740179e-05, "loss": 0.0065, "step": 12742 }, { "epoch": 2.4665247678018574, "grad_norm": 0.06255144625902176, "learning_rate": 8.738697453633258e-05, "loss": 0.0069, "step": 12743 }, { "epoch": 2.46671826625387, "grad_norm": 0.08047916740179062, 
"learning_rate": 8.738506952485659e-05, "loss": 0.0067, "step": 12744 }, { "epoch": 2.4669117647058822, "grad_norm": 0.08644668757915497, "learning_rate": 8.738316439298089e-05, "loss": 0.0061, "step": 12745 }, { "epoch": 2.4671052631578947, "grad_norm": 0.054862454533576965, "learning_rate": 8.738125914071256e-05, "loss": 0.0068, "step": 12746 }, { "epoch": 2.467298761609907, "grad_norm": 0.09942057728767395, "learning_rate": 8.737935376805868e-05, "loss": 0.0074, "step": 12747 }, { "epoch": 2.4674922600619196, "grad_norm": 0.03096039965748787, "learning_rate": 8.737744827502635e-05, "loss": 0.0061, "step": 12748 }, { "epoch": 2.467685758513932, "grad_norm": 0.09064527601003647, "learning_rate": 8.737554266162264e-05, "loss": 0.007, "step": 12749 }, { "epoch": 2.4678792569659445, "grad_norm": 0.047146961092948914, "learning_rate": 8.737363692785465e-05, "loss": 0.0078, "step": 12750 }, { "epoch": 2.468072755417957, "grad_norm": 0.08156421780586243, "learning_rate": 8.737173107372945e-05, "loss": 0.0068, "step": 12751 }, { "epoch": 2.468266253869969, "grad_norm": 0.04315860942006111, "learning_rate": 8.736982509925413e-05, "loss": 0.0058, "step": 12752 }, { "epoch": 2.4684597523219813, "grad_norm": 0.08893806487321854, "learning_rate": 8.736791900443578e-05, "loss": 0.0083, "step": 12753 }, { "epoch": 2.468653250773994, "grad_norm": 0.04471186175942421, "learning_rate": 8.736601278928148e-05, "loss": 0.0058, "step": 12754 }, { "epoch": 2.468846749226006, "grad_norm": 0.06354644894599915, "learning_rate": 8.736410645379832e-05, "loss": 0.0072, "step": 12755 }, { "epoch": 2.4690402476780187, "grad_norm": 0.03644156828522682, "learning_rate": 8.736219999799338e-05, "loss": 0.0069, "step": 12756 }, { "epoch": 2.469233746130031, "grad_norm": 0.046691715717315674, "learning_rate": 8.736029342187376e-05, "loss": 0.0073, "step": 12757 }, { "epoch": 2.469427244582043, "grad_norm": 0.027719318866729736, "learning_rate": 8.735838672544653e-05, "loss": 0.0072, "step": 12758 }, { "epoch": 2.4696207430340555, "grad_norm": 0.06427015364170074, "learning_rate": 8.735647990871881e-05, "loss": 0.0065, "step": 12759 }, { "epoch": 2.469814241486068, "grad_norm": 0.03458011895418167, "learning_rate": 8.735457297169767e-05, "loss": 0.0076, "step": 12760 }, { "epoch": 2.4700077399380804, "grad_norm": 0.08387431502342224, "learning_rate": 8.73526659143902e-05, "loss": 0.0064, "step": 12761 }, { "epoch": 2.470201238390093, "grad_norm": 0.07575191557407379, "learning_rate": 8.735075873680348e-05, "loss": 0.006, "step": 12762 }, { "epoch": 2.4703947368421053, "grad_norm": 0.07895386964082718, "learning_rate": 8.734885143894462e-05, "loss": 0.0056, "step": 12763 }, { "epoch": 2.4705882352941178, "grad_norm": 0.09274676442146301, "learning_rate": 8.73469440208207e-05, "loss": 0.0061, "step": 12764 }, { "epoch": 2.47078173374613, "grad_norm": 0.04484957084059715, "learning_rate": 8.73450364824388e-05, "loss": 0.0058, "step": 12765 }, { "epoch": 2.4709752321981426, "grad_norm": 0.08585739135742188, "learning_rate": 8.734312882380604e-05, "loss": 0.0071, "step": 12766 }, { "epoch": 2.4711687306501546, "grad_norm": 0.06220525875687599, "learning_rate": 8.734122104492949e-05, "loss": 0.0077, "step": 12767 }, { "epoch": 2.471362229102167, "grad_norm": 0.05929512158036232, "learning_rate": 8.733931314581624e-05, "loss": 0.0071, "step": 12768 }, { "epoch": 2.4715557275541795, "grad_norm": 0.07346611469984055, "learning_rate": 8.73374051264734e-05, "loss": 0.0062, "step": 12769 }, { "epoch": 2.471749226006192, "grad_norm": 
0.05852305144071579, "learning_rate": 8.733549698690807e-05, "loss": 0.008, "step": 12770 }, { "epoch": 2.4719427244582044, "grad_norm": 0.07276435941457748, "learning_rate": 8.733358872712731e-05, "loss": 0.0074, "step": 12771 }, { "epoch": 2.472136222910217, "grad_norm": 0.08529455959796906, "learning_rate": 8.733168034713824e-05, "loss": 0.0084, "step": 12772 }, { "epoch": 2.4723297213622293, "grad_norm": 0.11580663919448853, "learning_rate": 8.732977184694796e-05, "loss": 0.0068, "step": 12773 }, { "epoch": 2.4725232198142413, "grad_norm": 0.06203664094209671, "learning_rate": 8.732786322656354e-05, "loss": 0.0083, "step": 12774 }, { "epoch": 2.4727167182662537, "grad_norm": 0.07040024548768997, "learning_rate": 8.73259544859921e-05, "loss": 0.0072, "step": 12775 }, { "epoch": 2.472910216718266, "grad_norm": 0.06959797441959381, "learning_rate": 8.732404562524072e-05, "loss": 0.0068, "step": 12776 }, { "epoch": 2.4731037151702786, "grad_norm": 0.03209346532821655, "learning_rate": 8.73221366443165e-05, "loss": 0.0069, "step": 12777 }, { "epoch": 2.473297213622291, "grad_norm": 0.08147970587015152, "learning_rate": 8.732022754322655e-05, "loss": 0.0069, "step": 12778 }, { "epoch": 2.4734907120743035, "grad_norm": 0.037220243364572525, "learning_rate": 8.731831832197795e-05, "loss": 0.0071, "step": 12779 }, { "epoch": 2.473684210526316, "grad_norm": 0.07833413779735565, "learning_rate": 8.73164089805778e-05, "loss": 0.0057, "step": 12780 }, { "epoch": 2.4738777089783284, "grad_norm": 0.06871334463357925, "learning_rate": 8.731449951903322e-05, "loss": 0.0077, "step": 12781 }, { "epoch": 2.4740712074303404, "grad_norm": 0.06391430646181107, "learning_rate": 8.73125899373513e-05, "loss": 0.0084, "step": 12782 }, { "epoch": 2.474264705882353, "grad_norm": 0.07067075371742249, "learning_rate": 8.731068023553912e-05, "loss": 0.0076, "step": 12783 }, { "epoch": 2.4744582043343653, "grad_norm": 0.04464438930153847, "learning_rate": 8.730877041360377e-05, "loss": 0.0096, "step": 12784 }, { "epoch": 2.4746517027863777, "grad_norm": 0.06956155598163605, "learning_rate": 8.73068604715524e-05, "loss": 0.007, "step": 12785 }, { "epoch": 2.47484520123839, "grad_norm": 0.04947575181722641, "learning_rate": 8.73049504093921e-05, "loss": 0.0073, "step": 12786 }, { "epoch": 2.4750386996904026, "grad_norm": 0.089356429874897, "learning_rate": 8.730304022712992e-05, "loss": 0.0068, "step": 12787 }, { "epoch": 2.475232198142415, "grad_norm": 0.0543576255440712, "learning_rate": 8.730112992477302e-05, "loss": 0.007, "step": 12788 }, { "epoch": 2.475425696594427, "grad_norm": 0.11105827987194061, "learning_rate": 8.729921950232847e-05, "loss": 0.0076, "step": 12789 }, { "epoch": 2.4756191950464395, "grad_norm": 0.08479736745357513, "learning_rate": 8.729730895980337e-05, "loss": 0.0069, "step": 12790 }, { "epoch": 2.475812693498452, "grad_norm": 0.06871713697910309, "learning_rate": 8.729539829720484e-05, "loss": 0.0074, "step": 12791 }, { "epoch": 2.4760061919504643, "grad_norm": 0.09092359244823456, "learning_rate": 8.729348751453997e-05, "loss": 0.0061, "step": 12792 }, { "epoch": 2.476199690402477, "grad_norm": 0.04915600270032883, "learning_rate": 8.729157661181589e-05, "loss": 0.0071, "step": 12793 }, { "epoch": 2.4763931888544892, "grad_norm": 0.052751172333955765, "learning_rate": 8.728966558903968e-05, "loss": 0.009, "step": 12794 }, { "epoch": 2.4765866873065017, "grad_norm": 0.059898026287555695, "learning_rate": 8.728775444621842e-05, "loss": 0.0075, "step": 12795 }, { "epoch": 2.476780185758514, 
"grad_norm": 0.031421296298503876, "learning_rate": 8.728584318335928e-05, "loss": 0.0066, "step": 12796 }, { "epoch": 2.4769736842105265, "grad_norm": 0.06645546108484268, "learning_rate": 8.728393180046933e-05, "loss": 0.0063, "step": 12797 }, { "epoch": 2.4771671826625385, "grad_norm": 0.029730604961514473, "learning_rate": 8.728202029755565e-05, "loss": 0.0058, "step": 12798 }, { "epoch": 2.477360681114551, "grad_norm": 0.05605297535657883, "learning_rate": 8.728010867462539e-05, "loss": 0.0074, "step": 12799 }, { "epoch": 2.4775541795665634, "grad_norm": 0.05540873855352402, "learning_rate": 8.727819693168564e-05, "loss": 0.0079, "step": 12800 }, { "epoch": 2.477747678018576, "grad_norm": 0.05877252295613289, "learning_rate": 8.727628506874351e-05, "loss": 0.0079, "step": 12801 }, { "epoch": 2.4779411764705883, "grad_norm": 0.056574393063783646, "learning_rate": 8.72743730858061e-05, "loss": 0.007, "step": 12802 }, { "epoch": 2.4781346749226008, "grad_norm": 0.04584261402487755, "learning_rate": 8.727246098288052e-05, "loss": 0.0078, "step": 12803 }, { "epoch": 2.478328173374613, "grad_norm": 0.05169674754142761, "learning_rate": 8.727054875997389e-05, "loss": 0.0066, "step": 12804 }, { "epoch": 2.478521671826625, "grad_norm": 0.04215057194232941, "learning_rate": 8.72686364170933e-05, "loss": 0.0065, "step": 12805 }, { "epoch": 2.4787151702786376, "grad_norm": 0.056779008358716965, "learning_rate": 8.726672395424589e-05, "loss": 0.0073, "step": 12806 }, { "epoch": 2.47890866873065, "grad_norm": 0.05604651942849159, "learning_rate": 8.726481137143874e-05, "loss": 0.0066, "step": 12807 }, { "epoch": 2.4791021671826625, "grad_norm": 0.06195308268070221, "learning_rate": 8.726289866867897e-05, "loss": 0.0077, "step": 12808 }, { "epoch": 2.479295665634675, "grad_norm": 0.05623259022831917, "learning_rate": 8.726098584597369e-05, "loss": 0.0068, "step": 12809 }, { "epoch": 2.4794891640866874, "grad_norm": 0.05760831758379936, "learning_rate": 8.725907290333002e-05, "loss": 0.0092, "step": 12810 }, { "epoch": 2.4796826625387, "grad_norm": 0.09269478172063828, "learning_rate": 8.725715984075506e-05, "loss": 0.0075, "step": 12811 }, { "epoch": 2.4798761609907123, "grad_norm": 0.030239546671509743, "learning_rate": 8.725524665825594e-05, "loss": 0.007, "step": 12812 }, { "epoch": 2.4800696594427243, "grad_norm": 0.09785354137420654, "learning_rate": 8.725333335583974e-05, "loss": 0.008, "step": 12813 }, { "epoch": 2.4802631578947367, "grad_norm": 0.04229360446333885, "learning_rate": 8.725141993351362e-05, "loss": 0.0072, "step": 12814 }, { "epoch": 2.480456656346749, "grad_norm": 0.03515312820672989, "learning_rate": 8.724950639128465e-05, "loss": 0.0088, "step": 12815 }, { "epoch": 2.4806501547987616, "grad_norm": 0.06881110370159149, "learning_rate": 8.724759272915997e-05, "loss": 0.0055, "step": 12816 }, { "epoch": 2.480843653250774, "grad_norm": 0.04331052303314209, "learning_rate": 8.724567894714668e-05, "loss": 0.007, "step": 12817 }, { "epoch": 2.4810371517027865, "grad_norm": 0.09247943758964539, "learning_rate": 8.72437650452519e-05, "loss": 0.0077, "step": 12818 }, { "epoch": 2.481230650154799, "grad_norm": 0.0529024600982666, "learning_rate": 8.724185102348276e-05, "loss": 0.0064, "step": 12819 }, { "epoch": 2.481424148606811, "grad_norm": 0.08712325990200043, "learning_rate": 8.723993688184635e-05, "loss": 0.0057, "step": 12820 }, { "epoch": 2.4816176470588234, "grad_norm": 0.06961183995008469, "learning_rate": 8.72380226203498e-05, "loss": 0.0074, "step": 12821 }, { "epoch": 
2.481811145510836, "grad_norm": 0.07276152074337006, "learning_rate": 8.723610823900023e-05, "loss": 0.0073, "step": 12822 }, { "epoch": 2.4820046439628483, "grad_norm": 0.07937499135732651, "learning_rate": 8.723419373780474e-05, "loss": 0.0075, "step": 12823 }, { "epoch": 2.4821981424148607, "grad_norm": 0.05322378873825073, "learning_rate": 8.723227911677048e-05, "loss": 0.0054, "step": 12824 }, { "epoch": 2.482391640866873, "grad_norm": 0.06442028284072876, "learning_rate": 8.723036437590453e-05, "loss": 0.0079, "step": 12825 }, { "epoch": 2.4825851393188856, "grad_norm": 0.029898041859269142, "learning_rate": 8.722844951521405e-05, "loss": 0.0061, "step": 12826 }, { "epoch": 2.482778637770898, "grad_norm": 0.07033342123031616, "learning_rate": 8.722653453470611e-05, "loss": 0.0073, "step": 12827 }, { "epoch": 2.4829721362229105, "grad_norm": 0.045621514320373535, "learning_rate": 8.722461943438786e-05, "loss": 0.0083, "step": 12828 }, { "epoch": 2.4831656346749225, "grad_norm": 0.07057683169841766, "learning_rate": 8.722270421426642e-05, "loss": 0.007, "step": 12829 }, { "epoch": 2.483359133126935, "grad_norm": 0.07250916212797165, "learning_rate": 8.722078887434891e-05, "loss": 0.0072, "step": 12830 }, { "epoch": 2.4835526315789473, "grad_norm": 0.05542363226413727, "learning_rate": 8.721887341464245e-05, "loss": 0.0082, "step": 12831 }, { "epoch": 2.48374613003096, "grad_norm": 0.0943034216761589, "learning_rate": 8.721695783515415e-05, "loss": 0.0064, "step": 12832 }, { "epoch": 2.4839396284829722, "grad_norm": 0.03730048984289169, "learning_rate": 8.721504213589114e-05, "loss": 0.0077, "step": 12833 }, { "epoch": 2.4841331269349847, "grad_norm": 0.05763747915625572, "learning_rate": 8.721312631686056e-05, "loss": 0.0083, "step": 12834 }, { "epoch": 2.4843266253869967, "grad_norm": 0.03732842206954956, "learning_rate": 8.721121037806947e-05, "loss": 0.0067, "step": 12835 }, { "epoch": 2.484520123839009, "grad_norm": 0.03922741115093231, "learning_rate": 8.720929431952508e-05, "loss": 0.0071, "step": 12836 }, { "epoch": 2.4847136222910216, "grad_norm": 0.076702781021595, "learning_rate": 8.720737814123445e-05, "loss": 0.0067, "step": 12837 }, { "epoch": 2.484907120743034, "grad_norm": 0.039857033640146255, "learning_rate": 8.720546184320473e-05, "loss": 0.0065, "step": 12838 }, { "epoch": 2.4851006191950464, "grad_norm": 0.0732920914888382, "learning_rate": 8.720354542544304e-05, "loss": 0.0085, "step": 12839 }, { "epoch": 2.485294117647059, "grad_norm": 0.04775149002671242, "learning_rate": 8.72016288879565e-05, "loss": 0.0065, "step": 12840 }, { "epoch": 2.4854876160990713, "grad_norm": 0.05408809706568718, "learning_rate": 8.719971223075223e-05, "loss": 0.007, "step": 12841 }, { "epoch": 2.4856811145510838, "grad_norm": 0.03943315148353577, "learning_rate": 8.719779545383737e-05, "loss": 0.007, "step": 12842 }, { "epoch": 2.485874613003096, "grad_norm": 0.06990212202072144, "learning_rate": 8.719587855721905e-05, "loss": 0.0054, "step": 12843 }, { "epoch": 2.486068111455108, "grad_norm": 0.036678243428468704, "learning_rate": 8.719396154090438e-05, "loss": 0.006, "step": 12844 }, { "epoch": 2.4862616099071206, "grad_norm": 0.07447894662618637, "learning_rate": 8.719204440490048e-05, "loss": 0.0062, "step": 12845 }, { "epoch": 2.486455108359133, "grad_norm": 0.03228262811899185, "learning_rate": 8.719012714921452e-05, "loss": 0.0071, "step": 12846 }, { "epoch": 2.4866486068111455, "grad_norm": 0.07344863563776016, "learning_rate": 8.718820977385357e-05, "loss": 0.0085, "step": 
12847 }, { "epoch": 2.486842105263158, "grad_norm": 0.040283940732479095, "learning_rate": 8.71862922788248e-05, "loss": 0.0074, "step": 12848 }, { "epoch": 2.4870356037151704, "grad_norm": 0.04356354475021362, "learning_rate": 8.718437466413534e-05, "loss": 0.007, "step": 12849 }, { "epoch": 2.487229102167183, "grad_norm": 0.053222935646772385, "learning_rate": 8.718245692979228e-05, "loss": 0.006, "step": 12850 }, { "epoch": 2.487422600619195, "grad_norm": 0.0582832507789135, "learning_rate": 8.71805390758028e-05, "loss": 0.0061, "step": 12851 }, { "epoch": 2.4876160990712073, "grad_norm": 0.05905454605817795, "learning_rate": 8.717862110217398e-05, "loss": 0.0057, "step": 12852 }, { "epoch": 2.4878095975232197, "grad_norm": 0.052505649626255035, "learning_rate": 8.717670300891299e-05, "loss": 0.0075, "step": 12853 }, { "epoch": 2.488003095975232, "grad_norm": 0.06406935304403305, "learning_rate": 8.717478479602693e-05, "loss": 0.0071, "step": 12854 }, { "epoch": 2.4881965944272446, "grad_norm": 0.04042381793260574, "learning_rate": 8.717286646352297e-05, "loss": 0.0081, "step": 12855 }, { "epoch": 2.488390092879257, "grad_norm": 0.04133383184671402, "learning_rate": 8.717094801140822e-05, "loss": 0.0075, "step": 12856 }, { "epoch": 2.4885835913312695, "grad_norm": 0.06811341643333435, "learning_rate": 8.716902943968981e-05, "loss": 0.006, "step": 12857 }, { "epoch": 2.488777089783282, "grad_norm": 0.0665423572063446, "learning_rate": 8.716711074837487e-05, "loss": 0.0077, "step": 12858 }, { "epoch": 2.488970588235294, "grad_norm": 0.06748224049806595, "learning_rate": 8.716519193747053e-05, "loss": 0.0079, "step": 12859 }, { "epoch": 2.4891640866873064, "grad_norm": 0.060277797281742096, "learning_rate": 8.716327300698394e-05, "loss": 0.0083, "step": 12860 }, { "epoch": 2.489357585139319, "grad_norm": 0.060615845024585724, "learning_rate": 8.716135395692225e-05, "loss": 0.0075, "step": 12861 }, { "epoch": 2.4895510835913313, "grad_norm": 0.04656597971916199, "learning_rate": 8.715943478729252e-05, "loss": 0.008, "step": 12862 }, { "epoch": 2.4897445820433437, "grad_norm": 0.0720759928226471, "learning_rate": 8.715751549810196e-05, "loss": 0.0079, "step": 12863 }, { "epoch": 2.489938080495356, "grad_norm": 0.04293008893728256, "learning_rate": 8.71555960893577e-05, "loss": 0.006, "step": 12864 }, { "epoch": 2.4901315789473686, "grad_norm": 0.05192624032497406, "learning_rate": 8.715367656106684e-05, "loss": 0.0053, "step": 12865 }, { "epoch": 2.4903250773993806, "grad_norm": 0.0534532368183136, "learning_rate": 8.715175691323652e-05, "loss": 0.0076, "step": 12866 }, { "epoch": 2.490518575851393, "grad_norm": 0.042580340057611465, "learning_rate": 8.714983714587392e-05, "loss": 0.006, "step": 12867 }, { "epoch": 2.4907120743034055, "grad_norm": 0.04241017997264862, "learning_rate": 8.714791725898614e-05, "loss": 0.0068, "step": 12868 }, { "epoch": 2.490905572755418, "grad_norm": 0.04929481819272041, "learning_rate": 8.714599725258031e-05, "loss": 0.0072, "step": 12869 }, { "epoch": 2.4910990712074303, "grad_norm": 0.03061886690557003, "learning_rate": 8.71440771266636e-05, "loss": 0.0058, "step": 12870 }, { "epoch": 2.491292569659443, "grad_norm": 0.05117305368185043, "learning_rate": 8.714215688124312e-05, "loss": 0.0063, "step": 12871 }, { "epoch": 2.4914860681114552, "grad_norm": 0.03092769905924797, "learning_rate": 8.714023651632601e-05, "loss": 0.007, "step": 12872 }, { "epoch": 2.4916795665634677, "grad_norm": 0.03692717105150223, "learning_rate": 8.713831603191946e-05, "loss": 
0.0066, "step": 12873 }, { "epoch": 2.49187306501548, "grad_norm": 0.028164781630039215, "learning_rate": 8.713639542803053e-05, "loss": 0.006, "step": 12874 }, { "epoch": 2.492066563467492, "grad_norm": 0.03523290157318115, "learning_rate": 8.71344747046664e-05, "loss": 0.0082, "step": 12875 }, { "epoch": 2.4922600619195046, "grad_norm": 0.04189213365316391, "learning_rate": 8.713255386183424e-05, "loss": 0.0071, "step": 12876 }, { "epoch": 2.492453560371517, "grad_norm": 0.03785553202033043, "learning_rate": 8.713063289954114e-05, "loss": 0.0058, "step": 12877 }, { "epoch": 2.4926470588235294, "grad_norm": 0.026196055114269257, "learning_rate": 8.712871181779428e-05, "loss": 0.006, "step": 12878 }, { "epoch": 2.492840557275542, "grad_norm": 0.02873021736741066, "learning_rate": 8.712679061660077e-05, "loss": 0.0078, "step": 12879 }, { "epoch": 2.4930340557275543, "grad_norm": 0.038628511130809784, "learning_rate": 8.712486929596778e-05, "loss": 0.007, "step": 12880 }, { "epoch": 2.4932275541795663, "grad_norm": 0.04494432359933853, "learning_rate": 8.712294785590243e-05, "loss": 0.0075, "step": 12881 }, { "epoch": 2.4934210526315788, "grad_norm": 0.03219333663582802, "learning_rate": 8.712102629641189e-05, "loss": 0.0072, "step": 12882 }, { "epoch": 2.493614551083591, "grad_norm": 0.034972675144672394, "learning_rate": 8.711910461750329e-05, "loss": 0.0085, "step": 12883 }, { "epoch": 2.4938080495356036, "grad_norm": 0.05028947442770004, "learning_rate": 8.711718281918377e-05, "loss": 0.0076, "step": 12884 }, { "epoch": 2.494001547987616, "grad_norm": 0.027610164135694504, "learning_rate": 8.711526090146047e-05, "loss": 0.0058, "step": 12885 }, { "epoch": 2.4941950464396285, "grad_norm": 0.05334347486495972, "learning_rate": 8.711333886434055e-05, "loss": 0.007, "step": 12886 }, { "epoch": 2.494388544891641, "grad_norm": 0.03519248589873314, "learning_rate": 8.711141670783114e-05, "loss": 0.0076, "step": 12887 }, { "epoch": 2.4945820433436534, "grad_norm": 0.06880536675453186, "learning_rate": 8.71094944319394e-05, "loss": 0.0084, "step": 12888 }, { "epoch": 2.494775541795666, "grad_norm": 0.043469663709402084, "learning_rate": 8.710757203667247e-05, "loss": 0.0072, "step": 12889 }, { "epoch": 2.494969040247678, "grad_norm": 0.039355065673589706, "learning_rate": 8.710564952203751e-05, "loss": 0.0058, "step": 12890 }, { "epoch": 2.4951625386996903, "grad_norm": 0.029647570103406906, "learning_rate": 8.710372688804164e-05, "loss": 0.0081, "step": 12891 }, { "epoch": 2.4953560371517027, "grad_norm": 0.04617897421121597, "learning_rate": 8.710180413469204e-05, "loss": 0.0059, "step": 12892 }, { "epoch": 2.495549535603715, "grad_norm": 0.05102917179465294, "learning_rate": 8.709988126199585e-05, "loss": 0.0066, "step": 12893 }, { "epoch": 2.4957430340557276, "grad_norm": 0.043862104415893555, "learning_rate": 8.709795826996018e-05, "loss": 0.0059, "step": 12894 }, { "epoch": 2.49593653250774, "grad_norm": 0.043277300894260406, "learning_rate": 8.709603515859222e-05, "loss": 0.0062, "step": 12895 }, { "epoch": 2.4961300309597525, "grad_norm": 0.03778846561908722, "learning_rate": 8.709411192789911e-05, "loss": 0.0065, "step": 12896 }, { "epoch": 2.4963235294117645, "grad_norm": 0.047334976494312286, "learning_rate": 8.709218857788802e-05, "loss": 0.0053, "step": 12897 }, { "epoch": 2.496517027863777, "grad_norm": 0.038032080978155136, "learning_rate": 8.709026510856606e-05, "loss": 0.0084, "step": 12898 }, { "epoch": 2.4967105263157894, "grad_norm": 0.05571264773607254, "learning_rate": 
8.708834151994042e-05, "loss": 0.0078, "step": 12899 }, { "epoch": 2.496904024767802, "grad_norm": 0.02847914956510067, "learning_rate": 8.708641781201822e-05, "loss": 0.0069, "step": 12900 }, { "epoch": 2.4970975232198143, "grad_norm": 0.029737457633018494, "learning_rate": 8.708449398480663e-05, "loss": 0.0073, "step": 12901 }, { "epoch": 2.4972910216718267, "grad_norm": 0.03376138582825661, "learning_rate": 8.708257003831278e-05, "loss": 0.0072, "step": 12902 }, { "epoch": 2.497484520123839, "grad_norm": 0.02804475836455822, "learning_rate": 8.708064597254385e-05, "loss": 0.007, "step": 12903 }, { "epoch": 2.4976780185758516, "grad_norm": 0.04572474583983421, "learning_rate": 8.707872178750698e-05, "loss": 0.0079, "step": 12904 }, { "epoch": 2.4978715170278636, "grad_norm": 0.04234927147626877, "learning_rate": 8.707679748320934e-05, "loss": 0.0061, "step": 12905 }, { "epoch": 2.498065015479876, "grad_norm": 0.05013631656765938, "learning_rate": 8.707487305965807e-05, "loss": 0.0073, "step": 12906 }, { "epoch": 2.4982585139318885, "grad_norm": 0.04596998170018196, "learning_rate": 8.707294851686032e-05, "loss": 0.0066, "step": 12907 }, { "epoch": 2.498452012383901, "grad_norm": 0.04798898473381996, "learning_rate": 8.707102385482325e-05, "loss": 0.0056, "step": 12908 }, { "epoch": 2.4986455108359134, "grad_norm": 0.050840310752391815, "learning_rate": 8.706909907355401e-05, "loss": 0.0061, "step": 12909 }, { "epoch": 2.498839009287926, "grad_norm": 0.06702956557273865, "learning_rate": 8.706717417305976e-05, "loss": 0.0069, "step": 12910 }, { "epoch": 2.4990325077399382, "grad_norm": 0.055643513798713684, "learning_rate": 8.706524915334767e-05, "loss": 0.0064, "step": 12911 }, { "epoch": 2.4992260061919502, "grad_norm": 0.0652519017457962, "learning_rate": 8.706332401442487e-05, "loss": 0.0078, "step": 12912 }, { "epoch": 2.4994195046439627, "grad_norm": 0.025233827531337738, "learning_rate": 8.706139875629856e-05, "loss": 0.0076, "step": 12913 }, { "epoch": 2.499613003095975, "grad_norm": 0.06346795707941055, "learning_rate": 8.705947337897584e-05, "loss": 0.0076, "step": 12914 }, { "epoch": 2.4998065015479876, "grad_norm": 0.055768080055713654, "learning_rate": 8.70575478824639e-05, "loss": 0.0063, "step": 12915 }, { "epoch": 2.5, "grad_norm": 0.054199282079935074, "learning_rate": 8.705562226676992e-05, "loss": 0.0078, "step": 12916 }, { "epoch": 2.5001934984520124, "grad_norm": 0.08164353668689728, "learning_rate": 8.705369653190101e-05, "loss": 0.0062, "step": 12917 }, { "epoch": 2.500386996904025, "grad_norm": 0.046133968979120255, "learning_rate": 8.705177067786437e-05, "loss": 0.0075, "step": 12918 }, { "epoch": 2.5005804953560373, "grad_norm": 0.07533485442399979, "learning_rate": 8.704984470466715e-05, "loss": 0.0065, "step": 12919 }, { "epoch": 2.5007739938080498, "grad_norm": 0.050812914967536926, "learning_rate": 8.704791861231649e-05, "loss": 0.0078, "step": 12920 }, { "epoch": 2.5009674922600618, "grad_norm": 0.04218287020921707, "learning_rate": 8.704599240081957e-05, "loss": 0.0064, "step": 12921 }, { "epoch": 2.501160990712074, "grad_norm": 0.08459503948688507, "learning_rate": 8.704406607018353e-05, "loss": 0.0066, "step": 12922 }, { "epoch": 2.5013544891640866, "grad_norm": 0.05666753277182579, "learning_rate": 8.704213962041557e-05, "loss": 0.0068, "step": 12923 }, { "epoch": 2.501547987616099, "grad_norm": 0.11687377095222473, "learning_rate": 8.704021305152281e-05, "loss": 0.0079, "step": 12924 }, { "epoch": 2.5017414860681115, "grad_norm": 0.08856409043073654, 
"learning_rate": 8.703828636351247e-05, "loss": 0.0079, "step": 12925 }, { "epoch": 2.501934984520124, "grad_norm": 0.11211100220680237, "learning_rate": 8.703635955639164e-05, "loss": 0.0064, "step": 12926 }, { "epoch": 2.502128482972136, "grad_norm": 0.10914305597543716, "learning_rate": 8.703443263016754e-05, "loss": 0.0075, "step": 12927 }, { "epoch": 2.5023219814241484, "grad_norm": 0.10855388641357422, "learning_rate": 8.70325055848473e-05, "loss": 0.0082, "step": 12928 }, { "epoch": 2.502515479876161, "grad_norm": 0.12720836699008942, "learning_rate": 8.70305784204381e-05, "loss": 0.0069, "step": 12929 }, { "epoch": 2.5027089783281733, "grad_norm": 0.09090693295001984, "learning_rate": 8.70286511369471e-05, "loss": 0.006, "step": 12930 }, { "epoch": 2.5029024767801857, "grad_norm": 0.11494969576597214, "learning_rate": 8.702672373438148e-05, "loss": 0.0066, "step": 12931 }, { "epoch": 2.503095975232198, "grad_norm": 0.08529860526323318, "learning_rate": 8.702479621274839e-05, "loss": 0.0069, "step": 12932 }, { "epoch": 2.5032894736842106, "grad_norm": 0.12313424795866013, "learning_rate": 8.702286857205497e-05, "loss": 0.0062, "step": 12933 }, { "epoch": 2.503482972136223, "grad_norm": 0.09268016368150711, "learning_rate": 8.702094081230845e-05, "loss": 0.0062, "step": 12934 }, { "epoch": 2.5036764705882355, "grad_norm": 0.11200492829084396, "learning_rate": 8.701901293351595e-05, "loss": 0.0071, "step": 12935 }, { "epoch": 2.503869969040248, "grad_norm": 0.08903887122869492, "learning_rate": 8.701708493568465e-05, "loss": 0.0081, "step": 12936 }, { "epoch": 2.50406346749226, "grad_norm": 0.0983893945813179, "learning_rate": 8.701515681882171e-05, "loss": 0.0073, "step": 12937 }, { "epoch": 2.5042569659442724, "grad_norm": 0.09032856673002243, "learning_rate": 8.701322858293431e-05, "loss": 0.0081, "step": 12938 }, { "epoch": 2.504450464396285, "grad_norm": 0.050543781369924545, "learning_rate": 8.701130022802962e-05, "loss": 0.0067, "step": 12939 }, { "epoch": 2.5046439628482973, "grad_norm": 0.10152871906757355, "learning_rate": 8.70093717541148e-05, "loss": 0.007, "step": 12940 }, { "epoch": 2.5048374613003097, "grad_norm": 0.06368331611156464, "learning_rate": 8.700744316119703e-05, "loss": 0.0056, "step": 12941 }, { "epoch": 2.5050309597523217, "grad_norm": 0.08142998069524765, "learning_rate": 8.700551444928346e-05, "loss": 0.0067, "step": 12942 }, { "epoch": 2.505224458204334, "grad_norm": 0.08953262120485306, "learning_rate": 8.700358561838128e-05, "loss": 0.0076, "step": 12943 }, { "epoch": 2.5054179566563466, "grad_norm": 0.045926351100206375, "learning_rate": 8.700165666849766e-05, "loss": 0.0063, "step": 12944 }, { "epoch": 2.505611455108359, "grad_norm": 0.10796735435724258, "learning_rate": 8.699972759963976e-05, "loss": 0.0068, "step": 12945 }, { "epoch": 2.5058049535603715, "grad_norm": 0.05923761799931526, "learning_rate": 8.699779841181475e-05, "loss": 0.0066, "step": 12946 }, { "epoch": 2.505998452012384, "grad_norm": 0.09946665167808533, "learning_rate": 8.699586910502982e-05, "loss": 0.0058, "step": 12947 }, { "epoch": 2.5061919504643964, "grad_norm": 0.07414592057466507, "learning_rate": 8.699393967929214e-05, "loss": 0.007, "step": 12948 }, { "epoch": 2.506385448916409, "grad_norm": 0.11381600797176361, "learning_rate": 8.699201013460887e-05, "loss": 0.0071, "step": 12949 }, { "epoch": 2.5065789473684212, "grad_norm": 0.09250997006893158, "learning_rate": 8.699008047098718e-05, "loss": 0.0068, "step": 12950 }, { "epoch": 2.5067724458204337, "grad_norm": 
0.08842246234416962, "learning_rate": 8.698815068843427e-05, "loss": 0.0082, "step": 12951 }, { "epoch": 2.5069659442724457, "grad_norm": 0.10800307989120483, "learning_rate": 8.698622078695728e-05, "loss": 0.0077, "step": 12952 }, { "epoch": 2.507159442724458, "grad_norm": 0.06023219972848892, "learning_rate": 8.698429076656342e-05, "loss": 0.0074, "step": 12953 }, { "epoch": 2.5073529411764706, "grad_norm": 0.08411765098571777, "learning_rate": 8.698236062725983e-05, "loss": 0.0084, "step": 12954 }, { "epoch": 2.507546439628483, "grad_norm": 0.04281531646847725, "learning_rate": 8.69804303690537e-05, "loss": 0.0075, "step": 12955 }, { "epoch": 2.5077399380804954, "grad_norm": 0.050752364099025726, "learning_rate": 8.697849999195223e-05, "loss": 0.0058, "step": 12956 }, { "epoch": 2.507933436532508, "grad_norm": 0.06947063654661179, "learning_rate": 8.697656949596257e-05, "loss": 0.0089, "step": 12957 }, { "epoch": 2.50812693498452, "grad_norm": 0.05198896676301956, "learning_rate": 8.69746388810919e-05, "loss": 0.0074, "step": 12958 }, { "epoch": 2.5083204334365323, "grad_norm": 0.06098126247525215, "learning_rate": 8.697270814734741e-05, "loss": 0.0074, "step": 12959 }, { "epoch": 2.5085139318885448, "grad_norm": 0.053736839443445206, "learning_rate": 8.697077729473626e-05, "loss": 0.0071, "step": 12960 }, { "epoch": 2.508707430340557, "grad_norm": 0.0528324693441391, "learning_rate": 8.696884632326564e-05, "loss": 0.0074, "step": 12961 }, { "epoch": 2.5089009287925697, "grad_norm": 0.06483957171440125, "learning_rate": 8.696691523294273e-05, "loss": 0.0066, "step": 12962 }, { "epoch": 2.509094427244582, "grad_norm": 0.04018620401620865, "learning_rate": 8.696498402377471e-05, "loss": 0.0066, "step": 12963 }, { "epoch": 2.5092879256965945, "grad_norm": 0.06282375752925873, "learning_rate": 8.696305269576873e-05, "loss": 0.0063, "step": 12964 }, { "epoch": 2.509481424148607, "grad_norm": 0.08857613056898117, "learning_rate": 8.696112124893203e-05, "loss": 0.0063, "step": 12965 }, { "epoch": 2.5096749226006194, "grad_norm": 0.07726190239191055, "learning_rate": 8.695918968327174e-05, "loss": 0.0077, "step": 12966 }, { "epoch": 2.5098684210526314, "grad_norm": 0.09765840321779251, "learning_rate": 8.695725799879506e-05, "loss": 0.0064, "step": 12967 }, { "epoch": 2.510061919504644, "grad_norm": 0.08740661293268204, "learning_rate": 8.695532619550917e-05, "loss": 0.0074, "step": 12968 }, { "epoch": 2.5102554179566563, "grad_norm": 0.07375258952379227, "learning_rate": 8.695339427342125e-05, "loss": 0.0053, "step": 12969 }, { "epoch": 2.5104489164086687, "grad_norm": 0.09621766209602356, "learning_rate": 8.695146223253849e-05, "loss": 0.006, "step": 12970 }, { "epoch": 2.510642414860681, "grad_norm": 0.06025310233235359, "learning_rate": 8.694953007286806e-05, "loss": 0.0058, "step": 12971 }, { "epoch": 2.5108359133126936, "grad_norm": 0.09287378191947937, "learning_rate": 8.694759779441715e-05, "loss": 0.0062, "step": 12972 }, { "epoch": 2.5110294117647056, "grad_norm": 0.032292913645505905, "learning_rate": 8.694566539719293e-05, "loss": 0.0068, "step": 12973 }, { "epoch": 2.511222910216718, "grad_norm": 0.07643942534923553, "learning_rate": 8.694373288120262e-05, "loss": 0.0071, "step": 12974 }, { "epoch": 2.5114164086687305, "grad_norm": 0.05476856231689453, "learning_rate": 8.694180024645338e-05, "loss": 0.0085, "step": 12975 }, { "epoch": 2.511609907120743, "grad_norm": 0.0732310488820076, "learning_rate": 8.693986749295239e-05, "loss": 0.0077, "step": 12976 }, { "epoch": 
2.5118034055727554, "grad_norm": 0.06338329613208771, "learning_rate": 8.693793462070683e-05, "loss": 0.0061, "step": 12977 }, { "epoch": 2.511996904024768, "grad_norm": 0.08328372985124588, "learning_rate": 8.693600162972393e-05, "loss": 0.0084, "step": 12978 }, { "epoch": 2.5121904024767803, "grad_norm": 0.06950473040342331, "learning_rate": 8.693406852001082e-05, "loss": 0.0075, "step": 12979 }, { "epoch": 2.5123839009287927, "grad_norm": 0.11341145634651184, "learning_rate": 8.693213529157471e-05, "loss": 0.0069, "step": 12980 }, { "epoch": 2.512577399380805, "grad_norm": 0.08820094168186188, "learning_rate": 8.693020194442281e-05, "loss": 0.0072, "step": 12981 }, { "epoch": 2.5127708978328176, "grad_norm": 0.10082326829433441, "learning_rate": 8.692826847856226e-05, "loss": 0.008, "step": 12982 }, { "epoch": 2.5129643962848296, "grad_norm": 0.0805627629160881, "learning_rate": 8.69263348940003e-05, "loss": 0.0079, "step": 12983 }, { "epoch": 2.513157894736842, "grad_norm": 0.07321058213710785, "learning_rate": 8.692440119074407e-05, "loss": 0.0066, "step": 12984 }, { "epoch": 2.5133513931888545, "grad_norm": 0.10326206684112549, "learning_rate": 8.692246736880082e-05, "loss": 0.0082, "step": 12985 }, { "epoch": 2.513544891640867, "grad_norm": 0.07403389364480972, "learning_rate": 8.692053342817767e-05, "loss": 0.007, "step": 12986 }, { "epoch": 2.5137383900928794, "grad_norm": 0.09978911280632019, "learning_rate": 8.691859936888186e-05, "loss": 0.0074, "step": 12987 }, { "epoch": 2.513931888544892, "grad_norm": 0.10059527307748795, "learning_rate": 8.691666519092054e-05, "loss": 0.0072, "step": 12988 }, { "epoch": 2.514125386996904, "grad_norm": 0.10263857245445251, "learning_rate": 8.691473089430095e-05, "loss": 0.0073, "step": 12989 }, { "epoch": 2.5143188854489162, "grad_norm": 0.10535278171300888, "learning_rate": 8.691279647903024e-05, "loss": 0.0069, "step": 12990 }, { "epoch": 2.5145123839009287, "grad_norm": 0.09205339848995209, "learning_rate": 8.691086194511562e-05, "loss": 0.0069, "step": 12991 }, { "epoch": 2.514705882352941, "grad_norm": 0.09841259568929672, "learning_rate": 8.690892729256428e-05, "loss": 0.0069, "step": 12992 }, { "epoch": 2.5148993808049536, "grad_norm": 0.0976075604557991, "learning_rate": 8.69069925213834e-05, "loss": 0.0064, "step": 12993 }, { "epoch": 2.515092879256966, "grad_norm": 0.07945410907268524, "learning_rate": 8.69050576315802e-05, "loss": 0.0061, "step": 12994 }, { "epoch": 2.5152863777089784, "grad_norm": 0.10518453270196915, "learning_rate": 8.690312262316183e-05, "loss": 0.0069, "step": 12995 }, { "epoch": 2.515479876160991, "grad_norm": 0.06126413121819496, "learning_rate": 8.690118749613553e-05, "loss": 0.0089, "step": 12996 }, { "epoch": 2.5156733746130033, "grad_norm": 0.09390296787023544, "learning_rate": 8.68992522505085e-05, "loss": 0.0067, "step": 12997 }, { "epoch": 2.5158668730650153, "grad_norm": 0.04203234985470772, "learning_rate": 8.689731688628787e-05, "loss": 0.0074, "step": 12998 }, { "epoch": 2.5160603715170278, "grad_norm": 0.08643889427185059, "learning_rate": 8.689538140348089e-05, "loss": 0.0065, "step": 12999 }, { "epoch": 2.51625386996904, "grad_norm": 0.06183408945798874, "learning_rate": 8.689344580209475e-05, "loss": 0.0052, "step": 13000 }, { "epoch": 2.5164473684210527, "grad_norm": 0.04835997894406319, "learning_rate": 8.689151008213662e-05, "loss": 0.0056, "step": 13001 }, { "epoch": 2.516640866873065, "grad_norm": 0.0771191418170929, "learning_rate": 8.688957424361372e-05, "loss": 0.0082, "step": 13002 
}, { "epoch": 2.5168343653250775, "grad_norm": 0.0453120581805706, "learning_rate": 8.688763828653326e-05, "loss": 0.0069, "step": 13003 }, { "epoch": 2.5170278637770895, "grad_norm": 0.05567750707268715, "learning_rate": 8.688570221090239e-05, "loss": 0.007, "step": 13004 }, { "epoch": 2.517221362229102, "grad_norm": 0.08478760719299316, "learning_rate": 8.688376601672835e-05, "loss": 0.0059, "step": 13005 }, { "epoch": 2.5174148606811144, "grad_norm": 0.0524589866399765, "learning_rate": 8.688182970401832e-05, "loss": 0.0081, "step": 13006 }, { "epoch": 2.517608359133127, "grad_norm": 0.07992168515920639, "learning_rate": 8.68798932727795e-05, "loss": 0.0057, "step": 13007 }, { "epoch": 2.5178018575851393, "grad_norm": 0.06942272931337357, "learning_rate": 8.687795672301911e-05, "loss": 0.0081, "step": 13008 }, { "epoch": 2.5179953560371517, "grad_norm": 0.10127700120210648, "learning_rate": 8.687602005474432e-05, "loss": 0.0077, "step": 13009 }, { "epoch": 2.518188854489164, "grad_norm": 0.07127043604850769, "learning_rate": 8.687408326796233e-05, "loss": 0.0079, "step": 13010 }, { "epoch": 2.5183823529411766, "grad_norm": 0.0835934728384018, "learning_rate": 8.687214636268036e-05, "loss": 0.0066, "step": 13011 }, { "epoch": 2.518575851393189, "grad_norm": 0.10611974447965622, "learning_rate": 8.687020933890561e-05, "loss": 0.0078, "step": 13012 }, { "epoch": 2.5187693498452015, "grad_norm": 0.07440948486328125, "learning_rate": 8.686827219664528e-05, "loss": 0.0065, "step": 13013 }, { "epoch": 2.5189628482972135, "grad_norm": 0.10522191971540451, "learning_rate": 8.686633493590656e-05, "loss": 0.0071, "step": 13014 }, { "epoch": 2.519156346749226, "grad_norm": 0.043701671063899994, "learning_rate": 8.686439755669667e-05, "loss": 0.0076, "step": 13015 }, { "epoch": 2.5193498452012384, "grad_norm": 0.10012149065732956, "learning_rate": 8.686246005902277e-05, "loss": 0.0064, "step": 13016 }, { "epoch": 2.519543343653251, "grad_norm": 0.05584404990077019, "learning_rate": 8.686052244289212e-05, "loss": 0.0075, "step": 13017 }, { "epoch": 2.5197368421052633, "grad_norm": 0.09244600683450699, "learning_rate": 8.685858470831188e-05, "loss": 0.0079, "step": 13018 }, { "epoch": 2.5199303405572753, "grad_norm": 0.08498740196228027, "learning_rate": 8.68566468552893e-05, "loss": 0.0079, "step": 13019 }, { "epoch": 2.5201238390092877, "grad_norm": 0.06843625754117966, "learning_rate": 8.685470888383152e-05, "loss": 0.0091, "step": 13020 }, { "epoch": 2.5203173374613, "grad_norm": 0.07894507795572281, "learning_rate": 8.685277079394582e-05, "loss": 0.007, "step": 13021 }, { "epoch": 2.5205108359133126, "grad_norm": 0.06342172622680664, "learning_rate": 8.685083258563934e-05, "loss": 0.0084, "step": 13022 }, { "epoch": 2.520704334365325, "grad_norm": 0.09345704317092896, "learning_rate": 8.684889425891934e-05, "loss": 0.0063, "step": 13023 }, { "epoch": 2.5208978328173375, "grad_norm": 0.06397434324026108, "learning_rate": 8.684695581379297e-05, "loss": 0.0066, "step": 13024 }, { "epoch": 2.52109133126935, "grad_norm": 0.08184678107500076, "learning_rate": 8.684501725026747e-05, "loss": 0.008, "step": 13025 }, { "epoch": 2.5212848297213624, "grad_norm": 0.05309759080410004, "learning_rate": 8.684307856835005e-05, "loss": 0.0067, "step": 13026 }, { "epoch": 2.521478328173375, "grad_norm": 0.07336001843214035, "learning_rate": 8.68411397680479e-05, "loss": 0.0065, "step": 13027 }, { "epoch": 2.5216718266253872, "grad_norm": 0.06280350685119629, "learning_rate": 8.683920084936825e-05, "loss": 0.0075, 
"step": 13028 }, { "epoch": 2.5218653250773992, "grad_norm": 0.06550619751214981, "learning_rate": 8.68372618123183e-05, "loss": 0.0069, "step": 13029 }, { "epoch": 2.5220588235294117, "grad_norm": 0.06646037846803665, "learning_rate": 8.683532265690524e-05, "loss": 0.007, "step": 13030 }, { "epoch": 2.522252321981424, "grad_norm": 0.08120459318161011, "learning_rate": 8.683338338313631e-05, "loss": 0.0077, "step": 13031 }, { "epoch": 2.5224458204334366, "grad_norm": 0.058176059275865555, "learning_rate": 8.68314439910187e-05, "loss": 0.0072, "step": 13032 }, { "epoch": 2.522639318885449, "grad_norm": 0.05224836245179176, "learning_rate": 8.682950448055963e-05, "loss": 0.0063, "step": 13033 }, { "epoch": 2.5228328173374615, "grad_norm": 0.06530890613794327, "learning_rate": 8.68275648517663e-05, "loss": 0.0069, "step": 13034 }, { "epoch": 2.5230263157894735, "grad_norm": 0.03560058772563934, "learning_rate": 8.682562510464591e-05, "loss": 0.0062, "step": 13035 }, { "epoch": 2.523219814241486, "grad_norm": 0.07964463531970978, "learning_rate": 8.682368523920572e-05, "loss": 0.007, "step": 13036 }, { "epoch": 2.5234133126934983, "grad_norm": 0.034120772033929825, "learning_rate": 8.682174525545288e-05, "loss": 0.0073, "step": 13037 }, { "epoch": 2.5236068111455108, "grad_norm": 0.08815155923366547, "learning_rate": 8.681980515339464e-05, "loss": 0.0081, "step": 13038 }, { "epoch": 2.523800309597523, "grad_norm": 0.0426759347319603, "learning_rate": 8.68178649330382e-05, "loss": 0.0068, "step": 13039 }, { "epoch": 2.5239938080495357, "grad_norm": 0.09420058876276016, "learning_rate": 8.68159245943908e-05, "loss": 0.0059, "step": 13040 }, { "epoch": 2.524187306501548, "grad_norm": 0.04961223155260086, "learning_rate": 8.681398413745962e-05, "loss": 0.0078, "step": 13041 }, { "epoch": 2.5243808049535605, "grad_norm": 0.11488834023475647, "learning_rate": 8.681204356225189e-05, "loss": 0.0062, "step": 13042 }, { "epoch": 2.524574303405573, "grad_norm": 0.03557470440864563, "learning_rate": 8.681010286877481e-05, "loss": 0.0054, "step": 13043 }, { "epoch": 2.524767801857585, "grad_norm": 0.11575721949338913, "learning_rate": 8.680816205703561e-05, "loss": 0.0078, "step": 13044 }, { "epoch": 2.5249613003095974, "grad_norm": 0.06284740567207336, "learning_rate": 8.68062211270415e-05, "loss": 0.0082, "step": 13045 }, { "epoch": 2.52515479876161, "grad_norm": 0.06842026859521866, "learning_rate": 8.68042800787997e-05, "loss": 0.0078, "step": 13046 }, { "epoch": 2.5253482972136223, "grad_norm": 0.09264452755451202, "learning_rate": 8.680233891231743e-05, "loss": 0.0069, "step": 13047 }, { "epoch": 2.5255417956656347, "grad_norm": 0.06170015409588814, "learning_rate": 8.68003976276019e-05, "loss": 0.0086, "step": 13048 }, { "epoch": 2.525735294117647, "grad_norm": 0.11194539815187454, "learning_rate": 8.679845622466032e-05, "loss": 0.0075, "step": 13049 }, { "epoch": 2.525928792569659, "grad_norm": 0.06803490221500397, "learning_rate": 8.679651470349991e-05, "loss": 0.0066, "step": 13050 }, { "epoch": 2.5261222910216716, "grad_norm": 0.11577629297971725, "learning_rate": 8.679457306412791e-05, "loss": 0.0069, "step": 13051 }, { "epoch": 2.526315789473684, "grad_norm": 0.0902552604675293, "learning_rate": 8.679263130655151e-05, "loss": 0.0073, "step": 13052 }, { "epoch": 2.5265092879256965, "grad_norm": 0.10335889458656311, "learning_rate": 8.679068943077794e-05, "loss": 0.0058, "step": 13053 }, { "epoch": 2.526702786377709, "grad_norm": 0.13502418994903564, "learning_rate": 8.678874743681442e-05, 
"loss": 0.0077, "step": 13054 }, { "epoch": 2.5268962848297214, "grad_norm": 0.05419456586241722, "learning_rate": 8.678680532466818e-05, "loss": 0.0072, "step": 13055 }, { "epoch": 2.527089783281734, "grad_norm": 0.1257980316877365, "learning_rate": 8.678486309434643e-05, "loss": 0.0074, "step": 13056 }, { "epoch": 2.5272832817337463, "grad_norm": 0.07390353083610535, "learning_rate": 8.678292074585637e-05, "loss": 0.0068, "step": 13057 }, { "epoch": 2.5274767801857587, "grad_norm": 0.10810765624046326, "learning_rate": 8.678097827920526e-05, "loss": 0.0054, "step": 13058 }, { "epoch": 2.527670278637771, "grad_norm": 0.11657576262950897, "learning_rate": 8.67790356944003e-05, "loss": 0.0075, "step": 13059 }, { "epoch": 2.527863777089783, "grad_norm": 0.057638928294181824, "learning_rate": 8.67770929914487e-05, "loss": 0.0063, "step": 13060 }, { "epoch": 2.5280572755417956, "grad_norm": 0.13592898845672607, "learning_rate": 8.677515017035772e-05, "loss": 0.0061, "step": 13061 }, { "epoch": 2.528250773993808, "grad_norm": 0.044712331146001816, "learning_rate": 8.677320723113457e-05, "loss": 0.0074, "step": 13062 }, { "epoch": 2.5284442724458205, "grad_norm": 0.12708744406700134, "learning_rate": 8.677126417378644e-05, "loss": 0.0068, "step": 13063 }, { "epoch": 2.528637770897833, "grad_norm": 0.10239191353321075, "learning_rate": 8.676932099832058e-05, "loss": 0.0079, "step": 13064 }, { "epoch": 2.5288312693498454, "grad_norm": 0.0806393027305603, "learning_rate": 8.676737770474424e-05, "loss": 0.0072, "step": 13065 }, { "epoch": 2.5290247678018574, "grad_norm": 0.10746464878320694, "learning_rate": 8.676543429306459e-05, "loss": 0.0083, "step": 13066 }, { "epoch": 2.52921826625387, "grad_norm": 0.03824793919920921, "learning_rate": 8.676349076328888e-05, "loss": 0.0059, "step": 13067 }, { "epoch": 2.5294117647058822, "grad_norm": 0.1559685617685318, "learning_rate": 8.676154711542435e-05, "loss": 0.0078, "step": 13068 }, { "epoch": 2.5296052631578947, "grad_norm": 0.06304518133401871, "learning_rate": 8.675960334947821e-05, "loss": 0.0069, "step": 13069 }, { "epoch": 2.529798761609907, "grad_norm": 0.0911964699625969, "learning_rate": 8.67576594654577e-05, "loss": 0.0065, "step": 13070 }, { "epoch": 2.5299922600619196, "grad_norm": 0.06207354739308357, "learning_rate": 8.675571546337003e-05, "loss": 0.0053, "step": 13071 }, { "epoch": 2.530185758513932, "grad_norm": 0.10409951955080032, "learning_rate": 8.675377134322245e-05, "loss": 0.0072, "step": 13072 }, { "epoch": 2.5303792569659445, "grad_norm": 0.05651874467730522, "learning_rate": 8.675182710502215e-05, "loss": 0.0085, "step": 13073 }, { "epoch": 2.530572755417957, "grad_norm": 0.14272278547286987, "learning_rate": 8.674988274877639e-05, "loss": 0.0057, "step": 13074 }, { "epoch": 2.530766253869969, "grad_norm": 0.06017637625336647, "learning_rate": 8.674793827449239e-05, "loss": 0.0076, "step": 13075 }, { "epoch": 2.5309597523219813, "grad_norm": 0.16611547768115997, "learning_rate": 8.674599368217738e-05, "loss": 0.0083, "step": 13076 }, { "epoch": 2.531153250773994, "grad_norm": 0.08534608036279678, "learning_rate": 8.674404897183858e-05, "loss": 0.0079, "step": 13077 }, { "epoch": 2.531346749226006, "grad_norm": 0.1160806193947792, "learning_rate": 8.674210414348324e-05, "loss": 0.0058, "step": 13078 }, { "epoch": 2.5315402476780187, "grad_norm": 0.1288784295320511, "learning_rate": 8.674015919711857e-05, "loss": 0.0073, "step": 13079 }, { "epoch": 2.531733746130031, "grad_norm": 0.08163158595561981, "learning_rate": 
8.673821413275181e-05, "loss": 0.0073, "step": 13080 }, { "epoch": 2.531927244582043, "grad_norm": 0.12261601537466049, "learning_rate": 8.673626895039017e-05, "loss": 0.0091, "step": 13081 }, { "epoch": 2.5321207430340555, "grad_norm": 0.0607588067650795, "learning_rate": 8.673432365004094e-05, "loss": 0.0059, "step": 13082 }, { "epoch": 2.532314241486068, "grad_norm": 0.11250206083059311, "learning_rate": 8.673237823171129e-05, "loss": 0.0071, "step": 13083 }, { "epoch": 2.5325077399380804, "grad_norm": 0.06581415235996246, "learning_rate": 8.673043269540848e-05, "loss": 0.0067, "step": 13084 }, { "epoch": 2.532701238390093, "grad_norm": 0.08736634254455566, "learning_rate": 8.672848704113975e-05, "loss": 0.0071, "step": 13085 }, { "epoch": 2.5328947368421053, "grad_norm": 0.06771072745323181, "learning_rate": 8.672654126891231e-05, "loss": 0.008, "step": 13086 }, { "epoch": 2.5330882352941178, "grad_norm": 0.07711745798587799, "learning_rate": 8.672459537873342e-05, "loss": 0.0085, "step": 13087 }, { "epoch": 2.53328173374613, "grad_norm": 0.07142733037471771, "learning_rate": 8.672264937061029e-05, "loss": 0.0072, "step": 13088 }, { "epoch": 2.5334752321981426, "grad_norm": 0.07087491452693939, "learning_rate": 8.672070324455017e-05, "loss": 0.0079, "step": 13089 }, { "epoch": 2.5336687306501546, "grad_norm": 0.07753299921751022, "learning_rate": 8.671875700056029e-05, "loss": 0.0064, "step": 13090 }, { "epoch": 2.533862229102167, "grad_norm": 0.051609184592962265, "learning_rate": 8.671681063864788e-05, "loss": 0.0054, "step": 13091 }, { "epoch": 2.5340557275541795, "grad_norm": 0.0723264291882515, "learning_rate": 8.67148641588202e-05, "loss": 0.0077, "step": 13092 }, { "epoch": 2.534249226006192, "grad_norm": 0.07743193954229355, "learning_rate": 8.671291756108446e-05, "loss": 0.0076, "step": 13093 }, { "epoch": 2.5344427244582044, "grad_norm": 0.060382094234228134, "learning_rate": 8.67109708454479e-05, "loss": 0.0074, "step": 13094 }, { "epoch": 2.534636222910217, "grad_norm": 0.0847369059920311, "learning_rate": 8.670902401191777e-05, "loss": 0.008, "step": 13095 }, { "epoch": 2.534829721362229, "grad_norm": 0.08156588673591614, "learning_rate": 8.670707706050131e-05, "loss": 0.0058, "step": 13096 }, { "epoch": 2.5350232198142413, "grad_norm": 0.0752178281545639, "learning_rate": 8.670512999120575e-05, "loss": 0.0068, "step": 13097 }, { "epoch": 2.5352167182662537, "grad_norm": 0.09273295849561691, "learning_rate": 8.670318280403833e-05, "loss": 0.0064, "step": 13098 }, { "epoch": 2.535410216718266, "grad_norm": 0.03562365099787712, "learning_rate": 8.670123549900628e-05, "loss": 0.0052, "step": 13099 }, { "epoch": 2.5356037151702786, "grad_norm": 0.08577914535999298, "learning_rate": 8.669928807611685e-05, "loss": 0.0068, "step": 13100 }, { "epoch": 2.535797213622291, "grad_norm": 0.04614400863647461, "learning_rate": 8.669734053537728e-05, "loss": 0.0057, "step": 13101 }, { "epoch": 2.5359907120743035, "grad_norm": 0.06619856506586075, "learning_rate": 8.669539287679481e-05, "loss": 0.0058, "step": 13102 }, { "epoch": 2.536184210526316, "grad_norm": 0.04335476830601692, "learning_rate": 8.669344510037667e-05, "loss": 0.0067, "step": 13103 }, { "epoch": 2.5363777089783284, "grad_norm": 0.03734368830919266, "learning_rate": 8.669149720613014e-05, "loss": 0.0087, "step": 13104 }, { "epoch": 2.536571207430341, "grad_norm": 0.09590684622526169, "learning_rate": 8.66895491940624e-05, "loss": 0.0067, "step": 13105 }, { "epoch": 2.536764705882353, "grad_norm": 0.07234455645084381, 
"learning_rate": 8.668760106418072e-05, "loss": 0.0065, "step": 13106 }, { "epoch": 2.5369582043343653, "grad_norm": 0.07123709470033646, "learning_rate": 8.668565281649237e-05, "loss": 0.0068, "step": 13107 }, { "epoch": 2.5371517027863777, "grad_norm": 0.06851023435592651, "learning_rate": 8.668370445100457e-05, "loss": 0.0064, "step": 13108 }, { "epoch": 2.53734520123839, "grad_norm": 0.09067650884389877, "learning_rate": 8.668175596772454e-05, "loss": 0.0078, "step": 13109 }, { "epoch": 2.5375386996904026, "grad_norm": 0.055973999202251434, "learning_rate": 8.667980736665956e-05, "loss": 0.0069, "step": 13110 }, { "epoch": 2.537732198142415, "grad_norm": 0.10058651864528656, "learning_rate": 8.667785864781687e-05, "loss": 0.0075, "step": 13111 }, { "epoch": 2.537925696594427, "grad_norm": 0.03471839055418968, "learning_rate": 8.66759098112037e-05, "loss": 0.0072, "step": 13112 }, { "epoch": 2.5381191950464395, "grad_norm": 0.09957989305257797, "learning_rate": 8.66739608568273e-05, "loss": 0.0076, "step": 13113 }, { "epoch": 2.538312693498452, "grad_norm": 0.04440075159072876, "learning_rate": 8.667201178469492e-05, "loss": 0.0085, "step": 13114 }, { "epoch": 2.5385061919504643, "grad_norm": 0.07577457278966904, "learning_rate": 8.667006259481382e-05, "loss": 0.0073, "step": 13115 }, { "epoch": 2.538699690402477, "grad_norm": 0.05317545682191849, "learning_rate": 8.666811328719122e-05, "loss": 0.0078, "step": 13116 }, { "epoch": 2.5388931888544892, "grad_norm": 0.06027699261903763, "learning_rate": 8.666616386183437e-05, "loss": 0.0063, "step": 13117 }, { "epoch": 2.5390866873065017, "grad_norm": 0.07454735785722733, "learning_rate": 8.666421431875054e-05, "loss": 0.0074, "step": 13118 }, { "epoch": 2.539280185758514, "grad_norm": 0.028719794005155563, "learning_rate": 8.666226465794696e-05, "loss": 0.0059, "step": 13119 }, { "epoch": 2.5394736842105265, "grad_norm": 0.060474567115306854, "learning_rate": 8.666031487943087e-05, "loss": 0.0082, "step": 13120 }, { "epoch": 2.5396671826625385, "grad_norm": 0.044290583580732346, "learning_rate": 8.665836498320954e-05, "loss": 0.0064, "step": 13121 }, { "epoch": 2.539860681114551, "grad_norm": 0.04729023575782776, "learning_rate": 8.66564149692902e-05, "loss": 0.0069, "step": 13122 }, { "epoch": 2.5400541795665634, "grad_norm": 0.04018671438097954, "learning_rate": 8.665446483768012e-05, "loss": 0.0056, "step": 13123 }, { "epoch": 2.540247678018576, "grad_norm": 0.025458291172981262, "learning_rate": 8.665251458838653e-05, "loss": 0.0065, "step": 13124 }, { "epoch": 2.5404411764705883, "grad_norm": 0.06361842900514603, "learning_rate": 8.66505642214167e-05, "loss": 0.0063, "step": 13125 }, { "epoch": 2.5406346749226008, "grad_norm": 0.02893776260316372, "learning_rate": 8.664861373677789e-05, "loss": 0.0065, "step": 13126 }, { "epoch": 2.5408281733746128, "grad_norm": 0.05539955198764801, "learning_rate": 8.66466631344773e-05, "loss": 0.006, "step": 13127 }, { "epoch": 2.541021671826625, "grad_norm": 0.04943542927503586, "learning_rate": 8.664471241452222e-05, "loss": 0.0075, "step": 13128 }, { "epoch": 2.5412151702786376, "grad_norm": 0.054928045719861984, "learning_rate": 8.66427615769199e-05, "loss": 0.006, "step": 13129 }, { "epoch": 2.54140866873065, "grad_norm": 0.07587266713380814, "learning_rate": 8.66408106216776e-05, "loss": 0.0072, "step": 13130 }, { "epoch": 2.5416021671826625, "grad_norm": 0.05286148190498352, "learning_rate": 8.663885954880256e-05, "loss": 0.0066, "step": 13131 }, { "epoch": 2.541795665634675, "grad_norm": 
0.07014163583517075, "learning_rate": 8.6636908358302e-05, "loss": 0.0057, "step": 13132 }, { "epoch": 2.5419891640866874, "grad_norm": 0.047864772379398346, "learning_rate": 8.663495705018326e-05, "loss": 0.0067, "step": 13133 }, { "epoch": 2.5421826625387, "grad_norm": 0.05681662634015083, "learning_rate": 8.663300562445351e-05, "loss": 0.0056, "step": 13134 }, { "epoch": 2.5423761609907123, "grad_norm": 0.055520568042993546, "learning_rate": 8.663105408112005e-05, "loss": 0.0084, "step": 13135 }, { "epoch": 2.5425696594427247, "grad_norm": 0.07164613902568817, "learning_rate": 8.662910242019014e-05, "loss": 0.0061, "step": 13136 }, { "epoch": 2.5427631578947367, "grad_norm": 0.04517049714922905, "learning_rate": 8.662715064167101e-05, "loss": 0.0077, "step": 13137 }, { "epoch": 2.542956656346749, "grad_norm": 0.06933312863111496, "learning_rate": 8.662519874556993e-05, "loss": 0.0072, "step": 13138 }, { "epoch": 2.5431501547987616, "grad_norm": 0.034902703016996384, "learning_rate": 8.662324673189415e-05, "loss": 0.0071, "step": 13139 }, { "epoch": 2.543343653250774, "grad_norm": 0.06779929250478745, "learning_rate": 8.662129460065093e-05, "loss": 0.008, "step": 13140 }, { "epoch": 2.5435371517027865, "grad_norm": 0.060243960469961166, "learning_rate": 8.661934235184752e-05, "loss": 0.007, "step": 13141 }, { "epoch": 2.5437306501547985, "grad_norm": 0.055547360330820084, "learning_rate": 8.661738998549121e-05, "loss": 0.0087, "step": 13142 }, { "epoch": 2.543924148606811, "grad_norm": 0.10605181008577347, "learning_rate": 8.661543750158922e-05, "loss": 0.0064, "step": 13143 }, { "epoch": 2.5441176470588234, "grad_norm": 0.07023779302835464, "learning_rate": 8.661348490014882e-05, "loss": 0.0064, "step": 13144 }, { "epoch": 2.544311145510836, "grad_norm": 0.10174450278282166, "learning_rate": 8.661153218117729e-05, "loss": 0.0087, "step": 13145 }, { "epoch": 2.5445046439628483, "grad_norm": 0.05710938945412636, "learning_rate": 8.660957934468186e-05, "loss": 0.0059, "step": 13146 }, { "epoch": 2.5446981424148607, "grad_norm": 0.08404408395290375, "learning_rate": 8.66076263906698e-05, "loss": 0.0072, "step": 13147 }, { "epoch": 2.544891640866873, "grad_norm": 0.06483238190412521, "learning_rate": 8.660567331914838e-05, "loss": 0.0056, "step": 13148 }, { "epoch": 2.5450851393188856, "grad_norm": 0.08396302908658981, "learning_rate": 8.660372013012484e-05, "loss": 0.0091, "step": 13149 }, { "epoch": 2.545278637770898, "grad_norm": 0.04337075725197792, "learning_rate": 8.660176682360646e-05, "loss": 0.0069, "step": 13150 }, { "epoch": 2.5454721362229105, "grad_norm": 0.06477484107017517, "learning_rate": 8.659981339960051e-05, "loss": 0.0057, "step": 13151 }, { "epoch": 2.5456656346749225, "grad_norm": 0.10414513200521469, "learning_rate": 8.659785985811423e-05, "loss": 0.008, "step": 13152 }, { "epoch": 2.545859133126935, "grad_norm": 0.052974872291088104, "learning_rate": 8.659590619915491e-05, "loss": 0.0065, "step": 13153 }, { "epoch": 2.5460526315789473, "grad_norm": 0.11675308644771576, "learning_rate": 8.659395242272978e-05, "loss": 0.0064, "step": 13154 }, { "epoch": 2.54624613003096, "grad_norm": 0.03591315075755119, "learning_rate": 8.659199852884611e-05, "loss": 0.0077, "step": 13155 }, { "epoch": 2.5464396284829722, "grad_norm": 0.12692736089229584, "learning_rate": 8.65900445175112e-05, "loss": 0.0091, "step": 13156 }, { "epoch": 2.5466331269349847, "grad_norm": 0.04505431652069092, "learning_rate": 8.658809038873227e-05, "loss": 0.0083, "step": 13157 }, { "epoch": 
2.5468266253869967, "grad_norm": 0.07687314599752426, "learning_rate": 8.658613614251661e-05, "loss": 0.0073, "step": 13158 }, { "epoch": 2.547020123839009, "grad_norm": 0.057566989213228226, "learning_rate": 8.658418177887148e-05, "loss": 0.0074, "step": 13159 }, { "epoch": 2.5472136222910216, "grad_norm": 0.03543208912014961, "learning_rate": 8.658222729780415e-05, "loss": 0.0058, "step": 13160 }, { "epoch": 2.547407120743034, "grad_norm": 0.10170984268188477, "learning_rate": 8.658027269932188e-05, "loss": 0.0062, "step": 13161 }, { "epoch": 2.5476006191950464, "grad_norm": 0.054867226630449295, "learning_rate": 8.657831798343193e-05, "loss": 0.0064, "step": 13162 }, { "epoch": 2.547794117647059, "grad_norm": 0.07389643788337708, "learning_rate": 8.657636315014157e-05, "loss": 0.0075, "step": 13163 }, { "epoch": 2.5479876160990713, "grad_norm": 0.06257826089859009, "learning_rate": 8.657440819945807e-05, "loss": 0.0049, "step": 13164 }, { "epoch": 2.5481811145510838, "grad_norm": 0.049538955092430115, "learning_rate": 8.657245313138872e-05, "loss": 0.0077, "step": 13165 }, { "epoch": 2.548374613003096, "grad_norm": 0.0828755646944046, "learning_rate": 8.657049794594075e-05, "loss": 0.0055, "step": 13166 }, { "epoch": 2.548568111455108, "grad_norm": 0.04811619222164154, "learning_rate": 8.656854264312145e-05, "loss": 0.0066, "step": 13167 }, { "epoch": 2.5487616099071206, "grad_norm": 0.0885121077299118, "learning_rate": 8.65665872229381e-05, "loss": 0.0093, "step": 13168 }, { "epoch": 2.548955108359133, "grad_norm": 0.05811522901058197, "learning_rate": 8.656463168539795e-05, "loss": 0.0067, "step": 13169 }, { "epoch": 2.5491486068111455, "grad_norm": 0.08797717839479446, "learning_rate": 8.656267603050828e-05, "loss": 0.0077, "step": 13170 }, { "epoch": 2.549342105263158, "grad_norm": 0.0460578016936779, "learning_rate": 8.656072025827635e-05, "loss": 0.0078, "step": 13171 }, { "epoch": 2.5495356037151704, "grad_norm": 0.11148520559072495, "learning_rate": 8.655876436870946e-05, "loss": 0.0057, "step": 13172 }, { "epoch": 2.5497291021671824, "grad_norm": 0.046357106417417526, "learning_rate": 8.655680836181484e-05, "loss": 0.007, "step": 13173 }, { "epoch": 2.549922600619195, "grad_norm": 0.12422294914722443, "learning_rate": 8.655485223759978e-05, "loss": 0.0068, "step": 13174 }, { "epoch": 2.5501160990712073, "grad_norm": 0.05711397901177406, "learning_rate": 8.655289599607157e-05, "loss": 0.0094, "step": 13175 }, { "epoch": 2.5503095975232197, "grad_norm": 0.11171947419643402, "learning_rate": 8.655093963723745e-05, "loss": 0.0079, "step": 13176 }, { "epoch": 2.550503095975232, "grad_norm": 0.059819240123033524, "learning_rate": 8.654898316110472e-05, "loss": 0.0067, "step": 13177 }, { "epoch": 2.5506965944272446, "grad_norm": 0.06469883024692535, "learning_rate": 8.654702656768063e-05, "loss": 0.0087, "step": 13178 }, { "epoch": 2.550890092879257, "grad_norm": 0.1010785922408104, "learning_rate": 8.65450698569725e-05, "loss": 0.0057, "step": 13179 }, { "epoch": 2.5510835913312695, "grad_norm": 0.05578303337097168, "learning_rate": 8.654311302898754e-05, "loss": 0.007, "step": 13180 }, { "epoch": 2.551277089783282, "grad_norm": 0.09068373590707779, "learning_rate": 8.654115608373308e-05, "loss": 0.0069, "step": 13181 }, { "epoch": 2.5514705882352944, "grad_norm": 0.06722996383905411, "learning_rate": 8.653919902121638e-05, "loss": 0.0069, "step": 13182 }, { "epoch": 2.5516640866873064, "grad_norm": 0.062456876039505005, "learning_rate": 8.653724184144469e-05, "loss": 0.0068, "step": 
13183 }, { "epoch": 2.551857585139319, "grad_norm": 0.06463287025690079, "learning_rate": 8.65352845444253e-05, "loss": 0.0071, "step": 13184 }, { "epoch": 2.5520510835913313, "grad_norm": 0.06523948162794113, "learning_rate": 8.65333271301655e-05, "loss": 0.0073, "step": 13185 }, { "epoch": 2.5522445820433437, "grad_norm": 0.07932629436254501, "learning_rate": 8.653136959867258e-05, "loss": 0.0064, "step": 13186 }, { "epoch": 2.552438080495356, "grad_norm": 0.05433806777000427, "learning_rate": 8.652941194995377e-05, "loss": 0.0065, "step": 13187 }, { "epoch": 2.5526315789473686, "grad_norm": 0.07395539432764053, "learning_rate": 8.652745418401641e-05, "loss": 0.0053, "step": 13188 }, { "epoch": 2.5528250773993806, "grad_norm": 0.07016729563474655, "learning_rate": 8.65254963008677e-05, "loss": 0.0066, "step": 13189 }, { "epoch": 2.553018575851393, "grad_norm": 0.08516465872526169, "learning_rate": 8.652353830051499e-05, "loss": 0.0066, "step": 13190 }, { "epoch": 2.5532120743034055, "grad_norm": 0.062024299055337906, "learning_rate": 8.652158018296553e-05, "loss": 0.0065, "step": 13191 }, { "epoch": 2.553405572755418, "grad_norm": 0.07913975417613983, "learning_rate": 8.651962194822659e-05, "loss": 0.0075, "step": 13192 }, { "epoch": 2.5535990712074303, "grad_norm": 0.03441163897514343, "learning_rate": 8.651766359630548e-05, "loss": 0.0056, "step": 13193 }, { "epoch": 2.553792569659443, "grad_norm": 0.07670123130083084, "learning_rate": 8.651570512720945e-05, "loss": 0.0065, "step": 13194 }, { "epoch": 2.5539860681114552, "grad_norm": 0.03045564517378807, "learning_rate": 8.651374654094579e-05, "loss": 0.0067, "step": 13195 }, { "epoch": 2.5541795665634677, "grad_norm": 0.08743678033351898, "learning_rate": 8.651178783752181e-05, "loss": 0.0065, "step": 13196 }, { "epoch": 2.55437306501548, "grad_norm": 0.04162009060382843, "learning_rate": 8.650982901694474e-05, "loss": 0.007, "step": 13197 }, { "epoch": 2.554566563467492, "grad_norm": 0.1049954742193222, "learning_rate": 8.65078700792219e-05, "loss": 0.0075, "step": 13198 }, { "epoch": 2.5547600619195046, "grad_norm": 0.04338859021663666, "learning_rate": 8.650591102436058e-05, "loss": 0.0071, "step": 13199 }, { "epoch": 2.554953560371517, "grad_norm": 0.09884333610534668, "learning_rate": 8.650395185236801e-05, "loss": 0.007, "step": 13200 }, { "epoch": 2.5551470588235294, "grad_norm": 0.08511155843734741, "learning_rate": 8.650199256325154e-05, "loss": 0.0069, "step": 13201 }, { "epoch": 2.555340557275542, "grad_norm": 0.060845501720905304, "learning_rate": 8.65000331570184e-05, "loss": 0.0082, "step": 13202 }, { "epoch": 2.5555340557275543, "grad_norm": 0.09773347526788712, "learning_rate": 8.649807363367592e-05, "loss": 0.0071, "step": 13203 }, { "epoch": 2.5557275541795663, "grad_norm": 0.058231182396411896, "learning_rate": 8.649611399323136e-05, "loss": 0.0067, "step": 13204 }, { "epoch": 2.5559210526315788, "grad_norm": 0.08384235203266144, "learning_rate": 8.649415423569201e-05, "loss": 0.0071, "step": 13205 }, { "epoch": 2.556114551083591, "grad_norm": 0.06961546838283539, "learning_rate": 8.649219436106514e-05, "loss": 0.0065, "step": 13206 }, { "epoch": 2.5563080495356036, "grad_norm": 0.052501123398542404, "learning_rate": 8.649023436935808e-05, "loss": 0.0069, "step": 13207 }, { "epoch": 2.556501547987616, "grad_norm": 0.0667884424328804, "learning_rate": 8.648827426057804e-05, "loss": 0.0069, "step": 13208 }, { "epoch": 2.5566950464396285, "grad_norm": 0.04079623147845268, "learning_rate": 8.64863140347324e-05, "loss": 
0.005, "step": 13209 }, { "epoch": 2.556888544891641, "grad_norm": 0.10884450376033783, "learning_rate": 8.648435369182837e-05, "loss": 0.0068, "step": 13210 }, { "epoch": 2.5570820433436534, "grad_norm": 0.05404680222272873, "learning_rate": 8.64823932318733e-05, "loss": 0.0073, "step": 13211 }, { "epoch": 2.557275541795666, "grad_norm": 0.09860816597938538, "learning_rate": 8.648043265487444e-05, "loss": 0.0071, "step": 13212 }, { "epoch": 2.5574690402476783, "grad_norm": 0.0520390085875988, "learning_rate": 8.647847196083908e-05, "loss": 0.0076, "step": 13213 }, { "epoch": 2.5576625386996903, "grad_norm": 0.08301123976707458, "learning_rate": 8.647651114977452e-05, "loss": 0.0074, "step": 13214 }, { "epoch": 2.5578560371517027, "grad_norm": 0.06601431965827942, "learning_rate": 8.647455022168806e-05, "loss": 0.0073, "step": 13215 }, { "epoch": 2.558049535603715, "grad_norm": 0.07046042382717133, "learning_rate": 8.647258917658698e-05, "loss": 0.0077, "step": 13216 }, { "epoch": 2.5582430340557276, "grad_norm": 0.07541782408952713, "learning_rate": 8.647062801447853e-05, "loss": 0.0068, "step": 13217 }, { "epoch": 2.55843653250774, "grad_norm": 0.07241165637969971, "learning_rate": 8.646866673537008e-05, "loss": 0.0064, "step": 13218 }, { "epoch": 2.558630030959752, "grad_norm": 0.0925992876291275, "learning_rate": 8.646670533926887e-05, "loss": 0.007, "step": 13219 }, { "epoch": 2.5588235294117645, "grad_norm": 0.06035558879375458, "learning_rate": 8.646474382618218e-05, "loss": 0.0075, "step": 13220 }, { "epoch": 2.559017027863777, "grad_norm": 0.08804376423358917, "learning_rate": 8.646278219611737e-05, "loss": 0.0079, "step": 13221 }, { "epoch": 2.5592105263157894, "grad_norm": 0.06221868470311165, "learning_rate": 8.646082044908165e-05, "loss": 0.0069, "step": 13222 }, { "epoch": 2.559404024767802, "grad_norm": 0.0722435712814331, "learning_rate": 8.645885858508238e-05, "loss": 0.007, "step": 13223 }, { "epoch": 2.5595975232198143, "grad_norm": 0.08075936138629913, "learning_rate": 8.645689660412679e-05, "loss": 0.0069, "step": 13224 }, { "epoch": 2.5597910216718267, "grad_norm": 0.04283832758665085, "learning_rate": 8.645493450622224e-05, "loss": 0.0075, "step": 13225 }, { "epoch": 2.559984520123839, "grad_norm": 0.0854937881231308, "learning_rate": 8.645297229137598e-05, "loss": 0.0057, "step": 13226 }, { "epoch": 2.5601780185758516, "grad_norm": 0.03963550552725792, "learning_rate": 8.645100995959533e-05, "loss": 0.0067, "step": 13227 }, { "epoch": 2.560371517027864, "grad_norm": 0.07904832810163498, "learning_rate": 8.644904751088756e-05, "loss": 0.0078, "step": 13228 }, { "epoch": 2.560565015479876, "grad_norm": 0.09589586406946182, "learning_rate": 8.644708494526001e-05, "loss": 0.0054, "step": 13229 }, { "epoch": 2.5607585139318885, "grad_norm": 0.07850238680839539, "learning_rate": 8.644512226271993e-05, "loss": 0.0077, "step": 13230 }, { "epoch": 2.560952012383901, "grad_norm": 0.12211450189352036, "learning_rate": 8.644315946327462e-05, "loss": 0.0076, "step": 13231 }, { "epoch": 2.5611455108359134, "grad_norm": 0.06591774523258209, "learning_rate": 8.64411965469314e-05, "loss": 0.0062, "step": 13232 }, { "epoch": 2.561339009287926, "grad_norm": 0.13398803770542145, "learning_rate": 8.643923351369754e-05, "loss": 0.0076, "step": 13233 }, { "epoch": 2.5615325077399382, "grad_norm": 0.08018225431442261, "learning_rate": 8.643727036358039e-05, "loss": 0.0077, "step": 13234 }, { "epoch": 2.5617260061919502, "grad_norm": 0.09683267772197723, "learning_rate": 
8.643530709658721e-05, "loss": 0.0082, "step": 13235 }, { "epoch": 2.5619195046439627, "grad_norm": 0.09103526920080185, "learning_rate": 8.643334371272529e-05, "loss": 0.0078, "step": 13236 }, { "epoch": 2.562113003095975, "grad_norm": 0.0523667074739933, "learning_rate": 8.643138021200195e-05, "loss": 0.0057, "step": 13237 }, { "epoch": 2.5623065015479876, "grad_norm": 0.0795331820845604, "learning_rate": 8.642941659442448e-05, "loss": 0.0067, "step": 13238 }, { "epoch": 2.5625, "grad_norm": 0.028233205899596214, "learning_rate": 8.64274528600002e-05, "loss": 0.0077, "step": 13239 }, { "epoch": 2.5626934984520124, "grad_norm": 0.060199182480573654, "learning_rate": 8.642548900873636e-05, "loss": 0.0082, "step": 13240 }, { "epoch": 2.562886996904025, "grad_norm": 0.049227021634578705, "learning_rate": 8.642352504064033e-05, "loss": 0.0074, "step": 13241 }, { "epoch": 2.5630804953560373, "grad_norm": 0.04779913276433945, "learning_rate": 8.642156095571935e-05, "loss": 0.0057, "step": 13242 }, { "epoch": 2.5632739938080498, "grad_norm": 0.041798368096351624, "learning_rate": 8.641959675398077e-05, "loss": 0.0068, "step": 13243 }, { "epoch": 2.5634674922600618, "grad_norm": 0.05557578057050705, "learning_rate": 8.641763243543186e-05, "loss": 0.0071, "step": 13244 }, { "epoch": 2.563660990712074, "grad_norm": 0.036459989845752716, "learning_rate": 8.641566800007994e-05, "loss": 0.008, "step": 13245 }, { "epoch": 2.5638544891640866, "grad_norm": 0.06780023872852325, "learning_rate": 8.64137034479323e-05, "loss": 0.0066, "step": 13246 }, { "epoch": 2.564047987616099, "grad_norm": 0.07644157111644745, "learning_rate": 8.641173877899625e-05, "loss": 0.0071, "step": 13247 }, { "epoch": 2.5642414860681115, "grad_norm": 0.0739012062549591, "learning_rate": 8.640977399327911e-05, "loss": 0.0062, "step": 13248 }, { "epoch": 2.564434984520124, "grad_norm": 0.09175460785627365, "learning_rate": 8.640780909078814e-05, "loss": 0.0071, "step": 13249 }, { "epoch": 2.564628482972136, "grad_norm": 0.05875364691019058, "learning_rate": 8.640584407153071e-05, "loss": 0.0079, "step": 13250 }, { "epoch": 2.5648219814241484, "grad_norm": 0.08791891485452652, "learning_rate": 8.640387893551407e-05, "loss": 0.0071, "step": 13251 }, { "epoch": 2.565015479876161, "grad_norm": 0.05427499860525131, "learning_rate": 8.640191368274555e-05, "loss": 0.0067, "step": 13252 }, { "epoch": 2.5652089783281733, "grad_norm": 0.07744428515434265, "learning_rate": 8.639994831323246e-05, "loss": 0.0068, "step": 13253 }, { "epoch": 2.5654024767801857, "grad_norm": 0.07012704014778137, "learning_rate": 8.639798282698209e-05, "loss": 0.009, "step": 13254 }, { "epoch": 2.565595975232198, "grad_norm": 0.11833148449659348, "learning_rate": 8.639601722400177e-05, "loss": 0.0058, "step": 13255 }, { "epoch": 2.5657894736842106, "grad_norm": 0.04370834678411484, "learning_rate": 8.639405150429876e-05, "loss": 0.0067, "step": 13256 }, { "epoch": 2.565982972136223, "grad_norm": 0.10866908729076385, "learning_rate": 8.639208566788045e-05, "loss": 0.0067, "step": 13257 }, { "epoch": 2.5661764705882355, "grad_norm": 0.06880427151918411, "learning_rate": 8.639011971475407e-05, "loss": 0.0074, "step": 13258 }, { "epoch": 2.566369969040248, "grad_norm": 0.10218778252601624, "learning_rate": 8.638815364492695e-05, "loss": 0.0074, "step": 13259 }, { "epoch": 2.56656346749226, "grad_norm": 0.07266558706760406, "learning_rate": 8.638618745840645e-05, "loss": 0.0066, "step": 13260 }, { "epoch": 2.5667569659442724, "grad_norm": 0.05790490657091141, 
"learning_rate": 8.63842211551998e-05, "loss": 0.0067, "step": 13261 }, { "epoch": 2.566950464396285, "grad_norm": 0.11098780483007431, "learning_rate": 8.638225473531436e-05, "loss": 0.007, "step": 13262 }, { "epoch": 2.5671439628482973, "grad_norm": 0.07858389616012573, "learning_rate": 8.638028819875744e-05, "loss": 0.0076, "step": 13263 }, { "epoch": 2.5673374613003097, "grad_norm": 0.08567934483289719, "learning_rate": 8.637832154553634e-05, "loss": 0.0083, "step": 13264 }, { "epoch": 2.5675309597523217, "grad_norm": 0.1115558072924614, "learning_rate": 8.637635477565837e-05, "loss": 0.0078, "step": 13265 }, { "epoch": 2.567724458204334, "grad_norm": 0.02626527100801468, "learning_rate": 8.637438788913082e-05, "loss": 0.0059, "step": 13266 }, { "epoch": 2.5679179566563466, "grad_norm": 0.14088016748428345, "learning_rate": 8.637242088596106e-05, "loss": 0.0075, "step": 13267 }, { "epoch": 2.568111455108359, "grad_norm": 0.046237342059612274, "learning_rate": 8.637045376615635e-05, "loss": 0.0069, "step": 13268 }, { "epoch": 2.5683049535603715, "grad_norm": 0.09598053246736526, "learning_rate": 8.636848652972403e-05, "loss": 0.0053, "step": 13269 }, { "epoch": 2.568498452012384, "grad_norm": 0.11527081578969955, "learning_rate": 8.636651917667141e-05, "loss": 0.0071, "step": 13270 }, { "epoch": 2.5686919504643964, "grad_norm": 0.09904184192419052, "learning_rate": 8.63645517070058e-05, "loss": 0.0078, "step": 13271 }, { "epoch": 2.568885448916409, "grad_norm": 0.14236746728420258, "learning_rate": 8.636258412073451e-05, "loss": 0.0078, "step": 13272 }, { "epoch": 2.5690789473684212, "grad_norm": 0.07826979458332062, "learning_rate": 8.636061641786486e-05, "loss": 0.0081, "step": 13273 }, { "epoch": 2.5692724458204337, "grad_norm": 0.17889970541000366, "learning_rate": 8.635864859840417e-05, "loss": 0.0065, "step": 13274 }, { "epoch": 2.5694659442724457, "grad_norm": 0.08252605050802231, "learning_rate": 8.635668066235975e-05, "loss": 0.0079, "step": 13275 }, { "epoch": 2.569659442724458, "grad_norm": 0.1635720133781433, "learning_rate": 8.635471260973891e-05, "loss": 0.0067, "step": 13276 }, { "epoch": 2.5698529411764706, "grad_norm": 0.11574885249137878, "learning_rate": 8.635274444054899e-05, "loss": 0.0077, "step": 13277 }, { "epoch": 2.570046439628483, "grad_norm": 0.14396843314170837, "learning_rate": 8.635077615479728e-05, "loss": 0.0065, "step": 13278 }, { "epoch": 2.5702399380804954, "grad_norm": 0.1207571029663086, "learning_rate": 8.634880775249111e-05, "loss": 0.0062, "step": 13279 }, { "epoch": 2.570433436532508, "grad_norm": 0.11807946115732193, "learning_rate": 8.634683923363779e-05, "loss": 0.0074, "step": 13280 }, { "epoch": 2.57062693498452, "grad_norm": 0.10733937472105026, "learning_rate": 8.634487059824466e-05, "loss": 0.0071, "step": 13281 }, { "epoch": 2.5708204334365323, "grad_norm": 0.10969246923923492, "learning_rate": 8.634290184631903e-05, "loss": 0.0068, "step": 13282 }, { "epoch": 2.5710139318885448, "grad_norm": 0.08435608446598053, "learning_rate": 8.634093297786818e-05, "loss": 0.0083, "step": 13283 }, { "epoch": 2.571207430340557, "grad_norm": 0.14446449279785156, "learning_rate": 8.633896399289949e-05, "loss": 0.0073, "step": 13284 }, { "epoch": 2.5714009287925697, "grad_norm": 0.06296858191490173, "learning_rate": 8.633699489142026e-05, "loss": 0.0082, "step": 13285 }, { "epoch": 2.571594427244582, "grad_norm": 0.13653810322284698, "learning_rate": 8.633502567343781e-05, "loss": 0.0069, "step": 13286 }, { "epoch": 2.5717879256965945, "grad_norm": 
0.08835276961326599, "learning_rate": 8.633305633895945e-05, "loss": 0.0065, "step": 13287 }, { "epoch": 2.571981424148607, "grad_norm": 0.10890737175941467, "learning_rate": 8.633108688799249e-05, "loss": 0.0073, "step": 13288 }, { "epoch": 2.5721749226006194, "grad_norm": 0.1539168506860733, "learning_rate": 8.632911732054428e-05, "loss": 0.0064, "step": 13289 }, { "epoch": 2.5723684210526314, "grad_norm": 0.08070029318332672, "learning_rate": 8.632714763662211e-05, "loss": 0.0073, "step": 13290 }, { "epoch": 2.572561919504644, "grad_norm": 0.1723780333995819, "learning_rate": 8.632517783623334e-05, "loss": 0.0071, "step": 13291 }, { "epoch": 2.5727554179566563, "grad_norm": 0.028625624254345894, "learning_rate": 8.632320791938529e-05, "loss": 0.0062, "step": 13292 }, { "epoch": 2.5729489164086687, "grad_norm": 0.15895985066890717, "learning_rate": 8.632123788608526e-05, "loss": 0.0068, "step": 13293 }, { "epoch": 2.573142414860681, "grad_norm": 0.08732829242944717, "learning_rate": 8.631926773634058e-05, "loss": 0.0062, "step": 13294 }, { "epoch": 2.5733359133126936, "grad_norm": 0.09226584434509277, "learning_rate": 8.631729747015859e-05, "loss": 0.0055, "step": 13295 }, { "epoch": 2.5735294117647056, "grad_norm": 0.12261103093624115, "learning_rate": 8.63153270875466e-05, "loss": 0.0059, "step": 13296 }, { "epoch": 2.573722910216718, "grad_norm": 0.06616335362195969, "learning_rate": 8.631335658851194e-05, "loss": 0.0079, "step": 13297 }, { "epoch": 2.5739164086687305, "grad_norm": 0.07259633392095566, "learning_rate": 8.631138597306194e-05, "loss": 0.007, "step": 13298 }, { "epoch": 2.574109907120743, "grad_norm": 0.05194486305117607, "learning_rate": 8.630941524120393e-05, "loss": 0.0067, "step": 13299 }, { "epoch": 2.5743034055727554, "grad_norm": 0.049949027597904205, "learning_rate": 8.630744439294521e-05, "loss": 0.0064, "step": 13300 }, { "epoch": 2.574496904024768, "grad_norm": 0.06545014679431915, "learning_rate": 8.630547342829313e-05, "loss": 0.0058, "step": 13301 }, { "epoch": 2.5746904024767803, "grad_norm": 0.04545099288225174, "learning_rate": 8.630350234725504e-05, "loss": 0.0069, "step": 13302 }, { "epoch": 2.5748839009287927, "grad_norm": 0.06694813817739487, "learning_rate": 8.630153114983821e-05, "loss": 0.0065, "step": 13303 }, { "epoch": 2.575077399380805, "grad_norm": 0.05258655175566673, "learning_rate": 8.629955983605002e-05, "loss": 0.007, "step": 13304 }, { "epoch": 2.5752708978328176, "grad_norm": 0.06385467201471329, "learning_rate": 8.629758840589777e-05, "loss": 0.007, "step": 13305 }, { "epoch": 2.5754643962848296, "grad_norm": 0.07001382857561111, "learning_rate": 8.629561685938879e-05, "loss": 0.0071, "step": 13306 }, { "epoch": 2.575657894736842, "grad_norm": 0.04247017204761505, "learning_rate": 8.629364519653042e-05, "loss": 0.0064, "step": 13307 }, { "epoch": 2.5758513931888545, "grad_norm": 0.04014170542359352, "learning_rate": 8.629167341733e-05, "loss": 0.0083, "step": 13308 }, { "epoch": 2.576044891640867, "grad_norm": 0.037760861217975616, "learning_rate": 8.628970152179484e-05, "loss": 0.0067, "step": 13309 }, { "epoch": 2.5762383900928794, "grad_norm": 0.038374513387680054, "learning_rate": 8.62877295099323e-05, "loss": 0.0059, "step": 13310 }, { "epoch": 2.576431888544892, "grad_norm": 0.029707293957471848, "learning_rate": 8.628575738174966e-05, "loss": 0.0057, "step": 13311 }, { "epoch": 2.576625386996904, "grad_norm": 0.07378020882606506, "learning_rate": 8.62837851372543e-05, "loss": 0.0061, "step": 13312 }, { "epoch": 
2.5768188854489162, "grad_norm": 0.04403337091207504, "learning_rate": 8.628181277645354e-05, "loss": 0.0054, "step": 13313 }, { "epoch": 2.5770123839009287, "grad_norm": 0.04563327878713608, "learning_rate": 8.62798402993547e-05, "loss": 0.0072, "step": 13314 }, { "epoch": 2.577205882352941, "grad_norm": 0.025790240615606308, "learning_rate": 8.62778677059651e-05, "loss": 0.0073, "step": 13315 }, { "epoch": 2.5773993808049536, "grad_norm": 0.04332004114985466, "learning_rate": 8.627589499629214e-05, "loss": 0.0055, "step": 13316 }, { "epoch": 2.577592879256966, "grad_norm": 0.03092736192047596, "learning_rate": 8.627392217034306e-05, "loss": 0.0062, "step": 13317 }, { "epoch": 2.5777863777089784, "grad_norm": 0.054940514266490936, "learning_rate": 8.627194922812529e-05, "loss": 0.0064, "step": 13318 }, { "epoch": 2.577979876160991, "grad_norm": 0.021498138085007668, "learning_rate": 8.626997616964609e-05, "loss": 0.007, "step": 13319 }, { "epoch": 2.5781733746130033, "grad_norm": 0.046678319573402405, "learning_rate": 8.626800299491284e-05, "loss": 0.0063, "step": 13320 }, { "epoch": 2.5783668730650153, "grad_norm": 0.045840006321668625, "learning_rate": 8.626602970393285e-05, "loss": 0.0077, "step": 13321 }, { "epoch": 2.5785603715170278, "grad_norm": 0.04488377273082733, "learning_rate": 8.626405629671345e-05, "loss": 0.0058, "step": 13322 }, { "epoch": 2.57875386996904, "grad_norm": 0.04954904317855835, "learning_rate": 8.626208277326202e-05, "loss": 0.008, "step": 13323 }, { "epoch": 2.5789473684210527, "grad_norm": 0.07634580135345459, "learning_rate": 8.626010913358585e-05, "loss": 0.0059, "step": 13324 }, { "epoch": 2.579140866873065, "grad_norm": 0.05725938826799393, "learning_rate": 8.62581353776923e-05, "loss": 0.007, "step": 13325 }, { "epoch": 2.5793343653250775, "grad_norm": 0.0918986052274704, "learning_rate": 8.625616150558872e-05, "loss": 0.0073, "step": 13326 }, { "epoch": 2.5795278637770895, "grad_norm": 0.06108255311846733, "learning_rate": 8.625418751728243e-05, "loss": 0.0064, "step": 13327 }, { "epoch": 2.579721362229102, "grad_norm": 0.10038138926029205, "learning_rate": 8.625221341278076e-05, "loss": 0.0054, "step": 13328 }, { "epoch": 2.5799148606811144, "grad_norm": 0.05339231714606285, "learning_rate": 8.625023919209107e-05, "loss": 0.0071, "step": 13329 }, { "epoch": 2.580108359133127, "grad_norm": 0.052749063819646835, "learning_rate": 8.624826485522067e-05, "loss": 0.0065, "step": 13330 }, { "epoch": 2.5803018575851393, "grad_norm": 0.044655941426754, "learning_rate": 8.624629040217696e-05, "loss": 0.0087, "step": 13331 }, { "epoch": 2.5804953560371517, "grad_norm": 0.0391913540661335, "learning_rate": 8.624431583296721e-05, "loss": 0.0064, "step": 13332 }, { "epoch": 2.580688854489164, "grad_norm": 0.03817296028137207, "learning_rate": 8.624234114759881e-05, "loss": 0.0064, "step": 13333 }, { "epoch": 2.5808823529411766, "grad_norm": 0.04736332967877388, "learning_rate": 8.624036634607907e-05, "loss": 0.0067, "step": 13334 }, { "epoch": 2.581075851393189, "grad_norm": 0.018585175275802612, "learning_rate": 8.623839142841535e-05, "loss": 0.006, "step": 13335 }, { "epoch": 2.5812693498452015, "grad_norm": 0.03898194804787636, "learning_rate": 8.623641639461499e-05, "loss": 0.006, "step": 13336 }, { "epoch": 2.5814628482972135, "grad_norm": 0.03278140723705292, "learning_rate": 8.623444124468534e-05, "loss": 0.0076, "step": 13337 }, { "epoch": 2.581656346749226, "grad_norm": 0.033197399228811264, "learning_rate": 8.623246597863371e-05, "loss": 0.0069, "step": 
13338 }, { "epoch": 2.5818498452012384, "grad_norm": 0.04447506368160248, "learning_rate": 8.62304905964675e-05, "loss": 0.0068, "step": 13339 }, { "epoch": 2.582043343653251, "grad_norm": 0.04532432556152344, "learning_rate": 8.6228515098194e-05, "loss": 0.0072, "step": 13340 }, { "epoch": 2.5822368421052633, "grad_norm": 0.021518612280488014, "learning_rate": 8.622653948382057e-05, "loss": 0.0069, "step": 13341 }, { "epoch": 2.5824303405572753, "grad_norm": 0.04998287558555603, "learning_rate": 8.622456375335457e-05, "loss": 0.0067, "step": 13342 }, { "epoch": 2.5826238390092877, "grad_norm": 0.03604747727513313, "learning_rate": 8.622258790680335e-05, "loss": 0.0072, "step": 13343 }, { "epoch": 2.5828173374613, "grad_norm": 0.04043649882078171, "learning_rate": 8.622061194417422e-05, "loss": 0.007, "step": 13344 }, { "epoch": 2.5830108359133126, "grad_norm": 0.06973637640476227, "learning_rate": 8.621863586547455e-05, "loss": 0.0087, "step": 13345 }, { "epoch": 2.583204334365325, "grad_norm": 0.031576499342918396, "learning_rate": 8.621665967071168e-05, "loss": 0.0079, "step": 13346 }, { "epoch": 2.5833978328173375, "grad_norm": 0.07326182723045349, "learning_rate": 8.621468335989298e-05, "loss": 0.0057, "step": 13347 }, { "epoch": 2.58359133126935, "grad_norm": 0.03462991863489151, "learning_rate": 8.621270693302577e-05, "loss": 0.0077, "step": 13348 }, { "epoch": 2.5837848297213624, "grad_norm": 0.0736517459154129, "learning_rate": 8.62107303901174e-05, "loss": 0.0076, "step": 13349 }, { "epoch": 2.583978328173375, "grad_norm": 0.05735673010349274, "learning_rate": 8.620875373117522e-05, "loss": 0.007, "step": 13350 }, { "epoch": 2.5841718266253872, "grad_norm": 0.08513139933347702, "learning_rate": 8.62067769562066e-05, "loss": 0.007, "step": 13351 }, { "epoch": 2.5843653250773992, "grad_norm": 0.06454640626907349, "learning_rate": 8.620480006521887e-05, "loss": 0.0067, "step": 13352 }, { "epoch": 2.5845588235294117, "grad_norm": 0.07745055109262466, "learning_rate": 8.620282305821937e-05, "loss": 0.0067, "step": 13353 }, { "epoch": 2.584752321981424, "grad_norm": 0.06189202517271042, "learning_rate": 8.620084593521548e-05, "loss": 0.0077, "step": 13354 }, { "epoch": 2.5849458204334366, "grad_norm": 0.05594022572040558, "learning_rate": 8.619886869621453e-05, "loss": 0.0059, "step": 13355 }, { "epoch": 2.585139318885449, "grad_norm": 0.06531348079442978, "learning_rate": 8.619689134122388e-05, "loss": 0.0062, "step": 13356 }, { "epoch": 2.5853328173374615, "grad_norm": 0.06250976026058197, "learning_rate": 8.619491387025086e-05, "loss": 0.0074, "step": 13357 }, { "epoch": 2.5855263157894735, "grad_norm": 0.06629678606987, "learning_rate": 8.619293628330283e-05, "loss": 0.007, "step": 13358 }, { "epoch": 2.585719814241486, "grad_norm": 0.07633120566606522, "learning_rate": 8.619095858038717e-05, "loss": 0.0069, "step": 13359 }, { "epoch": 2.5859133126934983, "grad_norm": 0.036340635269880295, "learning_rate": 8.61889807615112e-05, "loss": 0.0066, "step": 13360 }, { "epoch": 2.5861068111455108, "grad_norm": 0.07233813405036926, "learning_rate": 8.618700282668228e-05, "loss": 0.0071, "step": 13361 }, { "epoch": 2.586300309597523, "grad_norm": 0.033385299146175385, "learning_rate": 8.618502477590777e-05, "loss": 0.0061, "step": 13362 }, { "epoch": 2.5864938080495357, "grad_norm": 0.06830821931362152, "learning_rate": 8.618304660919504e-05, "loss": 0.006, "step": 13363 }, { "epoch": 2.586687306501548, "grad_norm": 0.05194179341197014, "learning_rate": 8.61810683265514e-05, "loss": 0.0071, 
"step": 13364 }, { "epoch": 2.5868808049535605, "grad_norm": 0.047557100653648376, "learning_rate": 8.617908992798425e-05, "loss": 0.0067, "step": 13365 }, { "epoch": 2.587074303405573, "grad_norm": 0.058441244065761566, "learning_rate": 8.617711141350093e-05, "loss": 0.0074, "step": 13366 }, { "epoch": 2.587267801857585, "grad_norm": 0.04462122172117233, "learning_rate": 8.617513278310878e-05, "loss": 0.0078, "step": 13367 }, { "epoch": 2.5874613003095974, "grad_norm": 0.05205034092068672, "learning_rate": 8.617315403681517e-05, "loss": 0.0072, "step": 13368 }, { "epoch": 2.58765479876161, "grad_norm": 0.05811724811792374, "learning_rate": 8.617117517462745e-05, "loss": 0.0069, "step": 13369 }, { "epoch": 2.5878482972136223, "grad_norm": 0.036446113139390945, "learning_rate": 8.616919619655298e-05, "loss": 0.0071, "step": 13370 }, { "epoch": 2.5880417956656347, "grad_norm": 0.06403099745512009, "learning_rate": 8.616721710259913e-05, "loss": 0.0073, "step": 13371 }, { "epoch": 2.588235294117647, "grad_norm": 0.04492080956697464, "learning_rate": 8.616523789277323e-05, "loss": 0.0066, "step": 13372 }, { "epoch": 2.588428792569659, "grad_norm": 0.050682585686445236, "learning_rate": 8.616325856708267e-05, "loss": 0.0068, "step": 13373 }, { "epoch": 2.5886222910216716, "grad_norm": 0.05399812385439873, "learning_rate": 8.61612791255348e-05, "loss": 0.0081, "step": 13374 }, { "epoch": 2.588815789473684, "grad_norm": 0.08704613894224167, "learning_rate": 8.615929956813695e-05, "loss": 0.0068, "step": 13375 }, { "epoch": 2.5890092879256965, "grad_norm": 0.03825066611170769, "learning_rate": 8.615731989489651e-05, "loss": 0.0074, "step": 13376 }, { "epoch": 2.589202786377709, "grad_norm": 0.08259417861700058, "learning_rate": 8.615534010582084e-05, "loss": 0.0066, "step": 13377 }, { "epoch": 2.5893962848297214, "grad_norm": 0.05908811837434769, "learning_rate": 8.615336020091728e-05, "loss": 0.007, "step": 13378 }, { "epoch": 2.589589783281734, "grad_norm": 0.10203161090612411, "learning_rate": 8.615138018019323e-05, "loss": 0.0073, "step": 13379 }, { "epoch": 2.5897832817337463, "grad_norm": 0.08141858875751495, "learning_rate": 8.6149400043656e-05, "loss": 0.0066, "step": 13380 }, { "epoch": 2.5899767801857587, "grad_norm": 0.08248491585254669, "learning_rate": 8.614741979131299e-05, "loss": 0.0065, "step": 13381 }, { "epoch": 2.590170278637771, "grad_norm": 0.05380100756883621, "learning_rate": 8.614543942317154e-05, "loss": 0.0073, "step": 13382 }, { "epoch": 2.590363777089783, "grad_norm": 0.06111292541027069, "learning_rate": 8.614345893923902e-05, "loss": 0.0077, "step": 13383 }, { "epoch": 2.5905572755417956, "grad_norm": 0.032778456807136536, "learning_rate": 8.614147833952279e-05, "loss": 0.0071, "step": 13384 }, { "epoch": 2.590750773993808, "grad_norm": 0.05621891841292381, "learning_rate": 8.613949762403024e-05, "loss": 0.0061, "step": 13385 }, { "epoch": 2.5909442724458205, "grad_norm": 0.03315278887748718, "learning_rate": 8.613751679276869e-05, "loss": 0.0075, "step": 13386 }, { "epoch": 2.591137770897833, "grad_norm": 0.06536655873060226, "learning_rate": 8.613553584574552e-05, "loss": 0.0069, "step": 13387 }, { "epoch": 2.5913312693498454, "grad_norm": 0.04194313660264015, "learning_rate": 8.61335547829681e-05, "loss": 0.0079, "step": 13388 }, { "epoch": 2.5915247678018574, "grad_norm": 0.06637681275606155, "learning_rate": 8.61315736044438e-05, "loss": 0.0074, "step": 13389 }, { "epoch": 2.59171826625387, "grad_norm": 0.047327496111392975, "learning_rate": 
8.612959231017999e-05, "loss": 0.0077, "step": 13390 }, { "epoch": 2.5919117647058822, "grad_norm": 0.06840575486421585, "learning_rate": 8.612761090018402e-05, "loss": 0.0083, "step": 13391 }, { "epoch": 2.5921052631578947, "grad_norm": 0.05995369702577591, "learning_rate": 8.612562937446325e-05, "loss": 0.0068, "step": 13392 }, { "epoch": 2.592298761609907, "grad_norm": 0.07907106727361679, "learning_rate": 8.612364773302508e-05, "loss": 0.0073, "step": 13393 }, { "epoch": 2.5924922600619196, "grad_norm": 0.07797836512327194, "learning_rate": 8.612166597587685e-05, "loss": 0.0079, "step": 13394 }, { "epoch": 2.592685758513932, "grad_norm": 0.09424726665019989, "learning_rate": 8.611968410302592e-05, "loss": 0.0056, "step": 13395 }, { "epoch": 2.5928792569659445, "grad_norm": 0.08935653418302536, "learning_rate": 8.611770211447969e-05, "loss": 0.0064, "step": 13396 }, { "epoch": 2.593072755417957, "grad_norm": 0.10386916995048523, "learning_rate": 8.611572001024551e-05, "loss": 0.0072, "step": 13397 }, { "epoch": 2.593266253869969, "grad_norm": 0.09144353866577148, "learning_rate": 8.611373779033074e-05, "loss": 0.0074, "step": 13398 }, { "epoch": 2.5934597523219813, "grad_norm": 0.09683986753225327, "learning_rate": 8.611175545474276e-05, "loss": 0.0066, "step": 13399 }, { "epoch": 2.593653250773994, "grad_norm": 0.11015166342258453, "learning_rate": 8.610977300348895e-05, "loss": 0.0068, "step": 13400 }, { "epoch": 2.593846749226006, "grad_norm": 0.08070053160190582, "learning_rate": 8.610779043657666e-05, "loss": 0.0072, "step": 13401 }, { "epoch": 2.5940402476780187, "grad_norm": 0.12396366149187088, "learning_rate": 8.610580775401328e-05, "loss": 0.0076, "step": 13402 }, { "epoch": 2.594233746130031, "grad_norm": 0.060909297317266464, "learning_rate": 8.610382495580617e-05, "loss": 0.0072, "step": 13403 }, { "epoch": 2.594427244582043, "grad_norm": 0.11762645840644836, "learning_rate": 8.61018420419627e-05, "loss": 0.0059, "step": 13404 }, { "epoch": 2.5946207430340555, "grad_norm": 0.08585414290428162, "learning_rate": 8.609985901249025e-05, "loss": 0.0064, "step": 13405 }, { "epoch": 2.594814241486068, "grad_norm": 0.07319196313619614, "learning_rate": 8.609787586739619e-05, "loss": 0.0069, "step": 13406 }, { "epoch": 2.5950077399380804, "grad_norm": 0.0746866837143898, "learning_rate": 8.609589260668786e-05, "loss": 0.0072, "step": 13407 }, { "epoch": 2.595201238390093, "grad_norm": 0.06136840954422951, "learning_rate": 8.609390923037269e-05, "loss": 0.0081, "step": 13408 }, { "epoch": 2.5953947368421053, "grad_norm": 0.08226807415485382, "learning_rate": 8.609192573845802e-05, "loss": 0.0075, "step": 13409 }, { "epoch": 2.5955882352941178, "grad_norm": 0.06990727037191391, "learning_rate": 8.608994213095122e-05, "loss": 0.007, "step": 13410 }, { "epoch": 2.59578173374613, "grad_norm": 0.06180575117468834, "learning_rate": 8.60879584078597e-05, "loss": 0.0069, "step": 13411 }, { "epoch": 2.5959752321981426, "grad_norm": 0.09251496195793152, "learning_rate": 8.608597456919081e-05, "loss": 0.0062, "step": 13412 }, { "epoch": 2.5961687306501546, "grad_norm": 0.03505516052246094, "learning_rate": 8.608399061495192e-05, "loss": 0.0064, "step": 13413 }, { "epoch": 2.596362229102167, "grad_norm": 0.09956321120262146, "learning_rate": 8.60820065451504e-05, "loss": 0.0073, "step": 13414 }, { "epoch": 2.5965557275541795, "grad_norm": 0.05435534566640854, "learning_rate": 8.608002235979365e-05, "loss": 0.0069, "step": 13415 }, { "epoch": 2.596749226006192, "grad_norm": 0.09359301626682281, 
"learning_rate": 8.607803805888903e-05, "loss": 0.007, "step": 13416 }, { "epoch": 2.5969427244582044, "grad_norm": 0.048520803451538086, "learning_rate": 8.607605364244393e-05, "loss": 0.0065, "step": 13417 }, { "epoch": 2.597136222910217, "grad_norm": 0.059740666300058365, "learning_rate": 8.60740691104657e-05, "loss": 0.0072, "step": 13418 }, { "epoch": 2.597329721362229, "grad_norm": 0.08782221376895905, "learning_rate": 8.607208446296177e-05, "loss": 0.0061, "step": 13419 }, { "epoch": 2.5975232198142413, "grad_norm": 0.042281605303287506, "learning_rate": 8.607009969993946e-05, "loss": 0.0076, "step": 13420 }, { "epoch": 2.5977167182662537, "grad_norm": 0.1241864413022995, "learning_rate": 8.606811482140618e-05, "loss": 0.0061, "step": 13421 }, { "epoch": 2.597910216718266, "grad_norm": 0.04079578444361687, "learning_rate": 8.606612982736933e-05, "loss": 0.0052, "step": 13422 }, { "epoch": 2.5981037151702786, "grad_norm": 0.07469043880701065, "learning_rate": 8.606414471783622e-05, "loss": 0.0063, "step": 13423 }, { "epoch": 2.598297213622291, "grad_norm": 0.08749416470527649, "learning_rate": 8.60621594928143e-05, "loss": 0.0084, "step": 13424 }, { "epoch": 2.5984907120743035, "grad_norm": 0.04744338244199753, "learning_rate": 8.606017415231093e-05, "loss": 0.0067, "step": 13425 }, { "epoch": 2.598684210526316, "grad_norm": 0.17407318949699402, "learning_rate": 8.605818869633347e-05, "loss": 0.0061, "step": 13426 }, { "epoch": 2.5988777089783284, "grad_norm": 0.04829676076769829, "learning_rate": 8.605620312488934e-05, "loss": 0.0081, "step": 13427 }, { "epoch": 2.599071207430341, "grad_norm": 0.14462777972221375, "learning_rate": 8.605421743798588e-05, "loss": 0.0065, "step": 13428 }, { "epoch": 2.599264705882353, "grad_norm": 0.08288999646902084, "learning_rate": 8.605223163563049e-05, "loss": 0.0084, "step": 13429 }, { "epoch": 2.5994582043343653, "grad_norm": 0.12792818248271942, "learning_rate": 8.605024571783057e-05, "loss": 0.0054, "step": 13430 }, { "epoch": 2.5996517027863777, "grad_norm": 0.1086166724562645, "learning_rate": 8.604825968459348e-05, "loss": 0.0086, "step": 13431 }, { "epoch": 2.59984520123839, "grad_norm": 0.09327618777751923, "learning_rate": 8.60462735359266e-05, "loss": 0.0072, "step": 13432 }, { "epoch": 2.6000386996904026, "grad_norm": 0.11018449068069458, "learning_rate": 8.604428727183735e-05, "loss": 0.0074, "step": 13433 }, { "epoch": 2.600232198142415, "grad_norm": 0.06595055758953094, "learning_rate": 8.604230089233306e-05, "loss": 0.0092, "step": 13434 }, { "epoch": 2.600425696594427, "grad_norm": 0.12921462953090668, "learning_rate": 8.604031439742115e-05, "loss": 0.007, "step": 13435 }, { "epoch": 2.6006191950464395, "grad_norm": 0.05213303491473198, "learning_rate": 8.603832778710903e-05, "loss": 0.0066, "step": 13436 }, { "epoch": 2.600812693498452, "grad_norm": 0.12363817542791367, "learning_rate": 8.603634106140401e-05, "loss": 0.0067, "step": 13437 }, { "epoch": 2.6010061919504643, "grad_norm": 0.026339950039982796, "learning_rate": 8.603435422031355e-05, "loss": 0.0068, "step": 13438 }, { "epoch": 2.601199690402477, "grad_norm": 0.10164738446474075, "learning_rate": 8.6032367263845e-05, "loss": 0.0081, "step": 13439 }, { "epoch": 2.6013931888544892, "grad_norm": 0.031174443662166595, "learning_rate": 8.603038019200575e-05, "loss": 0.0068, "step": 13440 }, { "epoch": 2.6015866873065017, "grad_norm": 0.08711804449558258, "learning_rate": 8.60283930048032e-05, "loss": 0.0072, "step": 13441 }, { "epoch": 2.601780185758514, "grad_norm": 
0.10032517462968826, "learning_rate": 8.602640570224474e-05, "loss": 0.0067, "step": 13442 }, { "epoch": 2.6019736842105265, "grad_norm": 0.07902207970619202, "learning_rate": 8.602441828433774e-05, "loss": 0.0087, "step": 13443 }, { "epoch": 2.6021671826625385, "grad_norm": 0.11366825550794601, "learning_rate": 8.60224307510896e-05, "loss": 0.0062, "step": 13444 }, { "epoch": 2.602360681114551, "grad_norm": 0.06864620000123978, "learning_rate": 8.602044310250768e-05, "loss": 0.0071, "step": 13445 }, { "epoch": 2.6025541795665634, "grad_norm": 0.1179393082857132, "learning_rate": 8.601845533859943e-05, "loss": 0.0074, "step": 13446 }, { "epoch": 2.602747678018576, "grad_norm": 0.05106712132692337, "learning_rate": 8.601646745937218e-05, "loss": 0.0077, "step": 13447 }, { "epoch": 2.6029411764705883, "grad_norm": 0.11188806593418121, "learning_rate": 8.601447946483339e-05, "loss": 0.0069, "step": 13448 }, { "epoch": 2.6031346749226008, "grad_norm": 0.059937186539173126, "learning_rate": 8.601249135499036e-05, "loss": 0.0063, "step": 13449 }, { "epoch": 2.6033281733746128, "grad_norm": 0.07629076391458511, "learning_rate": 8.601050312985055e-05, "loss": 0.006, "step": 13450 }, { "epoch": 2.603521671826625, "grad_norm": 0.06898824125528336, "learning_rate": 8.600851478942133e-05, "loss": 0.0071, "step": 13451 }, { "epoch": 2.6037151702786376, "grad_norm": 0.05941547080874443, "learning_rate": 8.60065263337101e-05, "loss": 0.0068, "step": 13452 }, { "epoch": 2.60390866873065, "grad_norm": 0.04574136808514595, "learning_rate": 8.600453776272424e-05, "loss": 0.0064, "step": 13453 }, { "epoch": 2.6041021671826625, "grad_norm": 0.05863987281918526, "learning_rate": 8.600254907647113e-05, "loss": 0.0073, "step": 13454 }, { "epoch": 2.604295665634675, "grad_norm": 0.04059375450015068, "learning_rate": 8.60005602749582e-05, "loss": 0.008, "step": 13455 }, { "epoch": 2.6044891640866874, "grad_norm": 0.07553590834140778, "learning_rate": 8.599857135819283e-05, "loss": 0.0071, "step": 13456 }, { "epoch": 2.6046826625387, "grad_norm": 0.05490518733859062, "learning_rate": 8.599658232618241e-05, "loss": 0.0072, "step": 13457 }, { "epoch": 2.6048761609907123, "grad_norm": 0.07146021723747253, "learning_rate": 8.59945931789343e-05, "loss": 0.0073, "step": 13458 }, { "epoch": 2.6050696594427247, "grad_norm": 0.06430532038211823, "learning_rate": 8.599260391645596e-05, "loss": 0.0063, "step": 13459 }, { "epoch": 2.6052631578947367, "grad_norm": 0.09774135053157806, "learning_rate": 8.599061453875477e-05, "loss": 0.0062, "step": 13460 }, { "epoch": 2.605456656346749, "grad_norm": 0.060622893273830414, "learning_rate": 8.598862504583808e-05, "loss": 0.0057, "step": 13461 }, { "epoch": 2.6056501547987616, "grad_norm": 0.07718146592378616, "learning_rate": 8.598663543771333e-05, "loss": 0.0067, "step": 13462 }, { "epoch": 2.605843653250774, "grad_norm": 0.041126884520053864, "learning_rate": 8.59846457143879e-05, "loss": 0.0087, "step": 13463 }, { "epoch": 2.6060371517027865, "grad_norm": 0.059159055352211, "learning_rate": 8.598265587586922e-05, "loss": 0.0065, "step": 13464 }, { "epoch": 2.6062306501547985, "grad_norm": 0.04236210137605667, "learning_rate": 8.598066592216463e-05, "loss": 0.0069, "step": 13465 }, { "epoch": 2.606424148606811, "grad_norm": 0.05087704956531525, "learning_rate": 8.597867585328155e-05, "loss": 0.0067, "step": 13466 }, { "epoch": 2.6066176470588234, "grad_norm": 0.06065475195646286, "learning_rate": 8.59766856692274e-05, "loss": 0.0078, "step": 13467 }, { "epoch": 2.606811145510836, 
"grad_norm": 0.036788735538721085, "learning_rate": 8.597469537000957e-05, "loss": 0.0074, "step": 13468 }, { "epoch": 2.6070046439628483, "grad_norm": 0.13339227437973022, "learning_rate": 8.597270495563545e-05, "loss": 0.0088, "step": 13469 }, { "epoch": 2.6071981424148607, "grad_norm": 0.061468902975320816, "learning_rate": 8.597071442611246e-05, "loss": 0.0078, "step": 13470 }, { "epoch": 2.607391640866873, "grad_norm": 0.12647679448127747, "learning_rate": 8.596872378144797e-05, "loss": 0.007, "step": 13471 }, { "epoch": 2.6075851393188856, "grad_norm": 0.0632837638258934, "learning_rate": 8.59667330216494e-05, "loss": 0.0071, "step": 13472 }, { "epoch": 2.607778637770898, "grad_norm": 0.12737591564655304, "learning_rate": 8.596474214672415e-05, "loss": 0.0064, "step": 13473 }, { "epoch": 2.6079721362229105, "grad_norm": 0.07297208905220032, "learning_rate": 8.596275115667961e-05, "loss": 0.0066, "step": 13474 }, { "epoch": 2.6081656346749225, "grad_norm": 0.06086986884474754, "learning_rate": 8.596076005152321e-05, "loss": 0.0072, "step": 13475 }, { "epoch": 2.608359133126935, "grad_norm": 0.12529312074184418, "learning_rate": 8.595876883126232e-05, "loss": 0.006, "step": 13476 }, { "epoch": 2.6085526315789473, "grad_norm": 0.025233209133148193, "learning_rate": 8.595677749590434e-05, "loss": 0.0067, "step": 13477 }, { "epoch": 2.60874613003096, "grad_norm": 0.12625491619110107, "learning_rate": 8.59547860454567e-05, "loss": 0.0075, "step": 13478 }, { "epoch": 2.6089396284829722, "grad_norm": 0.05986976623535156, "learning_rate": 8.59527944799268e-05, "loss": 0.0058, "step": 13479 }, { "epoch": 2.6091331269349847, "grad_norm": 0.11729945987462997, "learning_rate": 8.595080279932203e-05, "loss": 0.0069, "step": 13480 }, { "epoch": 2.6093266253869967, "grad_norm": 0.07835809141397476, "learning_rate": 8.594881100364981e-05, "loss": 0.0067, "step": 13481 }, { "epoch": 2.609520123839009, "grad_norm": 0.04969019070267677, "learning_rate": 8.594681909291754e-05, "loss": 0.0077, "step": 13482 }, { "epoch": 2.6097136222910216, "grad_norm": 0.14567643404006958, "learning_rate": 8.59448270671326e-05, "loss": 0.007, "step": 13483 }, { "epoch": 2.609907120743034, "grad_norm": 0.120271235704422, "learning_rate": 8.594283492630242e-05, "loss": 0.007, "step": 13484 }, { "epoch": 2.6101006191950464, "grad_norm": 0.24369391798973083, "learning_rate": 8.594084267043443e-05, "loss": 0.006, "step": 13485 }, { "epoch": 2.610294117647059, "grad_norm": 0.08935204893350601, "learning_rate": 8.593885029953598e-05, "loss": 0.0062, "step": 13486 }, { "epoch": 2.6104876160990713, "grad_norm": 0.13173234462738037, "learning_rate": 8.593685781361453e-05, "loss": 0.0089, "step": 13487 }, { "epoch": 2.6106811145510838, "grad_norm": 0.15010565519332886, "learning_rate": 8.593486521267746e-05, "loss": 0.007, "step": 13488 }, { "epoch": 2.610874613003096, "grad_norm": 0.04503459855914116, "learning_rate": 8.593287249673219e-05, "loss": 0.0066, "step": 13489 }, { "epoch": 2.611068111455108, "grad_norm": 0.1520383059978485, "learning_rate": 8.593087966578611e-05, "loss": 0.0061, "step": 13490 }, { "epoch": 2.6112616099071206, "grad_norm": 0.04581652954220772, "learning_rate": 8.592888671984665e-05, "loss": 0.0086, "step": 13491 }, { "epoch": 2.611455108359133, "grad_norm": 0.07953424751758575, "learning_rate": 8.592689365892121e-05, "loss": 0.0059, "step": 13492 }, { "epoch": 2.6116486068111455, "grad_norm": 0.07879848778247833, "learning_rate": 8.592490048301719e-05, "loss": 0.0072, "step": 13493 }, { "epoch": 
2.611842105263158, "grad_norm": 0.039578381925821304, "learning_rate": 8.592290719214203e-05, "loss": 0.0067, "step": 13494 }, { "epoch": 2.6120356037151704, "grad_norm": 0.09534615278244019, "learning_rate": 8.59209137863031e-05, "loss": 0.007, "step": 13495 }, { "epoch": 2.6122291021671824, "grad_norm": 0.04931594058871269, "learning_rate": 8.591892026550783e-05, "loss": 0.0057, "step": 13496 }, { "epoch": 2.612422600619195, "grad_norm": 0.03540399298071861, "learning_rate": 8.591692662976364e-05, "loss": 0.0057, "step": 13497 }, { "epoch": 2.6126160990712073, "grad_norm": 0.09357815980911255, "learning_rate": 8.591493287907795e-05, "loss": 0.0057, "step": 13498 }, { "epoch": 2.6128095975232197, "grad_norm": 0.02497752010822296, "learning_rate": 8.591293901345815e-05, "loss": 0.0058, "step": 13499 }, { "epoch": 2.613003095975232, "grad_norm": 0.10361578315496445, "learning_rate": 8.591094503291165e-05, "loss": 0.0074, "step": 13500 }, { "epoch": 2.6131965944272446, "grad_norm": 0.040913425385951996, "learning_rate": 8.590895093744587e-05, "loss": 0.0063, "step": 13501 }, { "epoch": 2.613390092879257, "grad_norm": 0.062069233506917953, "learning_rate": 8.590695672706824e-05, "loss": 0.0076, "step": 13502 }, { "epoch": 2.6135835913312695, "grad_norm": 0.11564268171787262, "learning_rate": 8.590496240178615e-05, "loss": 0.007, "step": 13503 }, { "epoch": 2.613777089783282, "grad_norm": 0.03175186365842819, "learning_rate": 8.590296796160705e-05, "loss": 0.0062, "step": 13504 }, { "epoch": 2.6139705882352944, "grad_norm": 0.09675098210573196, "learning_rate": 8.590097340653829e-05, "loss": 0.0077, "step": 13505 }, { "epoch": 2.6141640866873064, "grad_norm": 0.0420963428914547, "learning_rate": 8.589897873658734e-05, "loss": 0.0062, "step": 13506 }, { "epoch": 2.614357585139319, "grad_norm": 0.04990484565496445, "learning_rate": 8.589698395176163e-05, "loss": 0.0065, "step": 13507 }, { "epoch": 2.6145510835913313, "grad_norm": 0.09904174506664276, "learning_rate": 8.58949890520685e-05, "loss": 0.0075, "step": 13508 }, { "epoch": 2.6147445820433437, "grad_norm": 0.04766892269253731, "learning_rate": 8.589299403751545e-05, "loss": 0.0062, "step": 13509 }, { "epoch": 2.614938080495356, "grad_norm": 0.08649395406246185, "learning_rate": 8.589099890810984e-05, "loss": 0.0063, "step": 13510 }, { "epoch": 2.6151315789473686, "grad_norm": 0.03494790196418762, "learning_rate": 8.588900366385911e-05, "loss": 0.0063, "step": 13511 }, { "epoch": 2.6153250773993806, "grad_norm": 0.10702863335609436, "learning_rate": 8.588700830477068e-05, "loss": 0.0055, "step": 13512 }, { "epoch": 2.615518575851393, "grad_norm": 0.03764805570244789, "learning_rate": 8.588501283085196e-05, "loss": 0.007, "step": 13513 }, { "epoch": 2.6157120743034055, "grad_norm": 0.06184440478682518, "learning_rate": 8.588301724211037e-05, "loss": 0.0072, "step": 13514 }, { "epoch": 2.615905572755418, "grad_norm": 0.06322392076253891, "learning_rate": 8.588102153855334e-05, "loss": 0.0061, "step": 13515 }, { "epoch": 2.6160990712074303, "grad_norm": 0.08317699283361435, "learning_rate": 8.587902572018828e-05, "loss": 0.0068, "step": 13516 }, { "epoch": 2.616292569659443, "grad_norm": 0.04386841878294945, "learning_rate": 8.587702978702261e-05, "loss": 0.0076, "step": 13517 }, { "epoch": 2.6164860681114552, "grad_norm": 0.08247501403093338, "learning_rate": 8.587503373906376e-05, "loss": 0.0065, "step": 13518 }, { "epoch": 2.6166795665634677, "grad_norm": 0.04040692746639252, "learning_rate": 8.587303757631911e-05, "loss": 0.007, "step": 
13519 }, { "epoch": 2.61687306501548, "grad_norm": 0.06456247717142105, "learning_rate": 8.587104129879615e-05, "loss": 0.0058, "step": 13520 }, { "epoch": 2.617066563467492, "grad_norm": 0.0470777302980423, "learning_rate": 8.586904490650225e-05, "loss": 0.0077, "step": 13521 }, { "epoch": 2.6172600619195046, "grad_norm": 0.05892947316169739, "learning_rate": 8.586704839944485e-05, "loss": 0.0067, "step": 13522 }, { "epoch": 2.617453560371517, "grad_norm": 0.03456278145313263, "learning_rate": 8.586505177763138e-05, "loss": 0.0079, "step": 13523 }, { "epoch": 2.6176470588235294, "grad_norm": 0.10133498162031174, "learning_rate": 8.586305504106924e-05, "loss": 0.0077, "step": 13524 }, { "epoch": 2.617840557275542, "grad_norm": 0.09055618941783905, "learning_rate": 8.586105818976587e-05, "loss": 0.0075, "step": 13525 }, { "epoch": 2.6180340557275543, "grad_norm": 0.05526036396622658, "learning_rate": 8.585906122372869e-05, "loss": 0.0075, "step": 13526 }, { "epoch": 2.6182275541795663, "grad_norm": 0.08843294531106949, "learning_rate": 8.585706414296511e-05, "loss": 0.0064, "step": 13527 }, { "epoch": 2.6184210526315788, "grad_norm": 0.08103082329034805, "learning_rate": 8.585506694748259e-05, "loss": 0.0064, "step": 13528 }, { "epoch": 2.618614551083591, "grad_norm": 0.08300565928220749, "learning_rate": 8.585306963728854e-05, "loss": 0.0064, "step": 13529 }, { "epoch": 2.6188080495356036, "grad_norm": 0.09046165645122528, "learning_rate": 8.585107221239035e-05, "loss": 0.0078, "step": 13530 }, { "epoch": 2.619001547987616, "grad_norm": 0.06088393181562424, "learning_rate": 8.584907467279549e-05, "loss": 0.0081, "step": 13531 }, { "epoch": 2.6191950464396285, "grad_norm": 0.10657478123903275, "learning_rate": 8.584707701851138e-05, "loss": 0.009, "step": 13532 }, { "epoch": 2.619388544891641, "grad_norm": 0.08847777545452118, "learning_rate": 8.584507924954542e-05, "loss": 0.0077, "step": 13533 }, { "epoch": 2.6195820433436534, "grad_norm": 0.09852742403745651, "learning_rate": 8.584308136590506e-05, "loss": 0.0064, "step": 13534 }, { "epoch": 2.619775541795666, "grad_norm": 0.07090464234352112, "learning_rate": 8.584108336759774e-05, "loss": 0.0076, "step": 13535 }, { "epoch": 2.6199690402476783, "grad_norm": 0.10653577744960785, "learning_rate": 8.583908525463086e-05, "loss": 0.0061, "step": 13536 }, { "epoch": 2.6201625386996903, "grad_norm": 0.07396112382411957, "learning_rate": 8.583708702701187e-05, "loss": 0.0074, "step": 13537 }, { "epoch": 2.6203560371517027, "grad_norm": 0.12715300917625427, "learning_rate": 8.583508868474818e-05, "loss": 0.0064, "step": 13538 }, { "epoch": 2.620549535603715, "grad_norm": 0.040184348821640015, "learning_rate": 8.583309022784722e-05, "loss": 0.0074, "step": 13539 }, { "epoch": 2.6207430340557276, "grad_norm": 0.05237365886569023, "learning_rate": 8.583109165631645e-05, "loss": 0.0067, "step": 13540 }, { "epoch": 2.62093653250774, "grad_norm": 0.0525171235203743, "learning_rate": 8.582909297016325e-05, "loss": 0.0073, "step": 13541 }, { "epoch": 2.621130030959752, "grad_norm": 0.018310999497771263, "learning_rate": 8.58270941693951e-05, "loss": 0.0049, "step": 13542 }, { "epoch": 2.6213235294117645, "grad_norm": 0.05931204929947853, "learning_rate": 8.582509525401939e-05, "loss": 0.0072, "step": 13543 }, { "epoch": 2.621517027863777, "grad_norm": 0.033451031893491745, "learning_rate": 8.582309622404359e-05, "loss": 0.007, "step": 13544 }, { "epoch": 2.6217105263157894, "grad_norm": 0.057690396904945374, "learning_rate": 8.58210970794751e-05, 
"loss": 0.0068, "step": 13545 }, { "epoch": 2.621904024767802, "grad_norm": 0.04813654348254204, "learning_rate": 8.58190978203214e-05, "loss": 0.0071, "step": 13546 }, { "epoch": 2.6220975232198143, "grad_norm": 0.0573716014623642, "learning_rate": 8.581709844658985e-05, "loss": 0.0065, "step": 13547 }, { "epoch": 2.6222910216718267, "grad_norm": 0.039781246334314346, "learning_rate": 8.581509895828793e-05, "loss": 0.0075, "step": 13548 }, { "epoch": 2.622484520123839, "grad_norm": 0.04254777729511261, "learning_rate": 8.581309935542307e-05, "loss": 0.0063, "step": 13549 }, { "epoch": 2.6226780185758516, "grad_norm": 0.0462811216711998, "learning_rate": 8.58110996380027e-05, "loss": 0.0064, "step": 13550 }, { "epoch": 2.622871517027864, "grad_norm": 0.04145253077149391, "learning_rate": 8.580909980603424e-05, "loss": 0.0061, "step": 13551 }, { "epoch": 2.623065015479876, "grad_norm": 0.04574514552950859, "learning_rate": 8.580709985952515e-05, "loss": 0.0071, "step": 13552 }, { "epoch": 2.6232585139318885, "grad_norm": 0.06451213359832764, "learning_rate": 8.580509979848286e-05, "loss": 0.0072, "step": 13553 }, { "epoch": 2.623452012383901, "grad_norm": 0.04127398878335953, "learning_rate": 8.58030996229148e-05, "loss": 0.0078, "step": 13554 }, { "epoch": 2.6236455108359134, "grad_norm": 0.07766008377075195, "learning_rate": 8.58010993328284e-05, "loss": 0.0064, "step": 13555 }, { "epoch": 2.623839009287926, "grad_norm": 0.02825484238564968, "learning_rate": 8.57990989282311e-05, "loss": 0.0061, "step": 13556 }, { "epoch": 2.6240325077399382, "grad_norm": 0.07404767721891403, "learning_rate": 8.579709840913032e-05, "loss": 0.0069, "step": 13557 }, { "epoch": 2.6242260061919502, "grad_norm": 0.03661109879612923, "learning_rate": 8.579509777553355e-05, "loss": 0.0061, "step": 13558 }, { "epoch": 2.6244195046439627, "grad_norm": 0.11832328140735626, "learning_rate": 8.579309702744818e-05, "loss": 0.0078, "step": 13559 }, { "epoch": 2.624613003095975, "grad_norm": 0.08439269661903381, "learning_rate": 8.579109616488166e-05, "loss": 0.007, "step": 13560 }, { "epoch": 2.6248065015479876, "grad_norm": 0.07345297187566757, "learning_rate": 8.578909518784144e-05, "loss": 0.0063, "step": 13561 }, { "epoch": 2.625, "grad_norm": 0.05953943356871605, "learning_rate": 8.578709409633494e-05, "loss": 0.0061, "step": 13562 }, { "epoch": 2.6251934984520124, "grad_norm": 0.08747284859418869, "learning_rate": 8.578509289036962e-05, "loss": 0.0074, "step": 13563 }, { "epoch": 2.625386996904025, "grad_norm": 0.04155397042632103, "learning_rate": 8.578309156995292e-05, "loss": 0.007, "step": 13564 }, { "epoch": 2.6255804953560373, "grad_norm": 0.08688192069530487, "learning_rate": 8.578109013509224e-05, "loss": 0.0062, "step": 13565 }, { "epoch": 2.6257739938080498, "grad_norm": 0.06144116818904877, "learning_rate": 8.577908858579508e-05, "loss": 0.0073, "step": 13566 }, { "epoch": 2.6259674922600618, "grad_norm": 0.08666546642780304, "learning_rate": 8.577708692206884e-05, "loss": 0.0086, "step": 13567 }, { "epoch": 2.626160990712074, "grad_norm": 0.09337396919727325, "learning_rate": 8.577508514392098e-05, "loss": 0.0065, "step": 13568 }, { "epoch": 2.6263544891640866, "grad_norm": 0.08599600940942764, "learning_rate": 8.577308325135894e-05, "loss": 0.0062, "step": 13569 }, { "epoch": 2.626547987616099, "grad_norm": 0.1649218201637268, "learning_rate": 8.577108124439015e-05, "loss": 0.0065, "step": 13570 }, { "epoch": 2.6267414860681115, "grad_norm": 0.09286695718765259, "learning_rate": 
8.576907912302206e-05, "loss": 0.006, "step": 13571 }, { "epoch": 2.626934984520124, "grad_norm": 0.1717040091753006, "learning_rate": 8.576707688726212e-05, "loss": 0.0092, "step": 13572 }, { "epoch": 2.627128482972136, "grad_norm": 0.05803735554218292, "learning_rate": 8.576507453711777e-05, "loss": 0.0069, "step": 13573 }, { "epoch": 2.6273219814241484, "grad_norm": 0.1475435048341751, "learning_rate": 8.576307207259645e-05, "loss": 0.0064, "step": 13574 }, { "epoch": 2.627515479876161, "grad_norm": 0.06569478660821915, "learning_rate": 8.576106949370561e-05, "loss": 0.0073, "step": 13575 }, { "epoch": 2.6277089783281733, "grad_norm": 0.13149358332157135, "learning_rate": 8.575906680045269e-05, "loss": 0.0075, "step": 13576 }, { "epoch": 2.6279024767801857, "grad_norm": 0.087716244161129, "learning_rate": 8.575706399284515e-05, "loss": 0.0075, "step": 13577 }, { "epoch": 2.628095975232198, "grad_norm": 0.10760682076215744, "learning_rate": 8.57550610708904e-05, "loss": 0.0071, "step": 13578 }, { "epoch": 2.6282894736842106, "grad_norm": 0.08709559589624405, "learning_rate": 8.575305803459595e-05, "loss": 0.006, "step": 13579 }, { "epoch": 2.628482972136223, "grad_norm": 0.08450505137443542, "learning_rate": 8.575105488396918e-05, "loss": 0.0084, "step": 13580 }, { "epoch": 2.6286764705882355, "grad_norm": 0.08938004076480865, "learning_rate": 8.574905161901757e-05, "loss": 0.0055, "step": 13581 }, { "epoch": 2.628869969040248, "grad_norm": 0.05148419737815857, "learning_rate": 8.574704823974857e-05, "loss": 0.0064, "step": 13582 }, { "epoch": 2.62906346749226, "grad_norm": 0.08855399489402771, "learning_rate": 8.57450447461696e-05, "loss": 0.0075, "step": 13583 }, { "epoch": 2.6292569659442724, "grad_norm": 0.06683109700679779, "learning_rate": 8.574304113828817e-05, "loss": 0.007, "step": 13584 }, { "epoch": 2.629450464396285, "grad_norm": 0.08276914805173874, "learning_rate": 8.574103741611165e-05, "loss": 0.0071, "step": 13585 }, { "epoch": 2.6296439628482973, "grad_norm": 0.07582561671733856, "learning_rate": 8.573903357964754e-05, "loss": 0.0072, "step": 13586 }, { "epoch": 2.6298374613003097, "grad_norm": 0.0658862516283989, "learning_rate": 8.573702962890328e-05, "loss": 0.0065, "step": 13587 }, { "epoch": 2.6300309597523217, "grad_norm": 0.07269846647977829, "learning_rate": 8.573502556388633e-05, "loss": 0.0086, "step": 13588 }, { "epoch": 2.630224458204334, "grad_norm": 0.0823959931731224, "learning_rate": 8.573302138460409e-05, "loss": 0.0072, "step": 13589 }, { "epoch": 2.6304179566563466, "grad_norm": 0.06320079416036606, "learning_rate": 8.573101709106409e-05, "loss": 0.0073, "step": 13590 }, { "epoch": 2.630611455108359, "grad_norm": 0.06309346109628677, "learning_rate": 8.572901268327372e-05, "loss": 0.007, "step": 13591 }, { "epoch": 2.6308049535603715, "grad_norm": 0.057408157736063004, "learning_rate": 8.572700816124046e-05, "loss": 0.0073, "step": 13592 }, { "epoch": 2.630998452012384, "grad_norm": 0.04523666948080063, "learning_rate": 8.572500352497177e-05, "loss": 0.006, "step": 13593 }, { "epoch": 2.6311919504643964, "grad_norm": 0.06710377335548401, "learning_rate": 8.572299877447506e-05, "loss": 0.0075, "step": 13594 }, { "epoch": 2.631385448916409, "grad_norm": 0.044332314282655716, "learning_rate": 8.572099390975781e-05, "loss": 0.007, "step": 13595 }, { "epoch": 2.6315789473684212, "grad_norm": 0.05283458158373833, "learning_rate": 8.57189889308275e-05, "loss": 0.0071, "step": 13596 }, { "epoch": 2.6317724458204337, "grad_norm": 0.04953937232494354, 
"learning_rate": 8.571698383769154e-05, "loss": 0.0068, "step": 13597 }, { "epoch": 2.6319659442724457, "grad_norm": 0.03548550605773926, "learning_rate": 8.571497863035741e-05, "loss": 0.0069, "step": 13598 }, { "epoch": 2.632159442724458, "grad_norm": 0.05089037865400314, "learning_rate": 8.571297330883255e-05, "loss": 0.0057, "step": 13599 }, { "epoch": 2.6323529411764706, "grad_norm": 0.022564809769392014, "learning_rate": 8.571096787312442e-05, "loss": 0.0056, "step": 13600 }, { "epoch": 2.632546439628483, "grad_norm": 0.05552217364311218, "learning_rate": 8.570896232324049e-05, "loss": 0.0067, "step": 13601 }, { "epoch": 2.6327399380804954, "grad_norm": 0.041600991040468216, "learning_rate": 8.57069566591882e-05, "loss": 0.0049, "step": 13602 }, { "epoch": 2.632933436532508, "grad_norm": 0.07176625728607178, "learning_rate": 8.570495088097501e-05, "loss": 0.0072, "step": 13603 }, { "epoch": 2.63312693498452, "grad_norm": 0.0787511020898819, "learning_rate": 8.570294498860837e-05, "loss": 0.0072, "step": 13604 }, { "epoch": 2.6333204334365323, "grad_norm": 0.07292235642671585, "learning_rate": 8.570093898209577e-05, "loss": 0.007, "step": 13605 }, { "epoch": 2.6335139318885448, "grad_norm": 0.06170840561389923, "learning_rate": 8.569893286144463e-05, "loss": 0.0067, "step": 13606 }, { "epoch": 2.633707430340557, "grad_norm": 0.07534520328044891, "learning_rate": 8.569692662666242e-05, "loss": 0.0068, "step": 13607 }, { "epoch": 2.6339009287925697, "grad_norm": 0.040467873215675354, "learning_rate": 8.569492027775661e-05, "loss": 0.0068, "step": 13608 }, { "epoch": 2.634094427244582, "grad_norm": 0.0917520597577095, "learning_rate": 8.569291381473465e-05, "loss": 0.0065, "step": 13609 }, { "epoch": 2.6342879256965945, "grad_norm": 0.030464746057987213, "learning_rate": 8.569090723760397e-05, "loss": 0.0069, "step": 13610 }, { "epoch": 2.634481424148607, "grad_norm": 0.060068584978580475, "learning_rate": 8.56889005463721e-05, "loss": 0.0075, "step": 13611 }, { "epoch": 2.6346749226006194, "grad_norm": 0.07002129405736923, "learning_rate": 8.568689374104645e-05, "loss": 0.0062, "step": 13612 }, { "epoch": 2.6348684210526314, "grad_norm": 0.044161733239889145, "learning_rate": 8.568488682163448e-05, "loss": 0.006, "step": 13613 }, { "epoch": 2.635061919504644, "grad_norm": 0.05259659141302109, "learning_rate": 8.568287978814366e-05, "loss": 0.0094, "step": 13614 }, { "epoch": 2.6352554179566563, "grad_norm": 0.061066921800374985, "learning_rate": 8.568087264058145e-05, "loss": 0.0077, "step": 13615 }, { "epoch": 2.6354489164086687, "grad_norm": 0.021044088527560234, "learning_rate": 8.567886537895535e-05, "loss": 0.0053, "step": 13616 }, { "epoch": 2.635642414860681, "grad_norm": 0.07302552461624146, "learning_rate": 8.567685800327274e-05, "loss": 0.0066, "step": 13617 }, { "epoch": 2.6358359133126936, "grad_norm": 0.04819733276963234, "learning_rate": 8.567485051354117e-05, "loss": 0.0073, "step": 13618 }, { "epoch": 2.6360294117647056, "grad_norm": 0.03657551482319832, "learning_rate": 8.567284290976803e-05, "loss": 0.0056, "step": 13619 }, { "epoch": 2.636222910216718, "grad_norm": 0.08461883664131165, "learning_rate": 8.567083519196084e-05, "loss": 0.0063, "step": 13620 }, { "epoch": 2.6364164086687305, "grad_norm": 0.02588752470910549, "learning_rate": 8.566882736012703e-05, "loss": 0.0065, "step": 13621 }, { "epoch": 2.636609907120743, "grad_norm": 0.08409348875284195, "learning_rate": 8.566681941427411e-05, "loss": 0.0067, "step": 13622 }, { "epoch": 2.6368034055727554, 
"grad_norm": 0.054967835545539856, "learning_rate": 8.566481135440946e-05, "loss": 0.007, "step": 13623 }, { "epoch": 2.636996904024768, "grad_norm": 0.07674837112426758, "learning_rate": 8.566280318054062e-05, "loss": 0.0061, "step": 13624 }, { "epoch": 2.6371904024767803, "grad_norm": 0.06461982429027557, "learning_rate": 8.566079489267504e-05, "loss": 0.006, "step": 13625 }, { "epoch": 2.6373839009287927, "grad_norm": 0.030909638851881027, "learning_rate": 8.565878649082018e-05, "loss": 0.0077, "step": 13626 }, { "epoch": 2.637577399380805, "grad_norm": 0.07512884587049484, "learning_rate": 8.565677797498349e-05, "loss": 0.0075, "step": 13627 }, { "epoch": 2.6377708978328176, "grad_norm": 0.06486186385154724, "learning_rate": 8.565476934517247e-05, "loss": 0.0064, "step": 13628 }, { "epoch": 2.6379643962848296, "grad_norm": 0.05554869771003723, "learning_rate": 8.565276060139457e-05, "loss": 0.0066, "step": 13629 }, { "epoch": 2.638157894736842, "grad_norm": 0.06504839658737183, "learning_rate": 8.565075174365727e-05, "loss": 0.0081, "step": 13630 }, { "epoch": 2.6383513931888545, "grad_norm": 0.08782907575368881, "learning_rate": 8.5648742771968e-05, "loss": 0.0058, "step": 13631 }, { "epoch": 2.638544891640867, "grad_norm": 0.09278924018144608, "learning_rate": 8.564673368633427e-05, "loss": 0.0064, "step": 13632 }, { "epoch": 2.6387383900928794, "grad_norm": 0.06791693717241287, "learning_rate": 8.564472448676355e-05, "loss": 0.0075, "step": 13633 }, { "epoch": 2.638931888544892, "grad_norm": 0.06623119860887527, "learning_rate": 8.564271517326327e-05, "loss": 0.0062, "step": 13634 }, { "epoch": 2.639125386996904, "grad_norm": 0.05426763743162155, "learning_rate": 8.564070574584093e-05, "loss": 0.0064, "step": 13635 }, { "epoch": 2.6393188854489162, "grad_norm": 0.05746486037969589, "learning_rate": 8.563869620450401e-05, "loss": 0.0076, "step": 13636 }, { "epoch": 2.6395123839009287, "grad_norm": 0.04036272317171097, "learning_rate": 8.563668654925995e-05, "loss": 0.0075, "step": 13637 }, { "epoch": 2.639705882352941, "grad_norm": 0.06235058978199959, "learning_rate": 8.563467678011626e-05, "loss": 0.0066, "step": 13638 }, { "epoch": 2.6398993808049536, "grad_norm": 0.06275325268507004, "learning_rate": 8.563266689708036e-05, "loss": 0.0064, "step": 13639 }, { "epoch": 2.640092879256966, "grad_norm": 0.08006224781274796, "learning_rate": 8.563065690015979e-05, "loss": 0.0089, "step": 13640 }, { "epoch": 2.6402863777089784, "grad_norm": 0.03256966173648834, "learning_rate": 8.562864678936196e-05, "loss": 0.0084, "step": 13641 }, { "epoch": 2.640479876160991, "grad_norm": 0.07156343013048172, "learning_rate": 8.562663656469437e-05, "loss": 0.0076, "step": 13642 }, { "epoch": 2.6406733746130033, "grad_norm": 0.04302803799510002, "learning_rate": 8.56246262261645e-05, "loss": 0.0068, "step": 13643 }, { "epoch": 2.6408668730650153, "grad_norm": 0.03951249644160271, "learning_rate": 8.562261577377983e-05, "loss": 0.0069, "step": 13644 }, { "epoch": 2.6410603715170278, "grad_norm": 0.07038403302431107, "learning_rate": 8.562060520754778e-05, "loss": 0.0069, "step": 13645 }, { "epoch": 2.64125386996904, "grad_norm": 0.03079988807439804, "learning_rate": 8.561859452747591e-05, "loss": 0.0085, "step": 13646 }, { "epoch": 2.6414473684210527, "grad_norm": 0.06225409358739853, "learning_rate": 8.561658373357163e-05, "loss": 0.0067, "step": 13647 }, { "epoch": 2.641640866873065, "grad_norm": 0.05488916486501694, "learning_rate": 8.561457282584241e-05, "loss": 0.0072, "step": 13648 }, { "epoch": 
2.6418343653250775, "grad_norm": 0.11082855612039566, "learning_rate": 8.561256180429577e-05, "loss": 0.0055, "step": 13649 }, { "epoch": 2.6420278637770895, "grad_norm": 0.0408426895737648, "learning_rate": 8.561055066893919e-05, "loss": 0.0075, "step": 13650 }, { "epoch": 2.642221362229102, "grad_norm": 0.08577492833137512, "learning_rate": 8.56085394197801e-05, "loss": 0.007, "step": 13651 }, { "epoch": 2.6424148606811144, "grad_norm": 0.06809111684560776, "learning_rate": 8.5606528056826e-05, "loss": 0.0067, "step": 13652 }, { "epoch": 2.642608359133127, "grad_norm": 0.05227355286478996, "learning_rate": 8.560451658008438e-05, "loss": 0.0064, "step": 13653 }, { "epoch": 2.6428018575851393, "grad_norm": 0.08350130170583725, "learning_rate": 8.560250498956271e-05, "loss": 0.0071, "step": 13654 }, { "epoch": 2.6429953560371517, "grad_norm": 0.07402388751506805, "learning_rate": 8.560049328526847e-05, "loss": 0.0074, "step": 13655 }, { "epoch": 2.643188854489164, "grad_norm": 0.0619020089507103, "learning_rate": 8.559848146720914e-05, "loss": 0.0073, "step": 13656 }, { "epoch": 2.6433823529411766, "grad_norm": 0.08019481599330902, "learning_rate": 8.559646953539219e-05, "loss": 0.0072, "step": 13657 }, { "epoch": 2.643575851393189, "grad_norm": 0.07041653245687485, "learning_rate": 8.55944574898251e-05, "loss": 0.0073, "step": 13658 }, { "epoch": 2.6437693498452015, "grad_norm": 0.0422450453042984, "learning_rate": 8.559244533051536e-05, "loss": 0.0074, "step": 13659 }, { "epoch": 2.6439628482972135, "grad_norm": 0.05028913542628288, "learning_rate": 8.559043305747046e-05, "loss": 0.007, "step": 13660 }, { "epoch": 2.644156346749226, "grad_norm": 0.04777681455016136, "learning_rate": 8.558842067069784e-05, "loss": 0.008, "step": 13661 }, { "epoch": 2.6443498452012384, "grad_norm": 0.04864852875471115, "learning_rate": 8.558640817020501e-05, "loss": 0.0069, "step": 13662 }, { "epoch": 2.644543343653251, "grad_norm": 0.059869080781936646, "learning_rate": 8.558439555599948e-05, "loss": 0.0058, "step": 13663 }, { "epoch": 2.6447368421052633, "grad_norm": 0.04998587444424629, "learning_rate": 8.558238282808868e-05, "loss": 0.0074, "step": 13664 }, { "epoch": 2.6449303405572753, "grad_norm": 0.03743642196059227, "learning_rate": 8.558036998648014e-05, "loss": 0.006, "step": 13665 }, { "epoch": 2.6451238390092877, "grad_norm": 0.048759136348962784, "learning_rate": 8.55783570311813e-05, "loss": 0.0062, "step": 13666 }, { "epoch": 2.6453173374613, "grad_norm": 0.04682046175003052, "learning_rate": 8.557634396219966e-05, "loss": 0.0062, "step": 13667 }, { "epoch": 2.6455108359133126, "grad_norm": 0.0661325454711914, "learning_rate": 8.557433077954272e-05, "loss": 0.0066, "step": 13668 }, { "epoch": 2.645704334365325, "grad_norm": 0.05473761633038521, "learning_rate": 8.557231748321795e-05, "loss": 0.0086, "step": 13669 }, { "epoch": 2.6458978328173375, "grad_norm": 0.03422101214528084, "learning_rate": 8.557030407323283e-05, "loss": 0.0059, "step": 13670 }, { "epoch": 2.64609133126935, "grad_norm": 0.06170567125082016, "learning_rate": 8.556829054959485e-05, "loss": 0.0063, "step": 13671 }, { "epoch": 2.6462848297213624, "grad_norm": 0.057716213166713715, "learning_rate": 8.55662769123115e-05, "loss": 0.0087, "step": 13672 }, { "epoch": 2.646478328173375, "grad_norm": 0.11342728137969971, "learning_rate": 8.556426316139029e-05, "loss": 0.0074, "step": 13673 }, { "epoch": 2.6466718266253872, "grad_norm": 0.05738211050629616, "learning_rate": 8.556224929683863e-05, "loss": 0.0069, "step": 13674 }, { 
"epoch": 2.6468653250773992, "grad_norm": 0.10159085690975189, "learning_rate": 8.556023531866408e-05, "loss": 0.0067, "step": 13675 }, { "epoch": 2.6470588235294117, "grad_norm": 0.06460481882095337, "learning_rate": 8.555822122687411e-05, "loss": 0.0067, "step": 13676 }, { "epoch": 2.647252321981424, "grad_norm": 0.09212172776460648, "learning_rate": 8.555620702147621e-05, "loss": 0.0072, "step": 13677 }, { "epoch": 2.6474458204334366, "grad_norm": 0.08284354209899902, "learning_rate": 8.555419270247786e-05, "loss": 0.0071, "step": 13678 }, { "epoch": 2.647639318885449, "grad_norm": 0.0557556077837944, "learning_rate": 8.555217826988655e-05, "loss": 0.0058, "step": 13679 }, { "epoch": 2.6478328173374615, "grad_norm": 0.07675811648368835, "learning_rate": 8.555016372370975e-05, "loss": 0.0067, "step": 13680 }, { "epoch": 2.6480263157894735, "grad_norm": 0.03639645129442215, "learning_rate": 8.554814906395499e-05, "loss": 0.0069, "step": 13681 }, { "epoch": 2.648219814241486, "grad_norm": 0.07022400200366974, "learning_rate": 8.554613429062973e-05, "loss": 0.0063, "step": 13682 }, { "epoch": 2.6484133126934983, "grad_norm": 0.04687417671084404, "learning_rate": 8.554411940374147e-05, "loss": 0.0073, "step": 13683 }, { "epoch": 2.6486068111455108, "grad_norm": 0.056720953434705734, "learning_rate": 8.554210440329771e-05, "loss": 0.0068, "step": 13684 }, { "epoch": 2.648800309597523, "grad_norm": 0.05838649719953537, "learning_rate": 8.554008928930593e-05, "loss": 0.0075, "step": 13685 }, { "epoch": 2.6489938080495357, "grad_norm": 0.06598248332738876, "learning_rate": 8.553807406177362e-05, "loss": 0.0069, "step": 13686 }, { "epoch": 2.649187306501548, "grad_norm": 0.05210283026099205, "learning_rate": 8.553605872070827e-05, "loss": 0.0076, "step": 13687 }, { "epoch": 2.6493808049535605, "grad_norm": 0.09711194038391113, "learning_rate": 8.55340432661174e-05, "loss": 0.008, "step": 13688 }, { "epoch": 2.649574303405573, "grad_norm": 0.06920956075191498, "learning_rate": 8.553202769800844e-05, "loss": 0.007, "step": 13689 }, { "epoch": 2.649767801857585, "grad_norm": 0.0799383595585823, "learning_rate": 8.553001201638896e-05, "loss": 0.0064, "step": 13690 }, { "epoch": 2.6499613003095974, "grad_norm": 0.09523548930883408, "learning_rate": 8.552799622126639e-05, "loss": 0.0074, "step": 13691 }, { "epoch": 2.65015479876161, "grad_norm": 0.0839572325348854, "learning_rate": 8.552598031264828e-05, "loss": 0.0067, "step": 13692 }, { "epoch": 2.6503482972136223, "grad_norm": 0.11134294420480728, "learning_rate": 8.552396429054207e-05, "loss": 0.0079, "step": 13693 }, { "epoch": 2.6505417956656347, "grad_norm": 0.046562936156988144, "learning_rate": 8.55219481549553e-05, "loss": 0.0064, "step": 13694 }, { "epoch": 2.650735294117647, "grad_norm": 0.11600013077259064, "learning_rate": 8.551993190589546e-05, "loss": 0.0079, "step": 13695 }, { "epoch": 2.650928792569659, "grad_norm": 0.02488231286406517, "learning_rate": 8.551791554337001e-05, "loss": 0.0057, "step": 13696 }, { "epoch": 2.6511222910216716, "grad_norm": 0.09937707334756851, "learning_rate": 8.551589906738647e-05, "loss": 0.0074, "step": 13697 }, { "epoch": 2.651315789473684, "grad_norm": 0.06436362862586975, "learning_rate": 8.551388247795233e-05, "loss": 0.0072, "step": 13698 }, { "epoch": 2.6515092879256965, "grad_norm": 0.07461507618427277, "learning_rate": 8.551186577507511e-05, "loss": 0.0057, "step": 13699 }, { "epoch": 2.651702786377709, "grad_norm": 0.09704068303108215, "learning_rate": 8.55098489587623e-05, "loss": 0.0067, 
"step": 13700 }, { "epoch": 2.6518962848297214, "grad_norm": 0.0710533931851387, "learning_rate": 8.550783202902137e-05, "loss": 0.008, "step": 13701 }, { "epoch": 2.652089783281734, "grad_norm": 0.08991610258817673, "learning_rate": 8.550581498585985e-05, "loss": 0.0069, "step": 13702 }, { "epoch": 2.6522832817337463, "grad_norm": 0.03369719907641411, "learning_rate": 8.550379782928522e-05, "loss": 0.0066, "step": 13703 }, { "epoch": 2.6524767801857587, "grad_norm": 0.11083530634641647, "learning_rate": 8.550178055930498e-05, "loss": 0.0062, "step": 13704 }, { "epoch": 2.652670278637771, "grad_norm": 0.05047587305307388, "learning_rate": 8.549976317592665e-05, "loss": 0.0062, "step": 13705 }, { "epoch": 2.652863777089783, "grad_norm": 0.07215649634599686, "learning_rate": 8.549774567915771e-05, "loss": 0.0065, "step": 13706 }, { "epoch": 2.6530572755417956, "grad_norm": 0.04990946128964424, "learning_rate": 8.549572806900566e-05, "loss": 0.0076, "step": 13707 }, { "epoch": 2.653250773993808, "grad_norm": 0.07190926373004913, "learning_rate": 8.549371034547803e-05, "loss": 0.0068, "step": 13708 }, { "epoch": 2.6534442724458205, "grad_norm": 0.04865363612771034, "learning_rate": 8.549169250858228e-05, "loss": 0.0072, "step": 13709 }, { "epoch": 2.653637770897833, "grad_norm": 0.04724913462996483, "learning_rate": 8.548967455832591e-05, "loss": 0.0079, "step": 13710 }, { "epoch": 2.6538312693498454, "grad_norm": 0.05749444290995598, "learning_rate": 8.548765649471649e-05, "loss": 0.0081, "step": 13711 }, { "epoch": 2.6540247678018574, "grad_norm": 0.06986658275127411, "learning_rate": 8.548563831776144e-05, "loss": 0.0073, "step": 13712 }, { "epoch": 2.65421826625387, "grad_norm": 0.06359332799911499, "learning_rate": 8.548362002746831e-05, "loss": 0.0071, "step": 13713 }, { "epoch": 2.6544117647058822, "grad_norm": 0.07545825093984604, "learning_rate": 8.54816016238446e-05, "loss": 0.0073, "step": 13714 }, { "epoch": 2.6546052631578947, "grad_norm": 0.03784972429275513, "learning_rate": 8.54795831068978e-05, "loss": 0.0056, "step": 13715 }, { "epoch": 2.654798761609907, "grad_norm": 0.05545874312520027, "learning_rate": 8.547756447663542e-05, "loss": 0.0084, "step": 13716 }, { "epoch": 2.6549922600619196, "grad_norm": 0.05696083977818489, "learning_rate": 8.547554573306496e-05, "loss": 0.006, "step": 13717 }, { "epoch": 2.655185758513932, "grad_norm": 0.047311894595623016, "learning_rate": 8.547352687619393e-05, "loss": 0.0067, "step": 13718 }, { "epoch": 2.6553792569659445, "grad_norm": 0.05923513323068619, "learning_rate": 8.547150790602984e-05, "loss": 0.007, "step": 13719 }, { "epoch": 2.655572755417957, "grad_norm": 0.044668737798929214, "learning_rate": 8.54694888225802e-05, "loss": 0.0057, "step": 13720 }, { "epoch": 2.655766253869969, "grad_norm": 0.04350388050079346, "learning_rate": 8.546746962585247e-05, "loss": 0.0066, "step": 13721 }, { "epoch": 2.6559597523219813, "grad_norm": 0.07571732997894287, "learning_rate": 8.546545031585426e-05, "loss": 0.0066, "step": 13722 }, { "epoch": 2.656153250773994, "grad_norm": 0.03391991928219795, "learning_rate": 8.546343089259296e-05, "loss": 0.0074, "step": 13723 }, { "epoch": 2.656346749226006, "grad_norm": 0.05544965714216232, "learning_rate": 8.546141135607614e-05, "loss": 0.0065, "step": 13724 }, { "epoch": 2.6565402476780187, "grad_norm": 0.03157976269721985, "learning_rate": 8.54593917063113e-05, "loss": 0.0062, "step": 13725 }, { "epoch": 2.656733746130031, "grad_norm": 0.05808211863040924, "learning_rate": 8.545737194330597e-05, 
"loss": 0.0073, "step": 13726 }, { "epoch": 2.656927244582043, "grad_norm": 0.03094145469367504, "learning_rate": 8.545535206706761e-05, "loss": 0.0067, "step": 13727 }, { "epoch": 2.6571207430340555, "grad_norm": 0.06575296074151993, "learning_rate": 8.545333207760379e-05, "loss": 0.0068, "step": 13728 }, { "epoch": 2.657314241486068, "grad_norm": 0.03196118399500847, "learning_rate": 8.545131197492195e-05, "loss": 0.0068, "step": 13729 }, { "epoch": 2.6575077399380804, "grad_norm": 0.05789756029844284, "learning_rate": 8.544929175902964e-05, "loss": 0.0085, "step": 13730 }, { "epoch": 2.657701238390093, "grad_norm": 0.060355156660079956, "learning_rate": 8.544727142993437e-05, "loss": 0.0064, "step": 13731 }, { "epoch": 2.6578947368421053, "grad_norm": 0.050681933760643005, "learning_rate": 8.544525098764365e-05, "loss": 0.0072, "step": 13732 }, { "epoch": 2.6580882352941178, "grad_norm": 0.08218426257371902, "learning_rate": 8.544323043216498e-05, "loss": 0.0052, "step": 13733 }, { "epoch": 2.65828173374613, "grad_norm": 0.03926457464694977, "learning_rate": 8.54412097635059e-05, "loss": 0.0076, "step": 13734 }, { "epoch": 2.6584752321981426, "grad_norm": 0.09388633817434311, "learning_rate": 8.543918898167389e-05, "loss": 0.0062, "step": 13735 }, { "epoch": 2.6586687306501546, "grad_norm": 0.05669906735420227, "learning_rate": 8.543716808667648e-05, "loss": 0.0084, "step": 13736 }, { "epoch": 2.658862229102167, "grad_norm": 0.05612995848059654, "learning_rate": 8.543514707852117e-05, "loss": 0.0073, "step": 13737 }, { "epoch": 2.6590557275541795, "grad_norm": 0.10974998772144318, "learning_rate": 8.54331259572155e-05, "loss": 0.0065, "step": 13738 }, { "epoch": 2.659249226006192, "grad_norm": 0.06224783509969711, "learning_rate": 8.543110472276695e-05, "loss": 0.0079, "step": 13739 }, { "epoch": 2.6594427244582044, "grad_norm": 0.16435596346855164, "learning_rate": 8.542908337518306e-05, "loss": 0.0079, "step": 13740 }, { "epoch": 2.659636222910217, "grad_norm": 0.059319689869880676, "learning_rate": 8.542706191447133e-05, "loss": 0.0077, "step": 13741 }, { "epoch": 2.659829721362229, "grad_norm": 0.12075798213481903, "learning_rate": 8.542504034063928e-05, "loss": 0.0063, "step": 13742 }, { "epoch": 2.6600232198142413, "grad_norm": 0.12734778225421906, "learning_rate": 8.542301865369445e-05, "loss": 0.0063, "step": 13743 }, { "epoch": 2.6602167182662537, "grad_norm": 0.09480758756399155, "learning_rate": 8.54209968536443e-05, "loss": 0.0065, "step": 13744 }, { "epoch": 2.660410216718266, "grad_norm": 0.1616157740354538, "learning_rate": 8.541897494049639e-05, "loss": 0.0076, "step": 13745 }, { "epoch": 2.6606037151702786, "grad_norm": 0.052753716707229614, "learning_rate": 8.541695291425823e-05, "loss": 0.0078, "step": 13746 }, { "epoch": 2.660797213622291, "grad_norm": 0.1394921839237213, "learning_rate": 8.541493077493734e-05, "loss": 0.0074, "step": 13747 }, { "epoch": 2.6609907120743035, "grad_norm": 0.09178869426250458, "learning_rate": 8.541290852254122e-05, "loss": 0.0071, "step": 13748 }, { "epoch": 2.661184210526316, "grad_norm": 0.10632945597171783, "learning_rate": 8.54108861570774e-05, "loss": 0.0067, "step": 13749 }, { "epoch": 2.6613777089783284, "grad_norm": 0.11769267171621323, "learning_rate": 8.54088636785534e-05, "loss": 0.0064, "step": 13750 }, { "epoch": 2.661571207430341, "grad_norm": 0.040275778621435165, "learning_rate": 8.540684108697675e-05, "loss": 0.0066, "step": 13751 }, { "epoch": 2.661764705882353, "grad_norm": 0.11952060461044312, "learning_rate": 
8.540481838235495e-05, "loss": 0.0075, "step": 13752 }, { "epoch": 2.6619582043343653, "grad_norm": 0.03415064886212349, "learning_rate": 8.540279556469552e-05, "loss": 0.0097, "step": 13753 }, { "epoch": 2.6621517027863777, "grad_norm": 0.06538546085357666, "learning_rate": 8.5400772634006e-05, "loss": 0.008, "step": 13754 }, { "epoch": 2.66234520123839, "grad_norm": 0.029317429289221764, "learning_rate": 8.539874959029389e-05, "loss": 0.0066, "step": 13755 }, { "epoch": 2.6625386996904026, "grad_norm": 0.05165591463446617, "learning_rate": 8.539672643356672e-05, "loss": 0.006, "step": 13756 }, { "epoch": 2.662732198142415, "grad_norm": 0.022987276315689087, "learning_rate": 8.539470316383201e-05, "loss": 0.0068, "step": 13757 }, { "epoch": 2.662925696594427, "grad_norm": 0.02908845618367195, "learning_rate": 8.53926797810973e-05, "loss": 0.0069, "step": 13758 }, { "epoch": 2.6631191950464395, "grad_norm": 0.058079950511455536, "learning_rate": 8.539065628537009e-05, "loss": 0.0059, "step": 13759 }, { "epoch": 2.663312693498452, "grad_norm": 0.042514994740486145, "learning_rate": 8.538863267665789e-05, "loss": 0.0074, "step": 13760 }, { "epoch": 2.6635061919504643, "grad_norm": 0.04602735489606857, "learning_rate": 8.538660895496825e-05, "loss": 0.0089, "step": 13761 }, { "epoch": 2.663699690402477, "grad_norm": 0.05891193076968193, "learning_rate": 8.538458512030869e-05, "loss": 0.0072, "step": 13762 }, { "epoch": 2.6638931888544892, "grad_norm": 0.05282164365053177, "learning_rate": 8.538256117268672e-05, "loss": 0.009, "step": 13763 }, { "epoch": 2.6640866873065017, "grad_norm": 0.06732587516307831, "learning_rate": 8.538053711210989e-05, "loss": 0.0089, "step": 13764 }, { "epoch": 2.664280185758514, "grad_norm": 0.07367580384016037, "learning_rate": 8.537851293858568e-05, "loss": 0.0074, "step": 13765 }, { "epoch": 2.6644736842105265, "grad_norm": 0.07373161613941193, "learning_rate": 8.537648865212167e-05, "loss": 0.0055, "step": 13766 }, { "epoch": 2.6646671826625385, "grad_norm": 0.0632641538977623, "learning_rate": 8.537446425272535e-05, "loss": 0.0061, "step": 13767 }, { "epoch": 2.664860681114551, "grad_norm": 0.05254619941115379, "learning_rate": 8.537243974040427e-05, "loss": 0.0067, "step": 13768 }, { "epoch": 2.6650541795665634, "grad_norm": 0.07292228937149048, "learning_rate": 8.537041511516592e-05, "loss": 0.0091, "step": 13769 }, { "epoch": 2.665247678018576, "grad_norm": 0.048702239990234375, "learning_rate": 8.536839037701787e-05, "loss": 0.0081, "step": 13770 }, { "epoch": 2.6654411764705883, "grad_norm": 0.09083181619644165, "learning_rate": 8.536636552596762e-05, "loss": 0.0057, "step": 13771 }, { "epoch": 2.6656346749226008, "grad_norm": 0.04344138875603676, "learning_rate": 8.53643405620227e-05, "loss": 0.0065, "step": 13772 }, { "epoch": 2.6658281733746128, "grad_norm": 0.07915135473012924, "learning_rate": 8.536231548519065e-05, "loss": 0.0061, "step": 13773 }, { "epoch": 2.666021671826625, "grad_norm": 0.06682591140270233, "learning_rate": 8.536029029547899e-05, "loss": 0.0068, "step": 13774 }, { "epoch": 2.6662151702786376, "grad_norm": 0.09632283449172974, "learning_rate": 8.535826499289527e-05, "loss": 0.0069, "step": 13775 }, { "epoch": 2.66640866873065, "grad_norm": 0.08681412041187286, "learning_rate": 8.535623957744697e-05, "loss": 0.0074, "step": 13776 }, { "epoch": 2.6666021671826625, "grad_norm": 0.052352361381053925, "learning_rate": 8.535421404914166e-05, "loss": 0.0073, "step": 13777 }, { "epoch": 2.666795665634675, "grad_norm": 
0.05577309429645538, "learning_rate": 8.535218840798688e-05, "loss": 0.0074, "step": 13778 }, { "epoch": 2.6669891640866874, "grad_norm": 0.0500044971704483, "learning_rate": 8.535016265399013e-05, "loss": 0.007, "step": 13779 }, { "epoch": 2.6671826625387, "grad_norm": 0.049031369388103485, "learning_rate": 8.534813678715896e-05, "loss": 0.0073, "step": 13780 }, { "epoch": 2.6673761609907123, "grad_norm": 0.04121354967355728, "learning_rate": 8.534611080750088e-05, "loss": 0.0074, "step": 13781 }, { "epoch": 2.6675696594427247, "grad_norm": 0.049892082810401917, "learning_rate": 8.534408471502345e-05, "loss": 0.0071, "step": 13782 }, { "epoch": 2.6677631578947367, "grad_norm": 0.029742548242211342, "learning_rate": 8.53420585097342e-05, "loss": 0.0083, "step": 13783 }, { "epoch": 2.667956656346749, "grad_norm": 0.044257987290620804, "learning_rate": 8.534003219164064e-05, "loss": 0.0075, "step": 13784 }, { "epoch": 2.6681501547987616, "grad_norm": 0.07128135114908218, "learning_rate": 8.53380057607503e-05, "loss": 0.0084, "step": 13785 }, { "epoch": 2.668343653250774, "grad_norm": 0.03696548193693161, "learning_rate": 8.533597921707076e-05, "loss": 0.0049, "step": 13786 }, { "epoch": 2.6685371517027865, "grad_norm": 0.06350899487733841, "learning_rate": 8.533395256060951e-05, "loss": 0.0065, "step": 13787 }, { "epoch": 2.6687306501547985, "grad_norm": 0.0659961998462677, "learning_rate": 8.533192579137409e-05, "loss": 0.0059, "step": 13788 }, { "epoch": 2.668924148606811, "grad_norm": 0.05562708154320717, "learning_rate": 8.532989890937205e-05, "loss": 0.0088, "step": 13789 }, { "epoch": 2.6691176470588234, "grad_norm": 0.07921630889177322, "learning_rate": 8.532787191461094e-05, "loss": 0.0062, "step": 13790 }, { "epoch": 2.669311145510836, "grad_norm": 0.08991437405347824, "learning_rate": 8.532584480709825e-05, "loss": 0.0065, "step": 13791 }, { "epoch": 2.6695046439628483, "grad_norm": 0.0433553084731102, "learning_rate": 8.532381758684155e-05, "loss": 0.0071, "step": 13792 }, { "epoch": 2.6696981424148607, "grad_norm": 0.11427738517522812, "learning_rate": 8.532179025384837e-05, "loss": 0.0061, "step": 13793 }, { "epoch": 2.669891640866873, "grad_norm": 0.05327591672539711, "learning_rate": 8.531976280812624e-05, "loss": 0.0081, "step": 13794 }, { "epoch": 2.6700851393188856, "grad_norm": 0.09646065533161163, "learning_rate": 8.531773524968271e-05, "loss": 0.0063, "step": 13795 }, { "epoch": 2.670278637770898, "grad_norm": 0.04793784022331238, "learning_rate": 8.531570757852529e-05, "loss": 0.0058, "step": 13796 }, { "epoch": 2.6704721362229105, "grad_norm": 0.08310665935277939, "learning_rate": 8.531367979466156e-05, "loss": 0.0075, "step": 13797 }, { "epoch": 2.6706656346749225, "grad_norm": 0.06385891139507294, "learning_rate": 8.531165189809904e-05, "loss": 0.0075, "step": 13798 }, { "epoch": 2.670859133126935, "grad_norm": 0.08333953469991684, "learning_rate": 8.530962388884526e-05, "loss": 0.0076, "step": 13799 }, { "epoch": 2.6710526315789473, "grad_norm": 0.06556177884340286, "learning_rate": 8.530759576690776e-05, "loss": 0.0067, "step": 13800 }, { "epoch": 2.67124613003096, "grad_norm": 0.09936144948005676, "learning_rate": 8.530556753229409e-05, "loss": 0.0062, "step": 13801 }, { "epoch": 2.6714396284829722, "grad_norm": 0.06377901136875153, "learning_rate": 8.53035391850118e-05, "loss": 0.0072, "step": 13802 }, { "epoch": 2.6716331269349847, "grad_norm": 0.08287740498781204, "learning_rate": 8.530151072506841e-05, "loss": 0.0054, "step": 13803 }, { "epoch": 
2.6718266253869967, "grad_norm": 0.079451784491539, "learning_rate": 8.529948215247147e-05, "loss": 0.0065, "step": 13804 }, { "epoch": 2.672020123839009, "grad_norm": 0.07189676910638809, "learning_rate": 8.529745346722852e-05, "loss": 0.0064, "step": 13805 }, { "epoch": 2.6722136222910216, "grad_norm": 0.06700845062732697, "learning_rate": 8.529542466934711e-05, "loss": 0.0064, "step": 13806 }, { "epoch": 2.672407120743034, "grad_norm": 0.07622106373310089, "learning_rate": 8.529339575883477e-05, "loss": 0.0073, "step": 13807 }, { "epoch": 2.6726006191950464, "grad_norm": 0.09310256689786911, "learning_rate": 8.529136673569905e-05, "loss": 0.0054, "step": 13808 }, { "epoch": 2.672794117647059, "grad_norm": 0.06713953614234924, "learning_rate": 8.52893375999475e-05, "loss": 0.0084, "step": 13809 }, { "epoch": 2.6729876160990713, "grad_norm": 0.08942972868680954, "learning_rate": 8.528730835158764e-05, "loss": 0.0064, "step": 13810 }, { "epoch": 2.6731811145510838, "grad_norm": 0.07707984745502472, "learning_rate": 8.528527899062705e-05, "loss": 0.0068, "step": 13811 }, { "epoch": 2.673374613003096, "grad_norm": 0.10397662967443466, "learning_rate": 8.528324951707324e-05, "loss": 0.0075, "step": 13812 }, { "epoch": 2.673568111455108, "grad_norm": 0.0785878449678421, "learning_rate": 8.528121993093376e-05, "loss": 0.0071, "step": 13813 }, { "epoch": 2.6737616099071206, "grad_norm": 0.09013296663761139, "learning_rate": 8.527919023221619e-05, "loss": 0.0059, "step": 13814 }, { "epoch": 2.673955108359133, "grad_norm": 0.0920441746711731, "learning_rate": 8.527716042092804e-05, "loss": 0.0062, "step": 13815 }, { "epoch": 2.6741486068111455, "grad_norm": 0.08871647715568542, "learning_rate": 8.527513049707686e-05, "loss": 0.0082, "step": 13816 }, { "epoch": 2.674342105263158, "grad_norm": 0.10049314796924591, "learning_rate": 8.527310046067021e-05, "loss": 0.0056, "step": 13817 }, { "epoch": 2.6745356037151704, "grad_norm": 0.04879026487469673, "learning_rate": 8.527107031171564e-05, "loss": 0.0064, "step": 13818 }, { "epoch": 2.6747291021671824, "grad_norm": 0.11998790502548218, "learning_rate": 8.526904005022071e-05, "loss": 0.0066, "step": 13819 }, { "epoch": 2.674922600619195, "grad_norm": 0.0516805425286293, "learning_rate": 8.526700967619291e-05, "loss": 0.0066, "step": 13820 }, { "epoch": 2.6751160990712073, "grad_norm": 0.09406540542840958, "learning_rate": 8.526497918963984e-05, "loss": 0.0065, "step": 13821 }, { "epoch": 2.6753095975232197, "grad_norm": 0.09944197535514832, "learning_rate": 8.526294859056902e-05, "loss": 0.008, "step": 13822 }, { "epoch": 2.675503095975232, "grad_norm": 0.0882016271352768, "learning_rate": 8.526091787898803e-05, "loss": 0.0064, "step": 13823 }, { "epoch": 2.6756965944272446, "grad_norm": 0.08079846948385239, "learning_rate": 8.525888705490443e-05, "loss": 0.0088, "step": 13824 }, { "epoch": 2.675890092879257, "grad_norm": 0.11167806386947632, "learning_rate": 8.525685611832571e-05, "loss": 0.0073, "step": 13825 }, { "epoch": 2.6760835913312695, "grad_norm": 0.09908328950405121, "learning_rate": 8.525482506925945e-05, "loss": 0.007, "step": 13826 }, { "epoch": 2.676277089783282, "grad_norm": 0.06720536202192307, "learning_rate": 8.525279390771323e-05, "loss": 0.0063, "step": 13827 }, { "epoch": 2.6764705882352944, "grad_norm": 0.11127540469169617, "learning_rate": 8.525076263369454e-05, "loss": 0.0081, "step": 13828 }, { "epoch": 2.6766640866873064, "grad_norm": 0.0812499076128006, "learning_rate": 8.524873124721101e-05, "loss": 0.0067, "step": 13829 
}, { "epoch": 2.676857585139319, "grad_norm": 0.09227660298347473, "learning_rate": 8.524669974827013e-05, "loss": 0.0068, "step": 13830 }, { "epoch": 2.6770510835913313, "grad_norm": 0.08317521214485168, "learning_rate": 8.524466813687948e-05, "loss": 0.0095, "step": 13831 }, { "epoch": 2.6772445820433437, "grad_norm": 0.061198215931653976, "learning_rate": 8.524263641304658e-05, "loss": 0.0061, "step": 13832 }, { "epoch": 2.677438080495356, "grad_norm": 0.08675147593021393, "learning_rate": 8.524060457677905e-05, "loss": 0.0083, "step": 13833 }, { "epoch": 2.6776315789473686, "grad_norm": 0.0780632346868515, "learning_rate": 8.52385726280844e-05, "loss": 0.0063, "step": 13834 }, { "epoch": 2.6778250773993806, "grad_norm": 0.06966406106948853, "learning_rate": 8.523654056697014e-05, "loss": 0.0066, "step": 13835 }, { "epoch": 2.678018575851393, "grad_norm": 0.08309343457221985, "learning_rate": 8.523450839344391e-05, "loss": 0.0068, "step": 13836 }, { "epoch": 2.6782120743034055, "grad_norm": 0.040070533752441406, "learning_rate": 8.523247610751324e-05, "loss": 0.0061, "step": 13837 }, { "epoch": 2.678405572755418, "grad_norm": 0.06668365746736526, "learning_rate": 8.523044370918565e-05, "loss": 0.0052, "step": 13838 }, { "epoch": 2.6785990712074303, "grad_norm": 0.04170214384794235, "learning_rate": 8.522841119846873e-05, "loss": 0.0071, "step": 13839 }, { "epoch": 2.678792569659443, "grad_norm": 0.11539134383201599, "learning_rate": 8.522637857537001e-05, "loss": 0.0069, "step": 13840 }, { "epoch": 2.6789860681114552, "grad_norm": 0.0643831118941307, "learning_rate": 8.52243458398971e-05, "loss": 0.0052, "step": 13841 }, { "epoch": 2.6791795665634677, "grad_norm": 0.06067505478858948, "learning_rate": 8.522231299205746e-05, "loss": 0.0059, "step": 13842 }, { "epoch": 2.67937306501548, "grad_norm": 0.052862029522657394, "learning_rate": 8.522028003185875e-05, "loss": 0.0069, "step": 13843 }, { "epoch": 2.679566563467492, "grad_norm": 0.07073961198329926, "learning_rate": 8.521824695930848e-05, "loss": 0.0056, "step": 13844 }, { "epoch": 2.6797600619195046, "grad_norm": 0.10477473586797714, "learning_rate": 8.52162137744142e-05, "loss": 0.0073, "step": 13845 }, { "epoch": 2.679953560371517, "grad_norm": 0.051575593650341034, "learning_rate": 8.52141804771835e-05, "loss": 0.0071, "step": 13846 }, { "epoch": 2.6801470588235294, "grad_norm": 0.08601722121238708, "learning_rate": 8.521214706762392e-05, "loss": 0.0073, "step": 13847 }, { "epoch": 2.680340557275542, "grad_norm": 0.04948641359806061, "learning_rate": 8.5210113545743e-05, "loss": 0.0067, "step": 13848 }, { "epoch": 2.6805340557275543, "grad_norm": 0.07880083471536636, "learning_rate": 8.520807991154834e-05, "loss": 0.0062, "step": 13849 }, { "epoch": 2.6807275541795663, "grad_norm": 0.059577178210020065, "learning_rate": 8.520604616504749e-05, "loss": 0.0047, "step": 13850 }, { "epoch": 2.6809210526315788, "grad_norm": 0.07010889053344727, "learning_rate": 8.5204012306248e-05, "loss": 0.0055, "step": 13851 }, { "epoch": 2.681114551083591, "grad_norm": 0.05369698256254196, "learning_rate": 8.520197833515742e-05, "loss": 0.0067, "step": 13852 }, { "epoch": 2.6813080495356036, "grad_norm": 0.06620270013809204, "learning_rate": 8.519994425178335e-05, "loss": 0.0074, "step": 13853 }, { "epoch": 2.681501547987616, "grad_norm": 0.04674416035413742, "learning_rate": 8.519791005613331e-05, "loss": 0.0068, "step": 13854 }, { "epoch": 2.6816950464396285, "grad_norm": 0.07546578347682953, "learning_rate": 8.519587574821489e-05, "loss": 
0.0069, "step": 13855 }, { "epoch": 2.681888544891641, "grad_norm": 0.028702404350042343, "learning_rate": 8.519384132803562e-05, "loss": 0.0055, "step": 13856 }, { "epoch": 2.6820820433436534, "grad_norm": 0.06489798426628113, "learning_rate": 8.519180679560311e-05, "loss": 0.0072, "step": 13857 }, { "epoch": 2.682275541795666, "grad_norm": 0.03756996989250183, "learning_rate": 8.51897721509249e-05, "loss": 0.0061, "step": 13858 }, { "epoch": 2.6824690402476783, "grad_norm": 0.043950680643320084, "learning_rate": 8.518773739400856e-05, "loss": 0.0084, "step": 13859 }, { "epoch": 2.6826625386996903, "grad_norm": 0.043231405317783356, "learning_rate": 8.518570252486164e-05, "loss": 0.0077, "step": 13860 }, { "epoch": 2.6828560371517027, "grad_norm": 0.06892223656177521, "learning_rate": 8.518366754349172e-05, "loss": 0.0069, "step": 13861 }, { "epoch": 2.683049535603715, "grad_norm": 0.04849860444664955, "learning_rate": 8.518163244990637e-05, "loss": 0.0062, "step": 13862 }, { "epoch": 2.6832430340557276, "grad_norm": 0.07175469398498535, "learning_rate": 8.517959724411314e-05, "loss": 0.0072, "step": 13863 }, { "epoch": 2.68343653250774, "grad_norm": 0.06459245085716248, "learning_rate": 8.51775619261196e-05, "loss": 0.0083, "step": 13864 }, { "epoch": 2.683630030959752, "grad_norm": 0.07952985912561417, "learning_rate": 8.517552649593333e-05, "loss": 0.0051, "step": 13865 }, { "epoch": 2.6838235294117645, "grad_norm": 0.0460411012172699, "learning_rate": 8.517349095356188e-05, "loss": 0.006, "step": 13866 }, { "epoch": 2.684017027863777, "grad_norm": 0.041183412075042725, "learning_rate": 8.517145529901282e-05, "loss": 0.0065, "step": 13867 }, { "epoch": 2.6842105263157894, "grad_norm": 0.051501594483852386, "learning_rate": 8.516941953229375e-05, "loss": 0.0058, "step": 13868 }, { "epoch": 2.684404024767802, "grad_norm": 0.027832817286252975, "learning_rate": 8.516738365341219e-05, "loss": 0.0049, "step": 13869 }, { "epoch": 2.6845975232198143, "grad_norm": 0.05132628232240677, "learning_rate": 8.516534766237572e-05, "loss": 0.007, "step": 13870 }, { "epoch": 2.6847910216718267, "grad_norm": 0.04761204496026039, "learning_rate": 8.516331155919195e-05, "loss": 0.0063, "step": 13871 }, { "epoch": 2.684984520123839, "grad_norm": 0.04838559031486511, "learning_rate": 8.516127534386841e-05, "loss": 0.0065, "step": 13872 }, { "epoch": 2.6851780185758516, "grad_norm": 0.04720572009682655, "learning_rate": 8.515923901641268e-05, "loss": 0.0065, "step": 13873 }, { "epoch": 2.685371517027864, "grad_norm": 0.05513344705104828, "learning_rate": 8.515720257683231e-05, "loss": 0.0067, "step": 13874 }, { "epoch": 2.685565015479876, "grad_norm": 0.04300147294998169, "learning_rate": 8.515516602513492e-05, "loss": 0.0077, "step": 13875 }, { "epoch": 2.6857585139318885, "grad_norm": 0.06161396950483322, "learning_rate": 8.515312936132805e-05, "loss": 0.0065, "step": 13876 }, { "epoch": 2.685952012383901, "grad_norm": 0.04036793112754822, "learning_rate": 8.515109258541926e-05, "loss": 0.0052, "step": 13877 }, { "epoch": 2.6861455108359134, "grad_norm": 0.0572669692337513, "learning_rate": 8.514905569741616e-05, "loss": 0.0081, "step": 13878 }, { "epoch": 2.686339009287926, "grad_norm": 0.05916636809706688, "learning_rate": 8.514701869732627e-05, "loss": 0.0077, "step": 13879 }, { "epoch": 2.6865325077399382, "grad_norm": 0.04867562651634216, "learning_rate": 8.51449815851572e-05, "loss": 0.0072, "step": 13880 }, { "epoch": 2.6867260061919502, "grad_norm": 0.06136693060398102, "learning_rate": 
8.514294436091654e-05, "loss": 0.0068, "step": 13881 }, { "epoch": 2.6869195046439627, "grad_norm": 0.0300407987087965, "learning_rate": 8.514090702461182e-05, "loss": 0.0075, "step": 13882 }, { "epoch": 2.687113003095975, "grad_norm": 0.05708925426006317, "learning_rate": 8.513886957625063e-05, "loss": 0.0068, "step": 13883 }, { "epoch": 2.6873065015479876, "grad_norm": 0.047972001135349274, "learning_rate": 8.513683201584055e-05, "loss": 0.007, "step": 13884 }, { "epoch": 2.6875, "grad_norm": 0.05902192369103432, "learning_rate": 8.513479434338916e-05, "loss": 0.0071, "step": 13885 }, { "epoch": 2.6876934984520124, "grad_norm": 0.07007832825183868, "learning_rate": 8.513275655890404e-05, "loss": 0.0073, "step": 13886 }, { "epoch": 2.687886996904025, "grad_norm": 0.051509790122509, "learning_rate": 8.513071866239275e-05, "loss": 0.0068, "step": 13887 }, { "epoch": 2.6880804953560373, "grad_norm": 0.0942201316356659, "learning_rate": 8.512868065386286e-05, "loss": 0.0069, "step": 13888 }, { "epoch": 2.6882739938080498, "grad_norm": 0.04651828855276108, "learning_rate": 8.512664253332197e-05, "loss": 0.007, "step": 13889 }, { "epoch": 2.6884674922600618, "grad_norm": 0.11701526492834091, "learning_rate": 8.512460430077764e-05, "loss": 0.0063, "step": 13890 }, { "epoch": 2.688660990712074, "grad_norm": 0.07805899530649185, "learning_rate": 8.512256595623747e-05, "loss": 0.006, "step": 13891 }, { "epoch": 2.6888544891640866, "grad_norm": 0.08966129273176193, "learning_rate": 8.512052749970902e-05, "loss": 0.0059, "step": 13892 }, { "epoch": 2.689047987616099, "grad_norm": 0.11245819181203842, "learning_rate": 8.511848893119986e-05, "loss": 0.0066, "step": 13893 }, { "epoch": 2.6892414860681115, "grad_norm": 0.04073505476117134, "learning_rate": 8.511645025071757e-05, "loss": 0.0062, "step": 13894 }, { "epoch": 2.689434984520124, "grad_norm": 0.08025291562080383, "learning_rate": 8.511441145826976e-05, "loss": 0.0072, "step": 13895 }, { "epoch": 2.689628482972136, "grad_norm": 0.11286848783493042, "learning_rate": 8.511237255386397e-05, "loss": 0.0076, "step": 13896 }, { "epoch": 2.6898219814241484, "grad_norm": 0.08299148082733154, "learning_rate": 8.511033353750781e-05, "loss": 0.0063, "step": 13897 }, { "epoch": 2.690015479876161, "grad_norm": 0.13051031529903412, "learning_rate": 8.510829440920886e-05, "loss": 0.0081, "step": 13898 }, { "epoch": 2.6902089783281733, "grad_norm": 0.05561860278248787, "learning_rate": 8.510625516897466e-05, "loss": 0.0064, "step": 13899 }, { "epoch": 2.6904024767801857, "grad_norm": 0.14194990694522858, "learning_rate": 8.510421581681286e-05, "loss": 0.0059, "step": 13900 }, { "epoch": 2.690595975232198, "grad_norm": 0.03453981876373291, "learning_rate": 8.510217635273097e-05, "loss": 0.0073, "step": 13901 }, { "epoch": 2.6907894736842106, "grad_norm": 0.12744732201099396, "learning_rate": 8.510013677673663e-05, "loss": 0.0066, "step": 13902 }, { "epoch": 2.690982972136223, "grad_norm": 0.08328738063573837, "learning_rate": 8.50980970888374e-05, "loss": 0.0081, "step": 13903 }, { "epoch": 2.6911764705882355, "grad_norm": 0.11997564882040024, "learning_rate": 8.509605728904085e-05, "loss": 0.0059, "step": 13904 }, { "epoch": 2.691369969040248, "grad_norm": 0.10061929374933243, "learning_rate": 8.509401737735456e-05, "loss": 0.0066, "step": 13905 }, { "epoch": 2.69156346749226, "grad_norm": 0.10943589359521866, "learning_rate": 8.509197735378613e-05, "loss": 0.0063, "step": 13906 }, { "epoch": 2.6917569659442724, "grad_norm": 0.10631822049617767, 
"learning_rate": 8.508993721834315e-05, "loss": 0.0074, "step": 13907 }, { "epoch": 2.691950464396285, "grad_norm": 0.06407774984836578, "learning_rate": 8.508789697103322e-05, "loss": 0.0065, "step": 13908 }, { "epoch": 2.6921439628482973, "grad_norm": 0.09427382051944733, "learning_rate": 8.508585661186388e-05, "loss": 0.0063, "step": 13909 }, { "epoch": 2.6923374613003097, "grad_norm": 0.06063511222600937, "learning_rate": 8.508381614084276e-05, "loss": 0.0058, "step": 13910 }, { "epoch": 2.6925309597523217, "grad_norm": 0.07839152961969376, "learning_rate": 8.508177555797738e-05, "loss": 0.006, "step": 13911 }, { "epoch": 2.692724458204334, "grad_norm": 0.07270021736621857, "learning_rate": 8.50797348632754e-05, "loss": 0.0069, "step": 13912 }, { "epoch": 2.6929179566563466, "grad_norm": 0.06884754449129105, "learning_rate": 8.507769405674438e-05, "loss": 0.0088, "step": 13913 }, { "epoch": 2.693111455108359, "grad_norm": 0.08260902017354965, "learning_rate": 8.50756531383919e-05, "loss": 0.0081, "step": 13914 }, { "epoch": 2.6933049535603715, "grad_norm": 0.05815067142248154, "learning_rate": 8.507361210822556e-05, "loss": 0.0076, "step": 13915 }, { "epoch": 2.693498452012384, "grad_norm": 0.07151849567890167, "learning_rate": 8.507157096625294e-05, "loss": 0.0068, "step": 13916 }, { "epoch": 2.6936919504643964, "grad_norm": 0.05493999645113945, "learning_rate": 8.506952971248162e-05, "loss": 0.0061, "step": 13917 }, { "epoch": 2.693885448916409, "grad_norm": 0.056334931403398514, "learning_rate": 8.50674883469192e-05, "loss": 0.0059, "step": 13918 }, { "epoch": 2.6940789473684212, "grad_norm": 0.048716966062784195, "learning_rate": 8.506544686957327e-05, "loss": 0.0076, "step": 13919 }, { "epoch": 2.6942724458204337, "grad_norm": 0.05556677654385567, "learning_rate": 8.506340528045142e-05, "loss": 0.008, "step": 13920 }, { "epoch": 2.6944659442724457, "grad_norm": 0.060255806893110275, "learning_rate": 8.506136357956122e-05, "loss": 0.0061, "step": 13921 }, { "epoch": 2.694659442724458, "grad_norm": 0.06516838073730469, "learning_rate": 8.50593217669103e-05, "loss": 0.0063, "step": 13922 }, { "epoch": 2.6948529411764706, "grad_norm": 0.06568261235952377, "learning_rate": 8.505727984250622e-05, "loss": 0.0094, "step": 13923 }, { "epoch": 2.695046439628483, "grad_norm": 0.031742729246616364, "learning_rate": 8.505523780635657e-05, "loss": 0.0073, "step": 13924 }, { "epoch": 2.6952399380804954, "grad_norm": 0.056693948805332184, "learning_rate": 8.505319565846897e-05, "loss": 0.0063, "step": 13925 }, { "epoch": 2.695433436532508, "grad_norm": 0.03107236512005329, "learning_rate": 8.505115339885098e-05, "loss": 0.007, "step": 13926 }, { "epoch": 2.69562693498452, "grad_norm": 0.0546792596578598, "learning_rate": 8.50491110275102e-05, "loss": 0.0058, "step": 13927 }, { "epoch": 2.6958204334365323, "grad_norm": 0.049580421298742294, "learning_rate": 8.504706854445424e-05, "loss": 0.0082, "step": 13928 }, { "epoch": 2.6960139318885448, "grad_norm": 0.08035283535718918, "learning_rate": 8.504502594969069e-05, "loss": 0.007, "step": 13929 }, { "epoch": 2.696207430340557, "grad_norm": 0.055480699986219406, "learning_rate": 8.504298324322713e-05, "loss": 0.0063, "step": 13930 }, { "epoch": 2.6964009287925697, "grad_norm": 0.08594276756048203, "learning_rate": 8.504094042507116e-05, "loss": 0.0066, "step": 13931 }, { "epoch": 2.696594427244582, "grad_norm": 0.07132802903652191, "learning_rate": 8.503889749523039e-05, "loss": 0.0074, "step": 13932 }, { "epoch": 2.6967879256965945, "grad_norm": 
0.11489267647266388, "learning_rate": 8.503685445371238e-05, "loss": 0.0072, "step": 13933 }, { "epoch": 2.696981424148607, "grad_norm": 0.04935932904481888, "learning_rate": 8.503481130052474e-05, "loss": 0.0066, "step": 13934 }, { "epoch": 2.6971749226006194, "grad_norm": 0.10290385037660599, "learning_rate": 8.503276803567508e-05, "loss": 0.0063, "step": 13935 }, { "epoch": 2.6973684210526314, "grad_norm": 0.0486658439040184, "learning_rate": 8.5030724659171e-05, "loss": 0.0086, "step": 13936 }, { "epoch": 2.697561919504644, "grad_norm": 0.07419761270284653, "learning_rate": 8.502868117102009e-05, "loss": 0.0083, "step": 13937 }, { "epoch": 2.6977554179566563, "grad_norm": 0.05019855126738548, "learning_rate": 8.502663757122992e-05, "loss": 0.0056, "step": 13938 }, { "epoch": 2.6979489164086687, "grad_norm": 0.077766053378582, "learning_rate": 8.502459385980814e-05, "loss": 0.008, "step": 13939 }, { "epoch": 2.698142414860681, "grad_norm": 0.045414336025714874, "learning_rate": 8.502255003676228e-05, "loss": 0.0089, "step": 13940 }, { "epoch": 2.6983359133126936, "grad_norm": 0.07929668575525284, "learning_rate": 8.50205061021e-05, "loss": 0.0068, "step": 13941 }, { "epoch": 2.6985294117647056, "grad_norm": 0.038454242050647736, "learning_rate": 8.501846205582888e-05, "loss": 0.0078, "step": 13942 }, { "epoch": 2.698722910216718, "grad_norm": 0.11022354662418365, "learning_rate": 8.50164178979565e-05, "loss": 0.0084, "step": 13943 }, { "epoch": 2.6989164086687305, "grad_norm": 0.054670363664627075, "learning_rate": 8.50143736284905e-05, "loss": 0.0056, "step": 13944 }, { "epoch": 2.699109907120743, "grad_norm": 0.10165925323963165, "learning_rate": 8.501232924743843e-05, "loss": 0.0084, "step": 13945 }, { "epoch": 2.6993034055727554, "grad_norm": 0.07752387225627899, "learning_rate": 8.501028475480791e-05, "loss": 0.0067, "step": 13946 }, { "epoch": 2.699496904024768, "grad_norm": 0.06384260207414627, "learning_rate": 8.500824015060656e-05, "loss": 0.0066, "step": 13947 }, { "epoch": 2.6996904024767803, "grad_norm": 0.08541060984134674, "learning_rate": 8.500619543484198e-05, "loss": 0.0067, "step": 13948 }, { "epoch": 2.6998839009287927, "grad_norm": 0.03259382024407387, "learning_rate": 8.500415060752174e-05, "loss": 0.0081, "step": 13949 }, { "epoch": 2.700077399380805, "grad_norm": 0.060848306864500046, "learning_rate": 8.500210566865347e-05, "loss": 0.0076, "step": 13950 }, { "epoch": 2.7002708978328176, "grad_norm": 0.043395448476076126, "learning_rate": 8.500006061824476e-05, "loss": 0.0056, "step": 13951 }, { "epoch": 2.7004643962848296, "grad_norm": 0.05467221140861511, "learning_rate": 8.499801545630323e-05, "loss": 0.0078, "step": 13952 }, { "epoch": 2.700657894736842, "grad_norm": 0.03934159129858017, "learning_rate": 8.499597018283643e-05, "loss": 0.0077, "step": 13953 }, { "epoch": 2.7008513931888545, "grad_norm": 0.03190183266997337, "learning_rate": 8.499392479785206e-05, "loss": 0.0062, "step": 13954 }, { "epoch": 2.701044891640867, "grad_norm": 0.031388700008392334, "learning_rate": 8.499187930135762e-05, "loss": 0.0071, "step": 13955 }, { "epoch": 2.7012383900928794, "grad_norm": 0.07810481637716293, "learning_rate": 8.498983369336078e-05, "loss": 0.0069, "step": 13956 }, { "epoch": 2.701431888544892, "grad_norm": 0.032927997410297394, "learning_rate": 8.498778797386914e-05, "loss": 0.0073, "step": 13957 }, { "epoch": 2.701625386996904, "grad_norm": 0.0718827173113823, "learning_rate": 8.498574214289027e-05, "loss": 0.0064, "step": 13958 }, { "epoch": 
2.7018188854489162, "grad_norm": 0.029048895463347435, "learning_rate": 8.498369620043181e-05, "loss": 0.0064, "step": 13959 }, { "epoch": 2.7020123839009287, "grad_norm": 0.061817873269319534, "learning_rate": 8.498165014650136e-05, "loss": 0.0064, "step": 13960 }, { "epoch": 2.702205882352941, "grad_norm": 0.0515320785343647, "learning_rate": 8.497960398110653e-05, "loss": 0.0071, "step": 13961 }, { "epoch": 2.7023993808049536, "grad_norm": 0.06758685410022736, "learning_rate": 8.497755770425491e-05, "loss": 0.0073, "step": 13962 }, { "epoch": 2.702592879256966, "grad_norm": 0.07218466699123383, "learning_rate": 8.497551131595411e-05, "loss": 0.0068, "step": 13963 }, { "epoch": 2.7027863777089784, "grad_norm": 0.04516717419028282, "learning_rate": 8.497346481621175e-05, "loss": 0.007, "step": 13964 }, { "epoch": 2.702979876160991, "grad_norm": 0.08070903271436691, "learning_rate": 8.497141820503544e-05, "loss": 0.006, "step": 13965 }, { "epoch": 2.7031733746130033, "grad_norm": 0.05774427950382233, "learning_rate": 8.496937148243279e-05, "loss": 0.0065, "step": 13966 }, { "epoch": 2.7033668730650153, "grad_norm": 0.0734565332531929, "learning_rate": 8.496732464841139e-05, "loss": 0.0061, "step": 13967 }, { "epoch": 2.7035603715170278, "grad_norm": 0.07281321287155151, "learning_rate": 8.496527770297887e-05, "loss": 0.0047, "step": 13968 }, { "epoch": 2.70375386996904, "grad_norm": 0.04352392628788948, "learning_rate": 8.49632306461428e-05, "loss": 0.0076, "step": 13969 }, { "epoch": 2.7039473684210527, "grad_norm": 0.08218887448310852, "learning_rate": 8.496118347791086e-05, "loss": 0.0059, "step": 13970 }, { "epoch": 2.704140866873065, "grad_norm": 0.03790518641471863, "learning_rate": 8.495913619829059e-05, "loss": 0.0065, "step": 13971 }, { "epoch": 2.7043343653250775, "grad_norm": 0.07575509697198868, "learning_rate": 8.495708880728965e-05, "loss": 0.007, "step": 13972 }, { "epoch": 2.7045278637770895, "grad_norm": 0.055578265339136124, "learning_rate": 8.495504130491565e-05, "loss": 0.0071, "step": 13973 }, { "epoch": 2.704721362229102, "grad_norm": 0.05557388812303543, "learning_rate": 8.495299369117616e-05, "loss": 0.007, "step": 13974 }, { "epoch": 2.7049148606811144, "grad_norm": 0.05375927686691284, "learning_rate": 8.495094596607884e-05, "loss": 0.0066, "step": 13975 }, { "epoch": 2.705108359133127, "grad_norm": 0.05437483638525009, "learning_rate": 8.494889812963128e-05, "loss": 0.0064, "step": 13976 }, { "epoch": 2.7053018575851393, "grad_norm": 0.05100049450993538, "learning_rate": 8.494685018184109e-05, "loss": 0.006, "step": 13977 }, { "epoch": 2.7054953560371517, "grad_norm": 0.07536368072032928, "learning_rate": 8.494480212271588e-05, "loss": 0.0064, "step": 13978 }, { "epoch": 2.705688854489164, "grad_norm": 0.056919194757938385, "learning_rate": 8.49427539522633e-05, "loss": 0.0074, "step": 13979 }, { "epoch": 2.7058823529411766, "grad_norm": 0.04860398918390274, "learning_rate": 8.494070567049092e-05, "loss": 0.0076, "step": 13980 }, { "epoch": 2.706075851393189, "grad_norm": 0.0490117073059082, "learning_rate": 8.493865727740639e-05, "loss": 0.0069, "step": 13981 }, { "epoch": 2.7062693498452015, "grad_norm": 0.025293176993727684, "learning_rate": 8.49366087730173e-05, "loss": 0.0064, "step": 13982 }, { "epoch": 2.7064628482972135, "grad_norm": 0.05211208388209343, "learning_rate": 8.493456015733127e-05, "loss": 0.0061, "step": 13983 }, { "epoch": 2.706656346749226, "grad_norm": 0.03936251625418663, "learning_rate": 8.493251143035592e-05, "loss": 0.0064, "step": 
13984 }, { "epoch": 2.7068498452012384, "grad_norm": 0.06863048672676086, "learning_rate": 8.493046259209886e-05, "loss": 0.0063, "step": 13985 }, { "epoch": 2.707043343653251, "grad_norm": 0.026707788929343224, "learning_rate": 8.492841364256773e-05, "loss": 0.0055, "step": 13986 }, { "epoch": 2.7072368421052633, "grad_norm": 0.04541477560997009, "learning_rate": 8.492636458177015e-05, "loss": 0.0057, "step": 13987 }, { "epoch": 2.7074303405572753, "grad_norm": 0.03529303893446922, "learning_rate": 8.492431540971369e-05, "loss": 0.0065, "step": 13988 }, { "epoch": 2.7076238390092877, "grad_norm": 0.04531726613640785, "learning_rate": 8.492226612640601e-05, "loss": 0.0058, "step": 13989 }, { "epoch": 2.7078173374613, "grad_norm": 0.0506577230989933, "learning_rate": 8.492021673185472e-05, "loss": 0.0062, "step": 13990 }, { "epoch": 2.7080108359133126, "grad_norm": 0.04609766975045204, "learning_rate": 8.491816722606744e-05, "loss": 0.0074, "step": 13991 }, { "epoch": 2.708204334365325, "grad_norm": 0.0677114948630333, "learning_rate": 8.491611760905177e-05, "loss": 0.0053, "step": 13992 }, { "epoch": 2.7083978328173375, "grad_norm": 0.03386596590280533, "learning_rate": 8.491406788081536e-05, "loss": 0.0076, "step": 13993 }, { "epoch": 2.70859133126935, "grad_norm": 0.07884036004543304, "learning_rate": 8.491201804136582e-05, "loss": 0.0082, "step": 13994 }, { "epoch": 2.7087848297213624, "grad_norm": 0.04001868888735771, "learning_rate": 8.490996809071077e-05, "loss": 0.0049, "step": 13995 }, { "epoch": 2.708978328173375, "grad_norm": 0.07253225892782211, "learning_rate": 8.490791802885781e-05, "loss": 0.0066, "step": 13996 }, { "epoch": 2.7091718266253872, "grad_norm": 0.03381142392754555, "learning_rate": 8.49058678558146e-05, "loss": 0.0086, "step": 13997 }, { "epoch": 2.7093653250773992, "grad_norm": 0.1141267642378807, "learning_rate": 8.490381757158873e-05, "loss": 0.0067, "step": 13998 }, { "epoch": 2.7095588235294117, "grad_norm": 0.042967647314071655, "learning_rate": 8.490176717618783e-05, "loss": 0.0063, "step": 13999 }, { "epoch": 2.709752321981424, "grad_norm": 0.11783541738986969, "learning_rate": 8.489971666961954e-05, "loss": 0.0076, "step": 14000 }, { "epoch": 2.7099458204334366, "grad_norm": 0.06098232790827751, "learning_rate": 8.489766605189147e-05, "loss": 0.0057, "step": 14001 }, { "epoch": 2.710139318885449, "grad_norm": 0.09891101717948914, "learning_rate": 8.489561532301123e-05, "loss": 0.0071, "step": 14002 }, { "epoch": 2.7103328173374615, "grad_norm": 0.05214949697256088, "learning_rate": 8.489356448298647e-05, "loss": 0.0074, "step": 14003 }, { "epoch": 2.7105263157894735, "grad_norm": 0.11366834491491318, "learning_rate": 8.489151353182481e-05, "loss": 0.0071, "step": 14004 }, { "epoch": 2.710719814241486, "grad_norm": 0.04493863880634308, "learning_rate": 8.488946246953385e-05, "loss": 0.0071, "step": 14005 }, { "epoch": 2.7109133126934983, "grad_norm": 0.09005748480558395, "learning_rate": 8.488741129612123e-05, "loss": 0.0063, "step": 14006 }, { "epoch": 2.7111068111455108, "grad_norm": 0.043424349278211594, "learning_rate": 8.48853600115946e-05, "loss": 0.0062, "step": 14007 }, { "epoch": 2.711300309597523, "grad_norm": 0.07915118336677551, "learning_rate": 8.488330861596157e-05, "loss": 0.0066, "step": 14008 }, { "epoch": 2.7114938080495357, "grad_norm": 0.05883041024208069, "learning_rate": 8.488125710922974e-05, "loss": 0.0083, "step": 14009 }, { "epoch": 2.711687306501548, "grad_norm": 0.0890551283955574, "learning_rate": 8.487920549140678e-05, 
"loss": 0.0077, "step": 14010 }, { "epoch": 2.7118808049535605, "grad_norm": 0.07628942281007767, "learning_rate": 8.487715376250028e-05, "loss": 0.0081, "step": 14011 }, { "epoch": 2.712074303405573, "grad_norm": 0.09628549963235855, "learning_rate": 8.48751019225179e-05, "loss": 0.0065, "step": 14012 }, { "epoch": 2.712267801857585, "grad_norm": 0.0770740658044815, "learning_rate": 8.487304997146723e-05, "loss": 0.0071, "step": 14013 }, { "epoch": 2.7124613003095974, "grad_norm": 0.11751768738031387, "learning_rate": 8.487099790935593e-05, "loss": 0.0078, "step": 14014 }, { "epoch": 2.71265479876161, "grad_norm": 0.09680168330669403, "learning_rate": 8.486894573619163e-05, "loss": 0.0066, "step": 14015 }, { "epoch": 2.7128482972136223, "grad_norm": 0.13192282617092133, "learning_rate": 8.486689345198194e-05, "loss": 0.0081, "step": 14016 }, { "epoch": 2.7130417956656347, "grad_norm": 0.05507504567503929, "learning_rate": 8.48648410567345e-05, "loss": 0.0076, "step": 14017 }, { "epoch": 2.713235294117647, "grad_norm": 0.0621083602309227, "learning_rate": 8.486278855045695e-05, "loss": 0.008, "step": 14018 }, { "epoch": 2.713428792569659, "grad_norm": 0.049460507929325104, "learning_rate": 8.48607359331569e-05, "loss": 0.0062, "step": 14019 }, { "epoch": 2.7136222910216716, "grad_norm": 0.0362236425280571, "learning_rate": 8.4858683204842e-05, "loss": 0.0076, "step": 14020 }, { "epoch": 2.713815789473684, "grad_norm": 0.032927174121141434, "learning_rate": 8.485663036551988e-05, "loss": 0.0068, "step": 14021 }, { "epoch": 2.7140092879256965, "grad_norm": 0.05621998384594917, "learning_rate": 8.485457741519813e-05, "loss": 0.0079, "step": 14022 }, { "epoch": 2.714202786377709, "grad_norm": 0.05251815915107727, "learning_rate": 8.485252435388446e-05, "loss": 0.0078, "step": 14023 }, { "epoch": 2.7143962848297214, "grad_norm": 0.07156117260456085, "learning_rate": 8.485047118158644e-05, "loss": 0.0072, "step": 14024 }, { "epoch": 2.714589783281734, "grad_norm": 0.04916292801499367, "learning_rate": 8.484841789831171e-05, "loss": 0.0069, "step": 14025 }, { "epoch": 2.7147832817337463, "grad_norm": 0.114454485476017, "learning_rate": 8.484636450406793e-05, "loss": 0.0067, "step": 14026 }, { "epoch": 2.7149767801857587, "grad_norm": 0.07408425211906433, "learning_rate": 8.484431099886275e-05, "loss": 0.007, "step": 14027 }, { "epoch": 2.715170278637771, "grad_norm": 0.05886492878198624, "learning_rate": 8.484225738270373e-05, "loss": 0.0067, "step": 14028 }, { "epoch": 2.715363777089783, "grad_norm": 0.10649633407592773, "learning_rate": 8.484020365559858e-05, "loss": 0.0076, "step": 14029 }, { "epoch": 2.7155572755417956, "grad_norm": 0.04608522355556488, "learning_rate": 8.483814981755488e-05, "loss": 0.0079, "step": 14030 }, { "epoch": 2.715750773993808, "grad_norm": 0.0679708942770958, "learning_rate": 8.483609586858031e-05, "loss": 0.0073, "step": 14031 }, { "epoch": 2.7159442724458205, "grad_norm": 0.06051850691437721, "learning_rate": 8.483404180868249e-05, "loss": 0.0085, "step": 14032 }, { "epoch": 2.716137770897833, "grad_norm": 0.04795384034514427, "learning_rate": 8.483198763786903e-05, "loss": 0.0065, "step": 14033 }, { "epoch": 2.7163312693498454, "grad_norm": 0.058121249079704285, "learning_rate": 8.482993335614762e-05, "loss": 0.0069, "step": 14034 }, { "epoch": 2.7165247678018574, "grad_norm": 0.04551471769809723, "learning_rate": 8.482787896352584e-05, "loss": 0.0059, "step": 14035 }, { "epoch": 2.71671826625387, "grad_norm": 0.06347332149744034, "learning_rate": 
8.482582446001139e-05, "loss": 0.0062, "step": 14036 }, { "epoch": 2.7169117647058822, "grad_norm": 0.04123475030064583, "learning_rate": 8.482376984561184e-05, "loss": 0.0078, "step": 14037 }, { "epoch": 2.7171052631578947, "grad_norm": 0.032193757593631744, "learning_rate": 8.482171512033488e-05, "loss": 0.0066, "step": 14038 }, { "epoch": 2.717298761609907, "grad_norm": 0.030384721234440804, "learning_rate": 8.481966028418814e-05, "loss": 0.0063, "step": 14039 }, { "epoch": 2.7174922600619196, "grad_norm": 0.029000569134950638, "learning_rate": 8.481760533717922e-05, "loss": 0.0064, "step": 14040 }, { "epoch": 2.717685758513932, "grad_norm": 0.031093865633010864, "learning_rate": 8.481555027931583e-05, "loss": 0.0073, "step": 14041 }, { "epoch": 2.7178792569659445, "grad_norm": 0.03030897118151188, "learning_rate": 8.481349511060554e-05, "loss": 0.007, "step": 14042 }, { "epoch": 2.718072755417957, "grad_norm": 0.03719276934862137, "learning_rate": 8.481143983105604e-05, "loss": 0.0082, "step": 14043 }, { "epoch": 2.718266253869969, "grad_norm": 0.047994308173656464, "learning_rate": 8.480938444067494e-05, "loss": 0.0074, "step": 14044 }, { "epoch": 2.7184597523219813, "grad_norm": 0.05431051552295685, "learning_rate": 8.48073289394699e-05, "loss": 0.0059, "step": 14045 }, { "epoch": 2.718653250773994, "grad_norm": 0.043265700340270996, "learning_rate": 8.480527332744855e-05, "loss": 0.0061, "step": 14046 }, { "epoch": 2.718846749226006, "grad_norm": 0.03448183834552765, "learning_rate": 8.480321760461855e-05, "loss": 0.0086, "step": 14047 }, { "epoch": 2.7190402476780187, "grad_norm": 0.07183621823787689, "learning_rate": 8.480116177098752e-05, "loss": 0.0067, "step": 14048 }, { "epoch": 2.719233746130031, "grad_norm": 0.04168286174535751, "learning_rate": 8.479910582656311e-05, "loss": 0.0086, "step": 14049 }, { "epoch": 2.719427244582043, "grad_norm": 0.07139938324689865, "learning_rate": 8.479704977135298e-05, "loss": 0.0081, "step": 14050 }, { "epoch": 2.7196207430340555, "grad_norm": 0.0406818725168705, "learning_rate": 8.479499360536476e-05, "loss": 0.0066, "step": 14051 }, { "epoch": 2.719814241486068, "grad_norm": 0.07605531066656113, "learning_rate": 8.479293732860609e-05, "loss": 0.0078, "step": 14052 }, { "epoch": 2.7200077399380804, "grad_norm": 0.06636383384466171, "learning_rate": 8.479088094108461e-05, "loss": 0.0062, "step": 14053 }, { "epoch": 2.720201238390093, "grad_norm": 0.06533651798963547, "learning_rate": 8.478882444280798e-05, "loss": 0.0069, "step": 14054 }, { "epoch": 2.7203947368421053, "grad_norm": 0.07103121280670166, "learning_rate": 8.478676783378387e-05, "loss": 0.0053, "step": 14055 }, { "epoch": 2.7205882352941178, "grad_norm": 0.05097277835011482, "learning_rate": 8.478471111401986e-05, "loss": 0.0048, "step": 14056 }, { "epoch": 2.72078173374613, "grad_norm": 0.07563823461532593, "learning_rate": 8.478265428352364e-05, "loss": 0.006, "step": 14057 }, { "epoch": 2.7209752321981426, "grad_norm": 0.051424216479063034, "learning_rate": 8.478059734230288e-05, "loss": 0.0061, "step": 14058 }, { "epoch": 2.7211687306501546, "grad_norm": 0.061139095574617386, "learning_rate": 8.477854029036518e-05, "loss": 0.0059, "step": 14059 }, { "epoch": 2.721362229102167, "grad_norm": 0.05208120495080948, "learning_rate": 8.477648312771817e-05, "loss": 0.0071, "step": 14060 }, { "epoch": 2.7215557275541795, "grad_norm": 0.06741464883089066, "learning_rate": 8.477442585436959e-05, "loss": 0.0069, "step": 14061 }, { "epoch": 2.721749226006192, "grad_norm": 
0.05306413024663925, "learning_rate": 8.477236847032698e-05, "loss": 0.0073, "step": 14062 }, { "epoch": 2.7219427244582044, "grad_norm": 0.06805805116891861, "learning_rate": 8.477031097559807e-05, "loss": 0.0057, "step": 14063 }, { "epoch": 2.722136222910217, "grad_norm": 0.06072172522544861, "learning_rate": 8.476825337019048e-05, "loss": 0.0069, "step": 14064 }, { "epoch": 2.722329721362229, "grad_norm": 0.046317074447870255, "learning_rate": 8.476619565411186e-05, "loss": 0.0063, "step": 14065 }, { "epoch": 2.7225232198142413, "grad_norm": 0.06927601248025894, "learning_rate": 8.476413782736985e-05, "loss": 0.0068, "step": 14066 }, { "epoch": 2.7227167182662537, "grad_norm": 0.09103485941886902, "learning_rate": 8.47620798899721e-05, "loss": 0.0057, "step": 14067 }, { "epoch": 2.722910216718266, "grad_norm": 0.08740498125553131, "learning_rate": 8.476002184192629e-05, "loss": 0.0071, "step": 14068 }, { "epoch": 2.7231037151702786, "grad_norm": 0.0645732656121254, "learning_rate": 8.475796368324002e-05, "loss": 0.008, "step": 14069 }, { "epoch": 2.723297213622291, "grad_norm": 0.051139943301677704, "learning_rate": 8.475590541392101e-05, "loss": 0.0055, "step": 14070 }, { "epoch": 2.7234907120743035, "grad_norm": 0.07594604790210724, "learning_rate": 8.475384703397686e-05, "loss": 0.006, "step": 14071 }, { "epoch": 2.723684210526316, "grad_norm": 0.04725005105137825, "learning_rate": 8.475178854341525e-05, "loss": 0.0064, "step": 14072 }, { "epoch": 2.7238777089783284, "grad_norm": 0.07156519591808319, "learning_rate": 8.47497299422438e-05, "loss": 0.0068, "step": 14073 }, { "epoch": 2.724071207430341, "grad_norm": 0.053199589252471924, "learning_rate": 8.47476712304702e-05, "loss": 0.0065, "step": 14074 }, { "epoch": 2.724264705882353, "grad_norm": 0.06725720316171646, "learning_rate": 8.47456124081021e-05, "loss": 0.0062, "step": 14075 }, { "epoch": 2.7244582043343653, "grad_norm": 0.043998777866363525, "learning_rate": 8.474355347514711e-05, "loss": 0.0062, "step": 14076 }, { "epoch": 2.7246517027863777, "grad_norm": 0.07495571672916412, "learning_rate": 8.474149443161294e-05, "loss": 0.0077, "step": 14077 }, { "epoch": 2.72484520123839, "grad_norm": 0.05547860637307167, "learning_rate": 8.473943527750722e-05, "loss": 0.0072, "step": 14078 }, { "epoch": 2.7250386996904026, "grad_norm": 0.09620712697505951, "learning_rate": 8.47373760128376e-05, "loss": 0.0065, "step": 14079 }, { "epoch": 2.725232198142415, "grad_norm": 0.08608081936836243, "learning_rate": 8.473531663761175e-05, "loss": 0.0066, "step": 14080 }, { "epoch": 2.725425696594427, "grad_norm": 0.09127330780029297, "learning_rate": 8.47332571518373e-05, "loss": 0.0063, "step": 14081 }, { "epoch": 2.7256191950464395, "grad_norm": 0.0835874155163765, "learning_rate": 8.473119755552196e-05, "loss": 0.0073, "step": 14082 }, { "epoch": 2.725812693498452, "grad_norm": 0.07020096480846405, "learning_rate": 8.472913784867333e-05, "loss": 0.0084, "step": 14083 }, { "epoch": 2.7260061919504643, "grad_norm": 0.10107138752937317, "learning_rate": 8.472707803129912e-05, "loss": 0.0059, "step": 14084 }, { "epoch": 2.726199690402477, "grad_norm": 0.07127167284488678, "learning_rate": 8.472501810340693e-05, "loss": 0.008, "step": 14085 }, { "epoch": 2.7263931888544892, "grad_norm": 0.09510315954685211, "learning_rate": 8.472295806500444e-05, "loss": 0.0063, "step": 14086 }, { "epoch": 2.7265866873065017, "grad_norm": 0.09391654282808304, "learning_rate": 8.472089791609935e-05, "loss": 0.0081, "step": 14087 }, { "epoch": 2.726780185758514, 
"grad_norm": 0.07742880284786224, "learning_rate": 8.471883765669925e-05, "loss": 0.0055, "step": 14088 }, { "epoch": 2.7269736842105265, "grad_norm": 0.11349690705537796, "learning_rate": 8.471677728681185e-05, "loss": 0.0062, "step": 14089 }, { "epoch": 2.7271671826625385, "grad_norm": 0.05617315322160721, "learning_rate": 8.471471680644481e-05, "loss": 0.0063, "step": 14090 }, { "epoch": 2.727360681114551, "grad_norm": 0.12421545386314392, "learning_rate": 8.471265621560575e-05, "loss": 0.0091, "step": 14091 }, { "epoch": 2.7275541795665634, "grad_norm": 0.06655561178922653, "learning_rate": 8.471059551430236e-05, "loss": 0.0051, "step": 14092 }, { "epoch": 2.727747678018576, "grad_norm": 0.09556194394826889, "learning_rate": 8.470853470254231e-05, "loss": 0.0061, "step": 14093 }, { "epoch": 2.7279411764705883, "grad_norm": 0.08204919099807739, "learning_rate": 8.470647378033324e-05, "loss": 0.0058, "step": 14094 }, { "epoch": 2.7281346749226008, "grad_norm": 0.05647752061486244, "learning_rate": 8.47044127476828e-05, "loss": 0.0067, "step": 14095 }, { "epoch": 2.7283281733746128, "grad_norm": 0.10937904566526413, "learning_rate": 8.47023516045987e-05, "loss": 0.0079, "step": 14096 }, { "epoch": 2.728521671826625, "grad_norm": 0.05365245044231415, "learning_rate": 8.470029035108856e-05, "loss": 0.0078, "step": 14097 }, { "epoch": 2.7287151702786376, "grad_norm": 0.09888855367898941, "learning_rate": 8.469822898716007e-05, "loss": 0.0068, "step": 14098 }, { "epoch": 2.72890866873065, "grad_norm": 0.05321153998374939, "learning_rate": 8.469616751282088e-05, "loss": 0.0077, "step": 14099 }, { "epoch": 2.7291021671826625, "grad_norm": 0.08390230685472488, "learning_rate": 8.469410592807866e-05, "loss": 0.0074, "step": 14100 }, { "epoch": 2.729295665634675, "grad_norm": 0.08626548945903778, "learning_rate": 8.469204423294108e-05, "loss": 0.0078, "step": 14101 }, { "epoch": 2.7294891640866874, "grad_norm": 0.11281691491603851, "learning_rate": 8.468998242741576e-05, "loss": 0.0056, "step": 14102 }, { "epoch": 2.7296826625387, "grad_norm": 0.06765966862440109, "learning_rate": 8.468792051151042e-05, "loss": 0.0066, "step": 14103 }, { "epoch": 2.7298761609907123, "grad_norm": 0.1594173014163971, "learning_rate": 8.468585848523271e-05, "loss": 0.0065, "step": 14104 }, { "epoch": 2.7300696594427247, "grad_norm": 0.027132965624332428, "learning_rate": 8.468379634859029e-05, "loss": 0.0066, "step": 14105 }, { "epoch": 2.7302631578947367, "grad_norm": 0.1709844470024109, "learning_rate": 8.468173410159083e-05, "loss": 0.0085, "step": 14106 }, { "epoch": 2.730456656346749, "grad_norm": 0.0435745045542717, "learning_rate": 8.467967174424201e-05, "loss": 0.0056, "step": 14107 }, { "epoch": 2.7306501547987616, "grad_norm": 0.1470494568347931, "learning_rate": 8.467760927655147e-05, "loss": 0.0077, "step": 14108 }, { "epoch": 2.730843653250774, "grad_norm": 0.08508098870515823, "learning_rate": 8.46755466985269e-05, "loss": 0.0066, "step": 14109 }, { "epoch": 2.7310371517027865, "grad_norm": 0.10742814093828201, "learning_rate": 8.467348401017595e-05, "loss": 0.0064, "step": 14110 }, { "epoch": 2.7312306501547985, "grad_norm": 0.11477074772119522, "learning_rate": 8.467142121150631e-05, "loss": 0.0065, "step": 14111 }, { "epoch": 2.731424148606811, "grad_norm": 0.07111861556768417, "learning_rate": 8.466935830252564e-05, "loss": 0.0068, "step": 14112 }, { "epoch": 2.7316176470588234, "grad_norm": 0.1474601775407791, "learning_rate": 8.466729528324158e-05, "loss": 0.0074, "step": 14113 }, { "epoch": 
2.731811145510836, "grad_norm": 0.11419114470481873, "learning_rate": 8.466523215366184e-05, "loss": 0.0059, "step": 14114 }, { "epoch": 2.7320046439628483, "grad_norm": 0.12443508207798004, "learning_rate": 8.466316891379408e-05, "loss": 0.0078, "step": 14115 }, { "epoch": 2.7321981424148607, "grad_norm": 0.1361408233642578, "learning_rate": 8.466110556364597e-05, "loss": 0.0072, "step": 14116 }, { "epoch": 2.732391640866873, "grad_norm": 0.08381105214357376, "learning_rate": 8.465904210322517e-05, "loss": 0.0072, "step": 14117 }, { "epoch": 2.7325851393188856, "grad_norm": 0.14802880585193634, "learning_rate": 8.465697853253935e-05, "loss": 0.0064, "step": 14118 }, { "epoch": 2.732778637770898, "grad_norm": 0.04856017976999283, "learning_rate": 8.465491485159622e-05, "loss": 0.0064, "step": 14119 }, { "epoch": 2.7329721362229105, "grad_norm": 0.1170642152428627, "learning_rate": 8.46528510604034e-05, "loss": 0.0067, "step": 14120 }, { "epoch": 2.7331656346749225, "grad_norm": 0.14041437208652496, "learning_rate": 8.465078715896862e-05, "loss": 0.0057, "step": 14121 }, { "epoch": 2.733359133126935, "grad_norm": 0.08972673118114471, "learning_rate": 8.464872314729948e-05, "loss": 0.0081, "step": 14122 }, { "epoch": 2.7335526315789473, "grad_norm": 0.2050001323223114, "learning_rate": 8.464665902540371e-05, "loss": 0.0073, "step": 14123 }, { "epoch": 2.73374613003096, "grad_norm": 0.07150979340076447, "learning_rate": 8.464459479328896e-05, "loss": 0.0064, "step": 14124 }, { "epoch": 2.7339396284829722, "grad_norm": 0.19299136102199554, "learning_rate": 8.46425304509629e-05, "loss": 0.0081, "step": 14125 }, { "epoch": 2.7341331269349847, "grad_norm": 0.15216727554798126, "learning_rate": 8.464046599843324e-05, "loss": 0.0069, "step": 14126 }, { "epoch": 2.7343266253869967, "grad_norm": 0.11870034784078598, "learning_rate": 8.463840143570763e-05, "loss": 0.008, "step": 14127 }, { "epoch": 2.734520123839009, "grad_norm": 0.17630809545516968, "learning_rate": 8.463633676279374e-05, "loss": 0.0063, "step": 14128 }, { "epoch": 2.7347136222910216, "grad_norm": 0.04754329472780228, "learning_rate": 8.463427197969925e-05, "loss": 0.0065, "step": 14129 }, { "epoch": 2.734907120743034, "grad_norm": 0.16637858748435974, "learning_rate": 8.463220708643184e-05, "loss": 0.0071, "step": 14130 }, { "epoch": 2.7351006191950464, "grad_norm": 0.06333478540182114, "learning_rate": 8.463014208299917e-05, "loss": 0.0068, "step": 14131 }, { "epoch": 2.735294117647059, "grad_norm": 0.13104431331157684, "learning_rate": 8.462807696940896e-05, "loss": 0.0068, "step": 14132 }, { "epoch": 2.7354876160990713, "grad_norm": 0.12095583975315094, "learning_rate": 8.462601174566885e-05, "loss": 0.0084, "step": 14133 }, { "epoch": 2.7356811145510838, "grad_norm": 0.07348925620317459, "learning_rate": 8.462394641178652e-05, "loss": 0.0078, "step": 14134 }, { "epoch": 2.735874613003096, "grad_norm": 0.1121949553489685, "learning_rate": 8.462188096776966e-05, "loss": 0.0054, "step": 14135 }, { "epoch": 2.736068111455108, "grad_norm": 0.05594082549214363, "learning_rate": 8.461981541362597e-05, "loss": 0.0064, "step": 14136 }, { "epoch": 2.7362616099071206, "grad_norm": 0.07174081355333328, "learning_rate": 8.461774974936308e-05, "loss": 0.0077, "step": 14137 }, { "epoch": 2.736455108359133, "grad_norm": 0.049933623522520065, "learning_rate": 8.46156839749887e-05, "loss": 0.0062, "step": 14138 }, { "epoch": 2.7366486068111455, "grad_norm": 0.051346950232982635, "learning_rate": 8.461361809051051e-05, "loss": 0.0078, "step": 
14139 }, { "epoch": 2.736842105263158, "grad_norm": 0.07479807734489441, "learning_rate": 8.461155209593618e-05, "loss": 0.0058, "step": 14140 }, { "epoch": 2.7370356037151704, "grad_norm": 0.07488308846950531, "learning_rate": 8.460948599127339e-05, "loss": 0.0065, "step": 14141 }, { "epoch": 2.7372291021671824, "grad_norm": 0.09374025464057922, "learning_rate": 8.460741977652983e-05, "loss": 0.0073, "step": 14142 }, { "epoch": 2.737422600619195, "grad_norm": 0.08975128084421158, "learning_rate": 8.460535345171318e-05, "loss": 0.0063, "step": 14143 }, { "epoch": 2.7376160990712073, "grad_norm": 0.12738212943077087, "learning_rate": 8.460328701683111e-05, "loss": 0.006, "step": 14144 }, { "epoch": 2.7378095975232197, "grad_norm": 0.07728876918554306, "learning_rate": 8.460122047189133e-05, "loss": 0.0077, "step": 14145 }, { "epoch": 2.738003095975232, "grad_norm": 0.1342071145772934, "learning_rate": 8.45991538169015e-05, "loss": 0.0071, "step": 14146 }, { "epoch": 2.7381965944272446, "grad_norm": 0.08947412669658661, "learning_rate": 8.459708705186931e-05, "loss": 0.0084, "step": 14147 }, { "epoch": 2.738390092879257, "grad_norm": 0.15702053904533386, "learning_rate": 8.459502017680245e-05, "loss": 0.0082, "step": 14148 }, { "epoch": 2.7385835913312695, "grad_norm": 0.09808791428804398, "learning_rate": 8.45929531917086e-05, "loss": 0.0062, "step": 14149 }, { "epoch": 2.738777089783282, "grad_norm": 0.11184165626764297, "learning_rate": 8.45908860965954e-05, "loss": 0.0062, "step": 14150 }, { "epoch": 2.7389705882352944, "grad_norm": 0.13503582775592804, "learning_rate": 8.458881889147064e-05, "loss": 0.0063, "step": 14151 }, { "epoch": 2.7391640866873064, "grad_norm": 0.05539152771234512, "learning_rate": 8.45867515763419e-05, "loss": 0.0061, "step": 14152 }, { "epoch": 2.739357585139319, "grad_norm": 0.18511340022087097, "learning_rate": 8.458468415121691e-05, "loss": 0.0066, "step": 14153 }, { "epoch": 2.7395510835913313, "grad_norm": 0.07696977257728577, "learning_rate": 8.458261661610338e-05, "loss": 0.0067, "step": 14154 }, { "epoch": 2.7397445820433437, "grad_norm": 0.08786360919475555, "learning_rate": 8.458054897100894e-05, "loss": 0.0061, "step": 14155 }, { "epoch": 2.739938080495356, "grad_norm": 0.08690127730369568, "learning_rate": 8.457848121594132e-05, "loss": 0.0069, "step": 14156 }, { "epoch": 2.7401315789473686, "grad_norm": 0.07915610074996948, "learning_rate": 8.45764133509082e-05, "loss": 0.0074, "step": 14157 }, { "epoch": 2.7403250773993806, "grad_norm": 0.10564686357975006, "learning_rate": 8.457434537591726e-05, "loss": 0.0071, "step": 14158 }, { "epoch": 2.740518575851393, "grad_norm": 0.06801775842905045, "learning_rate": 8.45722772909762e-05, "loss": 0.0076, "step": 14159 }, { "epoch": 2.7407120743034055, "grad_norm": 0.10419094562530518, "learning_rate": 8.457020909609269e-05, "loss": 0.0066, "step": 14160 }, { "epoch": 2.740905572755418, "grad_norm": 0.05228707194328308, "learning_rate": 8.456814079127443e-05, "loss": 0.0065, "step": 14161 }, { "epoch": 2.7410990712074303, "grad_norm": 0.10657121986150742, "learning_rate": 8.45660723765291e-05, "loss": 0.0068, "step": 14162 }, { "epoch": 2.741292569659443, "grad_norm": 0.08394840359687805, "learning_rate": 8.456400385186441e-05, "loss": 0.0072, "step": 14163 }, { "epoch": 2.7414860681114552, "grad_norm": 0.06905276328325272, "learning_rate": 8.456193521728803e-05, "loss": 0.0079, "step": 14164 }, { "epoch": 2.7416795665634677, "grad_norm": 0.10831116139888763, "learning_rate": 8.455986647280768e-05, "loss": 
0.0068, "step": 14165 }, { "epoch": 2.74187306501548, "grad_norm": 0.037788983434438705, "learning_rate": 8.4557797618431e-05, "loss": 0.0079, "step": 14166 }, { "epoch": 2.742066563467492, "grad_norm": 0.0693402886390686, "learning_rate": 8.455572865416572e-05, "loss": 0.0069, "step": 14167 }, { "epoch": 2.7422600619195046, "grad_norm": 0.07105459272861481, "learning_rate": 8.455365958001954e-05, "loss": 0.0078, "step": 14168 }, { "epoch": 2.742453560371517, "grad_norm": 0.03375018388032913, "learning_rate": 8.455159039600013e-05, "loss": 0.0069, "step": 14169 }, { "epoch": 2.7426470588235294, "grad_norm": 0.07001487165689468, "learning_rate": 8.454952110211519e-05, "loss": 0.007, "step": 14170 }, { "epoch": 2.742840557275542, "grad_norm": 0.047121789306402206, "learning_rate": 8.454745169837239e-05, "loss": 0.0069, "step": 14171 }, { "epoch": 2.7430340557275543, "grad_norm": 0.03818568214774132, "learning_rate": 8.454538218477945e-05, "loss": 0.0065, "step": 14172 }, { "epoch": 2.7432275541795663, "grad_norm": 0.038957465440034866, "learning_rate": 8.45433125613441e-05, "loss": 0.0065, "step": 14173 }, { "epoch": 2.7434210526315788, "grad_norm": 0.06300319731235504, "learning_rate": 8.454124282807394e-05, "loss": 0.0071, "step": 14174 }, { "epoch": 2.743614551083591, "grad_norm": 0.036539576947689056, "learning_rate": 8.453917298497675e-05, "loss": 0.006, "step": 14175 }, { "epoch": 2.7438080495356036, "grad_norm": 0.06367438286542892, "learning_rate": 8.453710303206019e-05, "loss": 0.0071, "step": 14176 }, { "epoch": 2.744001547987616, "grad_norm": 0.05578646808862686, "learning_rate": 8.453503296933195e-05, "loss": 0.006, "step": 14177 }, { "epoch": 2.7441950464396285, "grad_norm": 0.030933629721403122, "learning_rate": 8.453296279679973e-05, "loss": 0.0073, "step": 14178 }, { "epoch": 2.744388544891641, "grad_norm": 0.056147895753383636, "learning_rate": 8.453089251447124e-05, "loss": 0.0068, "step": 14179 }, { "epoch": 2.7445820433436534, "grad_norm": 0.04328693449497223, "learning_rate": 8.452882212235418e-05, "loss": 0.0066, "step": 14180 }, { "epoch": 2.744775541795666, "grad_norm": 0.04993806406855583, "learning_rate": 8.452675162045622e-05, "loss": 0.0065, "step": 14181 }, { "epoch": 2.7449690402476783, "grad_norm": 0.05551685020327568, "learning_rate": 8.452468100878508e-05, "loss": 0.0078, "step": 14182 }, { "epoch": 2.7451625386996903, "grad_norm": 0.043519459664821625, "learning_rate": 8.452261028734845e-05, "loss": 0.0091, "step": 14183 }, { "epoch": 2.7453560371517027, "grad_norm": 0.06646306067705154, "learning_rate": 8.452053945615402e-05, "loss": 0.0072, "step": 14184 }, { "epoch": 2.745549535603715, "grad_norm": 0.042239148169755936, "learning_rate": 8.451846851520952e-05, "loss": 0.0063, "step": 14185 }, { "epoch": 2.7457430340557276, "grad_norm": 0.05853850394487381, "learning_rate": 8.451639746452261e-05, "loss": 0.0083, "step": 14186 }, { "epoch": 2.74593653250774, "grad_norm": 0.04222891107201576, "learning_rate": 8.4514326304101e-05, "loss": 0.0064, "step": 14187 }, { "epoch": 2.746130030959752, "grad_norm": 0.07276244461536407, "learning_rate": 8.451225503395242e-05, "loss": 0.0073, "step": 14188 }, { "epoch": 2.7463235294117645, "grad_norm": 0.06903396546840668, "learning_rate": 8.451018365408452e-05, "loss": 0.0061, "step": 14189 }, { "epoch": 2.746517027863777, "grad_norm": 0.045439645648002625, "learning_rate": 8.450811216450506e-05, "loss": 0.0087, "step": 14190 }, { "epoch": 2.7467105263157894, "grad_norm": 0.09300032258033752, "learning_rate": 
8.45060405652217e-05, "loss": 0.0057, "step": 14191 }, { "epoch": 2.746904024767802, "grad_norm": 0.041478853672742844, "learning_rate": 8.450396885624214e-05, "loss": 0.0063, "step": 14192 }, { "epoch": 2.7470975232198143, "grad_norm": 0.07430002093315125, "learning_rate": 8.45018970375741e-05, "loss": 0.0076, "step": 14193 }, { "epoch": 2.7472910216718267, "grad_norm": 0.05209987610578537, "learning_rate": 8.449982510922527e-05, "loss": 0.007, "step": 14194 }, { "epoch": 2.747484520123839, "grad_norm": 0.061782557517290115, "learning_rate": 8.449775307120338e-05, "loss": 0.0055, "step": 14195 }, { "epoch": 2.7476780185758516, "grad_norm": 0.0548827163875103, "learning_rate": 8.44956809235161e-05, "loss": 0.0079, "step": 14196 }, { "epoch": 2.747871517027864, "grad_norm": 0.07953236252069473, "learning_rate": 8.449360866617114e-05, "loss": 0.0075, "step": 14197 }, { "epoch": 2.748065015479876, "grad_norm": 0.0371130108833313, "learning_rate": 8.449153629917621e-05, "loss": 0.007, "step": 14198 }, { "epoch": 2.7482585139318885, "grad_norm": 0.06982148438692093, "learning_rate": 8.448946382253902e-05, "loss": 0.0065, "step": 14199 }, { "epoch": 2.748452012383901, "grad_norm": 0.0764608308672905, "learning_rate": 8.448739123626727e-05, "loss": 0.0068, "step": 14200 }, { "epoch": 2.7486455108359134, "grad_norm": 0.06228678300976753, "learning_rate": 8.448531854036866e-05, "loss": 0.0064, "step": 14201 }, { "epoch": 2.748839009287926, "grad_norm": 0.06987099349498749, "learning_rate": 8.448324573485091e-05, "loss": 0.0067, "step": 14202 }, { "epoch": 2.7490325077399382, "grad_norm": 0.05082486942410469, "learning_rate": 8.448117281972172e-05, "loss": 0.0059, "step": 14203 }, { "epoch": 2.7492260061919502, "grad_norm": 0.06725840270519257, "learning_rate": 8.447909979498878e-05, "loss": 0.0074, "step": 14204 }, { "epoch": 2.7494195046439627, "grad_norm": 0.035024043172597885, "learning_rate": 8.447702666065982e-05, "loss": 0.007, "step": 14205 }, { "epoch": 2.749613003095975, "grad_norm": 0.094821035861969, "learning_rate": 8.447495341674255e-05, "loss": 0.0063, "step": 14206 }, { "epoch": 2.7498065015479876, "grad_norm": 0.029227543622255325, "learning_rate": 8.447288006324462e-05, "loss": 0.0075, "step": 14207 }, { "epoch": 2.75, "grad_norm": 0.07173427939414978, "learning_rate": 8.447080660017383e-05, "loss": 0.0066, "step": 14208 }, { "epoch": 2.7501934984520124, "grad_norm": 0.05294164642691612, "learning_rate": 8.446873302753784e-05, "loss": 0.0068, "step": 14209 }, { "epoch": 2.750386996904025, "grad_norm": 0.06150050461292267, "learning_rate": 8.446665934534433e-05, "loss": 0.0074, "step": 14210 }, { "epoch": 2.7505804953560373, "grad_norm": 0.06302624195814133, "learning_rate": 8.446458555360109e-05, "loss": 0.0059, "step": 14211 }, { "epoch": 2.7507739938080498, "grad_norm": 0.05197368189692497, "learning_rate": 8.446251165231575e-05, "loss": 0.0072, "step": 14212 }, { "epoch": 2.7509674922600618, "grad_norm": 0.0287840124219656, "learning_rate": 8.446043764149604e-05, "loss": 0.0076, "step": 14213 }, { "epoch": 2.751160990712074, "grad_norm": 0.08962938189506531, "learning_rate": 8.44583635211497e-05, "loss": 0.0074, "step": 14214 }, { "epoch": 2.7513544891640866, "grad_norm": 0.08439639955759048, "learning_rate": 8.445628929128442e-05, "loss": 0.0067, "step": 14215 }, { "epoch": 2.751547987616099, "grad_norm": 0.07746980339288712, "learning_rate": 8.44542149519079e-05, "loss": 0.0076, "step": 14216 }, { "epoch": 2.7517414860681115, "grad_norm": 0.09353505074977875, "learning_rate": 
8.44521405030279e-05, "loss": 0.0071, "step": 14217 }, { "epoch": 2.751934984520124, "grad_norm": 0.07757457345724106, "learning_rate": 8.445006594465208e-05, "loss": 0.0064, "step": 14218 }, { "epoch": 2.752128482972136, "grad_norm": 0.08050812035799026, "learning_rate": 8.444799127678817e-05, "loss": 0.0065, "step": 14219 }, { "epoch": 2.7523219814241484, "grad_norm": 0.11306094378232956, "learning_rate": 8.444591649944389e-05, "loss": 0.0065, "step": 14220 }, { "epoch": 2.752515479876161, "grad_norm": 0.06825783848762512, "learning_rate": 8.444384161262694e-05, "loss": 0.0077, "step": 14221 }, { "epoch": 2.7527089783281733, "grad_norm": 0.08077285438776016, "learning_rate": 8.444176661634504e-05, "loss": 0.0065, "step": 14222 }, { "epoch": 2.7529024767801857, "grad_norm": 0.12695898115634918, "learning_rate": 8.443969151060591e-05, "loss": 0.0066, "step": 14223 }, { "epoch": 2.753095975232198, "grad_norm": 0.0670587494969368, "learning_rate": 8.443761629541727e-05, "loss": 0.0071, "step": 14224 }, { "epoch": 2.7532894736842106, "grad_norm": 0.11191829293966293, "learning_rate": 8.443554097078682e-05, "loss": 0.0062, "step": 14225 }, { "epoch": 2.753482972136223, "grad_norm": 0.052960123866796494, "learning_rate": 8.443346553672227e-05, "loss": 0.0081, "step": 14226 }, { "epoch": 2.7536764705882355, "grad_norm": 0.09551037847995758, "learning_rate": 8.443138999323139e-05, "loss": 0.0064, "step": 14227 }, { "epoch": 2.753869969040248, "grad_norm": 0.06521958857774734, "learning_rate": 8.442931434032181e-05, "loss": 0.0067, "step": 14228 }, { "epoch": 2.75406346749226, "grad_norm": 0.06744678318500519, "learning_rate": 8.44272385780013e-05, "loss": 0.0057, "step": 14229 }, { "epoch": 2.7542569659442724, "grad_norm": 0.08870063722133636, "learning_rate": 8.442516270627759e-05, "loss": 0.0079, "step": 14230 }, { "epoch": 2.754450464396285, "grad_norm": 0.06943046301603317, "learning_rate": 8.442308672515835e-05, "loss": 0.0077, "step": 14231 }, { "epoch": 2.7546439628482973, "grad_norm": 0.08354371786117554, "learning_rate": 8.442101063465132e-05, "loss": 0.0067, "step": 14232 }, { "epoch": 2.7548374613003097, "grad_norm": 0.07208284735679626, "learning_rate": 8.441893443476424e-05, "loss": 0.006, "step": 14233 }, { "epoch": 2.7550309597523217, "grad_norm": 0.099787637591362, "learning_rate": 8.44168581255048e-05, "loss": 0.0056, "step": 14234 }, { "epoch": 2.755224458204334, "grad_norm": 0.06127849593758583, "learning_rate": 8.441478170688074e-05, "loss": 0.0063, "step": 14235 }, { "epoch": 2.7554179566563466, "grad_norm": 0.12423712760210037, "learning_rate": 8.441270517889975e-05, "loss": 0.0061, "step": 14236 }, { "epoch": 2.755611455108359, "grad_norm": 0.054663851857185364, "learning_rate": 8.44106285415696e-05, "loss": 0.0073, "step": 14237 }, { "epoch": 2.7558049535603715, "grad_norm": 0.11262987554073334, "learning_rate": 8.440855179489795e-05, "loss": 0.0059, "step": 14238 }, { "epoch": 2.755998452012384, "grad_norm": 0.05774414539337158, "learning_rate": 8.440647493889257e-05, "loss": 0.0072, "step": 14239 }, { "epoch": 2.7561919504643964, "grad_norm": 0.1171078309416771, "learning_rate": 8.440439797356116e-05, "loss": 0.008, "step": 14240 }, { "epoch": 2.756385448916409, "grad_norm": 0.047564808279275894, "learning_rate": 8.440232089891143e-05, "loss": 0.0076, "step": 14241 }, { "epoch": 2.7565789473684212, "grad_norm": 0.08849415928125381, "learning_rate": 8.440024371495113e-05, "loss": 0.006, "step": 14242 }, { "epoch": 2.7567724458204337, "grad_norm": 0.030229739844799042, 
"learning_rate": 8.439816642168797e-05, "loss": 0.0078, "step": 14243 }, { "epoch": 2.7569659442724457, "grad_norm": 0.08355194330215454, "learning_rate": 8.439608901912967e-05, "loss": 0.0058, "step": 14244 }, { "epoch": 2.757159442724458, "grad_norm": 0.06721586734056473, "learning_rate": 8.439401150728394e-05, "loss": 0.0082, "step": 14245 }, { "epoch": 2.7573529411764706, "grad_norm": 0.13501547276973724, "learning_rate": 8.439193388615852e-05, "loss": 0.0065, "step": 14246 }, { "epoch": 2.757546439628483, "grad_norm": 0.07324790954589844, "learning_rate": 8.438985615576114e-05, "loss": 0.0062, "step": 14247 }, { "epoch": 2.7577399380804954, "grad_norm": 0.09719092398881912, "learning_rate": 8.438777831609953e-05, "loss": 0.0069, "step": 14248 }, { "epoch": 2.757933436532508, "grad_norm": 0.09436411410570145, "learning_rate": 8.438570036718138e-05, "loss": 0.0069, "step": 14249 }, { "epoch": 2.75812693498452, "grad_norm": 0.052584998309612274, "learning_rate": 8.438362230901445e-05, "loss": 0.0075, "step": 14250 }, { "epoch": 2.7583204334365323, "grad_norm": 0.09100615233182907, "learning_rate": 8.438154414160644e-05, "loss": 0.0072, "step": 14251 }, { "epoch": 2.7585139318885448, "grad_norm": 0.03344978764653206, "learning_rate": 8.437946586496511e-05, "loss": 0.0073, "step": 14252 }, { "epoch": 2.758707430340557, "grad_norm": 0.07843136042356491, "learning_rate": 8.437738747909813e-05, "loss": 0.0073, "step": 14253 }, { "epoch": 2.7589009287925697, "grad_norm": 0.04352836683392525, "learning_rate": 8.437530898401329e-05, "loss": 0.0056, "step": 14254 }, { "epoch": 2.759094427244582, "grad_norm": 0.08827530592679977, "learning_rate": 8.43732303797183e-05, "loss": 0.0076, "step": 14255 }, { "epoch": 2.7592879256965945, "grad_norm": 0.05807788297533989, "learning_rate": 8.437115166622086e-05, "loss": 0.0072, "step": 14256 }, { "epoch": 2.759481424148607, "grad_norm": 0.06412049382925034, "learning_rate": 8.436907284352871e-05, "loss": 0.007, "step": 14257 }, { "epoch": 2.7596749226006194, "grad_norm": 0.061565808951854706, "learning_rate": 8.436699391164958e-05, "loss": 0.0077, "step": 14258 }, { "epoch": 2.7598684210526314, "grad_norm": 0.0377238467335701, "learning_rate": 8.436491487059121e-05, "loss": 0.0084, "step": 14259 }, { "epoch": 2.760061919504644, "grad_norm": 0.07299201935529709, "learning_rate": 8.436283572036134e-05, "loss": 0.0069, "step": 14260 }, { "epoch": 2.7602554179566563, "grad_norm": 0.06359244138002396, "learning_rate": 8.436075646096766e-05, "loss": 0.0067, "step": 14261 }, { "epoch": 2.7604489164086687, "grad_norm": 0.0408906415104866, "learning_rate": 8.435867709241792e-05, "loss": 0.0069, "step": 14262 }, { "epoch": 2.760642414860681, "grad_norm": 0.048228174448013306, "learning_rate": 8.435659761471987e-05, "loss": 0.0058, "step": 14263 }, { "epoch": 2.7608359133126936, "grad_norm": 0.06938207149505615, "learning_rate": 8.435451802788121e-05, "loss": 0.0062, "step": 14264 }, { "epoch": 2.7610294117647056, "grad_norm": 0.03846685215830803, "learning_rate": 8.43524383319097e-05, "loss": 0.0065, "step": 14265 }, { "epoch": 2.761222910216718, "grad_norm": 0.06656485795974731, "learning_rate": 8.435035852681303e-05, "loss": 0.0086, "step": 14266 }, { "epoch": 2.7614164086687305, "grad_norm": 0.060941148549318314, "learning_rate": 8.434827861259899e-05, "loss": 0.0065, "step": 14267 }, { "epoch": 2.761609907120743, "grad_norm": 0.05083153396844864, "learning_rate": 8.434619858927525e-05, "loss": 0.0065, "step": 14268 }, { "epoch": 2.7618034055727554, "grad_norm": 
0.0454162061214447, "learning_rate": 8.434411845684961e-05, "loss": 0.0062, "step": 14269 }, { "epoch": 2.761996904024768, "grad_norm": 0.050458695739507675, "learning_rate": 8.434203821532974e-05, "loss": 0.0066, "step": 14270 }, { "epoch": 2.7621904024767803, "grad_norm": 0.030943812802433968, "learning_rate": 8.433995786472342e-05, "loss": 0.0072, "step": 14271 }, { "epoch": 2.7623839009287927, "grad_norm": 0.044309794902801514, "learning_rate": 8.433787740503836e-05, "loss": 0.0077, "step": 14272 }, { "epoch": 2.762577399380805, "grad_norm": 0.05001823976635933, "learning_rate": 8.43357968362823e-05, "loss": 0.0063, "step": 14273 }, { "epoch": 2.7627708978328176, "grad_norm": 0.020840803161263466, "learning_rate": 8.433371615846297e-05, "loss": 0.0064, "step": 14274 }, { "epoch": 2.7629643962848296, "grad_norm": 0.06377348303794861, "learning_rate": 8.433163537158811e-05, "loss": 0.0076, "step": 14275 }, { "epoch": 2.763157894736842, "grad_norm": 0.038378991186618805, "learning_rate": 8.432955447566547e-05, "loss": 0.0077, "step": 14276 }, { "epoch": 2.7633513931888545, "grad_norm": 0.09996773302555084, "learning_rate": 8.432747347070277e-05, "loss": 0.0075, "step": 14277 }, { "epoch": 2.763544891640867, "grad_norm": 0.040232837200164795, "learning_rate": 8.432539235670776e-05, "loss": 0.0054, "step": 14278 }, { "epoch": 2.7637383900928794, "grad_norm": 0.09958823770284653, "learning_rate": 8.432331113368815e-05, "loss": 0.008, "step": 14279 }, { "epoch": 2.763931888544892, "grad_norm": 0.0565226748585701, "learning_rate": 8.432122980165169e-05, "loss": 0.0086, "step": 14280 }, { "epoch": 2.764125386996904, "grad_norm": 0.06058130040764809, "learning_rate": 8.431914836060614e-05, "loss": 0.0075, "step": 14281 }, { "epoch": 2.7643188854489162, "grad_norm": 0.08691991120576859, "learning_rate": 8.431706681055921e-05, "loss": 0.0082, "step": 14282 }, { "epoch": 2.7645123839009287, "grad_norm": 0.02754971943795681, "learning_rate": 8.431498515151865e-05, "loss": 0.0075, "step": 14283 }, { "epoch": 2.764705882352941, "grad_norm": 0.07936052978038788, "learning_rate": 8.43129033834922e-05, "loss": 0.0084, "step": 14284 }, { "epoch": 2.7648993808049536, "grad_norm": 0.06046433001756668, "learning_rate": 8.43108215064876e-05, "loss": 0.0062, "step": 14285 }, { "epoch": 2.765092879256966, "grad_norm": 0.05154716968536377, "learning_rate": 8.43087395205126e-05, "loss": 0.0067, "step": 14286 }, { "epoch": 2.7652863777089784, "grad_norm": 0.09251632541418076, "learning_rate": 8.43066574255749e-05, "loss": 0.0077, "step": 14287 }, { "epoch": 2.765479876160991, "grad_norm": 0.03345147520303726, "learning_rate": 8.430457522168228e-05, "loss": 0.007, "step": 14288 }, { "epoch": 2.7656733746130033, "grad_norm": 0.09083105623722076, "learning_rate": 8.430249290884248e-05, "loss": 0.0076, "step": 14289 }, { "epoch": 2.7658668730650153, "grad_norm": 0.07488022744655609, "learning_rate": 8.430041048706323e-05, "loss": 0.0062, "step": 14290 }, { "epoch": 2.7660603715170278, "grad_norm": 0.11271540820598602, "learning_rate": 8.429832795635227e-05, "loss": 0.0065, "step": 14291 }, { "epoch": 2.76625386996904, "grad_norm": 0.05940308794379234, "learning_rate": 8.429624531671733e-05, "loss": 0.0068, "step": 14292 }, { "epoch": 2.7664473684210527, "grad_norm": 0.050093624740839005, "learning_rate": 8.429416256816621e-05, "loss": 0.0059, "step": 14293 }, { "epoch": 2.766640866873065, "grad_norm": 0.07725764811038971, "learning_rate": 8.429207971070657e-05, "loss": 0.0062, "step": 14294 }, { "epoch": 
2.7668343653250775, "grad_norm": 0.044202499091625214, "learning_rate": 8.42899967443462e-05, "loss": 0.0055, "step": 14295 }, { "epoch": 2.7670278637770895, "grad_norm": 0.055540427565574646, "learning_rate": 8.428791366909287e-05, "loss": 0.0066, "step": 14296 }, { "epoch": 2.767221362229102, "grad_norm": 0.05657879635691643, "learning_rate": 8.428583048495427e-05, "loss": 0.0064, "step": 14297 }, { "epoch": 2.7674148606811144, "grad_norm": 0.07572949677705765, "learning_rate": 8.428374719193817e-05, "loss": 0.0069, "step": 14298 }, { "epoch": 2.767608359133127, "grad_norm": 0.07358493655920029, "learning_rate": 8.428166379005232e-05, "loss": 0.0064, "step": 14299 }, { "epoch": 2.7678018575851393, "grad_norm": 0.05338266119360924, "learning_rate": 8.427958027930446e-05, "loss": 0.0066, "step": 14300 }, { "epoch": 2.7679953560371517, "grad_norm": 0.06842586398124695, "learning_rate": 8.427749665970233e-05, "loss": 0.0073, "step": 14301 }, { "epoch": 2.768188854489164, "grad_norm": 0.05879881978034973, "learning_rate": 8.427541293125367e-05, "loss": 0.0083, "step": 14302 }, { "epoch": 2.7683823529411766, "grad_norm": 0.06313230842351913, "learning_rate": 8.427332909396627e-05, "loss": 0.0088, "step": 14303 }, { "epoch": 2.768575851393189, "grad_norm": 0.044643618166446686, "learning_rate": 8.427124514784782e-05, "loss": 0.0066, "step": 14304 }, { "epoch": 2.7687693498452015, "grad_norm": 0.052695028483867645, "learning_rate": 8.426916109290611e-05, "loss": 0.0064, "step": 14305 }, { "epoch": 2.7689628482972135, "grad_norm": 0.03444577008485794, "learning_rate": 8.426707692914885e-05, "loss": 0.0069, "step": 14306 }, { "epoch": 2.769156346749226, "grad_norm": 0.0819794237613678, "learning_rate": 8.426499265658382e-05, "loss": 0.008, "step": 14307 }, { "epoch": 2.7693498452012384, "grad_norm": 0.024274185299873352, "learning_rate": 8.426290827521875e-05, "loss": 0.0061, "step": 14308 }, { "epoch": 2.769543343653251, "grad_norm": 0.08201169967651367, "learning_rate": 8.426082378506142e-05, "loss": 0.006, "step": 14309 }, { "epoch": 2.7697368421052633, "grad_norm": 0.0558042898774147, "learning_rate": 8.425873918611953e-05, "loss": 0.0063, "step": 14310 }, { "epoch": 2.7699303405572753, "grad_norm": 0.0645131915807724, "learning_rate": 8.425665447840088e-05, "loss": 0.0067, "step": 14311 }, { "epoch": 2.7701238390092877, "grad_norm": 0.0634748637676239, "learning_rate": 8.425456966191319e-05, "loss": 0.0069, "step": 14312 }, { "epoch": 2.7703173374613, "grad_norm": 0.05181925371289253, "learning_rate": 8.425248473666421e-05, "loss": 0.0059, "step": 14313 }, { "epoch": 2.7705108359133126, "grad_norm": 0.06914083659648895, "learning_rate": 8.42503997026617e-05, "loss": 0.0077, "step": 14314 }, { "epoch": 2.770704334365325, "grad_norm": 0.057094499468803406, "learning_rate": 8.424831455991342e-05, "loss": 0.0064, "step": 14315 }, { "epoch": 2.7708978328173375, "grad_norm": 0.11387230455875397, "learning_rate": 8.424622930842711e-05, "loss": 0.0055, "step": 14316 }, { "epoch": 2.77109133126935, "grad_norm": 0.06123245507478714, "learning_rate": 8.424414394821053e-05, "loss": 0.0065, "step": 14317 }, { "epoch": 2.7712848297213624, "grad_norm": 0.10509660840034485, "learning_rate": 8.424205847927143e-05, "loss": 0.0078, "step": 14318 }, { "epoch": 2.771478328173375, "grad_norm": 0.07202010601758957, "learning_rate": 8.423997290161756e-05, "loss": 0.0075, "step": 14319 }, { "epoch": 2.7716718266253872, "grad_norm": 0.0804644376039505, "learning_rate": 8.423788721525667e-05, "loss": 0.0075, "step": 
14320 }, { "epoch": 2.7718653250773992, "grad_norm": 0.09128554910421371, "learning_rate": 8.423580142019653e-05, "loss": 0.0052, "step": 14321 }, { "epoch": 2.7720588235294117, "grad_norm": 0.05686086788773537, "learning_rate": 8.423371551644487e-05, "loss": 0.0072, "step": 14322 }, { "epoch": 2.772252321981424, "grad_norm": 0.11294678598642349, "learning_rate": 8.423162950400948e-05, "loss": 0.0069, "step": 14323 }, { "epoch": 2.7724458204334366, "grad_norm": 0.07134696841239929, "learning_rate": 8.422954338289808e-05, "loss": 0.0073, "step": 14324 }, { "epoch": 2.772639318885449, "grad_norm": 0.09912959486246109, "learning_rate": 8.422745715311843e-05, "loss": 0.006, "step": 14325 }, { "epoch": 2.7728328173374615, "grad_norm": 0.07880458980798721, "learning_rate": 8.422537081467831e-05, "loss": 0.0066, "step": 14326 }, { "epoch": 2.7730263157894735, "grad_norm": 0.05833188816905022, "learning_rate": 8.422328436758547e-05, "loss": 0.0064, "step": 14327 }, { "epoch": 2.773219814241486, "grad_norm": 0.11806969344615936, "learning_rate": 8.422119781184764e-05, "loss": 0.0064, "step": 14328 }, { "epoch": 2.7734133126934983, "grad_norm": 0.04278305917978287, "learning_rate": 8.42191111474726e-05, "loss": 0.0069, "step": 14329 }, { "epoch": 2.7736068111455108, "grad_norm": 0.10457990318536758, "learning_rate": 8.421702437446811e-05, "loss": 0.0077, "step": 14330 }, { "epoch": 2.773800309597523, "grad_norm": 0.05965150147676468, "learning_rate": 8.421493749284193e-05, "loss": 0.0089, "step": 14331 }, { "epoch": 2.7739938080495357, "grad_norm": 0.10039706528186798, "learning_rate": 8.421285050260179e-05, "loss": 0.0077, "step": 14332 }, { "epoch": 2.774187306501548, "grad_norm": 0.07288198173046112, "learning_rate": 8.42107634037555e-05, "loss": 0.0064, "step": 14333 }, { "epoch": 2.7743808049535605, "grad_norm": 0.10255072265863419, "learning_rate": 8.420867619631077e-05, "loss": 0.0061, "step": 14334 }, { "epoch": 2.774574303405573, "grad_norm": 0.0799456238746643, "learning_rate": 8.42065888802754e-05, "loss": 0.0076, "step": 14335 }, { "epoch": 2.774767801857585, "grad_norm": 0.07905981689691544, "learning_rate": 8.420450145565709e-05, "loss": 0.0071, "step": 14336 }, { "epoch": 2.7749613003095974, "grad_norm": 0.10120394080877304, "learning_rate": 8.420241392246368e-05, "loss": 0.0086, "step": 14337 }, { "epoch": 2.77515479876161, "grad_norm": 0.12036161124706268, "learning_rate": 8.420032628070287e-05, "loss": 0.0058, "step": 14338 }, { "epoch": 2.7753482972136223, "grad_norm": 0.08324403315782547, "learning_rate": 8.419823853038244e-05, "loss": 0.0058, "step": 14339 }, { "epoch": 2.7755417956656347, "grad_norm": 0.11777432262897491, "learning_rate": 8.419615067151016e-05, "loss": 0.0079, "step": 14340 }, { "epoch": 2.775735294117647, "grad_norm": 0.0929582267999649, "learning_rate": 8.419406270409378e-05, "loss": 0.0075, "step": 14341 }, { "epoch": 2.775928792569659, "grad_norm": 0.076052226126194, "learning_rate": 8.419197462814107e-05, "loss": 0.0068, "step": 14342 }, { "epoch": 2.7761222910216716, "grad_norm": 0.09483295679092407, "learning_rate": 8.41898864436598e-05, "loss": 0.0082, "step": 14343 }, { "epoch": 2.776315789473684, "grad_norm": 0.0842536985874176, "learning_rate": 8.418779815065772e-05, "loss": 0.0075, "step": 14344 }, { "epoch": 2.7765092879256965, "grad_norm": 0.05411534756422043, "learning_rate": 8.418570974914259e-05, "loss": 0.0079, "step": 14345 }, { "epoch": 2.776702786377709, "grad_norm": 0.0994703471660614, "learning_rate": 8.418362123912219e-05, "loss": 
0.0069, "step": 14346 }, { "epoch": 2.7768962848297214, "grad_norm": 0.03173217549920082, "learning_rate": 8.418153262060428e-05, "loss": 0.0073, "step": 14347 }, { "epoch": 2.777089783281734, "grad_norm": 0.07176783680915833, "learning_rate": 8.417944389359663e-05, "loss": 0.0076, "step": 14348 }, { "epoch": 2.7772832817337463, "grad_norm": 0.051308803260326385, "learning_rate": 8.417735505810698e-05, "loss": 0.0069, "step": 14349 }, { "epoch": 2.7774767801857587, "grad_norm": 0.0701804980635643, "learning_rate": 8.417526611414312e-05, "loss": 0.0076, "step": 14350 }, { "epoch": 2.777670278637771, "grad_norm": 0.09348494559526443, "learning_rate": 8.417317706171281e-05, "loss": 0.0065, "step": 14351 }, { "epoch": 2.777863777089783, "grad_norm": 0.05556061118841171, "learning_rate": 8.417108790082382e-05, "loss": 0.0067, "step": 14352 }, { "epoch": 2.7780572755417956, "grad_norm": 0.08353187888860703, "learning_rate": 8.416899863148391e-05, "loss": 0.0075, "step": 14353 }, { "epoch": 2.778250773993808, "grad_norm": 0.08409477025270462, "learning_rate": 8.416690925370085e-05, "loss": 0.0076, "step": 14354 }, { "epoch": 2.7784442724458205, "grad_norm": 0.06708426028490067, "learning_rate": 8.41648197674824e-05, "loss": 0.0057, "step": 14355 }, { "epoch": 2.778637770897833, "grad_norm": 0.07619456946849823, "learning_rate": 8.416273017283635e-05, "loss": 0.006, "step": 14356 }, { "epoch": 2.7788312693498454, "grad_norm": 0.06567572802305222, "learning_rate": 8.416064046977045e-05, "loss": 0.0072, "step": 14357 }, { "epoch": 2.7790247678018574, "grad_norm": 0.07110650092363358, "learning_rate": 8.415855065829247e-05, "loss": 0.0057, "step": 14358 }, { "epoch": 2.77921826625387, "grad_norm": 0.06365097314119339, "learning_rate": 8.415646073841019e-05, "loss": 0.0069, "step": 14359 }, { "epoch": 2.7794117647058822, "grad_norm": 0.062058571726083755, "learning_rate": 8.415437071013136e-05, "loss": 0.0061, "step": 14360 }, { "epoch": 2.7796052631578947, "grad_norm": 0.0439055860042572, "learning_rate": 8.415228057346378e-05, "loss": 0.0069, "step": 14361 }, { "epoch": 2.779798761609907, "grad_norm": 0.07664100080728531, "learning_rate": 8.41501903284152e-05, "loss": 0.0064, "step": 14362 }, { "epoch": 2.7799922600619196, "grad_norm": 0.05624159425497055, "learning_rate": 8.41480999749934e-05, "loss": 0.0056, "step": 14363 }, { "epoch": 2.780185758513932, "grad_norm": 0.058365438133478165, "learning_rate": 8.414600951320613e-05, "loss": 0.0072, "step": 14364 }, { "epoch": 2.7803792569659445, "grad_norm": 0.0758703425526619, "learning_rate": 8.414391894306118e-05, "loss": 0.0065, "step": 14365 }, { "epoch": 2.780572755417957, "grad_norm": 0.04662242904305458, "learning_rate": 8.414182826456634e-05, "loss": 0.0063, "step": 14366 }, { "epoch": 2.780766253869969, "grad_norm": 0.05653157830238342, "learning_rate": 8.413973747772936e-05, "loss": 0.0064, "step": 14367 }, { "epoch": 2.7809597523219813, "grad_norm": 0.05550704896450043, "learning_rate": 8.4137646582558e-05, "loss": 0.0077, "step": 14368 }, { "epoch": 2.781153250773994, "grad_norm": 0.046617407351732254, "learning_rate": 8.413555557906006e-05, "loss": 0.0068, "step": 14369 }, { "epoch": 2.781346749226006, "grad_norm": 0.06443360447883606, "learning_rate": 8.41334644672433e-05, "loss": 0.0063, "step": 14370 }, { "epoch": 2.7815402476780187, "grad_norm": 0.05152752250432968, "learning_rate": 8.41313732471155e-05, "loss": 0.0071, "step": 14371 }, { "epoch": 2.781733746130031, "grad_norm": 0.04612063989043236, "learning_rate": 
8.412928191868445e-05, "loss": 0.0068, "step": 14372 }, { "epoch": 2.781927244582043, "grad_norm": 0.061052337288856506, "learning_rate": 8.412719048195789e-05, "loss": 0.0092, "step": 14373 }, { "epoch": 2.7821207430340555, "grad_norm": 0.07465449720621109, "learning_rate": 8.41250989369436e-05, "loss": 0.0074, "step": 14374 }, { "epoch": 2.782314241486068, "grad_norm": 0.030429519712924957, "learning_rate": 8.412300728364939e-05, "loss": 0.0066, "step": 14375 }, { "epoch": 2.7825077399380804, "grad_norm": 0.05157911777496338, "learning_rate": 8.412091552208299e-05, "loss": 0.0064, "step": 14376 }, { "epoch": 2.782701238390093, "grad_norm": 0.04699620604515076, "learning_rate": 8.411882365225222e-05, "loss": 0.0075, "step": 14377 }, { "epoch": 2.7828947368421053, "grad_norm": 0.07018142193555832, "learning_rate": 8.411673167416484e-05, "loss": 0.006, "step": 14378 }, { "epoch": 2.7830882352941178, "grad_norm": 0.0420965701341629, "learning_rate": 8.411463958782862e-05, "loss": 0.0065, "step": 14379 }, { "epoch": 2.78328173374613, "grad_norm": 0.04990728944540024, "learning_rate": 8.411254739325134e-05, "loss": 0.0083, "step": 14380 }, { "epoch": 2.7834752321981426, "grad_norm": 0.045417461544275284, "learning_rate": 8.41104550904408e-05, "loss": 0.0076, "step": 14381 }, { "epoch": 2.7836687306501546, "grad_norm": 0.047816868871450424, "learning_rate": 8.410836267940475e-05, "loss": 0.0078, "step": 14382 }, { "epoch": 2.783862229102167, "grad_norm": 0.037088267505168915, "learning_rate": 8.410627016015098e-05, "loss": 0.0062, "step": 14383 }, { "epoch": 2.7840557275541795, "grad_norm": 0.06334662437438965, "learning_rate": 8.410417753268726e-05, "loss": 0.0064, "step": 14384 }, { "epoch": 2.784249226006192, "grad_norm": 0.03761209920048714, "learning_rate": 8.410208479702139e-05, "loss": 0.0061, "step": 14385 }, { "epoch": 2.7844427244582044, "grad_norm": 0.06019312143325806, "learning_rate": 8.409999195316114e-05, "loss": 0.008, "step": 14386 }, { "epoch": 2.784636222910217, "grad_norm": 0.03326718509197235, "learning_rate": 8.409789900111429e-05, "loss": 0.006, "step": 14387 }, { "epoch": 2.784829721362229, "grad_norm": 0.0456421785056591, "learning_rate": 8.409580594088861e-05, "loss": 0.0063, "step": 14388 }, { "epoch": 2.7850232198142413, "grad_norm": 0.046799685806035995, "learning_rate": 8.409371277249192e-05, "loss": 0.0061, "step": 14389 }, { "epoch": 2.7852167182662537, "grad_norm": 0.05584275349974632, "learning_rate": 8.409161949593195e-05, "loss": 0.0066, "step": 14390 }, { "epoch": 2.785410216718266, "grad_norm": 0.03214903920888901, "learning_rate": 8.408952611121652e-05, "loss": 0.0066, "step": 14391 }, { "epoch": 2.7856037151702786, "grad_norm": 0.032313231378793716, "learning_rate": 8.40874326183534e-05, "loss": 0.008, "step": 14392 }, { "epoch": 2.785797213622291, "grad_norm": 0.0585363507270813, "learning_rate": 8.408533901735034e-05, "loss": 0.0081, "step": 14393 }, { "epoch": 2.7859907120743035, "grad_norm": 0.05872600898146629, "learning_rate": 8.40832453082152e-05, "loss": 0.0068, "step": 14394 }, { "epoch": 2.786184210526316, "grad_norm": 0.060201819986104965, "learning_rate": 8.408115149095571e-05, "loss": 0.0063, "step": 14395 }, { "epoch": 2.7863777089783284, "grad_norm": 0.07819848507642746, "learning_rate": 8.407905756557967e-05, "loss": 0.0081, "step": 14396 }, { "epoch": 2.786571207430341, "grad_norm": 0.06451290100812912, "learning_rate": 8.407696353209484e-05, "loss": 0.0069, "step": 14397 }, { "epoch": 2.786764705882353, "grad_norm": 0.06928173452615738, 
"learning_rate": 8.407486939050903e-05, "loss": 0.0078, "step": 14398 }, { "epoch": 2.7869582043343653, "grad_norm": 0.08371837437152863, "learning_rate": 8.407277514083001e-05, "loss": 0.006, "step": 14399 }, { "epoch": 2.7871517027863777, "grad_norm": 0.07512663304805756, "learning_rate": 8.407068078306561e-05, "loss": 0.0068, "step": 14400 }, { "epoch": 2.78734520123839, "grad_norm": 0.07840050011873245, "learning_rate": 8.406858631722355e-05, "loss": 0.0051, "step": 14401 }, { "epoch": 2.7875386996904026, "grad_norm": 0.06966853886842728, "learning_rate": 8.406649174331164e-05, "loss": 0.0084, "step": 14402 }, { "epoch": 2.787732198142415, "grad_norm": 0.09166646003723145, "learning_rate": 8.406439706133771e-05, "loss": 0.0079, "step": 14403 }, { "epoch": 2.787925696594427, "grad_norm": 0.04453781992197037, "learning_rate": 8.406230227130949e-05, "loss": 0.0077, "step": 14404 }, { "epoch": 2.7881191950464395, "grad_norm": 0.08436562120914459, "learning_rate": 8.40602073732348e-05, "loss": 0.007, "step": 14405 }, { "epoch": 2.788312693498452, "grad_norm": 0.07620402425527573, "learning_rate": 8.405811236712143e-05, "loss": 0.0066, "step": 14406 }, { "epoch": 2.7885061919504643, "grad_norm": 0.061536528170108795, "learning_rate": 8.405601725297713e-05, "loss": 0.0084, "step": 14407 }, { "epoch": 2.788699690402477, "grad_norm": 0.07340744137763977, "learning_rate": 8.405392203080975e-05, "loss": 0.0068, "step": 14408 }, { "epoch": 2.7888931888544892, "grad_norm": 0.03684301674365997, "learning_rate": 8.405182670062703e-05, "loss": 0.006, "step": 14409 }, { "epoch": 2.7890866873065017, "grad_norm": 0.06171330437064171, "learning_rate": 8.404973126243677e-05, "loss": 0.006, "step": 14410 }, { "epoch": 2.789280185758514, "grad_norm": 0.023279044777154922, "learning_rate": 8.404763571624678e-05, "loss": 0.0067, "step": 14411 }, { "epoch": 2.7894736842105265, "grad_norm": 0.05338405445218086, "learning_rate": 8.404554006206482e-05, "loss": 0.0064, "step": 14412 }, { "epoch": 2.7896671826625385, "grad_norm": 0.04987549036741257, "learning_rate": 8.404344429989872e-05, "loss": 0.0068, "step": 14413 }, { "epoch": 2.789860681114551, "grad_norm": 0.05220876634120941, "learning_rate": 8.404134842975626e-05, "loss": 0.0068, "step": 14414 }, { "epoch": 2.7900541795665634, "grad_norm": 0.051891691982746124, "learning_rate": 8.403925245164519e-05, "loss": 0.0069, "step": 14415 }, { "epoch": 2.790247678018576, "grad_norm": 0.05100870877504349, "learning_rate": 8.403715636557336e-05, "loss": 0.0064, "step": 14416 }, { "epoch": 2.7904411764705883, "grad_norm": 0.05340312048792839, "learning_rate": 8.403506017154852e-05, "loss": 0.0058, "step": 14417 }, { "epoch": 2.7906346749226008, "grad_norm": 0.045900922268629074, "learning_rate": 8.403296386957848e-05, "loss": 0.0074, "step": 14418 }, { "epoch": 2.7908281733746128, "grad_norm": 0.05381020903587341, "learning_rate": 8.403086745967105e-05, "loss": 0.006, "step": 14419 }, { "epoch": 2.791021671826625, "grad_norm": 0.03948979079723358, "learning_rate": 8.4028770941834e-05, "loss": 0.0068, "step": 14420 }, { "epoch": 2.7912151702786376, "grad_norm": 0.05220187455415726, "learning_rate": 8.402667431607514e-05, "loss": 0.0075, "step": 14421 }, { "epoch": 2.79140866873065, "grad_norm": 0.019189080223441124, "learning_rate": 8.402457758240224e-05, "loss": 0.0062, "step": 14422 }, { "epoch": 2.7916021671826625, "grad_norm": 0.05540717765688896, "learning_rate": 8.402248074082314e-05, "loss": 0.0068, "step": 14423 }, { "epoch": 2.791795665634675, "grad_norm": 
0.02583405375480652, "learning_rate": 8.402038379134559e-05, "loss": 0.005, "step": 14424 }, { "epoch": 2.7919891640866874, "grad_norm": 0.04267217218875885, "learning_rate": 8.40182867339774e-05, "loss": 0.0074, "step": 14425 }, { "epoch": 2.7921826625387, "grad_norm": 0.04165530949831009, "learning_rate": 8.401618956872636e-05, "loss": 0.007, "step": 14426 }, { "epoch": 2.7923761609907123, "grad_norm": 0.033963385969400406, "learning_rate": 8.40140922956003e-05, "loss": 0.0057, "step": 14427 }, { "epoch": 2.7925696594427247, "grad_norm": 0.050141751766204834, "learning_rate": 8.401199491460699e-05, "loss": 0.0074, "step": 14428 }, { "epoch": 2.7927631578947367, "grad_norm": 0.04118651896715164, "learning_rate": 8.400989742575422e-05, "loss": 0.0057, "step": 14429 }, { "epoch": 2.792956656346749, "grad_norm": 0.049486394971609116, "learning_rate": 8.400779982904982e-05, "loss": 0.0082, "step": 14430 }, { "epoch": 2.7931501547987616, "grad_norm": 0.05714605376124382, "learning_rate": 8.400570212450154e-05, "loss": 0.0058, "step": 14431 }, { "epoch": 2.793343653250774, "grad_norm": 0.0701555535197258, "learning_rate": 8.400360431211722e-05, "loss": 0.0083, "step": 14432 }, { "epoch": 2.7935371517027865, "grad_norm": 0.05489228293299675, "learning_rate": 8.400150639190466e-05, "loss": 0.0076, "step": 14433 }, { "epoch": 2.7937306501547985, "grad_norm": 0.05657323822379112, "learning_rate": 8.399940836387163e-05, "loss": 0.0063, "step": 14434 }, { "epoch": 2.793924148606811, "grad_norm": 0.059650495648384094, "learning_rate": 8.399731022802596e-05, "loss": 0.0063, "step": 14435 }, { "epoch": 2.7941176470588234, "grad_norm": 0.045883867889642715, "learning_rate": 8.39952119843754e-05, "loss": 0.0066, "step": 14436 }, { "epoch": 2.794311145510836, "grad_norm": 0.06634873151779175, "learning_rate": 8.399311363292782e-05, "loss": 0.0056, "step": 14437 }, { "epoch": 2.7945046439628483, "grad_norm": 0.01921052671968937, "learning_rate": 8.399101517369099e-05, "loss": 0.0062, "step": 14438 }, { "epoch": 2.7946981424148607, "grad_norm": 0.07150159776210785, "learning_rate": 8.398891660667268e-05, "loss": 0.0068, "step": 14439 }, { "epoch": 2.794891640866873, "grad_norm": 0.04639342799782753, "learning_rate": 8.398681793188075e-05, "loss": 0.0055, "step": 14440 }, { "epoch": 2.7950851393188856, "grad_norm": 0.04629119858145714, "learning_rate": 8.398471914932294e-05, "loss": 0.0068, "step": 14441 }, { "epoch": 2.795278637770898, "grad_norm": 0.07901376485824585, "learning_rate": 8.398262025900714e-05, "loss": 0.006, "step": 14442 }, { "epoch": 2.7954721362229105, "grad_norm": 0.039519816637039185, "learning_rate": 8.398052126094106e-05, "loss": 0.0054, "step": 14443 }, { "epoch": 2.7956656346749225, "grad_norm": 0.058995746076107025, "learning_rate": 8.397842215513255e-05, "loss": 0.0073, "step": 14444 }, { "epoch": 2.795859133126935, "grad_norm": 0.07336856424808502, "learning_rate": 8.39763229415894e-05, "loss": 0.0065, "step": 14445 }, { "epoch": 2.7960526315789473, "grad_norm": 0.04995299503207207, "learning_rate": 8.397422362031944e-05, "loss": 0.0056, "step": 14446 }, { "epoch": 2.79624613003096, "grad_norm": 0.07884044200181961, "learning_rate": 8.397212419133044e-05, "loss": 0.0081, "step": 14447 }, { "epoch": 2.7964396284829722, "grad_norm": 0.06441880017518997, "learning_rate": 8.397002465463022e-05, "loss": 0.0068, "step": 14448 }, { "epoch": 2.7966331269349847, "grad_norm": 0.06361757218837738, "learning_rate": 8.39679250102266e-05, "loss": 0.0069, "step": 14449 }, { "epoch": 
2.7968266253869967, "grad_norm": 0.06009155139327049, "learning_rate": 8.396582525812735e-05, "loss": 0.0075, "step": 14450 }, { "epoch": 2.797020123839009, "grad_norm": 0.0419335812330246, "learning_rate": 8.396372539834032e-05, "loss": 0.0066, "step": 14451 }, { "epoch": 2.7972136222910216, "grad_norm": 0.07563773542642593, "learning_rate": 8.396162543087329e-05, "loss": 0.0065, "step": 14452 }, { "epoch": 2.797407120743034, "grad_norm": 0.06788771599531174, "learning_rate": 8.395952535573407e-05, "loss": 0.0063, "step": 14453 }, { "epoch": 2.7976006191950464, "grad_norm": 0.08904232084751129, "learning_rate": 8.395742517293048e-05, "loss": 0.0064, "step": 14454 }, { "epoch": 2.797794117647059, "grad_norm": 0.0758616179227829, "learning_rate": 8.395532488247031e-05, "loss": 0.006, "step": 14455 }, { "epoch": 2.7979876160990713, "grad_norm": 0.0868852436542511, "learning_rate": 8.395322448436137e-05, "loss": 0.006, "step": 14456 }, { "epoch": 2.7981811145510838, "grad_norm": 0.06375564634799957, "learning_rate": 8.39511239786115e-05, "loss": 0.0066, "step": 14457 }, { "epoch": 2.798374613003096, "grad_norm": 0.09148740023374557, "learning_rate": 8.394902336522846e-05, "loss": 0.0068, "step": 14458 }, { "epoch": 2.798568111455108, "grad_norm": 0.04744185507297516, "learning_rate": 8.39469226442201e-05, "loss": 0.0053, "step": 14459 }, { "epoch": 2.7987616099071206, "grad_norm": 0.08962589502334595, "learning_rate": 8.39448218155942e-05, "loss": 0.007, "step": 14460 }, { "epoch": 2.798955108359133, "grad_norm": 0.06671454012393951, "learning_rate": 8.394272087935858e-05, "loss": 0.008, "step": 14461 }, { "epoch": 2.7991486068111455, "grad_norm": 0.055582087486982346, "learning_rate": 8.394061983552107e-05, "loss": 0.008, "step": 14462 }, { "epoch": 2.799342105263158, "grad_norm": 0.1319381594657898, "learning_rate": 8.393851868408948e-05, "loss": 0.0083, "step": 14463 }, { "epoch": 2.7995356037151704, "grad_norm": 0.02865743450820446, "learning_rate": 8.393641742507159e-05, "loss": 0.006, "step": 14464 }, { "epoch": 2.7997291021671824, "grad_norm": 0.13769719004631042, "learning_rate": 8.393431605847522e-05, "loss": 0.0081, "step": 14465 }, { "epoch": 2.799922600619195, "grad_norm": 0.030550707131624222, "learning_rate": 8.393221458430822e-05, "loss": 0.0063, "step": 14466 }, { "epoch": 2.8001160990712073, "grad_norm": 0.10622383654117584, "learning_rate": 8.393011300257835e-05, "loss": 0.0067, "step": 14467 }, { "epoch": 2.8003095975232197, "grad_norm": 0.0805019810795784, "learning_rate": 8.392801131329347e-05, "loss": 0.0067, "step": 14468 }, { "epoch": 2.800503095975232, "grad_norm": 0.16464978456497192, "learning_rate": 8.392590951646135e-05, "loss": 0.0083, "step": 14469 }, { "epoch": 2.8006965944272446, "grad_norm": 0.04176339507102966, "learning_rate": 8.392380761208985e-05, "loss": 0.0072, "step": 14470 }, { "epoch": 2.800890092879257, "grad_norm": 0.14702211320400238, "learning_rate": 8.392170560018674e-05, "loss": 0.0083, "step": 14471 }, { "epoch": 2.8010835913312695, "grad_norm": 0.05905534699559212, "learning_rate": 8.391960348075987e-05, "loss": 0.0064, "step": 14472 }, { "epoch": 2.801277089783282, "grad_norm": 0.1280064433813095, "learning_rate": 8.391750125381703e-05, "loss": 0.0053, "step": 14473 }, { "epoch": 2.8014705882352944, "grad_norm": 0.09749758243560791, "learning_rate": 8.391539891936604e-05, "loss": 0.0057, "step": 14474 }, { "epoch": 2.8016640866873064, "grad_norm": 0.07172427326440811, "learning_rate": 8.391329647741473e-05, "loss": 0.0071, "step": 14475 }, { 
"epoch": 2.801857585139319, "grad_norm": 0.1498810201883316, "learning_rate": 8.391119392797091e-05, "loss": 0.0071, "step": 14476 }, { "epoch": 2.8020510835913313, "grad_norm": 0.025655293837189674, "learning_rate": 8.390909127104239e-05, "loss": 0.0069, "step": 14477 }, { "epoch": 2.8022445820433437, "grad_norm": 0.16087830066680908, "learning_rate": 8.390698850663698e-05, "loss": 0.0059, "step": 14478 }, { "epoch": 2.802438080495356, "grad_norm": 0.1084364503622055, "learning_rate": 8.390488563476254e-05, "loss": 0.0058, "step": 14479 }, { "epoch": 2.8026315789473686, "grad_norm": 0.2114388346672058, "learning_rate": 8.390278265542685e-05, "loss": 0.0071, "step": 14480 }, { "epoch": 2.8028250773993806, "grad_norm": 0.11187721788883209, "learning_rate": 8.390067956863772e-05, "loss": 0.006, "step": 14481 }, { "epoch": 2.803018575851393, "grad_norm": 0.1651713103055954, "learning_rate": 8.389857637440299e-05, "loss": 0.0068, "step": 14482 }, { "epoch": 2.8032120743034055, "grad_norm": 0.14949004352092743, "learning_rate": 8.389647307273048e-05, "loss": 0.007, "step": 14483 }, { "epoch": 2.803405572755418, "grad_norm": 0.10587380826473236, "learning_rate": 8.389436966362799e-05, "loss": 0.0073, "step": 14484 }, { "epoch": 2.8035990712074303, "grad_norm": 0.15999209880828857, "learning_rate": 8.389226614710337e-05, "loss": 0.0052, "step": 14485 }, { "epoch": 2.803792569659443, "grad_norm": 0.10789553821086884, "learning_rate": 8.389016252316443e-05, "loss": 0.0072, "step": 14486 }, { "epoch": 2.8039860681114552, "grad_norm": 0.17144910991191864, "learning_rate": 8.388805879181895e-05, "loss": 0.0072, "step": 14487 }, { "epoch": 2.8041795665634677, "grad_norm": 0.17099595069885254, "learning_rate": 8.38859549530748e-05, "loss": 0.0065, "step": 14488 }, { "epoch": 2.80437306501548, "grad_norm": 0.05477128550410271, "learning_rate": 8.388385100693981e-05, "loss": 0.0072, "step": 14489 }, { "epoch": 2.804566563467492, "grad_norm": 0.22531656920909882, "learning_rate": 8.388174695342177e-05, "loss": 0.0061, "step": 14490 }, { "epoch": 2.8047600619195046, "grad_norm": 0.08210328966379166, "learning_rate": 8.38796427925285e-05, "loss": 0.0072, "step": 14491 }, { "epoch": 2.804953560371517, "grad_norm": 0.17810450494289398, "learning_rate": 8.387753852426785e-05, "loss": 0.0066, "step": 14492 }, { "epoch": 2.8051470588235294, "grad_norm": 0.168079674243927, "learning_rate": 8.387543414864761e-05, "loss": 0.0081, "step": 14493 }, { "epoch": 2.805340557275542, "grad_norm": 0.0982033833861351, "learning_rate": 8.387332966567563e-05, "loss": 0.0071, "step": 14494 }, { "epoch": 2.8055340557275543, "grad_norm": 0.20726081728935242, "learning_rate": 8.387122507535971e-05, "loss": 0.0064, "step": 14495 }, { "epoch": 2.8057275541795663, "grad_norm": 0.02266670949757099, "learning_rate": 8.38691203777077e-05, "loss": 0.0079, "step": 14496 }, { "epoch": 2.8059210526315788, "grad_norm": 0.19245214760303497, "learning_rate": 8.386701557272741e-05, "loss": 0.0062, "step": 14497 }, { "epoch": 2.806114551083591, "grad_norm": 0.10346841812133789, "learning_rate": 8.386491066042669e-05, "loss": 0.0078, "step": 14498 }, { "epoch": 2.8063080495356036, "grad_norm": 0.14940612018108368, "learning_rate": 8.386280564081333e-05, "loss": 0.0071, "step": 14499 }, { "epoch": 2.806501547987616, "grad_norm": 0.14788825809955597, "learning_rate": 8.386070051389517e-05, "loss": 0.0077, "step": 14500 }, { "epoch": 2.8066950464396285, "grad_norm": 0.09722644835710526, "learning_rate": 8.385859527968002e-05, "loss": 0.0066, "step": 
14501 }, { "epoch": 2.806888544891641, "grad_norm": 0.1606491506099701, "learning_rate": 8.385648993817574e-05, "loss": 0.0083, "step": 14502 }, { "epoch": 2.8070820433436534, "grad_norm": 0.07683345675468445, "learning_rate": 8.385438448939014e-05, "loss": 0.0068, "step": 14503 }, { "epoch": 2.807275541795666, "grad_norm": 0.12271817028522491, "learning_rate": 8.385227893333106e-05, "loss": 0.0064, "step": 14504 }, { "epoch": 2.8074690402476783, "grad_norm": 0.07877343893051147, "learning_rate": 8.38501732700063e-05, "loss": 0.0067, "step": 14505 }, { "epoch": 2.8076625386996903, "grad_norm": 0.10600458830595016, "learning_rate": 8.384806749942371e-05, "loss": 0.0061, "step": 14506 }, { "epoch": 2.8078560371517027, "grad_norm": 0.1041049212217331, "learning_rate": 8.384596162159112e-05, "loss": 0.0062, "step": 14507 }, { "epoch": 2.808049535603715, "grad_norm": 0.07090949267148972, "learning_rate": 8.384385563651634e-05, "loss": 0.0065, "step": 14508 }, { "epoch": 2.8082430340557276, "grad_norm": 0.10232974588871002, "learning_rate": 8.384174954420722e-05, "loss": 0.0063, "step": 14509 }, { "epoch": 2.80843653250774, "grad_norm": 0.06106971576809883, "learning_rate": 8.383964334467158e-05, "loss": 0.0052, "step": 14510 }, { "epoch": 2.808630030959752, "grad_norm": 0.07329820841550827, "learning_rate": 8.383753703791725e-05, "loss": 0.0082, "step": 14511 }, { "epoch": 2.8088235294117645, "grad_norm": 0.16822528839111328, "learning_rate": 8.383543062395208e-05, "loss": 0.0054, "step": 14512 }, { "epoch": 2.809017027863777, "grad_norm": 0.050279952585697174, "learning_rate": 8.383332410278386e-05, "loss": 0.0055, "step": 14513 }, { "epoch": 2.8092105263157894, "grad_norm": 0.15850681066513062, "learning_rate": 8.383121747442046e-05, "loss": 0.0063, "step": 14514 }, { "epoch": 2.809404024767802, "grad_norm": 0.06821394711732864, "learning_rate": 8.38291107388697e-05, "loss": 0.007, "step": 14515 }, { "epoch": 2.8095975232198143, "grad_norm": 0.10414258390665054, "learning_rate": 8.38270038961394e-05, "loss": 0.0069, "step": 14516 }, { "epoch": 2.8097910216718267, "grad_norm": 0.1213628351688385, "learning_rate": 8.382489694623739e-05, "loss": 0.007, "step": 14517 }, { "epoch": 2.809984520123839, "grad_norm": 0.05182814225554466, "learning_rate": 8.382278988917155e-05, "loss": 0.0054, "step": 14518 }, { "epoch": 2.8101780185758516, "grad_norm": 0.11331998556852341, "learning_rate": 8.382068272494965e-05, "loss": 0.0067, "step": 14519 }, { "epoch": 2.810371517027864, "grad_norm": 0.06846592575311661, "learning_rate": 8.381857545357957e-05, "loss": 0.0072, "step": 14520 }, { "epoch": 2.810565015479876, "grad_norm": 0.10205728560686111, "learning_rate": 8.381646807506913e-05, "loss": 0.0075, "step": 14521 }, { "epoch": 2.8107585139318885, "grad_norm": 0.09624545276165009, "learning_rate": 8.381436058942615e-05, "loss": 0.0067, "step": 14522 }, { "epoch": 2.810952012383901, "grad_norm": 0.08498183637857437, "learning_rate": 8.381225299665847e-05, "loss": 0.007, "step": 14523 }, { "epoch": 2.8111455108359134, "grad_norm": 0.066105417907238, "learning_rate": 8.381014529677395e-05, "loss": 0.0066, "step": 14524 }, { "epoch": 2.811339009287926, "grad_norm": 0.10944084078073502, "learning_rate": 8.38080374897804e-05, "loss": 0.0066, "step": 14525 }, { "epoch": 2.8115325077399382, "grad_norm": 0.054476186633110046, "learning_rate": 8.380592957568566e-05, "loss": 0.007, "step": 14526 }, { "epoch": 2.8117260061919502, "grad_norm": 0.08926353603601456, "learning_rate": 8.380382155449758e-05, "loss": 
0.0072, "step": 14527 }, { "epoch": 2.8119195046439627, "grad_norm": 0.037553660571575165, "learning_rate": 8.380171342622399e-05, "loss": 0.007, "step": 14528 }, { "epoch": 2.812113003095975, "grad_norm": 0.080763079226017, "learning_rate": 8.379960519087272e-05, "loss": 0.006, "step": 14529 }, { "epoch": 2.8123065015479876, "grad_norm": 0.032793477177619934, "learning_rate": 8.379749684845162e-05, "loss": 0.0074, "step": 14530 }, { "epoch": 2.8125, "grad_norm": 0.07831648737192154, "learning_rate": 8.379538839896852e-05, "loss": 0.0078, "step": 14531 }, { "epoch": 2.8126934984520124, "grad_norm": 0.04710783436894417, "learning_rate": 8.379327984243127e-05, "loss": 0.0066, "step": 14532 }, { "epoch": 2.812886996904025, "grad_norm": 0.04911717027425766, "learning_rate": 8.379117117884768e-05, "loss": 0.0069, "step": 14533 }, { "epoch": 2.8130804953560373, "grad_norm": 0.08146193623542786, "learning_rate": 8.378906240822563e-05, "loss": 0.0072, "step": 14534 }, { "epoch": 2.8132739938080498, "grad_norm": 0.071369968354702, "learning_rate": 8.378695353057294e-05, "loss": 0.007, "step": 14535 }, { "epoch": 2.8134674922600618, "grad_norm": 0.09932202100753784, "learning_rate": 8.378484454589745e-05, "loss": 0.0068, "step": 14536 }, { "epoch": 2.813660990712074, "grad_norm": 0.06921406835317612, "learning_rate": 8.378273545420698e-05, "loss": 0.0064, "step": 14537 }, { "epoch": 2.8138544891640866, "grad_norm": 0.11507783830165863, "learning_rate": 8.37806262555094e-05, "loss": 0.0075, "step": 14538 }, { "epoch": 2.814047987616099, "grad_norm": 0.07569538801908493, "learning_rate": 8.377851694981256e-05, "loss": 0.0081, "step": 14539 }, { "epoch": 2.8142414860681115, "grad_norm": 0.08057709038257599, "learning_rate": 8.377640753712428e-05, "loss": 0.0062, "step": 14540 }, { "epoch": 2.814434984520124, "grad_norm": 0.07660982012748718, "learning_rate": 8.377429801745239e-05, "loss": 0.0063, "step": 14541 }, { "epoch": 2.814628482972136, "grad_norm": 0.050546277314424515, "learning_rate": 8.377218839080476e-05, "loss": 0.0079, "step": 14542 }, { "epoch": 2.8148219814241484, "grad_norm": 0.06433282047510147, "learning_rate": 8.377007865718923e-05, "loss": 0.0082, "step": 14543 }, { "epoch": 2.815015479876161, "grad_norm": 0.0447612926363945, "learning_rate": 8.376796881661364e-05, "loss": 0.0061, "step": 14544 }, { "epoch": 2.8152089783281733, "grad_norm": 0.06500263512134552, "learning_rate": 8.376585886908582e-05, "loss": 0.008, "step": 14545 }, { "epoch": 2.8154024767801857, "grad_norm": 0.04667801409959793, "learning_rate": 8.376374881461362e-05, "loss": 0.0072, "step": 14546 }, { "epoch": 2.815595975232198, "grad_norm": 0.046530745923519135, "learning_rate": 8.37616386532049e-05, "loss": 0.0054, "step": 14547 }, { "epoch": 2.8157894736842106, "grad_norm": 0.05573274567723274, "learning_rate": 8.375952838486748e-05, "loss": 0.0067, "step": 14548 }, { "epoch": 2.815982972136223, "grad_norm": 0.03709528595209122, "learning_rate": 8.375741800960923e-05, "loss": 0.0065, "step": 14549 }, { "epoch": 2.8161764705882355, "grad_norm": 0.05860069766640663, "learning_rate": 8.375530752743799e-05, "loss": 0.0065, "step": 14550 }, { "epoch": 2.816369969040248, "grad_norm": 0.06560295075178146, "learning_rate": 8.37531969383616e-05, "loss": 0.0067, "step": 14551 }, { "epoch": 2.81656346749226, "grad_norm": 0.05764421448111534, "learning_rate": 8.375108624238791e-05, "loss": 0.0057, "step": 14552 }, { "epoch": 2.8167569659442724, "grad_norm": 0.06336647272109985, "learning_rate": 8.374897543952475e-05, 
"loss": 0.0069, "step": 14553 }, { "epoch": 2.816950464396285, "grad_norm": 0.06675788760185242, "learning_rate": 8.374686452978001e-05, "loss": 0.0057, "step": 14554 }, { "epoch": 2.8171439628482973, "grad_norm": 0.07321875542402267, "learning_rate": 8.374475351316148e-05, "loss": 0.0076, "step": 14555 }, { "epoch": 2.8173374613003097, "grad_norm": 0.06267897039651871, "learning_rate": 8.374264238967707e-05, "loss": 0.006, "step": 14556 }, { "epoch": 2.8175309597523217, "grad_norm": 0.0789187103509903, "learning_rate": 8.37405311593346e-05, "loss": 0.0061, "step": 14557 }, { "epoch": 2.817724458204334, "grad_norm": 0.05961932986974716, "learning_rate": 8.373841982214188e-05, "loss": 0.0082, "step": 14558 }, { "epoch": 2.8179179566563466, "grad_norm": 0.08813256770372391, "learning_rate": 8.373630837810684e-05, "loss": 0.0062, "step": 14559 }, { "epoch": 2.818111455108359, "grad_norm": 0.07019014656543732, "learning_rate": 8.373419682723726e-05, "loss": 0.0069, "step": 14560 }, { "epoch": 2.8183049535603715, "grad_norm": 0.0901578962802887, "learning_rate": 8.373208516954102e-05, "loss": 0.0059, "step": 14561 }, { "epoch": 2.818498452012384, "grad_norm": 0.09570012241601944, "learning_rate": 8.372997340502596e-05, "loss": 0.0056, "step": 14562 }, { "epoch": 2.8186919504643964, "grad_norm": 0.06813343614339828, "learning_rate": 8.372786153369995e-05, "loss": 0.006, "step": 14563 }, { "epoch": 2.818885448916409, "grad_norm": 0.07885567843914032, "learning_rate": 8.372574955557083e-05, "loss": 0.0059, "step": 14564 }, { "epoch": 2.8190789473684212, "grad_norm": 0.07744564116001129, "learning_rate": 8.372363747064644e-05, "loss": 0.0072, "step": 14565 }, { "epoch": 2.8192724458204337, "grad_norm": 0.0756942555308342, "learning_rate": 8.372152527893467e-05, "loss": 0.0062, "step": 14566 }, { "epoch": 2.8194659442724457, "grad_norm": 0.057728879153728485, "learning_rate": 8.371941298044332e-05, "loss": 0.0062, "step": 14567 }, { "epoch": 2.819659442724458, "grad_norm": 0.030642163008451462, "learning_rate": 8.371730057518026e-05, "loss": 0.007, "step": 14568 }, { "epoch": 2.8198529411764706, "grad_norm": 0.06847406178712845, "learning_rate": 8.371518806315338e-05, "loss": 0.0059, "step": 14569 }, { "epoch": 2.820046439628483, "grad_norm": 0.04259307309985161, "learning_rate": 8.37130754443705e-05, "loss": 0.0067, "step": 14570 }, { "epoch": 2.8202399380804954, "grad_norm": 0.0745851993560791, "learning_rate": 8.371096271883947e-05, "loss": 0.0068, "step": 14571 }, { "epoch": 2.820433436532508, "grad_norm": 0.04967408627271652, "learning_rate": 8.370884988656815e-05, "loss": 0.0084, "step": 14572 }, { "epoch": 2.82062693498452, "grad_norm": 0.07114062458276749, "learning_rate": 8.370673694756442e-05, "loss": 0.0066, "step": 14573 }, { "epoch": 2.8208204334365323, "grad_norm": 0.0757712870836258, "learning_rate": 8.37046239018361e-05, "loss": 0.0073, "step": 14574 }, { "epoch": 2.8210139318885448, "grad_norm": 0.07369225472211838, "learning_rate": 8.370251074939107e-05, "loss": 0.007, "step": 14575 }, { "epoch": 2.821207430340557, "grad_norm": 0.06190820410847664, "learning_rate": 8.370039749023718e-05, "loss": 0.0068, "step": 14576 }, { "epoch": 2.8214009287925697, "grad_norm": 0.05285414308309555, "learning_rate": 8.369828412438228e-05, "loss": 0.006, "step": 14577 }, { "epoch": 2.821594427244582, "grad_norm": 0.057923123240470886, "learning_rate": 8.369617065183422e-05, "loss": 0.0073, "step": 14578 }, { "epoch": 2.8217879256965945, "grad_norm": 0.05625203996896744, "learning_rate": 
8.369405707260088e-05, "loss": 0.0063, "step": 14579 }, { "epoch": 2.821981424148607, "grad_norm": 0.056785810738801956, "learning_rate": 8.369194338669009e-05, "loss": 0.0075, "step": 14580 }, { "epoch": 2.8221749226006194, "grad_norm": 0.06949085742235184, "learning_rate": 8.368982959410975e-05, "loss": 0.0071, "step": 14581 }, { "epoch": 2.8223684210526314, "grad_norm": 0.04964013025164604, "learning_rate": 8.368771569486768e-05, "loss": 0.0057, "step": 14582 }, { "epoch": 2.822561919504644, "grad_norm": 0.11363309621810913, "learning_rate": 8.368560168897175e-05, "loss": 0.0064, "step": 14583 }, { "epoch": 2.8227554179566563, "grad_norm": 0.058188196271657944, "learning_rate": 8.368348757642984e-05, "loss": 0.0077, "step": 14584 }, { "epoch": 2.8229489164086687, "grad_norm": 0.09973182529211044, "learning_rate": 8.368137335724977e-05, "loss": 0.0069, "step": 14585 }, { "epoch": 2.823142414860681, "grad_norm": 0.06332483887672424, "learning_rate": 8.367925903143944e-05, "loss": 0.0064, "step": 14586 }, { "epoch": 2.8233359133126936, "grad_norm": 0.061291925609111786, "learning_rate": 8.367714459900667e-05, "loss": 0.0078, "step": 14587 }, { "epoch": 2.8235294117647056, "grad_norm": 0.09101337939500809, "learning_rate": 8.367503005995935e-05, "loss": 0.0067, "step": 14588 }, { "epoch": 2.823722910216718, "grad_norm": 0.04461805522441864, "learning_rate": 8.367291541430535e-05, "loss": 0.006, "step": 14589 }, { "epoch": 2.8239164086687305, "grad_norm": 0.0723189115524292, "learning_rate": 8.367080066205249e-05, "loss": 0.0063, "step": 14590 }, { "epoch": 2.824109907120743, "grad_norm": 0.059442583471536636, "learning_rate": 8.366868580320867e-05, "loss": 0.006, "step": 14591 }, { "epoch": 2.8243034055727554, "grad_norm": 0.06819440424442291, "learning_rate": 8.366657083778173e-05, "loss": 0.0063, "step": 14592 }, { "epoch": 2.824496904024768, "grad_norm": 0.06635668873786926, "learning_rate": 8.366445576577956e-05, "loss": 0.0077, "step": 14593 }, { "epoch": 2.8246904024767803, "grad_norm": 0.049708250910043716, "learning_rate": 8.366234058720999e-05, "loss": 0.0064, "step": 14594 }, { "epoch": 2.8248839009287927, "grad_norm": 0.08781465888023376, "learning_rate": 8.366022530208091e-05, "loss": 0.0072, "step": 14595 }, { "epoch": 2.825077399380805, "grad_norm": 0.05676686763763428, "learning_rate": 8.365810991040019e-05, "loss": 0.0055, "step": 14596 }, { "epoch": 2.8252708978328176, "grad_norm": 0.05596192181110382, "learning_rate": 8.365599441217566e-05, "loss": 0.0056, "step": 14597 }, { "epoch": 2.8254643962848296, "grad_norm": 0.05893557891249657, "learning_rate": 8.36538788074152e-05, "loss": 0.006, "step": 14598 }, { "epoch": 2.825657894736842, "grad_norm": 0.06478339433670044, "learning_rate": 8.365176309612668e-05, "loss": 0.0084, "step": 14599 }, { "epoch": 2.8258513931888545, "grad_norm": 0.08437809348106384, "learning_rate": 8.364964727831799e-05, "loss": 0.0067, "step": 14600 }, { "epoch": 2.826044891640867, "grad_norm": 0.05499451607465744, "learning_rate": 8.364753135399694e-05, "loss": 0.0059, "step": 14601 }, { "epoch": 2.8262383900928794, "grad_norm": 0.07337705045938492, "learning_rate": 8.364541532317143e-05, "loss": 0.0056, "step": 14602 }, { "epoch": 2.826431888544892, "grad_norm": 0.053354162722826004, "learning_rate": 8.364329918584934e-05, "loss": 0.0062, "step": 14603 }, { "epoch": 2.826625386996904, "grad_norm": 0.050314027816057205, "learning_rate": 8.36411829420385e-05, "loss": 0.0075, "step": 14604 }, { "epoch": 2.8268188854489162, "grad_norm": 
0.07493920624256134, "learning_rate": 8.363906659174682e-05, "loss": 0.0068, "step": 14605 }, { "epoch": 2.8270123839009287, "grad_norm": 0.0297863632440567, "learning_rate": 8.363695013498215e-05, "loss": 0.0063, "step": 14606 }, { "epoch": 2.827205882352941, "grad_norm": 0.09995763003826141, "learning_rate": 8.363483357175236e-05, "loss": 0.0063, "step": 14607 }, { "epoch": 2.8273993808049536, "grad_norm": 0.05078989639878273, "learning_rate": 8.363271690206529e-05, "loss": 0.0085, "step": 14608 }, { "epoch": 2.827592879256966, "grad_norm": 0.07937997579574585, "learning_rate": 8.363060012592885e-05, "loss": 0.0077, "step": 14609 }, { "epoch": 2.8277863777089784, "grad_norm": 0.0820564478635788, "learning_rate": 8.362848324335089e-05, "loss": 0.0046, "step": 14610 }, { "epoch": 2.827979876160991, "grad_norm": 0.08619271218776703, "learning_rate": 8.362636625433928e-05, "loss": 0.0074, "step": 14611 }, { "epoch": 2.8281733746130033, "grad_norm": 0.04223858565092087, "learning_rate": 8.36242491589019e-05, "loss": 0.0072, "step": 14612 }, { "epoch": 2.8283668730650153, "grad_norm": 0.05682298541069031, "learning_rate": 8.36221319570466e-05, "loss": 0.0048, "step": 14613 }, { "epoch": 2.8285603715170278, "grad_norm": 0.06288344413042068, "learning_rate": 8.362001464878128e-05, "loss": 0.0065, "step": 14614 }, { "epoch": 2.82875386996904, "grad_norm": 0.03389255329966545, "learning_rate": 8.36178972341138e-05, "loss": 0.0068, "step": 14615 }, { "epoch": 2.8289473684210527, "grad_norm": 0.04134302958846092, "learning_rate": 8.361577971305202e-05, "loss": 0.0063, "step": 14616 }, { "epoch": 2.829140866873065, "grad_norm": 0.04924062639474869, "learning_rate": 8.361366208560383e-05, "loss": 0.0076, "step": 14617 }, { "epoch": 2.8293343653250775, "grad_norm": 0.052839495241642, "learning_rate": 8.361154435177708e-05, "loss": 0.0062, "step": 14618 }, { "epoch": 2.8295278637770895, "grad_norm": 0.05727959796786308, "learning_rate": 8.360942651157967e-05, "loss": 0.0068, "step": 14619 }, { "epoch": 2.829721362229102, "grad_norm": 0.04207232594490051, "learning_rate": 8.360730856501946e-05, "loss": 0.0071, "step": 14620 }, { "epoch": 2.8299148606811144, "grad_norm": 0.06439176946878433, "learning_rate": 8.360519051210433e-05, "loss": 0.0088, "step": 14621 }, { "epoch": 2.830108359133127, "grad_norm": 0.04513297975063324, "learning_rate": 8.360307235284215e-05, "loss": 0.006, "step": 14622 }, { "epoch": 2.8303018575851393, "grad_norm": 0.04949866607785225, "learning_rate": 8.360095408724078e-05, "loss": 0.006, "step": 14623 }, { "epoch": 2.8304953560371517, "grad_norm": 0.059879280626773834, "learning_rate": 8.359883571530812e-05, "loss": 0.0067, "step": 14624 }, { "epoch": 2.830688854489164, "grad_norm": 0.04229457676410675, "learning_rate": 8.359671723705205e-05, "loss": 0.0067, "step": 14625 }, { "epoch": 2.8308823529411766, "grad_norm": 0.059822384268045425, "learning_rate": 8.359459865248041e-05, "loss": 0.0077, "step": 14626 }, { "epoch": 2.831075851393189, "grad_norm": 0.0797354057431221, "learning_rate": 8.359247996160111e-05, "loss": 0.0055, "step": 14627 }, { "epoch": 2.8312693498452015, "grad_norm": 0.05639035999774933, "learning_rate": 8.359036116442202e-05, "loss": 0.0063, "step": 14628 }, { "epoch": 2.8314628482972135, "grad_norm": 0.05141589418053627, "learning_rate": 8.3588242260951e-05, "loss": 0.0063, "step": 14629 }, { "epoch": 2.831656346749226, "grad_norm": 0.062399592250585556, "learning_rate": 8.358612325119595e-05, "loss": 0.0067, "step": 14630 }, { "epoch": 2.8318498452012384, 
"grad_norm": 0.06279953569173813, "learning_rate": 8.358400413516472e-05, "loss": 0.0059, "step": 14631 }, { "epoch": 2.832043343653251, "grad_norm": 0.06948360800743103, "learning_rate": 8.358188491286523e-05, "loss": 0.0072, "step": 14632 }, { "epoch": 2.8322368421052633, "grad_norm": 0.06339646130800247, "learning_rate": 8.357976558430532e-05, "loss": 0.0077, "step": 14633 }, { "epoch": 2.8324303405572753, "grad_norm": 0.06019357591867447, "learning_rate": 8.357764614949288e-05, "loss": 0.0066, "step": 14634 }, { "epoch": 2.8326238390092877, "grad_norm": 0.08320212364196777, "learning_rate": 8.357552660843581e-05, "loss": 0.0062, "step": 14635 }, { "epoch": 2.8328173374613, "grad_norm": 0.07623408734798431, "learning_rate": 8.357340696114196e-05, "loss": 0.0068, "step": 14636 }, { "epoch": 2.8330108359133126, "grad_norm": 0.08030704408884048, "learning_rate": 8.357128720761922e-05, "loss": 0.0062, "step": 14637 }, { "epoch": 2.833204334365325, "grad_norm": 0.05604070797562599, "learning_rate": 8.356916734787549e-05, "loss": 0.0071, "step": 14638 }, { "epoch": 2.8333978328173375, "grad_norm": 0.09893852472305298, "learning_rate": 8.356704738191861e-05, "loss": 0.0064, "step": 14639 }, { "epoch": 2.83359133126935, "grad_norm": 0.02293039858341217, "learning_rate": 8.35649273097565e-05, "loss": 0.0073, "step": 14640 }, { "epoch": 2.8337848297213624, "grad_norm": 0.09806495159864426, "learning_rate": 8.356280713139705e-05, "loss": 0.0065, "step": 14641 }, { "epoch": 2.833978328173375, "grad_norm": 0.034719936549663544, "learning_rate": 8.35606868468481e-05, "loss": 0.007, "step": 14642 }, { "epoch": 2.8341718266253872, "grad_norm": 0.09112871438264847, "learning_rate": 8.355856645611756e-05, "loss": 0.0056, "step": 14643 }, { "epoch": 2.8343653250773992, "grad_norm": 0.057162221521139145, "learning_rate": 8.35564459592133e-05, "loss": 0.0066, "step": 14644 }, { "epoch": 2.8345588235294117, "grad_norm": 0.07587756961584091, "learning_rate": 8.35543253561432e-05, "loss": 0.0066, "step": 14645 }, { "epoch": 2.834752321981424, "grad_norm": 0.09551426768302917, "learning_rate": 8.355220464691518e-05, "loss": 0.0059, "step": 14646 }, { "epoch": 2.8349458204334366, "grad_norm": 0.09280483424663544, "learning_rate": 8.355008383153709e-05, "loss": 0.0069, "step": 14647 }, { "epoch": 2.835139318885449, "grad_norm": 0.07990638166666031, "learning_rate": 8.354796291001681e-05, "loss": 0.0064, "step": 14648 }, { "epoch": 2.8353328173374615, "grad_norm": 0.10804102569818497, "learning_rate": 8.354584188236226e-05, "loss": 0.0067, "step": 14649 }, { "epoch": 2.8355263157894735, "grad_norm": 0.06203618273139, "learning_rate": 8.354372074858128e-05, "loss": 0.0061, "step": 14650 }, { "epoch": 2.835719814241486, "grad_norm": 0.09912196546792984, "learning_rate": 8.354159950868178e-05, "loss": 0.0063, "step": 14651 }, { "epoch": 2.8359133126934983, "grad_norm": 0.08684594929218292, "learning_rate": 8.353947816267168e-05, "loss": 0.0067, "step": 14652 }, { "epoch": 2.8361068111455108, "grad_norm": 0.10452567040920258, "learning_rate": 8.35373567105588e-05, "loss": 0.0067, "step": 14653 }, { "epoch": 2.836300309597523, "grad_norm": 0.07834738492965698, "learning_rate": 8.353523515235106e-05, "loss": 0.0064, "step": 14654 }, { "epoch": 2.8364938080495357, "grad_norm": 0.10055194050073624, "learning_rate": 8.353311348805637e-05, "loss": 0.0084, "step": 14655 }, { "epoch": 2.836687306501548, "grad_norm": 0.03310065716505051, "learning_rate": 8.353099171768258e-05, "loss": 0.0065, "step": 14656 }, { "epoch": 
2.8368808049535605, "grad_norm": 0.105103999376297, "learning_rate": 8.35288698412376e-05, "loss": 0.0077, "step": 14657 }, { "epoch": 2.837074303405573, "grad_norm": 0.05699494108557701, "learning_rate": 8.352674785872929e-05, "loss": 0.0071, "step": 14658 }, { "epoch": 2.837267801857585, "grad_norm": 0.07191666215658188, "learning_rate": 8.352462577016559e-05, "loss": 0.0066, "step": 14659 }, { "epoch": 2.8374613003095974, "grad_norm": 0.04647829011082649, "learning_rate": 8.352250357555435e-05, "loss": 0.0065, "step": 14660 }, { "epoch": 2.83765479876161, "grad_norm": 0.06572619825601578, "learning_rate": 8.352038127490345e-05, "loss": 0.0074, "step": 14661 }, { "epoch": 2.8378482972136223, "grad_norm": 0.05856125056743622, "learning_rate": 8.351825886822083e-05, "loss": 0.0065, "step": 14662 }, { "epoch": 2.8380417956656347, "grad_norm": 0.07907672226428986, "learning_rate": 8.351613635551434e-05, "loss": 0.0047, "step": 14663 }, { "epoch": 2.838235294117647, "grad_norm": 0.04939945787191391, "learning_rate": 8.351401373679187e-05, "loss": 0.0076, "step": 14664 }, { "epoch": 2.838428792569659, "grad_norm": 0.06219185143709183, "learning_rate": 8.351189101206133e-05, "loss": 0.0071, "step": 14665 }, { "epoch": 2.8386222910216716, "grad_norm": 0.04657990112900734, "learning_rate": 8.35097681813306e-05, "loss": 0.0071, "step": 14666 }, { "epoch": 2.838815789473684, "grad_norm": 0.06066637486219406, "learning_rate": 8.35076452446076e-05, "loss": 0.0065, "step": 14667 }, { "epoch": 2.8390092879256965, "grad_norm": 0.032123077660799026, "learning_rate": 8.350552220190018e-05, "loss": 0.0082, "step": 14668 }, { "epoch": 2.839202786377709, "grad_norm": 0.07997947186231613, "learning_rate": 8.350339905321626e-05, "loss": 0.0068, "step": 14669 }, { "epoch": 2.8393962848297214, "grad_norm": 0.04829573631286621, "learning_rate": 8.350127579856371e-05, "loss": 0.0076, "step": 14670 }, { "epoch": 2.839589783281734, "grad_norm": 0.055539321154356, "learning_rate": 8.349915243795045e-05, "loss": 0.0056, "step": 14671 }, { "epoch": 2.8397832817337463, "grad_norm": 0.07077446579933167, "learning_rate": 8.349702897138435e-05, "loss": 0.0072, "step": 14672 }, { "epoch": 2.8399767801857587, "grad_norm": 0.07758209109306335, "learning_rate": 8.349490539887333e-05, "loss": 0.0076, "step": 14673 }, { "epoch": 2.840170278637771, "grad_norm": 0.0752878338098526, "learning_rate": 8.34927817204253e-05, "loss": 0.0068, "step": 14674 }, { "epoch": 2.840363777089783, "grad_norm": 0.07625667005777359, "learning_rate": 8.349065793604809e-05, "loss": 0.0058, "step": 14675 }, { "epoch": 2.8405572755417956, "grad_norm": 0.07248444110155106, "learning_rate": 8.348853404574965e-05, "loss": 0.0058, "step": 14676 }, { "epoch": 2.840750773993808, "grad_norm": 0.09141536056995392, "learning_rate": 8.348641004953786e-05, "loss": 0.0062, "step": 14677 }, { "epoch": 2.8409442724458205, "grad_norm": 0.08523005247116089, "learning_rate": 8.348428594742061e-05, "loss": 0.0079, "step": 14678 }, { "epoch": 2.841137770897833, "grad_norm": 0.06314603239297867, "learning_rate": 8.348216173940582e-05, "loss": 0.0066, "step": 14679 }, { "epoch": 2.8413312693498454, "grad_norm": 0.0861717164516449, "learning_rate": 8.348003742550134e-05, "loss": 0.0078, "step": 14680 }, { "epoch": 2.8415247678018574, "grad_norm": 0.050565361976623535, "learning_rate": 8.347791300571513e-05, "loss": 0.0071, "step": 14681 }, { "epoch": 2.84171826625387, "grad_norm": 0.07353997230529785, "learning_rate": 8.347578848005505e-05, "loss": 0.0054, "step": 14682 }, 
{ "epoch": 2.8419117647058822, "grad_norm": 0.10394464433193207, "learning_rate": 8.3473663848529e-05, "loss": 0.0071, "step": 14683 }, { "epoch": 2.8421052631578947, "grad_norm": 0.1030040755867958, "learning_rate": 8.347153911114487e-05, "loss": 0.0064, "step": 14684 }, { "epoch": 2.842298761609907, "grad_norm": 0.1068456843495369, "learning_rate": 8.346941426791058e-05, "loss": 0.0064, "step": 14685 }, { "epoch": 2.8424922600619196, "grad_norm": 0.045848384499549866, "learning_rate": 8.346728931883405e-05, "loss": 0.008, "step": 14686 }, { "epoch": 2.842685758513932, "grad_norm": 0.13193723559379578, "learning_rate": 8.346516426392312e-05, "loss": 0.0077, "step": 14687 }, { "epoch": 2.8428792569659445, "grad_norm": 0.06803955882787704, "learning_rate": 8.346303910318572e-05, "loss": 0.0072, "step": 14688 }, { "epoch": 2.843072755417957, "grad_norm": 0.11384117603302002, "learning_rate": 8.346091383662977e-05, "loss": 0.0065, "step": 14689 }, { "epoch": 2.843266253869969, "grad_norm": 0.08194322884082794, "learning_rate": 8.345878846426317e-05, "loss": 0.0078, "step": 14690 }, { "epoch": 2.8434597523219813, "grad_norm": 0.09159479290246964, "learning_rate": 8.345666298609378e-05, "loss": 0.0075, "step": 14691 }, { "epoch": 2.843653250773994, "grad_norm": 0.08573829382658005, "learning_rate": 8.345453740212954e-05, "loss": 0.006, "step": 14692 }, { "epoch": 2.843846749226006, "grad_norm": 0.07110635936260223, "learning_rate": 8.345241171237834e-05, "loss": 0.0069, "step": 14693 }, { "epoch": 2.8440402476780187, "grad_norm": 0.045492984354496, "learning_rate": 8.34502859168481e-05, "loss": 0.0083, "step": 14694 }, { "epoch": 2.844233746130031, "grad_norm": 0.0314202755689621, "learning_rate": 8.344816001554668e-05, "loss": 0.0052, "step": 14695 }, { "epoch": 2.844427244582043, "grad_norm": 0.07040882110595703, "learning_rate": 8.344603400848203e-05, "loss": 0.0071, "step": 14696 }, { "epoch": 2.8446207430340555, "grad_norm": 0.03865611553192139, "learning_rate": 8.344390789566202e-05, "loss": 0.0077, "step": 14697 }, { "epoch": 2.844814241486068, "grad_norm": 0.0559886209666729, "learning_rate": 8.344178167709458e-05, "loss": 0.0072, "step": 14698 }, { "epoch": 2.8450077399380804, "grad_norm": 0.034960098564624786, "learning_rate": 8.34396553527876e-05, "loss": 0.0052, "step": 14699 }, { "epoch": 2.845201238390093, "grad_norm": 0.05027710646390915, "learning_rate": 8.343752892274899e-05, "loss": 0.0064, "step": 14700 }, { "epoch": 2.8453947368421053, "grad_norm": 0.07566104084253311, "learning_rate": 8.343540238698665e-05, "loss": 0.0077, "step": 14701 }, { "epoch": 2.8455882352941178, "grad_norm": 0.0658288523554802, "learning_rate": 8.343327574550851e-05, "loss": 0.0075, "step": 14702 }, { "epoch": 2.84578173374613, "grad_norm": 0.1032826229929924, "learning_rate": 8.343114899832242e-05, "loss": 0.006, "step": 14703 }, { "epoch": 2.8459752321981426, "grad_norm": 0.09432093054056168, "learning_rate": 8.342902214543637e-05, "loss": 0.0067, "step": 14704 }, { "epoch": 2.8461687306501546, "grad_norm": 0.09772106260061264, "learning_rate": 8.342689518685819e-05, "loss": 0.0055, "step": 14705 }, { "epoch": 2.846362229102167, "grad_norm": 0.05777985602617264, "learning_rate": 8.342476812259582e-05, "loss": 0.0066, "step": 14706 }, { "epoch": 2.8465557275541795, "grad_norm": 0.1287509799003601, "learning_rate": 8.342264095265718e-05, "loss": 0.0062, "step": 14707 }, { "epoch": 2.846749226006192, "grad_norm": 0.06197290122509003, "learning_rate": 8.342051367705015e-05, "loss": 0.0056, "step": 
14708 }, { "epoch": 2.8469427244582044, "grad_norm": 0.11640087515115738, "learning_rate": 8.341838629578265e-05, "loss": 0.0068, "step": 14709 }, { "epoch": 2.847136222910217, "grad_norm": 0.07108864188194275, "learning_rate": 8.341625880886261e-05, "loss": 0.0064, "step": 14710 }, { "epoch": 2.847329721362229, "grad_norm": 0.06131018325686455, "learning_rate": 8.34141312162979e-05, "loss": 0.0066, "step": 14711 }, { "epoch": 2.8475232198142413, "grad_norm": 0.09911826252937317, "learning_rate": 8.341200351809648e-05, "loss": 0.0064, "step": 14712 }, { "epoch": 2.8477167182662537, "grad_norm": 0.04820353537797928, "learning_rate": 8.340987571426621e-05, "loss": 0.0073, "step": 14713 }, { "epoch": 2.847910216718266, "grad_norm": 0.09079241752624512, "learning_rate": 8.340774780481503e-05, "loss": 0.0069, "step": 14714 }, { "epoch": 2.8481037151702786, "grad_norm": 0.0429370142519474, "learning_rate": 8.340561978975085e-05, "loss": 0.0078, "step": 14715 }, { "epoch": 2.848297213622291, "grad_norm": 0.11519763618707657, "learning_rate": 8.340349166908156e-05, "loss": 0.0077, "step": 14716 }, { "epoch": 2.8484907120743035, "grad_norm": 0.06425537168979645, "learning_rate": 8.340136344281508e-05, "loss": 0.0072, "step": 14717 }, { "epoch": 2.848684210526316, "grad_norm": 0.13682173192501068, "learning_rate": 8.339923511095935e-05, "loss": 0.0087, "step": 14718 }, { "epoch": 2.8488777089783284, "grad_norm": 0.06113855540752411, "learning_rate": 8.339710667352226e-05, "loss": 0.0076, "step": 14719 }, { "epoch": 2.849071207430341, "grad_norm": 0.16717587411403656, "learning_rate": 8.339497813051171e-05, "loss": 0.006, "step": 14720 }, { "epoch": 2.849264705882353, "grad_norm": 0.07646502554416656, "learning_rate": 8.339284948193564e-05, "loss": 0.0067, "step": 14721 }, { "epoch": 2.8494582043343653, "grad_norm": 0.15833505988121033, "learning_rate": 8.339072072780196e-05, "loss": 0.0083, "step": 14722 }, { "epoch": 2.8496517027863777, "grad_norm": 0.1413390040397644, "learning_rate": 8.338859186811854e-05, "loss": 0.0072, "step": 14723 }, { "epoch": 2.84984520123839, "grad_norm": 0.0745522528886795, "learning_rate": 8.338646290289338e-05, "loss": 0.0076, "step": 14724 }, { "epoch": 2.8500386996904026, "grad_norm": 0.1441880166530609, "learning_rate": 8.338433383213433e-05, "loss": 0.008, "step": 14725 }, { "epoch": 2.850232198142415, "grad_norm": 0.06058936193585396, "learning_rate": 8.33822046558493e-05, "loss": 0.0093, "step": 14726 }, { "epoch": 2.850425696594427, "grad_norm": 0.13985338807106018, "learning_rate": 8.338007537404624e-05, "loss": 0.0068, "step": 14727 }, { "epoch": 2.8506191950464395, "grad_norm": 0.0588807538151741, "learning_rate": 8.337794598673304e-05, "loss": 0.0054, "step": 14728 }, { "epoch": 2.850812693498452, "grad_norm": 0.1107437014579773, "learning_rate": 8.337581649391764e-05, "loss": 0.0064, "step": 14729 }, { "epoch": 2.8510061919504643, "grad_norm": 0.07598930597305298, "learning_rate": 8.337368689560796e-05, "loss": 0.0084, "step": 14730 }, { "epoch": 2.851199690402477, "grad_norm": 0.11110245436429977, "learning_rate": 8.337155719181188e-05, "loss": 0.0076, "step": 14731 }, { "epoch": 2.8513931888544892, "grad_norm": 0.09367088973522186, "learning_rate": 8.336942738253734e-05, "loss": 0.0073, "step": 14732 }, { "epoch": 2.8515866873065017, "grad_norm": 0.12419278174638748, "learning_rate": 8.336729746779229e-05, "loss": 0.0069, "step": 14733 }, { "epoch": 2.851780185758514, "grad_norm": 0.05034218728542328, "learning_rate": 8.336516744758459e-05, "loss": 
0.0069, "step": 14734 }, { "epoch": 2.8519736842105265, "grad_norm": 0.08634413778781891, "learning_rate": 8.33630373219222e-05, "loss": 0.008, "step": 14735 }, { "epoch": 2.8521671826625385, "grad_norm": 0.07779844850301743, "learning_rate": 8.336090709081301e-05, "loss": 0.0081, "step": 14736 }, { "epoch": 2.852360681114551, "grad_norm": 0.05726451426744461, "learning_rate": 8.335877675426498e-05, "loss": 0.0065, "step": 14737 }, { "epoch": 2.8525541795665634, "grad_norm": 0.10818251222372055, "learning_rate": 8.335664631228599e-05, "loss": 0.007, "step": 14738 }, { "epoch": 2.852747678018576, "grad_norm": 0.06839705258607864, "learning_rate": 8.335451576488398e-05, "loss": 0.0067, "step": 14739 }, { "epoch": 2.8529411764705883, "grad_norm": 0.11445233225822449, "learning_rate": 8.335238511206687e-05, "loss": 0.0079, "step": 14740 }, { "epoch": 2.8531346749226008, "grad_norm": 0.08560483902692795, "learning_rate": 8.335025435384257e-05, "loss": 0.0078, "step": 14741 }, { "epoch": 2.8533281733746128, "grad_norm": 0.0857200101017952, "learning_rate": 8.334812349021902e-05, "loss": 0.0084, "step": 14742 }, { "epoch": 2.853521671826625, "grad_norm": 0.09270074963569641, "learning_rate": 8.334599252120411e-05, "loss": 0.0067, "step": 14743 }, { "epoch": 2.8537151702786376, "grad_norm": 0.06371011584997177, "learning_rate": 8.334386144680582e-05, "loss": 0.0065, "step": 14744 }, { "epoch": 2.85390866873065, "grad_norm": 0.10902590304613113, "learning_rate": 8.3341730267032e-05, "loss": 0.0073, "step": 14745 }, { "epoch": 2.8541021671826625, "grad_norm": 0.0603017695248127, "learning_rate": 8.333959898189063e-05, "loss": 0.0074, "step": 14746 }, { "epoch": 2.854295665634675, "grad_norm": 0.10646309703588486, "learning_rate": 8.333746759138962e-05, "loss": 0.0053, "step": 14747 }, { "epoch": 2.8544891640866874, "grad_norm": 0.06029069796204567, "learning_rate": 8.333533609553689e-05, "loss": 0.0066, "step": 14748 }, { "epoch": 2.8546826625387, "grad_norm": 0.06815249472856522, "learning_rate": 8.333320449434035e-05, "loss": 0.0082, "step": 14749 }, { "epoch": 2.8548761609907123, "grad_norm": 0.06244488060474396, "learning_rate": 8.333107278780795e-05, "loss": 0.0074, "step": 14750 }, { "epoch": 2.8550696594427247, "grad_norm": 0.06201355159282684, "learning_rate": 8.332894097594758e-05, "loss": 0.0079, "step": 14751 }, { "epoch": 2.8552631578947367, "grad_norm": 0.05931326001882553, "learning_rate": 8.33268090587672e-05, "loss": 0.0082, "step": 14752 }, { "epoch": 2.855456656346749, "grad_norm": 0.0768909901380539, "learning_rate": 8.332467703627473e-05, "loss": 0.0074, "step": 14753 }, { "epoch": 2.8556501547987616, "grad_norm": 0.06481859087944031, "learning_rate": 8.332254490847807e-05, "loss": 0.0068, "step": 14754 }, { "epoch": 2.855843653250774, "grad_norm": 0.08458463102579117, "learning_rate": 8.332041267538518e-05, "loss": 0.006, "step": 14755 }, { "epoch": 2.8560371517027865, "grad_norm": 0.04593098163604736, "learning_rate": 8.331828033700398e-05, "loss": 0.0068, "step": 14756 }, { "epoch": 2.8562306501547985, "grad_norm": 0.10712216794490814, "learning_rate": 8.331614789334239e-05, "loss": 0.0075, "step": 14757 }, { "epoch": 2.856424148606811, "grad_norm": 0.055685073137283325, "learning_rate": 8.331401534440833e-05, "loss": 0.0072, "step": 14758 }, { "epoch": 2.8566176470588234, "grad_norm": 0.1141585260629654, "learning_rate": 8.331188269020976e-05, "loss": 0.0071, "step": 14759 }, { "epoch": 2.856811145510836, "grad_norm": 0.068593330681324, "learning_rate": 8.330974993075456e-05, 
"loss": 0.0071, "step": 14760 }, { "epoch": 2.8570046439628483, "grad_norm": 0.0717538520693779, "learning_rate": 8.330761706605069e-05, "loss": 0.0059, "step": 14761 }, { "epoch": 2.8571981424148607, "grad_norm": 0.054687149822711945, "learning_rate": 8.330548409610609e-05, "loss": 0.0067, "step": 14762 }, { "epoch": 2.857391640866873, "grad_norm": 0.09084828943014145, "learning_rate": 8.330335102092867e-05, "loss": 0.0067, "step": 14763 }, { "epoch": 2.8575851393188856, "grad_norm": 0.08840703964233398, "learning_rate": 8.330121784052636e-05, "loss": 0.0066, "step": 14764 }, { "epoch": 2.857778637770898, "grad_norm": 0.07406709343194962, "learning_rate": 8.329908455490709e-05, "loss": 0.0066, "step": 14765 }, { "epoch": 2.8579721362229105, "grad_norm": 0.14467160403728485, "learning_rate": 8.329695116407881e-05, "loss": 0.0058, "step": 14766 }, { "epoch": 2.8581656346749225, "grad_norm": 0.05391642823815346, "learning_rate": 8.329481766804942e-05, "loss": 0.0062, "step": 14767 }, { "epoch": 2.858359133126935, "grad_norm": 0.14324736595153809, "learning_rate": 8.329268406682688e-05, "loss": 0.0065, "step": 14768 }, { "epoch": 2.8585526315789473, "grad_norm": 0.0945434495806694, "learning_rate": 8.329055036041913e-05, "loss": 0.0056, "step": 14769 }, { "epoch": 2.85874613003096, "grad_norm": 0.09953638166189194, "learning_rate": 8.328841654883406e-05, "loss": 0.0066, "step": 14770 }, { "epoch": 2.8589396284829722, "grad_norm": 0.11956149339675903, "learning_rate": 8.328628263207962e-05, "loss": 0.0062, "step": 14771 }, { "epoch": 2.8591331269349847, "grad_norm": 0.052403535693883896, "learning_rate": 8.328414861016378e-05, "loss": 0.007, "step": 14772 }, { "epoch": 2.8593266253869967, "grad_norm": 0.07682052999734879, "learning_rate": 8.328201448309445e-05, "loss": 0.0063, "step": 14773 }, { "epoch": 2.859520123839009, "grad_norm": 0.05308067426085472, "learning_rate": 8.327988025087951e-05, "loss": 0.0067, "step": 14774 }, { "epoch": 2.8597136222910216, "grad_norm": 0.05047038570046425, "learning_rate": 8.327774591352699e-05, "loss": 0.0067, "step": 14775 }, { "epoch": 2.859907120743034, "grad_norm": 0.04031677916646004, "learning_rate": 8.327561147104477e-05, "loss": 0.006, "step": 14776 }, { "epoch": 2.8601006191950464, "grad_norm": 0.052424270659685135, "learning_rate": 8.327347692344078e-05, "loss": 0.0061, "step": 14777 }, { "epoch": 2.860294117647059, "grad_norm": 0.055359672755002975, "learning_rate": 8.327134227072298e-05, "loss": 0.0073, "step": 14778 }, { "epoch": 2.8604876160990713, "grad_norm": 0.04968269169330597, "learning_rate": 8.326920751289928e-05, "loss": 0.0082, "step": 14779 }, { "epoch": 2.8606811145510838, "grad_norm": 0.09337067604064941, "learning_rate": 8.326707264997766e-05, "loss": 0.0078, "step": 14780 }, { "epoch": 2.860874613003096, "grad_norm": 0.05134810879826546, "learning_rate": 8.326493768196599e-05, "loss": 0.0068, "step": 14781 }, { "epoch": 2.861068111455108, "grad_norm": 0.12072189152240753, "learning_rate": 8.326280260887227e-05, "loss": 0.0069, "step": 14782 }, { "epoch": 2.8612616099071206, "grad_norm": 0.07703961431980133, "learning_rate": 8.326066743070441e-05, "loss": 0.0054, "step": 14783 }, { "epoch": 2.861455108359133, "grad_norm": 0.08451967686414719, "learning_rate": 8.325853214747037e-05, "loss": 0.0079, "step": 14784 }, { "epoch": 2.8616486068111455, "grad_norm": 0.13882355391979218, "learning_rate": 8.325639675917806e-05, "loss": 0.0082, "step": 14785 }, { "epoch": 2.861842105263158, "grad_norm": 0.06544695794582367, "learning_rate": 
8.325426126583542e-05, "loss": 0.007, "step": 14786 }, { "epoch": 2.8620356037151704, "grad_norm": 0.16916464269161224, "learning_rate": 8.32521256674504e-05, "loss": 0.0064, "step": 14787 }, { "epoch": 2.8622291021671824, "grad_norm": 0.07123136520385742, "learning_rate": 8.324998996403095e-05, "loss": 0.0077, "step": 14788 }, { "epoch": 2.862422600619195, "grad_norm": 0.08955852687358856, "learning_rate": 8.324785415558498e-05, "loss": 0.0067, "step": 14789 }, { "epoch": 2.8626160990712073, "grad_norm": 0.08420555293560028, "learning_rate": 8.324571824212046e-05, "loss": 0.0075, "step": 14790 }, { "epoch": 2.8628095975232197, "grad_norm": 0.055730562657117844, "learning_rate": 8.324358222364532e-05, "loss": 0.0067, "step": 14791 }, { "epoch": 2.863003095975232, "grad_norm": 0.054677847772836685, "learning_rate": 8.324144610016749e-05, "loss": 0.0061, "step": 14792 }, { "epoch": 2.8631965944272446, "grad_norm": 0.05868416652083397, "learning_rate": 8.323930987169493e-05, "loss": 0.007, "step": 14793 }, { "epoch": 2.863390092879257, "grad_norm": 0.053042009472846985, "learning_rate": 8.323717353823558e-05, "loss": 0.0064, "step": 14794 }, { "epoch": 2.8635835913312695, "grad_norm": 0.06568806618452072, "learning_rate": 8.323503709979738e-05, "loss": 0.0072, "step": 14795 }, { "epoch": 2.863777089783282, "grad_norm": 0.05124257504940033, "learning_rate": 8.323290055638827e-05, "loss": 0.0062, "step": 14796 }, { "epoch": 2.8639705882352944, "grad_norm": 0.0592743456363678, "learning_rate": 8.323076390801618e-05, "loss": 0.006, "step": 14797 }, { "epoch": 2.8641640866873064, "grad_norm": 0.0445539765059948, "learning_rate": 8.322862715468906e-05, "loss": 0.0064, "step": 14798 }, { "epoch": 2.864357585139319, "grad_norm": 0.06198683753609657, "learning_rate": 8.322649029641487e-05, "loss": 0.0074, "step": 14799 }, { "epoch": 2.8645510835913313, "grad_norm": 0.02925046905875206, "learning_rate": 8.322435333320155e-05, "loss": 0.0063, "step": 14800 }, { "epoch": 2.8647445820433437, "grad_norm": 0.0466136634349823, "learning_rate": 8.322221626505703e-05, "loss": 0.0082, "step": 14801 }, { "epoch": 2.864938080495356, "grad_norm": 0.058659765869379044, "learning_rate": 8.322007909198927e-05, "loss": 0.0063, "step": 14802 }, { "epoch": 2.8651315789473686, "grad_norm": 0.04718741774559021, "learning_rate": 8.32179418140062e-05, "loss": 0.0064, "step": 14803 }, { "epoch": 2.8653250773993806, "grad_norm": 0.07958999276161194, "learning_rate": 8.321580443111579e-05, "loss": 0.0076, "step": 14804 }, { "epoch": 2.865518575851393, "grad_norm": 0.028255848214030266, "learning_rate": 8.321366694332597e-05, "loss": 0.007, "step": 14805 }, { "epoch": 2.8657120743034055, "grad_norm": 0.08985986560583115, "learning_rate": 8.321152935064468e-05, "loss": 0.0056, "step": 14806 }, { "epoch": 2.865905572755418, "grad_norm": 0.04344868287444115, "learning_rate": 8.320939165307988e-05, "loss": 0.0095, "step": 14807 }, { "epoch": 2.8660990712074303, "grad_norm": 0.06515046954154968, "learning_rate": 8.320725385063952e-05, "loss": 0.0061, "step": 14808 }, { "epoch": 2.866292569659443, "grad_norm": 0.034853495657444, "learning_rate": 8.320511594333151e-05, "loss": 0.0055, "step": 14809 }, { "epoch": 2.8664860681114552, "grad_norm": 0.07029096782207489, "learning_rate": 8.320297793116386e-05, "loss": 0.0062, "step": 14810 }, { "epoch": 2.8666795665634677, "grad_norm": 0.03537783771753311, "learning_rate": 8.320083981414447e-05, "loss": 0.0069, "step": 14811 }, { "epoch": 2.86687306501548, "grad_norm": 0.057965584099292755, 
"learning_rate": 8.319870159228131e-05, "loss": 0.0064, "step": 14812 }, { "epoch": 2.867066563467492, "grad_norm": 0.06216452643275261, "learning_rate": 8.319656326558234e-05, "loss": 0.0077, "step": 14813 }, { "epoch": 2.8672600619195046, "grad_norm": 0.07359194755554199, "learning_rate": 8.319442483405548e-05, "loss": 0.0055, "step": 14814 }, { "epoch": 2.867453560371517, "grad_norm": 0.04648340493440628, "learning_rate": 8.31922862977087e-05, "loss": 0.0051, "step": 14815 }, { "epoch": 2.8676470588235294, "grad_norm": 0.07039918005466461, "learning_rate": 8.319014765654995e-05, "loss": 0.0066, "step": 14816 }, { "epoch": 2.867840557275542, "grad_norm": 0.06074881553649902, "learning_rate": 8.318800891058718e-05, "loss": 0.0069, "step": 14817 }, { "epoch": 2.8680340557275543, "grad_norm": 0.04189847409725189, "learning_rate": 8.318587005982831e-05, "loss": 0.007, "step": 14818 }, { "epoch": 2.8682275541795663, "grad_norm": 0.07105112820863724, "learning_rate": 8.318373110428136e-05, "loss": 0.007, "step": 14819 }, { "epoch": 2.8684210526315788, "grad_norm": 0.03440031409263611, "learning_rate": 8.318159204395424e-05, "loss": 0.006, "step": 14820 }, { "epoch": 2.868614551083591, "grad_norm": 0.0467563271522522, "learning_rate": 8.317945287885488e-05, "loss": 0.0077, "step": 14821 }, { "epoch": 2.8688080495356036, "grad_norm": 0.07984539121389389, "learning_rate": 8.317731360899127e-05, "loss": 0.0067, "step": 14822 }, { "epoch": 2.869001547987616, "grad_norm": 0.03753297030925751, "learning_rate": 8.317517423437134e-05, "loss": 0.0075, "step": 14823 }, { "epoch": 2.8691950464396285, "grad_norm": 0.06656403094530106, "learning_rate": 8.317303475500307e-05, "loss": 0.0059, "step": 14824 }, { "epoch": 2.869388544891641, "grad_norm": 0.02866603061556816, "learning_rate": 8.317089517089439e-05, "loss": 0.0075, "step": 14825 }, { "epoch": 2.8695820433436534, "grad_norm": 0.07180748879909515, "learning_rate": 8.316875548205327e-05, "loss": 0.0067, "step": 14826 }, { "epoch": 2.869775541795666, "grad_norm": 0.03634496033191681, "learning_rate": 8.316661568848764e-05, "loss": 0.0067, "step": 14827 }, { "epoch": 2.8699690402476783, "grad_norm": 0.06893033534288406, "learning_rate": 8.31644757902055e-05, "loss": 0.0069, "step": 14828 }, { "epoch": 2.8701625386996903, "grad_norm": 0.0454891137778759, "learning_rate": 8.316233578721478e-05, "loss": 0.0078, "step": 14829 }, { "epoch": 2.8703560371517027, "grad_norm": 0.06364613771438599, "learning_rate": 8.316019567952342e-05, "loss": 0.0061, "step": 14830 }, { "epoch": 2.870549535603715, "grad_norm": 0.0467609278857708, "learning_rate": 8.315805546713938e-05, "loss": 0.0067, "step": 14831 }, { "epoch": 2.8707430340557276, "grad_norm": 0.07071808725595474, "learning_rate": 8.315591515007065e-05, "loss": 0.0064, "step": 14832 }, { "epoch": 2.87093653250774, "grad_norm": 0.04332935810089111, "learning_rate": 8.315377472832513e-05, "loss": 0.006, "step": 14833 }, { "epoch": 2.871130030959752, "grad_norm": 0.07046432793140411, "learning_rate": 8.315163420191085e-05, "loss": 0.0071, "step": 14834 }, { "epoch": 2.8713235294117645, "grad_norm": 0.057357002049684525, "learning_rate": 8.314949357083572e-05, "loss": 0.007, "step": 14835 }, { "epoch": 2.871517027863777, "grad_norm": 0.043440695852041245, "learning_rate": 8.314735283510771e-05, "loss": 0.0068, "step": 14836 }, { "epoch": 2.8717105263157894, "grad_norm": 0.07919364422559738, "learning_rate": 8.314521199473479e-05, "loss": 0.0064, "step": 14837 }, { "epoch": 2.871904024767802, "grad_norm": 
0.03572724387049675, "learning_rate": 8.314307104972489e-05, "loss": 0.0071, "step": 14838 }, { "epoch": 2.8720975232198143, "grad_norm": 0.04948665574193001, "learning_rate": 8.314093000008598e-05, "loss": 0.0077, "step": 14839 }, { "epoch": 2.8722910216718267, "grad_norm": 0.048286572098731995, "learning_rate": 8.313878884582604e-05, "loss": 0.0064, "step": 14840 }, { "epoch": 2.872484520123839, "grad_norm": 0.053999241441488266, "learning_rate": 8.313664758695301e-05, "loss": 0.0063, "step": 14841 }, { "epoch": 2.8726780185758516, "grad_norm": 0.05110275372862816, "learning_rate": 8.313450622347486e-05, "loss": 0.0066, "step": 14842 }, { "epoch": 2.872871517027864, "grad_norm": 0.0398113913834095, "learning_rate": 8.313236475539955e-05, "loss": 0.006, "step": 14843 }, { "epoch": 2.873065015479876, "grad_norm": 0.06262794137001038, "learning_rate": 8.313022318273504e-05, "loss": 0.0064, "step": 14844 }, { "epoch": 2.8732585139318885, "grad_norm": 0.0511116161942482, "learning_rate": 8.312808150548928e-05, "loss": 0.0064, "step": 14845 }, { "epoch": 2.873452012383901, "grad_norm": 0.04110081121325493, "learning_rate": 8.312593972367026e-05, "loss": 0.0067, "step": 14846 }, { "epoch": 2.8736455108359134, "grad_norm": 0.03629155829548836, "learning_rate": 8.312379783728593e-05, "loss": 0.0065, "step": 14847 }, { "epoch": 2.873839009287926, "grad_norm": 0.026407256722450256, "learning_rate": 8.312165584634424e-05, "loss": 0.0087, "step": 14848 }, { "epoch": 2.8740325077399382, "grad_norm": 0.041160888969898224, "learning_rate": 8.311951375085317e-05, "loss": 0.0066, "step": 14849 }, { "epoch": 2.8742260061919502, "grad_norm": 0.042367931455373764, "learning_rate": 8.311737155082067e-05, "loss": 0.0061, "step": 14850 }, { "epoch": 2.8744195046439627, "grad_norm": 0.06444296985864639, "learning_rate": 8.31152292462547e-05, "loss": 0.007, "step": 14851 }, { "epoch": 2.874613003095975, "grad_norm": 0.04325226694345474, "learning_rate": 8.311308683716325e-05, "loss": 0.0064, "step": 14852 }, { "epoch": 2.8748065015479876, "grad_norm": 0.06475456058979034, "learning_rate": 8.311094432355426e-05, "loss": 0.0057, "step": 14853 }, { "epoch": 2.875, "grad_norm": 0.05604936555027962, "learning_rate": 8.310880170543571e-05, "loss": 0.0059, "step": 14854 }, { "epoch": 2.8751934984520124, "grad_norm": 0.09215633571147919, "learning_rate": 8.310665898281557e-05, "loss": 0.0077, "step": 14855 }, { "epoch": 2.875386996904025, "grad_norm": 0.08567152917385101, "learning_rate": 8.310451615570178e-05, "loss": 0.0069, "step": 14856 }, { "epoch": 2.8755804953560373, "grad_norm": 0.0534760020673275, "learning_rate": 8.310237322410233e-05, "loss": 0.0071, "step": 14857 }, { "epoch": 2.8757739938080498, "grad_norm": 0.05215388163924217, "learning_rate": 8.310023018802519e-05, "loss": 0.0067, "step": 14858 }, { "epoch": 2.8759674922600618, "grad_norm": 0.04444291070103645, "learning_rate": 8.309808704747831e-05, "loss": 0.0071, "step": 14859 }, { "epoch": 2.876160990712074, "grad_norm": 0.05840284749865532, "learning_rate": 8.309594380246969e-05, "loss": 0.007, "step": 14860 }, { "epoch": 2.8763544891640866, "grad_norm": 0.05409097671508789, "learning_rate": 8.309380045300725e-05, "loss": 0.0058, "step": 14861 }, { "epoch": 2.876547987616099, "grad_norm": 0.07913121581077576, "learning_rate": 8.309165699909899e-05, "loss": 0.0067, "step": 14862 }, { "epoch": 2.8767414860681115, "grad_norm": 0.07508764415979385, "learning_rate": 8.308951344075286e-05, "loss": 0.0084, "step": 14863 }, { "epoch": 2.876934984520124, 
"grad_norm": 0.08883979171514511, "learning_rate": 8.308736977797685e-05, "loss": 0.0063, "step": 14864 }, { "epoch": 2.877128482972136, "grad_norm": 0.07969454675912857, "learning_rate": 8.308522601077892e-05, "loss": 0.0064, "step": 14865 }, { "epoch": 2.8773219814241484, "grad_norm": 0.03434372320771217, "learning_rate": 8.308308213916704e-05, "loss": 0.0053, "step": 14866 }, { "epoch": 2.877515479876161, "grad_norm": 0.07183944433927536, "learning_rate": 8.308093816314917e-05, "loss": 0.0063, "step": 14867 }, { "epoch": 2.8777089783281733, "grad_norm": 0.04561365395784378, "learning_rate": 8.30787940827333e-05, "loss": 0.0063, "step": 14868 }, { "epoch": 2.8779024767801857, "grad_norm": 0.045475274324417114, "learning_rate": 8.30766498979274e-05, "loss": 0.0069, "step": 14869 }, { "epoch": 2.878095975232198, "grad_norm": 0.06759922951459885, "learning_rate": 8.307450560873943e-05, "loss": 0.0063, "step": 14870 }, { "epoch": 2.8782894736842106, "grad_norm": 0.03682788461446762, "learning_rate": 8.307236121517735e-05, "loss": 0.0066, "step": 14871 }, { "epoch": 2.878482972136223, "grad_norm": 0.03440878540277481, "learning_rate": 8.307021671724917e-05, "loss": 0.0065, "step": 14872 }, { "epoch": 2.8786764705882355, "grad_norm": 0.04913286864757538, "learning_rate": 8.306807211496283e-05, "loss": 0.0062, "step": 14873 }, { "epoch": 2.878869969040248, "grad_norm": 0.05715061351656914, "learning_rate": 8.30659274083263e-05, "loss": 0.0061, "step": 14874 }, { "epoch": 2.87906346749226, "grad_norm": 0.0455268919467926, "learning_rate": 8.306378259734759e-05, "loss": 0.0066, "step": 14875 }, { "epoch": 2.8792569659442724, "grad_norm": 0.06201731786131859, "learning_rate": 8.306163768203466e-05, "loss": 0.0058, "step": 14876 }, { "epoch": 2.879450464396285, "grad_norm": 0.06658420711755753, "learning_rate": 8.305949266239545e-05, "loss": 0.0063, "step": 14877 }, { "epoch": 2.8796439628482973, "grad_norm": 0.05154453217983246, "learning_rate": 8.305734753843797e-05, "loss": 0.0061, "step": 14878 }, { "epoch": 2.8798374613003097, "grad_norm": 0.08768563717603683, "learning_rate": 8.305520231017017e-05, "loss": 0.0078, "step": 14879 }, { "epoch": 2.8800309597523217, "grad_norm": 0.0916585624217987, "learning_rate": 8.305305697760008e-05, "loss": 0.0049, "step": 14880 }, { "epoch": 2.880224458204334, "grad_norm": 0.07910355925559998, "learning_rate": 8.30509115407356e-05, "loss": 0.0076, "step": 14881 }, { "epoch": 2.8804179566563466, "grad_norm": 0.09940546005964279, "learning_rate": 8.304876599958476e-05, "loss": 0.0063, "step": 14882 }, { "epoch": 2.880611455108359, "grad_norm": 0.03896790370345116, "learning_rate": 8.304662035415551e-05, "loss": 0.0064, "step": 14883 }, { "epoch": 2.8808049535603715, "grad_norm": 0.061790358275175095, "learning_rate": 8.304447460445583e-05, "loss": 0.0076, "step": 14884 }, { "epoch": 2.880998452012384, "grad_norm": 0.06910683214664459, "learning_rate": 8.304232875049373e-05, "loss": 0.0064, "step": 14885 }, { "epoch": 2.8811919504643964, "grad_norm": 0.04868201166391373, "learning_rate": 8.304018279227714e-05, "loss": 0.0055, "step": 14886 }, { "epoch": 2.881385448916409, "grad_norm": 0.06859797984361649, "learning_rate": 8.303803672981406e-05, "loss": 0.007, "step": 14887 }, { "epoch": 2.8815789473684212, "grad_norm": 0.033738359808921814, "learning_rate": 8.303589056311248e-05, "loss": 0.006, "step": 14888 }, { "epoch": 2.8817724458204337, "grad_norm": 0.051367729902267456, "learning_rate": 8.303374429218037e-05, "loss": 0.0057, "step": 14889 }, { "epoch": 
2.8819659442724457, "grad_norm": 0.061423540115356445, "learning_rate": 8.303159791702569e-05, "loss": 0.0062, "step": 14890 }, { "epoch": 2.882159442724458, "grad_norm": 0.0583190955221653, "learning_rate": 8.302945143765644e-05, "loss": 0.0066, "step": 14891 }, { "epoch": 2.8823529411764706, "grad_norm": 0.06217014417052269, "learning_rate": 8.302730485408062e-05, "loss": 0.0063, "step": 14892 }, { "epoch": 2.882546439628483, "grad_norm": 0.05578777566552162, "learning_rate": 8.302515816630616e-05, "loss": 0.0072, "step": 14893 }, { "epoch": 2.8827399380804954, "grad_norm": 0.05138193443417549, "learning_rate": 8.302301137434108e-05, "loss": 0.0062, "step": 14894 }, { "epoch": 2.882933436532508, "grad_norm": 0.05178389325737953, "learning_rate": 8.302086447819334e-05, "loss": 0.006, "step": 14895 }, { "epoch": 2.88312693498452, "grad_norm": 0.05389006435871124, "learning_rate": 8.301871747787092e-05, "loss": 0.0072, "step": 14896 }, { "epoch": 2.8833204334365323, "grad_norm": 0.05197520926594734, "learning_rate": 8.301657037338183e-05, "loss": 0.0061, "step": 14897 }, { "epoch": 2.8835139318885448, "grad_norm": 0.07512707263231277, "learning_rate": 8.301442316473404e-05, "loss": 0.0077, "step": 14898 }, { "epoch": 2.883707430340557, "grad_norm": 0.05583097040653229, "learning_rate": 8.301227585193552e-05, "loss": 0.0074, "step": 14899 }, { "epoch": 2.8839009287925697, "grad_norm": 0.048769060522317886, "learning_rate": 8.301012843499425e-05, "loss": 0.0063, "step": 14900 }, { "epoch": 2.884094427244582, "grad_norm": 0.04839593172073364, "learning_rate": 8.300798091391823e-05, "loss": 0.006, "step": 14901 }, { "epoch": 2.8842879256965945, "grad_norm": 0.030750740319490433, "learning_rate": 8.300583328871545e-05, "loss": 0.0063, "step": 14902 }, { "epoch": 2.884481424148607, "grad_norm": 0.035927820950746536, "learning_rate": 8.300368555939386e-05, "loss": 0.0052, "step": 14903 }, { "epoch": 2.8846749226006194, "grad_norm": 0.03530086949467659, "learning_rate": 8.300153772596148e-05, "loss": 0.0071, "step": 14904 }, { "epoch": 2.8848684210526314, "grad_norm": 0.028491757810115814, "learning_rate": 8.299938978842629e-05, "loss": 0.0075, "step": 14905 }, { "epoch": 2.885061919504644, "grad_norm": 0.057783834636211395, "learning_rate": 8.299724174679627e-05, "loss": 0.0066, "step": 14906 }, { "epoch": 2.8852554179566563, "grad_norm": 0.019590402022004128, "learning_rate": 8.29950936010794e-05, "loss": 0.0061, "step": 14907 }, { "epoch": 2.8854489164086687, "grad_norm": 0.06045100465416908, "learning_rate": 8.299294535128367e-05, "loss": 0.0064, "step": 14908 }, { "epoch": 2.885642414860681, "grad_norm": 0.025290941819548607, "learning_rate": 8.299079699741707e-05, "loss": 0.0067, "step": 14909 }, { "epoch": 2.8858359133126936, "grad_norm": 0.037810031324625015, "learning_rate": 8.298864853948756e-05, "loss": 0.006, "step": 14910 }, { "epoch": 2.8860294117647056, "grad_norm": 0.03927876800298691, "learning_rate": 8.298649997750317e-05, "loss": 0.0074, "step": 14911 }, { "epoch": 2.886222910216718, "grad_norm": 0.020693538710474968, "learning_rate": 8.298435131147187e-05, "loss": 0.0059, "step": 14912 }, { "epoch": 2.8864164086687305, "grad_norm": 0.0466732420027256, "learning_rate": 8.298220254140163e-05, "loss": 0.0068, "step": 14913 }, { "epoch": 2.886609907120743, "grad_norm": 0.026102935895323753, "learning_rate": 8.298005366730048e-05, "loss": 0.0059, "step": 14914 }, { "epoch": 2.8868034055727554, "grad_norm": 0.04042886197566986, "learning_rate": 8.297790468917639e-05, "loss": 0.0052, 
"step": 14915 }, { "epoch": 2.886996904024768, "grad_norm": 0.07405424118041992, "learning_rate": 8.297575560703732e-05, "loss": 0.0074, "step": 14916 }, { "epoch": 2.8871904024767803, "grad_norm": 0.029630206525325775, "learning_rate": 8.297360642089129e-05, "loss": 0.0074, "step": 14917 }, { "epoch": 2.8873839009287927, "grad_norm": 0.06808974593877792, "learning_rate": 8.297145713074629e-05, "loss": 0.0067, "step": 14918 }, { "epoch": 2.887577399380805, "grad_norm": 0.039563946425914764, "learning_rate": 8.296930773661031e-05, "loss": 0.008, "step": 14919 }, { "epoch": 2.8877708978328176, "grad_norm": 0.04673514887690544, "learning_rate": 8.296715823849131e-05, "loss": 0.0071, "step": 14920 }, { "epoch": 2.8879643962848296, "grad_norm": 0.054106008261442184, "learning_rate": 8.296500863639733e-05, "loss": 0.0071, "step": 14921 }, { "epoch": 2.888157894736842, "grad_norm": 0.030632399022579193, "learning_rate": 8.296285893033634e-05, "loss": 0.0071, "step": 14922 }, { "epoch": 2.8883513931888545, "grad_norm": 0.04193047806620598, "learning_rate": 8.296070912031633e-05, "loss": 0.006, "step": 14923 }, { "epoch": 2.888544891640867, "grad_norm": 0.030934549868106842, "learning_rate": 8.295855920634528e-05, "loss": 0.0069, "step": 14924 }, { "epoch": 2.8887383900928794, "grad_norm": 0.053567998111248016, "learning_rate": 8.295640918843121e-05, "loss": 0.0085, "step": 14925 }, { "epoch": 2.888931888544892, "grad_norm": 0.04522173851728439, "learning_rate": 8.295425906658209e-05, "loss": 0.0051, "step": 14926 }, { "epoch": 2.889125386996904, "grad_norm": 0.05434845760464668, "learning_rate": 8.295210884080593e-05, "loss": 0.0076, "step": 14927 }, { "epoch": 2.8893188854489162, "grad_norm": 0.09476827085018158, "learning_rate": 8.294995851111071e-05, "loss": 0.007, "step": 14928 }, { "epoch": 2.8895123839009287, "grad_norm": 0.04809904471039772, "learning_rate": 8.294780807750442e-05, "loss": 0.0067, "step": 14929 }, { "epoch": 2.889705882352941, "grad_norm": 0.08429233729839325, "learning_rate": 8.29456575399951e-05, "loss": 0.008, "step": 14930 }, { "epoch": 2.8898993808049536, "grad_norm": 0.04313211515545845, "learning_rate": 8.29435068985907e-05, "loss": 0.0068, "step": 14931 }, { "epoch": 2.890092879256966, "grad_norm": 0.08195655047893524, "learning_rate": 8.29413561532992e-05, "loss": 0.008, "step": 14932 }, { "epoch": 2.8902863777089784, "grad_norm": 0.06765986979007721, "learning_rate": 8.293920530412864e-05, "loss": 0.0057, "step": 14933 }, { "epoch": 2.890479876160991, "grad_norm": 0.059527359902858734, "learning_rate": 8.293705435108701e-05, "loss": 0.006, "step": 14934 }, { "epoch": 2.8906733746130033, "grad_norm": 0.06671534478664398, "learning_rate": 8.293490329418228e-05, "loss": 0.0063, "step": 14935 }, { "epoch": 2.8908668730650153, "grad_norm": 0.04795948043465614, "learning_rate": 8.293275213342246e-05, "loss": 0.0066, "step": 14936 }, { "epoch": 2.8910603715170278, "grad_norm": 0.05196724832057953, "learning_rate": 8.293060086881557e-05, "loss": 0.0064, "step": 14937 }, { "epoch": 2.89125386996904, "grad_norm": 0.04604698717594147, "learning_rate": 8.292844950036957e-05, "loss": 0.0067, "step": 14938 }, { "epoch": 2.8914473684210527, "grad_norm": 0.06888945400714874, "learning_rate": 8.292629802809249e-05, "loss": 0.007, "step": 14939 }, { "epoch": 2.891640866873065, "grad_norm": 0.05009855329990387, "learning_rate": 8.292414645199232e-05, "loss": 0.0063, "step": 14940 }, { "epoch": 2.8918343653250775, "grad_norm": 0.058877404779195786, "learning_rate": 
8.292199477207704e-05, "loss": 0.0083, "step": 14941 }, { "epoch": 2.8920278637770895, "grad_norm": 0.0808069109916687, "learning_rate": 8.291984298835466e-05, "loss": 0.0079, "step": 14942 }, { "epoch": 2.892221362229102, "grad_norm": 0.055804114788770676, "learning_rate": 8.29176911008332e-05, "loss": 0.007, "step": 14943 }, { "epoch": 2.8924148606811144, "grad_norm": 0.09397167712450027, "learning_rate": 8.291553910952062e-05, "loss": 0.0054, "step": 14944 }, { "epoch": 2.892608359133127, "grad_norm": 0.055636536329984665, "learning_rate": 8.291338701442495e-05, "loss": 0.0068, "step": 14945 }, { "epoch": 2.8928018575851393, "grad_norm": 0.08682293444871902, "learning_rate": 8.29112348155542e-05, "loss": 0.0064, "step": 14946 }, { "epoch": 2.8929953560371517, "grad_norm": 0.05080777779221535, "learning_rate": 8.290908251291634e-05, "loss": 0.0054, "step": 14947 }, { "epoch": 2.893188854489164, "grad_norm": 0.0719379410147667, "learning_rate": 8.290693010651941e-05, "loss": 0.0061, "step": 14948 }, { "epoch": 2.8933823529411766, "grad_norm": 0.05784370377659798, "learning_rate": 8.290477759637137e-05, "loss": 0.0063, "step": 14949 }, { "epoch": 2.893575851393189, "grad_norm": 0.05160728469491005, "learning_rate": 8.290262498248025e-05, "loss": 0.0059, "step": 14950 }, { "epoch": 2.8937693498452015, "grad_norm": 0.058985501527786255, "learning_rate": 8.290047226485405e-05, "loss": 0.0071, "step": 14951 }, { "epoch": 2.8939628482972135, "grad_norm": 0.0332237184047699, "learning_rate": 8.289831944350078e-05, "loss": 0.0067, "step": 14952 }, { "epoch": 2.894156346749226, "grad_norm": 0.07020070403814316, "learning_rate": 8.289616651842841e-05, "loss": 0.0051, "step": 14953 }, { "epoch": 2.8943498452012384, "grad_norm": 0.04021095857024193, "learning_rate": 8.289401348964497e-05, "loss": 0.0071, "step": 14954 }, { "epoch": 2.894543343653251, "grad_norm": 0.0734991654753685, "learning_rate": 8.289186035715847e-05, "loss": 0.0069, "step": 14955 }, { "epoch": 2.8947368421052633, "grad_norm": 0.07332593202590942, "learning_rate": 8.28897071209769e-05, "loss": 0.0067, "step": 14956 }, { "epoch": 2.8949303405572753, "grad_norm": 0.06921502202749252, "learning_rate": 8.288755378110827e-05, "loss": 0.0055, "step": 14957 }, { "epoch": 2.8951238390092877, "grad_norm": 0.06794002652168274, "learning_rate": 8.288540033756059e-05, "loss": 0.0059, "step": 14958 }, { "epoch": 2.8953173374613, "grad_norm": 0.05144727602601051, "learning_rate": 8.288324679034186e-05, "loss": 0.0073, "step": 14959 }, { "epoch": 2.8955108359133126, "grad_norm": 0.08811908960342407, "learning_rate": 8.28810931394601e-05, "loss": 0.008, "step": 14960 }, { "epoch": 2.895704334365325, "grad_norm": 0.06934258341789246, "learning_rate": 8.28789393849233e-05, "loss": 0.0059, "step": 14961 }, { "epoch": 2.8958978328173375, "grad_norm": 0.08358258754014969, "learning_rate": 8.287678552673946e-05, "loss": 0.0077, "step": 14962 }, { "epoch": 2.89609133126935, "grad_norm": 0.07070216536521912, "learning_rate": 8.287463156491662e-05, "loss": 0.0064, "step": 14963 }, { "epoch": 2.8962848297213624, "grad_norm": 0.08187314867973328, "learning_rate": 8.287247749946276e-05, "loss": 0.0066, "step": 14964 }, { "epoch": 2.896478328173375, "grad_norm": 0.07349472492933273, "learning_rate": 8.28703233303859e-05, "loss": 0.0081, "step": 14965 }, { "epoch": 2.8966718266253872, "grad_norm": 0.08288735151290894, "learning_rate": 8.286816905769403e-05, "loss": 0.0064, "step": 14966 }, { "epoch": 2.8968653250773992, "grad_norm": 0.07822359353303909, 
"learning_rate": 8.28660146813952e-05, "loss": 0.0058, "step": 14967 }, { "epoch": 2.8970588235294117, "grad_norm": 0.0761532261967659, "learning_rate": 8.286386020149736e-05, "loss": 0.0073, "step": 14968 }, { "epoch": 2.897252321981424, "grad_norm": 0.07748842239379883, "learning_rate": 8.286170561800858e-05, "loss": 0.0057, "step": 14969 }, { "epoch": 2.8974458204334366, "grad_norm": 0.07096032053232193, "learning_rate": 8.285955093093683e-05, "loss": 0.0061, "step": 14970 }, { "epoch": 2.897639318885449, "grad_norm": 0.07175815105438232, "learning_rate": 8.285739614029014e-05, "loss": 0.0062, "step": 14971 }, { "epoch": 2.8978328173374615, "grad_norm": 0.05372445285320282, "learning_rate": 8.285524124607652e-05, "loss": 0.0074, "step": 14972 }, { "epoch": 2.8980263157894735, "grad_norm": 0.10664494335651398, "learning_rate": 8.285308624830396e-05, "loss": 0.0058, "step": 14973 }, { "epoch": 2.898219814241486, "grad_norm": 0.05632268637418747, "learning_rate": 8.285093114698052e-05, "loss": 0.0075, "step": 14974 }, { "epoch": 2.8984133126934983, "grad_norm": 0.1220550611615181, "learning_rate": 8.284877594211415e-05, "loss": 0.0063, "step": 14975 }, { "epoch": 2.8986068111455108, "grad_norm": 0.05713849142193794, "learning_rate": 8.28466206337129e-05, "loss": 0.0057, "step": 14976 }, { "epoch": 2.898800309597523, "grad_norm": 0.11627510190010071, "learning_rate": 8.284446522178478e-05, "loss": 0.0079, "step": 14977 }, { "epoch": 2.8989938080495357, "grad_norm": 0.060976505279541016, "learning_rate": 8.284230970633781e-05, "loss": 0.0063, "step": 14978 }, { "epoch": 2.899187306501548, "grad_norm": 0.048967987298965454, "learning_rate": 8.284015408737997e-05, "loss": 0.0067, "step": 14979 }, { "epoch": 2.8993808049535605, "grad_norm": 0.08431150764226913, "learning_rate": 8.283799836491932e-05, "loss": 0.0072, "step": 14980 }, { "epoch": 2.899574303405573, "grad_norm": 0.030379164963960648, "learning_rate": 8.283584253896384e-05, "loss": 0.0058, "step": 14981 }, { "epoch": 2.899767801857585, "grad_norm": 0.08870147913694382, "learning_rate": 8.283368660952157e-05, "loss": 0.0083, "step": 14982 }, { "epoch": 2.8999613003095974, "grad_norm": 0.040771979838609695, "learning_rate": 8.28315305766005e-05, "loss": 0.0068, "step": 14983 }, { "epoch": 2.90015479876161, "grad_norm": 0.07004397362470627, "learning_rate": 8.282937444020865e-05, "loss": 0.0065, "step": 14984 }, { "epoch": 2.9003482972136223, "grad_norm": 0.06966598331928253, "learning_rate": 8.282721820035405e-05, "loss": 0.0084, "step": 14985 }, { "epoch": 2.9005417956656347, "grad_norm": 0.07108893245458603, "learning_rate": 8.282506185704472e-05, "loss": 0.0079, "step": 14986 }, { "epoch": 2.900735294117647, "grad_norm": 0.08443406224250793, "learning_rate": 8.282290541028865e-05, "loss": 0.0062, "step": 14987 }, { "epoch": 2.900928792569659, "grad_norm": 0.04865262284874916, "learning_rate": 8.282074886009387e-05, "loss": 0.0063, "step": 14988 }, { "epoch": 2.9011222910216716, "grad_norm": 0.08941791951656342, "learning_rate": 8.281859220646841e-05, "loss": 0.0056, "step": 14989 }, { "epoch": 2.901315789473684, "grad_norm": 0.028576895594596863, "learning_rate": 8.281643544942028e-05, "loss": 0.0063, "step": 14990 }, { "epoch": 2.9015092879256965, "grad_norm": 0.0739930272102356, "learning_rate": 8.28142785889575e-05, "loss": 0.0067, "step": 14991 }, { "epoch": 2.901702786377709, "grad_norm": 0.0515425018966198, "learning_rate": 8.281212162508809e-05, "loss": 0.0069, "step": 14992 }, { "epoch": 2.9018962848297214, "grad_norm": 
0.03833479806780815, "learning_rate": 8.280996455782005e-05, "loss": 0.0065, "step": 14993 }, { "epoch": 2.902089783281734, "grad_norm": 0.057206153869628906, "learning_rate": 8.280780738716142e-05, "loss": 0.0071, "step": 14994 }, { "epoch": 2.9022832817337463, "grad_norm": 0.0476960651576519, "learning_rate": 8.280565011312022e-05, "loss": 0.0073, "step": 14995 }, { "epoch": 2.9024767801857587, "grad_norm": 0.0352315828204155, "learning_rate": 8.280349273570445e-05, "loss": 0.0055, "step": 14996 }, { "epoch": 2.902670278637771, "grad_norm": 0.0329107791185379, "learning_rate": 8.280133525492217e-05, "loss": 0.0064, "step": 14997 }, { "epoch": 2.902863777089783, "grad_norm": 0.038548070937395096, "learning_rate": 8.279917767078136e-05, "loss": 0.0074, "step": 14998 }, { "epoch": 2.9030572755417956, "grad_norm": 0.029887063428759575, "learning_rate": 8.279701998329005e-05, "loss": 0.0077, "step": 14999 }, { "epoch": 2.903250773993808, "grad_norm": 0.07076343148946762, "learning_rate": 8.279486219245627e-05, "loss": 0.0064, "step": 15000 }, { "epoch": 2.9034442724458205, "grad_norm": 0.04051118716597557, "learning_rate": 8.279270429828806e-05, "loss": 0.0065, "step": 15001 }, { "epoch": 2.903637770897833, "grad_norm": 0.07240545749664307, "learning_rate": 8.279054630079342e-05, "loss": 0.0079, "step": 15002 }, { "epoch": 2.9038312693498454, "grad_norm": 0.053808361291885376, "learning_rate": 8.278838819998036e-05, "loss": 0.0062, "step": 15003 }, { "epoch": 2.9040247678018574, "grad_norm": 0.0381811261177063, "learning_rate": 8.278622999585692e-05, "loss": 0.007, "step": 15004 }, { "epoch": 2.90421826625387, "grad_norm": 0.07700664550065994, "learning_rate": 8.278407168843114e-05, "loss": 0.0065, "step": 15005 }, { "epoch": 2.9044117647058822, "grad_norm": 0.03936970233917236, "learning_rate": 8.2781913277711e-05, "loss": 0.006, "step": 15006 }, { "epoch": 2.9046052631578947, "grad_norm": 0.09157522022724152, "learning_rate": 8.277975476370458e-05, "loss": 0.0073, "step": 15007 }, { "epoch": 2.904798761609907, "grad_norm": 0.06841222941875458, "learning_rate": 8.277759614641986e-05, "loss": 0.0067, "step": 15008 }, { "epoch": 2.9049922600619196, "grad_norm": 0.062106288969516754, "learning_rate": 8.27754374258649e-05, "loss": 0.0067, "step": 15009 }, { "epoch": 2.905185758513932, "grad_norm": 0.091316819190979, "learning_rate": 8.277327860204771e-05, "loss": 0.0065, "step": 15010 }, { "epoch": 2.9053792569659445, "grad_norm": 0.025775697082281113, "learning_rate": 8.277111967497629e-05, "loss": 0.0059, "step": 15011 }, { "epoch": 2.905572755417957, "grad_norm": 0.10475312918424606, "learning_rate": 8.27689606446587e-05, "loss": 0.0065, "step": 15012 }, { "epoch": 2.905766253869969, "grad_norm": 0.026397502049803734, "learning_rate": 8.276680151110297e-05, "loss": 0.0052, "step": 15013 }, { "epoch": 2.9059597523219813, "grad_norm": 0.08292735368013382, "learning_rate": 8.27646422743171e-05, "loss": 0.0054, "step": 15014 }, { "epoch": 2.906153250773994, "grad_norm": 0.02877957746386528, "learning_rate": 8.276248293430912e-05, "loss": 0.0063, "step": 15015 }, { "epoch": 2.906346749226006, "grad_norm": 0.08001028001308441, "learning_rate": 8.27603234910871e-05, "loss": 0.0063, "step": 15016 }, { "epoch": 2.9065402476780187, "grad_norm": 0.040427472442388535, "learning_rate": 8.275816394465902e-05, "loss": 0.0066, "step": 15017 }, { "epoch": 2.906733746130031, "grad_norm": 0.048929668962955475, "learning_rate": 8.275600429503292e-05, "loss": 0.0068, "step": 15018 }, { "epoch": 2.906927244582043, 
"grad_norm": 0.037099309265613556, "learning_rate": 8.275384454221684e-05, "loss": 0.0069, "step": 15019 }, { "epoch": 2.9071207430340555, "grad_norm": 0.035745829343795776, "learning_rate": 8.275168468621881e-05, "loss": 0.0052, "step": 15020 }, { "epoch": 2.907314241486068, "grad_norm": 0.041541680693626404, "learning_rate": 8.274952472704685e-05, "loss": 0.0065, "step": 15021 }, { "epoch": 2.9075077399380804, "grad_norm": 0.03667224943637848, "learning_rate": 8.274736466470899e-05, "loss": 0.0074, "step": 15022 }, { "epoch": 2.907701238390093, "grad_norm": 0.04673359915614128, "learning_rate": 8.274520449921328e-05, "loss": 0.0066, "step": 15023 }, { "epoch": 2.9078947368421053, "grad_norm": 0.03870641440153122, "learning_rate": 8.274304423056773e-05, "loss": 0.0066, "step": 15024 }, { "epoch": 2.9080882352941178, "grad_norm": 0.06898170709609985, "learning_rate": 8.274088385878037e-05, "loss": 0.0062, "step": 15025 }, { "epoch": 2.90828173374613, "grad_norm": 0.05269841104745865, "learning_rate": 8.273872338385925e-05, "loss": 0.0074, "step": 15026 }, { "epoch": 2.9084752321981426, "grad_norm": 0.07101938873529434, "learning_rate": 8.273656280581239e-05, "loss": 0.0065, "step": 15027 }, { "epoch": 2.9086687306501546, "grad_norm": 0.05376514419913292, "learning_rate": 8.273440212464783e-05, "loss": 0.0059, "step": 15028 }, { "epoch": 2.908862229102167, "grad_norm": 0.05647997558116913, "learning_rate": 8.273224134037357e-05, "loss": 0.0068, "step": 15029 }, { "epoch": 2.9090557275541795, "grad_norm": 0.06589242815971375, "learning_rate": 8.273008045299769e-05, "loss": 0.0054, "step": 15030 }, { "epoch": 2.909249226006192, "grad_norm": 0.04102081060409546, "learning_rate": 8.27279194625282e-05, "loss": 0.0065, "step": 15031 }, { "epoch": 2.9094427244582044, "grad_norm": 0.08759059756994247, "learning_rate": 8.272575836897313e-05, "loss": 0.0056, "step": 15032 }, { "epoch": 2.909636222910217, "grad_norm": 0.03272036835551262, "learning_rate": 8.272359717234055e-05, "loss": 0.0071, "step": 15033 }, { "epoch": 2.909829721362229, "grad_norm": 0.09608873724937439, "learning_rate": 8.272143587263845e-05, "loss": 0.0088, "step": 15034 }, { "epoch": 2.9100232198142413, "grad_norm": 0.05465612933039665, "learning_rate": 8.271927446987488e-05, "loss": 0.0064, "step": 15035 }, { "epoch": 2.9102167182662537, "grad_norm": 0.0574331060051918, "learning_rate": 8.271711296405787e-05, "loss": 0.0071, "step": 15036 }, { "epoch": 2.910410216718266, "grad_norm": 0.055843811482191086, "learning_rate": 8.271495135519546e-05, "loss": 0.0061, "step": 15037 }, { "epoch": 2.9106037151702786, "grad_norm": 0.048230547457933426, "learning_rate": 8.27127896432957e-05, "loss": 0.0061, "step": 15038 }, { "epoch": 2.910797213622291, "grad_norm": 0.03906312584877014, "learning_rate": 8.271062782836663e-05, "loss": 0.007, "step": 15039 }, { "epoch": 2.9109907120743035, "grad_norm": 0.04532040283083916, "learning_rate": 8.270846591041625e-05, "loss": 0.0062, "step": 15040 }, { "epoch": 2.911184210526316, "grad_norm": 0.049670811742544174, "learning_rate": 8.270630388945264e-05, "loss": 0.0067, "step": 15041 }, { "epoch": 2.9113777089783284, "grad_norm": 0.04750099778175354, "learning_rate": 8.27041417654838e-05, "loss": 0.0071, "step": 15042 }, { "epoch": 2.911571207430341, "grad_norm": 0.0571846105158329, "learning_rate": 8.270197953851782e-05, "loss": 0.0081, "step": 15043 }, { "epoch": 2.911764705882353, "grad_norm": 0.06561941653490067, "learning_rate": 8.269981720856268e-05, "loss": 0.0067, "step": 15044 }, { "epoch": 
2.9119582043343653, "grad_norm": 0.045411497354507446, "learning_rate": 8.269765477562646e-05, "loss": 0.0063, "step": 15045 }, { "epoch": 2.9121517027863777, "grad_norm": 0.0753064975142479, "learning_rate": 8.269549223971717e-05, "loss": 0.0071, "step": 15046 }, { "epoch": 2.91234520123839, "grad_norm": 0.04406223073601723, "learning_rate": 8.269332960084288e-05, "loss": 0.0055, "step": 15047 }, { "epoch": 2.9125386996904026, "grad_norm": 0.06461746990680695, "learning_rate": 8.269116685901158e-05, "loss": 0.0065, "step": 15048 }, { "epoch": 2.912732198142415, "grad_norm": 0.06624750047922134, "learning_rate": 8.268900401423138e-05, "loss": 0.0076, "step": 15049 }, { "epoch": 2.912925696594427, "grad_norm": 0.055934835225343704, "learning_rate": 8.268684106651026e-05, "loss": 0.007, "step": 15050 }, { "epoch": 2.9131191950464395, "grad_norm": 0.0655256062746048, "learning_rate": 8.26846780158563e-05, "loss": 0.0076, "step": 15051 }, { "epoch": 2.913312693498452, "grad_norm": 0.10439025610685349, "learning_rate": 8.268251486227753e-05, "loss": 0.0063, "step": 15052 }, { "epoch": 2.9135061919504643, "grad_norm": 0.04714400693774223, "learning_rate": 8.2680351605782e-05, "loss": 0.0063, "step": 15053 }, { "epoch": 2.913699690402477, "grad_norm": 0.10589759051799774, "learning_rate": 8.267818824637774e-05, "loss": 0.0057, "step": 15054 }, { "epoch": 2.9138931888544892, "grad_norm": 0.06821981817483902, "learning_rate": 8.267602478407279e-05, "loss": 0.0065, "step": 15055 }, { "epoch": 2.9140866873065017, "grad_norm": 0.090072400867939, "learning_rate": 8.26738612188752e-05, "loss": 0.0055, "step": 15056 }, { "epoch": 2.914280185758514, "grad_norm": 0.07433033734560013, "learning_rate": 8.267169755079301e-05, "loss": 0.0067, "step": 15057 }, { "epoch": 2.9144736842105265, "grad_norm": 0.08283645659685135, "learning_rate": 8.266953377983426e-05, "loss": 0.0062, "step": 15058 }, { "epoch": 2.9146671826625385, "grad_norm": 0.07172881811857224, "learning_rate": 8.266736990600703e-05, "loss": 0.0068, "step": 15059 }, { "epoch": 2.914860681114551, "grad_norm": 0.07515189796686172, "learning_rate": 8.266520592931931e-05, "loss": 0.0078, "step": 15060 }, { "epoch": 2.9150541795665634, "grad_norm": 0.08095557242631912, "learning_rate": 8.266304184977918e-05, "loss": 0.007, "step": 15061 }, { "epoch": 2.915247678018576, "grad_norm": 0.05849799886345863, "learning_rate": 8.266087766739468e-05, "loss": 0.0064, "step": 15062 }, { "epoch": 2.9154411764705883, "grad_norm": 0.06627203524112701, "learning_rate": 8.265871338217384e-05, "loss": 0.0057, "step": 15063 }, { "epoch": 2.9156346749226008, "grad_norm": 0.052823763340711594, "learning_rate": 8.265654899412473e-05, "loss": 0.0063, "step": 15064 }, { "epoch": 2.9158281733746128, "grad_norm": 0.06063155457377434, "learning_rate": 8.265438450325538e-05, "loss": 0.0066, "step": 15065 }, { "epoch": 2.916021671826625, "grad_norm": 0.07319660484790802, "learning_rate": 8.265221990957386e-05, "loss": 0.0058, "step": 15066 }, { "epoch": 2.9162151702786376, "grad_norm": 0.04472925513982773, "learning_rate": 8.265005521308817e-05, "loss": 0.0065, "step": 15067 }, { "epoch": 2.91640866873065, "grad_norm": 0.11574438214302063, "learning_rate": 8.26478904138064e-05, "loss": 0.0067, "step": 15068 }, { "epoch": 2.9166021671826625, "grad_norm": 0.05556468665599823, "learning_rate": 8.264572551173659e-05, "loss": 0.0064, "step": 15069 }, { "epoch": 2.916795665634675, "grad_norm": 0.11072740703821182, "learning_rate": 8.264356050688678e-05, "loss": 0.007, "step": 15070 }, 
{ "epoch": 2.9169891640866874, "grad_norm": 0.09756755083799362, "learning_rate": 8.264139539926504e-05, "loss": 0.0076, "step": 15071 }, { "epoch": 2.9171826625387, "grad_norm": 0.08209788799285889, "learning_rate": 8.263923018887938e-05, "loss": 0.0078, "step": 15072 }, { "epoch": 2.9173761609907123, "grad_norm": 0.13336151838302612, "learning_rate": 8.26370648757379e-05, "loss": 0.0077, "step": 15073 }, { "epoch": 2.9175696594427247, "grad_norm": 0.0658889189362526, "learning_rate": 8.26348994598486e-05, "loss": 0.0066, "step": 15074 }, { "epoch": 2.9177631578947367, "grad_norm": 0.1201232448220253, "learning_rate": 8.263273394121956e-05, "loss": 0.0079, "step": 15075 }, { "epoch": 2.917956656346749, "grad_norm": 0.07945406436920166, "learning_rate": 8.263056831985881e-05, "loss": 0.0073, "step": 15076 }, { "epoch": 2.9181501547987616, "grad_norm": 0.10196497291326523, "learning_rate": 8.262840259577444e-05, "loss": 0.0052, "step": 15077 }, { "epoch": 2.918343653250774, "grad_norm": 0.06857866793870926, "learning_rate": 8.262623676897447e-05, "loss": 0.0063, "step": 15078 }, { "epoch": 2.9185371517027865, "grad_norm": 0.08085349202156067, "learning_rate": 8.262407083946696e-05, "loss": 0.0062, "step": 15079 }, { "epoch": 2.9187306501547985, "grad_norm": 0.057668279856443405, "learning_rate": 8.262190480725993e-05, "loss": 0.0064, "step": 15080 }, { "epoch": 2.918924148606811, "grad_norm": 0.0660226047039032, "learning_rate": 8.261973867236151e-05, "loss": 0.008, "step": 15081 }, { "epoch": 2.9191176470588234, "grad_norm": 0.08153603225946426, "learning_rate": 8.261757243477968e-05, "loss": 0.0058, "step": 15082 }, { "epoch": 2.919311145510836, "grad_norm": 0.05747081711888313, "learning_rate": 8.261540609452253e-05, "loss": 0.0058, "step": 15083 }, { "epoch": 2.9195046439628483, "grad_norm": 0.06261894106864929, "learning_rate": 8.261323965159809e-05, "loss": 0.0086, "step": 15084 }, { "epoch": 2.9196981424148607, "grad_norm": 0.0511922724545002, "learning_rate": 8.261107310601446e-05, "loss": 0.0071, "step": 15085 }, { "epoch": 2.919891640866873, "grad_norm": 0.1052166149020195, "learning_rate": 8.260890645777964e-05, "loss": 0.007, "step": 15086 }, { "epoch": 2.9200851393188856, "grad_norm": 0.11101843416690826, "learning_rate": 8.260673970690172e-05, "loss": 0.0067, "step": 15087 }, { "epoch": 2.920278637770898, "grad_norm": 0.07062829285860062, "learning_rate": 8.260457285338873e-05, "loss": 0.0061, "step": 15088 }, { "epoch": 2.9204721362229105, "grad_norm": 0.09296010434627533, "learning_rate": 8.260240589724875e-05, "loss": 0.0068, "step": 15089 }, { "epoch": 2.9206656346749225, "grad_norm": 0.11019456386566162, "learning_rate": 8.260023883848981e-05, "loss": 0.0068, "step": 15090 }, { "epoch": 2.920859133126935, "grad_norm": 0.05445221811532974, "learning_rate": 8.259807167712001e-05, "loss": 0.0063, "step": 15091 }, { "epoch": 2.9210526315789473, "grad_norm": 0.1302148699760437, "learning_rate": 8.259590441314736e-05, "loss": 0.006, "step": 15092 }, { "epoch": 2.92124613003096, "grad_norm": 0.05995253473520279, "learning_rate": 8.259373704657995e-05, "loss": 0.0059, "step": 15093 }, { "epoch": 2.9214396284829722, "grad_norm": 0.10515661537647247, "learning_rate": 8.259156957742582e-05, "loss": 0.0054, "step": 15094 }, { "epoch": 2.9216331269349847, "grad_norm": 0.08606795966625214, "learning_rate": 8.258940200569304e-05, "loss": 0.0072, "step": 15095 }, { "epoch": 2.9218266253869967, "grad_norm": 0.08607925474643707, "learning_rate": 8.258723433138963e-05, "loss": 0.007, 
"step": 15096 }, { "epoch": 2.922020123839009, "grad_norm": 0.10080035775899887, "learning_rate": 8.258506655452372e-05, "loss": 0.0067, "step": 15097 }, { "epoch": 2.9222136222910216, "grad_norm": 0.054518233984708786, "learning_rate": 8.25828986751033e-05, "loss": 0.0087, "step": 15098 }, { "epoch": 2.922407120743034, "grad_norm": 0.09420152008533478, "learning_rate": 8.258073069313649e-05, "loss": 0.007, "step": 15099 }, { "epoch": 2.9226006191950464, "grad_norm": 0.06079515814781189, "learning_rate": 8.257856260863131e-05, "loss": 0.0072, "step": 15100 }, { "epoch": 2.922794117647059, "grad_norm": 0.06506462395191193, "learning_rate": 8.257639442159581e-05, "loss": 0.0071, "step": 15101 }, { "epoch": 2.9229876160990713, "grad_norm": 0.06082313507795334, "learning_rate": 8.257422613203809e-05, "loss": 0.0065, "step": 15102 }, { "epoch": 2.9231811145510838, "grad_norm": 0.041000861674547195, "learning_rate": 8.257205773996619e-05, "loss": 0.0063, "step": 15103 }, { "epoch": 2.923374613003096, "grad_norm": 0.05991233512759209, "learning_rate": 8.256988924538817e-05, "loss": 0.0074, "step": 15104 }, { "epoch": 2.923568111455108, "grad_norm": 0.056221213191747665, "learning_rate": 8.256772064831207e-05, "loss": 0.007, "step": 15105 }, { "epoch": 2.9237616099071206, "grad_norm": 0.05581451207399368, "learning_rate": 8.2565551948746e-05, "loss": 0.007, "step": 15106 }, { "epoch": 2.923955108359133, "grad_norm": 0.046307723969221115, "learning_rate": 8.2563383146698e-05, "loss": 0.007, "step": 15107 }, { "epoch": 2.9241486068111455, "grad_norm": 0.06517359614372253, "learning_rate": 8.256121424217615e-05, "loss": 0.0072, "step": 15108 }, { "epoch": 2.924342105263158, "grad_norm": 0.0946149155497551, "learning_rate": 8.255904523518847e-05, "loss": 0.007, "step": 15109 }, { "epoch": 2.9245356037151704, "grad_norm": 0.05393528193235397, "learning_rate": 8.255687612574307e-05, "loss": 0.007, "step": 15110 }, { "epoch": 2.9247291021671824, "grad_norm": 0.09366369992494583, "learning_rate": 8.255470691384798e-05, "loss": 0.0061, "step": 15111 }, { "epoch": 2.924922600619195, "grad_norm": 0.044482920318841934, "learning_rate": 8.255253759951129e-05, "loss": 0.005, "step": 15112 }, { "epoch": 2.9251160990712073, "grad_norm": 0.09070595353841782, "learning_rate": 8.255036818274105e-05, "loss": 0.0071, "step": 15113 }, { "epoch": 2.9253095975232197, "grad_norm": 0.038892634212970734, "learning_rate": 8.254819866354533e-05, "loss": 0.007, "step": 15114 }, { "epoch": 2.925503095975232, "grad_norm": 0.046359241008758545, "learning_rate": 8.254602904193218e-05, "loss": 0.0062, "step": 15115 }, { "epoch": 2.9256965944272446, "grad_norm": 0.04355519637465477, "learning_rate": 8.25438593179097e-05, "loss": 0.0082, "step": 15116 }, { "epoch": 2.925890092879257, "grad_norm": 0.03195415437221527, "learning_rate": 8.254168949148594e-05, "loss": 0.0061, "step": 15117 }, { "epoch": 2.9260835913312695, "grad_norm": 0.026821821928024292, "learning_rate": 8.253951956266896e-05, "loss": 0.0063, "step": 15118 }, { "epoch": 2.926277089783282, "grad_norm": 0.044643644243478775, "learning_rate": 8.253734953146683e-05, "loss": 0.0084, "step": 15119 }, { "epoch": 2.9264705882352944, "grad_norm": 0.026702823117375374, "learning_rate": 8.253517939788761e-05, "loss": 0.0061, "step": 15120 }, { "epoch": 2.9266640866873064, "grad_norm": 0.04379396140575409, "learning_rate": 8.25330091619394e-05, "loss": 0.0068, "step": 15121 }, { "epoch": 2.926857585139319, "grad_norm": 0.06369974464178085, "learning_rate": 8.253083882363024e-05, 
"loss": 0.0063, "step": 15122 }, { "epoch": 2.9270510835913313, "grad_norm": 0.039767637848854065, "learning_rate": 8.252866838296819e-05, "loss": 0.0067, "step": 15123 }, { "epoch": 2.9272445820433437, "grad_norm": 0.06730061769485474, "learning_rate": 8.252649783996134e-05, "loss": 0.0058, "step": 15124 }, { "epoch": 2.927438080495356, "grad_norm": 0.059298157691955566, "learning_rate": 8.252432719461777e-05, "loss": 0.0064, "step": 15125 }, { "epoch": 2.9276315789473686, "grad_norm": 0.033728208392858505, "learning_rate": 8.252215644694553e-05, "loss": 0.0067, "step": 15126 }, { "epoch": 2.9278250773993806, "grad_norm": 0.08755821734666824, "learning_rate": 8.251998559695269e-05, "loss": 0.0064, "step": 15127 }, { "epoch": 2.928018575851393, "grad_norm": 0.04690588638186455, "learning_rate": 8.251781464464731e-05, "loss": 0.005, "step": 15128 }, { "epoch": 2.9282120743034055, "grad_norm": 0.09216810017824173, "learning_rate": 8.251564359003748e-05, "loss": 0.005, "step": 15129 }, { "epoch": 2.928405572755418, "grad_norm": 0.034835848957300186, "learning_rate": 8.251347243313127e-05, "loss": 0.0083, "step": 15130 }, { "epoch": 2.9285990712074303, "grad_norm": 0.10634936392307281, "learning_rate": 8.251130117393677e-05, "loss": 0.0071, "step": 15131 }, { "epoch": 2.928792569659443, "grad_norm": 0.03389584273099899, "learning_rate": 8.2509129812462e-05, "loss": 0.0078, "step": 15132 }, { "epoch": 2.9289860681114552, "grad_norm": 0.0840369164943695, "learning_rate": 8.250695834871508e-05, "loss": 0.0065, "step": 15133 }, { "epoch": 2.9291795665634677, "grad_norm": 0.06410456448793411, "learning_rate": 8.250478678270406e-05, "loss": 0.0077, "step": 15134 }, { "epoch": 2.92937306501548, "grad_norm": 0.09650279581546783, "learning_rate": 8.250261511443702e-05, "loss": 0.0075, "step": 15135 }, { "epoch": 2.929566563467492, "grad_norm": 0.06384016573429108, "learning_rate": 8.250044334392204e-05, "loss": 0.007, "step": 15136 }, { "epoch": 2.9297600619195046, "grad_norm": 0.08795001357793808, "learning_rate": 8.249827147116718e-05, "loss": 0.0062, "step": 15137 }, { "epoch": 2.929953560371517, "grad_norm": 0.07079941779375076, "learning_rate": 8.249609949618053e-05, "loss": 0.0063, "step": 15138 }, { "epoch": 2.9301470588235294, "grad_norm": 0.057210907340049744, "learning_rate": 8.249392741897016e-05, "loss": 0.0063, "step": 15139 }, { "epoch": 2.930340557275542, "grad_norm": 0.08602426946163177, "learning_rate": 8.249175523954414e-05, "loss": 0.0062, "step": 15140 }, { "epoch": 2.9305340557275543, "grad_norm": 0.03170114383101463, "learning_rate": 8.248958295791054e-05, "loss": 0.0069, "step": 15141 }, { "epoch": 2.9307275541795663, "grad_norm": 0.0729905441403389, "learning_rate": 8.248741057407746e-05, "loss": 0.0057, "step": 15142 }, { "epoch": 2.9309210526315788, "grad_norm": 0.0361483171582222, "learning_rate": 8.248523808805294e-05, "loss": 0.0061, "step": 15143 }, { "epoch": 2.931114551083591, "grad_norm": 0.04371962323784828, "learning_rate": 8.24830654998451e-05, "loss": 0.006, "step": 15144 }, { "epoch": 2.9313080495356036, "grad_norm": 0.017357690259814262, "learning_rate": 8.248089280946198e-05, "loss": 0.006, "step": 15145 }, { "epoch": 2.931501547987616, "grad_norm": 0.06531047075986862, "learning_rate": 8.247872001691167e-05, "loss": 0.0058, "step": 15146 }, { "epoch": 2.9316950464396285, "grad_norm": 0.07908393442630768, "learning_rate": 8.247654712220225e-05, "loss": 0.0068, "step": 15147 }, { "epoch": 2.931888544891641, "grad_norm": 0.0902978852391243, "learning_rate": 
8.24743741253418e-05, "loss": 0.0052, "step": 15148 }, { "epoch": 2.9320820433436534, "grad_norm": 0.07604116201400757, "learning_rate": 8.24722010263384e-05, "loss": 0.0064, "step": 15149 }, { "epoch": 2.932275541795666, "grad_norm": 0.0580773688852787, "learning_rate": 8.247002782520014e-05, "loss": 0.0074, "step": 15150 }, { "epoch": 2.9324690402476783, "grad_norm": 0.08906033635139465, "learning_rate": 8.246785452193506e-05, "loss": 0.0078, "step": 15151 }, { "epoch": 2.9326625386996903, "grad_norm": 0.028290018439292908, "learning_rate": 8.246568111655128e-05, "loss": 0.0064, "step": 15152 }, { "epoch": 2.9328560371517027, "grad_norm": 0.0926906019449234, "learning_rate": 8.246350760905685e-05, "loss": 0.0054, "step": 15153 }, { "epoch": 2.933049535603715, "grad_norm": 0.04387539252638817, "learning_rate": 8.24613339994599e-05, "loss": 0.0061, "step": 15154 }, { "epoch": 2.9332430340557276, "grad_norm": 0.08023316413164139, "learning_rate": 8.245916028776844e-05, "loss": 0.0065, "step": 15155 }, { "epoch": 2.93343653250774, "grad_norm": 0.0837385356426239, "learning_rate": 8.245698647399061e-05, "loss": 0.0048, "step": 15156 }, { "epoch": 2.933630030959752, "grad_norm": 0.06260949373245239, "learning_rate": 8.245481255813446e-05, "loss": 0.0078, "step": 15157 }, { "epoch": 2.9338235294117645, "grad_norm": 0.10693725943565369, "learning_rate": 8.245263854020808e-05, "loss": 0.0081, "step": 15158 }, { "epoch": 2.934017027863777, "grad_norm": 0.08416043967008591, "learning_rate": 8.245046442021956e-05, "loss": 0.0052, "step": 15159 }, { "epoch": 2.9342105263157894, "grad_norm": 0.11721652746200562, "learning_rate": 8.244829019817697e-05, "loss": 0.009, "step": 15160 }, { "epoch": 2.934404024767802, "grad_norm": 0.1616121381521225, "learning_rate": 8.244611587408842e-05, "loss": 0.0076, "step": 15161 }, { "epoch": 2.9345975232198143, "grad_norm": 0.07655666768550873, "learning_rate": 8.244394144796194e-05, "loss": 0.0066, "step": 15162 }, { "epoch": 2.9347910216718267, "grad_norm": 0.13956482708454132, "learning_rate": 8.244176691980569e-05, "loss": 0.0058, "step": 15163 }, { "epoch": 2.934984520123839, "grad_norm": 0.11078332364559174, "learning_rate": 8.243959228962768e-05, "loss": 0.0071, "step": 15164 }, { "epoch": 2.9351780185758516, "grad_norm": 0.09489104896783829, "learning_rate": 8.243741755743604e-05, "loss": 0.0077, "step": 15165 }, { "epoch": 2.935371517027864, "grad_norm": 0.13062013685703278, "learning_rate": 8.243524272323883e-05, "loss": 0.0064, "step": 15166 }, { "epoch": 2.935565015479876, "grad_norm": 0.06900672614574432, "learning_rate": 8.243306778704415e-05, "loss": 0.0071, "step": 15167 }, { "epoch": 2.9357585139318885, "grad_norm": 0.10916517674922943, "learning_rate": 8.24308927488601e-05, "loss": 0.0064, "step": 15168 }, { "epoch": 2.935952012383901, "grad_norm": 0.06674432754516602, "learning_rate": 8.242871760869473e-05, "loss": 0.0063, "step": 15169 }, { "epoch": 2.9361455108359134, "grad_norm": 0.061288561671972275, "learning_rate": 8.242654236655617e-05, "loss": 0.0068, "step": 15170 }, { "epoch": 2.936339009287926, "grad_norm": 0.07949437201023102, "learning_rate": 8.242436702245248e-05, "loss": 0.0075, "step": 15171 }, { "epoch": 2.9365325077399382, "grad_norm": 0.04831305518746376, "learning_rate": 8.242219157639175e-05, "loss": 0.0081, "step": 15172 }, { "epoch": 2.9367260061919502, "grad_norm": 0.04988796263933182, "learning_rate": 8.242001602838205e-05, "loss": 0.0063, "step": 15173 }, { "epoch": 2.9369195046439627, "grad_norm": 0.034149132668972015, 
"learning_rate": 8.241784037843151e-05, "loss": 0.0053, "step": 15174 }, { "epoch": 2.937113003095975, "grad_norm": 0.06064126268029213, "learning_rate": 8.241566462654816e-05, "loss": 0.0072, "step": 15175 }, { "epoch": 2.9373065015479876, "grad_norm": 0.059457845985889435, "learning_rate": 8.241348877274015e-05, "loss": 0.0063, "step": 15176 }, { "epoch": 2.9375, "grad_norm": 0.030636770650744438, "learning_rate": 8.241131281701556e-05, "loss": 0.007, "step": 15177 }, { "epoch": 2.9376934984520124, "grad_norm": 0.05819789320230484, "learning_rate": 8.240913675938243e-05, "loss": 0.0071, "step": 15178 }, { "epoch": 2.937886996904025, "grad_norm": 0.05220366641879082, "learning_rate": 8.240696059984889e-05, "loss": 0.0068, "step": 15179 }, { "epoch": 2.9380804953560373, "grad_norm": 0.04095339775085449, "learning_rate": 8.240478433842305e-05, "loss": 0.0054, "step": 15180 }, { "epoch": 2.9382739938080498, "grad_norm": 0.06828716397285461, "learning_rate": 8.240260797511295e-05, "loss": 0.0061, "step": 15181 }, { "epoch": 2.9384674922600618, "grad_norm": 0.03261466696858406, "learning_rate": 8.24004315099267e-05, "loss": 0.0063, "step": 15182 }, { "epoch": 2.938660990712074, "grad_norm": 0.07390492409467697, "learning_rate": 8.239825494287242e-05, "loss": 0.0063, "step": 15183 }, { "epoch": 2.9388544891640866, "grad_norm": 0.052630335092544556, "learning_rate": 8.239607827395816e-05, "loss": 0.0065, "step": 15184 }, { "epoch": 2.939047987616099, "grad_norm": 0.04230126738548279, "learning_rate": 8.239390150319204e-05, "loss": 0.0066, "step": 15185 }, { "epoch": 2.9392414860681115, "grad_norm": 0.053354695439338684, "learning_rate": 8.239172463058215e-05, "loss": 0.0071, "step": 15186 }, { "epoch": 2.939434984520124, "grad_norm": 0.04817672818899155, "learning_rate": 8.238954765613657e-05, "loss": 0.0062, "step": 15187 }, { "epoch": 2.939628482972136, "grad_norm": 0.056639377027750015, "learning_rate": 8.23873705798634e-05, "loss": 0.0066, "step": 15188 }, { "epoch": 2.9398219814241484, "grad_norm": 0.07337728142738342, "learning_rate": 8.238519340177073e-05, "loss": 0.008, "step": 15189 }, { "epoch": 2.940015479876161, "grad_norm": 0.06074371561408043, "learning_rate": 8.238301612186667e-05, "loss": 0.0063, "step": 15190 }, { "epoch": 2.9402089783281733, "grad_norm": 0.0667869970202446, "learning_rate": 8.238083874015931e-05, "loss": 0.0073, "step": 15191 }, { "epoch": 2.9404024767801857, "grad_norm": 0.07771776616573334, "learning_rate": 8.237866125665672e-05, "loss": 0.0065, "step": 15192 }, { "epoch": 2.940595975232198, "grad_norm": 0.05899680778384209, "learning_rate": 8.237648367136701e-05, "loss": 0.0079, "step": 15193 }, { "epoch": 2.9407894736842106, "grad_norm": 0.08185942471027374, "learning_rate": 8.237430598429829e-05, "loss": 0.0068, "step": 15194 }, { "epoch": 2.940982972136223, "grad_norm": 0.0579049177467823, "learning_rate": 8.237212819545865e-05, "loss": 0.0067, "step": 15195 }, { "epoch": 2.9411764705882355, "grad_norm": 0.06833459436893463, "learning_rate": 8.236995030485617e-05, "loss": 0.0073, "step": 15196 }, { "epoch": 2.941369969040248, "grad_norm": 0.0373382493853569, "learning_rate": 8.2367772312499e-05, "loss": 0.0069, "step": 15197 }, { "epoch": 2.94156346749226, "grad_norm": 0.0591873936355114, "learning_rate": 8.236559421839515e-05, "loss": 0.0086, "step": 15198 }, { "epoch": 2.9417569659442724, "grad_norm": 0.046403225511312485, "learning_rate": 8.236341602255278e-05, "loss": 0.0072, "step": 15199 }, { "epoch": 2.941950464396285, "grad_norm": 
0.03927428275346756, "learning_rate": 8.236123772497995e-05, "loss": 0.008, "step": 15200 }, { "epoch": 2.9421439628482973, "grad_norm": 0.05973624438047409, "learning_rate": 8.23590593256848e-05, "loss": 0.0081, "step": 15201 }, { "epoch": 2.9423374613003097, "grad_norm": 0.04466531425714493, "learning_rate": 8.235688082467541e-05, "loss": 0.0065, "step": 15202 }, { "epoch": 2.9425309597523217, "grad_norm": 0.07193194329738617, "learning_rate": 8.235470222195988e-05, "loss": 0.0077, "step": 15203 }, { "epoch": 2.942724458204334, "grad_norm": 0.06272964179515839, "learning_rate": 8.23525235175463e-05, "loss": 0.0076, "step": 15204 }, { "epoch": 2.9429179566563466, "grad_norm": 0.08281108736991882, "learning_rate": 8.235034471144279e-05, "loss": 0.007, "step": 15205 }, { "epoch": 2.943111455108359, "grad_norm": 0.10147243738174438, "learning_rate": 8.234816580365742e-05, "loss": 0.0067, "step": 15206 }, { "epoch": 2.9433049535603715, "grad_norm": 0.08289336413145065, "learning_rate": 8.234598679419831e-05, "loss": 0.007, "step": 15207 }, { "epoch": 2.943498452012384, "grad_norm": 0.1321662962436676, "learning_rate": 8.234380768307357e-05, "loss": 0.0068, "step": 15208 }, { "epoch": 2.9436919504643964, "grad_norm": 0.06685299426317215, "learning_rate": 8.234162847029129e-05, "loss": 0.0072, "step": 15209 }, { "epoch": 2.943885448916409, "grad_norm": 0.15843415260314941, "learning_rate": 8.233944915585956e-05, "loss": 0.0063, "step": 15210 }, { "epoch": 2.9440789473684212, "grad_norm": 0.08141818642616272, "learning_rate": 8.23372697397865e-05, "loss": 0.0056, "step": 15211 }, { "epoch": 2.9442724458204337, "grad_norm": 0.10879582166671753, "learning_rate": 8.233509022208022e-05, "loss": 0.0075, "step": 15212 }, { "epoch": 2.9444659442724457, "grad_norm": 0.12702828645706177, "learning_rate": 8.23329106027488e-05, "loss": 0.0067, "step": 15213 }, { "epoch": 2.944659442724458, "grad_norm": 0.07087816298007965, "learning_rate": 8.233073088180036e-05, "loss": 0.0064, "step": 15214 }, { "epoch": 2.9448529411764706, "grad_norm": 0.14273399114608765, "learning_rate": 8.232855105924297e-05, "loss": 0.0073, "step": 15215 }, { "epoch": 2.945046439628483, "grad_norm": 0.055478740483522415, "learning_rate": 8.232637113508479e-05, "loss": 0.008, "step": 15216 }, { "epoch": 2.9452399380804954, "grad_norm": 0.09263662993907928, "learning_rate": 8.232419110933389e-05, "loss": 0.0076, "step": 15217 }, { "epoch": 2.945433436532508, "grad_norm": 0.08399120718240738, "learning_rate": 8.232201098199838e-05, "loss": 0.0073, "step": 15218 }, { "epoch": 2.94562693498452, "grad_norm": 0.025193262845277786, "learning_rate": 8.231983075308635e-05, "loss": 0.0067, "step": 15219 }, { "epoch": 2.9458204334365323, "grad_norm": 0.10148913413286209, "learning_rate": 8.231765042260594e-05, "loss": 0.0058, "step": 15220 }, { "epoch": 2.9460139318885448, "grad_norm": 0.059921909123659134, "learning_rate": 8.231546999056524e-05, "loss": 0.0077, "step": 15221 }, { "epoch": 2.946207430340557, "grad_norm": 0.06162342056632042, "learning_rate": 8.231328945697236e-05, "loss": 0.0059, "step": 15222 }, { "epoch": 2.9464009287925697, "grad_norm": 0.0531008280813694, "learning_rate": 8.231110882183536e-05, "loss": 0.0076, "step": 15223 }, { "epoch": 2.946594427244582, "grad_norm": 0.09867909550666809, "learning_rate": 8.230892808516243e-05, "loss": 0.0068, "step": 15224 }, { "epoch": 2.9467879256965945, "grad_norm": 0.066287562251091, "learning_rate": 8.230674724696161e-05, "loss": 0.0068, "step": 15225 }, { "epoch": 2.946981424148607, 
"grad_norm": 0.09899581968784332, "learning_rate": 8.230456630724104e-05, "loss": 0.0064, "step": 15226 }, { "epoch": 2.9471749226006194, "grad_norm": 0.06320302933454514, "learning_rate": 8.23023852660088e-05, "loss": 0.006, "step": 15227 }, { "epoch": 2.9473684210526314, "grad_norm": 0.09643692523241043, "learning_rate": 8.230020412327306e-05, "loss": 0.0071, "step": 15228 }, { "epoch": 2.947561919504644, "grad_norm": 0.08222543448209763, "learning_rate": 8.229802287904186e-05, "loss": 0.007, "step": 15229 }, { "epoch": 2.9477554179566563, "grad_norm": 0.056987084448337555, "learning_rate": 8.229584153332334e-05, "loss": 0.0066, "step": 15230 }, { "epoch": 2.9479489164086687, "grad_norm": 0.1384967714548111, "learning_rate": 8.229366008612562e-05, "loss": 0.0071, "step": 15231 }, { "epoch": 2.948142414860681, "grad_norm": 0.03258943185210228, "learning_rate": 8.229147853745677e-05, "loss": 0.0054, "step": 15232 }, { "epoch": 2.9483359133126936, "grad_norm": 0.15366299450397491, "learning_rate": 8.228929688732496e-05, "loss": 0.0088, "step": 15233 }, { "epoch": 2.9485294117647056, "grad_norm": 0.03920157253742218, "learning_rate": 8.228711513573824e-05, "loss": 0.0071, "step": 15234 }, { "epoch": 2.948722910216718, "grad_norm": 0.10180827975273132, "learning_rate": 8.228493328270477e-05, "loss": 0.0072, "step": 15235 }, { "epoch": 2.9489164086687305, "grad_norm": 0.08994201570749283, "learning_rate": 8.228275132823262e-05, "loss": 0.006, "step": 15236 }, { "epoch": 2.949109907120743, "grad_norm": 0.06846748292446136, "learning_rate": 8.228056927232994e-05, "loss": 0.0064, "step": 15237 }, { "epoch": 2.9493034055727554, "grad_norm": 0.09186945855617523, "learning_rate": 8.227838711500483e-05, "loss": 0.0073, "step": 15238 }, { "epoch": 2.949496904024768, "grad_norm": 0.03489527851343155, "learning_rate": 8.22762048562654e-05, "loss": 0.007, "step": 15239 }, { "epoch": 2.9496904024767803, "grad_norm": 0.07645570486783981, "learning_rate": 8.227402249611975e-05, "loss": 0.0057, "step": 15240 }, { "epoch": 2.9498839009287927, "grad_norm": 0.05432485044002533, "learning_rate": 8.227184003457601e-05, "loss": 0.0067, "step": 15241 }, { "epoch": 2.950077399380805, "grad_norm": 0.05637192726135254, "learning_rate": 8.226965747164228e-05, "loss": 0.008, "step": 15242 }, { "epoch": 2.9502708978328176, "grad_norm": 0.08287837356328964, "learning_rate": 8.226747480732668e-05, "loss": 0.0066, "step": 15243 }, { "epoch": 2.9504643962848296, "grad_norm": 0.05156978592276573, "learning_rate": 8.226529204163735e-05, "loss": 0.0071, "step": 15244 }, { "epoch": 2.950657894736842, "grad_norm": 0.04955901578068733, "learning_rate": 8.226310917458236e-05, "loss": 0.0073, "step": 15245 }, { "epoch": 2.9508513931888545, "grad_norm": 0.05621962994337082, "learning_rate": 8.226092620616987e-05, "loss": 0.0065, "step": 15246 }, { "epoch": 2.951044891640867, "grad_norm": 0.029147997498512268, "learning_rate": 8.225874313640794e-05, "loss": 0.006, "step": 15247 }, { "epoch": 2.9512383900928794, "grad_norm": 0.06627822667360306, "learning_rate": 8.225655996530475e-05, "loss": 0.0056, "step": 15248 }, { "epoch": 2.951431888544892, "grad_norm": 0.04332950338721275, "learning_rate": 8.225437669286839e-05, "loss": 0.0069, "step": 15249 }, { "epoch": 2.951625386996904, "grad_norm": 0.06880175322294235, "learning_rate": 8.225219331910695e-05, "loss": 0.0068, "step": 15250 }, { "epoch": 2.9518188854489162, "grad_norm": 0.043121322989463806, "learning_rate": 8.225000984402858e-05, "loss": 0.0066, "step": 15251 }, { "epoch": 
2.9520123839009287, "grad_norm": 0.0676194354891777, "learning_rate": 8.224782626764138e-05, "loss": 0.0076, "step": 15252 }, { "epoch": 2.952205882352941, "grad_norm": 0.07804978638887405, "learning_rate": 8.22456425899535e-05, "loss": 0.0049, "step": 15253 }, { "epoch": 2.9523993808049536, "grad_norm": 0.05478764697909355, "learning_rate": 8.224345881097301e-05, "loss": 0.0064, "step": 15254 }, { "epoch": 2.952592879256966, "grad_norm": 0.06251054257154465, "learning_rate": 8.224127493070806e-05, "loss": 0.0065, "step": 15255 }, { "epoch": 2.9527863777089784, "grad_norm": 0.04739109054207802, "learning_rate": 8.223909094916676e-05, "loss": 0.0076, "step": 15256 }, { "epoch": 2.952979876160991, "grad_norm": 0.04032222554087639, "learning_rate": 8.223690686635724e-05, "loss": 0.007, "step": 15257 }, { "epoch": 2.9531733746130033, "grad_norm": 0.04131486639380455, "learning_rate": 8.22347226822876e-05, "loss": 0.0057, "step": 15258 }, { "epoch": 2.9533668730650153, "grad_norm": 0.06720364838838577, "learning_rate": 8.223253839696597e-05, "loss": 0.0065, "step": 15259 }, { "epoch": 2.9535603715170278, "grad_norm": 0.03868032246828079, "learning_rate": 8.223035401040049e-05, "loss": 0.008, "step": 15260 }, { "epoch": 2.95375386996904, "grad_norm": 0.0806732326745987, "learning_rate": 8.222816952259926e-05, "loss": 0.0054, "step": 15261 }, { "epoch": 2.9539473684210527, "grad_norm": 0.06529932469129562, "learning_rate": 8.22259849335704e-05, "loss": 0.0051, "step": 15262 }, { "epoch": 2.954140866873065, "grad_norm": 0.05279122665524483, "learning_rate": 8.222380024332204e-05, "loss": 0.0062, "step": 15263 }, { "epoch": 2.9543343653250775, "grad_norm": 0.125966876745224, "learning_rate": 8.222161545186228e-05, "loss": 0.0064, "step": 15264 }, { "epoch": 2.9545278637770895, "grad_norm": 0.019997915253043175, "learning_rate": 8.221943055919928e-05, "loss": 0.0065, "step": 15265 }, { "epoch": 2.954721362229102, "grad_norm": 0.11286061257123947, "learning_rate": 8.221724556534114e-05, "loss": 0.0068, "step": 15266 }, { "epoch": 2.9549148606811144, "grad_norm": 0.04144645854830742, "learning_rate": 8.221506047029599e-05, "loss": 0.0071, "step": 15267 }, { "epoch": 2.955108359133127, "grad_norm": 0.09556671977043152, "learning_rate": 8.221287527407195e-05, "loss": 0.0069, "step": 15268 }, { "epoch": 2.9553018575851393, "grad_norm": 0.05814246088266373, "learning_rate": 8.221068997667715e-05, "loss": 0.0054, "step": 15269 }, { "epoch": 2.9554953560371517, "grad_norm": 0.07702149450778961, "learning_rate": 8.22085045781197e-05, "loss": 0.0066, "step": 15270 }, { "epoch": 2.955688854489164, "grad_norm": 0.09363117814064026, "learning_rate": 8.220631907840774e-05, "loss": 0.0067, "step": 15271 }, { "epoch": 2.9558823529411766, "grad_norm": 0.05942271649837494, "learning_rate": 8.22041334775494e-05, "loss": 0.0054, "step": 15272 }, { "epoch": 2.956075851393189, "grad_norm": 0.08195037394762039, "learning_rate": 8.220194777555276e-05, "loss": 0.0069, "step": 15273 }, { "epoch": 2.9562693498452015, "grad_norm": 0.03670015558600426, "learning_rate": 8.219976197242601e-05, "loss": 0.0074, "step": 15274 }, { "epoch": 2.9564628482972135, "grad_norm": 0.04901985079050064, "learning_rate": 8.219757606817724e-05, "loss": 0.0051, "step": 15275 }, { "epoch": 2.956656346749226, "grad_norm": 0.05720330402255058, "learning_rate": 8.219539006281459e-05, "loss": 0.007, "step": 15276 }, { "epoch": 2.9568498452012384, "grad_norm": 0.05304465442895889, "learning_rate": 8.219320395634617e-05, "loss": 0.0064, "step": 15277 }, 
{ "epoch": 2.957043343653251, "grad_norm": 0.06200587749481201, "learning_rate": 8.219101774878013e-05, "loss": 0.0059, "step": 15278 }, { "epoch": 2.9572368421052633, "grad_norm": 0.04355982691049576, "learning_rate": 8.218883144012459e-05, "loss": 0.0076, "step": 15279 }, { "epoch": 2.9574303405572753, "grad_norm": 0.05881846323609352, "learning_rate": 8.218664503038765e-05, "loss": 0.0069, "step": 15280 }, { "epoch": 2.9576238390092877, "grad_norm": 0.038098085671663284, "learning_rate": 8.218445851957747e-05, "loss": 0.006, "step": 15281 }, { "epoch": 2.9578173374613, "grad_norm": 0.033495333045721054, "learning_rate": 8.218227190770219e-05, "loss": 0.0069, "step": 15282 }, { "epoch": 2.9580108359133126, "grad_norm": 0.04468851909041405, "learning_rate": 8.21800851947699e-05, "loss": 0.0063, "step": 15283 }, { "epoch": 2.958204334365325, "grad_norm": 0.0517374686896801, "learning_rate": 8.217789838078877e-05, "loss": 0.0064, "step": 15284 }, { "epoch": 2.9583978328173375, "grad_norm": 0.027624912559986115, "learning_rate": 8.21757114657669e-05, "loss": 0.0069, "step": 15285 }, { "epoch": 2.95859133126935, "grad_norm": 0.05321241542696953, "learning_rate": 8.217352444971242e-05, "loss": 0.0066, "step": 15286 }, { "epoch": 2.9587848297213624, "grad_norm": 0.029278934001922607, "learning_rate": 8.217133733263349e-05, "loss": 0.0059, "step": 15287 }, { "epoch": 2.958978328173375, "grad_norm": 0.026807455345988274, "learning_rate": 8.216915011453822e-05, "loss": 0.0068, "step": 15288 }, { "epoch": 2.9591718266253872, "grad_norm": 0.04245290905237198, "learning_rate": 8.216696279543473e-05, "loss": 0.0066, "step": 15289 }, { "epoch": 2.9593653250773992, "grad_norm": 0.017663374543190002, "learning_rate": 8.216477537533118e-05, "loss": 0.0054, "step": 15290 }, { "epoch": 2.9595588235294117, "grad_norm": 0.0373883880674839, "learning_rate": 8.21625878542357e-05, "loss": 0.0056, "step": 15291 }, { "epoch": 2.959752321981424, "grad_norm": 0.01750197261571884, "learning_rate": 8.21604002321564e-05, "loss": 0.0051, "step": 15292 }, { "epoch": 2.9599458204334366, "grad_norm": 0.03712387755513191, "learning_rate": 8.215821250910142e-05, "loss": 0.0059, "step": 15293 }, { "epoch": 2.960139318885449, "grad_norm": 0.025462452322244644, "learning_rate": 8.21560246850789e-05, "loss": 0.0063, "step": 15294 }, { "epoch": 2.9603328173374615, "grad_norm": 0.02846599742770195, "learning_rate": 8.215383676009696e-05, "loss": 0.0071, "step": 15295 }, { "epoch": 2.9605263157894735, "grad_norm": 0.020709212869405746, "learning_rate": 8.215164873416377e-05, "loss": 0.0049, "step": 15296 }, { "epoch": 2.960719814241486, "grad_norm": 0.03232249245047569, "learning_rate": 8.214946060728742e-05, "loss": 0.0068, "step": 15297 }, { "epoch": 2.9609133126934983, "grad_norm": 0.04066953808069229, "learning_rate": 8.214727237947607e-05, "loss": 0.0067, "step": 15298 }, { "epoch": 2.9611068111455108, "grad_norm": 0.026608314365148544, "learning_rate": 8.214508405073785e-05, "loss": 0.0056, "step": 15299 }, { "epoch": 2.961300309597523, "grad_norm": 0.046523112803697586, "learning_rate": 8.21428956210809e-05, "loss": 0.007, "step": 15300 }, { "epoch": 2.9614938080495357, "grad_norm": 0.035190172493457794, "learning_rate": 8.214070709051335e-05, "loss": 0.0064, "step": 15301 }, { "epoch": 2.961687306501548, "grad_norm": 0.04766770824790001, "learning_rate": 8.213851845904333e-05, "loss": 0.0057, "step": 15302 }, { "epoch": 2.9618808049535605, "grad_norm": 0.04641337692737579, "learning_rate": 8.2136329726679e-05, "loss": 
0.0072, "step": 15303 }, { "epoch": 2.962074303405573, "grad_norm": 0.05876104161143303, "learning_rate": 8.213414089342849e-05, "loss": 0.0077, "step": 15304 }, { "epoch": 2.962267801857585, "grad_norm": 0.0412542000412941, "learning_rate": 8.213195195929991e-05, "loss": 0.0059, "step": 15305 }, { "epoch": 2.9624613003095974, "grad_norm": 0.059322379529476166, "learning_rate": 8.212976292430142e-05, "loss": 0.0062, "step": 15306 }, { "epoch": 2.96265479876161, "grad_norm": 0.03808813914656639, "learning_rate": 8.212757378844117e-05, "loss": 0.0061, "step": 15307 }, { "epoch": 2.9628482972136223, "grad_norm": 0.03311784192919731, "learning_rate": 8.212538455172728e-05, "loss": 0.0057, "step": 15308 }, { "epoch": 2.9630417956656347, "grad_norm": 0.04040207341313362, "learning_rate": 8.212319521416788e-05, "loss": 0.0064, "step": 15309 }, { "epoch": 2.963235294117647, "grad_norm": 0.02944548986852169, "learning_rate": 8.212100577577115e-05, "loss": 0.0073, "step": 15310 }, { "epoch": 2.963428792569659, "grad_norm": 0.040434855967760086, "learning_rate": 8.211881623654518e-05, "loss": 0.007, "step": 15311 }, { "epoch": 2.9636222910216716, "grad_norm": 0.048453520983457565, "learning_rate": 8.211662659649814e-05, "loss": 0.0072, "step": 15312 }, { "epoch": 2.963815789473684, "grad_norm": 0.03978889808058739, "learning_rate": 8.211443685563818e-05, "loss": 0.007, "step": 15313 }, { "epoch": 2.9640092879256965, "grad_norm": 0.04353795200586319, "learning_rate": 8.21122470139734e-05, "loss": 0.0071, "step": 15314 }, { "epoch": 2.964202786377709, "grad_norm": 0.05743393674492836, "learning_rate": 8.211005707151198e-05, "loss": 0.0073, "step": 15315 }, { "epoch": 2.9643962848297214, "grad_norm": 0.027606381103396416, "learning_rate": 8.210786702826206e-05, "loss": 0.0059, "step": 15316 }, { "epoch": 2.964589783281734, "grad_norm": 0.08970653265714645, "learning_rate": 8.210567688423174e-05, "loss": 0.0073, "step": 15317 }, { "epoch": 2.9647832817337463, "grad_norm": 0.04169391840696335, "learning_rate": 8.210348663942922e-05, "loss": 0.0065, "step": 15318 }, { "epoch": 2.9649767801857587, "grad_norm": 0.09368917346000671, "learning_rate": 8.210129629386259e-05, "loss": 0.0067, "step": 15319 }, { "epoch": 2.965170278637771, "grad_norm": 0.04757098853588104, "learning_rate": 8.209910584754003e-05, "loss": 0.0067, "step": 15320 }, { "epoch": 2.965363777089783, "grad_norm": 0.07483617961406708, "learning_rate": 8.209691530046968e-05, "loss": 0.0091, "step": 15321 }, { "epoch": 2.9655572755417956, "grad_norm": 0.040697671473026276, "learning_rate": 8.209472465265967e-05, "loss": 0.0065, "step": 15322 }, { "epoch": 2.965750773993808, "grad_norm": 0.03774655610322952, "learning_rate": 8.209253390411815e-05, "loss": 0.0061, "step": 15323 }, { "epoch": 2.9659442724458205, "grad_norm": 0.06609867513179779, "learning_rate": 8.209034305485326e-05, "loss": 0.0057, "step": 15324 }, { "epoch": 2.966137770897833, "grad_norm": 0.019509321078658104, "learning_rate": 8.208815210487316e-05, "loss": 0.0053, "step": 15325 }, { "epoch": 2.9663312693498454, "grad_norm": 0.07174770534038544, "learning_rate": 8.208596105418598e-05, "loss": 0.0066, "step": 15326 }, { "epoch": 2.9665247678018574, "grad_norm": 0.04281029850244522, "learning_rate": 8.208376990279987e-05, "loss": 0.006, "step": 15327 }, { "epoch": 2.96671826625387, "grad_norm": 0.059653088450431824, "learning_rate": 8.208157865072296e-05, "loss": 0.007, "step": 15328 }, { "epoch": 2.9669117647058822, "grad_norm": 0.06760513782501221, "learning_rate": 
8.207938729796345e-05, "loss": 0.006, "step": 15329 }, { "epoch": 2.9671052631578947, "grad_norm": 0.06042863801121712, "learning_rate": 8.207719584452943e-05, "loss": 0.0061, "step": 15330 }, { "epoch": 2.967298761609907, "grad_norm": 0.07408032566308975, "learning_rate": 8.207500429042904e-05, "loss": 0.0073, "step": 15331 }, { "epoch": 2.9674922600619196, "grad_norm": 0.0620703250169754, "learning_rate": 8.207281263567049e-05, "loss": 0.0076, "step": 15332 }, { "epoch": 2.967685758513932, "grad_norm": 0.04982313513755798, "learning_rate": 8.20706208802619e-05, "loss": 0.0044, "step": 15333 }, { "epoch": 2.9678792569659445, "grad_norm": 0.06384672969579697, "learning_rate": 8.20684290242114e-05, "loss": 0.0068, "step": 15334 }, { "epoch": 2.968072755417957, "grad_norm": 0.05322954058647156, "learning_rate": 8.206623706752715e-05, "loss": 0.0066, "step": 15335 }, { "epoch": 2.968266253869969, "grad_norm": 0.04782611504197121, "learning_rate": 8.20640450102173e-05, "loss": 0.007, "step": 15336 }, { "epoch": 2.9684597523219813, "grad_norm": 0.030607419088482857, "learning_rate": 8.206185285229e-05, "loss": 0.0059, "step": 15337 }, { "epoch": 2.968653250773994, "grad_norm": 0.05627722293138504, "learning_rate": 8.20596605937534e-05, "loss": 0.0069, "step": 15338 }, { "epoch": 2.968846749226006, "grad_norm": 0.043283816426992416, "learning_rate": 8.205746823461565e-05, "loss": 0.0062, "step": 15339 }, { "epoch": 2.9690402476780187, "grad_norm": 0.026124268770217896, "learning_rate": 8.20552757748849e-05, "loss": 0.0058, "step": 15340 }, { "epoch": 2.969233746130031, "grad_norm": 0.0456174798309803, "learning_rate": 8.205308321456929e-05, "loss": 0.0061, "step": 15341 }, { "epoch": 2.969427244582043, "grad_norm": 0.04115191474556923, "learning_rate": 8.2050890553677e-05, "loss": 0.0072, "step": 15342 }, { "epoch": 2.9696207430340555, "grad_norm": 0.03960319235920906, "learning_rate": 8.204869779221615e-05, "loss": 0.0083, "step": 15343 }, { "epoch": 2.969814241486068, "grad_norm": 0.0329570472240448, "learning_rate": 8.204650493019493e-05, "loss": 0.0074, "step": 15344 }, { "epoch": 2.9700077399380804, "grad_norm": 0.048534244298934937, "learning_rate": 8.204431196762144e-05, "loss": 0.0069, "step": 15345 }, { "epoch": 2.970201238390093, "grad_norm": 0.024731053039431572, "learning_rate": 8.204211890450386e-05, "loss": 0.0074, "step": 15346 }, { "epoch": 2.9703947368421053, "grad_norm": 0.04774786904454231, "learning_rate": 8.203992574085037e-05, "loss": 0.0079, "step": 15347 }, { "epoch": 2.9705882352941178, "grad_norm": 0.02667161263525486, "learning_rate": 8.20377324766691e-05, "loss": 0.0058, "step": 15348 }, { "epoch": 2.97078173374613, "grad_norm": 0.04049695283174515, "learning_rate": 8.203553911196819e-05, "loss": 0.0072, "step": 15349 }, { "epoch": 2.9709752321981426, "grad_norm": 0.035935305058956146, "learning_rate": 8.20333456467558e-05, "loss": 0.0067, "step": 15350 }, { "epoch": 2.9711687306501546, "grad_norm": 0.0358356349170208, "learning_rate": 8.203115208104009e-05, "loss": 0.0066, "step": 15351 }, { "epoch": 2.971362229102167, "grad_norm": 0.04552140459418297, "learning_rate": 8.202895841482924e-05, "loss": 0.0057, "step": 15352 }, { "epoch": 2.9715557275541795, "grad_norm": 0.026833105832338333, "learning_rate": 8.202676464813137e-05, "loss": 0.0076, "step": 15353 }, { "epoch": 2.971749226006192, "grad_norm": 0.04621711000800133, "learning_rate": 8.202457078095465e-05, "loss": 0.0066, "step": 15354 }, { "epoch": 2.9719427244582044, "grad_norm": 0.0496283657848835, 
"learning_rate": 8.202237681330724e-05, "loss": 0.0072, "step": 15355 }, { "epoch": 2.972136222910217, "grad_norm": 0.0321817547082901, "learning_rate": 8.202018274519727e-05, "loss": 0.0064, "step": 15356 }, { "epoch": 2.972329721362229, "grad_norm": 0.042533453553915024, "learning_rate": 8.201798857663295e-05, "loss": 0.0054, "step": 15357 }, { "epoch": 2.9725232198142413, "grad_norm": 0.04118246212601662, "learning_rate": 8.201579430762238e-05, "loss": 0.0068, "step": 15358 }, { "epoch": 2.9727167182662537, "grad_norm": 0.056189171969890594, "learning_rate": 8.201359993817375e-05, "loss": 0.0068, "step": 15359 }, { "epoch": 2.972910216718266, "grad_norm": 0.05640275403857231, "learning_rate": 8.201140546829522e-05, "loss": 0.0081, "step": 15360 }, { "epoch": 2.9731037151702786, "grad_norm": 0.05423747003078461, "learning_rate": 8.200921089799494e-05, "loss": 0.0065, "step": 15361 }, { "epoch": 2.973297213622291, "grad_norm": 0.05115759000182152, "learning_rate": 8.200701622728108e-05, "loss": 0.0063, "step": 15362 }, { "epoch": 2.9734907120743035, "grad_norm": 0.06094157695770264, "learning_rate": 8.200482145616176e-05, "loss": 0.0073, "step": 15363 }, { "epoch": 2.973684210526316, "grad_norm": 0.05504923686385155, "learning_rate": 8.200262658464519e-05, "loss": 0.0078, "step": 15364 }, { "epoch": 2.9738777089783284, "grad_norm": 0.05006632208824158, "learning_rate": 8.200043161273949e-05, "loss": 0.0079, "step": 15365 }, { "epoch": 2.974071207430341, "grad_norm": 0.07335608452558517, "learning_rate": 8.199823654045284e-05, "loss": 0.0057, "step": 15366 }, { "epoch": 2.974264705882353, "grad_norm": 0.04306716099381447, "learning_rate": 8.19960413677934e-05, "loss": 0.0067, "step": 15367 }, { "epoch": 2.9744582043343653, "grad_norm": 0.06382033228874207, "learning_rate": 8.199384609476935e-05, "loss": 0.0065, "step": 15368 }, { "epoch": 2.9746517027863777, "grad_norm": 0.05172649025917053, "learning_rate": 8.199165072138882e-05, "loss": 0.0069, "step": 15369 }, { "epoch": 2.97484520123839, "grad_norm": 0.06926679611206055, "learning_rate": 8.198945524765998e-05, "loss": 0.0058, "step": 15370 }, { "epoch": 2.9750386996904026, "grad_norm": 0.06979184597730637, "learning_rate": 8.198725967359099e-05, "loss": 0.0071, "step": 15371 }, { "epoch": 2.975232198142415, "grad_norm": 0.06255684792995453, "learning_rate": 8.198506399919004e-05, "loss": 0.006, "step": 15372 }, { "epoch": 2.975425696594427, "grad_norm": 0.06041024625301361, "learning_rate": 8.198286822446525e-05, "loss": 0.0073, "step": 15373 }, { "epoch": 2.9756191950464395, "grad_norm": 0.03612755239009857, "learning_rate": 8.198067234942482e-05, "loss": 0.0083, "step": 15374 }, { "epoch": 2.975812693498452, "grad_norm": 0.05892634764313698, "learning_rate": 8.197847637407687e-05, "loss": 0.0068, "step": 15375 }, { "epoch": 2.9760061919504643, "grad_norm": 0.028796102851629257, "learning_rate": 8.197628029842962e-05, "loss": 0.0062, "step": 15376 }, { "epoch": 2.976199690402477, "grad_norm": 0.06234700232744217, "learning_rate": 8.197408412249121e-05, "loss": 0.0063, "step": 15377 }, { "epoch": 2.9763931888544892, "grad_norm": 0.04812251403927803, "learning_rate": 8.197188784626978e-05, "loss": 0.006, "step": 15378 }, { "epoch": 2.9765866873065017, "grad_norm": 0.07613256573677063, "learning_rate": 8.196969146977353e-05, "loss": 0.0071, "step": 15379 }, { "epoch": 2.976780185758514, "grad_norm": 0.047497060149908066, "learning_rate": 8.19674949930106e-05, "loss": 0.0069, "step": 15380 }, { "epoch": 2.9769736842105265, "grad_norm": 
0.08190268278121948, "learning_rate": 8.19652984159892e-05, "loss": 0.0055, "step": 15381 }, { "epoch": 2.9771671826625385, "grad_norm": 0.03505755588412285, "learning_rate": 8.196310173871744e-05, "loss": 0.0071, "step": 15382 }, { "epoch": 2.977360681114551, "grad_norm": 0.08523774147033691, "learning_rate": 8.196090496120352e-05, "loss": 0.0062, "step": 15383 }, { "epoch": 2.9775541795665634, "grad_norm": 0.04271665960550308, "learning_rate": 8.195870808345559e-05, "loss": 0.0078, "step": 15384 }, { "epoch": 2.977747678018576, "grad_norm": 0.08819470554590225, "learning_rate": 8.195651110548184e-05, "loss": 0.0061, "step": 15385 }, { "epoch": 2.9779411764705883, "grad_norm": 0.04522686451673508, "learning_rate": 8.195431402729041e-05, "loss": 0.0058, "step": 15386 }, { "epoch": 2.9781346749226008, "grad_norm": 0.0831618458032608, "learning_rate": 8.195211684888948e-05, "loss": 0.006, "step": 15387 }, { "epoch": 2.9783281733746128, "grad_norm": 0.07219675183296204, "learning_rate": 8.194991957028722e-05, "loss": 0.0072, "step": 15388 }, { "epoch": 2.978521671826625, "grad_norm": 0.0591864176094532, "learning_rate": 8.194772219149182e-05, "loss": 0.0064, "step": 15389 }, { "epoch": 2.9787151702786376, "grad_norm": 0.07704749703407288, "learning_rate": 8.194552471251141e-05, "loss": 0.0068, "step": 15390 }, { "epoch": 2.97890866873065, "grad_norm": 0.0387692004442215, "learning_rate": 8.19433271333542e-05, "loss": 0.0061, "step": 15391 }, { "epoch": 2.9791021671826625, "grad_norm": 0.050739794969558716, "learning_rate": 8.194112945402831e-05, "loss": 0.0072, "step": 15392 }, { "epoch": 2.979295665634675, "grad_norm": 0.06597662717103958, "learning_rate": 8.193893167454196e-05, "loss": 0.0086, "step": 15393 }, { "epoch": 2.9794891640866874, "grad_norm": 0.048781875520944595, "learning_rate": 8.193673379490328e-05, "loss": 0.0068, "step": 15394 }, { "epoch": 2.9796826625387, "grad_norm": 0.08504582196474075, "learning_rate": 8.193453581512048e-05, "loss": 0.0077, "step": 15395 }, { "epoch": 2.9798761609907123, "grad_norm": 0.05135317146778107, "learning_rate": 8.193233773520172e-05, "loss": 0.0064, "step": 15396 }, { "epoch": 2.9800696594427247, "grad_norm": 0.06507550925016403, "learning_rate": 8.193013955515513e-05, "loss": 0.006, "step": 15397 }, { "epoch": 2.9802631578947367, "grad_norm": 0.053396355360746384, "learning_rate": 8.192794127498896e-05, "loss": 0.0059, "step": 15398 }, { "epoch": 2.980456656346749, "grad_norm": 0.03565545380115509, "learning_rate": 8.192574289471131e-05, "loss": 0.0051, "step": 15399 }, { "epoch": 2.9806501547987616, "grad_norm": 0.09726542979478836, "learning_rate": 8.192354441433038e-05, "loss": 0.0053, "step": 15400 }, { "epoch": 2.980843653250774, "grad_norm": 0.062032803893089294, "learning_rate": 8.192134583385437e-05, "loss": 0.0059, "step": 15401 }, { "epoch": 2.9810371517027865, "grad_norm": 0.07812414318323135, "learning_rate": 8.191914715329141e-05, "loss": 0.0069, "step": 15402 }, { "epoch": 2.9812306501547985, "grad_norm": 0.042377132922410965, "learning_rate": 8.19169483726497e-05, "loss": 0.0064, "step": 15403 }, { "epoch": 2.981424148606811, "grad_norm": 0.08958682417869568, "learning_rate": 8.191474949193741e-05, "loss": 0.0057, "step": 15404 }, { "epoch": 2.9816176470588234, "grad_norm": 0.05766074359416962, "learning_rate": 8.191255051116271e-05, "loss": 0.0065, "step": 15405 }, { "epoch": 2.981811145510836, "grad_norm": 0.07504203170537949, "learning_rate": 8.191035143033379e-05, "loss": 0.0072, "step": 15406 }, { "epoch": 
2.9820046439628483, "grad_norm": 0.10245230793952942, "learning_rate": 8.19081522494588e-05, "loss": 0.0057, "step": 15407 }, { "epoch": 2.9821981424148607, "grad_norm": 0.041566524654626846, "learning_rate": 8.190595296854594e-05, "loss": 0.0066, "step": 15408 }, { "epoch": 2.982391640866873, "grad_norm": 0.08215641230344772, "learning_rate": 8.190375358760338e-05, "loss": 0.008, "step": 15409 }, { "epoch": 2.9825851393188856, "grad_norm": 0.03822670504450798, "learning_rate": 8.190155410663926e-05, "loss": 0.0069, "step": 15410 }, { "epoch": 2.982778637770898, "grad_norm": 0.09768465906381607, "learning_rate": 8.189935452566183e-05, "loss": 0.0057, "step": 15411 }, { "epoch": 2.9829721362229105, "grad_norm": 0.03693569824099541, "learning_rate": 8.189715484467921e-05, "loss": 0.0069, "step": 15412 }, { "epoch": 2.9831656346749225, "grad_norm": 0.1360153704881668, "learning_rate": 8.189495506369959e-05, "loss": 0.0058, "step": 15413 }, { "epoch": 2.983359133126935, "grad_norm": 0.06855817884206772, "learning_rate": 8.189275518273118e-05, "loss": 0.0078, "step": 15414 }, { "epoch": 2.9835526315789473, "grad_norm": 0.1490601748228073, "learning_rate": 8.189055520178212e-05, "loss": 0.0056, "step": 15415 }, { "epoch": 2.98374613003096, "grad_norm": 0.09105536341667175, "learning_rate": 8.188835512086059e-05, "loss": 0.0062, "step": 15416 }, { "epoch": 2.9839396284829722, "grad_norm": 0.09617592394351959, "learning_rate": 8.188615493997479e-05, "loss": 0.0063, "step": 15417 }, { "epoch": 2.9841331269349847, "grad_norm": 0.10878632962703705, "learning_rate": 8.18839546591329e-05, "loss": 0.0069, "step": 15418 }, { "epoch": 2.9843266253869967, "grad_norm": 0.08228028565645218, "learning_rate": 8.188175427834307e-05, "loss": 0.0069, "step": 15419 }, { "epoch": 2.984520123839009, "grad_norm": 0.029654471203684807, "learning_rate": 8.187955379761351e-05, "loss": 0.0065, "step": 15420 }, { "epoch": 2.9847136222910216, "grad_norm": 0.04373199865221977, "learning_rate": 8.18773532169524e-05, "loss": 0.0068, "step": 15421 }, { "epoch": 2.984907120743034, "grad_norm": 0.024740595370531082, "learning_rate": 8.187515253636791e-05, "loss": 0.0066, "step": 15422 }, { "epoch": 2.9851006191950464, "grad_norm": 0.037574730813503265, "learning_rate": 8.187295175586823e-05, "loss": 0.007, "step": 15423 }, { "epoch": 2.985294117647059, "grad_norm": 0.03775998204946518, "learning_rate": 8.187075087546153e-05, "loss": 0.0072, "step": 15424 }, { "epoch": 2.9854876160990713, "grad_norm": 0.028162553906440735, "learning_rate": 8.186854989515599e-05, "loss": 0.0078, "step": 15425 }, { "epoch": 2.9856811145510838, "grad_norm": 0.04429111257195473, "learning_rate": 8.18663488149598e-05, "loss": 0.0065, "step": 15426 }, { "epoch": 2.985874613003096, "grad_norm": 0.050206877291202545, "learning_rate": 8.186414763488117e-05, "loss": 0.0071, "step": 15427 }, { "epoch": 2.986068111455108, "grad_norm": 0.03653600066900253, "learning_rate": 8.186194635492824e-05, "loss": 0.0068, "step": 15428 }, { "epoch": 2.9862616099071206, "grad_norm": 0.07372379302978516, "learning_rate": 8.185974497510922e-05, "loss": 0.0088, "step": 15429 }, { "epoch": 2.986455108359133, "grad_norm": 0.057409122586250305, "learning_rate": 8.185754349543229e-05, "loss": 0.0068, "step": 15430 }, { "epoch": 2.9866486068111455, "grad_norm": 0.06164286285638809, "learning_rate": 8.185534191590563e-05, "loss": 0.006, "step": 15431 }, { "epoch": 2.986842105263158, "grad_norm": 0.07104127109050751, "learning_rate": 8.185314023653742e-05, "loss": 0.0079, "step": 
15432 }, { "epoch": 2.9870356037151704, "grad_norm": 0.06052866950631142, "learning_rate": 8.185093845733586e-05, "loss": 0.0067, "step": 15433 }, { "epoch": 2.9872291021671824, "grad_norm": 0.06512489914894104, "learning_rate": 8.184873657830912e-05, "loss": 0.005, "step": 15434 }, { "epoch": 2.987422600619195, "grad_norm": 0.06034085154533386, "learning_rate": 8.184653459946538e-05, "loss": 0.0079, "step": 15435 }, { "epoch": 2.9876160990712073, "grad_norm": 0.08109501749277115, "learning_rate": 8.184433252081286e-05, "loss": 0.0057, "step": 15436 }, { "epoch": 2.9878095975232197, "grad_norm": 0.05815853178501129, "learning_rate": 8.184213034235972e-05, "loss": 0.0064, "step": 15437 }, { "epoch": 2.988003095975232, "grad_norm": 0.07472524791955948, "learning_rate": 8.183992806411415e-05, "loss": 0.007, "step": 15438 }, { "epoch": 2.9881965944272446, "grad_norm": 0.08499956130981445, "learning_rate": 8.183772568608434e-05, "loss": 0.0053, "step": 15439 }, { "epoch": 2.988390092879257, "grad_norm": 0.08732026815414429, "learning_rate": 8.183552320827849e-05, "loss": 0.0059, "step": 15440 }, { "epoch": 2.9885835913312695, "grad_norm": 0.0612160861492157, "learning_rate": 8.183332063070475e-05, "loss": 0.0058, "step": 15441 }, { "epoch": 2.988777089783282, "grad_norm": 0.06871750205755234, "learning_rate": 8.183111795337138e-05, "loss": 0.0074, "step": 15442 }, { "epoch": 2.9889705882352944, "grad_norm": 0.07107586413621902, "learning_rate": 8.18289151762865e-05, "loss": 0.0071, "step": 15443 }, { "epoch": 2.9891640866873064, "grad_norm": 0.06832833588123322, "learning_rate": 8.182671229945832e-05, "loss": 0.0058, "step": 15444 }, { "epoch": 2.989357585139319, "grad_norm": 0.07765626162290573, "learning_rate": 8.182450932289504e-05, "loss": 0.0076, "step": 15445 }, { "epoch": 2.9895510835913313, "grad_norm": 0.051041729748249054, "learning_rate": 8.182230624660483e-05, "loss": 0.0059, "step": 15446 }, { "epoch": 2.9897445820433437, "grad_norm": 0.07277365773916245, "learning_rate": 8.182010307059591e-05, "loss": 0.0076, "step": 15447 }, { "epoch": 2.989938080495356, "grad_norm": 0.06387326866388321, "learning_rate": 8.181789979487644e-05, "loss": 0.0071, "step": 15448 }, { "epoch": 2.9901315789473686, "grad_norm": 0.07584819197654724, "learning_rate": 8.181569641945464e-05, "loss": 0.0068, "step": 15449 }, { "epoch": 2.9903250773993806, "grad_norm": 0.04451903700828552, "learning_rate": 8.181349294433869e-05, "loss": 0.0057, "step": 15450 }, { "epoch": 2.990518575851393, "grad_norm": 0.07607383280992508, "learning_rate": 8.181128936953678e-05, "loss": 0.0072, "step": 15451 }, { "epoch": 2.9907120743034055, "grad_norm": 0.04362209141254425, "learning_rate": 8.18090856950571e-05, "loss": 0.0061, "step": 15452 }, { "epoch": 2.990905572755418, "grad_norm": 0.07980763167142868, "learning_rate": 8.180688192090787e-05, "loss": 0.0065, "step": 15453 }, { "epoch": 2.9910990712074303, "grad_norm": 0.058369144797325134, "learning_rate": 8.180467804709722e-05, "loss": 0.0052, "step": 15454 }, { "epoch": 2.991292569659443, "grad_norm": 0.07371558248996735, "learning_rate": 8.180247407363338e-05, "loss": 0.0077, "step": 15455 }, { "epoch": 2.9914860681114552, "grad_norm": 0.07323945313692093, "learning_rate": 8.180027000052457e-05, "loss": 0.0066, "step": 15456 }, { "epoch": 2.9916795665634677, "grad_norm": 0.03240741044282913, "learning_rate": 8.179806582777894e-05, "loss": 0.0056, "step": 15457 }, { "epoch": 2.99187306501548, "grad_norm": 0.1198902577161789, "learning_rate": 8.179586155540474e-05, 
"loss": 0.0064, "step": 15458 }, { "epoch": 2.992066563467492, "grad_norm": 0.03243204206228256, "learning_rate": 8.17936571834101e-05, "loss": 0.0058, "step": 15459 }, { "epoch": 2.9922600619195046, "grad_norm": 0.10181189328432083, "learning_rate": 8.179145271180324e-05, "loss": 0.0065, "step": 15460 }, { "epoch": 2.992453560371517, "grad_norm": 0.06360314786434174, "learning_rate": 8.178924814059238e-05, "loss": 0.0062, "step": 15461 }, { "epoch": 2.9926470588235294, "grad_norm": 0.12428248673677444, "learning_rate": 8.178704346978568e-05, "loss": 0.0068, "step": 15462 }, { "epoch": 2.992840557275542, "grad_norm": 0.07620604336261749, "learning_rate": 8.178483869939135e-05, "loss": 0.0071, "step": 15463 }, { "epoch": 2.9930340557275543, "grad_norm": 0.11009927093982697, "learning_rate": 8.17826338294176e-05, "loss": 0.0062, "step": 15464 }, { "epoch": 2.9932275541795663, "grad_norm": 0.08119432628154755, "learning_rate": 8.178042885987262e-05, "loss": 0.0059, "step": 15465 }, { "epoch": 2.9934210526315788, "grad_norm": 0.09417528659105301, "learning_rate": 8.177822379076461e-05, "loss": 0.0062, "step": 15466 }, { "epoch": 2.993614551083591, "grad_norm": 0.08382543921470642, "learning_rate": 8.177601862210174e-05, "loss": 0.0074, "step": 15467 }, { "epoch": 2.9938080495356036, "grad_norm": 0.07176286727190018, "learning_rate": 8.177381335389226e-05, "loss": 0.0069, "step": 15468 }, { "epoch": 2.994001547987616, "grad_norm": 0.0931873619556427, "learning_rate": 8.177160798614431e-05, "loss": 0.0074, "step": 15469 }, { "epoch": 2.9941950464396285, "grad_norm": 0.03694702684879303, "learning_rate": 8.176940251886613e-05, "loss": 0.007, "step": 15470 }, { "epoch": 2.994388544891641, "grad_norm": 0.0993291586637497, "learning_rate": 8.176719695206592e-05, "loss": 0.0075, "step": 15471 }, { "epoch": 2.9945820433436534, "grad_norm": 0.04184037074446678, "learning_rate": 8.176499128575185e-05, "loss": 0.0073, "step": 15472 }, { "epoch": 2.994775541795666, "grad_norm": 0.10220685601234436, "learning_rate": 8.176278551993213e-05, "loss": 0.0056, "step": 15473 }, { "epoch": 2.9949690402476783, "grad_norm": 0.06563838571310043, "learning_rate": 8.176057965461497e-05, "loss": 0.0072, "step": 15474 }, { "epoch": 2.9951625386996903, "grad_norm": 0.09286236017942429, "learning_rate": 8.17583736898086e-05, "loss": 0.0071, "step": 15475 }, { "epoch": 2.9953560371517027, "grad_norm": 0.07231929898262024, "learning_rate": 8.175616762552118e-05, "loss": 0.0047, "step": 15476 }, { "epoch": 2.995549535603715, "grad_norm": 0.07190852612257004, "learning_rate": 8.17539614617609e-05, "loss": 0.0065, "step": 15477 }, { "epoch": 2.9957430340557276, "grad_norm": 0.10384007543325424, "learning_rate": 8.1751755198536e-05, "loss": 0.0068, "step": 15478 }, { "epoch": 2.99593653250774, "grad_norm": 0.06736840307712555, "learning_rate": 8.174954883585465e-05, "loss": 0.0058, "step": 15479 }, { "epoch": 2.996130030959752, "grad_norm": 0.09611637890338898, "learning_rate": 8.174734237372509e-05, "loss": 0.0065, "step": 15480 }, { "epoch": 2.9963235294117645, "grad_norm": 0.06717394292354584, "learning_rate": 8.174513581215549e-05, "loss": 0.0058, "step": 15481 }, { "epoch": 2.996517027863777, "grad_norm": 0.0830846056342125, "learning_rate": 8.174292915115405e-05, "loss": 0.007, "step": 15482 }, { "epoch": 2.9967105263157894, "grad_norm": 0.04724760353565216, "learning_rate": 8.174072239072902e-05, "loss": 0.0057, "step": 15483 }, { "epoch": 2.996904024767802, "grad_norm": 0.033527739346027374, "learning_rate": 
8.173851553088858e-05, "loss": 0.0065, "step": 15484 }, { "epoch": 2.9970975232198143, "grad_norm": 0.04417102411389351, "learning_rate": 8.173630857164089e-05, "loss": 0.0075, "step": 15485 }, { "epoch": 2.9972910216718267, "grad_norm": 0.02401742897927761, "learning_rate": 8.17341015129942e-05, "loss": 0.0068, "step": 15486 }, { "epoch": 2.997484520123839, "grad_norm": 0.04520117864012718, "learning_rate": 8.173189435495672e-05, "loss": 0.0064, "step": 15487 }, { "epoch": 2.9976780185758516, "grad_norm": 0.04542190209031105, "learning_rate": 8.172968709753664e-05, "loss": 0.0065, "step": 15488 }, { "epoch": 2.997871517027864, "grad_norm": 0.025033418089151382, "learning_rate": 8.172747974074218e-05, "loss": 0.0053, "step": 15489 }, { "epoch": 2.998065015479876, "grad_norm": 0.04305480420589447, "learning_rate": 8.172527228458153e-05, "loss": 0.0071, "step": 15490 }, { "epoch": 2.9982585139318885, "grad_norm": 0.02991136908531189, "learning_rate": 8.172306472906289e-05, "loss": 0.0051, "step": 15491 }, { "epoch": 2.998452012383901, "grad_norm": 0.043199557811021805, "learning_rate": 8.172085707419449e-05, "loss": 0.0084, "step": 15492 }, { "epoch": 2.9986455108359134, "grad_norm": 0.038242440670728683, "learning_rate": 8.171864931998452e-05, "loss": 0.0072, "step": 15493 }, { "epoch": 2.998839009287926, "grad_norm": 0.031570665538311005, "learning_rate": 8.17164414664412e-05, "loss": 0.006, "step": 15494 }, { "epoch": 2.9990325077399382, "grad_norm": 0.05032234638929367, "learning_rate": 8.171423351357273e-05, "loss": 0.0056, "step": 15495 }, { "epoch": 2.9992260061919502, "grad_norm": 0.03328258916735649, "learning_rate": 8.171202546138735e-05, "loss": 0.0079, "step": 15496 }, { "epoch": 2.9994195046439627, "grad_norm": 0.06876565515995026, "learning_rate": 8.170981730989321e-05, "loss": 0.0063, "step": 15497 }, { "epoch": 2.999613003095975, "grad_norm": 0.031180065125226974, "learning_rate": 8.170760905909856e-05, "loss": 0.0061, "step": 15498 }, { "epoch": 3.0001934984520124, "grad_norm": 0.052294567227363586, "learning_rate": 8.17054007090116e-05, "loss": 0.0058, "step": 15499 }, { "epoch": 3.000386996904025, "grad_norm": 0.03418160229921341, "learning_rate": 8.170319225964055e-05, "loss": 0.0061, "step": 15500 }, { "epoch": 3.0005804953560373, "grad_norm": 0.04953083395957947, "learning_rate": 8.170098371099361e-05, "loss": 0.006, "step": 15501 }, { "epoch": 3.0007739938080493, "grad_norm": 0.07903042435646057, "learning_rate": 8.169877506307897e-05, "loss": 0.0066, "step": 15502 }, { "epoch": 3.0009674922600618, "grad_norm": 0.061124853789806366, "learning_rate": 8.169656631590488e-05, "loss": 0.008, "step": 15503 }, { "epoch": 3.001160990712074, "grad_norm": 0.06265649944543839, "learning_rate": 8.169435746947955e-05, "loss": 0.0071, "step": 15504 }, { "epoch": 3.0013544891640866, "grad_norm": 0.0755658969283104, "learning_rate": 8.169214852381115e-05, "loss": 0.0084, "step": 15505 }, { "epoch": 3.001547987616099, "grad_norm": 0.04905408248305321, "learning_rate": 8.168993947890793e-05, "loss": 0.0079, "step": 15506 }, { "epoch": 3.0017414860681115, "grad_norm": 0.09089414775371552, "learning_rate": 8.168773033477809e-05, "loss": 0.0069, "step": 15507 }, { "epoch": 3.001934984520124, "grad_norm": 0.03591137006878853, "learning_rate": 8.168552109142984e-05, "loss": 0.0065, "step": 15508 }, { "epoch": 3.0021284829721364, "grad_norm": 0.06694997102022171, "learning_rate": 8.168331174887141e-05, "loss": 0.0063, "step": 15509 }, { "epoch": 3.0023219814241484, "grad_norm": 
0.07166649401187897, "learning_rate": 8.168110230711098e-05, "loss": 0.0066, "step": 15510 }, { "epoch": 3.002515479876161, "grad_norm": 0.05045405402779579, "learning_rate": 8.16788927661568e-05, "loss": 0.0053, "step": 15511 }, { "epoch": 3.0027089783281733, "grad_norm": 0.09453176707029343, "learning_rate": 8.167668312601707e-05, "loss": 0.0058, "step": 15512 }, { "epoch": 3.0029024767801857, "grad_norm": 0.03149018436670303, "learning_rate": 8.167447338670002e-05, "loss": 0.0067, "step": 15513 }, { "epoch": 3.003095975232198, "grad_norm": 0.08028929680585861, "learning_rate": 8.167226354821384e-05, "loss": 0.0081, "step": 15514 }, { "epoch": 3.0032894736842106, "grad_norm": 0.06289844214916229, "learning_rate": 8.167005361056675e-05, "loss": 0.0075, "step": 15515 }, { "epoch": 3.003482972136223, "grad_norm": 0.05568254366517067, "learning_rate": 8.166784357376697e-05, "loss": 0.0063, "step": 15516 }, { "epoch": 3.0036764705882355, "grad_norm": 0.06256961822509766, "learning_rate": 8.166563343782273e-05, "loss": 0.0057, "step": 15517 }, { "epoch": 3.0038699690402475, "grad_norm": 0.021857360377907753, "learning_rate": 8.166342320274221e-05, "loss": 0.0062, "step": 15518 }, { "epoch": 3.00406346749226, "grad_norm": 0.04544958844780922, "learning_rate": 8.166121286853369e-05, "loss": 0.0059, "step": 15519 }, { "epoch": 3.0042569659442724, "grad_norm": 0.034430861473083496, "learning_rate": 8.165900243520533e-05, "loss": 0.0061, "step": 15520 }, { "epoch": 3.004450464396285, "grad_norm": 0.04532259702682495, "learning_rate": 8.165679190276535e-05, "loss": 0.0079, "step": 15521 }, { "epoch": 3.0046439628482973, "grad_norm": 0.04193935915827751, "learning_rate": 8.165458127122202e-05, "loss": 0.0063, "step": 15522 }, { "epoch": 3.0048374613003097, "grad_norm": 0.02544453553855419, "learning_rate": 8.16523705405835e-05, "loss": 0.006, "step": 15523 }, { "epoch": 3.005030959752322, "grad_norm": 0.04518295079469681, "learning_rate": 8.165015971085806e-05, "loss": 0.0071, "step": 15524 }, { "epoch": 3.005224458204334, "grad_norm": 0.07068804651498795, "learning_rate": 8.164794878205386e-05, "loss": 0.007, "step": 15525 }, { "epoch": 3.0054179566563466, "grad_norm": 0.08825758099555969, "learning_rate": 8.164573775417918e-05, "loss": 0.0065, "step": 15526 }, { "epoch": 3.005611455108359, "grad_norm": 0.04953806847333908, "learning_rate": 8.164352662724221e-05, "loss": 0.0061, "step": 15527 }, { "epoch": 3.0058049535603715, "grad_norm": 0.07700671255588531, "learning_rate": 8.164131540125116e-05, "loss": 0.0056, "step": 15528 }, { "epoch": 3.005998452012384, "grad_norm": 0.03682546690106392, "learning_rate": 8.163910407621426e-05, "loss": 0.0072, "step": 15529 }, { "epoch": 3.0061919504643964, "grad_norm": 0.0633544996380806, "learning_rate": 8.163689265213975e-05, "loss": 0.0076, "step": 15530 }, { "epoch": 3.006385448916409, "grad_norm": 0.053219884634017944, "learning_rate": 8.163468112903583e-05, "loss": 0.0071, "step": 15531 }, { "epoch": 3.0065789473684212, "grad_norm": 0.045527491718530655, "learning_rate": 8.163246950691075e-05, "loss": 0.0069, "step": 15532 }, { "epoch": 3.0067724458204332, "grad_norm": 0.0787639394402504, "learning_rate": 8.163025778577267e-05, "loss": 0.0076, "step": 15533 }, { "epoch": 3.0069659442724457, "grad_norm": 0.03455934673547745, "learning_rate": 8.162804596562989e-05, "loss": 0.0068, "step": 15534 }, { "epoch": 3.007159442724458, "grad_norm": 0.0909118801355362, "learning_rate": 8.162583404649057e-05, "loss": 0.0073, "step": 15535 }, { "epoch": 
3.0073529411764706, "grad_norm": 0.05398683622479439, "learning_rate": 8.162362202836298e-05, "loss": 0.0055, "step": 15536 }, { "epoch": 3.007546439628483, "grad_norm": 0.06332696229219437, "learning_rate": 8.16214099112553e-05, "loss": 0.0062, "step": 15537 }, { "epoch": 3.0077399380804954, "grad_norm": 0.0885360985994339, "learning_rate": 8.16191976951758e-05, "loss": 0.0048, "step": 15538 }, { "epoch": 3.007933436532508, "grad_norm": 0.05011531338095665, "learning_rate": 8.161698538013267e-05, "loss": 0.0056, "step": 15539 }, { "epoch": 3.0081269349845203, "grad_norm": 0.07113545387983322, "learning_rate": 8.161477296613414e-05, "loss": 0.0074, "step": 15540 }, { "epoch": 3.0083204334365323, "grad_norm": 0.06337352097034454, "learning_rate": 8.161256045318846e-05, "loss": 0.0063, "step": 15541 }, { "epoch": 3.0085139318885448, "grad_norm": 0.04575405642390251, "learning_rate": 8.161034784130384e-05, "loss": 0.0066, "step": 15542 }, { "epoch": 3.008707430340557, "grad_norm": 0.10854306071996689, "learning_rate": 8.160813513048849e-05, "loss": 0.007, "step": 15543 }, { "epoch": 3.0089009287925697, "grad_norm": 0.05498022586107254, "learning_rate": 8.160592232075065e-05, "loss": 0.0072, "step": 15544 }, { "epoch": 3.009094427244582, "grad_norm": 0.12603574991226196, "learning_rate": 8.160370941209855e-05, "loss": 0.0061, "step": 15545 }, { "epoch": 3.0092879256965945, "grad_norm": 0.07762596011161804, "learning_rate": 8.16014964045404e-05, "loss": 0.0086, "step": 15546 }, { "epoch": 3.009481424148607, "grad_norm": 0.17312729358673096, "learning_rate": 8.159928329808447e-05, "loss": 0.0069, "step": 15547 }, { "epoch": 3.0096749226006194, "grad_norm": 0.07831432670354843, "learning_rate": 8.159707009273893e-05, "loss": 0.007, "step": 15548 }, { "epoch": 3.0098684210526314, "grad_norm": 0.14796492457389832, "learning_rate": 8.159485678851206e-05, "loss": 0.0069, "step": 15549 }, { "epoch": 3.010061919504644, "grad_norm": 0.0860467478632927, "learning_rate": 8.159264338541206e-05, "loss": 0.006, "step": 15550 }, { "epoch": 3.0102554179566563, "grad_norm": 0.10733220726251602, "learning_rate": 8.159042988344717e-05, "loss": 0.006, "step": 15551 }, { "epoch": 3.0104489164086687, "grad_norm": 0.13651007413864136, "learning_rate": 8.158821628262559e-05, "loss": 0.0071, "step": 15552 }, { "epoch": 3.010642414860681, "grad_norm": 0.09152419120073318, "learning_rate": 8.15860025829556e-05, "loss": 0.0068, "step": 15553 }, { "epoch": 3.0108359133126936, "grad_norm": 0.15442204475402832, "learning_rate": 8.158378878444539e-05, "loss": 0.0072, "step": 15554 }, { "epoch": 3.011029411764706, "grad_norm": 0.06510499864816666, "learning_rate": 8.15815748871032e-05, "loss": 0.0078, "step": 15555 }, { "epoch": 3.011222910216718, "grad_norm": 0.1271013766527176, "learning_rate": 8.157936089093728e-05, "loss": 0.0069, "step": 15556 }, { "epoch": 3.0114164086687305, "grad_norm": 0.07578635960817337, "learning_rate": 8.157714679595584e-05, "loss": 0.0061, "step": 15557 }, { "epoch": 3.011609907120743, "grad_norm": 0.0971064567565918, "learning_rate": 8.157493260216711e-05, "loss": 0.0075, "step": 15558 }, { "epoch": 3.0118034055727554, "grad_norm": 0.07150892168283463, "learning_rate": 8.157271830957934e-05, "loss": 0.0067, "step": 15559 }, { "epoch": 3.011996904024768, "grad_norm": 0.05784595385193825, "learning_rate": 8.157050391820074e-05, "loss": 0.0077, "step": 15560 }, { "epoch": 3.0121904024767803, "grad_norm": 0.06172819435596466, "learning_rate": 8.156828942803955e-05, "loss": 0.0049, "step": 15561 }, { 
"epoch": 3.0123839009287927, "grad_norm": 0.049042969942092896, "learning_rate": 8.1566074839104e-05, "loss": 0.0069, "step": 15562 }, { "epoch": 3.012577399380805, "grad_norm": 0.06186496838927269, "learning_rate": 8.156386015140235e-05, "loss": 0.0064, "step": 15563 }, { "epoch": 3.012770897832817, "grad_norm": 0.05277475714683533, "learning_rate": 8.15616453649428e-05, "loss": 0.0053, "step": 15564 }, { "epoch": 3.0129643962848296, "grad_norm": 0.038318756967782974, "learning_rate": 8.15594304797336e-05, "loss": 0.0063, "step": 15565 }, { "epoch": 3.013157894736842, "grad_norm": 0.05771010369062424, "learning_rate": 8.155721549578301e-05, "loss": 0.0069, "step": 15566 }, { "epoch": 3.0133513931888545, "grad_norm": 0.05099112540483475, "learning_rate": 8.155500041309919e-05, "loss": 0.0058, "step": 15567 }, { "epoch": 3.013544891640867, "grad_norm": 0.06482847779989243, "learning_rate": 8.155278523169044e-05, "loss": 0.0063, "step": 15568 }, { "epoch": 3.0137383900928794, "grad_norm": 0.10394886881113052, "learning_rate": 8.155056995156499e-05, "loss": 0.0074, "step": 15569 }, { "epoch": 3.013931888544892, "grad_norm": 0.04752621799707413, "learning_rate": 8.154835457273105e-05, "loss": 0.0065, "step": 15570 }, { "epoch": 3.0141253869969042, "grad_norm": 0.08378182351589203, "learning_rate": 8.154613909519688e-05, "loss": 0.0061, "step": 15571 }, { "epoch": 3.0143188854489162, "grad_norm": 0.0606035552918911, "learning_rate": 8.154392351897069e-05, "loss": 0.006, "step": 15572 }, { "epoch": 3.0145123839009287, "grad_norm": 0.06898041814565659, "learning_rate": 8.154170784406075e-05, "loss": 0.0065, "step": 15573 }, { "epoch": 3.014705882352941, "grad_norm": 0.07704652100801468, "learning_rate": 8.153949207047527e-05, "loss": 0.0071, "step": 15574 }, { "epoch": 3.0148993808049536, "grad_norm": 0.05147183686494827, "learning_rate": 8.15372761982225e-05, "loss": 0.0061, "step": 15575 }, { "epoch": 3.015092879256966, "grad_norm": 0.08531574159860611, "learning_rate": 8.153506022731067e-05, "loss": 0.0059, "step": 15576 }, { "epoch": 3.0152863777089784, "grad_norm": 0.04224173352122307, "learning_rate": 8.153284415774805e-05, "loss": 0.0057, "step": 15577 }, { "epoch": 3.015479876160991, "grad_norm": 0.10523281991481781, "learning_rate": 8.153062798954283e-05, "loss": 0.0072, "step": 15578 }, { "epoch": 3.015673374613003, "grad_norm": 0.02592831663787365, "learning_rate": 8.152841172270326e-05, "loss": 0.0064, "step": 15579 }, { "epoch": 3.0158668730650153, "grad_norm": 0.08797582238912582, "learning_rate": 8.152619535723762e-05, "loss": 0.0059, "step": 15580 }, { "epoch": 3.0160603715170278, "grad_norm": 0.04531266540288925, "learning_rate": 8.152397889315412e-05, "loss": 0.0057, "step": 15581 }, { "epoch": 3.01625386996904, "grad_norm": 0.0609738789498806, "learning_rate": 8.152176233046099e-05, "loss": 0.0069, "step": 15582 }, { "epoch": 3.0164473684210527, "grad_norm": 0.0785505622625351, "learning_rate": 8.151954566916648e-05, "loss": 0.0065, "step": 15583 }, { "epoch": 3.016640866873065, "grad_norm": 0.058701492846012115, "learning_rate": 8.151732890927884e-05, "loss": 0.0074, "step": 15584 }, { "epoch": 3.0168343653250775, "grad_norm": 0.07016938924789429, "learning_rate": 8.151511205080629e-05, "loss": 0.007, "step": 15585 }, { "epoch": 3.01702786377709, "grad_norm": 0.04707230627536774, "learning_rate": 8.151289509375712e-05, "loss": 0.0052, "step": 15586 }, { "epoch": 3.017221362229102, "grad_norm": 0.06015471741557121, "learning_rate": 8.151067803813951e-05, "loss": 0.0061, "step": 
15587 }, { "epoch": 3.0174148606811144, "grad_norm": 0.04004235193133354, "learning_rate": 8.150846088396175e-05, "loss": 0.0073, "step": 15588 }, { "epoch": 3.017608359133127, "grad_norm": 0.055572260171175, "learning_rate": 8.150624363123207e-05, "loss": 0.0063, "step": 15589 }, { "epoch": 3.0178018575851393, "grad_norm": 0.08788624405860901, "learning_rate": 8.150402627995868e-05, "loss": 0.0073, "step": 15590 }, { "epoch": 3.0179953560371517, "grad_norm": 0.11943650990724564, "learning_rate": 8.150180883014985e-05, "loss": 0.0079, "step": 15591 }, { "epoch": 3.018188854489164, "grad_norm": 0.05682443082332611, "learning_rate": 8.149959128181386e-05, "loss": 0.0066, "step": 15592 }, { "epoch": 3.0183823529411766, "grad_norm": 0.08569732308387756, "learning_rate": 8.149737363495888e-05, "loss": 0.0064, "step": 15593 }, { "epoch": 3.018575851393189, "grad_norm": 0.07816831767559052, "learning_rate": 8.149515588959322e-05, "loss": 0.0065, "step": 15594 }, { "epoch": 3.018769349845201, "grad_norm": 0.08083245903253555, "learning_rate": 8.149293804572509e-05, "loss": 0.0077, "step": 15595 }, { "epoch": 3.0189628482972135, "grad_norm": 0.07524871826171875, "learning_rate": 8.149072010336274e-05, "loss": 0.0066, "step": 15596 }, { "epoch": 3.019156346749226, "grad_norm": 0.06582868099212646, "learning_rate": 8.148850206251444e-05, "loss": 0.0068, "step": 15597 }, { "epoch": 3.0193498452012384, "grad_norm": 0.09429990500211716, "learning_rate": 8.14862839231884e-05, "loss": 0.0075, "step": 15598 }, { "epoch": 3.019543343653251, "grad_norm": 0.0710943192243576, "learning_rate": 8.148406568539288e-05, "loss": 0.0072, "step": 15599 }, { "epoch": 3.0197368421052633, "grad_norm": 0.11046669632196426, "learning_rate": 8.148184734913614e-05, "loss": 0.0062, "step": 15600 }, { "epoch": 3.0199303405572757, "grad_norm": 0.04617217183113098, "learning_rate": 8.147962891442641e-05, "loss": 0.0059, "step": 15601 }, { "epoch": 3.0201238390092877, "grad_norm": 0.10439040511846542, "learning_rate": 8.147741038127194e-05, "loss": 0.0072, "step": 15602 }, { "epoch": 3.0203173374613, "grad_norm": 0.05418398231267929, "learning_rate": 8.1475191749681e-05, "loss": 0.0072, "step": 15603 }, { "epoch": 3.0205108359133126, "grad_norm": 0.09670300781726837, "learning_rate": 8.147297301966181e-05, "loss": 0.0066, "step": 15604 }, { "epoch": 3.020704334365325, "grad_norm": 0.07095917314291, "learning_rate": 8.147075419122263e-05, "loss": 0.0046, "step": 15605 }, { "epoch": 3.0208978328173375, "grad_norm": 0.07309656590223312, "learning_rate": 8.14685352643717e-05, "loss": 0.0084, "step": 15606 }, { "epoch": 3.02109133126935, "grad_norm": 0.06522960215806961, "learning_rate": 8.146631623911729e-05, "loss": 0.0055, "step": 15607 }, { "epoch": 3.0212848297213624, "grad_norm": 0.02337140589952469, "learning_rate": 8.146409711546762e-05, "loss": 0.0062, "step": 15608 }, { "epoch": 3.021478328173375, "grad_norm": 0.0590689480304718, "learning_rate": 8.146187789343098e-05, "loss": 0.0064, "step": 15609 }, { "epoch": 3.021671826625387, "grad_norm": 0.06706658750772476, "learning_rate": 8.145965857301559e-05, "loss": 0.0064, "step": 15610 }, { "epoch": 3.0218653250773992, "grad_norm": 0.05776214599609375, "learning_rate": 8.145743915422971e-05, "loss": 0.0079, "step": 15611 }, { "epoch": 3.0220588235294117, "grad_norm": 0.06446816772222519, "learning_rate": 8.145521963708158e-05, "loss": 0.0062, "step": 15612 }, { "epoch": 3.022252321981424, "grad_norm": 0.032128769904375076, "learning_rate": 8.145300002157947e-05, "loss": 0.0067, 
"step": 15613 }, { "epoch": 3.0224458204334366, "grad_norm": 0.06390345096588135, "learning_rate": 8.145078030773164e-05, "loss": 0.0063, "step": 15614 }, { "epoch": 3.022639318885449, "grad_norm": 0.03848465159535408, "learning_rate": 8.14485604955463e-05, "loss": 0.006, "step": 15615 }, { "epoch": 3.0228328173374615, "grad_norm": 0.06246311590075493, "learning_rate": 8.144634058503173e-05, "loss": 0.0073, "step": 15616 }, { "epoch": 3.023026315789474, "grad_norm": 0.0450749546289444, "learning_rate": 8.14441205761962e-05, "loss": 0.0059, "step": 15617 }, { "epoch": 3.023219814241486, "grad_norm": 0.04169926792383194, "learning_rate": 8.144190046904794e-05, "loss": 0.0068, "step": 15618 }, { "epoch": 3.0234133126934983, "grad_norm": 0.05282062292098999, "learning_rate": 8.143968026359521e-05, "loss": 0.0059, "step": 15619 }, { "epoch": 3.0236068111455108, "grad_norm": 0.05116686224937439, "learning_rate": 8.143745995984625e-05, "loss": 0.0053, "step": 15620 }, { "epoch": 3.023800309597523, "grad_norm": 0.04444443807005882, "learning_rate": 8.143523955780935e-05, "loss": 0.007, "step": 15621 }, { "epoch": 3.0239938080495357, "grad_norm": 0.07310768216848373, "learning_rate": 8.143301905749273e-05, "loss": 0.0045, "step": 15622 }, { "epoch": 3.024187306501548, "grad_norm": 0.05039193853735924, "learning_rate": 8.143079845890466e-05, "loss": 0.0057, "step": 15623 }, { "epoch": 3.0243808049535605, "grad_norm": 0.08202531188726425, "learning_rate": 8.142857776205339e-05, "loss": 0.0052, "step": 15624 }, { "epoch": 3.0245743034055725, "grad_norm": 0.0426933690905571, "learning_rate": 8.142635696694719e-05, "loss": 0.008, "step": 15625 }, { "epoch": 3.024767801857585, "grad_norm": 0.07847563922405243, "learning_rate": 8.14241360735943e-05, "loss": 0.0081, "step": 15626 }, { "epoch": 3.0249613003095974, "grad_norm": 0.0868898332118988, "learning_rate": 8.1421915082003e-05, "loss": 0.0061, "step": 15627 }, { "epoch": 3.02515479876161, "grad_norm": 0.06510727107524872, "learning_rate": 8.14196939921815e-05, "loss": 0.0073, "step": 15628 }, { "epoch": 3.0253482972136223, "grad_norm": 0.12114138156175613, "learning_rate": 8.141747280413811e-05, "loss": 0.0074, "step": 15629 }, { "epoch": 3.0255417956656347, "grad_norm": 0.06204745173454285, "learning_rate": 8.141525151788106e-05, "loss": 0.0067, "step": 15630 }, { "epoch": 3.025735294117647, "grad_norm": 0.11561211198568344, "learning_rate": 8.14130301334186e-05, "loss": 0.0057, "step": 15631 }, { "epoch": 3.0259287925696596, "grad_norm": 0.0939849317073822, "learning_rate": 8.141080865075902e-05, "loss": 0.0076, "step": 15632 }, { "epoch": 3.0261222910216716, "grad_norm": 0.09976375102996826, "learning_rate": 8.140858706991055e-05, "loss": 0.0062, "step": 15633 }, { "epoch": 3.026315789473684, "grad_norm": 0.11520072817802429, "learning_rate": 8.140636539088148e-05, "loss": 0.0064, "step": 15634 }, { "epoch": 3.0265092879256965, "grad_norm": 0.10255514830350876, "learning_rate": 8.140414361368003e-05, "loss": 0.0071, "step": 15635 }, { "epoch": 3.026702786377709, "grad_norm": 0.1720058023929596, "learning_rate": 8.140192173831447e-05, "loss": 0.0082, "step": 15636 }, { "epoch": 3.0268962848297214, "grad_norm": 0.10320526361465454, "learning_rate": 8.13996997647931e-05, "loss": 0.0075, "step": 15637 }, { "epoch": 3.027089783281734, "grad_norm": 0.15150539577007294, "learning_rate": 8.139747769312413e-05, "loss": 0.0083, "step": 15638 }, { "epoch": 3.0272832817337463, "grad_norm": 0.13084819912910461, "learning_rate": 8.139525552331587e-05, "loss": 
0.0083, "step": 15639 }, { "epoch": 3.0274767801857587, "grad_norm": 0.10396067053079605, "learning_rate": 8.139303325537652e-05, "loss": 0.0078, "step": 15640 }, { "epoch": 3.0276702786377707, "grad_norm": 0.15032707154750824, "learning_rate": 8.13908108893144e-05, "loss": 0.0068, "step": 15641 }, { "epoch": 3.027863777089783, "grad_norm": 0.06451667845249176, "learning_rate": 8.138858842513773e-05, "loss": 0.0073, "step": 15642 }, { "epoch": 3.0280572755417956, "grad_norm": 0.15572476387023926, "learning_rate": 8.13863658628548e-05, "loss": 0.0075, "step": 15643 }, { "epoch": 3.028250773993808, "grad_norm": 0.04105352982878685, "learning_rate": 8.138414320247388e-05, "loss": 0.0069, "step": 15644 }, { "epoch": 3.0284442724458205, "grad_norm": 0.13937066495418549, "learning_rate": 8.138192044400319e-05, "loss": 0.0078, "step": 15645 }, { "epoch": 3.028637770897833, "grad_norm": 0.06909504532814026, "learning_rate": 8.137969758745103e-05, "loss": 0.0058, "step": 15646 }, { "epoch": 3.0288312693498454, "grad_norm": 0.10335583984851837, "learning_rate": 8.137747463282564e-05, "loss": 0.0058, "step": 15647 }, { "epoch": 3.0290247678018574, "grad_norm": 0.09110487997531891, "learning_rate": 8.137525158013533e-05, "loss": 0.0061, "step": 15648 }, { "epoch": 3.02921826625387, "grad_norm": 0.0748472511768341, "learning_rate": 8.137302842938831e-05, "loss": 0.0065, "step": 15649 }, { "epoch": 3.0294117647058822, "grad_norm": 0.10171446949243546, "learning_rate": 8.137080518059288e-05, "loss": 0.0064, "step": 15650 }, { "epoch": 3.0296052631578947, "grad_norm": 0.057658206671476364, "learning_rate": 8.136858183375728e-05, "loss": 0.0079, "step": 15651 }, { "epoch": 3.029798761609907, "grad_norm": 0.09651049971580505, "learning_rate": 8.13663583888898e-05, "loss": 0.0069, "step": 15652 }, { "epoch": 3.0299922600619196, "grad_norm": 0.072331503033638, "learning_rate": 8.136413484599871e-05, "loss": 0.0071, "step": 15653 }, { "epoch": 3.030185758513932, "grad_norm": 0.08692698180675507, "learning_rate": 8.136191120509224e-05, "loss": 0.0063, "step": 15654 }, { "epoch": 3.0303792569659445, "grad_norm": 0.06850513815879822, "learning_rate": 8.135968746617869e-05, "loss": 0.0054, "step": 15655 }, { "epoch": 3.0305727554179565, "grad_norm": 0.07749993354082108, "learning_rate": 8.13574636292663e-05, "loss": 0.0066, "step": 15656 }, { "epoch": 3.030766253869969, "grad_norm": 0.040742821991443634, "learning_rate": 8.135523969436338e-05, "loss": 0.0053, "step": 15657 }, { "epoch": 3.0309597523219813, "grad_norm": 0.08823660016059875, "learning_rate": 8.135301566147817e-05, "loss": 0.007, "step": 15658 }, { "epoch": 3.031153250773994, "grad_norm": 0.028653915971517563, "learning_rate": 8.135079153061893e-05, "loss": 0.0062, "step": 15659 }, { "epoch": 3.031346749226006, "grad_norm": 0.10907109081745148, "learning_rate": 8.134856730179396e-05, "loss": 0.0067, "step": 15660 }, { "epoch": 3.0315402476780187, "grad_norm": 0.03570198640227318, "learning_rate": 8.134634297501149e-05, "loss": 0.0078, "step": 15661 }, { "epoch": 3.031733746130031, "grad_norm": 0.0916225016117096, "learning_rate": 8.134411855027981e-05, "loss": 0.0069, "step": 15662 }, { "epoch": 3.0319272445820435, "grad_norm": 0.04169857129454613, "learning_rate": 8.134189402760719e-05, "loss": 0.0067, "step": 15663 }, { "epoch": 3.0321207430340555, "grad_norm": 0.09502663463354111, "learning_rate": 8.13396694070019e-05, "loss": 0.0061, "step": 15664 }, { "epoch": 3.032314241486068, "grad_norm": 0.043235499411821365, "learning_rate": 
8.133744468847221e-05, "loss": 0.0057, "step": 15665 }, { "epoch": 3.0325077399380804, "grad_norm": 0.10472416132688522, "learning_rate": 8.13352198720264e-05, "loss": 0.0072, "step": 15666 }, { "epoch": 3.032701238390093, "grad_norm": 0.07824569195508957, "learning_rate": 8.13329949576727e-05, "loss": 0.0061, "step": 15667 }, { "epoch": 3.0328947368421053, "grad_norm": 0.12261959165334702, "learning_rate": 8.133076994541946e-05, "loss": 0.0056, "step": 15668 }, { "epoch": 3.0330882352941178, "grad_norm": 0.04122211039066315, "learning_rate": 8.132854483527488e-05, "loss": 0.0073, "step": 15669 }, { "epoch": 3.03328173374613, "grad_norm": 0.1419772207736969, "learning_rate": 8.132631962724726e-05, "loss": 0.0077, "step": 15670 }, { "epoch": 3.0334752321981426, "grad_norm": 0.15812718868255615, "learning_rate": 8.132409432134485e-05, "loss": 0.0067, "step": 15671 }, { "epoch": 3.0336687306501546, "grad_norm": 0.1707974225282669, "learning_rate": 8.132186891757597e-05, "loss": 0.0077, "step": 15672 }, { "epoch": 3.033862229102167, "grad_norm": 0.19219738245010376, "learning_rate": 8.131964341594887e-05, "loss": 0.0079, "step": 15673 }, { "epoch": 3.0340557275541795, "grad_norm": 0.0459730289876461, "learning_rate": 8.13174178164718e-05, "loss": 0.0056, "step": 15674 }, { "epoch": 3.034249226006192, "grad_norm": 0.20044654607772827, "learning_rate": 8.131519211915307e-05, "loss": 0.0087, "step": 15675 }, { "epoch": 3.0344427244582044, "grad_norm": 0.09011893719434738, "learning_rate": 8.131296632400094e-05, "loss": 0.0067, "step": 15676 }, { "epoch": 3.034636222910217, "grad_norm": 0.1367914080619812, "learning_rate": 8.131074043102368e-05, "loss": 0.0071, "step": 15677 }, { "epoch": 3.0348297213622293, "grad_norm": 0.13401001691818237, "learning_rate": 8.130851444022957e-05, "loss": 0.0073, "step": 15678 }, { "epoch": 3.0350232198142413, "grad_norm": 0.11950521171092987, "learning_rate": 8.130628835162687e-05, "loss": 0.0067, "step": 15679 }, { "epoch": 3.0352167182662537, "grad_norm": 0.14717455208301544, "learning_rate": 8.13040621652239e-05, "loss": 0.0076, "step": 15680 }, { "epoch": 3.035410216718266, "grad_norm": 0.0823729932308197, "learning_rate": 8.13018358810289e-05, "loss": 0.0064, "step": 15681 }, { "epoch": 3.0356037151702786, "grad_norm": 0.17961058020591736, "learning_rate": 8.129960949905016e-05, "loss": 0.0067, "step": 15682 }, { "epoch": 3.035797213622291, "grad_norm": 0.05255435034632683, "learning_rate": 8.129738301929593e-05, "loss": 0.0055, "step": 15683 }, { "epoch": 3.0359907120743035, "grad_norm": 0.18169748783111572, "learning_rate": 8.129515644177455e-05, "loss": 0.0073, "step": 15684 }, { "epoch": 3.036184210526316, "grad_norm": 0.10097884386777878, "learning_rate": 8.129292976649422e-05, "loss": 0.0074, "step": 15685 }, { "epoch": 3.0363777089783284, "grad_norm": 0.13272695243358612, "learning_rate": 8.129070299346326e-05, "loss": 0.0082, "step": 15686 }, { "epoch": 3.0365712074303404, "grad_norm": 0.1502784788608551, "learning_rate": 8.128847612268995e-05, "loss": 0.0056, "step": 15687 }, { "epoch": 3.036764705882353, "grad_norm": 0.09726372361183167, "learning_rate": 8.128624915418256e-05, "loss": 0.007, "step": 15688 }, { "epoch": 3.0369582043343653, "grad_norm": 0.19079087674617767, "learning_rate": 8.128402208794939e-05, "loss": 0.0066, "step": 15689 }, { "epoch": 3.0371517027863777, "grad_norm": 0.06611734628677368, "learning_rate": 8.12817949239987e-05, "loss": 0.0079, "step": 15690 }, { "epoch": 3.03734520123839, "grad_norm": 0.14201410114765167, 
"learning_rate": 8.127956766233876e-05, "loss": 0.0057, "step": 15691 }, { "epoch": 3.0375386996904026, "grad_norm": 0.08104671537876129, "learning_rate": 8.127734030297787e-05, "loss": 0.0061, "step": 15692 }, { "epoch": 3.037732198142415, "grad_norm": 0.10939732193946838, "learning_rate": 8.127511284592429e-05, "loss": 0.0076, "step": 15693 }, { "epoch": 3.0379256965944275, "grad_norm": 0.0737222358584404, "learning_rate": 8.127288529118632e-05, "loss": 0.0063, "step": 15694 }, { "epoch": 3.0381191950464395, "grad_norm": 0.05893601477146149, "learning_rate": 8.127065763877227e-05, "loss": 0.0073, "step": 15695 }, { "epoch": 3.038312693498452, "grad_norm": 0.06530003994703293, "learning_rate": 8.126842988869034e-05, "loss": 0.0081, "step": 15696 }, { "epoch": 3.0385061919504643, "grad_norm": 0.049864333122968674, "learning_rate": 8.126620204094889e-05, "loss": 0.0061, "step": 15697 }, { "epoch": 3.038699690402477, "grad_norm": 0.05921940132975578, "learning_rate": 8.126397409555615e-05, "loss": 0.0063, "step": 15698 }, { "epoch": 3.0388931888544892, "grad_norm": 0.031259629875421524, "learning_rate": 8.126174605252045e-05, "loss": 0.0059, "step": 15699 }, { "epoch": 3.0390866873065017, "grad_norm": 0.04645070806145668, "learning_rate": 8.125951791185003e-05, "loss": 0.0075, "step": 15700 }, { "epoch": 3.039280185758514, "grad_norm": 0.03680640831589699, "learning_rate": 8.125728967355321e-05, "loss": 0.0062, "step": 15701 }, { "epoch": 3.039473684210526, "grad_norm": 0.04097449779510498, "learning_rate": 8.125506133763824e-05, "loss": 0.0087, "step": 15702 }, { "epoch": 3.0396671826625385, "grad_norm": 0.08031507581472397, "learning_rate": 8.125283290411342e-05, "loss": 0.0059, "step": 15703 }, { "epoch": 3.039860681114551, "grad_norm": 0.07915200293064117, "learning_rate": 8.125060437298706e-05, "loss": 0.0081, "step": 15704 }, { "epoch": 3.0400541795665634, "grad_norm": 0.07617223262786865, "learning_rate": 8.12483757442674e-05, "loss": 0.0065, "step": 15705 }, { "epoch": 3.040247678018576, "grad_norm": 0.0680093914270401, "learning_rate": 8.124614701796276e-05, "loss": 0.006, "step": 15706 }, { "epoch": 3.0404411764705883, "grad_norm": 0.07090547680854797, "learning_rate": 8.12439181940814e-05, "loss": 0.006, "step": 15707 }, { "epoch": 3.0406346749226008, "grad_norm": 0.09298869967460632, "learning_rate": 8.124168927263162e-05, "loss": 0.0081, "step": 15708 }, { "epoch": 3.040828173374613, "grad_norm": 0.10482200980186462, "learning_rate": 8.123946025362171e-05, "loss": 0.0063, "step": 15709 }, { "epoch": 3.041021671826625, "grad_norm": 0.07094790786504745, "learning_rate": 8.123723113705995e-05, "loss": 0.007, "step": 15710 }, { "epoch": 3.0412151702786376, "grad_norm": 0.12422189861536026, "learning_rate": 8.123500192295464e-05, "loss": 0.0069, "step": 15711 }, { "epoch": 3.04140866873065, "grad_norm": 0.054023027420043945, "learning_rate": 8.123277261131405e-05, "loss": 0.0074, "step": 15712 }, { "epoch": 3.0416021671826625, "grad_norm": 0.12022092938423157, "learning_rate": 8.123054320214646e-05, "loss": 0.0064, "step": 15713 }, { "epoch": 3.041795665634675, "grad_norm": 0.07299325615167618, "learning_rate": 8.12283136954602e-05, "loss": 0.0068, "step": 15714 }, { "epoch": 3.0419891640866874, "grad_norm": 0.07962998002767563, "learning_rate": 8.122608409126352e-05, "loss": 0.007, "step": 15715 }, { "epoch": 3.0421826625387, "grad_norm": 0.055086489766836166, "learning_rate": 8.122385438956473e-05, "loss": 0.0068, "step": 15716 }, { "epoch": 3.0423761609907123, "grad_norm": 
0.05650367960333824, "learning_rate": 8.12216245903721e-05, "loss": 0.0066, "step": 15717 }, { "epoch": 3.0425696594427243, "grad_norm": 0.04987764731049538, "learning_rate": 8.121939469369394e-05, "loss": 0.0073, "step": 15718 }, { "epoch": 3.0427631578947367, "grad_norm": 0.050918031483888626, "learning_rate": 8.121716469953852e-05, "loss": 0.0062, "step": 15719 }, { "epoch": 3.042956656346749, "grad_norm": 0.05117787793278694, "learning_rate": 8.121493460791414e-05, "loss": 0.0063, "step": 15720 }, { "epoch": 3.0431501547987616, "grad_norm": 0.08494937419891357, "learning_rate": 8.121270441882912e-05, "loss": 0.0077, "step": 15721 }, { "epoch": 3.043343653250774, "grad_norm": 0.0714186355471611, "learning_rate": 8.121047413229169e-05, "loss": 0.0072, "step": 15722 }, { "epoch": 3.0435371517027865, "grad_norm": 0.09090924263000488, "learning_rate": 8.120824374831019e-05, "loss": 0.0063, "step": 15723 }, { "epoch": 3.043730650154799, "grad_norm": 0.042878661304712296, "learning_rate": 8.120601326689291e-05, "loss": 0.0079, "step": 15724 }, { "epoch": 3.043924148606811, "grad_norm": 0.028360139578580856, "learning_rate": 8.120378268804811e-05, "loss": 0.0056, "step": 15725 }, { "epoch": 3.0441176470588234, "grad_norm": 0.04188363999128342, "learning_rate": 8.120155201178411e-05, "loss": 0.0065, "step": 15726 }, { "epoch": 3.044311145510836, "grad_norm": 0.05111601948738098, "learning_rate": 8.11993212381092e-05, "loss": 0.006, "step": 15727 }, { "epoch": 3.0445046439628483, "grad_norm": 0.040537770837545395, "learning_rate": 8.119709036703166e-05, "loss": 0.0066, "step": 15728 }, { "epoch": 3.0446981424148607, "grad_norm": 0.05705828592181206, "learning_rate": 8.11948593985598e-05, "loss": 0.0065, "step": 15729 }, { "epoch": 3.044891640866873, "grad_norm": 0.043722525238990784, "learning_rate": 8.11926283327019e-05, "loss": 0.0069, "step": 15730 }, { "epoch": 3.0450851393188856, "grad_norm": 0.03587530925869942, "learning_rate": 8.119039716946627e-05, "loss": 0.0069, "step": 15731 }, { "epoch": 3.045278637770898, "grad_norm": 0.028197258710861206, "learning_rate": 8.118816590886119e-05, "loss": 0.006, "step": 15732 }, { "epoch": 3.04547213622291, "grad_norm": 0.057707589119672775, "learning_rate": 8.118593455089497e-05, "loss": 0.0073, "step": 15733 }, { "epoch": 3.0456656346749225, "grad_norm": 0.05775090306997299, "learning_rate": 8.118370309557589e-05, "loss": 0.0072, "step": 15734 }, { "epoch": 3.045859133126935, "grad_norm": 0.03483075276017189, "learning_rate": 8.118147154291227e-05, "loss": 0.0061, "step": 15735 }, { "epoch": 3.0460526315789473, "grad_norm": 0.05441102758049965, "learning_rate": 8.117923989291238e-05, "loss": 0.0067, "step": 15736 }, { "epoch": 3.04624613003096, "grad_norm": 0.09467079490423203, "learning_rate": 8.117700814558452e-05, "loss": 0.0057, "step": 15737 }, { "epoch": 3.0464396284829722, "grad_norm": 0.03652586415410042, "learning_rate": 8.117477630093699e-05, "loss": 0.0061, "step": 15738 }, { "epoch": 3.0466331269349847, "grad_norm": 0.0952904000878334, "learning_rate": 8.11725443589781e-05, "loss": 0.0067, "step": 15739 }, { "epoch": 3.046826625386997, "grad_norm": 0.048881832510232925, "learning_rate": 8.117031231971616e-05, "loss": 0.0079, "step": 15740 }, { "epoch": 3.047020123839009, "grad_norm": 0.051414694637060165, "learning_rate": 8.116808018315941e-05, "loss": 0.0069, "step": 15741 }, { "epoch": 3.0472136222910216, "grad_norm": 0.0830475240945816, "learning_rate": 8.11658479493162e-05, "loss": 0.0069, "step": 15742 }, { "epoch": 
3.047407120743034, "grad_norm": 0.04601482301950455, "learning_rate": 8.116361561819482e-05, "loss": 0.0062, "step": 15743 }, { "epoch": 3.0476006191950464, "grad_norm": 0.07193619757890701, "learning_rate": 8.116138318980357e-05, "loss": 0.0054, "step": 15744 }, { "epoch": 3.047794117647059, "grad_norm": 0.05191060155630112, "learning_rate": 8.115915066415072e-05, "loss": 0.0075, "step": 15745 }, { "epoch": 3.0479876160990713, "grad_norm": 0.06332030147314072, "learning_rate": 8.115691804124462e-05, "loss": 0.007, "step": 15746 }, { "epoch": 3.0481811145510838, "grad_norm": 0.05727110803127289, "learning_rate": 8.115468532109354e-05, "loss": 0.006, "step": 15747 }, { "epoch": 3.048374613003096, "grad_norm": 0.09008006751537323, "learning_rate": 8.115245250370578e-05, "loss": 0.0077, "step": 15748 }, { "epoch": 3.048568111455108, "grad_norm": 0.0576430968940258, "learning_rate": 8.115021958908962e-05, "loss": 0.0061, "step": 15749 }, { "epoch": 3.0487616099071206, "grad_norm": 0.08763919770717621, "learning_rate": 8.114798657725342e-05, "loss": 0.0066, "step": 15750 }, { "epoch": 3.048955108359133, "grad_norm": 0.06092378869652748, "learning_rate": 8.114575346820544e-05, "loss": 0.0074, "step": 15751 }, { "epoch": 3.0491486068111455, "grad_norm": 0.07354600727558136, "learning_rate": 8.1143520261954e-05, "loss": 0.0073, "step": 15752 }, { "epoch": 3.049342105263158, "grad_norm": 0.06974136829376221, "learning_rate": 8.114128695850737e-05, "loss": 0.0062, "step": 15753 }, { "epoch": 3.0495356037151704, "grad_norm": 0.07469306886196136, "learning_rate": 8.113905355787389e-05, "loss": 0.0069, "step": 15754 }, { "epoch": 3.049729102167183, "grad_norm": 0.041692327708005905, "learning_rate": 8.113682006006184e-05, "loss": 0.0073, "step": 15755 }, { "epoch": 3.049922600619195, "grad_norm": 0.09525983035564423, "learning_rate": 8.113458646507956e-05, "loss": 0.0057, "step": 15756 }, { "epoch": 3.0501160990712073, "grad_norm": 0.02685699053108692, "learning_rate": 8.11323527729353e-05, "loss": 0.0064, "step": 15757 }, { "epoch": 3.0503095975232197, "grad_norm": 0.09591828286647797, "learning_rate": 8.11301189836374e-05, "loss": 0.0075, "step": 15758 }, { "epoch": 3.050503095975232, "grad_norm": 0.044501643627882004, "learning_rate": 8.112788509719416e-05, "loss": 0.0061, "step": 15759 }, { "epoch": 3.0506965944272446, "grad_norm": 0.09141615033149719, "learning_rate": 8.112565111361388e-05, "loss": 0.0072, "step": 15760 }, { "epoch": 3.050890092879257, "grad_norm": 0.07982304692268372, "learning_rate": 8.112341703290487e-05, "loss": 0.0061, "step": 15761 }, { "epoch": 3.0510835913312695, "grad_norm": 0.08748704195022583, "learning_rate": 8.112118285507541e-05, "loss": 0.0069, "step": 15762 }, { "epoch": 3.051277089783282, "grad_norm": 0.07237230986356735, "learning_rate": 8.111894858013387e-05, "loss": 0.0077, "step": 15763 }, { "epoch": 3.051470588235294, "grad_norm": 0.06746838241815567, "learning_rate": 8.111671420808849e-05, "loss": 0.0064, "step": 15764 }, { "epoch": 3.0516640866873064, "grad_norm": 0.06644684821367264, "learning_rate": 8.111447973894759e-05, "loss": 0.0077, "step": 15765 }, { "epoch": 3.051857585139319, "grad_norm": 0.08442084491252899, "learning_rate": 8.111224517271952e-05, "loss": 0.0058, "step": 15766 }, { "epoch": 3.0520510835913313, "grad_norm": 0.06354110687971115, "learning_rate": 8.111001050941254e-05, "loss": 0.0072, "step": 15767 }, { "epoch": 3.0522445820433437, "grad_norm": 0.07947411388158798, "learning_rate": 8.110777574903496e-05, "loss": 0.0077, "step": 
15768 }, { "epoch": 3.052438080495356, "grad_norm": 0.06349656730890274, "learning_rate": 8.110554089159511e-05, "loss": 0.0067, "step": 15769 }, { "epoch": 3.0526315789473686, "grad_norm": 0.0787181556224823, "learning_rate": 8.110330593710131e-05, "loss": 0.0059, "step": 15770 }, { "epoch": 3.0528250773993806, "grad_norm": 0.111675925552845, "learning_rate": 8.110107088556183e-05, "loss": 0.0068, "step": 15771 }, { "epoch": 3.053018575851393, "grad_norm": 0.08302686363458633, "learning_rate": 8.109883573698502e-05, "loss": 0.0071, "step": 15772 }, { "epoch": 3.0532120743034055, "grad_norm": 0.07529958337545395, "learning_rate": 8.109660049137915e-05, "loss": 0.0058, "step": 15773 }, { "epoch": 3.053405572755418, "grad_norm": 0.08896157890558243, "learning_rate": 8.109436514875256e-05, "loss": 0.0068, "step": 15774 }, { "epoch": 3.0535990712074303, "grad_norm": 0.07456190884113312, "learning_rate": 8.109212970911354e-05, "loss": 0.0064, "step": 15775 }, { "epoch": 3.053792569659443, "grad_norm": 0.06694822013378143, "learning_rate": 8.108989417247042e-05, "loss": 0.007, "step": 15776 }, { "epoch": 3.0539860681114552, "grad_norm": 0.11931256204843521, "learning_rate": 8.108765853883152e-05, "loss": 0.0063, "step": 15777 }, { "epoch": 3.0541795665634677, "grad_norm": 0.07760219275951385, "learning_rate": 8.10854228082051e-05, "loss": 0.0061, "step": 15778 }, { "epoch": 3.0543730650154797, "grad_norm": 0.09682498127222061, "learning_rate": 8.108318698059952e-05, "loss": 0.0069, "step": 15779 }, { "epoch": 3.054566563467492, "grad_norm": 0.10653680562973022, "learning_rate": 8.10809510560231e-05, "loss": 0.0069, "step": 15780 }, { "epoch": 3.0547600619195046, "grad_norm": 0.03777588531374931, "learning_rate": 8.107871503448408e-05, "loss": 0.006, "step": 15781 }, { "epoch": 3.054953560371517, "grad_norm": 0.12086395919322968, "learning_rate": 8.107647891599086e-05, "loss": 0.0079, "step": 15782 }, { "epoch": 3.0551470588235294, "grad_norm": 0.0376926064491272, "learning_rate": 8.107424270055172e-05, "loss": 0.0063, "step": 15783 }, { "epoch": 3.055340557275542, "grad_norm": 0.09517455101013184, "learning_rate": 8.107200638817495e-05, "loss": 0.0076, "step": 15784 }, { "epoch": 3.0555340557275543, "grad_norm": 0.07793329656124115, "learning_rate": 8.106976997886889e-05, "loss": 0.0077, "step": 15785 }, { "epoch": 3.0557275541795668, "grad_norm": 0.09305528551340103, "learning_rate": 8.106753347264186e-05, "loss": 0.0061, "step": 15786 }, { "epoch": 3.0559210526315788, "grad_norm": 0.04910259321331978, "learning_rate": 8.106529686950216e-05, "loss": 0.0065, "step": 15787 }, { "epoch": 3.056114551083591, "grad_norm": 0.0742914006114006, "learning_rate": 8.10630601694581e-05, "loss": 0.0078, "step": 15788 }, { "epoch": 3.0563080495356036, "grad_norm": 0.06235881522297859, "learning_rate": 8.1060823372518e-05, "loss": 0.0074, "step": 15789 }, { "epoch": 3.056501547987616, "grad_norm": 0.06373677402734756, "learning_rate": 8.10585864786902e-05, "loss": 0.0072, "step": 15790 }, { "epoch": 3.0566950464396285, "grad_norm": 0.09565097838640213, "learning_rate": 8.105634948798299e-05, "loss": 0.0073, "step": 15791 }, { "epoch": 3.056888544891641, "grad_norm": 0.060570258647203445, "learning_rate": 8.10541124004047e-05, "loss": 0.0063, "step": 15792 }, { "epoch": 3.0570820433436534, "grad_norm": 0.09290729463100433, "learning_rate": 8.105187521596362e-05, "loss": 0.0064, "step": 15793 }, { "epoch": 3.057275541795666, "grad_norm": 0.054867617785930634, "learning_rate": 8.104963793466809e-05, "loss": 
0.0064, "step": 15794 }, { "epoch": 3.057469040247678, "grad_norm": 0.09910723567008972, "learning_rate": 8.104740055652644e-05, "loss": 0.0059, "step": 15795 }, { "epoch": 3.0576625386996903, "grad_norm": 0.04514811560511589, "learning_rate": 8.104516308154697e-05, "loss": 0.0065, "step": 15796 }, { "epoch": 3.0578560371517027, "grad_norm": 0.04447109252214432, "learning_rate": 8.1042925509738e-05, "loss": 0.0057, "step": 15797 }, { "epoch": 3.058049535603715, "grad_norm": 0.06876056641340256, "learning_rate": 8.104068784110784e-05, "loss": 0.0073, "step": 15798 }, { "epoch": 3.0582430340557276, "grad_norm": 0.05198029428720474, "learning_rate": 8.103845007566483e-05, "loss": 0.0075, "step": 15799 }, { "epoch": 3.05843653250774, "grad_norm": 0.08310813456773758, "learning_rate": 8.103621221341728e-05, "loss": 0.007, "step": 15800 }, { "epoch": 3.0586300309597525, "grad_norm": 0.057004231959581375, "learning_rate": 8.10339742543735e-05, "loss": 0.007, "step": 15801 }, { "epoch": 3.0588235294117645, "grad_norm": 0.09098401665687561, "learning_rate": 8.103173619854182e-05, "loss": 0.0075, "step": 15802 }, { "epoch": 3.059017027863777, "grad_norm": 0.09361635893583298, "learning_rate": 8.102949804593056e-05, "loss": 0.0061, "step": 15803 }, { "epoch": 3.0592105263157894, "grad_norm": 0.09322871267795563, "learning_rate": 8.102725979654804e-05, "loss": 0.0059, "step": 15804 }, { "epoch": 3.059404024767802, "grad_norm": 0.05779131501913071, "learning_rate": 8.102502145040259e-05, "loss": 0.0065, "step": 15805 }, { "epoch": 3.0595975232198143, "grad_norm": 0.10239209979772568, "learning_rate": 8.102278300750252e-05, "loss": 0.0059, "step": 15806 }, { "epoch": 3.0597910216718267, "grad_norm": 0.026998255401849747, "learning_rate": 8.102054446785614e-05, "loss": 0.0064, "step": 15807 }, { "epoch": 3.059984520123839, "grad_norm": 0.10096785426139832, "learning_rate": 8.10183058314718e-05, "loss": 0.0076, "step": 15808 }, { "epoch": 3.0601780185758516, "grad_norm": 0.08014532923698425, "learning_rate": 8.10160670983578e-05, "loss": 0.0062, "step": 15809 }, { "epoch": 3.0603715170278636, "grad_norm": 0.07363829016685486, "learning_rate": 8.101382826852249e-05, "loss": 0.0067, "step": 15810 }, { "epoch": 3.060565015479876, "grad_norm": 0.09477303922176361, "learning_rate": 8.101158934197418e-05, "loss": 0.006, "step": 15811 }, { "epoch": 3.0607585139318885, "grad_norm": 0.06982675939798355, "learning_rate": 8.100935031872117e-05, "loss": 0.0065, "step": 15812 }, { "epoch": 3.060952012383901, "grad_norm": 0.06265334039926529, "learning_rate": 8.100711119877181e-05, "loss": 0.0063, "step": 15813 }, { "epoch": 3.0611455108359134, "grad_norm": 0.09263240545988083, "learning_rate": 8.10048719821344e-05, "loss": 0.0076, "step": 15814 }, { "epoch": 3.061339009287926, "grad_norm": 0.048125628381967545, "learning_rate": 8.100263266881733e-05, "loss": 0.0079, "step": 15815 }, { "epoch": 3.0615325077399382, "grad_norm": 0.07130351662635803, "learning_rate": 8.100039325882884e-05, "loss": 0.0071, "step": 15816 }, { "epoch": 3.0617260061919507, "grad_norm": 0.060563087463378906, "learning_rate": 8.09981537521773e-05, "loss": 0.0064, "step": 15817 }, { "epoch": 3.0619195046439627, "grad_norm": 0.059630122035741806, "learning_rate": 8.099591414887104e-05, "loss": 0.0059, "step": 15818 }, { "epoch": 3.062113003095975, "grad_norm": 0.057924944907426834, "learning_rate": 8.099367444891837e-05, "loss": 0.0075, "step": 15819 }, { "epoch": 3.0623065015479876, "grad_norm": 0.07804090529680252, "learning_rate": 
8.099143465232765e-05, "loss": 0.0057, "step": 15820 }, { "epoch": 3.0625, "grad_norm": 0.04253308102488518, "learning_rate": 8.098919475910715e-05, "loss": 0.0059, "step": 15821 }, { "epoch": 3.0626934984520124, "grad_norm": 0.06715739518404007, "learning_rate": 8.098695476926524e-05, "loss": 0.0077, "step": 15822 }, { "epoch": 3.062886996904025, "grad_norm": 0.0641247034072876, "learning_rate": 8.098471468281023e-05, "loss": 0.0061, "step": 15823 }, { "epoch": 3.0630804953560373, "grad_norm": 0.05172388628125191, "learning_rate": 8.098247449975045e-05, "loss": 0.007, "step": 15824 }, { "epoch": 3.0632739938080493, "grad_norm": 0.07080136984586716, "learning_rate": 8.098023422009425e-05, "loss": 0.006, "step": 15825 }, { "epoch": 3.0634674922600618, "grad_norm": 0.03644381836056709, "learning_rate": 8.097799384384992e-05, "loss": 0.0052, "step": 15826 }, { "epoch": 3.063660990712074, "grad_norm": 0.06300678849220276, "learning_rate": 8.09757533710258e-05, "loss": 0.0057, "step": 15827 }, { "epoch": 3.0638544891640866, "grad_norm": 0.0291669312864542, "learning_rate": 8.097351280163026e-05, "loss": 0.005, "step": 15828 }, { "epoch": 3.064047987616099, "grad_norm": 0.10866013914346695, "learning_rate": 8.097127213567159e-05, "loss": 0.0053, "step": 15829 }, { "epoch": 3.0642414860681115, "grad_norm": 0.052712421864271164, "learning_rate": 8.096903137315813e-05, "loss": 0.0073, "step": 15830 }, { "epoch": 3.064434984520124, "grad_norm": 0.07274789363145828, "learning_rate": 8.096679051409822e-05, "loss": 0.0069, "step": 15831 }, { "epoch": 3.0646284829721364, "grad_norm": 0.07440201193094254, "learning_rate": 8.096454955850015e-05, "loss": 0.0084, "step": 15832 }, { "epoch": 3.0648219814241484, "grad_norm": 0.06925363838672638, "learning_rate": 8.09623085063723e-05, "loss": 0.0065, "step": 15833 }, { "epoch": 3.065015479876161, "grad_norm": 0.062217798084020615, "learning_rate": 8.096006735772301e-05, "loss": 0.0055, "step": 15834 }, { "epoch": 3.0652089783281733, "grad_norm": 0.0604785792529583, "learning_rate": 8.095782611256055e-05, "loss": 0.0064, "step": 15835 }, { "epoch": 3.0654024767801857, "grad_norm": 0.09766857326030731, "learning_rate": 8.09555847708933e-05, "loss": 0.0074, "step": 15836 }, { "epoch": 3.065595975232198, "grad_norm": 0.07583147287368774, "learning_rate": 8.095334333272958e-05, "loss": 0.0063, "step": 15837 }, { "epoch": 3.0657894736842106, "grad_norm": 0.10137496888637543, "learning_rate": 8.095110179807772e-05, "loss": 0.0046, "step": 15838 }, { "epoch": 3.065982972136223, "grad_norm": 0.07527279853820801, "learning_rate": 8.094886016694608e-05, "loss": 0.0063, "step": 15839 }, { "epoch": 3.0661764705882355, "grad_norm": 0.08931799232959747, "learning_rate": 8.094661843934294e-05, "loss": 0.0076, "step": 15840 }, { "epoch": 3.0663699690402475, "grad_norm": 0.07879337668418884, "learning_rate": 8.094437661527669e-05, "loss": 0.007, "step": 15841 }, { "epoch": 3.06656346749226, "grad_norm": 0.08847976475954056, "learning_rate": 8.094213469475565e-05, "loss": 0.0051, "step": 15842 }, { "epoch": 3.0667569659442724, "grad_norm": 0.08476878702640533, "learning_rate": 8.093989267778811e-05, "loss": 0.0057, "step": 15843 }, { "epoch": 3.066950464396285, "grad_norm": 0.05494067445397377, "learning_rate": 8.093765056438246e-05, "loss": 0.0059, "step": 15844 }, { "epoch": 3.0671439628482973, "grad_norm": 0.09870932996273041, "learning_rate": 8.093540835454703e-05, "loss": 0.0054, "step": 15845 }, { "epoch": 3.0673374613003097, "grad_norm": 0.03822731971740723, 
"learning_rate": 8.093316604829011e-05, "loss": 0.0063, "step": 15846 }, { "epoch": 3.067530959752322, "grad_norm": 0.04359329119324684, "learning_rate": 8.09309236456201e-05, "loss": 0.0065, "step": 15847 }, { "epoch": 3.067724458204334, "grad_norm": 0.0456986278295517, "learning_rate": 8.092868114654528e-05, "loss": 0.0068, "step": 15848 }, { "epoch": 3.0679179566563466, "grad_norm": 0.0460599809885025, "learning_rate": 8.092643855107403e-05, "loss": 0.0065, "step": 15849 }, { "epoch": 3.068111455108359, "grad_norm": 0.031248662620782852, "learning_rate": 8.092419585921466e-05, "loss": 0.0067, "step": 15850 }, { "epoch": 3.0683049535603715, "grad_norm": 0.07604284584522247, "learning_rate": 8.092195307097553e-05, "loss": 0.0073, "step": 15851 }, { "epoch": 3.068498452012384, "grad_norm": 0.02651527151465416, "learning_rate": 8.091971018636496e-05, "loss": 0.0062, "step": 15852 }, { "epoch": 3.0686919504643964, "grad_norm": 0.06378672271966934, "learning_rate": 8.091746720539129e-05, "loss": 0.0062, "step": 15853 }, { "epoch": 3.068885448916409, "grad_norm": 0.029711291193962097, "learning_rate": 8.091522412806286e-05, "loss": 0.0061, "step": 15854 }, { "epoch": 3.0690789473684212, "grad_norm": 0.04635312035679817, "learning_rate": 8.091298095438802e-05, "loss": 0.007, "step": 15855 }, { "epoch": 3.0692724458204332, "grad_norm": 0.04103204980492592, "learning_rate": 8.091073768437509e-05, "loss": 0.0073, "step": 15856 }, { "epoch": 3.0694659442724457, "grad_norm": 0.03813593089580536, "learning_rate": 8.090849431803243e-05, "loss": 0.0062, "step": 15857 }, { "epoch": 3.069659442724458, "grad_norm": 0.04164796322584152, "learning_rate": 8.09062508553684e-05, "loss": 0.0061, "step": 15858 }, { "epoch": 3.0698529411764706, "grad_norm": 0.03303803876042366, "learning_rate": 8.090400729639127e-05, "loss": 0.0064, "step": 15859 }, { "epoch": 3.070046439628483, "grad_norm": 0.029951535165309906, "learning_rate": 8.090176364110944e-05, "loss": 0.0069, "step": 15860 }, { "epoch": 3.0702399380804954, "grad_norm": 0.02719344198703766, "learning_rate": 8.089951988953125e-05, "loss": 0.0058, "step": 15861 }, { "epoch": 3.070433436532508, "grad_norm": 0.02767738327383995, "learning_rate": 8.0897276041665e-05, "loss": 0.0053, "step": 15862 }, { "epoch": 3.0706269349845203, "grad_norm": 0.046608466655015945, "learning_rate": 8.089503209751906e-05, "loss": 0.0068, "step": 15863 }, { "epoch": 3.0708204334365323, "grad_norm": 0.054656852036714554, "learning_rate": 8.08927880571018e-05, "loss": 0.0066, "step": 15864 }, { "epoch": 3.0710139318885448, "grad_norm": 0.050466954708099365, "learning_rate": 8.089054392042151e-05, "loss": 0.0078, "step": 15865 }, { "epoch": 3.071207430340557, "grad_norm": 0.05117667838931084, "learning_rate": 8.088829968748657e-05, "loss": 0.0068, "step": 15866 }, { "epoch": 3.0714009287925697, "grad_norm": 0.05093974992632866, "learning_rate": 8.08860553583053e-05, "loss": 0.0071, "step": 15867 }, { "epoch": 3.071594427244582, "grad_norm": 0.04149652272462845, "learning_rate": 8.088381093288605e-05, "loss": 0.0068, "step": 15868 }, { "epoch": 3.0717879256965945, "grad_norm": 0.06253618746995926, "learning_rate": 8.088156641123718e-05, "loss": 0.0051, "step": 15869 }, { "epoch": 3.071981424148607, "grad_norm": 0.031458690762519836, "learning_rate": 8.087932179336704e-05, "loss": 0.0068, "step": 15870 }, { "epoch": 3.0721749226006194, "grad_norm": 0.05607972666621208, "learning_rate": 8.087707707928392e-05, "loss": 0.0068, "step": 15871 }, { "epoch": 3.0723684210526314, "grad_norm": 
0.052257806062698364, "learning_rate": 8.087483226899623e-05, "loss": 0.0076, "step": 15872 }, { "epoch": 3.072561919504644, "grad_norm": 0.05486224591732025, "learning_rate": 8.087258736251227e-05, "loss": 0.0049, "step": 15873 }, { "epoch": 3.0727554179566563, "grad_norm": 0.04972708597779274, "learning_rate": 8.087034235984043e-05, "loss": 0.0073, "step": 15874 }, { "epoch": 3.0729489164086687, "grad_norm": 0.03233577683568001, "learning_rate": 8.086809726098903e-05, "loss": 0.006, "step": 15875 }, { "epoch": 3.073142414860681, "grad_norm": 0.06723298132419586, "learning_rate": 8.08658520659664e-05, "loss": 0.0059, "step": 15876 }, { "epoch": 3.0733359133126936, "grad_norm": 0.03746641054749489, "learning_rate": 8.086360677478091e-05, "loss": 0.0059, "step": 15877 }, { "epoch": 3.073529411764706, "grad_norm": 0.046024616807699203, "learning_rate": 8.086136138744091e-05, "loss": 0.0064, "step": 15878 }, { "epoch": 3.073722910216718, "grad_norm": 0.046327896416187286, "learning_rate": 8.085911590395473e-05, "loss": 0.005, "step": 15879 }, { "epoch": 3.0739164086687305, "grad_norm": 0.031423021107912064, "learning_rate": 8.085687032433074e-05, "loss": 0.0074, "step": 15880 }, { "epoch": 3.074109907120743, "grad_norm": 0.047644052654504776, "learning_rate": 8.085462464857728e-05, "loss": 0.0061, "step": 15881 }, { "epoch": 3.0743034055727554, "grad_norm": 0.05350858345627785, "learning_rate": 8.08523788767027e-05, "loss": 0.0071, "step": 15882 }, { "epoch": 3.074496904024768, "grad_norm": 0.07675282657146454, "learning_rate": 8.085013300871533e-05, "loss": 0.0066, "step": 15883 }, { "epoch": 3.0746904024767803, "grad_norm": 0.04222710430622101, "learning_rate": 8.084788704462354e-05, "loss": 0.0064, "step": 15884 }, { "epoch": 3.0748839009287927, "grad_norm": 0.06134389713406563, "learning_rate": 8.084564098443568e-05, "loss": 0.0062, "step": 15885 }, { "epoch": 3.075077399380805, "grad_norm": 0.05723334476351738, "learning_rate": 8.084339482816009e-05, "loss": 0.0058, "step": 15886 }, { "epoch": 3.075270897832817, "grad_norm": 0.04268386960029602, "learning_rate": 8.084114857580512e-05, "loss": 0.0061, "step": 15887 }, { "epoch": 3.0754643962848296, "grad_norm": 0.08361394703388214, "learning_rate": 8.083890222737913e-05, "loss": 0.0056, "step": 15888 }, { "epoch": 3.075657894736842, "grad_norm": 0.09539971500635147, "learning_rate": 8.08366557828905e-05, "loss": 0.0068, "step": 15889 }, { "epoch": 3.0758513931888545, "grad_norm": 0.08309377729892731, "learning_rate": 8.083440924234751e-05, "loss": 0.0067, "step": 15890 }, { "epoch": 3.076044891640867, "grad_norm": 0.09534382820129395, "learning_rate": 8.083216260575856e-05, "loss": 0.0079, "step": 15891 }, { "epoch": 3.0762383900928794, "grad_norm": 0.0401584692299366, "learning_rate": 8.0829915873132e-05, "loss": 0.0067, "step": 15892 }, { "epoch": 3.076431888544892, "grad_norm": 0.11104269325733185, "learning_rate": 8.082766904447617e-05, "loss": 0.0063, "step": 15893 }, { "epoch": 3.076625386996904, "grad_norm": 0.10016793012619019, "learning_rate": 8.082542211979944e-05, "loss": 0.0067, "step": 15894 }, { "epoch": 3.0768188854489162, "grad_norm": 0.13560736179351807, "learning_rate": 8.082317509911016e-05, "loss": 0.006, "step": 15895 }, { "epoch": 3.0770123839009287, "grad_norm": 0.1312933713197708, "learning_rate": 8.082092798241666e-05, "loss": 0.0065, "step": 15896 }, { "epoch": 3.077205882352941, "grad_norm": 0.11079990118741989, "learning_rate": 8.081868076972733e-05, "loss": 0.0074, "step": 15897 }, { "epoch": 
3.0773993808049536, "grad_norm": 0.17102304100990295, "learning_rate": 8.08164334610505e-05, "loss": 0.0081, "step": 15898 }, { "epoch": 3.077592879256966, "grad_norm": 0.06134480983018875, "learning_rate": 8.081418605639452e-05, "loss": 0.0061, "step": 15899 }, { "epoch": 3.0777863777089784, "grad_norm": 0.14679282903671265, "learning_rate": 8.081193855576777e-05, "loss": 0.0054, "step": 15900 }, { "epoch": 3.077979876160991, "grad_norm": 0.10072571784257889, "learning_rate": 8.080969095917859e-05, "loss": 0.0068, "step": 15901 }, { "epoch": 3.078173374613003, "grad_norm": 0.09670276939868927, "learning_rate": 8.080744326663534e-05, "loss": 0.0055, "step": 15902 }, { "epoch": 3.0783668730650153, "grad_norm": 0.1574425995349884, "learning_rate": 8.080519547814637e-05, "loss": 0.007, "step": 15903 }, { "epoch": 3.0785603715170278, "grad_norm": 0.06481355428695679, "learning_rate": 8.080294759372006e-05, "loss": 0.0067, "step": 15904 }, { "epoch": 3.07875386996904, "grad_norm": 0.15952657163143158, "learning_rate": 8.080069961336474e-05, "loss": 0.0052, "step": 15905 }, { "epoch": 3.0789473684210527, "grad_norm": 0.0722411647439003, "learning_rate": 8.079845153708877e-05, "loss": 0.0076, "step": 15906 }, { "epoch": 3.079140866873065, "grad_norm": 0.15500003099441528, "learning_rate": 8.079620336490051e-05, "loss": 0.0072, "step": 15907 }, { "epoch": 3.0793343653250775, "grad_norm": 0.12322192639112473, "learning_rate": 8.079395509680833e-05, "loss": 0.006, "step": 15908 }, { "epoch": 3.07952786377709, "grad_norm": 0.08318328857421875, "learning_rate": 8.079170673282057e-05, "loss": 0.0076, "step": 15909 }, { "epoch": 3.079721362229102, "grad_norm": 0.18899603188037872, "learning_rate": 8.078945827294561e-05, "loss": 0.0056, "step": 15910 }, { "epoch": 3.0799148606811144, "grad_norm": 0.052980560809373856, "learning_rate": 8.07872097171918e-05, "loss": 0.008, "step": 15911 }, { "epoch": 3.080108359133127, "grad_norm": 0.14688940346240997, "learning_rate": 8.078496106556749e-05, "loss": 0.0071, "step": 15912 }, { "epoch": 3.0803018575851393, "grad_norm": 0.06959889829158783, "learning_rate": 8.078271231808105e-05, "loss": 0.0077, "step": 15913 }, { "epoch": 3.0804953560371517, "grad_norm": 0.10878085345029831, "learning_rate": 8.078046347474084e-05, "loss": 0.0064, "step": 15914 }, { "epoch": 3.080688854489164, "grad_norm": 0.11380622535943985, "learning_rate": 8.077821453555523e-05, "loss": 0.0071, "step": 15915 }, { "epoch": 3.0808823529411766, "grad_norm": 0.06700744479894638, "learning_rate": 8.077596550053256e-05, "loss": 0.0064, "step": 15916 }, { "epoch": 3.081075851393189, "grad_norm": 0.1190263032913208, "learning_rate": 8.07737163696812e-05, "loss": 0.0061, "step": 15917 }, { "epoch": 3.081269349845201, "grad_norm": 0.044660937041044235, "learning_rate": 8.077146714300951e-05, "loss": 0.0073, "step": 15918 }, { "epoch": 3.0814628482972135, "grad_norm": 0.11329913139343262, "learning_rate": 8.076921782052588e-05, "loss": 0.0068, "step": 15919 }, { "epoch": 3.081656346749226, "grad_norm": 0.07202169299125671, "learning_rate": 8.076696840223861e-05, "loss": 0.0069, "step": 15920 }, { "epoch": 3.0818498452012384, "grad_norm": 0.09081745147705078, "learning_rate": 8.076471888815613e-05, "loss": 0.0089, "step": 15921 }, { "epoch": 3.082043343653251, "grad_norm": 0.0630737692117691, "learning_rate": 8.076246927828676e-05, "loss": 0.0066, "step": 15922 }, { "epoch": 3.0822368421052633, "grad_norm": 0.06603676825761795, "learning_rate": 8.076021957263887e-05, "loss": 0.007, "step": 15923 }, { 
"epoch": 3.0824303405572757, "grad_norm": 0.043924953788518906, "learning_rate": 8.075796977122085e-05, "loss": 0.0063, "step": 15924 }, { "epoch": 3.0826238390092877, "grad_norm": 0.04600190743803978, "learning_rate": 8.075571987404104e-05, "loss": 0.0063, "step": 15925 }, { "epoch": 3.0828173374613, "grad_norm": 0.04529747739434242, "learning_rate": 8.075346988110781e-05, "loss": 0.0065, "step": 15926 }, { "epoch": 3.0830108359133126, "grad_norm": 0.08536691963672638, "learning_rate": 8.075121979242951e-05, "loss": 0.0062, "step": 15927 }, { "epoch": 3.083204334365325, "grad_norm": 0.027013037353754044, "learning_rate": 8.074896960801454e-05, "loss": 0.0057, "step": 15928 }, { "epoch": 3.0833978328173375, "grad_norm": 0.07758485525846481, "learning_rate": 8.074671932787125e-05, "loss": 0.0065, "step": 15929 }, { "epoch": 3.08359133126935, "grad_norm": 0.05022582784295082, "learning_rate": 8.074446895200797e-05, "loss": 0.0064, "step": 15930 }, { "epoch": 3.0837848297213624, "grad_norm": 0.0716233104467392, "learning_rate": 8.074221848043313e-05, "loss": 0.0078, "step": 15931 }, { "epoch": 3.083978328173375, "grad_norm": 0.07284626364707947, "learning_rate": 8.073996791315505e-05, "loss": 0.0061, "step": 15932 }, { "epoch": 3.084171826625387, "grad_norm": 0.05322440713644028, "learning_rate": 8.073771725018211e-05, "loss": 0.0051, "step": 15933 }, { "epoch": 3.0843653250773992, "grad_norm": 0.07864896208047867, "learning_rate": 8.073546649152268e-05, "loss": 0.0072, "step": 15934 }, { "epoch": 3.0845588235294117, "grad_norm": 0.07682536542415619, "learning_rate": 8.073321563718514e-05, "loss": 0.0065, "step": 15935 }, { "epoch": 3.084752321981424, "grad_norm": 0.0614294670522213, "learning_rate": 8.073096468717785e-05, "loss": 0.0078, "step": 15936 }, { "epoch": 3.0849458204334366, "grad_norm": 0.08149351924657822, "learning_rate": 8.072871364150914e-05, "loss": 0.007, "step": 15937 }, { "epoch": 3.085139318885449, "grad_norm": 0.04111921787261963, "learning_rate": 8.072646250018744e-05, "loss": 0.0077, "step": 15938 }, { "epoch": 3.0853328173374615, "grad_norm": 0.09359050542116165, "learning_rate": 8.072421126322108e-05, "loss": 0.006, "step": 15939 }, { "epoch": 3.085526315789474, "grad_norm": 0.0723487138748169, "learning_rate": 8.072195993061843e-05, "loss": 0.0071, "step": 15940 }, { "epoch": 3.085719814241486, "grad_norm": 0.07961513847112656, "learning_rate": 8.071970850238789e-05, "loss": 0.0062, "step": 15941 }, { "epoch": 3.0859133126934983, "grad_norm": 0.04436001181602478, "learning_rate": 8.071745697853781e-05, "loss": 0.0071, "step": 15942 }, { "epoch": 3.0861068111455108, "grad_norm": 0.07186964154243469, "learning_rate": 8.071520535907655e-05, "loss": 0.0071, "step": 15943 }, { "epoch": 3.086300309597523, "grad_norm": 0.036780454218387604, "learning_rate": 8.071295364401251e-05, "loss": 0.0062, "step": 15944 }, { "epoch": 3.0864938080495357, "grad_norm": 0.09690513461828232, "learning_rate": 8.071070183335402e-05, "loss": 0.0053, "step": 15945 }, { "epoch": 3.086687306501548, "grad_norm": 0.04805006831884384, "learning_rate": 8.070844992710951e-05, "loss": 0.007, "step": 15946 }, { "epoch": 3.0868808049535605, "grad_norm": 0.08835820108652115, "learning_rate": 8.07061979252873e-05, "loss": 0.0066, "step": 15947 }, { "epoch": 3.087074303405573, "grad_norm": 0.08566027134656906, "learning_rate": 8.070394582789577e-05, "loss": 0.0074, "step": 15948 }, { "epoch": 3.087267801857585, "grad_norm": 0.05083800479769707, "learning_rate": 8.07016936349433e-05, "loss": 0.0063, 
"step": 15949 }, { "epoch": 3.0874613003095974, "grad_norm": 0.09193243831396103, "learning_rate": 8.06994413464383e-05, "loss": 0.0067, "step": 15950 }, { "epoch": 3.08765479876161, "grad_norm": 0.028736570850014687, "learning_rate": 8.069718896238907e-05, "loss": 0.0065, "step": 15951 }, { "epoch": 3.0878482972136223, "grad_norm": 0.051471855491399765, "learning_rate": 8.069493648280406e-05, "loss": 0.0059, "step": 15952 }, { "epoch": 3.0880417956656347, "grad_norm": 0.06518740206956863, "learning_rate": 8.069268390769159e-05, "loss": 0.0063, "step": 15953 }, { "epoch": 3.088235294117647, "grad_norm": 0.024608580395579338, "learning_rate": 8.069043123706003e-05, "loss": 0.0055, "step": 15954 }, { "epoch": 3.0884287925696596, "grad_norm": 0.07197488099336624, "learning_rate": 8.068817847091781e-05, "loss": 0.0069, "step": 15955 }, { "epoch": 3.0886222910216716, "grad_norm": 0.04456227272748947, "learning_rate": 8.068592560927326e-05, "loss": 0.0062, "step": 15956 }, { "epoch": 3.088815789473684, "grad_norm": 0.07164701819419861, "learning_rate": 8.068367265213477e-05, "loss": 0.008, "step": 15957 }, { "epoch": 3.0890092879256965, "grad_norm": 0.07392638176679611, "learning_rate": 8.068141959951072e-05, "loss": 0.0058, "step": 15958 }, { "epoch": 3.089202786377709, "grad_norm": 0.022068457677960396, "learning_rate": 8.067916645140946e-05, "loss": 0.0052, "step": 15959 }, { "epoch": 3.0893962848297214, "grad_norm": 0.10835855454206467, "learning_rate": 8.06769132078394e-05, "loss": 0.0052, "step": 15960 }, { "epoch": 3.089589783281734, "grad_norm": 0.036578401923179626, "learning_rate": 8.067465986880891e-05, "loss": 0.0065, "step": 15961 }, { "epoch": 3.0897832817337463, "grad_norm": 0.06736665964126587, "learning_rate": 8.067240643432634e-05, "loss": 0.0062, "step": 15962 }, { "epoch": 3.0899767801857587, "grad_norm": 0.0508112758398056, "learning_rate": 8.06701529044001e-05, "loss": 0.0074, "step": 15963 }, { "epoch": 3.0901702786377707, "grad_norm": 0.07126893103122711, "learning_rate": 8.066789927903856e-05, "loss": 0.0062, "step": 15964 }, { "epoch": 3.090363777089783, "grad_norm": 0.04980367794632912, "learning_rate": 8.066564555825009e-05, "loss": 0.0055, "step": 15965 }, { "epoch": 3.0905572755417956, "grad_norm": 0.03070603311061859, "learning_rate": 8.066339174204308e-05, "loss": 0.0058, "step": 15966 }, { "epoch": 3.090750773993808, "grad_norm": 0.08355394005775452, "learning_rate": 8.06611378304259e-05, "loss": 0.0076, "step": 15967 }, { "epoch": 3.0909442724458205, "grad_norm": 0.04710666835308075, "learning_rate": 8.065888382340694e-05, "loss": 0.0058, "step": 15968 }, { "epoch": 3.091137770897833, "grad_norm": 0.0600069984793663, "learning_rate": 8.065662972099457e-05, "loss": 0.0068, "step": 15969 }, { "epoch": 3.0913312693498454, "grad_norm": 0.05755188688635826, "learning_rate": 8.065437552319715e-05, "loss": 0.0074, "step": 15970 }, { "epoch": 3.0915247678018574, "grad_norm": 0.04567650705575943, "learning_rate": 8.065212123002312e-05, "loss": 0.006, "step": 15971 }, { "epoch": 3.09171826625387, "grad_norm": 0.07826796174049377, "learning_rate": 8.06498668414808e-05, "loss": 0.0082, "step": 15972 }, { "epoch": 3.0919117647058822, "grad_norm": 0.04686490818858147, "learning_rate": 8.06476123575786e-05, "loss": 0.006, "step": 15973 }, { "epoch": 3.0921052631578947, "grad_norm": 0.07318686693906784, "learning_rate": 8.06453577783249e-05, "loss": 0.0068, "step": 15974 }, { "epoch": 3.092298761609907, "grad_norm": 0.047992970794439316, "learning_rate": 8.064310310372806e-05, 
"loss": 0.0075, "step": 15975 }, { "epoch": 3.0924922600619196, "grad_norm": 0.07721929252147675, "learning_rate": 8.064084833379652e-05, "loss": 0.0056, "step": 15976 }, { "epoch": 3.092685758513932, "grad_norm": 0.04569276422262192, "learning_rate": 8.06385934685386e-05, "loss": 0.0062, "step": 15977 }, { "epoch": 3.0928792569659445, "grad_norm": 0.09749356657266617, "learning_rate": 8.06363385079627e-05, "loss": 0.0068, "step": 15978 }, { "epoch": 3.0930727554179565, "grad_norm": 0.06475051492452621, "learning_rate": 8.063408345207723e-05, "loss": 0.0066, "step": 15979 }, { "epoch": 3.093266253869969, "grad_norm": 0.061528682708740234, "learning_rate": 8.063182830089053e-05, "loss": 0.0065, "step": 15980 }, { "epoch": 3.0934597523219813, "grad_norm": 0.08943607658147812, "learning_rate": 8.062957305441103e-05, "loss": 0.0082, "step": 15981 }, { "epoch": 3.093653250773994, "grad_norm": 0.03477975353598595, "learning_rate": 8.062731771264708e-05, "loss": 0.0076, "step": 15982 }, { "epoch": 3.093846749226006, "grad_norm": 0.08357558399438858, "learning_rate": 8.062506227560708e-05, "loss": 0.0063, "step": 15983 }, { "epoch": 3.0940402476780187, "grad_norm": 0.05966409668326378, "learning_rate": 8.062280674329942e-05, "loss": 0.0071, "step": 15984 }, { "epoch": 3.094233746130031, "grad_norm": 0.07943699508905411, "learning_rate": 8.062055111573247e-05, "loss": 0.0057, "step": 15985 }, { "epoch": 3.0944272445820435, "grad_norm": 0.08875546604394913, "learning_rate": 8.061829539291463e-05, "loss": 0.0051, "step": 15986 }, { "epoch": 3.0946207430340555, "grad_norm": 0.05437939614057541, "learning_rate": 8.061603957485427e-05, "loss": 0.0048, "step": 15987 }, { "epoch": 3.094814241486068, "grad_norm": 0.1138281375169754, "learning_rate": 8.06137836615598e-05, "loss": 0.0063, "step": 15988 }, { "epoch": 3.0950077399380804, "grad_norm": 0.02494533360004425, "learning_rate": 8.061152765303959e-05, "loss": 0.0061, "step": 15989 }, { "epoch": 3.095201238390093, "grad_norm": 0.1065574511885643, "learning_rate": 8.060927154930203e-05, "loss": 0.0062, "step": 15990 }, { "epoch": 3.0953947368421053, "grad_norm": 0.07970479130744934, "learning_rate": 8.060701535035549e-05, "loss": 0.0067, "step": 15991 }, { "epoch": 3.0955882352941178, "grad_norm": 0.05378276854753494, "learning_rate": 8.060475905620841e-05, "loss": 0.006, "step": 15992 }, { "epoch": 3.09578173374613, "grad_norm": 0.10790449380874634, "learning_rate": 8.060250266686913e-05, "loss": 0.0075, "step": 15993 }, { "epoch": 3.0959752321981426, "grad_norm": 0.04959825798869133, "learning_rate": 8.060024618234605e-05, "loss": 0.0059, "step": 15994 }, { "epoch": 3.0961687306501546, "grad_norm": 0.10411974787712097, "learning_rate": 8.059798960264756e-05, "loss": 0.0078, "step": 15995 }, { "epoch": 3.096362229102167, "grad_norm": 0.03752641752362251, "learning_rate": 8.059573292778206e-05, "loss": 0.0064, "step": 15996 }, { "epoch": 3.0965557275541795, "grad_norm": 0.07606230676174164, "learning_rate": 8.059347615775794e-05, "loss": 0.0063, "step": 15997 }, { "epoch": 3.096749226006192, "grad_norm": 0.037055544555187225, "learning_rate": 8.059121929258356e-05, "loss": 0.006, "step": 15998 }, { "epoch": 3.0969427244582044, "grad_norm": 0.07752557843923569, "learning_rate": 8.058896233226735e-05, "loss": 0.007, "step": 15999 }, { "epoch": 3.097136222910217, "grad_norm": 0.047215282917022705, "learning_rate": 8.058670527681768e-05, "loss": 0.0069, "step": 16000 }, { "epoch": 3.0973297213622293, "grad_norm": 0.030939554795622826, "learning_rate": 
8.058444812624294e-05, "loss": 0.007, "step": 16001 }, { "epoch": 3.0975232198142413, "grad_norm": 0.06463851779699326, "learning_rate": 8.058219088055153e-05, "loss": 0.006, "step": 16002 }, { "epoch": 3.0977167182662537, "grad_norm": 0.03688448667526245, "learning_rate": 8.057993353975182e-05, "loss": 0.0058, "step": 16003 }, { "epoch": 3.097910216718266, "grad_norm": 0.05600644275546074, "learning_rate": 8.057767610385224e-05, "loss": 0.0073, "step": 16004 }, { "epoch": 3.0981037151702786, "grad_norm": 0.04654860123991966, "learning_rate": 8.057541857286117e-05, "loss": 0.0058, "step": 16005 }, { "epoch": 3.098297213622291, "grad_norm": 0.023384440690279007, "learning_rate": 8.057316094678697e-05, "loss": 0.0078, "step": 16006 }, { "epoch": 3.0984907120743035, "grad_norm": 0.04651440680027008, "learning_rate": 8.057090322563808e-05, "loss": 0.0062, "step": 16007 }, { "epoch": 3.098684210526316, "grad_norm": 0.02715623565018177, "learning_rate": 8.056864540942286e-05, "loss": 0.0057, "step": 16008 }, { "epoch": 3.0988777089783284, "grad_norm": 0.027806829661130905, "learning_rate": 8.056638749814973e-05, "loss": 0.0053, "step": 16009 }, { "epoch": 3.0990712074303404, "grad_norm": 0.032256513833999634, "learning_rate": 8.056412949182706e-05, "loss": 0.0077, "step": 16010 }, { "epoch": 3.099264705882353, "grad_norm": 0.04905220493674278, "learning_rate": 8.056187139046326e-05, "loss": 0.0063, "step": 16011 }, { "epoch": 3.0994582043343653, "grad_norm": 0.044726986438035965, "learning_rate": 8.055961319406672e-05, "loss": 0.0075, "step": 16012 }, { "epoch": 3.0996517027863777, "grad_norm": 0.0560033842921257, "learning_rate": 8.055735490264584e-05, "loss": 0.0062, "step": 16013 }, { "epoch": 3.09984520123839, "grad_norm": 0.042707908898591995, "learning_rate": 8.0555096516209e-05, "loss": 0.0068, "step": 16014 }, { "epoch": 3.1000386996904026, "grad_norm": 0.035764239728450775, "learning_rate": 8.055283803476463e-05, "loss": 0.0058, "step": 16015 }, { "epoch": 3.100232198142415, "grad_norm": 0.04583074152469635, "learning_rate": 8.055057945832107e-05, "loss": 0.0059, "step": 16016 }, { "epoch": 3.100425696594427, "grad_norm": 0.026933716610074043, "learning_rate": 8.054832078688677e-05, "loss": 0.0063, "step": 16017 }, { "epoch": 3.1006191950464395, "grad_norm": 0.05710979923605919, "learning_rate": 8.054606202047011e-05, "loss": 0.0056, "step": 16018 }, { "epoch": 3.100812693498452, "grad_norm": 0.02786019816994667, "learning_rate": 8.05438031590795e-05, "loss": 0.0074, "step": 16019 }, { "epoch": 3.1010061919504643, "grad_norm": 0.046034861356019974, "learning_rate": 8.05415442027233e-05, "loss": 0.0079, "step": 16020 }, { "epoch": 3.101199690402477, "grad_norm": 0.04570130631327629, "learning_rate": 8.053928515140995e-05, "loss": 0.0067, "step": 16021 }, { "epoch": 3.1013931888544892, "grad_norm": 0.041849877685308456, "learning_rate": 8.053702600514781e-05, "loss": 0.0065, "step": 16022 }, { "epoch": 3.1015866873065017, "grad_norm": 0.05329369008541107, "learning_rate": 8.053476676394533e-05, "loss": 0.0051, "step": 16023 }, { "epoch": 3.101780185758514, "grad_norm": 0.023512262850999832, "learning_rate": 8.053250742781086e-05, "loss": 0.0065, "step": 16024 }, { "epoch": 3.101973684210526, "grad_norm": 0.04950593784451485, "learning_rate": 8.053024799675283e-05, "loss": 0.0065, "step": 16025 }, { "epoch": 3.1021671826625385, "grad_norm": 0.030440479516983032, "learning_rate": 8.052798847077962e-05, "loss": 0.0059, "step": 16026 }, { "epoch": 3.102360681114551, "grad_norm": 
0.05167098715901375, "learning_rate": 8.052572884989963e-05, "loss": 0.0061, "step": 16027 }, { "epoch": 3.1025541795665634, "grad_norm": 0.03189562261104584, "learning_rate": 8.052346913412128e-05, "loss": 0.0067, "step": 16028 }, { "epoch": 3.102747678018576, "grad_norm": 0.04385913908481598, "learning_rate": 8.052120932345296e-05, "loss": 0.0065, "step": 16029 }, { "epoch": 3.1029411764705883, "grad_norm": 0.031562622636556625, "learning_rate": 8.051894941790306e-05, "loss": 0.0078, "step": 16030 }, { "epoch": 3.1031346749226008, "grad_norm": 0.04730696976184845, "learning_rate": 8.051668941748001e-05, "loss": 0.0056, "step": 16031 }, { "epoch": 3.103328173374613, "grad_norm": 0.03693872690200806, "learning_rate": 8.05144293221922e-05, "loss": 0.0075, "step": 16032 }, { "epoch": 3.103521671826625, "grad_norm": 0.06221655383706093, "learning_rate": 8.051216913204804e-05, "loss": 0.0076, "step": 16033 }, { "epoch": 3.1037151702786376, "grad_norm": 0.018906310200691223, "learning_rate": 8.050990884705588e-05, "loss": 0.0053, "step": 16034 }, { "epoch": 3.10390866873065, "grad_norm": 0.05812673270702362, "learning_rate": 8.050764846722419e-05, "loss": 0.0066, "step": 16035 }, { "epoch": 3.1041021671826625, "grad_norm": 0.022856660187244415, "learning_rate": 8.050538799256134e-05, "loss": 0.0055, "step": 16036 }, { "epoch": 3.104295665634675, "grad_norm": 0.04242242872714996, "learning_rate": 8.050312742307576e-05, "loss": 0.0065, "step": 16037 }, { "epoch": 3.1044891640866874, "grad_norm": 0.03723170608282089, "learning_rate": 8.050086675877583e-05, "loss": 0.0059, "step": 16038 }, { "epoch": 3.1046826625387, "grad_norm": 0.034270185977220535, "learning_rate": 8.049860599966995e-05, "loss": 0.0065, "step": 16039 }, { "epoch": 3.1048761609907123, "grad_norm": 0.0321277491748333, "learning_rate": 8.049634514576654e-05, "loss": 0.005, "step": 16040 }, { "epoch": 3.1050696594427243, "grad_norm": 0.017162363976240158, "learning_rate": 8.049408419707399e-05, "loss": 0.005, "step": 16041 }, { "epoch": 3.1052631578947367, "grad_norm": 0.02665593847632408, "learning_rate": 8.049182315360073e-05, "loss": 0.0078, "step": 16042 }, { "epoch": 3.105456656346749, "grad_norm": 0.02009926736354828, "learning_rate": 8.048956201535516e-05, "loss": 0.0064, "step": 16043 }, { "epoch": 3.1056501547987616, "grad_norm": 0.04756353422999382, "learning_rate": 8.048730078234566e-05, "loss": 0.0061, "step": 16044 }, { "epoch": 3.105843653250774, "grad_norm": 0.055836282670497894, "learning_rate": 8.048503945458067e-05, "loss": 0.0069, "step": 16045 }, { "epoch": 3.1060371517027865, "grad_norm": 0.04880405217409134, "learning_rate": 8.048277803206859e-05, "loss": 0.0054, "step": 16046 }, { "epoch": 3.106230650154799, "grad_norm": 0.04737730324268341, "learning_rate": 8.048051651481782e-05, "loss": 0.0064, "step": 16047 }, { "epoch": 3.106424148606811, "grad_norm": 0.07704494893550873, "learning_rate": 8.047825490283675e-05, "loss": 0.0051, "step": 16048 }, { "epoch": 3.1066176470588234, "grad_norm": 0.04038458317518234, "learning_rate": 8.047599319613382e-05, "loss": 0.006, "step": 16049 }, { "epoch": 3.106811145510836, "grad_norm": 0.04620497301220894, "learning_rate": 8.047373139471741e-05, "loss": 0.0074, "step": 16050 }, { "epoch": 3.1070046439628483, "grad_norm": 0.02522394247353077, "learning_rate": 8.047146949859596e-05, "loss": 0.0063, "step": 16051 }, { "epoch": 3.1071981424148607, "grad_norm": 0.03382269665598869, "learning_rate": 8.046920750777785e-05, "loss": 0.0073, "step": 16052 }, { "epoch": 
3.107391640866873, "grad_norm": 0.03209012374281883, "learning_rate": 8.046694542227151e-05, "loss": 0.0062, "step": 16053 }, { "epoch": 3.1075851393188856, "grad_norm": 0.03598002716898918, "learning_rate": 8.046468324208535e-05, "loss": 0.0067, "step": 16054 }, { "epoch": 3.107778637770898, "grad_norm": 0.03361821547150612, "learning_rate": 8.046242096722775e-05, "loss": 0.006, "step": 16055 }, { "epoch": 3.10797213622291, "grad_norm": 0.03434312716126442, "learning_rate": 8.046015859770716e-05, "loss": 0.0064, "step": 16056 }, { "epoch": 3.1081656346749225, "grad_norm": 0.045134253799915314, "learning_rate": 8.045789613353197e-05, "loss": 0.0059, "step": 16057 }, { "epoch": 3.108359133126935, "grad_norm": 0.03404166176915169, "learning_rate": 8.04556335747106e-05, "loss": 0.0065, "step": 16058 }, { "epoch": 3.1085526315789473, "grad_norm": 0.061424173414707184, "learning_rate": 8.045337092125143e-05, "loss": 0.0071, "step": 16059 }, { "epoch": 3.10874613003096, "grad_norm": 0.02993430383503437, "learning_rate": 8.045110817316294e-05, "loss": 0.0073, "step": 16060 }, { "epoch": 3.1089396284829722, "grad_norm": 0.05724070966243744, "learning_rate": 8.044884533045348e-05, "loss": 0.0067, "step": 16061 }, { "epoch": 3.1091331269349847, "grad_norm": 0.05690775811672211, "learning_rate": 8.044658239313148e-05, "loss": 0.0075, "step": 16062 }, { "epoch": 3.109326625386997, "grad_norm": 0.03229778632521629, "learning_rate": 8.044431936120537e-05, "loss": 0.0057, "step": 16063 }, { "epoch": 3.109520123839009, "grad_norm": 0.05941925197839737, "learning_rate": 8.044205623468354e-05, "loss": 0.0067, "step": 16064 }, { "epoch": 3.1097136222910216, "grad_norm": 0.038038138300180435, "learning_rate": 8.043979301357441e-05, "loss": 0.0065, "step": 16065 }, { "epoch": 3.109907120743034, "grad_norm": 0.05019332468509674, "learning_rate": 8.04375296978864e-05, "loss": 0.007, "step": 16066 }, { "epoch": 3.1101006191950464, "grad_norm": 0.03662009537220001, "learning_rate": 8.043526628762793e-05, "loss": 0.0085, "step": 16067 }, { "epoch": 3.110294117647059, "grad_norm": 0.04188196733593941, "learning_rate": 8.043300278280741e-05, "loss": 0.0049, "step": 16068 }, { "epoch": 3.1104876160990713, "grad_norm": 0.0327276736497879, "learning_rate": 8.043073918343324e-05, "loss": 0.0051, "step": 16069 }, { "epoch": 3.1106811145510838, "grad_norm": 0.038756150752305984, "learning_rate": 8.042847548951386e-05, "loss": 0.0057, "step": 16070 }, { "epoch": 3.110874613003096, "grad_norm": 0.03448387607932091, "learning_rate": 8.042621170105768e-05, "loss": 0.0071, "step": 16071 }, { "epoch": 3.111068111455108, "grad_norm": 0.02899155393242836, "learning_rate": 8.04239478180731e-05, "loss": 0.0073, "step": 16072 }, { "epoch": 3.1112616099071206, "grad_norm": 0.06081137806177139, "learning_rate": 8.042168384056854e-05, "loss": 0.006, "step": 16073 }, { "epoch": 3.111455108359133, "grad_norm": 0.023900259286165237, "learning_rate": 8.041941976855241e-05, "loss": 0.0049, "step": 16074 }, { "epoch": 3.1116486068111455, "grad_norm": 0.03837106004357338, "learning_rate": 8.041715560203317e-05, "loss": 0.0073, "step": 16075 }, { "epoch": 3.111842105263158, "grad_norm": 0.045269452035427094, "learning_rate": 8.04148913410192e-05, "loss": 0.0057, "step": 16076 }, { "epoch": 3.1120356037151704, "grad_norm": 0.05130424350500107, "learning_rate": 8.041262698551893e-05, "loss": 0.0074, "step": 16077 }, { "epoch": 3.112229102167183, "grad_norm": 0.06207463890314102, "learning_rate": 8.041036253554076e-05, "loss": 0.0067, "step": 
16078 }, { "epoch": 3.112422600619195, "grad_norm": 0.06476379185914993, "learning_rate": 8.040809799109314e-05, "loss": 0.0073, "step": 16079 }, { "epoch": 3.1126160990712073, "grad_norm": 0.09772833436727524, "learning_rate": 8.040583335218446e-05, "loss": 0.0067, "step": 16080 }, { "epoch": 3.1128095975232197, "grad_norm": 0.052173465490341187, "learning_rate": 8.040356861882315e-05, "loss": 0.0081, "step": 16081 }, { "epoch": 3.113003095975232, "grad_norm": 0.11195635050535202, "learning_rate": 8.040130379101764e-05, "loss": 0.0062, "step": 16082 }, { "epoch": 3.1131965944272446, "grad_norm": 0.0594843290746212, "learning_rate": 8.039903886877635e-05, "loss": 0.0069, "step": 16083 }, { "epoch": 3.113390092879257, "grad_norm": 0.08430910855531693, "learning_rate": 8.039677385210767e-05, "loss": 0.0062, "step": 16084 }, { "epoch": 3.1135835913312695, "grad_norm": 0.05842422693967819, "learning_rate": 8.039450874102004e-05, "loss": 0.0062, "step": 16085 }, { "epoch": 3.113777089783282, "grad_norm": 0.046796832233667374, "learning_rate": 8.039224353552192e-05, "loss": 0.0059, "step": 16086 }, { "epoch": 3.113970588235294, "grad_norm": 0.06693385541439056, "learning_rate": 8.038997823562168e-05, "loss": 0.0047, "step": 16087 }, { "epoch": 3.1141640866873064, "grad_norm": 0.050858691334724426, "learning_rate": 8.038771284132773e-05, "loss": 0.0057, "step": 16088 }, { "epoch": 3.114357585139319, "grad_norm": 0.044621843844652176, "learning_rate": 8.038544735264855e-05, "loss": 0.0066, "step": 16089 }, { "epoch": 3.1145510835913313, "grad_norm": 0.05472586303949356, "learning_rate": 8.038318176959252e-05, "loss": 0.0059, "step": 16090 }, { "epoch": 3.1147445820433437, "grad_norm": 0.02797026000916958, "learning_rate": 8.038091609216807e-05, "loss": 0.0074, "step": 16091 }, { "epoch": 3.114938080495356, "grad_norm": 0.08393126726150513, "learning_rate": 8.037865032038364e-05, "loss": 0.006, "step": 16092 }, { "epoch": 3.1151315789473686, "grad_norm": 0.044983167201280594, "learning_rate": 8.037638445424763e-05, "loss": 0.0069, "step": 16093 }, { "epoch": 3.1153250773993806, "grad_norm": 0.06053929775953293, "learning_rate": 8.03741184937685e-05, "loss": 0.0049, "step": 16094 }, { "epoch": 3.115518575851393, "grad_norm": 0.07425856590270996, "learning_rate": 8.037185243895464e-05, "loss": 0.0079, "step": 16095 }, { "epoch": 3.1157120743034055, "grad_norm": 0.07282013446092606, "learning_rate": 8.036958628981446e-05, "loss": 0.0077, "step": 16096 }, { "epoch": 3.115905572755418, "grad_norm": 0.057916782796382904, "learning_rate": 8.036732004635643e-05, "loss": 0.0065, "step": 16097 }, { "epoch": 3.1160990712074303, "grad_norm": 0.08622388541698456, "learning_rate": 8.036505370858895e-05, "loss": 0.0064, "step": 16098 }, { "epoch": 3.116292569659443, "grad_norm": 0.032310374081134796, "learning_rate": 8.036278727652045e-05, "loss": 0.0073, "step": 16099 }, { "epoch": 3.1164860681114552, "grad_norm": 0.07310334593057632, "learning_rate": 8.036052075015935e-05, "loss": 0.0056, "step": 16100 }, { "epoch": 3.1166795665634677, "grad_norm": 0.04475109279155731, "learning_rate": 8.03582541295141e-05, "loss": 0.0066, "step": 16101 }, { "epoch": 3.1168730650154797, "grad_norm": 0.04148273169994354, "learning_rate": 8.03559874145931e-05, "loss": 0.0087, "step": 16102 }, { "epoch": 3.117066563467492, "grad_norm": 0.06675535440444946, "learning_rate": 8.035372060540478e-05, "loss": 0.007, "step": 16103 }, { "epoch": 3.1172600619195046, "grad_norm": 0.047449640929698944, "learning_rate": 8.035145370195759e-05, 
"loss": 0.0071, "step": 16104 }, { "epoch": 3.117453560371517, "grad_norm": 0.07754306495189667, "learning_rate": 8.034918670425992e-05, "loss": 0.0059, "step": 16105 }, { "epoch": 3.1176470588235294, "grad_norm": 0.04292893409729004, "learning_rate": 8.034691961232023e-05, "loss": 0.0065, "step": 16106 }, { "epoch": 3.117840557275542, "grad_norm": 0.07279513031244278, "learning_rate": 8.034465242614694e-05, "loss": 0.0062, "step": 16107 }, { "epoch": 3.1180340557275543, "grad_norm": 0.04601701721549034, "learning_rate": 8.034238514574846e-05, "loss": 0.0064, "step": 16108 }, { "epoch": 3.1182275541795668, "grad_norm": 0.033448219299316406, "learning_rate": 8.034011777113327e-05, "loss": 0.0064, "step": 16109 }, { "epoch": 3.1184210526315788, "grad_norm": 0.04960400238633156, "learning_rate": 8.033785030230975e-05, "loss": 0.0062, "step": 16110 }, { "epoch": 3.118614551083591, "grad_norm": 0.05026794597506523, "learning_rate": 8.033558273928635e-05, "loss": 0.0056, "step": 16111 }, { "epoch": 3.1188080495356036, "grad_norm": 0.03181847557425499, "learning_rate": 8.03333150820715e-05, "loss": 0.0058, "step": 16112 }, { "epoch": 3.119001547987616, "grad_norm": 0.04634549841284752, "learning_rate": 8.03310473306736e-05, "loss": 0.0082, "step": 16113 }, { "epoch": 3.1191950464396285, "grad_norm": 0.03213467448949814, "learning_rate": 8.032877948510113e-05, "loss": 0.0083, "step": 16114 }, { "epoch": 3.119388544891641, "grad_norm": 0.04806758090853691, "learning_rate": 8.032651154536251e-05, "loss": 0.0067, "step": 16115 }, { "epoch": 3.1195820433436534, "grad_norm": 0.038114555180072784, "learning_rate": 8.032424351146613e-05, "loss": 0.0065, "step": 16116 }, { "epoch": 3.119775541795666, "grad_norm": 0.04413871839642525, "learning_rate": 8.032197538342048e-05, "loss": 0.0065, "step": 16117 }, { "epoch": 3.119969040247678, "grad_norm": 0.03290562331676483, "learning_rate": 8.031970716123396e-05, "loss": 0.0059, "step": 16118 }, { "epoch": 3.1201625386996903, "grad_norm": 0.05385195463895798, "learning_rate": 8.031743884491501e-05, "loss": 0.0069, "step": 16119 }, { "epoch": 3.1203560371517027, "grad_norm": 0.03910542279481888, "learning_rate": 8.031517043447203e-05, "loss": 0.0078, "step": 16120 }, { "epoch": 3.120549535603715, "grad_norm": 0.04023028537631035, "learning_rate": 8.031290192991353e-05, "loss": 0.0074, "step": 16121 }, { "epoch": 3.1207430340557276, "grad_norm": 0.03592175617814064, "learning_rate": 8.031063333124785e-05, "loss": 0.0074, "step": 16122 }, { "epoch": 3.12093653250774, "grad_norm": 0.05898067727684975, "learning_rate": 8.03083646384835e-05, "loss": 0.0061, "step": 16123 }, { "epoch": 3.1211300309597525, "grad_norm": 0.027207767590880394, "learning_rate": 8.03060958516289e-05, "loss": 0.0058, "step": 16124 }, { "epoch": 3.1213235294117645, "grad_norm": 0.0541485995054245, "learning_rate": 8.030382697069244e-05, "loss": 0.0064, "step": 16125 }, { "epoch": 3.121517027863777, "grad_norm": 0.04016357660293579, "learning_rate": 8.030155799568261e-05, "loss": 0.0056, "step": 16126 }, { "epoch": 3.1217105263157894, "grad_norm": 0.043960198760032654, "learning_rate": 8.029928892660782e-05, "loss": 0.0067, "step": 16127 }, { "epoch": 3.121904024767802, "grad_norm": 0.05641710013151169, "learning_rate": 8.02970197634765e-05, "loss": 0.0076, "step": 16128 }, { "epoch": 3.1220975232198143, "grad_norm": 0.0434572696685791, "learning_rate": 8.029475050629708e-05, "loss": 0.0067, "step": 16129 }, { "epoch": 3.1222910216718267, "grad_norm": 0.060012336820364, "learning_rate": 
8.029248115507803e-05, "loss": 0.0059, "step": 16130 }, { "epoch": 3.122484520123839, "grad_norm": 0.044585857540369034, "learning_rate": 8.029021170982776e-05, "loss": 0.0067, "step": 16131 }, { "epoch": 3.1226780185758516, "grad_norm": 0.05017326399683952, "learning_rate": 8.028794217055471e-05, "loss": 0.0091, "step": 16132 }, { "epoch": 3.1228715170278636, "grad_norm": 0.05312224105000496, "learning_rate": 8.028567253726733e-05, "loss": 0.0061, "step": 16133 }, { "epoch": 3.123065015479876, "grad_norm": 0.056161586195230484, "learning_rate": 8.028340280997405e-05, "loss": 0.0081, "step": 16134 }, { "epoch": 3.1232585139318885, "grad_norm": 0.06995942443609238, "learning_rate": 8.028113298868331e-05, "loss": 0.0083, "step": 16135 }, { "epoch": 3.123452012383901, "grad_norm": 0.04984138160943985, "learning_rate": 8.027886307340354e-05, "loss": 0.0065, "step": 16136 }, { "epoch": 3.1236455108359134, "grad_norm": 0.059487029910087585, "learning_rate": 8.02765930641432e-05, "loss": 0.0088, "step": 16137 }, { "epoch": 3.123839009287926, "grad_norm": 0.08030228316783905, "learning_rate": 8.02743229609107e-05, "loss": 0.0066, "step": 16138 }, { "epoch": 3.1240325077399382, "grad_norm": 0.062346234917640686, "learning_rate": 8.02720527637145e-05, "loss": 0.0071, "step": 16139 }, { "epoch": 3.1242260061919507, "grad_norm": 0.09515538066625595, "learning_rate": 8.026978247256302e-05, "loss": 0.0055, "step": 16140 }, { "epoch": 3.1244195046439627, "grad_norm": 0.04609207808971405, "learning_rate": 8.026751208746474e-05, "loss": 0.0073, "step": 16141 }, { "epoch": 3.124613003095975, "grad_norm": 0.0947709009051323, "learning_rate": 8.026524160842808e-05, "loss": 0.008, "step": 16142 }, { "epoch": 3.1248065015479876, "grad_norm": 0.040556903928518295, "learning_rate": 8.026297103546146e-05, "loss": 0.0077, "step": 16143 }, { "epoch": 3.125, "grad_norm": 0.08590617775917053, "learning_rate": 8.026070036857334e-05, "loss": 0.0066, "step": 16144 }, { "epoch": 3.1251934984520124, "grad_norm": 0.0672692358493805, "learning_rate": 8.025842960777217e-05, "loss": 0.0063, "step": 16145 }, { "epoch": 3.125386996904025, "grad_norm": 0.06259758770465851, "learning_rate": 8.025615875306637e-05, "loss": 0.0063, "step": 16146 }, { "epoch": 3.1255804953560373, "grad_norm": 0.06391216069459915, "learning_rate": 8.02538878044644e-05, "loss": 0.0063, "step": 16147 }, { "epoch": 3.1257739938080498, "grad_norm": 0.045531563460826874, "learning_rate": 8.025161676197469e-05, "loss": 0.0061, "step": 16148 }, { "epoch": 3.1259674922600618, "grad_norm": 0.04579515755176544, "learning_rate": 8.02493456256057e-05, "loss": 0.007, "step": 16149 }, { "epoch": 3.126160990712074, "grad_norm": 0.03995219245553017, "learning_rate": 8.024707439536586e-05, "loss": 0.0069, "step": 16150 }, { "epoch": 3.1263544891640866, "grad_norm": 0.03151221200823784, "learning_rate": 8.024480307126362e-05, "loss": 0.0058, "step": 16151 }, { "epoch": 3.126547987616099, "grad_norm": 0.04336898401379585, "learning_rate": 8.024253165330743e-05, "loss": 0.0065, "step": 16152 }, { "epoch": 3.1267414860681115, "grad_norm": 0.0445588193833828, "learning_rate": 8.024026014150573e-05, "loss": 0.007, "step": 16153 }, { "epoch": 3.126934984520124, "grad_norm": 0.027165012434124947, "learning_rate": 8.023798853586694e-05, "loss": 0.0059, "step": 16154 }, { "epoch": 3.1271284829721364, "grad_norm": 0.08177915215492249, "learning_rate": 8.023571683639956e-05, "loss": 0.0074, "step": 16155 }, { "epoch": 3.1273219814241484, "grad_norm": 0.02393912710249424, 
"learning_rate": 8.023344504311197e-05, "loss": 0.0062, "step": 16156 }, { "epoch": 3.127515479876161, "grad_norm": 0.0681537464261055, "learning_rate": 8.023117315601267e-05, "loss": 0.0061, "step": 16157 }, { "epoch": 3.1277089783281733, "grad_norm": 0.055769648402929306, "learning_rate": 8.022890117511007e-05, "loss": 0.0071, "step": 16158 }, { "epoch": 3.1279024767801857, "grad_norm": 0.07175350189208984, "learning_rate": 8.022662910041265e-05, "loss": 0.0079, "step": 16159 }, { "epoch": 3.128095975232198, "grad_norm": 0.06900826841592789, "learning_rate": 8.022435693192882e-05, "loss": 0.0055, "step": 16160 }, { "epoch": 3.1282894736842106, "grad_norm": 0.03629038482904434, "learning_rate": 8.022208466966705e-05, "loss": 0.0065, "step": 16161 }, { "epoch": 3.128482972136223, "grad_norm": 0.07473783940076828, "learning_rate": 8.021981231363582e-05, "loss": 0.008, "step": 16162 }, { "epoch": 3.1286764705882355, "grad_norm": 0.02935970574617386, "learning_rate": 8.02175398638435e-05, "loss": 0.0075, "step": 16163 }, { "epoch": 3.1288699690402475, "grad_norm": 0.04912163317203522, "learning_rate": 8.021526732029859e-05, "loss": 0.0071, "step": 16164 }, { "epoch": 3.12906346749226, "grad_norm": 0.029762450605630875, "learning_rate": 8.021299468300954e-05, "loss": 0.0058, "step": 16165 }, { "epoch": 3.1292569659442724, "grad_norm": 0.04710046947002411, "learning_rate": 8.021072195198477e-05, "loss": 0.0059, "step": 16166 }, { "epoch": 3.129450464396285, "grad_norm": 0.056666720658540726, "learning_rate": 8.020844912723276e-05, "loss": 0.0068, "step": 16167 }, { "epoch": 3.1296439628482973, "grad_norm": 0.05470992624759674, "learning_rate": 8.020617620876195e-05, "loss": 0.0072, "step": 16168 }, { "epoch": 3.1298374613003097, "grad_norm": 0.06651932001113892, "learning_rate": 8.020390319658079e-05, "loss": 0.0054, "step": 16169 }, { "epoch": 3.130030959752322, "grad_norm": 0.09191794693470001, "learning_rate": 8.020163009069772e-05, "loss": 0.007, "step": 16170 }, { "epoch": 3.130224458204334, "grad_norm": 0.07145263254642487, "learning_rate": 8.019935689112118e-05, "loss": 0.0067, "step": 16171 }, { "epoch": 3.1304179566563466, "grad_norm": 0.13248856365680695, "learning_rate": 8.019708359785968e-05, "loss": 0.0059, "step": 16172 }, { "epoch": 3.130611455108359, "grad_norm": 0.06752386689186096, "learning_rate": 8.019481021092161e-05, "loss": 0.0047, "step": 16173 }, { "epoch": 3.1308049535603715, "grad_norm": 0.11287771910429001, "learning_rate": 8.019253673031545e-05, "loss": 0.0075, "step": 16174 }, { "epoch": 3.130998452012384, "grad_norm": 0.037274401634931564, "learning_rate": 8.019026315604965e-05, "loss": 0.0051, "step": 16175 }, { "epoch": 3.1311919504643964, "grad_norm": 0.09149788320064545, "learning_rate": 8.018798948813265e-05, "loss": 0.0073, "step": 16176 }, { "epoch": 3.131385448916409, "grad_norm": 0.05601176247000694, "learning_rate": 8.01857157265729e-05, "loss": 0.0064, "step": 16177 }, { "epoch": 3.1315789473684212, "grad_norm": 0.08540129661560059, "learning_rate": 8.018344187137888e-05, "loss": 0.0077, "step": 16178 }, { "epoch": 3.1317724458204332, "grad_norm": 0.05833707004785538, "learning_rate": 8.018116792255905e-05, "loss": 0.0083, "step": 16179 }, { "epoch": 3.1319659442724457, "grad_norm": 0.0456419363617897, "learning_rate": 8.017889388012181e-05, "loss": 0.0078, "step": 16180 }, { "epoch": 3.132159442724458, "grad_norm": 0.038994427770376205, "learning_rate": 8.017661974407564e-05, "loss": 0.0066, "step": 16181 }, { "epoch": 3.1323529411764706, "grad_norm": 
0.04575946927070618, "learning_rate": 8.017434551442903e-05, "loss": 0.0042, "step": 16182 }, { "epoch": 3.132546439628483, "grad_norm": 0.03399762511253357, "learning_rate": 8.017207119119038e-05, "loss": 0.0064, "step": 16183 }, { "epoch": 3.1327399380804954, "grad_norm": 0.044334203004837036, "learning_rate": 8.01697967743682e-05, "loss": 0.008, "step": 16184 }, { "epoch": 3.132933436532508, "grad_norm": 0.07737503945827484, "learning_rate": 8.01675222639709e-05, "loss": 0.0058, "step": 16185 }, { "epoch": 3.1331269349845203, "grad_norm": 0.03181066736578941, "learning_rate": 8.016524766000695e-05, "loss": 0.0052, "step": 16186 }, { "epoch": 3.1333204334365323, "grad_norm": 0.055905796587467194, "learning_rate": 8.016297296248483e-05, "loss": 0.0066, "step": 16187 }, { "epoch": 3.1335139318885448, "grad_norm": 0.036460891366004944, "learning_rate": 8.016069817141297e-05, "loss": 0.0072, "step": 16188 }, { "epoch": 3.133707430340557, "grad_norm": 0.05417202413082123, "learning_rate": 8.015842328679982e-05, "loss": 0.0064, "step": 16189 }, { "epoch": 3.1339009287925697, "grad_norm": 0.039182260632514954, "learning_rate": 8.015614830865386e-05, "loss": 0.0058, "step": 16190 }, { "epoch": 3.134094427244582, "grad_norm": 0.05101172626018524, "learning_rate": 8.015387323698354e-05, "loss": 0.0073, "step": 16191 }, { "epoch": 3.1342879256965945, "grad_norm": 0.0581134594976902, "learning_rate": 8.015159807179731e-05, "loss": 0.0071, "step": 16192 }, { "epoch": 3.134481424148607, "grad_norm": 0.02952883392572403, "learning_rate": 8.014932281310365e-05, "loss": 0.0046, "step": 16193 }, { "epoch": 3.1346749226006194, "grad_norm": 0.05323818698525429, "learning_rate": 8.014704746091099e-05, "loss": 0.0058, "step": 16194 }, { "epoch": 3.1348684210526314, "grad_norm": 0.06624577939510345, "learning_rate": 8.014477201522781e-05, "loss": 0.0067, "step": 16195 }, { "epoch": 3.135061919504644, "grad_norm": 0.06518033146858215, "learning_rate": 8.014249647606257e-05, "loss": 0.0066, "step": 16196 }, { "epoch": 3.1352554179566563, "grad_norm": 0.06775999814271927, "learning_rate": 8.014022084342372e-05, "loss": 0.0059, "step": 16197 }, { "epoch": 3.1354489164086687, "grad_norm": 0.039538417011499405, "learning_rate": 8.013794511731973e-05, "loss": 0.0058, "step": 16198 }, { "epoch": 3.135642414860681, "grad_norm": 0.09326257556676865, "learning_rate": 8.013566929775902e-05, "loss": 0.0059, "step": 16199 }, { "epoch": 3.1358359133126936, "grad_norm": 0.036839090287685394, "learning_rate": 8.013339338475012e-05, "loss": 0.0071, "step": 16200 }, { "epoch": 3.136029411764706, "grad_norm": 0.08536232262849808, "learning_rate": 8.013111737830145e-05, "loss": 0.0063, "step": 16201 }, { "epoch": 3.136222910216718, "grad_norm": 0.03908781334757805, "learning_rate": 8.012884127842148e-05, "loss": 0.0069, "step": 16202 }, { "epoch": 3.1364164086687305, "grad_norm": 0.07072623074054718, "learning_rate": 8.012656508511866e-05, "loss": 0.0077, "step": 16203 }, { "epoch": 3.136609907120743, "grad_norm": 0.04466537758708, "learning_rate": 8.012428879840147e-05, "loss": 0.0062, "step": 16204 }, { "epoch": 3.1368034055727554, "grad_norm": 0.04289897903800011, "learning_rate": 8.012201241827836e-05, "loss": 0.0058, "step": 16205 }, { "epoch": 3.136996904024768, "grad_norm": 0.037525907158851624, "learning_rate": 8.01197359447578e-05, "loss": 0.0059, "step": 16206 }, { "epoch": 3.1371904024767803, "grad_norm": 0.047977082431316376, "learning_rate": 8.011745937784825e-05, "loss": 0.0061, "step": 16207 }, { "epoch": 
3.1373839009287927, "grad_norm": 0.06075870990753174, "learning_rate": 8.011518271755818e-05, "loss": 0.0064, "step": 16208 }, { "epoch": 3.137577399380805, "grad_norm": 0.04846605658531189, "learning_rate": 8.011290596389605e-05, "loss": 0.0061, "step": 16209 }, { "epoch": 3.137770897832817, "grad_norm": 0.03734539821743965, "learning_rate": 8.011062911687031e-05, "loss": 0.0068, "step": 16210 }, { "epoch": 3.1379643962848296, "grad_norm": 0.04892057925462723, "learning_rate": 8.010835217648945e-05, "loss": 0.0058, "step": 16211 }, { "epoch": 3.138157894736842, "grad_norm": 0.032130979001522064, "learning_rate": 8.010607514276192e-05, "loss": 0.0067, "step": 16212 }, { "epoch": 3.1383513931888545, "grad_norm": 0.05948496609926224, "learning_rate": 8.01037980156962e-05, "loss": 0.0068, "step": 16213 }, { "epoch": 3.138544891640867, "grad_norm": 0.025633245706558228, "learning_rate": 8.010152079530074e-05, "loss": 0.0063, "step": 16214 }, { "epoch": 3.1387383900928794, "grad_norm": 0.04910721257328987, "learning_rate": 8.0099243481584e-05, "loss": 0.0067, "step": 16215 }, { "epoch": 3.138931888544892, "grad_norm": 0.031247438862919807, "learning_rate": 8.009696607455446e-05, "loss": 0.0048, "step": 16216 }, { "epoch": 3.139125386996904, "grad_norm": 0.028560323640704155, "learning_rate": 8.009468857422059e-05, "loss": 0.0063, "step": 16217 }, { "epoch": 3.1393188854489162, "grad_norm": 0.057350631803274155, "learning_rate": 8.009241098059085e-05, "loss": 0.0049, "step": 16218 }, { "epoch": 3.1395123839009287, "grad_norm": 0.035131145268678665, "learning_rate": 8.00901332936737e-05, "loss": 0.005, "step": 16219 }, { "epoch": 3.139705882352941, "grad_norm": 0.042256444692611694, "learning_rate": 8.008785551347764e-05, "loss": 0.0055, "step": 16220 }, { "epoch": 3.1398993808049536, "grad_norm": 0.04205423966050148, "learning_rate": 8.00855776400111e-05, "loss": 0.0079, "step": 16221 }, { "epoch": 3.140092879256966, "grad_norm": 0.03552691265940666, "learning_rate": 8.008329967328255e-05, "loss": 0.0058, "step": 16222 }, { "epoch": 3.1402863777089784, "grad_norm": 0.03241920471191406, "learning_rate": 8.00810216133005e-05, "loss": 0.0081, "step": 16223 }, { "epoch": 3.140479876160991, "grad_norm": 0.019998937845230103, "learning_rate": 8.007874346007337e-05, "loss": 0.0072, "step": 16224 }, { "epoch": 3.140673374613003, "grad_norm": 0.024026283994317055, "learning_rate": 8.007646521360966e-05, "loss": 0.0057, "step": 16225 }, { "epoch": 3.1408668730650153, "grad_norm": 0.022312600165605545, "learning_rate": 8.007418687391782e-05, "loss": 0.0052, "step": 16226 }, { "epoch": 3.1410603715170278, "grad_norm": 0.03879920020699501, "learning_rate": 8.007190844100634e-05, "loss": 0.0069, "step": 16227 }, { "epoch": 3.14125386996904, "grad_norm": 0.04596243426203728, "learning_rate": 8.006962991488368e-05, "loss": 0.0064, "step": 16228 }, { "epoch": 3.1414473684210527, "grad_norm": 0.0421254001557827, "learning_rate": 8.006735129555832e-05, "loss": 0.0064, "step": 16229 }, { "epoch": 3.141640866873065, "grad_norm": 0.040850620716810226, "learning_rate": 8.006507258303873e-05, "loss": 0.0067, "step": 16230 }, { "epoch": 3.1418343653250775, "grad_norm": 0.03472265973687172, "learning_rate": 8.006279377733337e-05, "loss": 0.0068, "step": 16231 }, { "epoch": 3.14202786377709, "grad_norm": 0.03736359626054764, "learning_rate": 8.006051487845071e-05, "loss": 0.0063, "step": 16232 }, { "epoch": 3.142221362229102, "grad_norm": 0.04181179031729698, "learning_rate": 8.005823588639924e-05, "loss": 0.0068, "step": 
16233 }, { "epoch": 3.1424148606811144, "grad_norm": 0.05587128922343254, "learning_rate": 8.005595680118741e-05, "loss": 0.0065, "step": 16234 }, { "epoch": 3.142608359133127, "grad_norm": 0.05006376653909683, "learning_rate": 8.005367762282373e-05, "loss": 0.007, "step": 16235 }, { "epoch": 3.1428018575851393, "grad_norm": 0.04440557211637497, "learning_rate": 8.005139835131664e-05, "loss": 0.0059, "step": 16236 }, { "epoch": 3.1429953560371517, "grad_norm": 0.08467831462621689, "learning_rate": 8.004911898667462e-05, "loss": 0.0074, "step": 16237 }, { "epoch": 3.143188854489164, "grad_norm": 0.028868209570646286, "learning_rate": 8.004683952890615e-05, "loss": 0.0058, "step": 16238 }, { "epoch": 3.1433823529411766, "grad_norm": 0.06250438839197159, "learning_rate": 8.004455997801969e-05, "loss": 0.0064, "step": 16239 }, { "epoch": 3.143575851393189, "grad_norm": 0.058211442083120346, "learning_rate": 8.004228033402374e-05, "loss": 0.0056, "step": 16240 }, { "epoch": 3.143769349845201, "grad_norm": 0.03881778195500374, "learning_rate": 8.004000059692676e-05, "loss": 0.0068, "step": 16241 }, { "epoch": 3.1439628482972135, "grad_norm": 0.07964242249727249, "learning_rate": 8.003772076673725e-05, "loss": 0.006, "step": 16242 }, { "epoch": 3.144156346749226, "grad_norm": 0.040788352489471436, "learning_rate": 8.003544084346362e-05, "loss": 0.006, "step": 16243 }, { "epoch": 3.1443498452012384, "grad_norm": 0.09676963835954666, "learning_rate": 8.003316082711441e-05, "loss": 0.0074, "step": 16244 }, { "epoch": 3.144543343653251, "grad_norm": 0.045987602323293686, "learning_rate": 8.003088071769809e-05, "loss": 0.0063, "step": 16245 }, { "epoch": 3.1447368421052633, "grad_norm": 0.06438631564378738, "learning_rate": 8.00286005152231e-05, "loss": 0.006, "step": 16246 }, { "epoch": 3.1449303405572757, "grad_norm": 0.059187110513448715, "learning_rate": 8.002632021969796e-05, "loss": 0.0057, "step": 16247 }, { "epoch": 3.1451238390092877, "grad_norm": 0.03882197290658951, "learning_rate": 8.002403983113113e-05, "loss": 0.0054, "step": 16248 }, { "epoch": 3.1453173374613, "grad_norm": 0.07838273793458939, "learning_rate": 8.002175934953106e-05, "loss": 0.0061, "step": 16249 }, { "epoch": 3.1455108359133126, "grad_norm": 0.04360954090952873, "learning_rate": 8.001947877490627e-05, "loss": 0.0061, "step": 16250 }, { "epoch": 3.145704334365325, "grad_norm": 0.07367207109928131, "learning_rate": 8.001719810726523e-05, "loss": 0.0065, "step": 16251 }, { "epoch": 3.1458978328173375, "grad_norm": 0.06011268123984337, "learning_rate": 8.00149173466164e-05, "loss": 0.0059, "step": 16252 }, { "epoch": 3.14609133126935, "grad_norm": 0.04723281413316727, "learning_rate": 8.001263649296828e-05, "loss": 0.007, "step": 16253 }, { "epoch": 3.1462848297213624, "grad_norm": 0.08131274580955505, "learning_rate": 8.001035554632935e-05, "loss": 0.0062, "step": 16254 }, { "epoch": 3.146478328173375, "grad_norm": 0.04084129258990288, "learning_rate": 8.000807450670805e-05, "loss": 0.0074, "step": 16255 }, { "epoch": 3.146671826625387, "grad_norm": 0.031457580626010895, "learning_rate": 8.000579337411291e-05, "loss": 0.0062, "step": 16256 }, { "epoch": 3.1468653250773992, "grad_norm": 0.10438154637813568, "learning_rate": 8.00035121485524e-05, "loss": 0.0054, "step": 16257 }, { "epoch": 3.1470588235294117, "grad_norm": 0.04208401218056679, "learning_rate": 8.000123083003497e-05, "loss": 0.007, "step": 16258 }, { "epoch": 3.147252321981424, "grad_norm": 0.08733299374580383, "learning_rate": 7.999894941856914e-05, "loss": 
0.0067, "step": 16259 }, { "epoch": 3.1474458204334366, "grad_norm": 0.03106444701552391, "learning_rate": 7.999666791416339e-05, "loss": 0.0059, "step": 16260 }, { "epoch": 3.147639318885449, "grad_norm": 0.09150075912475586, "learning_rate": 7.999438631682616e-05, "loss": 0.006, "step": 16261 }, { "epoch": 3.1478328173374615, "grad_norm": 0.04683360829949379, "learning_rate": 7.999210462656597e-05, "loss": 0.007, "step": 16262 }, { "epoch": 3.1480263157894735, "grad_norm": 0.07325445860624313, "learning_rate": 7.99898228433913e-05, "loss": 0.007, "step": 16263 }, { "epoch": 3.148219814241486, "grad_norm": 0.10224318504333496, "learning_rate": 7.998754096731063e-05, "loss": 0.0061, "step": 16264 }, { "epoch": 3.1484133126934983, "grad_norm": 0.049891531467437744, "learning_rate": 7.998525899833241e-05, "loss": 0.007, "step": 16265 }, { "epoch": 3.1486068111455108, "grad_norm": 0.12558123469352722, "learning_rate": 7.998297693646519e-05, "loss": 0.006, "step": 16266 }, { "epoch": 3.148800309597523, "grad_norm": 0.04386301338672638, "learning_rate": 7.99806947817174e-05, "loss": 0.0079, "step": 16267 }, { "epoch": 3.1489938080495357, "grad_norm": 0.09007285535335541, "learning_rate": 7.997841253409755e-05, "loss": 0.0074, "step": 16268 }, { "epoch": 3.149187306501548, "grad_norm": 0.08445748686790466, "learning_rate": 7.99761301936141e-05, "loss": 0.0059, "step": 16269 }, { "epoch": 3.1493808049535605, "grad_norm": 0.05369837209582329, "learning_rate": 7.997384776027558e-05, "loss": 0.0066, "step": 16270 }, { "epoch": 3.149574303405573, "grad_norm": 0.08193565905094147, "learning_rate": 7.997156523409042e-05, "loss": 0.0063, "step": 16271 }, { "epoch": 3.149767801857585, "grad_norm": 0.05692688003182411, "learning_rate": 7.996928261506715e-05, "loss": 0.0078, "step": 16272 }, { "epoch": 3.1499613003095974, "grad_norm": 0.08264919370412827, "learning_rate": 7.996699990321424e-05, "loss": 0.0074, "step": 16273 }, { "epoch": 3.15015479876161, "grad_norm": 0.043355390429496765, "learning_rate": 7.996471709854017e-05, "loss": 0.0057, "step": 16274 }, { "epoch": 3.1503482972136223, "grad_norm": 0.09107237309217453, "learning_rate": 7.996243420105343e-05, "loss": 0.007, "step": 16275 }, { "epoch": 3.1505417956656347, "grad_norm": 0.05121007561683655, "learning_rate": 7.996015121076251e-05, "loss": 0.0058, "step": 16276 }, { "epoch": 3.150735294117647, "grad_norm": 0.08347166329622269, "learning_rate": 7.99578681276759e-05, "loss": 0.0074, "step": 16277 }, { "epoch": 3.1509287925696596, "grad_norm": 0.06277859956026077, "learning_rate": 7.99555849518021e-05, "loss": 0.0071, "step": 16278 }, { "epoch": 3.1511222910216716, "grad_norm": 0.09403517842292786, "learning_rate": 7.995330168314957e-05, "loss": 0.0093, "step": 16279 }, { "epoch": 3.151315789473684, "grad_norm": 0.06986477971076965, "learning_rate": 7.995101832172682e-05, "loss": 0.006, "step": 16280 }, { "epoch": 3.1515092879256965, "grad_norm": 0.08141037821769714, "learning_rate": 7.994873486754233e-05, "loss": 0.0068, "step": 16281 }, { "epoch": 3.151702786377709, "grad_norm": 0.06352640688419342, "learning_rate": 7.994645132060458e-05, "loss": 0.0069, "step": 16282 }, { "epoch": 3.1518962848297214, "grad_norm": 0.08957783877849579, "learning_rate": 7.99441676809221e-05, "loss": 0.0062, "step": 16283 }, { "epoch": 3.152089783281734, "grad_norm": 0.058066289871931076, "learning_rate": 7.994188394850333e-05, "loss": 0.0071, "step": 16284 }, { "epoch": 3.1522832817337463, "grad_norm": 0.10714475810527802, "learning_rate": 
7.993960012335678e-05, "loss": 0.0071, "step": 16285 }, { "epoch": 3.1524767801857587, "grad_norm": 0.07106625288724899, "learning_rate": 7.993731620549095e-05, "loss": 0.0066, "step": 16286 }, { "epoch": 3.1526702786377707, "grad_norm": 0.08494646102190018, "learning_rate": 7.993503219491432e-05, "loss": 0.0061, "step": 16287 }, { "epoch": 3.152863777089783, "grad_norm": 0.0866604819893837, "learning_rate": 7.993274809163538e-05, "loss": 0.0061, "step": 16288 }, { "epoch": 3.1530572755417956, "grad_norm": 0.057972025126218796, "learning_rate": 7.993046389566262e-05, "loss": 0.0081, "step": 16289 }, { "epoch": 3.153250773993808, "grad_norm": 0.08245621621608734, "learning_rate": 7.992817960700455e-05, "loss": 0.0079, "step": 16290 }, { "epoch": 3.1534442724458205, "grad_norm": 0.02583223767578602, "learning_rate": 7.992589522566964e-05, "loss": 0.0054, "step": 16291 }, { "epoch": 3.153637770897833, "grad_norm": 0.13747328519821167, "learning_rate": 7.99236107516664e-05, "loss": 0.0069, "step": 16292 }, { "epoch": 3.1538312693498454, "grad_norm": 0.058004580438137054, "learning_rate": 7.992132618500332e-05, "loss": 0.0056, "step": 16293 }, { "epoch": 3.1540247678018574, "grad_norm": 0.07586529105901718, "learning_rate": 7.991904152568888e-05, "loss": 0.0072, "step": 16294 }, { "epoch": 3.15421826625387, "grad_norm": 0.06921166181564331, "learning_rate": 7.991675677373161e-05, "loss": 0.0051, "step": 16295 }, { "epoch": 3.1544117647058822, "grad_norm": 0.08610116690397263, "learning_rate": 7.991447192913995e-05, "loss": 0.0072, "step": 16296 }, { "epoch": 3.1546052631578947, "grad_norm": 0.08311213552951813, "learning_rate": 7.991218699192244e-05, "loss": 0.0069, "step": 16297 }, { "epoch": 3.154798761609907, "grad_norm": 0.0468384325504303, "learning_rate": 7.990990196208753e-05, "loss": 0.0057, "step": 16298 }, { "epoch": 3.1549922600619196, "grad_norm": 0.12686185538768768, "learning_rate": 7.990761683964378e-05, "loss": 0.007, "step": 16299 }, { "epoch": 3.155185758513932, "grad_norm": 0.05569985508918762, "learning_rate": 7.990533162459964e-05, "loss": 0.0064, "step": 16300 }, { "epoch": 3.1553792569659445, "grad_norm": 0.11268814653158188, "learning_rate": 7.99030463169636e-05, "loss": 0.006, "step": 16301 }, { "epoch": 3.1555727554179565, "grad_norm": 0.07981997728347778, "learning_rate": 7.990076091674418e-05, "loss": 0.0075, "step": 16302 }, { "epoch": 3.155766253869969, "grad_norm": 0.07739520072937012, "learning_rate": 7.989847542394986e-05, "loss": 0.0076, "step": 16303 }, { "epoch": 3.1559597523219813, "grad_norm": 0.1274864375591278, "learning_rate": 7.989618983858913e-05, "loss": 0.0073, "step": 16304 }, { "epoch": 3.156153250773994, "grad_norm": 0.07018890231847763, "learning_rate": 7.989390416067052e-05, "loss": 0.0068, "step": 16305 }, { "epoch": 3.156346749226006, "grad_norm": 0.11145460605621338, "learning_rate": 7.98916183902025e-05, "loss": 0.0062, "step": 16306 }, { "epoch": 3.1565402476780187, "grad_norm": 0.059416551142930984, "learning_rate": 7.988933252719359e-05, "loss": 0.0064, "step": 16307 }, { "epoch": 3.156733746130031, "grad_norm": 0.07533179968595505, "learning_rate": 7.988704657165226e-05, "loss": 0.0063, "step": 16308 }, { "epoch": 3.1569272445820435, "grad_norm": 0.09699897468090057, "learning_rate": 7.988476052358701e-05, "loss": 0.0066, "step": 16309 }, { "epoch": 3.1571207430340555, "grad_norm": 0.04810654744505882, "learning_rate": 7.988247438300638e-05, "loss": 0.0063, "step": 16310 }, { "epoch": 3.157314241486068, "grad_norm": 0.10823924839496613, 
"learning_rate": 7.988018814991884e-05, "loss": 0.0071, "step": 16311 }, { "epoch": 3.1575077399380804, "grad_norm": 0.02970966510474682, "learning_rate": 7.987790182433288e-05, "loss": 0.0072, "step": 16312 }, { "epoch": 3.157701238390093, "grad_norm": 0.11319001764059067, "learning_rate": 7.9875615406257e-05, "loss": 0.0062, "step": 16313 }, { "epoch": 3.1578947368421053, "grad_norm": 0.024907557293772697, "learning_rate": 7.987332889569974e-05, "loss": 0.0052, "step": 16314 }, { "epoch": 3.1580882352941178, "grad_norm": 0.05311236530542374, "learning_rate": 7.987104229266955e-05, "loss": 0.0061, "step": 16315 }, { "epoch": 3.15828173374613, "grad_norm": 0.12876644730567932, "learning_rate": 7.986875559717496e-05, "loss": 0.0069, "step": 16316 }, { "epoch": 3.1584752321981426, "grad_norm": 0.04028033837676048, "learning_rate": 7.986646880922446e-05, "loss": 0.0076, "step": 16317 }, { "epoch": 3.1586687306501546, "grad_norm": 0.12452096492052078, "learning_rate": 7.986418192882657e-05, "loss": 0.0072, "step": 16318 }, { "epoch": 3.158862229102167, "grad_norm": 0.058001965284347534, "learning_rate": 7.986189495598976e-05, "loss": 0.0059, "step": 16319 }, { "epoch": 3.1590557275541795, "grad_norm": 0.08693357557058334, "learning_rate": 7.985960789072255e-05, "loss": 0.0078, "step": 16320 }, { "epoch": 3.159249226006192, "grad_norm": 0.10757935792207718, "learning_rate": 7.985732073303345e-05, "loss": 0.0077, "step": 16321 }, { "epoch": 3.1594427244582044, "grad_norm": 0.04787764325737953, "learning_rate": 7.985503348293097e-05, "loss": 0.0075, "step": 16322 }, { "epoch": 3.159636222910217, "grad_norm": 0.10970759391784668, "learning_rate": 7.98527461404236e-05, "loss": 0.0072, "step": 16323 }, { "epoch": 3.1598297213622293, "grad_norm": 0.032884303480386734, "learning_rate": 7.985045870551982e-05, "loss": 0.0072, "step": 16324 }, { "epoch": 3.1600232198142413, "grad_norm": 0.06563963741064072, "learning_rate": 7.984817117822817e-05, "loss": 0.0082, "step": 16325 }, { "epoch": 3.1602167182662537, "grad_norm": 0.15767715871334076, "learning_rate": 7.984588355855713e-05, "loss": 0.0048, "step": 16326 }, { "epoch": 3.160410216718266, "grad_norm": 0.1060003787279129, "learning_rate": 7.984359584651522e-05, "loss": 0.0064, "step": 16327 }, { "epoch": 3.1606037151702786, "grad_norm": 0.12505246698856354, "learning_rate": 7.984130804211097e-05, "loss": 0.0062, "step": 16328 }, { "epoch": 3.160797213622291, "grad_norm": 0.04832889139652252, "learning_rate": 7.983902014535285e-05, "loss": 0.0061, "step": 16329 }, { "epoch": 3.1609907120743035, "grad_norm": 0.13242606818675995, "learning_rate": 7.983673215624938e-05, "loss": 0.005, "step": 16330 }, { "epoch": 3.161184210526316, "grad_norm": 0.06182599440217018, "learning_rate": 7.983444407480902e-05, "loss": 0.0058, "step": 16331 }, { "epoch": 3.1613777089783284, "grad_norm": 0.1382005363702774, "learning_rate": 7.983215590104035e-05, "loss": 0.005, "step": 16332 }, { "epoch": 3.1615712074303404, "grad_norm": 0.08063853532075882, "learning_rate": 7.982986763495182e-05, "loss": 0.0059, "step": 16333 }, { "epoch": 3.161764705882353, "grad_norm": 0.09653457999229431, "learning_rate": 7.982757927655197e-05, "loss": 0.0061, "step": 16334 }, { "epoch": 3.1619582043343653, "grad_norm": 0.08874265104532242, "learning_rate": 7.982529082584931e-05, "loss": 0.0082, "step": 16335 }, { "epoch": 3.1621517027863777, "grad_norm": 0.07707765698432922, "learning_rate": 7.982300228285232e-05, "loss": 0.0061, "step": 16336 }, { "epoch": 3.16234520123839, "grad_norm": 
0.10300486534833908, "learning_rate": 7.982071364756954e-05, "loss": 0.0066, "step": 16337 }, { "epoch": 3.1625386996904026, "grad_norm": 0.1179366484284401, "learning_rate": 7.981842492000944e-05, "loss": 0.0065, "step": 16338 }, { "epoch": 3.162732198142415, "grad_norm": 0.09247405081987381, "learning_rate": 7.981613610018057e-05, "loss": 0.0057, "step": 16339 }, { "epoch": 3.162925696594427, "grad_norm": 0.12079214304685593, "learning_rate": 7.981384718809142e-05, "loss": 0.0062, "step": 16340 }, { "epoch": 3.1631191950464395, "grad_norm": 0.08069507777690887, "learning_rate": 7.98115581837505e-05, "loss": 0.0057, "step": 16341 }, { "epoch": 3.163312693498452, "grad_norm": 0.14589154720306396, "learning_rate": 7.980926908716632e-05, "loss": 0.0066, "step": 16342 }, { "epoch": 3.1635061919504643, "grad_norm": 0.1006418764591217, "learning_rate": 7.980697989834739e-05, "loss": 0.0072, "step": 16343 }, { "epoch": 3.163699690402477, "grad_norm": 0.1071629449725151, "learning_rate": 7.980469061730221e-05, "loss": 0.0058, "step": 16344 }, { "epoch": 3.1638931888544892, "grad_norm": 0.13957610726356506, "learning_rate": 7.980240124403932e-05, "loss": 0.0078, "step": 16345 }, { "epoch": 3.1640866873065017, "grad_norm": 0.05554550141096115, "learning_rate": 7.980011177856718e-05, "loss": 0.0069, "step": 16346 }, { "epoch": 3.164280185758514, "grad_norm": 0.12260468304157257, "learning_rate": 7.979782222089437e-05, "loss": 0.0091, "step": 16347 }, { "epoch": 3.1644736842105265, "grad_norm": 0.04727831110358238, "learning_rate": 7.979553257102935e-05, "loss": 0.0069, "step": 16348 }, { "epoch": 3.1646671826625385, "grad_norm": 0.0685991495847702, "learning_rate": 7.979324282898066e-05, "loss": 0.0048, "step": 16349 }, { "epoch": 3.164860681114551, "grad_norm": 0.06694982945919037, "learning_rate": 7.97909529947568e-05, "loss": 0.0066, "step": 16350 }, { "epoch": 3.1650541795665634, "grad_norm": 0.03412020578980446, "learning_rate": 7.978866306836629e-05, "loss": 0.0064, "step": 16351 }, { "epoch": 3.165247678018576, "grad_norm": 0.1111559271812439, "learning_rate": 7.978637304981762e-05, "loss": 0.0062, "step": 16352 }, { "epoch": 3.1654411764705883, "grad_norm": 0.028089072555303574, "learning_rate": 7.978408293911935e-05, "loss": 0.0061, "step": 16353 }, { "epoch": 3.1656346749226008, "grad_norm": 0.11015041917562485, "learning_rate": 7.978179273627995e-05, "loss": 0.0064, "step": 16354 }, { "epoch": 3.165828173374613, "grad_norm": 0.07652374356985092, "learning_rate": 7.977950244130796e-05, "loss": 0.0078, "step": 16355 }, { "epoch": 3.166021671826625, "grad_norm": 0.14719855785369873, "learning_rate": 7.977721205421188e-05, "loss": 0.0077, "step": 16356 }, { "epoch": 3.1662151702786376, "grad_norm": 0.11511743068695068, "learning_rate": 7.977492157500023e-05, "loss": 0.007, "step": 16357 }, { "epoch": 3.16640866873065, "grad_norm": 0.11901960521936417, "learning_rate": 7.977263100368152e-05, "loss": 0.0064, "step": 16358 }, { "epoch": 3.1666021671826625, "grad_norm": 0.15379215776920319, "learning_rate": 7.977034034026429e-05, "loss": 0.0063, "step": 16359 }, { "epoch": 3.166795665634675, "grad_norm": 0.11136686056852341, "learning_rate": 7.976804958475701e-05, "loss": 0.0058, "step": 16360 }, { "epoch": 3.1669891640866874, "grad_norm": 0.12824435532093048, "learning_rate": 7.976575873716825e-05, "loss": 0.0066, "step": 16361 }, { "epoch": 3.1671826625387, "grad_norm": 0.06626693904399872, "learning_rate": 7.97634677975065e-05, "loss": 0.0055, "step": 16362 }, { "epoch": 3.1673761609907123, 
"grad_norm": 0.11635201424360275, "learning_rate": 7.976117676578026e-05, "loss": 0.0065, "step": 16363 }, { "epoch": 3.1675696594427243, "grad_norm": 0.07418442517518997, "learning_rate": 7.975888564199808e-05, "loss": 0.007, "step": 16364 }, { "epoch": 3.1677631578947367, "grad_norm": 0.1143588051199913, "learning_rate": 7.975659442616846e-05, "loss": 0.0074, "step": 16365 }, { "epoch": 3.167956656346749, "grad_norm": 0.06795729696750641, "learning_rate": 7.975430311829992e-05, "loss": 0.006, "step": 16366 }, { "epoch": 3.1681501547987616, "grad_norm": 0.06091190502047539, "learning_rate": 7.9752011718401e-05, "loss": 0.0066, "step": 16367 }, { "epoch": 3.168343653250774, "grad_norm": 0.16535800695419312, "learning_rate": 7.974972022648018e-05, "loss": 0.0061, "step": 16368 }, { "epoch": 3.1685371517027865, "grad_norm": 0.0997958555817604, "learning_rate": 7.9747428642546e-05, "loss": 0.0057, "step": 16369 }, { "epoch": 3.168730650154799, "grad_norm": 0.16409599781036377, "learning_rate": 7.974513696660698e-05, "loss": 0.0071, "step": 16370 }, { "epoch": 3.168924148606811, "grad_norm": 0.1706070452928543, "learning_rate": 7.974284519867164e-05, "loss": 0.0077, "step": 16371 }, { "epoch": 3.1691176470588234, "grad_norm": 0.11399497836828232, "learning_rate": 7.97405533387485e-05, "loss": 0.0057, "step": 16372 }, { "epoch": 3.169311145510836, "grad_norm": 0.19114243984222412, "learning_rate": 7.973826138684607e-05, "loss": 0.0058, "step": 16373 }, { "epoch": 3.1695046439628483, "grad_norm": 0.1052965298295021, "learning_rate": 7.973596934297287e-05, "loss": 0.007, "step": 16374 }, { "epoch": 3.1696981424148607, "grad_norm": 0.1694655567407608, "learning_rate": 7.973367720713744e-05, "loss": 0.0064, "step": 16375 }, { "epoch": 3.169891640866873, "grad_norm": 0.13700716197490692, "learning_rate": 7.973138497934831e-05, "loss": 0.0068, "step": 16376 }, { "epoch": 3.1700851393188856, "grad_norm": 0.13193447887897491, "learning_rate": 7.972909265961399e-05, "loss": 0.0054, "step": 16377 }, { "epoch": 3.170278637770898, "grad_norm": 0.17045558989048004, "learning_rate": 7.972680024794297e-05, "loss": 0.0076, "step": 16378 }, { "epoch": 3.17047213622291, "grad_norm": 0.07338204979896545, "learning_rate": 7.972450774434381e-05, "loss": 0.0065, "step": 16379 }, { "epoch": 3.1706656346749225, "grad_norm": 0.14661376178264618, "learning_rate": 7.972221514882503e-05, "loss": 0.0069, "step": 16380 }, { "epoch": 3.170859133126935, "grad_norm": 0.10663501173257828, "learning_rate": 7.971992246139515e-05, "loss": 0.0072, "step": 16381 }, { "epoch": 3.1710526315789473, "grad_norm": 0.07496327906847, "learning_rate": 7.971762968206266e-05, "loss": 0.0062, "step": 16382 }, { "epoch": 3.17124613003096, "grad_norm": 0.12153802812099457, "learning_rate": 7.971533681083613e-05, "loss": 0.007, "step": 16383 }, { "epoch": 3.1714396284829722, "grad_norm": 0.02465495839715004, "learning_rate": 7.971304384772408e-05, "loss": 0.0062, "step": 16384 }, { "epoch": 3.1716331269349847, "grad_norm": 0.09357800334692001, "learning_rate": 7.9710750792735e-05, "loss": 0.0065, "step": 16385 }, { "epoch": 3.171826625386997, "grad_norm": 0.07096239924430847, "learning_rate": 7.970845764587746e-05, "loss": 0.0078, "step": 16386 }, { "epoch": 3.172020123839009, "grad_norm": 0.09680011123418808, "learning_rate": 7.970616440715995e-05, "loss": 0.0068, "step": 16387 }, { "epoch": 3.1722136222910216, "grad_norm": 0.0652068555355072, "learning_rate": 7.9703871076591e-05, "loss": 0.0074, "step": 16388 }, { "epoch": 3.172407120743034, 
"grad_norm": 0.08734533190727234, "learning_rate": 7.970157765417916e-05, "loss": 0.0056, "step": 16389 }, { "epoch": 3.1726006191950464, "grad_norm": 0.08174718916416168, "learning_rate": 7.969928413993293e-05, "loss": 0.0062, "step": 16390 }, { "epoch": 3.172794117647059, "grad_norm": 0.10605060309171677, "learning_rate": 7.969699053386086e-05, "loss": 0.0065, "step": 16391 }, { "epoch": 3.1729876160990713, "grad_norm": 0.0640324279665947, "learning_rate": 7.969469683597145e-05, "loss": 0.0069, "step": 16392 }, { "epoch": 3.1731811145510838, "grad_norm": 0.06451188027858734, "learning_rate": 7.969240304627326e-05, "loss": 0.0068, "step": 16393 }, { "epoch": 3.173374613003096, "grad_norm": 0.07904110103845596, "learning_rate": 7.969010916477479e-05, "loss": 0.0065, "step": 16394 }, { "epoch": 3.173568111455108, "grad_norm": 0.05942939966917038, "learning_rate": 7.968781519148457e-05, "loss": 0.0067, "step": 16395 }, { "epoch": 3.1737616099071206, "grad_norm": 0.05767171084880829, "learning_rate": 7.968552112641114e-05, "loss": 0.0082, "step": 16396 }, { "epoch": 3.173955108359133, "grad_norm": 0.028399480506777763, "learning_rate": 7.968322696956305e-05, "loss": 0.0086, "step": 16397 }, { "epoch": 3.1741486068111455, "grad_norm": 0.07371895015239716, "learning_rate": 7.968093272094877e-05, "loss": 0.0061, "step": 16398 }, { "epoch": 3.174342105263158, "grad_norm": 0.03885661065578461, "learning_rate": 7.967863838057688e-05, "loss": 0.0078, "step": 16399 }, { "epoch": 3.1745356037151704, "grad_norm": 0.08463708311319351, "learning_rate": 7.967634394845589e-05, "loss": 0.0061, "step": 16400 }, { "epoch": 3.174729102167183, "grad_norm": 0.03206329420208931, "learning_rate": 7.967404942459434e-05, "loss": 0.0077, "step": 16401 }, { "epoch": 3.174922600619195, "grad_norm": 0.10717903822660446, "learning_rate": 7.967175480900075e-05, "loss": 0.0069, "step": 16402 }, { "epoch": 3.1751160990712073, "grad_norm": 0.04014679789543152, "learning_rate": 7.966946010168364e-05, "loss": 0.0068, "step": 16403 }, { "epoch": 3.1753095975232197, "grad_norm": 0.0936444029211998, "learning_rate": 7.966716530265158e-05, "loss": 0.0071, "step": 16404 }, { "epoch": 3.175503095975232, "grad_norm": 0.05325867980718613, "learning_rate": 7.966487041191307e-05, "loss": 0.0066, "step": 16405 }, { "epoch": 3.1756965944272446, "grad_norm": 0.07546846568584442, "learning_rate": 7.966257542947663e-05, "loss": 0.0057, "step": 16406 }, { "epoch": 3.175890092879257, "grad_norm": 0.0689413845539093, "learning_rate": 7.966028035535084e-05, "loss": 0.0067, "step": 16407 }, { "epoch": 3.1760835913312695, "grad_norm": 0.059102196246385574, "learning_rate": 7.965798518954419e-05, "loss": 0.0054, "step": 16408 }, { "epoch": 3.176277089783282, "grad_norm": 0.07860399037599564, "learning_rate": 7.965568993206524e-05, "loss": 0.006, "step": 16409 }, { "epoch": 3.176470588235294, "grad_norm": 0.049718573689460754, "learning_rate": 7.965339458292248e-05, "loss": 0.0062, "step": 16410 }, { "epoch": 3.1766640866873064, "grad_norm": 0.06840797513723373, "learning_rate": 7.96510991421245e-05, "loss": 0.0073, "step": 16411 }, { "epoch": 3.176857585139319, "grad_norm": 0.053701579570770264, "learning_rate": 7.96488036096798e-05, "loss": 0.0067, "step": 16412 }, { "epoch": 3.1770510835913313, "grad_norm": 0.051216669380664825, "learning_rate": 7.964650798559692e-05, "loss": 0.0069, "step": 16413 }, { "epoch": 3.1772445820433437, "grad_norm": 0.04670063406229019, "learning_rate": 7.96442122698844e-05, "loss": 0.0071, "step": 16414 }, { "epoch": 
3.177438080495356, "grad_norm": 0.03481579199433327, "learning_rate": 7.964191646255076e-05, "loss": 0.0061, "step": 16415 }, { "epoch": 3.1776315789473686, "grad_norm": 0.044985368847846985, "learning_rate": 7.963962056360456e-05, "loss": 0.0057, "step": 16416 }, { "epoch": 3.1778250773993806, "grad_norm": 0.03450462967157364, "learning_rate": 7.963732457305432e-05, "loss": 0.006, "step": 16417 }, { "epoch": 3.178018575851393, "grad_norm": 0.03952305391430855, "learning_rate": 7.963502849090857e-05, "loss": 0.0071, "step": 16418 }, { "epoch": 3.1782120743034055, "grad_norm": 0.059619080275297165, "learning_rate": 7.963273231717587e-05, "loss": 0.006, "step": 16419 }, { "epoch": 3.178405572755418, "grad_norm": 0.03990284353494644, "learning_rate": 7.963043605186472e-05, "loss": 0.0069, "step": 16420 }, { "epoch": 3.1785990712074303, "grad_norm": 0.06843957304954529, "learning_rate": 7.96281396949837e-05, "loss": 0.0055, "step": 16421 }, { "epoch": 3.178792569659443, "grad_norm": 0.04341853782534599, "learning_rate": 7.962584324654132e-05, "loss": 0.0077, "step": 16422 }, { "epoch": 3.1789860681114552, "grad_norm": 0.07264012843370438, "learning_rate": 7.96235467065461e-05, "loss": 0.0061, "step": 16423 }, { "epoch": 3.1791795665634677, "grad_norm": 0.054525814950466156, "learning_rate": 7.962125007500663e-05, "loss": 0.0066, "step": 16424 }, { "epoch": 3.1793730650154797, "grad_norm": 0.05814708024263382, "learning_rate": 7.961895335193141e-05, "loss": 0.0049, "step": 16425 }, { "epoch": 3.179566563467492, "grad_norm": 0.05683733895421028, "learning_rate": 7.961665653732898e-05, "loss": 0.0055, "step": 16426 }, { "epoch": 3.1797600619195046, "grad_norm": 0.052076246589422226, "learning_rate": 7.96143596312079e-05, "loss": 0.0078, "step": 16427 }, { "epoch": 3.179953560371517, "grad_norm": 0.06476011127233505, "learning_rate": 7.961206263357668e-05, "loss": 0.0058, "step": 16428 }, { "epoch": 3.1801470588235294, "grad_norm": 0.040748756378889084, "learning_rate": 7.960976554444388e-05, "loss": 0.0073, "step": 16429 }, { "epoch": 3.180340557275542, "grad_norm": 0.04601696506142616, "learning_rate": 7.960746836381804e-05, "loss": 0.0057, "step": 16430 }, { "epoch": 3.1805340557275543, "grad_norm": 0.0634169951081276, "learning_rate": 7.960517109170769e-05, "loss": 0.006, "step": 16431 }, { "epoch": 3.1807275541795668, "grad_norm": 0.04487793892621994, "learning_rate": 7.960287372812138e-05, "loss": 0.0066, "step": 16432 }, { "epoch": 3.1809210526315788, "grad_norm": 0.06653758138418198, "learning_rate": 7.960057627306766e-05, "loss": 0.0073, "step": 16433 }, { "epoch": 3.181114551083591, "grad_norm": 0.0423586405813694, "learning_rate": 7.959827872655503e-05, "loss": 0.0071, "step": 16434 }, { "epoch": 3.1813080495356036, "grad_norm": 0.0856170803308487, "learning_rate": 7.959598108859208e-05, "loss": 0.0068, "step": 16435 }, { "epoch": 3.181501547987616, "grad_norm": 0.04858475178480148, "learning_rate": 7.959368335918735e-05, "loss": 0.0057, "step": 16436 }, { "epoch": 3.1816950464396285, "grad_norm": 0.0742805004119873, "learning_rate": 7.959138553834934e-05, "loss": 0.0064, "step": 16437 }, { "epoch": 3.181888544891641, "grad_norm": 0.08263286203145981, "learning_rate": 7.958908762608662e-05, "loss": 0.0054, "step": 16438 }, { "epoch": 3.1820820433436534, "grad_norm": 0.06875953823328018, "learning_rate": 7.958678962240774e-05, "loss": 0.0067, "step": 16439 }, { "epoch": 3.182275541795666, "grad_norm": 0.10712398588657379, "learning_rate": 7.958449152732123e-05, "loss": 0.0071, "step": 
16440 }, { "epoch": 3.182469040247678, "grad_norm": 0.04765639454126358, "learning_rate": 7.958219334083564e-05, "loss": 0.005, "step": 16441 }, { "epoch": 3.1826625386996903, "grad_norm": 0.09376215189695358, "learning_rate": 7.95798950629595e-05, "loss": 0.0076, "step": 16442 }, { "epoch": 3.1828560371517027, "grad_norm": 0.055197156965732574, "learning_rate": 7.95775966937014e-05, "loss": 0.0047, "step": 16443 }, { "epoch": 3.183049535603715, "grad_norm": 0.07093676924705505, "learning_rate": 7.957529823306981e-05, "loss": 0.008, "step": 16444 }, { "epoch": 3.1832430340557276, "grad_norm": 0.07027984410524368, "learning_rate": 7.957299968107334e-05, "loss": 0.0051, "step": 16445 }, { "epoch": 3.18343653250774, "grad_norm": 0.03752856329083443, "learning_rate": 7.957070103772051e-05, "loss": 0.006, "step": 16446 }, { "epoch": 3.1836300309597525, "grad_norm": 0.06777703762054443, "learning_rate": 7.956840230301988e-05, "loss": 0.0061, "step": 16447 }, { "epoch": 3.1838235294117645, "grad_norm": 0.04440286010503769, "learning_rate": 7.956610347697997e-05, "loss": 0.0065, "step": 16448 }, { "epoch": 3.184017027863777, "grad_norm": 0.041462935507297516, "learning_rate": 7.956380455960935e-05, "loss": 0.0069, "step": 16449 }, { "epoch": 3.1842105263157894, "grad_norm": 0.06078039109706879, "learning_rate": 7.956150555091654e-05, "loss": 0.0077, "step": 16450 }, { "epoch": 3.184404024767802, "grad_norm": 0.03829367831349373, "learning_rate": 7.955920645091011e-05, "loss": 0.0055, "step": 16451 }, { "epoch": 3.1845975232198143, "grad_norm": 0.05797455459833145, "learning_rate": 7.95569072595986e-05, "loss": 0.0062, "step": 16452 }, { "epoch": 3.1847910216718267, "grad_norm": 0.06073402240872383, "learning_rate": 7.955460797699056e-05, "loss": 0.0072, "step": 16453 }, { "epoch": 3.184984520123839, "grad_norm": 0.03697784245014191, "learning_rate": 7.955230860309456e-05, "loss": 0.0066, "step": 16454 }, { "epoch": 3.1851780185758516, "grad_norm": 0.06757549196481705, "learning_rate": 7.95500091379191e-05, "loss": 0.0079, "step": 16455 }, { "epoch": 3.1853715170278636, "grad_norm": 0.04055880755186081, "learning_rate": 7.954770958147277e-05, "loss": 0.0055, "step": 16456 }, { "epoch": 3.185565015479876, "grad_norm": 0.06618571281433105, "learning_rate": 7.95454099337641e-05, "loss": 0.0062, "step": 16457 }, { "epoch": 3.1857585139318885, "grad_norm": 0.05474083870649338, "learning_rate": 7.954311019480164e-05, "loss": 0.0053, "step": 16458 }, { "epoch": 3.185952012383901, "grad_norm": 0.04976597800850868, "learning_rate": 7.954081036459394e-05, "loss": 0.0061, "step": 16459 }, { "epoch": 3.1861455108359134, "grad_norm": 0.05396519973874092, "learning_rate": 7.953851044314956e-05, "loss": 0.0059, "step": 16460 }, { "epoch": 3.186339009287926, "grad_norm": 0.03998073562979698, "learning_rate": 7.953621043047706e-05, "loss": 0.0063, "step": 16461 }, { "epoch": 3.1865325077399382, "grad_norm": 0.04107435420155525, "learning_rate": 7.953391032658495e-05, "loss": 0.0081, "step": 16462 }, { "epoch": 3.1867260061919502, "grad_norm": 0.039460111409425735, "learning_rate": 7.953161013148182e-05, "loss": 0.0056, "step": 16463 }, { "epoch": 3.1869195046439627, "grad_norm": 0.04351571202278137, "learning_rate": 7.95293098451762e-05, "loss": 0.0078, "step": 16464 }, { "epoch": 3.187113003095975, "grad_norm": 0.06234404444694519, "learning_rate": 7.952700946767666e-05, "loss": 0.0055, "step": 16465 }, { "epoch": 3.1873065015479876, "grad_norm": 0.03840992599725723, "learning_rate": 7.952470899899172e-05, "loss": 
0.0069, "step": 16466 }, { "epoch": 3.1875, "grad_norm": 0.05158190429210663, "learning_rate": 7.952240843913e-05, "loss": 0.0068, "step": 16467 }, { "epoch": 3.1876934984520124, "grad_norm": 0.03713067248463631, "learning_rate": 7.952010778809998e-05, "loss": 0.0052, "step": 16468 }, { "epoch": 3.187886996904025, "grad_norm": 0.055712271481752396, "learning_rate": 7.951780704591023e-05, "loss": 0.0063, "step": 16469 }, { "epoch": 3.1880804953560373, "grad_norm": 0.04389777407050133, "learning_rate": 7.951550621256934e-05, "loss": 0.0066, "step": 16470 }, { "epoch": 3.1882739938080498, "grad_norm": 0.03812869265675545, "learning_rate": 7.951320528808583e-05, "loss": 0.007, "step": 16471 }, { "epoch": 3.1884674922600618, "grad_norm": 0.039351072162389755, "learning_rate": 7.951090427246824e-05, "loss": 0.0063, "step": 16472 }, { "epoch": 3.188660990712074, "grad_norm": 0.03454527631402016, "learning_rate": 7.950860316572517e-05, "loss": 0.0072, "step": 16473 }, { "epoch": 3.1888544891640866, "grad_norm": 0.035409342497587204, "learning_rate": 7.950630196786515e-05, "loss": 0.0057, "step": 16474 }, { "epoch": 3.189047987616099, "grad_norm": 0.0326288677752018, "learning_rate": 7.950400067889673e-05, "loss": 0.0057, "step": 16475 }, { "epoch": 3.1892414860681115, "grad_norm": 0.04588819295167923, "learning_rate": 7.950169929882849e-05, "loss": 0.0084, "step": 16476 }, { "epoch": 3.189434984520124, "grad_norm": 0.024478621780872345, "learning_rate": 7.949939782766895e-05, "loss": 0.0059, "step": 16477 }, { "epoch": 3.1896284829721364, "grad_norm": 0.046431783586740494, "learning_rate": 7.94970962654267e-05, "loss": 0.0067, "step": 16478 }, { "epoch": 3.1898219814241484, "grad_norm": 0.036468248814344406, "learning_rate": 7.949479461211029e-05, "loss": 0.007, "step": 16479 }, { "epoch": 3.190015479876161, "grad_norm": 0.052132748067379, "learning_rate": 7.949249286772826e-05, "loss": 0.008, "step": 16480 }, { "epoch": 3.1902089783281733, "grad_norm": 0.029213981702923775, "learning_rate": 7.949019103228916e-05, "loss": 0.0054, "step": 16481 }, { "epoch": 3.1904024767801857, "grad_norm": 0.05621185526251793, "learning_rate": 7.948788910580158e-05, "loss": 0.0067, "step": 16482 }, { "epoch": 3.190595975232198, "grad_norm": 0.05402431637048721, "learning_rate": 7.948558708827406e-05, "loss": 0.0061, "step": 16483 }, { "epoch": 3.1907894736842106, "grad_norm": 0.05281243100762367, "learning_rate": 7.948328497971516e-05, "loss": 0.0071, "step": 16484 }, { "epoch": 3.190982972136223, "grad_norm": 0.06352891027927399, "learning_rate": 7.948098278013344e-05, "loss": 0.0065, "step": 16485 }, { "epoch": 3.1911764705882355, "grad_norm": 0.021291200071573257, "learning_rate": 7.947868048953745e-05, "loss": 0.0058, "step": 16486 }, { "epoch": 3.1913699690402475, "grad_norm": 0.0609310045838356, "learning_rate": 7.947637810793577e-05, "loss": 0.0051, "step": 16487 }, { "epoch": 3.19156346749226, "grad_norm": 0.0367610827088356, "learning_rate": 7.947407563533695e-05, "loss": 0.0056, "step": 16488 }, { "epoch": 3.1917569659442724, "grad_norm": 0.03569726273417473, "learning_rate": 7.947177307174955e-05, "loss": 0.0074, "step": 16489 }, { "epoch": 3.191950464396285, "grad_norm": 0.07673929631710052, "learning_rate": 7.94694704171821e-05, "loss": 0.0076, "step": 16490 }, { "epoch": 3.1921439628482973, "grad_norm": 0.02465817704796791, "learning_rate": 7.94671676716432e-05, "loss": 0.0063, "step": 16491 }, { "epoch": 3.1923374613003097, "grad_norm": 0.06587585061788559, "learning_rate": 7.946486483514142e-05, 
"loss": 0.0051, "step": 16492 }, { "epoch": 3.192530959752322, "grad_norm": 0.0319598987698555, "learning_rate": 7.946256190768529e-05, "loss": 0.0062, "step": 16493 }, { "epoch": 3.192724458204334, "grad_norm": 0.06648840010166168, "learning_rate": 7.946025888928337e-05, "loss": 0.0075, "step": 16494 }, { "epoch": 3.1929179566563466, "grad_norm": 0.028613733127713203, "learning_rate": 7.945795577994425e-05, "loss": 0.0057, "step": 16495 }, { "epoch": 3.193111455108359, "grad_norm": 0.04178095608949661, "learning_rate": 7.945565257967647e-05, "loss": 0.0061, "step": 16496 }, { "epoch": 3.1933049535603715, "grad_norm": 0.038580019026994705, "learning_rate": 7.94533492884886e-05, "loss": 0.0068, "step": 16497 }, { "epoch": 3.193498452012384, "grad_norm": 0.051278699189424515, "learning_rate": 7.945104590638921e-05, "loss": 0.0049, "step": 16498 }, { "epoch": 3.1936919504643964, "grad_norm": 0.05836860090494156, "learning_rate": 7.944874243338685e-05, "loss": 0.0064, "step": 16499 }, { "epoch": 3.193885448916409, "grad_norm": 0.04700230807065964, "learning_rate": 7.944643886949009e-05, "loss": 0.0065, "step": 16500 }, { "epoch": 3.1940789473684212, "grad_norm": 0.061634186655282974, "learning_rate": 7.94441352147075e-05, "loss": 0.0057, "step": 16501 }, { "epoch": 3.1942724458204332, "grad_norm": 0.040700364857912064, "learning_rate": 7.944183146904763e-05, "loss": 0.0057, "step": 16502 }, { "epoch": 3.1944659442724457, "grad_norm": 0.03701329976320267, "learning_rate": 7.943952763251904e-05, "loss": 0.0068, "step": 16503 }, { "epoch": 3.194659442724458, "grad_norm": 0.06304091215133667, "learning_rate": 7.943722370513033e-05, "loss": 0.0066, "step": 16504 }, { "epoch": 3.1948529411764706, "grad_norm": 0.02475055307149887, "learning_rate": 7.943491968689004e-05, "loss": 0.0061, "step": 16505 }, { "epoch": 3.195046439628483, "grad_norm": 0.06360278278589249, "learning_rate": 7.943261557780673e-05, "loss": 0.0056, "step": 16506 }, { "epoch": 3.1952399380804954, "grad_norm": 0.06742178648710251, "learning_rate": 7.943031137788898e-05, "loss": 0.0068, "step": 16507 }, { "epoch": 3.195433436532508, "grad_norm": 0.07623162865638733, "learning_rate": 7.942800708714535e-05, "loss": 0.0066, "step": 16508 }, { "epoch": 3.1956269349845203, "grad_norm": 0.058591630309820175, "learning_rate": 7.942570270558443e-05, "loss": 0.0073, "step": 16509 }, { "epoch": 3.1958204334365323, "grad_norm": 0.10484442114830017, "learning_rate": 7.942339823321474e-05, "loss": 0.0073, "step": 16510 }, { "epoch": 3.1960139318885448, "grad_norm": 0.04458809271454811, "learning_rate": 7.942109367004488e-05, "loss": 0.0058, "step": 16511 }, { "epoch": 3.196207430340557, "grad_norm": 0.08763367682695389, "learning_rate": 7.941878901608342e-05, "loss": 0.0061, "step": 16512 }, { "epoch": 3.1964009287925697, "grad_norm": 0.0433095321059227, "learning_rate": 7.941648427133892e-05, "loss": 0.0065, "step": 16513 }, { "epoch": 3.196594427244582, "grad_norm": 0.08696284145116806, "learning_rate": 7.941417943581993e-05, "loss": 0.0065, "step": 16514 }, { "epoch": 3.1967879256965945, "grad_norm": 0.057916104793548584, "learning_rate": 7.941187450953505e-05, "loss": 0.0058, "step": 16515 }, { "epoch": 3.196981424148607, "grad_norm": 0.08181541413068771, "learning_rate": 7.940956949249283e-05, "loss": 0.0077, "step": 16516 }, { "epoch": 3.1971749226006194, "grad_norm": 0.06767499446868896, "learning_rate": 7.940726438470187e-05, "loss": 0.0064, "step": 16517 }, { "epoch": 3.1973684210526314, "grad_norm": 0.07697709649801254, 
"learning_rate": 7.940495918617068e-05, "loss": 0.0059, "step": 16518 }, { "epoch": 3.197561919504644, "grad_norm": 0.047008827328681946, "learning_rate": 7.940265389690788e-05, "loss": 0.0082, "step": 16519 }, { "epoch": 3.1977554179566563, "grad_norm": 0.0625741109251976, "learning_rate": 7.940034851692202e-05, "loss": 0.0078, "step": 16520 }, { "epoch": 3.1979489164086687, "grad_norm": 0.05583756044507027, "learning_rate": 7.939804304622169e-05, "loss": 0.0058, "step": 16521 }, { "epoch": 3.198142414860681, "grad_norm": 0.0478871650993824, "learning_rate": 7.939573748481545e-05, "loss": 0.0075, "step": 16522 }, { "epoch": 3.1983359133126936, "grad_norm": 0.041558217257261276, "learning_rate": 7.939343183271185e-05, "loss": 0.0071, "step": 16523 }, { "epoch": 3.198529411764706, "grad_norm": 0.04225604981184006, "learning_rate": 7.939112608991948e-05, "loss": 0.005, "step": 16524 }, { "epoch": 3.198722910216718, "grad_norm": 0.04623733460903168, "learning_rate": 7.938882025644693e-05, "loss": 0.0073, "step": 16525 }, { "epoch": 3.1989164086687305, "grad_norm": 0.02453889511525631, "learning_rate": 7.938651433230274e-05, "loss": 0.0064, "step": 16526 }, { "epoch": 3.199109907120743, "grad_norm": 0.06569749116897583, "learning_rate": 7.938420831749552e-05, "loss": 0.0067, "step": 16527 }, { "epoch": 3.1993034055727554, "grad_norm": 0.034016385674476624, "learning_rate": 7.938190221203379e-05, "loss": 0.0072, "step": 16528 }, { "epoch": 3.199496904024768, "grad_norm": 0.06521666049957275, "learning_rate": 7.937959601592619e-05, "loss": 0.0057, "step": 16529 }, { "epoch": 3.1996904024767803, "grad_norm": 0.03937181085348129, "learning_rate": 7.937728972918125e-05, "loss": 0.0066, "step": 16530 }, { "epoch": 3.1998839009287927, "grad_norm": 0.0731675773859024, "learning_rate": 7.937498335180754e-05, "loss": 0.0072, "step": 16531 }, { "epoch": 3.200077399380805, "grad_norm": 0.05459301918745041, "learning_rate": 7.937267688381366e-05, "loss": 0.0059, "step": 16532 }, { "epoch": 3.200270897832817, "grad_norm": 0.08070601522922516, "learning_rate": 7.937037032520817e-05, "loss": 0.0069, "step": 16533 }, { "epoch": 3.2004643962848296, "grad_norm": 0.05375617370009422, "learning_rate": 7.936806367599964e-05, "loss": 0.0076, "step": 16534 }, { "epoch": 3.200657894736842, "grad_norm": 0.039880841970443726, "learning_rate": 7.936575693619666e-05, "loss": 0.0065, "step": 16535 }, { "epoch": 3.2008513931888545, "grad_norm": 0.09315626323223114, "learning_rate": 7.93634501058078e-05, "loss": 0.0069, "step": 16536 }, { "epoch": 3.201044891640867, "grad_norm": 0.026698503643274307, "learning_rate": 7.936114318484164e-05, "loss": 0.0068, "step": 16537 }, { "epoch": 3.2012383900928794, "grad_norm": 0.09889759868383408, "learning_rate": 7.935883617330674e-05, "loss": 0.0074, "step": 16538 }, { "epoch": 3.201431888544892, "grad_norm": 0.03548148274421692, "learning_rate": 7.93565290712117e-05, "loss": 0.0059, "step": 16539 }, { "epoch": 3.201625386996904, "grad_norm": 0.08018023520708084, "learning_rate": 7.93542218785651e-05, "loss": 0.0073, "step": 16540 }, { "epoch": 3.2018188854489162, "grad_norm": 0.06475262343883514, "learning_rate": 7.935191459537547e-05, "loss": 0.0073, "step": 16541 }, { "epoch": 3.2020123839009287, "grad_norm": 0.07542023062705994, "learning_rate": 7.934960722165146e-05, "loss": 0.0076, "step": 16542 }, { "epoch": 3.202205882352941, "grad_norm": 0.08897372335195541, "learning_rate": 7.934729975740158e-05, "loss": 0.0074, "step": 16543 }, { "epoch": 3.2023993808049536, "grad_norm": 
0.05901322141289711, "learning_rate": 7.934499220263444e-05, "loss": 0.0067, "step": 16544 }, { "epoch": 3.202592879256966, "grad_norm": 0.0809699222445488, "learning_rate": 7.934268455735863e-05, "loss": 0.0061, "step": 16545 }, { "epoch": 3.2027863777089784, "grad_norm": 0.042214278131723404, "learning_rate": 7.934037682158274e-05, "loss": 0.0062, "step": 16546 }, { "epoch": 3.202979876160991, "grad_norm": 0.08761204034090042, "learning_rate": 7.933806899531528e-05, "loss": 0.0069, "step": 16547 }, { "epoch": 3.203173374613003, "grad_norm": 0.031877949833869934, "learning_rate": 7.93357610785649e-05, "loss": 0.0063, "step": 16548 }, { "epoch": 3.2033668730650153, "grad_norm": 0.10153955221176147, "learning_rate": 7.933345307134016e-05, "loss": 0.0057, "step": 16549 }, { "epoch": 3.2035603715170278, "grad_norm": 0.057925790548324585, "learning_rate": 7.933114497364962e-05, "loss": 0.0073, "step": 16550 }, { "epoch": 3.20375386996904, "grad_norm": 0.0984954684972763, "learning_rate": 7.93288367855019e-05, "loss": 0.0077, "step": 16551 }, { "epoch": 3.2039473684210527, "grad_norm": 0.05162985250353813, "learning_rate": 7.932652850690554e-05, "loss": 0.0071, "step": 16552 }, { "epoch": 3.204140866873065, "grad_norm": 0.11045019328594208, "learning_rate": 7.932422013786916e-05, "loss": 0.0063, "step": 16553 }, { "epoch": 3.2043343653250775, "grad_norm": 0.050319235771894455, "learning_rate": 7.932191167840132e-05, "loss": 0.0072, "step": 16554 }, { "epoch": 3.20452786377709, "grad_norm": 0.08537616580724716, "learning_rate": 7.931960312851059e-05, "loss": 0.0055, "step": 16555 }, { "epoch": 3.204721362229102, "grad_norm": 0.06723754853010178, "learning_rate": 7.931729448820558e-05, "loss": 0.0066, "step": 16556 }, { "epoch": 3.2049148606811144, "grad_norm": 0.048606690019369125, "learning_rate": 7.931498575749486e-05, "loss": 0.0069, "step": 16557 }, { "epoch": 3.205108359133127, "grad_norm": 0.06385578960180283, "learning_rate": 7.9312676936387e-05, "loss": 0.0056, "step": 16558 }, { "epoch": 3.2053018575851393, "grad_norm": 0.03151211515069008, "learning_rate": 7.931036802489062e-05, "loss": 0.0074, "step": 16559 }, { "epoch": 3.2054953560371517, "grad_norm": 0.07816916704177856, "learning_rate": 7.930805902301427e-05, "loss": 0.0059, "step": 16560 }, { "epoch": 3.205688854489164, "grad_norm": 0.03522820770740509, "learning_rate": 7.930574993076655e-05, "loss": 0.0074, "step": 16561 }, { "epoch": 3.2058823529411766, "grad_norm": 0.08970940858125687, "learning_rate": 7.930344074815603e-05, "loss": 0.0066, "step": 16562 }, { "epoch": 3.206075851393189, "grad_norm": 0.047608524560928345, "learning_rate": 7.930113147519132e-05, "loss": 0.0061, "step": 16563 }, { "epoch": 3.206269349845201, "grad_norm": 0.0528254434466362, "learning_rate": 7.929882211188098e-05, "loss": 0.0081, "step": 16564 }, { "epoch": 3.2064628482972135, "grad_norm": 0.052509646862745285, "learning_rate": 7.929651265823362e-05, "loss": 0.0065, "step": 16565 }, { "epoch": 3.206656346749226, "grad_norm": 0.051064323633909225, "learning_rate": 7.929420311425781e-05, "loss": 0.0055, "step": 16566 }, { "epoch": 3.2068498452012384, "grad_norm": 0.039634767919778824, "learning_rate": 7.929189347996215e-05, "loss": 0.0079, "step": 16567 }, { "epoch": 3.207043343653251, "grad_norm": 0.04008277505636215, "learning_rate": 7.928958375535519e-05, "loss": 0.0068, "step": 16568 }, { "epoch": 3.2072368421052633, "grad_norm": 0.043396126478910446, "learning_rate": 7.928727394044555e-05, "loss": 0.0067, "step": 16569 }, { "epoch": 
3.2074303405572757, "grad_norm": 0.04034954681992531, "learning_rate": 7.928496403524182e-05, "loss": 0.0069, "step": 16570 }, { "epoch": 3.2076238390092877, "grad_norm": 0.04831544682383537, "learning_rate": 7.928265403975256e-05, "loss": 0.0066, "step": 16571 }, { "epoch": 3.2078173374613, "grad_norm": 0.05544886738061905, "learning_rate": 7.928034395398639e-05, "loss": 0.0066, "step": 16572 }, { "epoch": 3.2080108359133126, "grad_norm": 0.042962830513715744, "learning_rate": 7.927803377795188e-05, "loss": 0.0066, "step": 16573 }, { "epoch": 3.208204334365325, "grad_norm": 0.04997539892792702, "learning_rate": 7.927572351165762e-05, "loss": 0.0084, "step": 16574 }, { "epoch": 3.2083978328173375, "grad_norm": 0.08644019067287445, "learning_rate": 7.927341315511221e-05, "loss": 0.0067, "step": 16575 }, { "epoch": 3.20859133126935, "grad_norm": 0.043613508343696594, "learning_rate": 7.927110270832423e-05, "loss": 0.0062, "step": 16576 }, { "epoch": 3.2087848297213624, "grad_norm": 0.08167923241853714, "learning_rate": 7.926879217130227e-05, "loss": 0.0063, "step": 16577 }, { "epoch": 3.208978328173375, "grad_norm": 0.10307860374450684, "learning_rate": 7.926648154405491e-05, "loss": 0.0075, "step": 16578 }, { "epoch": 3.209171826625387, "grad_norm": 0.05269038677215576, "learning_rate": 7.926417082659077e-05, "loss": 0.0074, "step": 16579 }, { "epoch": 3.2093653250773992, "grad_norm": 0.13268063962459564, "learning_rate": 7.92618600189184e-05, "loss": 0.0066, "step": 16580 }, { "epoch": 3.2095588235294117, "grad_norm": 0.05271982401609421, "learning_rate": 7.925954912104641e-05, "loss": 0.0075, "step": 16581 }, { "epoch": 3.209752321981424, "grad_norm": 0.09945058077573776, "learning_rate": 7.925723813298342e-05, "loss": 0.0068, "step": 16582 }, { "epoch": 3.2099458204334366, "grad_norm": 0.09686671197414398, "learning_rate": 7.925492705473799e-05, "loss": 0.0056, "step": 16583 }, { "epoch": 3.210139318885449, "grad_norm": 0.05512820556759834, "learning_rate": 7.925261588631869e-05, "loss": 0.0056, "step": 16584 }, { "epoch": 3.2103328173374615, "grad_norm": 0.06007462739944458, "learning_rate": 7.925030462773417e-05, "loss": 0.0065, "step": 16585 }, { "epoch": 3.2105263157894735, "grad_norm": 0.13735754787921906, "learning_rate": 7.924799327899299e-05, "loss": 0.0066, "step": 16586 }, { "epoch": 3.210719814241486, "grad_norm": 0.043286651372909546, "learning_rate": 7.924568184010374e-05, "loss": 0.007, "step": 16587 }, { "epoch": 3.2109133126934983, "grad_norm": 0.1580955684185028, "learning_rate": 7.924337031107502e-05, "loss": 0.0074, "step": 16588 }, { "epoch": 3.2111068111455108, "grad_norm": 0.05208853259682655, "learning_rate": 7.924105869191542e-05, "loss": 0.0078, "step": 16589 }, { "epoch": 3.211300309597523, "grad_norm": 0.10539255291223526, "learning_rate": 7.923874698263353e-05, "loss": 0.0078, "step": 16590 }, { "epoch": 3.2114938080495357, "grad_norm": 0.10505202412605286, "learning_rate": 7.923643518323797e-05, "loss": 0.0059, "step": 16591 }, { "epoch": 3.211687306501548, "grad_norm": 0.07257480174303055, "learning_rate": 7.92341232937373e-05, "loss": 0.0062, "step": 16592 }, { "epoch": 3.2118808049535605, "grad_norm": 0.11509542167186737, "learning_rate": 7.923181131414015e-05, "loss": 0.0068, "step": 16593 }, { "epoch": 3.212074303405573, "grad_norm": 0.06081023067235947, "learning_rate": 7.922949924445507e-05, "loss": 0.0074, "step": 16594 }, { "epoch": 3.212267801857585, "grad_norm": 0.12211821228265762, "learning_rate": 7.922718708469071e-05, "loss": 0.0058, "step": 
16595 }, { "epoch": 3.2124613003095974, "grad_norm": 0.052895743399858475, "learning_rate": 7.92248748348556e-05, "loss": 0.0063, "step": 16596 }, { "epoch": 3.21265479876161, "grad_norm": 0.10090849548578262, "learning_rate": 7.92225624949584e-05, "loss": 0.0063, "step": 16597 }, { "epoch": 3.2128482972136223, "grad_norm": 0.07007347047328949, "learning_rate": 7.922025006500769e-05, "loss": 0.0074, "step": 16598 }, { "epoch": 3.2130417956656347, "grad_norm": 0.06604962050914764, "learning_rate": 7.921793754501204e-05, "loss": 0.0065, "step": 16599 }, { "epoch": 3.213235294117647, "grad_norm": 0.06853439658880234, "learning_rate": 7.921562493498007e-05, "loss": 0.0068, "step": 16600 }, { "epoch": 3.2134287925696596, "grad_norm": 0.06304331123828888, "learning_rate": 7.921331223492038e-05, "loss": 0.0056, "step": 16601 }, { "epoch": 3.2136222910216716, "grad_norm": 0.08097190409898758, "learning_rate": 7.921099944484157e-05, "loss": 0.0072, "step": 16602 }, { "epoch": 3.213815789473684, "grad_norm": 0.03517795726656914, "learning_rate": 7.92086865647522e-05, "loss": 0.0053, "step": 16603 }, { "epoch": 3.2140092879256965, "grad_norm": 0.07543179392814636, "learning_rate": 7.920637359466092e-05, "loss": 0.0065, "step": 16604 }, { "epoch": 3.214202786377709, "grad_norm": 0.04713328555226326, "learning_rate": 7.92040605345763e-05, "loss": 0.0073, "step": 16605 }, { "epoch": 3.2143962848297214, "grad_norm": 0.08111821860074997, "learning_rate": 7.920174738450696e-05, "loss": 0.008, "step": 16606 }, { "epoch": 3.214589783281734, "grad_norm": 0.07895790785551071, "learning_rate": 7.919943414446147e-05, "loss": 0.0065, "step": 16607 }, { "epoch": 3.2147832817337463, "grad_norm": 0.056623395532369614, "learning_rate": 7.919712081444846e-05, "loss": 0.0077, "step": 16608 }, { "epoch": 3.2149767801857587, "grad_norm": 0.08766282349824905, "learning_rate": 7.919480739447651e-05, "loss": 0.0067, "step": 16609 }, { "epoch": 3.2151702786377707, "grad_norm": 0.03480473533272743, "learning_rate": 7.919249388455422e-05, "loss": 0.0069, "step": 16610 }, { "epoch": 3.215363777089783, "grad_norm": 0.061935391277074814, "learning_rate": 7.91901802846902e-05, "loss": 0.0068, "step": 16611 }, { "epoch": 3.2155572755417956, "grad_norm": 0.052545156329870224, "learning_rate": 7.918786659489307e-05, "loss": 0.0065, "step": 16612 }, { "epoch": 3.215750773993808, "grad_norm": 0.05294257402420044, "learning_rate": 7.918555281517141e-05, "loss": 0.0054, "step": 16613 }, { "epoch": 3.2159442724458205, "grad_norm": 0.07930749654769897, "learning_rate": 7.918323894553383e-05, "loss": 0.0063, "step": 16614 }, { "epoch": 3.216137770897833, "grad_norm": 0.07398241758346558, "learning_rate": 7.918092498598888e-05, "loss": 0.0057, "step": 16615 }, { "epoch": 3.2163312693498454, "grad_norm": 0.07349591702222824, "learning_rate": 7.917861093654526e-05, "loss": 0.0074, "step": 16616 }, { "epoch": 3.2165247678018574, "grad_norm": 0.065877765417099, "learning_rate": 7.91762967972115e-05, "loss": 0.007, "step": 16617 }, { "epoch": 3.21671826625387, "grad_norm": 0.06985881179571152, "learning_rate": 7.917398256799621e-05, "loss": 0.0079, "step": 16618 }, { "epoch": 3.2169117647058822, "grad_norm": 0.055893875658512115, "learning_rate": 7.917166824890804e-05, "loss": 0.0063, "step": 16619 }, { "epoch": 3.2171052631578947, "grad_norm": 0.06406516581773758, "learning_rate": 7.916935383995555e-05, "loss": 0.0065, "step": 16620 }, { "epoch": 3.217298761609907, "grad_norm": 0.04816810041666031, "learning_rate": 7.916703934114735e-05, "loss": 
0.0067, "step": 16621 }, { "epoch": 3.2174922600619196, "grad_norm": 0.0593523308634758, "learning_rate": 7.916472475249206e-05, "loss": 0.0062, "step": 16622 }, { "epoch": 3.217685758513932, "grad_norm": 0.045337799936532974, "learning_rate": 7.916241007399829e-05, "loss": 0.0069, "step": 16623 }, { "epoch": 3.2178792569659445, "grad_norm": 0.031226638704538345, "learning_rate": 7.916009530567461e-05, "loss": 0.0067, "step": 16624 }, { "epoch": 3.2180727554179565, "grad_norm": 0.04934415966272354, "learning_rate": 7.915778044752967e-05, "loss": 0.0042, "step": 16625 }, { "epoch": 3.218266253869969, "grad_norm": 0.04490214213728905, "learning_rate": 7.915546549957205e-05, "loss": 0.0068, "step": 16626 }, { "epoch": 3.2184597523219813, "grad_norm": 0.030599458143115044, "learning_rate": 7.915315046181035e-05, "loss": 0.0059, "step": 16627 }, { "epoch": 3.218653250773994, "grad_norm": 0.06498605757951736, "learning_rate": 7.91508353342532e-05, "loss": 0.0059, "step": 16628 }, { "epoch": 3.218846749226006, "grad_norm": 0.05997265502810478, "learning_rate": 7.914852011690919e-05, "loss": 0.0072, "step": 16629 }, { "epoch": 3.2190402476780187, "grad_norm": 0.05265398323535919, "learning_rate": 7.914620480978693e-05, "loss": 0.006, "step": 16630 }, { "epoch": 3.219233746130031, "grad_norm": 0.06816177815198898, "learning_rate": 7.914388941289503e-05, "loss": 0.008, "step": 16631 }, { "epoch": 3.2194272445820435, "grad_norm": 0.03063659556210041, "learning_rate": 7.914157392624212e-05, "loss": 0.0067, "step": 16632 }, { "epoch": 3.2196207430340555, "grad_norm": 0.08455955237150192, "learning_rate": 7.913925834983675e-05, "loss": 0.0076, "step": 16633 }, { "epoch": 3.219814241486068, "grad_norm": 0.044185180217027664, "learning_rate": 7.913694268368759e-05, "loss": 0.0051, "step": 16634 }, { "epoch": 3.2200077399380804, "grad_norm": 0.10596542060375214, "learning_rate": 7.913462692780321e-05, "loss": 0.0071, "step": 16635 }, { "epoch": 3.220201238390093, "grad_norm": 0.06737975031137466, "learning_rate": 7.913231108219225e-05, "loss": 0.0069, "step": 16636 }, { "epoch": 3.2203947368421053, "grad_norm": 0.08973612636327744, "learning_rate": 7.91299951468633e-05, "loss": 0.006, "step": 16637 }, { "epoch": 3.2205882352941178, "grad_norm": 0.07322134077548981, "learning_rate": 7.912767912182497e-05, "loss": 0.0066, "step": 16638 }, { "epoch": 3.22078173374613, "grad_norm": 0.06572934985160828, "learning_rate": 7.912536300708587e-05, "loss": 0.005, "step": 16639 }, { "epoch": 3.2209752321981426, "grad_norm": 0.08161117136478424, "learning_rate": 7.912304680265461e-05, "loss": 0.0076, "step": 16640 }, { "epoch": 3.2211687306501546, "grad_norm": 0.0877954438328743, "learning_rate": 7.912073050853982e-05, "loss": 0.0076, "step": 16641 }, { "epoch": 3.221362229102167, "grad_norm": 0.07296407222747803, "learning_rate": 7.91184141247501e-05, "loss": 0.0064, "step": 16642 }, { "epoch": 3.2215557275541795, "grad_norm": 0.07272271066904068, "learning_rate": 7.911609765129405e-05, "loss": 0.0057, "step": 16643 }, { "epoch": 3.221749226006192, "grad_norm": 0.08564156293869019, "learning_rate": 7.91137810881803e-05, "loss": 0.0057, "step": 16644 }, { "epoch": 3.2219427244582044, "grad_norm": 0.05086349695920944, "learning_rate": 7.911146443541745e-05, "loss": 0.0068, "step": 16645 }, { "epoch": 3.222136222910217, "grad_norm": 0.08050742745399475, "learning_rate": 7.910914769301411e-05, "loss": 0.0062, "step": 16646 }, { "epoch": 3.2223297213622293, "grad_norm": 0.07903478294610977, "learning_rate": 
7.910683086097889e-05, "loss": 0.0059, "step": 16647 }, { "epoch": 3.2225232198142413, "grad_norm": 0.05469592660665512, "learning_rate": 7.910451393932043e-05, "loss": 0.0059, "step": 16648 }, { "epoch": 3.2227167182662537, "grad_norm": 0.08226386457681656, "learning_rate": 7.910219692804732e-05, "loss": 0.0077, "step": 16649 }, { "epoch": 3.222910216718266, "grad_norm": 0.048715490847826004, "learning_rate": 7.909987982716819e-05, "loss": 0.0056, "step": 16650 }, { "epoch": 3.2231037151702786, "grad_norm": 0.07191012054681778, "learning_rate": 7.909756263669164e-05, "loss": 0.006, "step": 16651 }, { "epoch": 3.223297213622291, "grad_norm": 0.038413435220718384, "learning_rate": 7.90952453566263e-05, "loss": 0.0057, "step": 16652 }, { "epoch": 3.2234907120743035, "grad_norm": 0.04909779131412506, "learning_rate": 7.909292798698074e-05, "loss": 0.0062, "step": 16653 }, { "epoch": 3.223684210526316, "grad_norm": 0.061018362641334534, "learning_rate": 7.909061052776364e-05, "loss": 0.0063, "step": 16654 }, { "epoch": 3.2238777089783284, "grad_norm": 0.026913659647107124, "learning_rate": 7.908829297898359e-05, "loss": 0.0072, "step": 16655 }, { "epoch": 3.2240712074303404, "grad_norm": 0.07015946507453918, "learning_rate": 7.908597534064918e-05, "loss": 0.0072, "step": 16656 }, { "epoch": 3.224264705882353, "grad_norm": 0.0474214032292366, "learning_rate": 7.908365761276906e-05, "loss": 0.0058, "step": 16657 }, { "epoch": 3.2244582043343653, "grad_norm": 0.07427051663398743, "learning_rate": 7.908133979535185e-05, "loss": 0.0066, "step": 16658 }, { "epoch": 3.2246517027863777, "grad_norm": 0.06781993806362152, "learning_rate": 7.907902188840613e-05, "loss": 0.0074, "step": 16659 }, { "epoch": 3.22484520123839, "grad_norm": 0.06503363698720932, "learning_rate": 7.907670389194056e-05, "loss": 0.0069, "step": 16660 }, { "epoch": 3.2250386996904026, "grad_norm": 0.06684695184230804, "learning_rate": 7.907438580596372e-05, "loss": 0.0064, "step": 16661 }, { "epoch": 3.225232198142415, "grad_norm": 0.044160228222608566, "learning_rate": 7.907206763048425e-05, "loss": 0.0073, "step": 16662 }, { "epoch": 3.225425696594427, "grad_norm": 0.05972922965884209, "learning_rate": 7.906974936551078e-05, "loss": 0.0072, "step": 16663 }, { "epoch": 3.2256191950464395, "grad_norm": 0.04395917430520058, "learning_rate": 7.906743101105189e-05, "loss": 0.0071, "step": 16664 }, { "epoch": 3.225812693498452, "grad_norm": 0.06269470602273941, "learning_rate": 7.906511256711622e-05, "loss": 0.0069, "step": 16665 }, { "epoch": 3.2260061919504643, "grad_norm": 0.061299994587898254, "learning_rate": 7.906279403371241e-05, "loss": 0.0061, "step": 16666 }, { "epoch": 3.226199690402477, "grad_norm": 0.06859177350997925, "learning_rate": 7.906047541084904e-05, "loss": 0.0082, "step": 16667 }, { "epoch": 3.2263931888544892, "grad_norm": 0.03941497206687927, "learning_rate": 7.905815669853476e-05, "loss": 0.0046, "step": 16668 }, { "epoch": 3.2265866873065017, "grad_norm": 0.0831766128540039, "learning_rate": 7.90558378967782e-05, "loss": 0.0061, "step": 16669 }, { "epoch": 3.226780185758514, "grad_norm": 0.05325917527079582, "learning_rate": 7.905351900558795e-05, "loss": 0.0073, "step": 16670 }, { "epoch": 3.2269736842105265, "grad_norm": 0.07740701735019684, "learning_rate": 7.905120002497263e-05, "loss": 0.006, "step": 16671 }, { "epoch": 3.2271671826625385, "grad_norm": 0.06266329437494278, "learning_rate": 7.904888095494088e-05, "loss": 0.0047, "step": 16672 }, { "epoch": 3.227360681114551, "grad_norm": 
0.05115459859371185, "learning_rate": 7.904656179550133e-05, "loss": 0.0053, "step": 16673 }, { "epoch": 3.2275541795665634, "grad_norm": 0.041907135397195816, "learning_rate": 7.904424254666259e-05, "loss": 0.0071, "step": 16674 }, { "epoch": 3.227747678018576, "grad_norm": 0.05204097926616669, "learning_rate": 7.904192320843326e-05, "loss": 0.0065, "step": 16675 }, { "epoch": 3.2279411764705883, "grad_norm": 0.04562852904200554, "learning_rate": 7.9039603780822e-05, "loss": 0.0065, "step": 16676 }, { "epoch": 3.2281346749226008, "grad_norm": 0.05092794820666313, "learning_rate": 7.903728426383741e-05, "loss": 0.0059, "step": 16677 }, { "epoch": 3.228328173374613, "grad_norm": 0.07091785222291946, "learning_rate": 7.903496465748812e-05, "loss": 0.006, "step": 16678 }, { "epoch": 3.228521671826625, "grad_norm": 0.06831194460391998, "learning_rate": 7.903264496178275e-05, "loss": 0.0064, "step": 16679 }, { "epoch": 3.2287151702786376, "grad_norm": 0.05703822523355484, "learning_rate": 7.903032517672994e-05, "loss": 0.0063, "step": 16680 }, { "epoch": 3.22890866873065, "grad_norm": 0.08547903597354889, "learning_rate": 7.90280053023383e-05, "loss": 0.0066, "step": 16681 }, { "epoch": 3.2291021671826625, "grad_norm": 0.03718702122569084, "learning_rate": 7.902568533861643e-05, "loss": 0.0053, "step": 16682 }, { "epoch": 3.229295665634675, "grad_norm": 0.05607213079929352, "learning_rate": 7.902336528557301e-05, "loss": 0.0066, "step": 16683 }, { "epoch": 3.2294891640866874, "grad_norm": 0.04269065335392952, "learning_rate": 7.902104514321664e-05, "loss": 0.007, "step": 16684 }, { "epoch": 3.2296826625387, "grad_norm": 0.05592694878578186, "learning_rate": 7.901872491155593e-05, "loss": 0.0083, "step": 16685 }, { "epoch": 3.2298761609907123, "grad_norm": 0.060813602060079575, "learning_rate": 7.901640459059953e-05, "loss": 0.0072, "step": 16686 }, { "epoch": 3.2300696594427243, "grad_norm": 0.024269411340355873, "learning_rate": 7.901408418035604e-05, "loss": 0.0056, "step": 16687 }, { "epoch": 3.2302631578947367, "grad_norm": 0.0768585279583931, "learning_rate": 7.901176368083411e-05, "loss": 0.0058, "step": 16688 }, { "epoch": 3.230456656346749, "grad_norm": 0.06656701862812042, "learning_rate": 7.900944309204237e-05, "loss": 0.0056, "step": 16689 }, { "epoch": 3.2306501547987616, "grad_norm": 0.05478649213910103, "learning_rate": 7.900712241398944e-05, "loss": 0.0063, "step": 16690 }, { "epoch": 3.230843653250774, "grad_norm": 0.08177744597196579, "learning_rate": 7.900480164668392e-05, "loss": 0.0067, "step": 16691 }, { "epoch": 3.2310371517027865, "grad_norm": 0.05532953143119812, "learning_rate": 7.900248079013448e-05, "loss": 0.0071, "step": 16692 }, { "epoch": 3.231230650154799, "grad_norm": 0.07505904883146286, "learning_rate": 7.900015984434972e-05, "loss": 0.0063, "step": 16693 }, { "epoch": 3.231424148606811, "grad_norm": 0.08917587995529175, "learning_rate": 7.899783880933829e-05, "loss": 0.0064, "step": 16694 }, { "epoch": 3.2316176470588234, "grad_norm": 0.06193772330880165, "learning_rate": 7.89955176851088e-05, "loss": 0.0074, "step": 16695 }, { "epoch": 3.231811145510836, "grad_norm": 0.08864185959100723, "learning_rate": 7.899319647166989e-05, "loss": 0.0077, "step": 16696 }, { "epoch": 3.2320046439628483, "grad_norm": 0.05842817947268486, "learning_rate": 7.899087516903018e-05, "loss": 0.0061, "step": 16697 }, { "epoch": 3.2321981424148607, "grad_norm": 0.06994105875492096, "learning_rate": 7.898855377719832e-05, "loss": 0.0051, "step": 16698 }, { "epoch": 
3.232391640866873, "grad_norm": 0.0631285011768341, "learning_rate": 7.898623229618292e-05, "loss": 0.0053, "step": 16699 }, { "epoch": 3.2325851393188856, "grad_norm": 0.0637730285525322, "learning_rate": 7.898391072599263e-05, "loss": 0.006, "step": 16700 }, { "epoch": 3.232778637770898, "grad_norm": 0.05567961931228638, "learning_rate": 7.898158906663605e-05, "loss": 0.0061, "step": 16701 }, { "epoch": 3.23297213622291, "grad_norm": 0.050806377083063126, "learning_rate": 7.897926731812185e-05, "loss": 0.0073, "step": 16702 }, { "epoch": 3.2331656346749225, "grad_norm": 0.05141349881887436, "learning_rate": 7.897694548045863e-05, "loss": 0.006, "step": 16703 }, { "epoch": 3.233359133126935, "grad_norm": 0.032678261399269104, "learning_rate": 7.897462355365503e-05, "loss": 0.0057, "step": 16704 }, { "epoch": 3.2335526315789473, "grad_norm": 0.06385892629623413, "learning_rate": 7.897230153771971e-05, "loss": 0.0072, "step": 16705 }, { "epoch": 3.23374613003096, "grad_norm": 0.022927021607756615, "learning_rate": 7.896997943266127e-05, "loss": 0.0071, "step": 16706 }, { "epoch": 3.2339396284829722, "grad_norm": 0.049219343811273575, "learning_rate": 7.896765723848836e-05, "loss": 0.0067, "step": 16707 }, { "epoch": 3.2341331269349847, "grad_norm": 0.04890193045139313, "learning_rate": 7.89653349552096e-05, "loss": 0.007, "step": 16708 }, { "epoch": 3.234326625386997, "grad_norm": 0.06765386462211609, "learning_rate": 7.896301258283362e-05, "loss": 0.0051, "step": 16709 }, { "epoch": 3.234520123839009, "grad_norm": 0.048549700528383255, "learning_rate": 7.896069012136906e-05, "loss": 0.0062, "step": 16710 }, { "epoch": 3.2347136222910216, "grad_norm": 0.075699582695961, "learning_rate": 7.895836757082458e-05, "loss": 0.0058, "step": 16711 }, { "epoch": 3.234907120743034, "grad_norm": 0.06357348710298538, "learning_rate": 7.895604493120879e-05, "loss": 0.0076, "step": 16712 }, { "epoch": 3.2351006191950464, "grad_norm": 0.04715120792388916, "learning_rate": 7.895372220253031e-05, "loss": 0.0072, "step": 16713 }, { "epoch": 3.235294117647059, "grad_norm": 0.09099943935871124, "learning_rate": 7.89513993847978e-05, "loss": 0.0068, "step": 16714 }, { "epoch": 3.2354876160990713, "grad_norm": 0.052943240851163864, "learning_rate": 7.894907647801989e-05, "loss": 0.0062, "step": 16715 }, { "epoch": 3.2356811145510838, "grad_norm": 0.06385353207588196, "learning_rate": 7.894675348220522e-05, "loss": 0.0069, "step": 16716 }, { "epoch": 3.235874613003096, "grad_norm": 0.06116108223795891, "learning_rate": 7.894443039736241e-05, "loss": 0.0071, "step": 16717 }, { "epoch": 3.236068111455108, "grad_norm": 0.04244612902402878, "learning_rate": 7.894210722350012e-05, "loss": 0.0066, "step": 16718 }, { "epoch": 3.2362616099071206, "grad_norm": 0.0470917709171772, "learning_rate": 7.893978396062696e-05, "loss": 0.0074, "step": 16719 }, { "epoch": 3.236455108359133, "grad_norm": 0.04489835724234581, "learning_rate": 7.89374606087516e-05, "loss": 0.0059, "step": 16720 }, { "epoch": 3.2366486068111455, "grad_norm": 0.06722813844680786, "learning_rate": 7.893513716788265e-05, "loss": 0.0056, "step": 16721 }, { "epoch": 3.236842105263158, "grad_norm": 0.05463423579931259, "learning_rate": 7.893281363802877e-05, "loss": 0.0065, "step": 16722 }, { "epoch": 3.2370356037151704, "grad_norm": 0.10474631190299988, "learning_rate": 7.893049001919855e-05, "loss": 0.0076, "step": 16723 }, { "epoch": 3.237229102167183, "grad_norm": 0.04412001371383667, "learning_rate": 7.89281663114007e-05, "loss": 0.0066, "step": 16724 }, 
{ "epoch": 3.237422600619195, "grad_norm": 0.09444137662649155, "learning_rate": 7.892584251464382e-05, "loss": 0.0072, "step": 16725 }, { "epoch": 3.2376160990712073, "grad_norm": 0.04981638118624687, "learning_rate": 7.892351862893655e-05, "loss": 0.0063, "step": 16726 }, { "epoch": 3.2378095975232197, "grad_norm": 0.0823693498969078, "learning_rate": 7.892119465428753e-05, "loss": 0.0072, "step": 16727 }, { "epoch": 3.238003095975232, "grad_norm": 0.061910420656204224, "learning_rate": 7.891887059070541e-05, "loss": 0.0071, "step": 16728 }, { "epoch": 3.2381965944272446, "grad_norm": 0.07200860232114792, "learning_rate": 7.89165464381988e-05, "loss": 0.0063, "step": 16729 }, { "epoch": 3.238390092879257, "grad_norm": 0.05530092865228653, "learning_rate": 7.891422219677638e-05, "loss": 0.0066, "step": 16730 }, { "epoch": 3.2385835913312695, "grad_norm": 0.060301728546619415, "learning_rate": 7.891189786644678e-05, "loss": 0.007, "step": 16731 }, { "epoch": 3.238777089783282, "grad_norm": 0.05730138346552849, "learning_rate": 7.890957344721864e-05, "loss": 0.0073, "step": 16732 }, { "epoch": 3.238970588235294, "grad_norm": 0.03970944136381149, "learning_rate": 7.890724893910058e-05, "loss": 0.0064, "step": 16733 }, { "epoch": 3.2391640866873064, "grad_norm": 0.08059202134609222, "learning_rate": 7.890492434210128e-05, "loss": 0.0066, "step": 16734 }, { "epoch": 3.239357585139319, "grad_norm": 0.07284928113222122, "learning_rate": 7.890259965622932e-05, "loss": 0.0061, "step": 16735 }, { "epoch": 3.2395510835913313, "grad_norm": 0.0620848685503006, "learning_rate": 7.89002748814934e-05, "loss": 0.0068, "step": 16736 }, { "epoch": 3.2397445820433437, "grad_norm": 0.08343923836946487, "learning_rate": 7.889795001790218e-05, "loss": 0.0059, "step": 16737 }, { "epoch": 3.239938080495356, "grad_norm": 0.04674932733178139, "learning_rate": 7.889562506546425e-05, "loss": 0.0072, "step": 16738 }, { "epoch": 3.2401315789473686, "grad_norm": 0.08047371357679367, "learning_rate": 7.889330002418826e-05, "loss": 0.0063, "step": 16739 }, { "epoch": 3.2403250773993806, "grad_norm": 0.056458037346601486, "learning_rate": 7.88909748940829e-05, "loss": 0.0073, "step": 16740 }, { "epoch": 3.240518575851393, "grad_norm": 0.08509205281734467, "learning_rate": 7.888864967515675e-05, "loss": 0.0064, "step": 16741 }, { "epoch": 3.2407120743034055, "grad_norm": 0.05974755436182022, "learning_rate": 7.88863243674185e-05, "loss": 0.0066, "step": 16742 }, { "epoch": 3.240905572755418, "grad_norm": 0.048160944133996964, "learning_rate": 7.888399897087679e-05, "loss": 0.007, "step": 16743 }, { "epoch": 3.2410990712074303, "grad_norm": 0.07017846405506134, "learning_rate": 7.888167348554024e-05, "loss": 0.0054, "step": 16744 }, { "epoch": 3.241292569659443, "grad_norm": 0.046079907566308975, "learning_rate": 7.887934791141752e-05, "loss": 0.006, "step": 16745 }, { "epoch": 3.2414860681114552, "grad_norm": 0.05583877116441727, "learning_rate": 7.887702224851729e-05, "loss": 0.0069, "step": 16746 }, { "epoch": 3.2416795665634677, "grad_norm": 0.04528075084090233, "learning_rate": 7.887469649684814e-05, "loss": 0.0065, "step": 16747 }, { "epoch": 3.2418730650154797, "grad_norm": 0.05263575166463852, "learning_rate": 7.887237065641878e-05, "loss": 0.0063, "step": 16748 }, { "epoch": 3.242066563467492, "grad_norm": 0.022693926468491554, "learning_rate": 7.88700447272378e-05, "loss": 0.0053, "step": 16749 }, { "epoch": 3.2422600619195046, "grad_norm": 0.036642737686634064, "learning_rate": 7.886771870931388e-05, "loss": 
0.0049, "step": 16750 }, { "epoch": 3.242453560371517, "grad_norm": 0.04109281674027443, "learning_rate": 7.886539260265568e-05, "loss": 0.0064, "step": 16751 }, { "epoch": 3.2426470588235294, "grad_norm": 0.03780296444892883, "learning_rate": 7.886306640727182e-05, "loss": 0.007, "step": 16752 }, { "epoch": 3.242840557275542, "grad_norm": 0.051865674555301666, "learning_rate": 7.886074012317098e-05, "loss": 0.0061, "step": 16753 }, { "epoch": 3.2430340557275543, "grad_norm": 0.04227983206510544, "learning_rate": 7.885841375036175e-05, "loss": 0.0067, "step": 16754 }, { "epoch": 3.2432275541795668, "grad_norm": 0.054971419274806976, "learning_rate": 7.885608728885285e-05, "loss": 0.0069, "step": 16755 }, { "epoch": 3.2434210526315788, "grad_norm": 0.03333481028676033, "learning_rate": 7.885376073865288e-05, "loss": 0.0056, "step": 16756 }, { "epoch": 3.243614551083591, "grad_norm": 0.06848898530006409, "learning_rate": 7.885143409977051e-05, "loss": 0.0047, "step": 16757 }, { "epoch": 3.2438080495356036, "grad_norm": 0.06912147253751755, "learning_rate": 7.88491073722144e-05, "loss": 0.0074, "step": 16758 }, { "epoch": 3.244001547987616, "grad_norm": 0.05664605647325516, "learning_rate": 7.884678055599316e-05, "loss": 0.0061, "step": 16759 }, { "epoch": 3.2441950464396285, "grad_norm": 0.03576534241437912, "learning_rate": 7.884445365111547e-05, "loss": 0.0067, "step": 16760 }, { "epoch": 3.244388544891641, "grad_norm": 0.03541089966893196, "learning_rate": 7.884212665758998e-05, "loss": 0.0077, "step": 16761 }, { "epoch": 3.2445820433436534, "grad_norm": 0.036759406328201294, "learning_rate": 7.883979957542533e-05, "loss": 0.0071, "step": 16762 }, { "epoch": 3.244775541795666, "grad_norm": 0.06465890258550644, "learning_rate": 7.88374724046302e-05, "loss": 0.006, "step": 16763 }, { "epoch": 3.244969040247678, "grad_norm": 0.04027910530567169, "learning_rate": 7.88351451452132e-05, "loss": 0.0075, "step": 16764 }, { "epoch": 3.2451625386996903, "grad_norm": 0.07987966388463974, "learning_rate": 7.883281779718301e-05, "loss": 0.0074, "step": 16765 }, { "epoch": 3.2453560371517027, "grad_norm": 0.0410672090947628, "learning_rate": 7.88304903605483e-05, "loss": 0.0051, "step": 16766 }, { "epoch": 3.245549535603715, "grad_norm": 0.07353808730840683, "learning_rate": 7.882816283531767e-05, "loss": 0.0073, "step": 16767 }, { "epoch": 3.2457430340557276, "grad_norm": 0.04900601506233215, "learning_rate": 7.882583522149978e-05, "loss": 0.0063, "step": 16768 }, { "epoch": 3.24593653250774, "grad_norm": 0.06892555952072144, "learning_rate": 7.882350751910336e-05, "loss": 0.007, "step": 16769 }, { "epoch": 3.2461300309597525, "grad_norm": 0.05109313130378723, "learning_rate": 7.882117972813698e-05, "loss": 0.0053, "step": 16770 }, { "epoch": 3.2463235294117645, "grad_norm": 0.07255623489618301, "learning_rate": 7.881885184860933e-05, "loss": 0.0058, "step": 16771 }, { "epoch": 3.246517027863777, "grad_norm": 0.05389534309506416, "learning_rate": 7.881652388052904e-05, "loss": 0.0061, "step": 16772 }, { "epoch": 3.2467105263157894, "grad_norm": 0.07534516602754593, "learning_rate": 7.881419582390482e-05, "loss": 0.0069, "step": 16773 }, { "epoch": 3.246904024767802, "grad_norm": 0.028878290206193924, "learning_rate": 7.881186767874526e-05, "loss": 0.0067, "step": 16774 }, { "epoch": 3.2470975232198143, "grad_norm": 0.08609282970428467, "learning_rate": 7.880953944505904e-05, "loss": 0.0068, "step": 16775 }, { "epoch": 3.2472910216718267, "grad_norm": 0.027838120236992836, "learning_rate": 
7.880721112285483e-05, "loss": 0.0052, "step": 16776 }, { "epoch": 3.247484520123839, "grad_norm": 0.1136120930314064, "learning_rate": 7.880488271214127e-05, "loss": 0.0075, "step": 16777 }, { "epoch": 3.2476780185758516, "grad_norm": 0.04205736517906189, "learning_rate": 7.880255421292705e-05, "loss": 0.0086, "step": 16778 }, { "epoch": 3.2478715170278636, "grad_norm": 0.08710788190364838, "learning_rate": 7.880022562522075e-05, "loss": 0.0066, "step": 16779 }, { "epoch": 3.248065015479876, "grad_norm": 0.036634501069784164, "learning_rate": 7.879789694903112e-05, "loss": 0.0071, "step": 16780 }, { "epoch": 3.2482585139318885, "grad_norm": 0.10149283707141876, "learning_rate": 7.879556818436675e-05, "loss": 0.0072, "step": 16781 }, { "epoch": 3.248452012383901, "grad_norm": 0.054380614310503006, "learning_rate": 7.879323933123634e-05, "loss": 0.0063, "step": 16782 }, { "epoch": 3.2486455108359134, "grad_norm": 0.0896521508693695, "learning_rate": 7.87909103896485e-05, "loss": 0.0064, "step": 16783 }, { "epoch": 3.248839009287926, "grad_norm": 0.08567647635936737, "learning_rate": 7.878858135961192e-05, "loss": 0.0066, "step": 16784 }, { "epoch": 3.2490325077399382, "grad_norm": 0.08512010425329208, "learning_rate": 7.878625224113527e-05, "loss": 0.0064, "step": 16785 }, { "epoch": 3.2492260061919502, "grad_norm": 0.08397891372442245, "learning_rate": 7.87839230342272e-05, "loss": 0.0073, "step": 16786 }, { "epoch": 3.2494195046439627, "grad_norm": 0.07604929059743881, "learning_rate": 7.878159373889635e-05, "loss": 0.0072, "step": 16787 }, { "epoch": 3.249613003095975, "grad_norm": 0.058373358100652695, "learning_rate": 7.877926435515139e-05, "loss": 0.0067, "step": 16788 }, { "epoch": 3.2498065015479876, "grad_norm": 0.07737742364406586, "learning_rate": 7.8776934883001e-05, "loss": 0.0055, "step": 16789 }, { "epoch": 3.25, "grad_norm": 0.05174228549003601, "learning_rate": 7.877460532245384e-05, "loss": 0.0061, "step": 16790 }, { "epoch": 3.2501934984520124, "grad_norm": 0.06036771461367607, "learning_rate": 7.877227567351853e-05, "loss": 0.0063, "step": 16791 }, { "epoch": 3.250386996904025, "grad_norm": 0.0736047774553299, "learning_rate": 7.876994593620376e-05, "loss": 0.0061, "step": 16792 }, { "epoch": 3.2505804953560373, "grad_norm": 0.04097507521510124, "learning_rate": 7.876761611051818e-05, "loss": 0.0057, "step": 16793 }, { "epoch": 3.2507739938080498, "grad_norm": 0.06686895340681076, "learning_rate": 7.876528619647048e-05, "loss": 0.006, "step": 16794 }, { "epoch": 3.2509674922600618, "grad_norm": 0.036360785365104675, "learning_rate": 7.876295619406929e-05, "loss": 0.0073, "step": 16795 }, { "epoch": 3.251160990712074, "grad_norm": 0.052345260977745056, "learning_rate": 7.876062610332329e-05, "loss": 0.0059, "step": 16796 }, { "epoch": 3.2513544891640866, "grad_norm": 0.053320929408073425, "learning_rate": 7.875829592424113e-05, "loss": 0.0058, "step": 16797 }, { "epoch": 3.251547987616099, "grad_norm": 0.03797520324587822, "learning_rate": 7.875596565683149e-05, "loss": 0.0061, "step": 16798 }, { "epoch": 3.2517414860681115, "grad_norm": 0.05307088792324066, "learning_rate": 7.875363530110301e-05, "loss": 0.0066, "step": 16799 }, { "epoch": 3.251934984520124, "grad_norm": 0.02650413103401661, "learning_rate": 7.875130485706438e-05, "loss": 0.005, "step": 16800 }, { "epoch": 3.2521284829721364, "grad_norm": 0.03865385428071022, "learning_rate": 7.874897432472424e-05, "loss": 0.0062, "step": 16801 }, { "epoch": 3.2523219814241484, "grad_norm": 0.04206838831305504, 
"learning_rate": 7.874664370409129e-05, "loss": 0.007, "step": 16802 }, { "epoch": 3.252515479876161, "grad_norm": 0.0275440514087677, "learning_rate": 7.874431299517413e-05, "loss": 0.0062, "step": 16803 }, { "epoch": 3.2527089783281733, "grad_norm": 0.052102744579315186, "learning_rate": 7.874198219798149e-05, "loss": 0.0057, "step": 16804 }, { "epoch": 3.2529024767801857, "grad_norm": 0.03684580698609352, "learning_rate": 7.8739651312522e-05, "loss": 0.0071, "step": 16805 }, { "epoch": 3.253095975232198, "grad_norm": 0.031171049922704697, "learning_rate": 7.873732033880434e-05, "loss": 0.0069, "step": 16806 }, { "epoch": 3.2532894736842106, "grad_norm": 0.04534653201699257, "learning_rate": 7.873498927683717e-05, "loss": 0.0066, "step": 16807 }, { "epoch": 3.253482972136223, "grad_norm": 0.0351070761680603, "learning_rate": 7.873265812662918e-05, "loss": 0.0059, "step": 16808 }, { "epoch": 3.2536764705882355, "grad_norm": 0.03736831620335579, "learning_rate": 7.873032688818899e-05, "loss": 0.0055, "step": 16809 }, { "epoch": 3.2538699690402475, "grad_norm": 0.036221154034137726, "learning_rate": 7.87279955615253e-05, "loss": 0.0064, "step": 16810 }, { "epoch": 3.25406346749226, "grad_norm": 0.039714515209198, "learning_rate": 7.872566414664675e-05, "loss": 0.0072, "step": 16811 }, { "epoch": 3.2542569659442724, "grad_norm": 0.0547848604619503, "learning_rate": 7.872333264356205e-05, "loss": 0.0071, "step": 16812 }, { "epoch": 3.254450464396285, "grad_norm": 0.03722085803747177, "learning_rate": 7.872100105227985e-05, "loss": 0.0059, "step": 16813 }, { "epoch": 3.2546439628482973, "grad_norm": 0.04902864992618561, "learning_rate": 7.87186693728088e-05, "loss": 0.0058, "step": 16814 }, { "epoch": 3.2548374613003097, "grad_norm": 0.040300529450178146, "learning_rate": 7.871633760515759e-05, "loss": 0.0061, "step": 16815 }, { "epoch": 3.255030959752322, "grad_norm": 0.03864428400993347, "learning_rate": 7.871400574933488e-05, "loss": 0.0066, "step": 16816 }, { "epoch": 3.255224458204334, "grad_norm": 0.029514089226722717, "learning_rate": 7.871167380534932e-05, "loss": 0.007, "step": 16817 }, { "epoch": 3.2554179566563466, "grad_norm": 0.0548410527408123, "learning_rate": 7.870934177320961e-05, "loss": 0.0065, "step": 16818 }, { "epoch": 3.255611455108359, "grad_norm": 0.03968847543001175, "learning_rate": 7.870700965292442e-05, "loss": 0.0049, "step": 16819 }, { "epoch": 3.2558049535603715, "grad_norm": 0.0817965492606163, "learning_rate": 7.870467744450242e-05, "loss": 0.0067, "step": 16820 }, { "epoch": 3.255998452012384, "grad_norm": 0.02682553231716156, "learning_rate": 7.870234514795224e-05, "loss": 0.0057, "step": 16821 }, { "epoch": 3.2561919504643964, "grad_norm": 0.06021326035261154, "learning_rate": 7.870001276328261e-05, "loss": 0.0061, "step": 16822 }, { "epoch": 3.256385448916409, "grad_norm": 0.05940249562263489, "learning_rate": 7.869768029050214e-05, "loss": 0.0057, "step": 16823 }, { "epoch": 3.2565789473684212, "grad_norm": 0.056478507816791534, "learning_rate": 7.869534772961956e-05, "loss": 0.0059, "step": 16824 }, { "epoch": 3.2567724458204337, "grad_norm": 0.06320571154356003, "learning_rate": 7.86930150806435e-05, "loss": 0.0055, "step": 16825 }, { "epoch": 3.2569659442724457, "grad_norm": 0.05596119165420532, "learning_rate": 7.869068234358267e-05, "loss": 0.0073, "step": 16826 }, { "epoch": 3.257159442724458, "grad_norm": 0.051180608570575714, "learning_rate": 7.86883495184457e-05, "loss": 0.0083, "step": 16827 }, { "epoch": 3.2573529411764706, "grad_norm": 
0.048774514347314835, "learning_rate": 7.868601660524128e-05, "loss": 0.006, "step": 16828 }, { "epoch": 3.257546439628483, "grad_norm": 0.04357382282614708, "learning_rate": 7.86836836039781e-05, "loss": 0.0055, "step": 16829 }, { "epoch": 3.2577399380804954, "grad_norm": 0.04121849313378334, "learning_rate": 7.86813505146648e-05, "loss": 0.0064, "step": 16830 }, { "epoch": 3.257933436532508, "grad_norm": 0.04528861492872238, "learning_rate": 7.86790173373101e-05, "loss": 0.0066, "step": 16831 }, { "epoch": 3.25812693498452, "grad_norm": 0.029795052483677864, "learning_rate": 7.867668407192265e-05, "loss": 0.0063, "step": 16832 }, { "epoch": 3.2583204334365323, "grad_norm": 0.051568303257226944, "learning_rate": 7.86743507185111e-05, "loss": 0.0061, "step": 16833 }, { "epoch": 3.2585139318885448, "grad_norm": 0.05023406445980072, "learning_rate": 7.867201727708416e-05, "loss": 0.0066, "step": 16834 }, { "epoch": 3.258707430340557, "grad_norm": 0.06928703933954239, "learning_rate": 7.866968374765049e-05, "loss": 0.0068, "step": 16835 }, { "epoch": 3.2589009287925697, "grad_norm": 0.061017122119665146, "learning_rate": 7.866735013021877e-05, "loss": 0.0073, "step": 16836 }, { "epoch": 3.259094427244582, "grad_norm": 0.09404563903808594, "learning_rate": 7.866501642479767e-05, "loss": 0.0068, "step": 16837 }, { "epoch": 3.2592879256965945, "grad_norm": 0.05338769406080246, "learning_rate": 7.866268263139587e-05, "loss": 0.0074, "step": 16838 }, { "epoch": 3.259481424148607, "grad_norm": 0.13249900937080383, "learning_rate": 7.866034875002205e-05, "loss": 0.0065, "step": 16839 }, { "epoch": 3.2596749226006194, "grad_norm": 0.026364412158727646, "learning_rate": 7.86580147806849e-05, "loss": 0.0073, "step": 16840 }, { "epoch": 3.2598684210526314, "grad_norm": 0.12280124425888062, "learning_rate": 7.865568072339306e-05, "loss": 0.0057, "step": 16841 }, { "epoch": 3.260061919504644, "grad_norm": 0.06882774084806442, "learning_rate": 7.865334657815523e-05, "loss": 0.0076, "step": 16842 }, { "epoch": 3.2602554179566563, "grad_norm": 0.08740537613630295, "learning_rate": 7.865101234498009e-05, "loss": 0.0064, "step": 16843 }, { "epoch": 3.2604489164086687, "grad_norm": 0.09584958851337433, "learning_rate": 7.86486780238763e-05, "loss": 0.0053, "step": 16844 }, { "epoch": 3.260642414860681, "grad_norm": 0.06442277133464813, "learning_rate": 7.864634361485257e-05, "loss": 0.0058, "step": 16845 }, { "epoch": 3.2608359133126936, "grad_norm": 0.10361994802951813, "learning_rate": 7.864400911791755e-05, "loss": 0.0059, "step": 16846 }, { "epoch": 3.261029411764706, "grad_norm": 0.04274645447731018, "learning_rate": 7.864167453307994e-05, "loss": 0.007, "step": 16847 }, { "epoch": 3.261222910216718, "grad_norm": 0.07604571431875229, "learning_rate": 7.863933986034839e-05, "loss": 0.0085, "step": 16848 }, { "epoch": 3.2614164086687305, "grad_norm": 0.05275801569223404, "learning_rate": 7.863700509973162e-05, "loss": 0.006, "step": 16849 }, { "epoch": 3.261609907120743, "grad_norm": 0.05688566714525223, "learning_rate": 7.863467025123828e-05, "loss": 0.007, "step": 16850 }, { "epoch": 3.2618034055727554, "grad_norm": 0.05459202453494072, "learning_rate": 7.863233531487708e-05, "loss": 0.007, "step": 16851 }, { "epoch": 3.261996904024768, "grad_norm": 0.06202223151922226, "learning_rate": 7.863000029065664e-05, "loss": 0.0057, "step": 16852 }, { "epoch": 3.2621904024767803, "grad_norm": 0.04536326974630356, "learning_rate": 7.862766517858573e-05, "loss": 0.0061, "step": 16853 }, { "epoch": 
3.2623839009287927, "grad_norm": 0.059095825999975204, "learning_rate": 7.862532997867294e-05, "loss": 0.0057, "step": 16854 }, { "epoch": 3.262577399380805, "grad_norm": 0.049482036381959915, "learning_rate": 7.862299469092702e-05, "loss": 0.0067, "step": 16855 }, { "epoch": 3.262770897832817, "grad_norm": 0.06694476306438446, "learning_rate": 7.862065931535661e-05, "loss": 0.0067, "step": 16856 }, { "epoch": 3.2629643962848296, "grad_norm": 0.0426141694188118, "learning_rate": 7.861832385197043e-05, "loss": 0.0056, "step": 16857 }, { "epoch": 3.263157894736842, "grad_norm": 0.06330692023038864, "learning_rate": 7.861598830077713e-05, "loss": 0.006, "step": 16858 }, { "epoch": 3.2633513931888545, "grad_norm": 0.05322594568133354, "learning_rate": 7.86136526617854e-05, "loss": 0.0068, "step": 16859 }, { "epoch": 3.263544891640867, "grad_norm": 0.04677996784448624, "learning_rate": 7.861131693500393e-05, "loss": 0.0078, "step": 16860 }, { "epoch": 3.2637383900928794, "grad_norm": 0.057399120181798935, "learning_rate": 7.86089811204414e-05, "loss": 0.0073, "step": 16861 }, { "epoch": 3.263931888544892, "grad_norm": 0.03131994977593422, "learning_rate": 7.86066452181065e-05, "loss": 0.0066, "step": 16862 }, { "epoch": 3.264125386996904, "grad_norm": 0.07195054739713669, "learning_rate": 7.86043092280079e-05, "loss": 0.0061, "step": 16863 }, { "epoch": 3.2643188854489162, "grad_norm": 0.031901001930236816, "learning_rate": 7.86019731501543e-05, "loss": 0.0069, "step": 16864 }, { "epoch": 3.2645123839009287, "grad_norm": 0.050668176263570786, "learning_rate": 7.859963698455439e-05, "loss": 0.007, "step": 16865 }, { "epoch": 3.264705882352941, "grad_norm": 0.02828592248260975, "learning_rate": 7.859730073121685e-05, "loss": 0.0077, "step": 16866 }, { "epoch": 3.2648993808049536, "grad_norm": 0.04213400557637215, "learning_rate": 7.859496439015034e-05, "loss": 0.0068, "step": 16867 }, { "epoch": 3.265092879256966, "grad_norm": 0.022647447884082794, "learning_rate": 7.859262796136358e-05, "loss": 0.0062, "step": 16868 }, { "epoch": 3.2652863777089784, "grad_norm": 0.04144592210650444, "learning_rate": 7.859029144486522e-05, "loss": 0.006, "step": 16869 }, { "epoch": 3.265479876160991, "grad_norm": 0.034097325056791306, "learning_rate": 7.858795484066399e-05, "loss": 0.0071, "step": 16870 }, { "epoch": 3.2656733746130033, "grad_norm": 0.03848813474178314, "learning_rate": 7.858561814876856e-05, "loss": 0.0071, "step": 16871 }, { "epoch": 3.2658668730650153, "grad_norm": 0.04347178712487221, "learning_rate": 7.858328136918761e-05, "loss": 0.0078, "step": 16872 }, { "epoch": 3.2660603715170278, "grad_norm": 0.021150216460227966, "learning_rate": 7.858094450192983e-05, "loss": 0.0068, "step": 16873 }, { "epoch": 3.26625386996904, "grad_norm": 0.05890093371272087, "learning_rate": 7.85786075470039e-05, "loss": 0.0065, "step": 16874 }, { "epoch": 3.2664473684210527, "grad_norm": 0.029018357396125793, "learning_rate": 7.857627050441852e-05, "loss": 0.0076, "step": 16875 }, { "epoch": 3.266640866873065, "grad_norm": 0.060109496116638184, "learning_rate": 7.857393337418238e-05, "loss": 0.0065, "step": 16876 }, { "epoch": 3.2668343653250775, "grad_norm": 0.03463287651538849, "learning_rate": 7.857159615630418e-05, "loss": 0.0073, "step": 16877 }, { "epoch": 3.2670278637770895, "grad_norm": 0.060012638568878174, "learning_rate": 7.856925885079257e-05, "loss": 0.0063, "step": 16878 }, { "epoch": 3.267221362229102, "grad_norm": 0.036304425448179245, "learning_rate": 7.856692145765628e-05, "loss": 0.0075, 
"step": 16879 }, { "epoch": 3.2674148606811144, "grad_norm": 0.05205259099602699, "learning_rate": 7.856458397690395e-05, "loss": 0.0057, "step": 16880 }, { "epoch": 3.267608359133127, "grad_norm": 0.032099295407533646, "learning_rate": 7.856224640854432e-05, "loss": 0.0063, "step": 16881 }, { "epoch": 3.2678018575851393, "grad_norm": 0.0661984458565712, "learning_rate": 7.855990875258607e-05, "loss": 0.0062, "step": 16882 }, { "epoch": 3.2679953560371517, "grad_norm": 0.04753383249044418, "learning_rate": 7.855757100903789e-05, "loss": 0.0075, "step": 16883 }, { "epoch": 3.268188854489164, "grad_norm": 0.0618448406457901, "learning_rate": 7.855523317790846e-05, "loss": 0.0065, "step": 16884 }, { "epoch": 3.2683823529411766, "grad_norm": 0.04901062697172165, "learning_rate": 7.855289525920648e-05, "loss": 0.0056, "step": 16885 }, { "epoch": 3.268575851393189, "grad_norm": 0.04075302183628082, "learning_rate": 7.855055725294064e-05, "loss": 0.0067, "step": 16886 }, { "epoch": 3.268769349845201, "grad_norm": 0.05395699664950371, "learning_rate": 7.854821915911962e-05, "loss": 0.0064, "step": 16887 }, { "epoch": 3.2689628482972135, "grad_norm": 0.04866372421383858, "learning_rate": 7.854588097775214e-05, "loss": 0.0054, "step": 16888 }, { "epoch": 3.269156346749226, "grad_norm": 0.06227042153477669, "learning_rate": 7.854354270884687e-05, "loss": 0.0078, "step": 16889 }, { "epoch": 3.2693498452012384, "grad_norm": 0.03071434423327446, "learning_rate": 7.85412043524125e-05, "loss": 0.0073, "step": 16890 }, { "epoch": 3.269543343653251, "grad_norm": 0.04869813472032547, "learning_rate": 7.853886590845774e-05, "loss": 0.006, "step": 16891 }, { "epoch": 3.2697368421052633, "grad_norm": 0.04525449499487877, "learning_rate": 7.853652737699127e-05, "loss": 0.0065, "step": 16892 }, { "epoch": 3.2699303405572757, "grad_norm": 0.03640196844935417, "learning_rate": 7.85341887580218e-05, "loss": 0.0077, "step": 16893 }, { "epoch": 3.2701238390092877, "grad_norm": 0.04295589402318001, "learning_rate": 7.8531850051558e-05, "loss": 0.0064, "step": 16894 }, { "epoch": 3.2703173374613, "grad_norm": 0.03486213460564613, "learning_rate": 7.85295112576086e-05, "loss": 0.005, "step": 16895 }, { "epoch": 3.2705108359133126, "grad_norm": 0.03672221302986145, "learning_rate": 7.852717237618227e-05, "loss": 0.0057, "step": 16896 }, { "epoch": 3.270704334365325, "grad_norm": 0.031002582982182503, "learning_rate": 7.852483340728773e-05, "loss": 0.0067, "step": 16897 }, { "epoch": 3.2708978328173375, "grad_norm": 0.040660955011844635, "learning_rate": 7.852249435093362e-05, "loss": 0.0057, "step": 16898 }, { "epoch": 3.27109133126935, "grad_norm": 0.032793622463941574, "learning_rate": 7.852015520712867e-05, "loss": 0.0064, "step": 16899 }, { "epoch": 3.2712848297213624, "grad_norm": 0.04382281005382538, "learning_rate": 7.85178159758816e-05, "loss": 0.0057, "step": 16900 }, { "epoch": 3.271478328173375, "grad_norm": 0.03613663837313652, "learning_rate": 7.851547665720108e-05, "loss": 0.0052, "step": 16901 }, { "epoch": 3.271671826625387, "grad_norm": 0.05076317861676216, "learning_rate": 7.851313725109581e-05, "loss": 0.0061, "step": 16902 }, { "epoch": 3.2718653250773992, "grad_norm": 0.03388849273324013, "learning_rate": 7.851079775757448e-05, "loss": 0.0072, "step": 16903 }, { "epoch": 3.2720588235294117, "grad_norm": 0.06101755425333977, "learning_rate": 7.850845817664582e-05, "loss": 0.0063, "step": 16904 }, { "epoch": 3.272252321981424, "grad_norm": 0.052927080541849136, "learning_rate": 7.85061185083185e-05, 
"loss": 0.0065, "step": 16905 }, { "epoch": 3.2724458204334366, "grad_norm": 0.02294585295021534, "learning_rate": 7.850377875260121e-05, "loss": 0.0067, "step": 16906 }, { "epoch": 3.272639318885449, "grad_norm": 0.05354605242609978, "learning_rate": 7.850143890950267e-05, "loss": 0.0067, "step": 16907 }, { "epoch": 3.2728328173374615, "grad_norm": 0.030357705429196358, "learning_rate": 7.849909897903156e-05, "loss": 0.0064, "step": 16908 }, { "epoch": 3.2730263157894735, "grad_norm": 0.0620286799967289, "learning_rate": 7.84967589611966e-05, "loss": 0.0062, "step": 16909 }, { "epoch": 3.273219814241486, "grad_norm": 0.044876351952552795, "learning_rate": 7.849441885600648e-05, "loss": 0.0077, "step": 16910 }, { "epoch": 3.2734133126934983, "grad_norm": 0.05634557083249092, "learning_rate": 7.849207866346991e-05, "loss": 0.0082, "step": 16911 }, { "epoch": 3.2736068111455108, "grad_norm": 0.04548610374331474, "learning_rate": 7.848973838359557e-05, "loss": 0.0064, "step": 16912 }, { "epoch": 3.273800309597523, "grad_norm": 0.06391799449920654, "learning_rate": 7.848739801639215e-05, "loss": 0.0064, "step": 16913 }, { "epoch": 3.2739938080495357, "grad_norm": 0.07696065306663513, "learning_rate": 7.84850575618684e-05, "loss": 0.007, "step": 16914 }, { "epoch": 3.274187306501548, "grad_norm": 0.039666734635829926, "learning_rate": 7.848271702003299e-05, "loss": 0.0068, "step": 16915 }, { "epoch": 3.2743808049535605, "grad_norm": 0.06776433438062668, "learning_rate": 7.848037639089462e-05, "loss": 0.007, "step": 16916 }, { "epoch": 3.274574303405573, "grad_norm": 0.05060429498553276, "learning_rate": 7.8478035674462e-05, "loss": 0.0053, "step": 16917 }, { "epoch": 3.274767801857585, "grad_norm": 0.06817614287137985, "learning_rate": 7.847569487074383e-05, "loss": 0.0075, "step": 16918 }, { "epoch": 3.2749613003095974, "grad_norm": 0.060725633054971695, "learning_rate": 7.84733539797488e-05, "loss": 0.0063, "step": 16919 }, { "epoch": 3.27515479876161, "grad_norm": 0.0368368960916996, "learning_rate": 7.847101300148562e-05, "loss": 0.007, "step": 16920 }, { "epoch": 3.2753482972136223, "grad_norm": 0.07694105058908463, "learning_rate": 7.846867193596301e-05, "loss": 0.0068, "step": 16921 }, { "epoch": 3.2755417956656347, "grad_norm": 0.02902028150856495, "learning_rate": 7.846633078318966e-05, "loss": 0.0075, "step": 16922 }, { "epoch": 3.275735294117647, "grad_norm": 0.07121600210666656, "learning_rate": 7.846398954317426e-05, "loss": 0.0062, "step": 16923 }, { "epoch": 3.2759287925696596, "grad_norm": 0.028502024710178375, "learning_rate": 7.846164821592555e-05, "loss": 0.0071, "step": 16924 }, { "epoch": 3.2761222910216716, "grad_norm": 0.06359980255365372, "learning_rate": 7.845930680145219e-05, "loss": 0.0085, "step": 16925 }, { "epoch": 3.276315789473684, "grad_norm": 0.03914238139986992, "learning_rate": 7.845696529976292e-05, "loss": 0.0052, "step": 16926 }, { "epoch": 3.2765092879256965, "grad_norm": 0.06515393406152725, "learning_rate": 7.845462371086643e-05, "loss": 0.0063, "step": 16927 }, { "epoch": 3.276702786377709, "grad_norm": 0.04920588433742523, "learning_rate": 7.845228203477145e-05, "loss": 0.007, "step": 16928 }, { "epoch": 3.2768962848297214, "grad_norm": 0.05265173316001892, "learning_rate": 7.844994027148663e-05, "loss": 0.0068, "step": 16929 }, { "epoch": 3.277089783281734, "grad_norm": 0.05084403231739998, "learning_rate": 7.844759842102073e-05, "loss": 0.0054, "step": 16930 }, { "epoch": 3.2772832817337463, "grad_norm": 0.049338314682245255, "learning_rate": 
7.844525648338245e-05, "loss": 0.0065, "step": 16931 }, { "epoch": 3.2774767801857587, "grad_norm": 0.03978338837623596, "learning_rate": 7.844291445858045e-05, "loss": 0.0063, "step": 16932 }, { "epoch": 3.2776702786377707, "grad_norm": 0.05009348317980766, "learning_rate": 7.844057234662349e-05, "loss": 0.0058, "step": 16933 }, { "epoch": 3.277863777089783, "grad_norm": 0.039436038583517075, "learning_rate": 7.843823014752025e-05, "loss": 0.0058, "step": 16934 }, { "epoch": 3.2780572755417956, "grad_norm": 0.06831919401884079, "learning_rate": 7.843588786127946e-05, "loss": 0.0061, "step": 16935 }, { "epoch": 3.278250773993808, "grad_norm": 0.0396273136138916, "learning_rate": 7.843354548790979e-05, "loss": 0.0062, "step": 16936 }, { "epoch": 3.2784442724458205, "grad_norm": 0.054343413561582565, "learning_rate": 7.843120302742e-05, "loss": 0.0063, "step": 16937 }, { "epoch": 3.278637770897833, "grad_norm": 0.037742696702480316, "learning_rate": 7.842886047981875e-05, "loss": 0.0075, "step": 16938 }, { "epoch": 3.2788312693498454, "grad_norm": 0.06902205944061279, "learning_rate": 7.842651784511478e-05, "loss": 0.0064, "step": 16939 }, { "epoch": 3.2790247678018574, "grad_norm": 0.03156692907214165, "learning_rate": 7.842417512331677e-05, "loss": 0.0066, "step": 16940 }, { "epoch": 3.27921826625387, "grad_norm": 0.06442605704069138, "learning_rate": 7.842183231443348e-05, "loss": 0.0076, "step": 16941 }, { "epoch": 3.2794117647058822, "grad_norm": 0.04994779825210571, "learning_rate": 7.841948941847357e-05, "loss": 0.0065, "step": 16942 }, { "epoch": 3.2796052631578947, "grad_norm": 0.040738217532634735, "learning_rate": 7.841714643544579e-05, "loss": 0.0075, "step": 16943 }, { "epoch": 3.279798761609907, "grad_norm": 0.07944350689649582, "learning_rate": 7.841480336535879e-05, "loss": 0.0073, "step": 16944 }, { "epoch": 3.2799922600619196, "grad_norm": 0.04770480841398239, "learning_rate": 7.841246020822134e-05, "loss": 0.0061, "step": 16945 }, { "epoch": 3.280185758513932, "grad_norm": 0.04583609849214554, "learning_rate": 7.841011696404215e-05, "loss": 0.0052, "step": 16946 }, { "epoch": 3.2803792569659445, "grad_norm": 0.07975093275308609, "learning_rate": 7.840777363282989e-05, "loss": 0.0069, "step": 16947 }, { "epoch": 3.280572755417957, "grad_norm": 0.024921467527747154, "learning_rate": 7.840543021459331e-05, "loss": 0.0058, "step": 16948 }, { "epoch": 3.280766253869969, "grad_norm": 0.08917833864688873, "learning_rate": 7.840308670934108e-05, "loss": 0.0064, "step": 16949 }, { "epoch": 3.2809597523219813, "grad_norm": 0.04505738988518715, "learning_rate": 7.840074311708197e-05, "loss": 0.0067, "step": 16950 }, { "epoch": 3.281153250773994, "grad_norm": 0.06925511360168457, "learning_rate": 7.839839943782466e-05, "loss": 0.0069, "step": 16951 }, { "epoch": 3.281346749226006, "grad_norm": 0.0644569844007492, "learning_rate": 7.839605567157785e-05, "loss": 0.0067, "step": 16952 }, { "epoch": 3.2815402476780187, "grad_norm": 0.07078484445810318, "learning_rate": 7.83937118183503e-05, "loss": 0.006, "step": 16953 }, { "epoch": 3.281733746130031, "grad_norm": 0.07098419219255447, "learning_rate": 7.839136787815067e-05, "loss": 0.0073, "step": 16954 }, { "epoch": 3.281927244582043, "grad_norm": 0.045027557760477066, "learning_rate": 7.83890238509877e-05, "loss": 0.0065, "step": 16955 }, { "epoch": 3.2821207430340555, "grad_norm": 0.08389786630868912, "learning_rate": 7.838667973687011e-05, "loss": 0.0087, "step": 16956 }, { "epoch": 3.282314241486068, "grad_norm": 0.0704120621085167, 
"learning_rate": 7.838433553580659e-05, "loss": 0.0079, "step": 16957 }, { "epoch": 3.2825077399380804, "grad_norm": 0.08486555516719818, "learning_rate": 7.838199124780589e-05, "loss": 0.0064, "step": 16958 }, { "epoch": 3.282701238390093, "grad_norm": 0.07608705759048462, "learning_rate": 7.83796468728767e-05, "loss": 0.0057, "step": 16959 }, { "epoch": 3.2828947368421053, "grad_norm": 0.09030880033969879, "learning_rate": 7.837730241102775e-05, "loss": 0.0069, "step": 16960 }, { "epoch": 3.2830882352941178, "grad_norm": 0.0722014456987381, "learning_rate": 7.837495786226774e-05, "loss": 0.0071, "step": 16961 }, { "epoch": 3.28328173374613, "grad_norm": 0.06764964014291763, "learning_rate": 7.837261322660541e-05, "loss": 0.007, "step": 16962 }, { "epoch": 3.2834752321981426, "grad_norm": 0.07608840614557266, "learning_rate": 7.837026850404946e-05, "loss": 0.0052, "step": 16963 }, { "epoch": 3.2836687306501546, "grad_norm": 0.05949985608458519, "learning_rate": 7.83679236946086e-05, "loss": 0.0059, "step": 16964 }, { "epoch": 3.283862229102167, "grad_norm": 0.11228977888822556, "learning_rate": 7.836557879829156e-05, "loss": 0.0067, "step": 16965 }, { "epoch": 3.2840557275541795, "grad_norm": 0.060878004878759384, "learning_rate": 7.836323381510707e-05, "loss": 0.0056, "step": 16966 }, { "epoch": 3.284249226006192, "grad_norm": 0.11479782313108444, "learning_rate": 7.836088874506382e-05, "loss": 0.005, "step": 16967 }, { "epoch": 3.2844427244582044, "grad_norm": 0.054972078651189804, "learning_rate": 7.835854358817054e-05, "loss": 0.0072, "step": 16968 }, { "epoch": 3.284636222910217, "grad_norm": 0.12279491126537323, "learning_rate": 7.835619834443595e-05, "loss": 0.0062, "step": 16969 }, { "epoch": 3.2848297213622293, "grad_norm": 0.055013347417116165, "learning_rate": 7.835385301386878e-05, "loss": 0.0077, "step": 16970 }, { "epoch": 3.2850232198142413, "grad_norm": 0.07608642429113388, "learning_rate": 7.835150759647773e-05, "loss": 0.0065, "step": 16971 }, { "epoch": 3.2852167182662537, "grad_norm": 0.12322521954774857, "learning_rate": 7.834916209227155e-05, "loss": 0.0063, "step": 16972 }, { "epoch": 3.285410216718266, "grad_norm": 0.08199736475944519, "learning_rate": 7.834681650125891e-05, "loss": 0.0074, "step": 16973 }, { "epoch": 3.2856037151702786, "grad_norm": 0.12863194942474365, "learning_rate": 7.83444708234486e-05, "loss": 0.0059, "step": 16974 }, { "epoch": 3.285797213622291, "grad_norm": 0.033101875334978104, "learning_rate": 7.834212505884925e-05, "loss": 0.006, "step": 16975 }, { "epoch": 3.2859907120743035, "grad_norm": 0.1427229791879654, "learning_rate": 7.833977920746965e-05, "loss": 0.0078, "step": 16976 }, { "epoch": 3.286184210526316, "grad_norm": 0.030533570796251297, "learning_rate": 7.83374332693185e-05, "loss": 0.0066, "step": 16977 }, { "epoch": 3.2863777089783284, "grad_norm": 0.14759354293346405, "learning_rate": 7.833508724440452e-05, "loss": 0.0067, "step": 16978 }, { "epoch": 3.2865712074303404, "grad_norm": 0.03269040584564209, "learning_rate": 7.833274113273645e-05, "loss": 0.0053, "step": 16979 }, { "epoch": 3.286764705882353, "grad_norm": 0.10673560202121735, "learning_rate": 7.833039493432299e-05, "loss": 0.0058, "step": 16980 }, { "epoch": 3.2869582043343653, "grad_norm": 0.0828496590256691, "learning_rate": 7.832804864917287e-05, "loss": 0.0053, "step": 16981 }, { "epoch": 3.2871517027863777, "grad_norm": 0.06407415866851807, "learning_rate": 7.832570227729481e-05, "loss": 0.0069, "step": 16982 }, { "epoch": 3.28734520123839, "grad_norm": 
0.13973701000213623, "learning_rate": 7.832335581869755e-05, "loss": 0.0057, "step": 16983 }, { "epoch": 3.2875386996904026, "grad_norm": 0.07256069034337997, "learning_rate": 7.832100927338978e-05, "loss": 0.0069, "step": 16984 }, { "epoch": 3.287732198142415, "grad_norm": 0.13184912502765656, "learning_rate": 7.831866264138026e-05, "loss": 0.007, "step": 16985 }, { "epoch": 3.287925696594427, "grad_norm": 0.1035272479057312, "learning_rate": 7.83163159226777e-05, "loss": 0.0061, "step": 16986 }, { "epoch": 3.2881191950464395, "grad_norm": 0.10343099385499954, "learning_rate": 7.83139691172908e-05, "loss": 0.0056, "step": 16987 }, { "epoch": 3.288312693498452, "grad_norm": 0.10528894513845444, "learning_rate": 7.831162222522832e-05, "loss": 0.0064, "step": 16988 }, { "epoch": 3.2885061919504643, "grad_norm": 0.07243497669696808, "learning_rate": 7.830927524649899e-05, "loss": 0.0087, "step": 16989 }, { "epoch": 3.288699690402477, "grad_norm": 0.10391560941934586, "learning_rate": 7.83069281811115e-05, "loss": 0.0069, "step": 16990 }, { "epoch": 3.2888931888544892, "grad_norm": 0.08436830341815948, "learning_rate": 7.83045810290746e-05, "loss": 0.0056, "step": 16991 }, { "epoch": 3.2890866873065017, "grad_norm": 0.08174516260623932, "learning_rate": 7.8302233790397e-05, "loss": 0.0048, "step": 16992 }, { "epoch": 3.289280185758514, "grad_norm": 0.09354659914970398, "learning_rate": 7.829988646508746e-05, "loss": 0.0067, "step": 16993 }, { "epoch": 3.2894736842105265, "grad_norm": 0.05663376301527023, "learning_rate": 7.829753905315467e-05, "loss": 0.0067, "step": 16994 }, { "epoch": 3.2896671826625385, "grad_norm": 0.08008848130702972, "learning_rate": 7.829519155460735e-05, "loss": 0.0056, "step": 16995 }, { "epoch": 3.289860681114551, "grad_norm": 0.04861293360590935, "learning_rate": 7.829284396945426e-05, "loss": 0.0065, "step": 16996 }, { "epoch": 3.2900541795665634, "grad_norm": 0.08572085201740265, "learning_rate": 7.829049629770414e-05, "loss": 0.0079, "step": 16997 }, { "epoch": 3.290247678018576, "grad_norm": 0.05800856649875641, "learning_rate": 7.828814853936567e-05, "loss": 0.0066, "step": 16998 }, { "epoch": 3.2904411764705883, "grad_norm": 0.08144617825746536, "learning_rate": 7.828580069444761e-05, "loss": 0.0053, "step": 16999 }, { "epoch": 3.2906346749226008, "grad_norm": 0.06758444756269455, "learning_rate": 7.828345276295869e-05, "loss": 0.0064, "step": 17000 }, { "epoch": 3.290828173374613, "grad_norm": 0.04861221835017204, "learning_rate": 7.828110474490759e-05, "loss": 0.006, "step": 17001 }, { "epoch": 3.291021671826625, "grad_norm": 0.06882649660110474, "learning_rate": 7.827875664030312e-05, "loss": 0.0062, "step": 17002 }, { "epoch": 3.2912151702786376, "grad_norm": 0.03142296150326729, "learning_rate": 7.827640844915394e-05, "loss": 0.0065, "step": 17003 }, { "epoch": 3.29140866873065, "grad_norm": 0.07006395608186722, "learning_rate": 7.827406017146883e-05, "loss": 0.0057, "step": 17004 }, { "epoch": 3.2916021671826625, "grad_norm": 0.03892414644360542, "learning_rate": 7.82717118072565e-05, "loss": 0.0058, "step": 17005 }, { "epoch": 3.291795665634675, "grad_norm": 0.11474516242742538, "learning_rate": 7.826936335652567e-05, "loss": 0.0078, "step": 17006 }, { "epoch": 3.2919891640866874, "grad_norm": 0.042623940855264664, "learning_rate": 7.82670148192851e-05, "loss": 0.0063, "step": 17007 }, { "epoch": 3.2921826625387, "grad_norm": 0.13707764446735382, "learning_rate": 7.826466619554347e-05, "loss": 0.0069, "step": 17008 }, { "epoch": 3.2923761609907123, 
"grad_norm": 0.03920697048306465, "learning_rate": 7.826231748530957e-05, "loss": 0.0056, "step": 17009 }, { "epoch": 3.2925696594427243, "grad_norm": 0.08334971219301224, "learning_rate": 7.825996868859209e-05, "loss": 0.0064, "step": 17010 }, { "epoch": 3.2927631578947367, "grad_norm": 0.053708143532276154, "learning_rate": 7.825761980539979e-05, "loss": 0.0052, "step": 17011 }, { "epoch": 3.292956656346749, "grad_norm": 0.07706890255212784, "learning_rate": 7.825527083574138e-05, "loss": 0.0056, "step": 17012 }, { "epoch": 3.2931501547987616, "grad_norm": 0.06222521513700485, "learning_rate": 7.825292177962559e-05, "loss": 0.006, "step": 17013 }, { "epoch": 3.293343653250774, "grad_norm": 0.05463402718305588, "learning_rate": 7.82505726370612e-05, "loss": 0.0058, "step": 17014 }, { "epoch": 3.2935371517027865, "grad_norm": 0.07342371344566345, "learning_rate": 7.824822340805689e-05, "loss": 0.0067, "step": 17015 }, { "epoch": 3.293730650154799, "grad_norm": 0.03631860017776489, "learning_rate": 7.824587409262142e-05, "loss": 0.0054, "step": 17016 }, { "epoch": 3.293924148606811, "grad_norm": 0.06063643842935562, "learning_rate": 7.824352469076352e-05, "loss": 0.0061, "step": 17017 }, { "epoch": 3.2941176470588234, "grad_norm": 0.027997726574540138, "learning_rate": 7.824117520249192e-05, "loss": 0.0057, "step": 17018 }, { "epoch": 3.294311145510836, "grad_norm": 0.035279128700494766, "learning_rate": 7.823882562781536e-05, "loss": 0.008, "step": 17019 }, { "epoch": 3.2945046439628483, "grad_norm": 0.03877909854054451, "learning_rate": 7.823647596674256e-05, "loss": 0.0067, "step": 17020 }, { "epoch": 3.2946981424148607, "grad_norm": 0.041667915880680084, "learning_rate": 7.823412621928227e-05, "loss": 0.0073, "step": 17021 }, { "epoch": 3.294891640866873, "grad_norm": 0.05356727913022041, "learning_rate": 7.823177638544323e-05, "loss": 0.0063, "step": 17022 }, { "epoch": 3.2950851393188856, "grad_norm": 0.05262220278382301, "learning_rate": 7.822942646523417e-05, "loss": 0.0055, "step": 17023 }, { "epoch": 3.295278637770898, "grad_norm": 0.07272864133119583, "learning_rate": 7.822707645866383e-05, "loss": 0.0048, "step": 17024 }, { "epoch": 3.2954721362229105, "grad_norm": 0.05755670368671417, "learning_rate": 7.822472636574094e-05, "loss": 0.0062, "step": 17025 }, { "epoch": 3.2956656346749225, "grad_norm": 0.07796306163072586, "learning_rate": 7.822237618647423e-05, "loss": 0.0074, "step": 17026 }, { "epoch": 3.295859133126935, "grad_norm": 0.04870953783392906, "learning_rate": 7.822002592087246e-05, "loss": 0.0058, "step": 17027 }, { "epoch": 3.2960526315789473, "grad_norm": 0.07877735793590546, "learning_rate": 7.821767556894435e-05, "loss": 0.0069, "step": 17028 }, { "epoch": 3.29624613003096, "grad_norm": 0.061536163091659546, "learning_rate": 7.821532513069865e-05, "loss": 0.0069, "step": 17029 }, { "epoch": 3.2964396284829722, "grad_norm": 0.05286619812250137, "learning_rate": 7.821297460614408e-05, "loss": 0.0063, "step": 17030 }, { "epoch": 3.2966331269349847, "grad_norm": 0.052338577806949615, "learning_rate": 7.82106239952894e-05, "loss": 0.0068, "step": 17031 }, { "epoch": 3.2968266253869967, "grad_norm": 0.03305591642856598, "learning_rate": 7.820827329814335e-05, "loss": 0.0068, "step": 17032 }, { "epoch": 3.297020123839009, "grad_norm": 0.07745043188333511, "learning_rate": 7.820592251471463e-05, "loss": 0.0054, "step": 17033 }, { "epoch": 3.2972136222910216, "grad_norm": 0.04344455525279045, "learning_rate": 7.820357164501202e-05, "loss": 0.005, "step": 17034 }, { 
"epoch": 3.297407120743034, "grad_norm": 0.09171602129936218, "learning_rate": 7.820122068904425e-05, "loss": 0.0069, "step": 17035 }, { "epoch": 3.2976006191950464, "grad_norm": 0.04855227842926979, "learning_rate": 7.819886964682006e-05, "loss": 0.0056, "step": 17036 }, { "epoch": 3.297794117647059, "grad_norm": 0.09933622926473618, "learning_rate": 7.819651851834821e-05, "loss": 0.0069, "step": 17037 }, { "epoch": 3.2979876160990713, "grad_norm": 0.09103895723819733, "learning_rate": 7.819416730363739e-05, "loss": 0.0069, "step": 17038 }, { "epoch": 3.2981811145510838, "grad_norm": 0.052422646433115005, "learning_rate": 7.819181600269637e-05, "loss": 0.0061, "step": 17039 }, { "epoch": 3.298374613003096, "grad_norm": 0.1104675754904747, "learning_rate": 7.81894646155339e-05, "loss": 0.0058, "step": 17040 }, { "epoch": 3.298568111455108, "grad_norm": 0.07530251145362854, "learning_rate": 7.818711314215872e-05, "loss": 0.0063, "step": 17041 }, { "epoch": 3.2987616099071206, "grad_norm": 0.07995962351560593, "learning_rate": 7.818476158257958e-05, "loss": 0.0066, "step": 17042 }, { "epoch": 3.298955108359133, "grad_norm": 0.11484867334365845, "learning_rate": 7.818240993680518e-05, "loss": 0.0062, "step": 17043 }, { "epoch": 3.2991486068111455, "grad_norm": 0.06755498796701431, "learning_rate": 7.81800582048443e-05, "loss": 0.0068, "step": 17044 }, { "epoch": 3.299342105263158, "grad_norm": 0.13203534483909607, "learning_rate": 7.817770638670569e-05, "loss": 0.0057, "step": 17045 }, { "epoch": 3.2995356037151704, "grad_norm": 0.11828276515007019, "learning_rate": 7.817535448239805e-05, "loss": 0.007, "step": 17046 }, { "epoch": 3.299729102167183, "grad_norm": 0.12026026844978333, "learning_rate": 7.817300249193018e-05, "loss": 0.0071, "step": 17047 }, { "epoch": 3.299922600619195, "grad_norm": 0.12401942163705826, "learning_rate": 7.817065041531078e-05, "loss": 0.0076, "step": 17048 }, { "epoch": 3.3001160990712073, "grad_norm": 0.07256254553794861, "learning_rate": 7.816829825254862e-05, "loss": 0.0086, "step": 17049 }, { "epoch": 3.3003095975232197, "grad_norm": 0.07765578478574753, "learning_rate": 7.816594600365243e-05, "loss": 0.007, "step": 17050 }, { "epoch": 3.300503095975232, "grad_norm": 0.0739864856004715, "learning_rate": 7.816359366863097e-05, "loss": 0.008, "step": 17051 }, { "epoch": 3.3006965944272446, "grad_norm": 0.06985308974981308, "learning_rate": 7.816124124749298e-05, "loss": 0.0076, "step": 17052 }, { "epoch": 3.300890092879257, "grad_norm": 0.10864733904600143, "learning_rate": 7.815888874024719e-05, "loss": 0.0079, "step": 17053 }, { "epoch": 3.3010835913312695, "grad_norm": 0.0835685133934021, "learning_rate": 7.815653614690236e-05, "loss": 0.0064, "step": 17054 }, { "epoch": 3.301277089783282, "grad_norm": 0.09746938198804855, "learning_rate": 7.815418346746724e-05, "loss": 0.0058, "step": 17055 }, { "epoch": 3.301470588235294, "grad_norm": 0.0823947861790657, "learning_rate": 7.815183070195056e-05, "loss": 0.0098, "step": 17056 }, { "epoch": 3.3016640866873064, "grad_norm": 0.08764946460723877, "learning_rate": 7.81494778503611e-05, "loss": 0.0057, "step": 17057 }, { "epoch": 3.301857585139319, "grad_norm": 0.1231919527053833, "learning_rate": 7.814712491270756e-05, "loss": 0.0071, "step": 17058 }, { "epoch": 3.3020510835913313, "grad_norm": 0.08697890490293503, "learning_rate": 7.814477188899873e-05, "loss": 0.0064, "step": 17059 }, { "epoch": 3.3022445820433437, "grad_norm": 0.06424101442098618, "learning_rate": 7.814241877924334e-05, "loss": 0.0059, "step": 
17060 }, { "epoch": 3.302438080495356, "grad_norm": 0.13308243453502655, "learning_rate": 7.814006558345014e-05, "loss": 0.0063, "step": 17061 }, { "epoch": 3.3026315789473686, "grad_norm": 0.058373380452394485, "learning_rate": 7.813771230162789e-05, "loss": 0.0064, "step": 17062 }, { "epoch": 3.3028250773993806, "grad_norm": 0.13437579572200775, "learning_rate": 7.813535893378529e-05, "loss": 0.0069, "step": 17063 }, { "epoch": 3.303018575851393, "grad_norm": 0.07083152234554291, "learning_rate": 7.813300547993116e-05, "loss": 0.0058, "step": 17064 }, { "epoch": 3.3032120743034055, "grad_norm": 0.11888937652111053, "learning_rate": 7.813065194007419e-05, "loss": 0.0062, "step": 17065 }, { "epoch": 3.303405572755418, "grad_norm": 0.052670881152153015, "learning_rate": 7.812829831422317e-05, "loss": 0.0066, "step": 17066 }, { "epoch": 3.3035990712074303, "grad_norm": 0.09087426960468292, "learning_rate": 7.812594460238683e-05, "loss": 0.0062, "step": 17067 }, { "epoch": 3.303792569659443, "grad_norm": 0.07103963941335678, "learning_rate": 7.812359080457393e-05, "loss": 0.0071, "step": 17068 }, { "epoch": 3.3039860681114552, "grad_norm": 0.08871293812990189, "learning_rate": 7.812123692079321e-05, "loss": 0.0063, "step": 17069 }, { "epoch": 3.3041795665634677, "grad_norm": 0.0635523572564125, "learning_rate": 7.811888295105346e-05, "loss": 0.0067, "step": 17070 }, { "epoch": 3.30437306501548, "grad_norm": 0.055237121880054474, "learning_rate": 7.811652889536336e-05, "loss": 0.0058, "step": 17071 }, { "epoch": 3.304566563467492, "grad_norm": 0.09480699896812439, "learning_rate": 7.811417475373171e-05, "loss": 0.0059, "step": 17072 }, { "epoch": 3.3047600619195046, "grad_norm": 0.0467781201004982, "learning_rate": 7.811182052616726e-05, "loss": 0.0074, "step": 17073 }, { "epoch": 3.304953560371517, "grad_norm": 0.08320841193199158, "learning_rate": 7.810946621267877e-05, "loss": 0.0078, "step": 17074 }, { "epoch": 3.3051470588235294, "grad_norm": 0.041106339544057846, "learning_rate": 7.810711181327496e-05, "loss": 0.0058, "step": 17075 }, { "epoch": 3.305340557275542, "grad_norm": 0.07412059605121613, "learning_rate": 7.810475732796462e-05, "loss": 0.0071, "step": 17076 }, { "epoch": 3.3055340557275543, "grad_norm": 0.057015310972929, "learning_rate": 7.810240275675646e-05, "loss": 0.0064, "step": 17077 }, { "epoch": 3.3057275541795663, "grad_norm": 0.06901585310697556, "learning_rate": 7.810004809965928e-05, "loss": 0.0066, "step": 17078 }, { "epoch": 3.3059210526315788, "grad_norm": 0.03937618434429169, "learning_rate": 7.80976933566818e-05, "loss": 0.007, "step": 17079 }, { "epoch": 3.306114551083591, "grad_norm": 0.10607070475816727, "learning_rate": 7.809533852783279e-05, "loss": 0.0059, "step": 17080 }, { "epoch": 3.3063080495356036, "grad_norm": 0.058900319039821625, "learning_rate": 7.8092983613121e-05, "loss": 0.0057, "step": 17081 }, { "epoch": 3.306501547987616, "grad_norm": 0.09982535243034363, "learning_rate": 7.80906286125552e-05, "loss": 0.0062, "step": 17082 }, { "epoch": 3.3066950464396285, "grad_norm": 0.10432734340429306, "learning_rate": 7.808827352614413e-05, "loss": 0.0071, "step": 17083 }, { "epoch": 3.306888544891641, "grad_norm": 0.04306734725832939, "learning_rate": 7.808591835389655e-05, "loss": 0.0065, "step": 17084 }, { "epoch": 3.3070820433436534, "grad_norm": 0.10969848185777664, "learning_rate": 7.80835630958212e-05, "loss": 0.0057, "step": 17085 }, { "epoch": 3.307275541795666, "grad_norm": 0.06594725698232651, "learning_rate": 7.808120775192685e-05, "loss": 
0.0069, "step": 17086 }, { "epoch": 3.307469040247678, "grad_norm": 0.10492231696844101, "learning_rate": 7.807885232222227e-05, "loss": 0.0066, "step": 17087 }, { "epoch": 3.3076625386996903, "grad_norm": 0.09680123627185822, "learning_rate": 7.80764968067162e-05, "loss": 0.0053, "step": 17088 }, { "epoch": 3.3078560371517027, "grad_norm": 0.05485052987933159, "learning_rate": 7.80741412054174e-05, "loss": 0.006, "step": 17089 }, { "epoch": 3.308049535603715, "grad_norm": 0.09975200891494751, "learning_rate": 7.807178551833463e-05, "loss": 0.0061, "step": 17090 }, { "epoch": 3.3082430340557276, "grad_norm": 0.06268724054098129, "learning_rate": 7.806942974547665e-05, "loss": 0.0069, "step": 17091 }, { "epoch": 3.30843653250774, "grad_norm": 0.09009463340044022, "learning_rate": 7.80670738868522e-05, "loss": 0.0052, "step": 17092 }, { "epoch": 3.3086300309597525, "grad_norm": 0.07223672419786453, "learning_rate": 7.806471794247007e-05, "loss": 0.0073, "step": 17093 }, { "epoch": 3.3088235294117645, "grad_norm": 0.07943852245807648, "learning_rate": 7.806236191233899e-05, "loss": 0.0067, "step": 17094 }, { "epoch": 3.309017027863777, "grad_norm": 0.041588254272937775, "learning_rate": 7.806000579646774e-05, "loss": 0.0077, "step": 17095 }, { "epoch": 3.3092105263157894, "grad_norm": 0.04375683516263962, "learning_rate": 7.805764959486507e-05, "loss": 0.007, "step": 17096 }, { "epoch": 3.309404024767802, "grad_norm": 0.04505028575658798, "learning_rate": 7.805529330753973e-05, "loss": 0.007, "step": 17097 }, { "epoch": 3.3095975232198143, "grad_norm": 0.05604575574398041, "learning_rate": 7.805293693450049e-05, "loss": 0.0058, "step": 17098 }, { "epoch": 3.3097910216718267, "grad_norm": 0.038704533129930496, "learning_rate": 7.805058047575611e-05, "loss": 0.0062, "step": 17099 }, { "epoch": 3.309984520123839, "grad_norm": 0.053670402616262436, "learning_rate": 7.804822393131537e-05, "loss": 0.0071, "step": 17100 }, { "epoch": 3.3101780185758516, "grad_norm": 0.034515392035245895, "learning_rate": 7.804586730118701e-05, "loss": 0.006, "step": 17101 }, { "epoch": 3.3103715170278636, "grad_norm": 0.037529852241277695, "learning_rate": 7.804351058537977e-05, "loss": 0.0066, "step": 17102 }, { "epoch": 3.310565015479876, "grad_norm": 0.06099352613091469, "learning_rate": 7.804115378390246e-05, "loss": 0.0062, "step": 17103 }, { "epoch": 3.3107585139318885, "grad_norm": 0.12211041152477264, "learning_rate": 7.803879689676381e-05, "loss": 0.0082, "step": 17104 }, { "epoch": 3.310952012383901, "grad_norm": 0.03704022243618965, "learning_rate": 7.80364399239726e-05, "loss": 0.0067, "step": 17105 }, { "epoch": 3.3111455108359134, "grad_norm": 0.11486348509788513, "learning_rate": 7.803408286553756e-05, "loss": 0.0056, "step": 17106 }, { "epoch": 3.311339009287926, "grad_norm": 0.07817398756742477, "learning_rate": 7.803172572146748e-05, "loss": 0.0078, "step": 17107 }, { "epoch": 3.3115325077399382, "grad_norm": 0.1257002204656601, "learning_rate": 7.802936849177115e-05, "loss": 0.0062, "step": 17108 }, { "epoch": 3.3117260061919502, "grad_norm": 0.12780888378620148, "learning_rate": 7.802701117645726e-05, "loss": 0.0058, "step": 17109 }, { "epoch": 3.3119195046439627, "grad_norm": 0.056267090141773224, "learning_rate": 7.802465377553466e-05, "loss": 0.0052, "step": 17110 }, { "epoch": 3.312113003095975, "grad_norm": 0.1706797182559967, "learning_rate": 7.802229628901204e-05, "loss": 0.0067, "step": 17111 }, { "epoch": 3.3123065015479876, "grad_norm": 0.04176938161253929, "learning_rate": 
7.80199387168982e-05, "loss": 0.0055, "step": 17112 }, { "epoch": 3.3125, "grad_norm": 0.10520664602518082, "learning_rate": 7.801758105920189e-05, "loss": 0.0058, "step": 17113 }, { "epoch": 3.3126934984520124, "grad_norm": 0.11561388522386551, "learning_rate": 7.801522331593191e-05, "loss": 0.0065, "step": 17114 }, { "epoch": 3.312886996904025, "grad_norm": 0.04206313192844391, "learning_rate": 7.801286548709697e-05, "loss": 0.0071, "step": 17115 }, { "epoch": 3.3130804953560373, "grad_norm": 0.0947272852063179, "learning_rate": 7.801050757270589e-05, "loss": 0.0066, "step": 17116 }, { "epoch": 3.3132739938080498, "grad_norm": 0.07022985816001892, "learning_rate": 7.800814957276741e-05, "loss": 0.0063, "step": 17117 }, { "epoch": 3.3134674922600618, "grad_norm": 0.05930324271321297, "learning_rate": 7.80057914872903e-05, "loss": 0.0067, "step": 17118 }, { "epoch": 3.313660990712074, "grad_norm": 0.0873001366853714, "learning_rate": 7.800343331628332e-05, "loss": 0.0079, "step": 17119 }, { "epoch": 3.3138544891640866, "grad_norm": 0.020104849711060524, "learning_rate": 7.800107505975523e-05, "loss": 0.0066, "step": 17120 }, { "epoch": 3.314047987616099, "grad_norm": 0.08126378059387207, "learning_rate": 7.799871671771485e-05, "loss": 0.0055, "step": 17121 }, { "epoch": 3.3142414860681115, "grad_norm": 0.03522169217467308, "learning_rate": 7.799635829017086e-05, "loss": 0.0061, "step": 17122 }, { "epoch": 3.314434984520124, "grad_norm": 0.039959438145160675, "learning_rate": 7.79939997771321e-05, "loss": 0.006, "step": 17123 }, { "epoch": 3.3146284829721364, "grad_norm": 0.0680447369813919, "learning_rate": 7.799164117860734e-05, "loss": 0.007, "step": 17124 }, { "epoch": 3.3148219814241484, "grad_norm": 0.027106449007987976, "learning_rate": 7.798928249460529e-05, "loss": 0.0067, "step": 17125 }, { "epoch": 3.315015479876161, "grad_norm": 0.05481948330998421, "learning_rate": 7.798692372513475e-05, "loss": 0.0063, "step": 17126 }, { "epoch": 3.3152089783281733, "grad_norm": 0.04177796468138695, "learning_rate": 7.798456487020452e-05, "loss": 0.0069, "step": 17127 }, { "epoch": 3.3154024767801857, "grad_norm": 0.06139344349503517, "learning_rate": 7.798220592982331e-05, "loss": 0.0064, "step": 17128 }, { "epoch": 3.315595975232198, "grad_norm": 0.06018482521176338, "learning_rate": 7.797984690399993e-05, "loss": 0.0051, "step": 17129 }, { "epoch": 3.3157894736842106, "grad_norm": 0.050891436636447906, "learning_rate": 7.797748779274316e-05, "loss": 0.006, "step": 17130 }, { "epoch": 3.315982972136223, "grad_norm": 0.07464421540498734, "learning_rate": 7.797512859606174e-05, "loss": 0.0056, "step": 17131 }, { "epoch": 3.3161764705882355, "grad_norm": 0.027927210554480553, "learning_rate": 7.797276931396445e-05, "loss": 0.006, "step": 17132 }, { "epoch": 3.3163699690402475, "grad_norm": 0.08482928574085236, "learning_rate": 7.797040994646008e-05, "loss": 0.006, "step": 17133 }, { "epoch": 3.31656346749226, "grad_norm": 0.06102115660905838, "learning_rate": 7.796805049355736e-05, "loss": 0.0061, "step": 17134 }, { "epoch": 3.3167569659442724, "grad_norm": 0.05610455200076103, "learning_rate": 7.79656909552651e-05, "loss": 0.0065, "step": 17135 }, { "epoch": 3.316950464396285, "grad_norm": 0.09773323684930801, "learning_rate": 7.796333133159206e-05, "loss": 0.0065, "step": 17136 }, { "epoch": 3.3171439628482973, "grad_norm": 0.04570159688591957, "learning_rate": 7.796097162254702e-05, "loss": 0.0057, "step": 17137 }, { "epoch": 3.3173374613003097, "grad_norm": 0.06442409753799438, 
"learning_rate": 7.795861182813875e-05, "loss": 0.0062, "step": 17138 }, { "epoch": 3.317530959752322, "grad_norm": 0.12232781201601028, "learning_rate": 7.795625194837599e-05, "loss": 0.0061, "step": 17139 }, { "epoch": 3.317724458204334, "grad_norm": 0.05376999080181122, "learning_rate": 7.795389198326755e-05, "loss": 0.0058, "step": 17140 }, { "epoch": 3.3179179566563466, "grad_norm": 0.10958431661128998, "learning_rate": 7.795153193282221e-05, "loss": 0.0055, "step": 17141 }, { "epoch": 3.318111455108359, "grad_norm": 0.022311899811029434, "learning_rate": 7.794917179704871e-05, "loss": 0.0065, "step": 17142 }, { "epoch": 3.3183049535603715, "grad_norm": 0.0743853896856308, "learning_rate": 7.794681157595587e-05, "loss": 0.0061, "step": 17143 }, { "epoch": 3.318498452012384, "grad_norm": 0.04782310500741005, "learning_rate": 7.794445126955242e-05, "loss": 0.0068, "step": 17144 }, { "epoch": 3.3186919504643964, "grad_norm": 0.050013069063425064, "learning_rate": 7.794209087784718e-05, "loss": 0.0055, "step": 17145 }, { "epoch": 3.318885448916409, "grad_norm": 0.07089788466691971, "learning_rate": 7.793973040084887e-05, "loss": 0.0053, "step": 17146 }, { "epoch": 3.3190789473684212, "grad_norm": 0.02935076877474785, "learning_rate": 7.79373698385663e-05, "loss": 0.0073, "step": 17147 }, { "epoch": 3.3192724458204337, "grad_norm": 0.05445663258433342, "learning_rate": 7.793500919100823e-05, "loss": 0.0071, "step": 17148 }, { "epoch": 3.3194659442724457, "grad_norm": 0.05714239925146103, "learning_rate": 7.793264845818347e-05, "loss": 0.0071, "step": 17149 }, { "epoch": 3.319659442724458, "grad_norm": 0.04264344647526741, "learning_rate": 7.793028764010076e-05, "loss": 0.0079, "step": 17150 }, { "epoch": 3.3198529411764706, "grad_norm": 0.04923379048705101, "learning_rate": 7.792792673676891e-05, "loss": 0.0059, "step": 17151 }, { "epoch": 3.320046439628483, "grad_norm": 0.04118387773633003, "learning_rate": 7.792556574819666e-05, "loss": 0.0063, "step": 17152 }, { "epoch": 3.3202399380804954, "grad_norm": 0.0202181413769722, "learning_rate": 7.792320467439281e-05, "loss": 0.0064, "step": 17153 }, { "epoch": 3.320433436532508, "grad_norm": 0.033434897661209106, "learning_rate": 7.792084351536613e-05, "loss": 0.0058, "step": 17154 }, { "epoch": 3.32062693498452, "grad_norm": 0.024874698370695114, "learning_rate": 7.79184822711254e-05, "loss": 0.0067, "step": 17155 }, { "epoch": 3.3208204334365323, "grad_norm": 0.03358045965433121, "learning_rate": 7.791612094167943e-05, "loss": 0.0065, "step": 17156 }, { "epoch": 3.3210139318885448, "grad_norm": 0.02913539484143257, "learning_rate": 7.791375952703694e-05, "loss": 0.0066, "step": 17157 }, { "epoch": 3.321207430340557, "grad_norm": 0.04406467452645302, "learning_rate": 7.791139802720673e-05, "loss": 0.0054, "step": 17158 }, { "epoch": 3.3214009287925697, "grad_norm": 0.038129132241010666, "learning_rate": 7.79090364421976e-05, "loss": 0.0063, "step": 17159 }, { "epoch": 3.321594427244582, "grad_norm": 0.030061889439821243, "learning_rate": 7.790667477201831e-05, "loss": 0.0064, "step": 17160 }, { "epoch": 3.3217879256965945, "grad_norm": 0.07609407603740692, "learning_rate": 7.790431301667766e-05, "loss": 0.0051, "step": 17161 }, { "epoch": 3.321981424148607, "grad_norm": 0.05122349411249161, "learning_rate": 7.79019511761844e-05, "loss": 0.0081, "step": 17162 }, { "epoch": 3.3221749226006194, "grad_norm": 0.11643616855144501, "learning_rate": 7.789958925054736e-05, "loss": 0.0069, "step": 17163 }, { "epoch": 3.3223684210526314, "grad_norm": 
0.041711218655109406, "learning_rate": 7.789722723977526e-05, "loss": 0.0063, "step": 17164 }, { "epoch": 3.322561919504644, "grad_norm": 0.13678814470767975, "learning_rate": 7.789486514387694e-05, "loss": 0.0075, "step": 17165 }, { "epoch": 3.3227554179566563, "grad_norm": 0.06156042963266373, "learning_rate": 7.789250296286113e-05, "loss": 0.0067, "step": 17166 }, { "epoch": 3.3229489164086687, "grad_norm": 0.102239690721035, "learning_rate": 7.789014069673664e-05, "loss": 0.0075, "step": 17167 }, { "epoch": 3.323142414860681, "grad_norm": 0.11965843290090561, "learning_rate": 7.788777834551224e-05, "loss": 0.0065, "step": 17168 }, { "epoch": 3.3233359133126936, "grad_norm": 0.08610223233699799, "learning_rate": 7.788541590919673e-05, "loss": 0.0054, "step": 17169 }, { "epoch": 3.323529411764706, "grad_norm": 0.11308151483535767, "learning_rate": 7.788305338779887e-05, "loss": 0.0049, "step": 17170 }, { "epoch": 3.323722910216718, "grad_norm": 0.05446470156311989, "learning_rate": 7.788069078132746e-05, "loss": 0.0055, "step": 17171 }, { "epoch": 3.3239164086687305, "grad_norm": 0.11535120755434036, "learning_rate": 7.78783280897913e-05, "loss": 0.0081, "step": 17172 }, { "epoch": 3.324109907120743, "grad_norm": 0.04146324470639229, "learning_rate": 7.787596531319914e-05, "loss": 0.0067, "step": 17173 }, { "epoch": 3.3243034055727554, "grad_norm": 0.1268039494752884, "learning_rate": 7.787360245155977e-05, "loss": 0.0062, "step": 17174 }, { "epoch": 3.324496904024768, "grad_norm": 0.045856546610593796, "learning_rate": 7.787123950488199e-05, "loss": 0.0062, "step": 17175 }, { "epoch": 3.3246904024767803, "grad_norm": 0.06445300579071045, "learning_rate": 7.786887647317458e-05, "loss": 0.0067, "step": 17176 }, { "epoch": 3.3248839009287927, "grad_norm": 0.03814339265227318, "learning_rate": 7.786651335644632e-05, "loss": 0.0064, "step": 17177 }, { "epoch": 3.325077399380805, "grad_norm": 0.03580104932188988, "learning_rate": 7.7864150154706e-05, "loss": 0.0058, "step": 17178 }, { "epoch": 3.325270897832817, "grad_norm": 0.03085542842745781, "learning_rate": 7.78617868679624e-05, "loss": 0.006, "step": 17179 }, { "epoch": 3.3254643962848296, "grad_norm": 0.03040202707052231, "learning_rate": 7.785942349622431e-05, "loss": 0.0049, "step": 17180 }, { "epoch": 3.325657894736842, "grad_norm": 0.051252737641334534, "learning_rate": 7.785706003950052e-05, "loss": 0.0071, "step": 17181 }, { "epoch": 3.3258513931888545, "grad_norm": 0.05350470542907715, "learning_rate": 7.78546964977998e-05, "loss": 0.0078, "step": 17182 }, { "epoch": 3.326044891640867, "grad_norm": 0.054614998400211334, "learning_rate": 7.785233287113097e-05, "loss": 0.006, "step": 17183 }, { "epoch": 3.3262383900928794, "grad_norm": 0.059837326407432556, "learning_rate": 7.784996915950278e-05, "loss": 0.006, "step": 17184 }, { "epoch": 3.326431888544892, "grad_norm": 0.06802274286746979, "learning_rate": 7.784760536292403e-05, "loss": 0.0052, "step": 17185 }, { "epoch": 3.326625386996904, "grad_norm": 0.05746794864535332, "learning_rate": 7.784524148140353e-05, "loss": 0.0068, "step": 17186 }, { "epoch": 3.3268188854489162, "grad_norm": 0.06745464354753494, "learning_rate": 7.784287751495006e-05, "loss": 0.0055, "step": 17187 }, { "epoch": 3.3270123839009287, "grad_norm": 0.07538658380508423, "learning_rate": 7.784051346357237e-05, "loss": 0.007, "step": 17188 }, { "epoch": 3.327205882352941, "grad_norm": 0.0779535323381424, "learning_rate": 7.78381493272793e-05, "loss": 0.0087, "step": 17189 }, { "epoch": 3.3273993808049536, 
"grad_norm": 0.08785135298967361, "learning_rate": 7.783578510607961e-05, "loss": 0.0059, "step": 17190 }, { "epoch": 3.327592879256966, "grad_norm": 0.0543757826089859, "learning_rate": 7.783342079998211e-05, "loss": 0.0077, "step": 17191 }, { "epoch": 3.3277863777089784, "grad_norm": 0.11107978224754333, "learning_rate": 7.783105640899555e-05, "loss": 0.0074, "step": 17192 }, { "epoch": 3.327979876160991, "grad_norm": 0.025600852444767952, "learning_rate": 7.782869193312877e-05, "loss": 0.0061, "step": 17193 }, { "epoch": 3.3281733746130033, "grad_norm": 0.10374386608600616, "learning_rate": 7.782632737239055e-05, "loss": 0.0066, "step": 17194 }, { "epoch": 3.3283668730650153, "grad_norm": 0.06855873018503189, "learning_rate": 7.782396272678964e-05, "loss": 0.0079, "step": 17195 }, { "epoch": 3.3285603715170278, "grad_norm": 0.06116727739572525, "learning_rate": 7.782159799633488e-05, "loss": 0.0068, "step": 17196 }, { "epoch": 3.32875386996904, "grad_norm": 0.1288003772497177, "learning_rate": 7.781923318103504e-05, "loss": 0.0058, "step": 17197 }, { "epoch": 3.3289473684210527, "grad_norm": 0.02778639644384384, "learning_rate": 7.78168682808989e-05, "loss": 0.0061, "step": 17198 }, { "epoch": 3.329140866873065, "grad_norm": 0.1262044906616211, "learning_rate": 7.781450329593527e-05, "loss": 0.007, "step": 17199 }, { "epoch": 3.3293343653250775, "grad_norm": 0.03182600811123848, "learning_rate": 7.781213822615296e-05, "loss": 0.0076, "step": 17200 }, { "epoch": 3.3295278637770895, "grad_norm": 0.08281391113996506, "learning_rate": 7.780977307156072e-05, "loss": 0.0063, "step": 17201 }, { "epoch": 3.329721362229102, "grad_norm": 0.03905831277370453, "learning_rate": 7.780740783216738e-05, "loss": 0.0064, "step": 17202 }, { "epoch": 3.3299148606811144, "grad_norm": 0.0837228000164032, "learning_rate": 7.780504250798169e-05, "loss": 0.006, "step": 17203 }, { "epoch": 3.330108359133127, "grad_norm": 0.058206796646118164, "learning_rate": 7.78026770990125e-05, "loss": 0.0057, "step": 17204 }, { "epoch": 3.3303018575851393, "grad_norm": 0.08331291377544403, "learning_rate": 7.780031160526856e-05, "loss": 0.0058, "step": 17205 }, { "epoch": 3.3304953560371517, "grad_norm": 0.08066865056753159, "learning_rate": 7.779794602675868e-05, "loss": 0.0064, "step": 17206 }, { "epoch": 3.330688854489164, "grad_norm": 0.05295579507946968, "learning_rate": 7.779558036349166e-05, "loss": 0.0061, "step": 17207 }, { "epoch": 3.3308823529411766, "grad_norm": 0.0819692462682724, "learning_rate": 7.77932146154763e-05, "loss": 0.006, "step": 17208 }, { "epoch": 3.331075851393189, "grad_norm": 0.03826708719134331, "learning_rate": 7.779084878272137e-05, "loss": 0.0059, "step": 17209 }, { "epoch": 3.331269349845201, "grad_norm": 0.06627580523490906, "learning_rate": 7.778848286523568e-05, "loss": 0.0058, "step": 17210 }, { "epoch": 3.3314628482972135, "grad_norm": 0.04600289836525917, "learning_rate": 7.778611686302802e-05, "loss": 0.008, "step": 17211 }, { "epoch": 3.331656346749226, "grad_norm": 0.05920696631073952, "learning_rate": 7.77837507761072e-05, "loss": 0.0058, "step": 17212 }, { "epoch": 3.3318498452012384, "grad_norm": 0.056797437369823456, "learning_rate": 7.778138460448202e-05, "loss": 0.0072, "step": 17213 }, { "epoch": 3.332043343653251, "grad_norm": 0.04079911857843399, "learning_rate": 7.777901834816125e-05, "loss": 0.0068, "step": 17214 }, { "epoch": 3.3322368421052633, "grad_norm": 0.09231127798557281, "learning_rate": 7.777665200715371e-05, "loss": 0.0056, "step": 17215 }, { "epoch": 
3.3324303405572757, "grad_norm": 0.027421502396464348, "learning_rate": 7.777428558146817e-05, "loss": 0.0065, "step": 17216 }, { "epoch": 3.3326238390092877, "grad_norm": 0.09261646121740341, "learning_rate": 7.777191907111347e-05, "loss": 0.0066, "step": 17217 }, { "epoch": 3.3328173374613, "grad_norm": 0.0348946750164032, "learning_rate": 7.776955247609838e-05, "loss": 0.0063, "step": 17218 }, { "epoch": 3.3330108359133126, "grad_norm": 0.08446933329105377, "learning_rate": 7.776718579643171e-05, "loss": 0.0061, "step": 17219 }, { "epoch": 3.333204334365325, "grad_norm": 0.06622115522623062, "learning_rate": 7.776481903212224e-05, "loss": 0.0074, "step": 17220 }, { "epoch": 3.3333978328173375, "grad_norm": 0.061421845108270645, "learning_rate": 7.776245218317879e-05, "loss": 0.0068, "step": 17221 }, { "epoch": 3.33359133126935, "grad_norm": 0.09256453067064285, "learning_rate": 7.776008524961016e-05, "loss": 0.0069, "step": 17222 }, { "epoch": 3.3337848297213624, "grad_norm": 0.04388754069805145, "learning_rate": 7.775771823142513e-05, "loss": 0.0071, "step": 17223 }, { "epoch": 3.333978328173375, "grad_norm": 0.07457222789525986, "learning_rate": 7.775535112863251e-05, "loss": 0.0066, "step": 17224 }, { "epoch": 3.334171826625387, "grad_norm": 0.05676809698343277, "learning_rate": 7.77529839412411e-05, "loss": 0.007, "step": 17225 }, { "epoch": 3.3343653250773992, "grad_norm": 0.0602264478802681, "learning_rate": 7.775061666925973e-05, "loss": 0.005, "step": 17226 }, { "epoch": 3.3345588235294117, "grad_norm": 0.0668758898973465, "learning_rate": 7.774824931269714e-05, "loss": 0.008, "step": 17227 }, { "epoch": 3.334752321981424, "grad_norm": 0.05780084803700447, "learning_rate": 7.774588187156219e-05, "loss": 0.0071, "step": 17228 }, { "epoch": 3.3349458204334366, "grad_norm": 0.06210267171263695, "learning_rate": 7.774351434586362e-05, "loss": 0.0066, "step": 17229 }, { "epoch": 3.335139318885449, "grad_norm": 0.04801308736205101, "learning_rate": 7.774114673561029e-05, "loss": 0.0057, "step": 17230 }, { "epoch": 3.3353328173374615, "grad_norm": 0.05838729068636894, "learning_rate": 7.773877904081098e-05, "loss": 0.0067, "step": 17231 }, { "epoch": 3.3355263157894735, "grad_norm": 0.06878659874200821, "learning_rate": 7.77364112614745e-05, "loss": 0.0062, "step": 17232 }, { "epoch": 3.335719814241486, "grad_norm": 0.11698515713214874, "learning_rate": 7.773404339760964e-05, "loss": 0.0061, "step": 17233 }, { "epoch": 3.3359133126934983, "grad_norm": 0.07441313564777374, "learning_rate": 7.77316754492252e-05, "loss": 0.0058, "step": 17234 }, { "epoch": 3.3361068111455108, "grad_norm": 0.09770244359970093, "learning_rate": 7.772930741633001e-05, "loss": 0.0051, "step": 17235 }, { "epoch": 3.336300309597523, "grad_norm": 0.1038878858089447, "learning_rate": 7.772693929893283e-05, "loss": 0.0074, "step": 17236 }, { "epoch": 3.3364938080495357, "grad_norm": 0.07665006071329117, "learning_rate": 7.77245710970425e-05, "loss": 0.0066, "step": 17237 }, { "epoch": 3.336687306501548, "grad_norm": 0.10196473449468613, "learning_rate": 7.772220281066782e-05, "loss": 0.0065, "step": 17238 }, { "epoch": 3.3368808049535605, "grad_norm": 0.055381760001182556, "learning_rate": 7.77198344398176e-05, "loss": 0.0064, "step": 17239 }, { "epoch": 3.337074303405573, "grad_norm": 0.11671552062034607, "learning_rate": 7.771746598450062e-05, "loss": 0.0052, "step": 17240 }, { "epoch": 3.337267801857585, "grad_norm": 0.05107150599360466, "learning_rate": 7.77150974447257e-05, "loss": 0.006, "step": 17241 }, { 
"epoch": 3.3374613003095974, "grad_norm": 0.09096825122833252, "learning_rate": 7.771272882050165e-05, "loss": 0.0059, "step": 17242 }, { "epoch": 3.33765479876161, "grad_norm": 0.07345344871282578, "learning_rate": 7.771036011183725e-05, "loss": 0.0061, "step": 17243 }, { "epoch": 3.3378482972136223, "grad_norm": 0.04367361590266228, "learning_rate": 7.770799131874133e-05, "loss": 0.0062, "step": 17244 }, { "epoch": 3.3380417956656347, "grad_norm": 0.09153420478105545, "learning_rate": 7.770562244122271e-05, "loss": 0.0061, "step": 17245 }, { "epoch": 3.338235294117647, "grad_norm": 0.05102737620472908, "learning_rate": 7.770325347929017e-05, "loss": 0.007, "step": 17246 }, { "epoch": 3.3384287925696596, "grad_norm": 0.06990107893943787, "learning_rate": 7.770088443295255e-05, "loss": 0.0065, "step": 17247 }, { "epoch": 3.3386222910216716, "grad_norm": 0.05139203369617462, "learning_rate": 7.769851530221862e-05, "loss": 0.005, "step": 17248 }, { "epoch": 3.338815789473684, "grad_norm": 0.04398591071367264, "learning_rate": 7.769614608709718e-05, "loss": 0.0078, "step": 17249 }, { "epoch": 3.3390092879256965, "grad_norm": 0.09152992069721222, "learning_rate": 7.769377678759711e-05, "loss": 0.0072, "step": 17250 }, { "epoch": 3.339202786377709, "grad_norm": 0.0403660386800766, "learning_rate": 7.769140740372714e-05, "loss": 0.0063, "step": 17251 }, { "epoch": 3.3393962848297214, "grad_norm": 0.09007562696933746, "learning_rate": 7.768903793549612e-05, "loss": 0.0058, "step": 17252 }, { "epoch": 3.339589783281734, "grad_norm": 0.057055212557315826, "learning_rate": 7.768666838291283e-05, "loss": 0.007, "step": 17253 }, { "epoch": 3.3397832817337463, "grad_norm": 0.0721520334482193, "learning_rate": 7.76842987459861e-05, "loss": 0.0063, "step": 17254 }, { "epoch": 3.3399767801857587, "grad_norm": 0.07678265869617462, "learning_rate": 7.768192902472475e-05, "loss": 0.0061, "step": 17255 }, { "epoch": 3.3401702786377707, "grad_norm": 0.06361400336027145, "learning_rate": 7.767955921913756e-05, "loss": 0.0065, "step": 17256 }, { "epoch": 3.340363777089783, "grad_norm": 0.11098044365644455, "learning_rate": 7.767718932923338e-05, "loss": 0.0076, "step": 17257 }, { "epoch": 3.3405572755417956, "grad_norm": 0.059004440903663635, "learning_rate": 7.767481935502098e-05, "loss": 0.0073, "step": 17258 }, { "epoch": 3.340750773993808, "grad_norm": 0.09408916532993317, "learning_rate": 7.767244929650919e-05, "loss": 0.007, "step": 17259 }, { "epoch": 3.3409442724458205, "grad_norm": 0.045141253620386124, "learning_rate": 7.767007915370683e-05, "loss": 0.0058, "step": 17260 }, { "epoch": 3.341137770897833, "grad_norm": 0.08856779336929321, "learning_rate": 7.766770892662269e-05, "loss": 0.0068, "step": 17261 }, { "epoch": 3.3413312693498454, "grad_norm": 0.05973755940794945, "learning_rate": 7.76653386152656e-05, "loss": 0.0074, "step": 17262 }, { "epoch": 3.3415247678018574, "grad_norm": 0.15585343539714813, "learning_rate": 7.766296821964435e-05, "loss": 0.0072, "step": 17263 }, { "epoch": 3.34171826625387, "grad_norm": 0.06035290285944939, "learning_rate": 7.766059773976781e-05, "loss": 0.0074, "step": 17264 }, { "epoch": 3.3419117647058822, "grad_norm": 0.07344020903110504, "learning_rate": 7.765822717564472e-05, "loss": 0.0067, "step": 17265 }, { "epoch": 3.3421052631578947, "grad_norm": 0.06533735245466232, "learning_rate": 7.765585652728393e-05, "loss": 0.0063, "step": 17266 }, { "epoch": 3.342298761609907, "grad_norm": 0.0654480829834938, "learning_rate": 7.765348579469425e-05, "loss": 0.0053, 
"step": 17267 }, { "epoch": 3.3424922600619196, "grad_norm": 0.07007215172052383, "learning_rate": 7.765111497788449e-05, "loss": 0.0065, "step": 17268 }, { "epoch": 3.342685758513932, "grad_norm": 0.05525116249918938, "learning_rate": 7.764874407686345e-05, "loss": 0.0055, "step": 17269 }, { "epoch": 3.3428792569659445, "grad_norm": 0.08799030631780624, "learning_rate": 7.764637309163997e-05, "loss": 0.0063, "step": 17270 }, { "epoch": 3.343072755417957, "grad_norm": 0.048597969114780426, "learning_rate": 7.764400202222287e-05, "loss": 0.0059, "step": 17271 }, { "epoch": 3.343266253869969, "grad_norm": 0.09956802427768707, "learning_rate": 7.764163086862094e-05, "loss": 0.0073, "step": 17272 }, { "epoch": 3.3434597523219813, "grad_norm": 0.0471930205821991, "learning_rate": 7.763925963084302e-05, "loss": 0.0062, "step": 17273 }, { "epoch": 3.343653250773994, "grad_norm": 0.11278446763753891, "learning_rate": 7.763688830889789e-05, "loss": 0.0055, "step": 17274 }, { "epoch": 3.343846749226006, "grad_norm": 0.05077959597110748, "learning_rate": 7.76345169027944e-05, "loss": 0.0063, "step": 17275 }, { "epoch": 3.3440402476780187, "grad_norm": 0.053394418209791183, "learning_rate": 7.763214541254136e-05, "loss": 0.0061, "step": 17276 }, { "epoch": 3.344233746130031, "grad_norm": 0.02727309614419937, "learning_rate": 7.762977383814756e-05, "loss": 0.0065, "step": 17277 }, { "epoch": 3.344427244582043, "grad_norm": 0.025648411363363266, "learning_rate": 7.762740217962185e-05, "loss": 0.0065, "step": 17278 }, { "epoch": 3.3446207430340555, "grad_norm": 0.02687882073223591, "learning_rate": 7.762503043697303e-05, "loss": 0.0078, "step": 17279 }, { "epoch": 3.344814241486068, "grad_norm": 0.048409536480903625, "learning_rate": 7.76226586102099e-05, "loss": 0.0077, "step": 17280 }, { "epoch": 3.3450077399380804, "grad_norm": 0.05306669697165489, "learning_rate": 7.762028669934134e-05, "loss": 0.0061, "step": 17281 }, { "epoch": 3.345201238390093, "grad_norm": 0.059110064059495926, "learning_rate": 7.76179147043761e-05, "loss": 0.0068, "step": 17282 }, { "epoch": 3.3453947368421053, "grad_norm": 0.052393779158592224, "learning_rate": 7.761554262532303e-05, "loss": 0.0068, "step": 17283 }, { "epoch": 3.3455882352941178, "grad_norm": 0.06389585137367249, "learning_rate": 7.761317046219096e-05, "loss": 0.0064, "step": 17284 }, { "epoch": 3.34578173374613, "grad_norm": 0.046050164848566055, "learning_rate": 7.761079821498869e-05, "loss": 0.0064, "step": 17285 }, { "epoch": 3.3459752321981426, "grad_norm": 0.057367000728845596, "learning_rate": 7.760842588372503e-05, "loss": 0.0078, "step": 17286 }, { "epoch": 3.3461687306501546, "grad_norm": 0.05698021501302719, "learning_rate": 7.760605346840882e-05, "loss": 0.0076, "step": 17287 }, { "epoch": 3.346362229102167, "grad_norm": 0.04073982313275337, "learning_rate": 7.760368096904886e-05, "loss": 0.0062, "step": 17288 }, { "epoch": 3.3465557275541795, "grad_norm": 0.0647580623626709, "learning_rate": 7.760130838565401e-05, "loss": 0.0065, "step": 17289 }, { "epoch": 3.346749226006192, "grad_norm": 0.05432165041565895, "learning_rate": 7.759893571823307e-05, "loss": 0.0058, "step": 17290 }, { "epoch": 3.3469427244582044, "grad_norm": 0.08058221638202667, "learning_rate": 7.759656296679484e-05, "loss": 0.0061, "step": 17291 }, { "epoch": 3.347136222910217, "grad_norm": 0.06468460708856583, "learning_rate": 7.759419013134814e-05, "loss": 0.006, "step": 17292 }, { "epoch": 3.3473297213622293, "grad_norm": 0.055138323456048965, "learning_rate": 
7.759181721190182e-05, "loss": 0.0054, "step": 17293 }, { "epoch": 3.3475232198142413, "grad_norm": 0.058098193258047104, "learning_rate": 7.75894442084647e-05, "loss": 0.007, "step": 17294 }, { "epoch": 3.3477167182662537, "grad_norm": 0.03134271129965782, "learning_rate": 7.758707112104559e-05, "loss": 0.0055, "step": 17295 }, { "epoch": 3.347910216718266, "grad_norm": 0.05897446721792221, "learning_rate": 7.758469794965331e-05, "loss": 0.0053, "step": 17296 }, { "epoch": 3.3481037151702786, "grad_norm": 0.034386299550533295, "learning_rate": 7.75823246942967e-05, "loss": 0.0059, "step": 17297 }, { "epoch": 3.348297213622291, "grad_norm": 0.0551626980304718, "learning_rate": 7.757995135498456e-05, "loss": 0.0073, "step": 17298 }, { "epoch": 3.3484907120743035, "grad_norm": 0.042238131165504456, "learning_rate": 7.757757793172572e-05, "loss": 0.0058, "step": 17299 }, { "epoch": 3.348684210526316, "grad_norm": 0.054775726050138474, "learning_rate": 7.757520442452902e-05, "loss": 0.007, "step": 17300 }, { "epoch": 3.3488777089783284, "grad_norm": 0.07094015181064606, "learning_rate": 7.757283083340326e-05, "loss": 0.0078, "step": 17301 }, { "epoch": 3.3490712074303404, "grad_norm": 0.08122137188911438, "learning_rate": 7.757045715835729e-05, "loss": 0.0055, "step": 17302 }, { "epoch": 3.349264705882353, "grad_norm": 0.06138459965586662, "learning_rate": 7.756808339939992e-05, "loss": 0.0065, "step": 17303 }, { "epoch": 3.3494582043343653, "grad_norm": 0.07903657108545303, "learning_rate": 7.756570955653998e-05, "loss": 0.0068, "step": 17304 }, { "epoch": 3.3496517027863777, "grad_norm": 0.0630398765206337, "learning_rate": 7.756333562978628e-05, "loss": 0.0072, "step": 17305 }, { "epoch": 3.34984520123839, "grad_norm": 0.07718470692634583, "learning_rate": 7.756096161914764e-05, "loss": 0.0086, "step": 17306 }, { "epoch": 3.3500386996904026, "grad_norm": 0.0824839174747467, "learning_rate": 7.755858752463294e-05, "loss": 0.0064, "step": 17307 }, { "epoch": 3.350232198142415, "grad_norm": 0.051856182515621185, "learning_rate": 7.755621334625096e-05, "loss": 0.0059, "step": 17308 }, { "epoch": 3.350425696594427, "grad_norm": 0.06720487773418427, "learning_rate": 7.755383908401053e-05, "loss": 0.0065, "step": 17309 }, { "epoch": 3.3506191950464395, "grad_norm": 0.03666725382208824, "learning_rate": 7.755146473792049e-05, "loss": 0.0074, "step": 17310 }, { "epoch": 3.350812693498452, "grad_norm": 0.047715283930301666, "learning_rate": 7.754909030798966e-05, "loss": 0.0073, "step": 17311 }, { "epoch": 3.3510061919504643, "grad_norm": 0.04621366038918495, "learning_rate": 7.754671579422685e-05, "loss": 0.0063, "step": 17312 }, { "epoch": 3.351199690402477, "grad_norm": 0.06825949996709824, "learning_rate": 7.754434119664093e-05, "loss": 0.0066, "step": 17313 }, { "epoch": 3.3513931888544892, "grad_norm": 0.046599823981523514, "learning_rate": 7.754196651524071e-05, "loss": 0.0065, "step": 17314 }, { "epoch": 3.3515866873065017, "grad_norm": 0.05139731988310814, "learning_rate": 7.753959175003499e-05, "loss": 0.0082, "step": 17315 }, { "epoch": 3.351780185758514, "grad_norm": 0.0672752782702446, "learning_rate": 7.753721690103264e-05, "loss": 0.0065, "step": 17316 }, { "epoch": 3.3519736842105265, "grad_norm": 0.032689280807971954, "learning_rate": 7.753484196824246e-05, "loss": 0.0055, "step": 17317 }, { "epoch": 3.3521671826625385, "grad_norm": 0.07164325565099716, "learning_rate": 7.75324669516733e-05, "loss": 0.0065, "step": 17318 }, { "epoch": 3.352360681114551, "grad_norm": 
0.02578827738761902, "learning_rate": 7.753009185133396e-05, "loss": 0.0065, "step": 17319 }, { "epoch": 3.3525541795665634, "grad_norm": 0.08549582958221436, "learning_rate": 7.752771666723332e-05, "loss": 0.0057, "step": 17320 }, { "epoch": 3.352747678018576, "grad_norm": 0.04439576715230942, "learning_rate": 7.752534139938016e-05, "loss": 0.0074, "step": 17321 }, { "epoch": 3.3529411764705883, "grad_norm": 0.09118778258562088, "learning_rate": 7.752296604778333e-05, "loss": 0.0063, "step": 17322 }, { "epoch": 3.3531346749226008, "grad_norm": 0.06829387694597244, "learning_rate": 7.752059061245168e-05, "loss": 0.0064, "step": 17323 }, { "epoch": 3.353328173374613, "grad_norm": 0.0415632426738739, "learning_rate": 7.7518215093394e-05, "loss": 0.006, "step": 17324 }, { "epoch": 3.353521671826625, "grad_norm": 0.08374334126710892, "learning_rate": 7.751583949061914e-05, "loss": 0.007, "step": 17325 }, { "epoch": 3.3537151702786376, "grad_norm": 0.040809329599142075, "learning_rate": 7.751346380413597e-05, "loss": 0.0067, "step": 17326 }, { "epoch": 3.35390866873065, "grad_norm": 0.07474526017904282, "learning_rate": 7.751108803395327e-05, "loss": 0.0052, "step": 17327 }, { "epoch": 3.3541021671826625, "grad_norm": 0.08732882887125015, "learning_rate": 7.750871218007989e-05, "loss": 0.0063, "step": 17328 }, { "epoch": 3.354295665634675, "grad_norm": 0.03816237300634384, "learning_rate": 7.750633624252467e-05, "loss": 0.0074, "step": 17329 }, { "epoch": 3.3544891640866874, "grad_norm": 0.10006154328584671, "learning_rate": 7.750396022129643e-05, "loss": 0.0063, "step": 17330 }, { "epoch": 3.3546826625387, "grad_norm": 0.03702196851372719, "learning_rate": 7.750158411640401e-05, "loss": 0.0067, "step": 17331 }, { "epoch": 3.3548761609907123, "grad_norm": 0.0691484585404396, "learning_rate": 7.749920792785626e-05, "loss": 0.0067, "step": 17332 }, { "epoch": 3.3550696594427243, "grad_norm": 0.07561688125133514, "learning_rate": 7.749683165566198e-05, "loss": 0.0061, "step": 17333 }, { "epoch": 3.3552631578947367, "grad_norm": 0.02976512722671032, "learning_rate": 7.749445529983004e-05, "loss": 0.0057, "step": 17334 }, { "epoch": 3.355456656346749, "grad_norm": 0.07583694159984589, "learning_rate": 7.749207886036924e-05, "loss": 0.0072, "step": 17335 }, { "epoch": 3.3556501547987616, "grad_norm": 0.06985107809305191, "learning_rate": 7.748970233728846e-05, "loss": 0.0059, "step": 17336 }, { "epoch": 3.355843653250774, "grad_norm": 0.05532484129071236, "learning_rate": 7.748732573059649e-05, "loss": 0.0081, "step": 17337 }, { "epoch": 3.3560371517027865, "grad_norm": 0.07721453160047531, "learning_rate": 7.748494904030217e-05, "loss": 0.0051, "step": 17338 }, { "epoch": 3.356230650154799, "grad_norm": 0.044536616653203964, "learning_rate": 7.748257226641437e-05, "loss": 0.0055, "step": 17339 }, { "epoch": 3.356424148606811, "grad_norm": 0.0673334151506424, "learning_rate": 7.748019540894191e-05, "loss": 0.0062, "step": 17340 }, { "epoch": 3.3566176470588234, "grad_norm": 0.07054543495178223, "learning_rate": 7.747781846789362e-05, "loss": 0.0042, "step": 17341 }, { "epoch": 3.356811145510836, "grad_norm": 0.050131071358919144, "learning_rate": 7.747544144327833e-05, "loss": 0.0057, "step": 17342 }, { "epoch": 3.3570046439628483, "grad_norm": 0.08126946538686752, "learning_rate": 7.747306433510489e-05, "loss": 0.0047, "step": 17343 }, { "epoch": 3.3571981424148607, "grad_norm": 0.021492639556527138, "learning_rate": 7.747068714338214e-05, "loss": 0.0066, "step": 17344 }, { "epoch": 
3.357391640866873, "grad_norm": 0.08367355912923813, "learning_rate": 7.74683098681189e-05, "loss": 0.0072, "step": 17345 }, { "epoch": 3.3575851393188856, "grad_norm": 0.02887808345258236, "learning_rate": 7.746593250932404e-05, "loss": 0.0066, "step": 17346 }, { "epoch": 3.357778637770898, "grad_norm": 0.07519190013408661, "learning_rate": 7.746355506700636e-05, "loss": 0.0064, "step": 17347 }, { "epoch": 3.3579721362229105, "grad_norm": 0.04469148442149162, "learning_rate": 7.746117754117474e-05, "loss": 0.0066, "step": 17348 }, { "epoch": 3.3581656346749225, "grad_norm": 0.06525499373674393, "learning_rate": 7.745879993183796e-05, "loss": 0.0058, "step": 17349 }, { "epoch": 3.358359133126935, "grad_norm": 0.057035382837057114, "learning_rate": 7.745642223900491e-05, "loss": 0.0076, "step": 17350 }, { "epoch": 3.3585526315789473, "grad_norm": 0.06566623598337173, "learning_rate": 7.745404446268442e-05, "loss": 0.0061, "step": 17351 }, { "epoch": 3.35874613003096, "grad_norm": 0.059606220573186874, "learning_rate": 7.745166660288533e-05, "loss": 0.0062, "step": 17352 }, { "epoch": 3.3589396284829722, "grad_norm": 0.05386532098054886, "learning_rate": 7.744928865961648e-05, "loss": 0.0075, "step": 17353 }, { "epoch": 3.3591331269349847, "grad_norm": 0.10044913738965988, "learning_rate": 7.744691063288669e-05, "loss": 0.0072, "step": 17354 }, { "epoch": 3.3593266253869967, "grad_norm": 0.03322352096438408, "learning_rate": 7.744453252270483e-05, "loss": 0.0065, "step": 17355 }, { "epoch": 3.359520123839009, "grad_norm": 0.11349210143089294, "learning_rate": 7.744215432907972e-05, "loss": 0.0063, "step": 17356 }, { "epoch": 3.3597136222910216, "grad_norm": 0.04947192966938019, "learning_rate": 7.743977605202021e-05, "loss": 0.0057, "step": 17357 }, { "epoch": 3.359907120743034, "grad_norm": 0.10324476659297943, "learning_rate": 7.743739769153515e-05, "loss": 0.0059, "step": 17358 }, { "epoch": 3.3601006191950464, "grad_norm": 0.0429050549864769, "learning_rate": 7.743501924763339e-05, "loss": 0.0074, "step": 17359 }, { "epoch": 3.360294117647059, "grad_norm": 0.04526727646589279, "learning_rate": 7.743264072032373e-05, "loss": 0.0056, "step": 17360 }, { "epoch": 3.3604876160990713, "grad_norm": 0.04550442099571228, "learning_rate": 7.743026210961505e-05, "loss": 0.0059, "step": 17361 }, { "epoch": 3.3606811145510838, "grad_norm": 0.05226792022585869, "learning_rate": 7.742788341551618e-05, "loss": 0.0062, "step": 17362 }, { "epoch": 3.360874613003096, "grad_norm": 0.025366073474287987, "learning_rate": 7.742550463803595e-05, "loss": 0.0057, "step": 17363 }, { "epoch": 3.361068111455108, "grad_norm": 0.0576719231903553, "learning_rate": 7.742312577718325e-05, "loss": 0.0059, "step": 17364 }, { "epoch": 3.3612616099071206, "grad_norm": 0.028672242537140846, "learning_rate": 7.742074683296688e-05, "loss": 0.0057, "step": 17365 }, { "epoch": 3.361455108359133, "grad_norm": 0.06283729523420334, "learning_rate": 7.741836780539569e-05, "loss": 0.0055, "step": 17366 }, { "epoch": 3.3616486068111455, "grad_norm": 0.020095722749829292, "learning_rate": 7.741598869447853e-05, "loss": 0.006, "step": 17367 }, { "epoch": 3.361842105263158, "grad_norm": 0.04099417105317116, "learning_rate": 7.741360950022427e-05, "loss": 0.0063, "step": 17368 }, { "epoch": 3.3620356037151704, "grad_norm": 0.027016233652830124, "learning_rate": 7.741123022264171e-05, "loss": 0.007, "step": 17369 }, { "epoch": 3.362229102167183, "grad_norm": 0.02817326970398426, "learning_rate": 7.740885086173974e-05, "loss": 0.007, "step": 
17370 }, { "epoch": 3.362422600619195, "grad_norm": 0.03903316706418991, "learning_rate": 7.740647141752716e-05, "loss": 0.0067, "step": 17371 }, { "epoch": 3.3626160990712073, "grad_norm": 0.02785126119852066, "learning_rate": 7.740409189001286e-05, "loss": 0.0064, "step": 17372 }, { "epoch": 3.3628095975232197, "grad_norm": 0.04938361421227455, "learning_rate": 7.740171227920567e-05, "loss": 0.0067, "step": 17373 }, { "epoch": 3.363003095975232, "grad_norm": 0.03976738080382347, "learning_rate": 7.739933258511442e-05, "loss": 0.0075, "step": 17374 }, { "epoch": 3.3631965944272446, "grad_norm": 0.056158460676670074, "learning_rate": 7.739695280774796e-05, "loss": 0.0074, "step": 17375 }, { "epoch": 3.363390092879257, "grad_norm": 0.02957489900290966, "learning_rate": 7.739457294711516e-05, "loss": 0.0058, "step": 17376 }, { "epoch": 3.3635835913312695, "grad_norm": 0.03935704380273819, "learning_rate": 7.739219300322486e-05, "loss": 0.0077, "step": 17377 }, { "epoch": 3.363777089783282, "grad_norm": 0.037368081510066986, "learning_rate": 7.738981297608589e-05, "loss": 0.0064, "step": 17378 }, { "epoch": 3.363970588235294, "grad_norm": 0.04368508979678154, "learning_rate": 7.738743286570714e-05, "loss": 0.0057, "step": 17379 }, { "epoch": 3.3641640866873064, "grad_norm": 0.04818252474069595, "learning_rate": 7.738505267209742e-05, "loss": 0.0056, "step": 17380 }, { "epoch": 3.364357585139319, "grad_norm": 0.06105441972613335, "learning_rate": 7.738267239526558e-05, "loss": 0.0074, "step": 17381 }, { "epoch": 3.3645510835913313, "grad_norm": 0.04938102886080742, "learning_rate": 7.738029203522046e-05, "loss": 0.0058, "step": 17382 }, { "epoch": 3.3647445820433437, "grad_norm": 0.030594421550631523, "learning_rate": 7.737791159197096e-05, "loss": 0.0058, "step": 17383 }, { "epoch": 3.364938080495356, "grad_norm": 0.043310992419719696, "learning_rate": 7.73755310655259e-05, "loss": 0.0065, "step": 17384 }, { "epoch": 3.3651315789473686, "grad_norm": 0.026290621608495712, "learning_rate": 7.737315045589411e-05, "loss": 0.0067, "step": 17385 }, { "epoch": 3.3653250773993806, "grad_norm": 0.027360811829566956, "learning_rate": 7.737076976308448e-05, "loss": 0.0059, "step": 17386 }, { "epoch": 3.365518575851393, "grad_norm": 0.028751932084560394, "learning_rate": 7.736838898710583e-05, "loss": 0.0066, "step": 17387 }, { "epoch": 3.3657120743034055, "grad_norm": 0.027595939114689827, "learning_rate": 7.736600812796703e-05, "loss": 0.0051, "step": 17388 }, { "epoch": 3.365905572755418, "grad_norm": 0.02631627768278122, "learning_rate": 7.73636271856769e-05, "loss": 0.0065, "step": 17389 }, { "epoch": 3.3660990712074303, "grad_norm": 0.03187793865799904, "learning_rate": 7.736124616024432e-05, "loss": 0.0047, "step": 17390 }, { "epoch": 3.366292569659443, "grad_norm": 0.04351985082030296, "learning_rate": 7.735886505167816e-05, "loss": 0.0067, "step": 17391 }, { "epoch": 3.3664860681114552, "grad_norm": 0.03298496827483177, "learning_rate": 7.735648385998726e-05, "loss": 0.0053, "step": 17392 }, { "epoch": 3.3666795665634677, "grad_norm": 0.050349943339824677, "learning_rate": 7.735410258518043e-05, "loss": 0.0064, "step": 17393 }, { "epoch": 3.36687306501548, "grad_norm": 0.033870063722133636, "learning_rate": 7.735172122726655e-05, "loss": 0.0069, "step": 17394 }, { "epoch": 3.367066563467492, "grad_norm": 0.04137279465794563, "learning_rate": 7.734933978625448e-05, "loss": 0.0062, "step": 17395 }, { "epoch": 3.3672600619195046, "grad_norm": 0.02980954758822918, "learning_rate": 
7.734695826215309e-05, "loss": 0.0066, "step": 17396 }, { "epoch": 3.367453560371517, "grad_norm": 0.052470602095127106, "learning_rate": 7.734457665497121e-05, "loss": 0.0057, "step": 17397 }, { "epoch": 3.3676470588235294, "grad_norm": 0.040886275470256805, "learning_rate": 7.73421949647177e-05, "loss": 0.0059, "step": 17398 }, { "epoch": 3.367840557275542, "grad_norm": 0.04539996385574341, "learning_rate": 7.73398131914014e-05, "loss": 0.0072, "step": 17399 }, { "epoch": 3.3680340557275543, "grad_norm": 0.032776255160570145, "learning_rate": 7.733743133503119e-05, "loss": 0.0059, "step": 17400 }, { "epoch": 3.3682275541795663, "grad_norm": 0.03153360262513161, "learning_rate": 7.733504939561592e-05, "loss": 0.0067, "step": 17401 }, { "epoch": 3.3684210526315788, "grad_norm": 0.032364435493946075, "learning_rate": 7.733266737316443e-05, "loss": 0.0063, "step": 17402 }, { "epoch": 3.368614551083591, "grad_norm": 0.02105824276804924, "learning_rate": 7.733028526768559e-05, "loss": 0.0054, "step": 17403 }, { "epoch": 3.3688080495356036, "grad_norm": 0.025963999330997467, "learning_rate": 7.732790307918825e-05, "loss": 0.0066, "step": 17404 }, { "epoch": 3.369001547987616, "grad_norm": 0.030739326030015945, "learning_rate": 7.732552080768128e-05, "loss": 0.0051, "step": 17405 }, { "epoch": 3.3691950464396285, "grad_norm": 0.03404177352786064, "learning_rate": 7.73231384531735e-05, "loss": 0.0061, "step": 17406 }, { "epoch": 3.369388544891641, "grad_norm": 0.03201457858085632, "learning_rate": 7.732075601567381e-05, "loss": 0.0063, "step": 17407 }, { "epoch": 3.3695820433436534, "grad_norm": 0.03209620341658592, "learning_rate": 7.731837349519104e-05, "loss": 0.005, "step": 17408 }, { "epoch": 3.369775541795666, "grad_norm": 0.0224824957549572, "learning_rate": 7.731599089173408e-05, "loss": 0.006, "step": 17409 }, { "epoch": 3.369969040247678, "grad_norm": 0.039736174046993256, "learning_rate": 7.731360820531174e-05, "loss": 0.0062, "step": 17410 }, { "epoch": 3.3701625386996903, "grad_norm": 0.023378850892186165, "learning_rate": 7.731122543593291e-05, "loss": 0.0048, "step": 17411 }, { "epoch": 3.3703560371517027, "grad_norm": 0.03347310051321983, "learning_rate": 7.730884258360644e-05, "loss": 0.0068, "step": 17412 }, { "epoch": 3.370549535603715, "grad_norm": 0.03617139533162117, "learning_rate": 7.73064596483412e-05, "loss": 0.0074, "step": 17413 }, { "epoch": 3.3707430340557276, "grad_norm": 0.02131078578531742, "learning_rate": 7.730407663014601e-05, "loss": 0.0063, "step": 17414 }, { "epoch": 3.37093653250774, "grad_norm": 0.02053702436387539, "learning_rate": 7.730169352902979e-05, "loss": 0.0062, "step": 17415 }, { "epoch": 3.3711300309597525, "grad_norm": 0.03822307288646698, "learning_rate": 7.729931034500136e-05, "loss": 0.0076, "step": 17416 }, { "epoch": 3.3713235294117645, "grad_norm": 0.03754715994000435, "learning_rate": 7.72969270780696e-05, "loss": 0.006, "step": 17417 }, { "epoch": 3.371517027863777, "grad_norm": 0.03747090324759483, "learning_rate": 7.729454372824335e-05, "loss": 0.0071, "step": 17418 }, { "epoch": 3.3717105263157894, "grad_norm": 0.04693113639950752, "learning_rate": 7.729216029553146e-05, "loss": 0.0057, "step": 17419 }, { "epoch": 3.371904024767802, "grad_norm": 0.0287121944129467, "learning_rate": 7.728977677994283e-05, "loss": 0.0067, "step": 17420 }, { "epoch": 3.3720975232198143, "grad_norm": 0.06372032314538956, "learning_rate": 7.72873931814863e-05, "loss": 0.0065, "step": 17421 }, { "epoch": 3.3722910216718267, "grad_norm": 
0.047098901122808456, "learning_rate": 7.728500950017074e-05, "loss": 0.0068, "step": 17422 }, { "epoch": 3.372484520123839, "grad_norm": 0.04084339365363121, "learning_rate": 7.7282625736005e-05, "loss": 0.0062, "step": 17423 }, { "epoch": 3.3726780185758516, "grad_norm": 0.05269940569996834, "learning_rate": 7.728024188899795e-05, "loss": 0.006, "step": 17424 }, { "epoch": 3.3728715170278636, "grad_norm": 0.030029332265257835, "learning_rate": 7.727785795915846e-05, "loss": 0.0055, "step": 17425 }, { "epoch": 3.373065015479876, "grad_norm": 0.06627967953681946, "learning_rate": 7.727547394649535e-05, "loss": 0.006, "step": 17426 }, { "epoch": 3.3732585139318885, "grad_norm": 0.02556302770972252, "learning_rate": 7.727308985101754e-05, "loss": 0.0062, "step": 17427 }, { "epoch": 3.373452012383901, "grad_norm": 0.0598175972700119, "learning_rate": 7.727070567273387e-05, "loss": 0.0054, "step": 17428 }, { "epoch": 3.3736455108359134, "grad_norm": 0.05961745232343674, "learning_rate": 7.72683214116532e-05, "loss": 0.008, "step": 17429 }, { "epoch": 3.373839009287926, "grad_norm": 0.0370723195374012, "learning_rate": 7.726593706778441e-05, "loss": 0.0073, "step": 17430 }, { "epoch": 3.3740325077399382, "grad_norm": 0.05303006246685982, "learning_rate": 7.726355264113634e-05, "loss": 0.0051, "step": 17431 }, { "epoch": 3.3742260061919502, "grad_norm": 0.029341936111450195, "learning_rate": 7.726116813171788e-05, "loss": 0.0053, "step": 17432 }, { "epoch": 3.3744195046439627, "grad_norm": 0.027389591559767723, "learning_rate": 7.725878353953786e-05, "loss": 0.006, "step": 17433 }, { "epoch": 3.374613003095975, "grad_norm": 0.036050453782081604, "learning_rate": 7.725639886460517e-05, "loss": 0.0064, "step": 17434 }, { "epoch": 3.3748065015479876, "grad_norm": 0.03174976631999016, "learning_rate": 7.72540141069287e-05, "loss": 0.0076, "step": 17435 }, { "epoch": 3.375, "grad_norm": 0.02894464135169983, "learning_rate": 7.725162926651727e-05, "loss": 0.0063, "step": 17436 }, { "epoch": 3.3751934984520124, "grad_norm": 0.03455733880400658, "learning_rate": 7.724924434337976e-05, "loss": 0.0065, "step": 17437 }, { "epoch": 3.375386996904025, "grad_norm": 0.017454199492931366, "learning_rate": 7.724685933752506e-05, "loss": 0.0055, "step": 17438 }, { "epoch": 3.3755804953560373, "grad_norm": 0.0365062952041626, "learning_rate": 7.724447424896201e-05, "loss": 0.0069, "step": 17439 }, { "epoch": 3.3757739938080498, "grad_norm": 0.04576690495014191, "learning_rate": 7.724208907769947e-05, "loss": 0.0051, "step": 17440 }, { "epoch": 3.3759674922600618, "grad_norm": 0.03197171911597252, "learning_rate": 7.723970382374635e-05, "loss": 0.0057, "step": 17441 }, { "epoch": 3.376160990712074, "grad_norm": 0.06282545626163483, "learning_rate": 7.723731848711149e-05, "loss": 0.0058, "step": 17442 }, { "epoch": 3.3763544891640866, "grad_norm": 0.025032883509993553, "learning_rate": 7.723493306780374e-05, "loss": 0.0069, "step": 17443 }, { "epoch": 3.376547987616099, "grad_norm": 0.029294753447175026, "learning_rate": 7.723254756583199e-05, "loss": 0.0067, "step": 17444 }, { "epoch": 3.3767414860681115, "grad_norm": 0.03460505232214928, "learning_rate": 7.72301619812051e-05, "loss": 0.0068, "step": 17445 }, { "epoch": 3.376934984520124, "grad_norm": 0.024885619059205055, "learning_rate": 7.722777631393196e-05, "loss": 0.0064, "step": 17446 }, { "epoch": 3.3771284829721364, "grad_norm": 0.04295476898550987, "learning_rate": 7.722539056402143e-05, "loss": 0.0064, "step": 17447 }, { "epoch": 3.3773219814241484, 
"grad_norm": 0.0239922683686018, "learning_rate": 7.722300473148237e-05, "loss": 0.0066, "step": 17448 }, { "epoch": 3.377515479876161, "grad_norm": 0.03689233958721161, "learning_rate": 7.722061881632366e-05, "loss": 0.0063, "step": 17449 }, { "epoch": 3.3777089783281733, "grad_norm": 0.03442429006099701, "learning_rate": 7.721823281855414e-05, "loss": 0.006, "step": 17450 }, { "epoch": 3.3779024767801857, "grad_norm": 0.06447024643421173, "learning_rate": 7.721584673818273e-05, "loss": 0.007, "step": 17451 }, { "epoch": 3.378095975232198, "grad_norm": 0.03944747895002365, "learning_rate": 7.721346057521826e-05, "loss": 0.0066, "step": 17452 }, { "epoch": 3.3782894736842106, "grad_norm": 0.035412728786468506, "learning_rate": 7.721107432966962e-05, "loss": 0.006, "step": 17453 }, { "epoch": 3.378482972136223, "grad_norm": 0.0507458858191967, "learning_rate": 7.720868800154568e-05, "loss": 0.0072, "step": 17454 }, { "epoch": 3.3786764705882355, "grad_norm": 0.030486568808555603, "learning_rate": 7.720630159085531e-05, "loss": 0.007, "step": 17455 }, { "epoch": 3.3788699690402475, "grad_norm": 0.05238877609372139, "learning_rate": 7.720391509760738e-05, "loss": 0.0083, "step": 17456 }, { "epoch": 3.37906346749226, "grad_norm": 0.023353390395641327, "learning_rate": 7.720152852181078e-05, "loss": 0.0055, "step": 17457 }, { "epoch": 3.3792569659442724, "grad_norm": 0.046327296644449234, "learning_rate": 7.719914186347434e-05, "loss": 0.0054, "step": 17458 }, { "epoch": 3.379450464396285, "grad_norm": 0.02124461717903614, "learning_rate": 7.719675512260699e-05, "loss": 0.0064, "step": 17459 }, { "epoch": 3.3796439628482973, "grad_norm": 0.04518800228834152, "learning_rate": 7.719436829921756e-05, "loss": 0.0063, "step": 17460 }, { "epoch": 3.3798374613003097, "grad_norm": 0.02535472810268402, "learning_rate": 7.719198139331493e-05, "loss": 0.0063, "step": 17461 }, { "epoch": 3.380030959752322, "grad_norm": 0.03250790759921074, "learning_rate": 7.718959440490802e-05, "loss": 0.0065, "step": 17462 }, { "epoch": 3.380224458204334, "grad_norm": 0.02706192061305046, "learning_rate": 7.718720733400564e-05, "loss": 0.0058, "step": 17463 }, { "epoch": 3.3804179566563466, "grad_norm": 0.034068863838911057, "learning_rate": 7.718482018061669e-05, "loss": 0.0047, "step": 17464 }, { "epoch": 3.380611455108359, "grad_norm": 0.055098798125982285, "learning_rate": 7.718243294475002e-05, "loss": 0.0059, "step": 17465 }, { "epoch": 3.3808049535603715, "grad_norm": 0.047302715480327606, "learning_rate": 7.718004562641456e-05, "loss": 0.0066, "step": 17466 }, { "epoch": 3.380998452012384, "grad_norm": 0.047131530940532684, "learning_rate": 7.717765822561915e-05, "loss": 0.0057, "step": 17467 }, { "epoch": 3.3811919504643964, "grad_norm": 0.040824245661497116, "learning_rate": 7.717527074237266e-05, "loss": 0.0066, "step": 17468 }, { "epoch": 3.381385448916409, "grad_norm": 0.05092407763004303, "learning_rate": 7.717288317668398e-05, "loss": 0.0056, "step": 17469 }, { "epoch": 3.3815789473684212, "grad_norm": 0.03949980065226555, "learning_rate": 7.7170495528562e-05, "loss": 0.0063, "step": 17470 }, { "epoch": 3.3817724458204337, "grad_norm": 0.054612040519714355, "learning_rate": 7.716810779801557e-05, "loss": 0.0072, "step": 17471 }, { "epoch": 3.3819659442724457, "grad_norm": 0.03471870720386505, "learning_rate": 7.716571998505358e-05, "loss": 0.0059, "step": 17472 }, { "epoch": 3.382159442724458, "grad_norm": 0.05019506812095642, "learning_rate": 7.716333208968489e-05, "loss": 0.0065, "step": 17473 }, { 
"epoch": 3.3823529411764706, "grad_norm": 0.041127197444438934, "learning_rate": 7.716094411191842e-05, "loss": 0.0056, "step": 17474 }, { "epoch": 3.382546439628483, "grad_norm": 0.04366074874997139, "learning_rate": 7.715855605176299e-05, "loss": 0.0064, "step": 17475 }, { "epoch": 3.3827399380804954, "grad_norm": 0.05163363739848137, "learning_rate": 7.715616790922753e-05, "loss": 0.0075, "step": 17476 }, { "epoch": 3.382933436532508, "grad_norm": 0.04963894560933113, "learning_rate": 7.715377968432089e-05, "loss": 0.0081, "step": 17477 }, { "epoch": 3.38312693498452, "grad_norm": 0.06034155935049057, "learning_rate": 7.715139137705196e-05, "loss": 0.0054, "step": 17478 }, { "epoch": 3.3833204334365323, "grad_norm": 0.03672449663281441, "learning_rate": 7.71490029874296e-05, "loss": 0.0072, "step": 17479 }, { "epoch": 3.3835139318885448, "grad_norm": 0.04723488911986351, "learning_rate": 7.714661451546272e-05, "loss": 0.006, "step": 17480 }, { "epoch": 3.383707430340557, "grad_norm": 0.022809388116002083, "learning_rate": 7.714422596116019e-05, "loss": 0.0067, "step": 17481 }, { "epoch": 3.3839009287925697, "grad_norm": 0.05082127824425697, "learning_rate": 7.714183732453089e-05, "loss": 0.006, "step": 17482 }, { "epoch": 3.384094427244582, "grad_norm": 0.019267287105321884, "learning_rate": 7.713944860558368e-05, "loss": 0.0065, "step": 17483 }, { "epoch": 3.3842879256965945, "grad_norm": 0.04366598650813103, "learning_rate": 7.713705980432744e-05, "loss": 0.006, "step": 17484 }, { "epoch": 3.384481424148607, "grad_norm": 0.03396042063832283, "learning_rate": 7.713467092077109e-05, "loss": 0.0056, "step": 17485 }, { "epoch": 3.3846749226006194, "grad_norm": 0.05259174853563309, "learning_rate": 7.713228195492349e-05, "loss": 0.0076, "step": 17486 }, { "epoch": 3.3848684210526314, "grad_norm": 0.024405281990766525, "learning_rate": 7.712989290679349e-05, "loss": 0.0058, "step": 17487 }, { "epoch": 3.385061919504644, "grad_norm": 0.015132232569158077, "learning_rate": 7.712750377639002e-05, "loss": 0.005, "step": 17488 }, { "epoch": 3.3852554179566563, "grad_norm": 0.02721494436264038, "learning_rate": 7.712511456372196e-05, "loss": 0.006, "step": 17489 }, { "epoch": 3.3854489164086687, "grad_norm": 0.0360085628926754, "learning_rate": 7.712272526879815e-05, "loss": 0.0051, "step": 17490 }, { "epoch": 3.385642414860681, "grad_norm": 0.06671032309532166, "learning_rate": 7.712033589162751e-05, "loss": 0.0064, "step": 17491 }, { "epoch": 3.3858359133126936, "grad_norm": 0.045376770198345184, "learning_rate": 7.711794643221892e-05, "loss": 0.0064, "step": 17492 }, { "epoch": 3.386029411764706, "grad_norm": 0.07453157007694244, "learning_rate": 7.711555689058124e-05, "loss": 0.0065, "step": 17493 }, { "epoch": 3.386222910216718, "grad_norm": 0.0631716400384903, "learning_rate": 7.711316726672336e-05, "loss": 0.0062, "step": 17494 }, { "epoch": 3.3864164086687305, "grad_norm": 0.04509120061993599, "learning_rate": 7.71107775606542e-05, "loss": 0.0068, "step": 17495 }, { "epoch": 3.386609907120743, "grad_norm": 0.10974337160587311, "learning_rate": 7.71083877723826e-05, "loss": 0.0065, "step": 17496 }, { "epoch": 3.3868034055727554, "grad_norm": 0.030126744881272316, "learning_rate": 7.710599790191746e-05, "loss": 0.0057, "step": 17497 }, { "epoch": 3.386996904024768, "grad_norm": 0.10945720970630646, "learning_rate": 7.710360794926768e-05, "loss": 0.0055, "step": 17498 }, { "epoch": 3.3871904024767803, "grad_norm": 0.026624711230397224, "learning_rate": 7.710121791444211e-05, "loss": 0.0075, 
"step": 17499 }, { "epoch": 3.3873839009287927, "grad_norm": 0.0503845177590847, "learning_rate": 7.709882779744967e-05, "loss": 0.0064, "step": 17500 }, { "epoch": 3.387577399380805, "grad_norm": 0.08420362323522568, "learning_rate": 7.709643759829925e-05, "loss": 0.0068, "step": 17501 }, { "epoch": 3.387770897832817, "grad_norm": 0.053528327494859695, "learning_rate": 7.70940473169997e-05, "loss": 0.0063, "step": 17502 }, { "epoch": 3.3879643962848296, "grad_norm": 0.07879724353551865, "learning_rate": 7.709165695355994e-05, "loss": 0.0073, "step": 17503 }, { "epoch": 3.388157894736842, "grad_norm": 0.10131910443305969, "learning_rate": 7.708926650798882e-05, "loss": 0.0067, "step": 17504 }, { "epoch": 3.3883513931888545, "grad_norm": 0.06292924284934998, "learning_rate": 7.708687598029527e-05, "loss": 0.0062, "step": 17505 }, { "epoch": 3.388544891640867, "grad_norm": 0.12314682453870773, "learning_rate": 7.708448537048815e-05, "loss": 0.007, "step": 17506 }, { "epoch": 3.3887383900928794, "grad_norm": 0.0755879357457161, "learning_rate": 7.708209467857637e-05, "loss": 0.0081, "step": 17507 }, { "epoch": 3.388931888544892, "grad_norm": 0.044264115393161774, "learning_rate": 7.707970390456879e-05, "loss": 0.0057, "step": 17508 }, { "epoch": 3.389125386996904, "grad_norm": 0.08062716573476791, "learning_rate": 7.70773130484743e-05, "loss": 0.0093, "step": 17509 }, { "epoch": 3.3893188854489162, "grad_norm": 0.0695125013589859, "learning_rate": 7.707492211030183e-05, "loss": 0.0069, "step": 17510 }, { "epoch": 3.3895123839009287, "grad_norm": 0.11410181224346161, "learning_rate": 7.707253109006022e-05, "loss": 0.0064, "step": 17511 }, { "epoch": 3.389705882352941, "grad_norm": 0.06655509769916534, "learning_rate": 7.707013998775838e-05, "loss": 0.009, "step": 17512 }, { "epoch": 3.3898993808049536, "grad_norm": 0.13408297300338745, "learning_rate": 7.70677488034052e-05, "loss": 0.0085, "step": 17513 }, { "epoch": 3.390092879256966, "grad_norm": 0.1162581816315651, "learning_rate": 7.706535753700956e-05, "loss": 0.0062, "step": 17514 }, { "epoch": 3.3902863777089784, "grad_norm": 0.14536693692207336, "learning_rate": 7.706296618858036e-05, "loss": 0.0058, "step": 17515 }, { "epoch": 3.390479876160991, "grad_norm": 0.10041771084070206, "learning_rate": 7.706057475812649e-05, "loss": 0.0072, "step": 17516 }, { "epoch": 3.3906733746130033, "grad_norm": 0.07982432097196579, "learning_rate": 7.705818324565683e-05, "loss": 0.0074, "step": 17517 }, { "epoch": 3.3908668730650153, "grad_norm": 0.07733847200870514, "learning_rate": 7.705579165118029e-05, "loss": 0.006, "step": 17518 }, { "epoch": 3.3910603715170278, "grad_norm": 0.07343466579914093, "learning_rate": 7.705339997470575e-05, "loss": 0.0062, "step": 17519 }, { "epoch": 3.39125386996904, "grad_norm": 0.05968187749385834, "learning_rate": 7.705100821624212e-05, "loss": 0.0071, "step": 17520 }, { "epoch": 3.3914473684210527, "grad_norm": 0.08642196655273438, "learning_rate": 7.704861637579825e-05, "loss": 0.0066, "step": 17521 }, { "epoch": 3.391640866873065, "grad_norm": 0.12238852679729462, "learning_rate": 7.704622445338307e-05, "loss": 0.0066, "step": 17522 }, { "epoch": 3.3918343653250775, "grad_norm": 0.0569753497838974, "learning_rate": 7.704383244900545e-05, "loss": 0.0061, "step": 17523 }, { "epoch": 3.3920278637770895, "grad_norm": 0.10070337355136871, "learning_rate": 7.70414403626743e-05, "loss": 0.0078, "step": 17524 }, { "epoch": 3.392221362229102, "grad_norm": 0.0653320923447609, "learning_rate": 7.70390481943985e-05, "loss": 
0.0059, "step": 17525 }, { "epoch": 3.3924148606811144, "grad_norm": 0.09438543021678925, "learning_rate": 7.703665594418696e-05, "loss": 0.0068, "step": 17526 }, { "epoch": 3.392608359133127, "grad_norm": 0.11526191234588623, "learning_rate": 7.703426361204857e-05, "loss": 0.0073, "step": 17527 }, { "epoch": 3.3928018575851393, "grad_norm": 0.06602148711681366, "learning_rate": 7.70318711979922e-05, "loss": 0.0069, "step": 17528 }, { "epoch": 3.3929953560371517, "grad_norm": 0.06600679457187653, "learning_rate": 7.702947870202676e-05, "loss": 0.0079, "step": 17529 }, { "epoch": 3.393188854489164, "grad_norm": 0.15205319225788116, "learning_rate": 7.702708612416114e-05, "loss": 0.0091, "step": 17530 }, { "epoch": 3.3933823529411766, "grad_norm": 0.02208363078534603, "learning_rate": 7.702469346440427e-05, "loss": 0.0065, "step": 17531 }, { "epoch": 3.393575851393189, "grad_norm": 0.12922635674476624, "learning_rate": 7.702230072276499e-05, "loss": 0.0071, "step": 17532 }, { "epoch": 3.393769349845201, "grad_norm": 0.06987513601779938, "learning_rate": 7.701990789925223e-05, "loss": 0.0074, "step": 17533 }, { "epoch": 3.3939628482972135, "grad_norm": 0.12506549060344696, "learning_rate": 7.701751499387488e-05, "loss": 0.0063, "step": 17534 }, { "epoch": 3.394156346749226, "grad_norm": 0.10777436941862106, "learning_rate": 7.701512200664184e-05, "loss": 0.0063, "step": 17535 }, { "epoch": 3.3943498452012384, "grad_norm": 0.1027439758181572, "learning_rate": 7.701272893756199e-05, "loss": 0.007, "step": 17536 }, { "epoch": 3.394543343653251, "grad_norm": 0.11083923280239105, "learning_rate": 7.701033578664425e-05, "loss": 0.0059, "step": 17537 }, { "epoch": 3.3947368421052633, "grad_norm": 0.08538629114627838, "learning_rate": 7.700794255389751e-05, "loss": 0.0075, "step": 17538 }, { "epoch": 3.3949303405572757, "grad_norm": 0.06564360111951828, "learning_rate": 7.700554923933065e-05, "loss": 0.0067, "step": 17539 }, { "epoch": 3.3951238390092877, "grad_norm": 0.05429520085453987, "learning_rate": 7.700315584295256e-05, "loss": 0.007, "step": 17540 }, { "epoch": 3.3953173374613, "grad_norm": 0.04821007698774338, "learning_rate": 7.70007623647722e-05, "loss": 0.0074, "step": 17541 }, { "epoch": 3.3955108359133126, "grad_norm": 0.03852050006389618, "learning_rate": 7.69983688047984e-05, "loss": 0.0059, "step": 17542 }, { "epoch": 3.395704334365325, "grad_norm": 0.03674117475748062, "learning_rate": 7.699597516304008e-05, "loss": 0.0061, "step": 17543 }, { "epoch": 3.3958978328173375, "grad_norm": 0.03408551961183548, "learning_rate": 7.699358143950617e-05, "loss": 0.0068, "step": 17544 }, { "epoch": 3.39609133126935, "grad_norm": 0.04135293513536453, "learning_rate": 7.699118763420554e-05, "loss": 0.0075, "step": 17545 }, { "epoch": 3.3962848297213624, "grad_norm": 0.06424280256032944, "learning_rate": 7.698879374714709e-05, "loss": 0.0066, "step": 17546 }, { "epoch": 3.396478328173375, "grad_norm": 0.02217843197286129, "learning_rate": 7.698639977833969e-05, "loss": 0.0067, "step": 17547 }, { "epoch": 3.396671826625387, "grad_norm": 0.05528678372502327, "learning_rate": 7.698400572779231e-05, "loss": 0.0053, "step": 17548 }, { "epoch": 3.3968653250773992, "grad_norm": 0.11793158948421478, "learning_rate": 7.69816115955138e-05, "loss": 0.0054, "step": 17549 }, { "epoch": 3.3970588235294117, "grad_norm": 0.03103470988571644, "learning_rate": 7.697921738151308e-05, "loss": 0.0058, "step": 17550 }, { "epoch": 3.397252321981424, "grad_norm": 0.0986773818731308, "learning_rate": 
7.697682308579904e-05, "loss": 0.0078, "step": 17551 }, { "epoch": 3.3974458204334366, "grad_norm": 0.04393409937620163, "learning_rate": 7.697442870838059e-05, "loss": 0.0067, "step": 17552 }, { "epoch": 3.397639318885449, "grad_norm": 0.0654318556189537, "learning_rate": 7.697203424926662e-05, "loss": 0.0066, "step": 17553 }, { "epoch": 3.3978328173374615, "grad_norm": 0.05616578087210655, "learning_rate": 7.696963970846605e-05, "loss": 0.007, "step": 17554 }, { "epoch": 3.3980263157894735, "grad_norm": 0.03416965901851654, "learning_rate": 7.696724508598778e-05, "loss": 0.0073, "step": 17555 }, { "epoch": 3.398219814241486, "grad_norm": 0.07345368713140488, "learning_rate": 7.696485038184071e-05, "loss": 0.0068, "step": 17556 }, { "epoch": 3.3984133126934983, "grad_norm": 0.02271847240626812, "learning_rate": 7.696245559603372e-05, "loss": 0.0059, "step": 17557 }, { "epoch": 3.3986068111455108, "grad_norm": 0.06373649835586548, "learning_rate": 7.696006072857575e-05, "loss": 0.0056, "step": 17558 }, { "epoch": 3.398800309597523, "grad_norm": 0.04812169820070267, "learning_rate": 7.695766577947566e-05, "loss": 0.0051, "step": 17559 }, { "epoch": 3.3989938080495357, "grad_norm": 0.04229336231946945, "learning_rate": 7.695527074874239e-05, "loss": 0.0066, "step": 17560 }, { "epoch": 3.399187306501548, "grad_norm": 0.04894009232521057, "learning_rate": 7.695287563638483e-05, "loss": 0.0062, "step": 17561 }, { "epoch": 3.3993808049535605, "grad_norm": 0.03508096560835838, "learning_rate": 7.695048044241192e-05, "loss": 0.0057, "step": 17562 }, { "epoch": 3.399574303405573, "grad_norm": 0.04204970970749855, "learning_rate": 7.69480851668325e-05, "loss": 0.0051, "step": 17563 }, { "epoch": 3.399767801857585, "grad_norm": 0.038391999900341034, "learning_rate": 7.694568980965551e-05, "loss": 0.0054, "step": 17564 }, { "epoch": 3.3999613003095974, "grad_norm": 0.039833713322877884, "learning_rate": 7.694329437088984e-05, "loss": 0.0067, "step": 17565 }, { "epoch": 3.40015479876161, "grad_norm": 0.0313817523419857, "learning_rate": 7.694089885054444e-05, "loss": 0.0064, "step": 17566 }, { "epoch": 3.4003482972136223, "grad_norm": 0.03707849606871605, "learning_rate": 7.693850324862817e-05, "loss": 0.0065, "step": 17567 }, { "epoch": 3.4005417956656347, "grad_norm": 0.06325430423021317, "learning_rate": 7.693610756514995e-05, "loss": 0.0065, "step": 17568 }, { "epoch": 3.400735294117647, "grad_norm": 0.03994651511311531, "learning_rate": 7.693371180011869e-05, "loss": 0.0065, "step": 17569 }, { "epoch": 3.4009287925696596, "grad_norm": 0.06664320081472397, "learning_rate": 7.693131595354329e-05, "loss": 0.0066, "step": 17570 }, { "epoch": 3.4011222910216716, "grad_norm": 0.03721275180578232, "learning_rate": 7.692892002543266e-05, "loss": 0.0062, "step": 17571 }, { "epoch": 3.401315789473684, "grad_norm": 0.10161769390106201, "learning_rate": 7.692652401579571e-05, "loss": 0.0061, "step": 17572 }, { "epoch": 3.4015092879256965, "grad_norm": 0.07616210728883743, "learning_rate": 7.692412792464134e-05, "loss": 0.0072, "step": 17573 }, { "epoch": 3.401702786377709, "grad_norm": 0.035894472151994705, "learning_rate": 7.692173175197847e-05, "loss": 0.0059, "step": 17574 }, { "epoch": 3.4018962848297214, "grad_norm": 0.07237987220287323, "learning_rate": 7.691933549781602e-05, "loss": 0.0056, "step": 17575 }, { "epoch": 3.402089783281734, "grad_norm": 0.05762743204832077, "learning_rate": 7.691693916216287e-05, "loss": 0.0053, "step": 17576 }, { "epoch": 3.4022832817337463, "grad_norm": 
0.048587799072265625, "learning_rate": 7.691454274502795e-05, "loss": 0.0085, "step": 17577 }, { "epoch": 3.4024767801857587, "grad_norm": 0.08250513672828674, "learning_rate": 7.691214624642015e-05, "loss": 0.006, "step": 17578 }, { "epoch": 3.4026702786377707, "grad_norm": 0.033908914774656296, "learning_rate": 7.690974966634836e-05, "loss": 0.0067, "step": 17579 }, { "epoch": 3.402863777089783, "grad_norm": 0.09522027522325516, "learning_rate": 7.690735300482157e-05, "loss": 0.0077, "step": 17580 }, { "epoch": 3.4030572755417956, "grad_norm": 0.05311880633234978, "learning_rate": 7.690495626184862e-05, "loss": 0.0064, "step": 17581 }, { "epoch": 3.403250773993808, "grad_norm": 0.06563189625740051, "learning_rate": 7.690255943743842e-05, "loss": 0.007, "step": 17582 }, { "epoch": 3.4034442724458205, "grad_norm": 0.05862858146429062, "learning_rate": 7.690016253159993e-05, "loss": 0.0083, "step": 17583 }, { "epoch": 3.403637770897833, "grad_norm": 0.0609712079167366, "learning_rate": 7.689776554434201e-05, "loss": 0.006, "step": 17584 }, { "epoch": 3.4038312693498454, "grad_norm": 0.05393555387854576, "learning_rate": 7.689536847567361e-05, "loss": 0.006, "step": 17585 }, { "epoch": 3.4040247678018574, "grad_norm": 0.07295319437980652, "learning_rate": 7.689297132560362e-05, "loss": 0.0052, "step": 17586 }, { "epoch": 3.40421826625387, "grad_norm": 0.03339444100856781, "learning_rate": 7.689057409414095e-05, "loss": 0.0065, "step": 17587 }, { "epoch": 3.4044117647058822, "grad_norm": 0.06041650474071503, "learning_rate": 7.688817678129453e-05, "loss": 0.0063, "step": 17588 }, { "epoch": 3.4046052631578947, "grad_norm": 0.05146701633930206, "learning_rate": 7.688577938707325e-05, "loss": 0.0057, "step": 17589 }, { "epoch": 3.404798761609907, "grad_norm": 0.08248056471347809, "learning_rate": 7.688338191148605e-05, "loss": 0.007, "step": 17590 }, { "epoch": 3.4049922600619196, "grad_norm": 0.028777528554201126, "learning_rate": 7.688098435454182e-05, "loss": 0.0071, "step": 17591 }, { "epoch": 3.405185758513932, "grad_norm": 0.0383104607462883, "learning_rate": 7.687858671624947e-05, "loss": 0.0061, "step": 17592 }, { "epoch": 3.4053792569659445, "grad_norm": 0.029912108555436134, "learning_rate": 7.687618899661794e-05, "loss": 0.0071, "step": 17593 }, { "epoch": 3.405572755417957, "grad_norm": 0.04248017445206642, "learning_rate": 7.687379119565613e-05, "loss": 0.0053, "step": 17594 }, { "epoch": 3.405766253869969, "grad_norm": 0.03326534107327461, "learning_rate": 7.687139331337296e-05, "loss": 0.0068, "step": 17595 }, { "epoch": 3.4059597523219813, "grad_norm": 0.07567314803600311, "learning_rate": 7.686899534977733e-05, "loss": 0.0055, "step": 17596 }, { "epoch": 3.406153250773994, "grad_norm": 0.05058307945728302, "learning_rate": 7.686659730487816e-05, "loss": 0.0064, "step": 17597 }, { "epoch": 3.406346749226006, "grad_norm": 0.05567153915762901, "learning_rate": 7.686419917868437e-05, "loss": 0.0055, "step": 17598 }, { "epoch": 3.4065402476780187, "grad_norm": 0.06751666963100433, "learning_rate": 7.686180097120488e-05, "loss": 0.0056, "step": 17599 }, { "epoch": 3.406733746130031, "grad_norm": 0.037227120250463486, "learning_rate": 7.685940268244861e-05, "loss": 0.0063, "step": 17600 }, { "epoch": 3.406927244582043, "grad_norm": 0.06093898415565491, "learning_rate": 7.685700431242444e-05, "loss": 0.0054, "step": 17601 }, { "epoch": 3.4071207430340555, "grad_norm": 0.04278849810361862, "learning_rate": 7.685460586114135e-05, "loss": 0.0063, "step": 17602 }, { "epoch": 
3.407314241486068, "grad_norm": 0.05179010331630707, "learning_rate": 7.68522073286082e-05, "loss": 0.0081, "step": 17603 }, { "epoch": 3.4075077399380804, "grad_norm": 0.049080293625593185, "learning_rate": 7.684980871483393e-05, "loss": 0.0071, "step": 17604 }, { "epoch": 3.407701238390093, "grad_norm": 0.028106024488806725, "learning_rate": 7.684741001982745e-05, "loss": 0.0065, "step": 17605 }, { "epoch": 3.4078947368421053, "grad_norm": 0.04875651001930237, "learning_rate": 7.684501124359769e-05, "loss": 0.0053, "step": 17606 }, { "epoch": 3.4080882352941178, "grad_norm": 0.04634995386004448, "learning_rate": 7.684261238615355e-05, "loss": 0.0049, "step": 17607 }, { "epoch": 3.40828173374613, "grad_norm": 0.0625791922211647, "learning_rate": 7.684021344750398e-05, "loss": 0.0064, "step": 17608 }, { "epoch": 3.4084752321981426, "grad_norm": 0.044035542756319046, "learning_rate": 7.683781442765786e-05, "loss": 0.0077, "step": 17609 }, { "epoch": 3.4086687306501546, "grad_norm": 0.061893101781606674, "learning_rate": 7.683541532662413e-05, "loss": 0.0065, "step": 17610 }, { "epoch": 3.408862229102167, "grad_norm": 0.03983031213283539, "learning_rate": 7.68330161444117e-05, "loss": 0.0061, "step": 17611 }, { "epoch": 3.4090557275541795, "grad_norm": 0.04999644681811333, "learning_rate": 7.68306168810295e-05, "loss": 0.0071, "step": 17612 }, { "epoch": 3.409249226006192, "grad_norm": 0.05257503315806389, "learning_rate": 7.682821753648645e-05, "loss": 0.0082, "step": 17613 }, { "epoch": 3.4094427244582044, "grad_norm": 0.033987827599048615, "learning_rate": 7.682581811079147e-05, "loss": 0.0062, "step": 17614 }, { "epoch": 3.409636222910217, "grad_norm": 0.058856699615716934, "learning_rate": 7.682341860395348e-05, "loss": 0.0057, "step": 17615 }, { "epoch": 3.4098297213622293, "grad_norm": 0.04447528347373009, "learning_rate": 7.682101901598138e-05, "loss": 0.0054, "step": 17616 }, { "epoch": 3.4100232198142413, "grad_norm": 0.052338097244501114, "learning_rate": 7.681861934688412e-05, "loss": 0.0054, "step": 17617 }, { "epoch": 3.4102167182662537, "grad_norm": 0.0632743313908577, "learning_rate": 7.68162195966706e-05, "loss": 0.006, "step": 17618 }, { "epoch": 3.410410216718266, "grad_norm": 0.03808412700891495, "learning_rate": 7.681381976534976e-05, "loss": 0.0061, "step": 17619 }, { "epoch": 3.4106037151702786, "grad_norm": 0.06375718861818314, "learning_rate": 7.68114198529305e-05, "loss": 0.0069, "step": 17620 }, { "epoch": 3.410797213622291, "grad_norm": 0.036150407046079636, "learning_rate": 7.680901985942178e-05, "loss": 0.0067, "step": 17621 }, { "epoch": 3.4109907120743035, "grad_norm": 0.04873843491077423, "learning_rate": 7.680661978483249e-05, "loss": 0.0068, "step": 17622 }, { "epoch": 3.411184210526316, "grad_norm": 0.05576487258076668, "learning_rate": 7.680421962917153e-05, "loss": 0.0058, "step": 17623 }, { "epoch": 3.4113777089783284, "grad_norm": 0.03365300968289375, "learning_rate": 7.68018193924479e-05, "loss": 0.0071, "step": 17624 }, { "epoch": 3.4115712074303404, "grad_norm": 0.044907864183187485, "learning_rate": 7.679941907467045e-05, "loss": 0.0058, "step": 17625 }, { "epoch": 3.411764705882353, "grad_norm": 0.032692041248083115, "learning_rate": 7.679701867584814e-05, "loss": 0.006, "step": 17626 }, { "epoch": 3.4119582043343653, "grad_norm": 0.04301697015762329, "learning_rate": 7.679461819598989e-05, "loss": 0.0056, "step": 17627 }, { "epoch": 3.4121517027863777, "grad_norm": 0.03848491609096527, "learning_rate": 7.679221763510462e-05, "loss": 0.0065, 
"step": 17628 }, { "epoch": 3.41234520123839, "grad_norm": 0.03324590623378754, "learning_rate": 7.678981699320124e-05, "loss": 0.0049, "step": 17629 }, { "epoch": 3.4125386996904026, "grad_norm": 0.043113503605127335, "learning_rate": 7.678741627028869e-05, "loss": 0.007, "step": 17630 }, { "epoch": 3.412732198142415, "grad_norm": 0.02954016998410225, "learning_rate": 7.67850154663759e-05, "loss": 0.007, "step": 17631 }, { "epoch": 3.412925696594427, "grad_norm": 0.03758421540260315, "learning_rate": 7.67826145814718e-05, "loss": 0.0072, "step": 17632 }, { "epoch": 3.4131191950464395, "grad_norm": 0.05183819308876991, "learning_rate": 7.678021361558531e-05, "loss": 0.0052, "step": 17633 }, { "epoch": 3.413312693498452, "grad_norm": 0.029331600293517113, "learning_rate": 7.677781256872533e-05, "loss": 0.006, "step": 17634 }, { "epoch": 3.4135061919504643, "grad_norm": 0.049373872578144073, "learning_rate": 7.677541144090082e-05, "loss": 0.0074, "step": 17635 }, { "epoch": 3.413699690402477, "grad_norm": 0.024315815418958664, "learning_rate": 7.67730102321207e-05, "loss": 0.0071, "step": 17636 }, { "epoch": 3.4138931888544892, "grad_norm": 0.05033310130238533, "learning_rate": 7.677060894239389e-05, "loss": 0.0068, "step": 17637 }, { "epoch": 3.4140866873065017, "grad_norm": 0.030635396018624306, "learning_rate": 7.676820757172932e-05, "loss": 0.0069, "step": 17638 }, { "epoch": 3.414280185758514, "grad_norm": 0.06408365070819855, "learning_rate": 7.676580612013592e-05, "loss": 0.0066, "step": 17639 }, { "epoch": 3.4144736842105265, "grad_norm": 0.02990514412522316, "learning_rate": 7.676340458762261e-05, "loss": 0.0075, "step": 17640 }, { "epoch": 3.4146671826625385, "grad_norm": 0.0645299181342125, "learning_rate": 7.676100297419833e-05, "loss": 0.0054, "step": 17641 }, { "epoch": 3.414860681114551, "grad_norm": 0.03845079243183136, "learning_rate": 7.675860127987199e-05, "loss": 0.0071, "step": 17642 }, { "epoch": 3.4150541795665634, "grad_norm": 0.0432058721780777, "learning_rate": 7.675619950465255e-05, "loss": 0.0069, "step": 17643 }, { "epoch": 3.415247678018576, "grad_norm": 0.03421802446246147, "learning_rate": 7.675379764854892e-05, "loss": 0.006, "step": 17644 }, { "epoch": 3.4154411764705883, "grad_norm": 0.043441291898489, "learning_rate": 7.675139571157002e-05, "loss": 0.0049, "step": 17645 }, { "epoch": 3.4156346749226008, "grad_norm": 0.04914834350347519, "learning_rate": 7.67489936937248e-05, "loss": 0.0055, "step": 17646 }, { "epoch": 3.415828173374613, "grad_norm": 0.05045771226286888, "learning_rate": 7.674659159502217e-05, "loss": 0.0067, "step": 17647 }, { "epoch": 3.416021671826625, "grad_norm": 0.0398179367184639, "learning_rate": 7.674418941547108e-05, "loss": 0.0065, "step": 17648 }, { "epoch": 3.4162151702786376, "grad_norm": 0.04341746121644974, "learning_rate": 7.674178715508045e-05, "loss": 0.0064, "step": 17649 }, { "epoch": 3.41640866873065, "grad_norm": 0.05745241418480873, "learning_rate": 7.673938481385922e-05, "loss": 0.0069, "step": 17650 }, { "epoch": 3.4166021671826625, "grad_norm": 0.035685088485479355, "learning_rate": 7.67369823918163e-05, "loss": 0.0049, "step": 17651 }, { "epoch": 3.416795665634675, "grad_norm": 0.05353257805109024, "learning_rate": 7.673457988896065e-05, "loss": 0.0063, "step": 17652 }, { "epoch": 3.4169891640866874, "grad_norm": 0.027277246117591858, "learning_rate": 7.673217730530116e-05, "loss": 0.0058, "step": 17653 }, { "epoch": 3.4171826625387, "grad_norm": 0.05540775507688522, "learning_rate": 7.672977464084682e-05, 
"loss": 0.0061, "step": 17654 }, { "epoch": 3.4173761609907123, "grad_norm": 0.035374682396650314, "learning_rate": 7.672737189560651e-05, "loss": 0.006, "step": 17655 }, { "epoch": 3.4175696594427243, "grad_norm": 0.04976224526762962, "learning_rate": 7.67249690695892e-05, "loss": 0.0066, "step": 17656 }, { "epoch": 3.4177631578947367, "grad_norm": 0.0502350740134716, "learning_rate": 7.67225661628038e-05, "loss": 0.0059, "step": 17657 }, { "epoch": 3.417956656346749, "grad_norm": 0.03332266956567764, "learning_rate": 7.672016317525926e-05, "loss": 0.0066, "step": 17658 }, { "epoch": 3.4181501547987616, "grad_norm": 0.05909927934408188, "learning_rate": 7.67177601069645e-05, "loss": 0.0057, "step": 17659 }, { "epoch": 3.418343653250774, "grad_norm": 0.03409068286418915, "learning_rate": 7.671535695792845e-05, "loss": 0.0072, "step": 17660 }, { "epoch": 3.4185371517027865, "grad_norm": 0.055270709097385406, "learning_rate": 7.671295372816007e-05, "loss": 0.0082, "step": 17661 }, { "epoch": 3.418730650154799, "grad_norm": 0.04876001551747322, "learning_rate": 7.671055041766827e-05, "loss": 0.0051, "step": 17662 }, { "epoch": 3.418924148606811, "grad_norm": 0.04391311854124069, "learning_rate": 7.6708147026462e-05, "loss": 0.0074, "step": 17663 }, { "epoch": 3.4191176470588234, "grad_norm": 0.07054624706506729, "learning_rate": 7.670574355455018e-05, "loss": 0.0073, "step": 17664 }, { "epoch": 3.419311145510836, "grad_norm": 0.026052327826619148, "learning_rate": 7.670334000194176e-05, "loss": 0.0059, "step": 17665 }, { "epoch": 3.4195046439628483, "grad_norm": 0.06195824220776558, "learning_rate": 7.670093636864567e-05, "loss": 0.0069, "step": 17666 }, { "epoch": 3.4196981424148607, "grad_norm": 0.04892832785844803, "learning_rate": 7.669853265467084e-05, "loss": 0.0061, "step": 17667 }, { "epoch": 3.419891640866873, "grad_norm": 0.05999457836151123, "learning_rate": 7.66961288600262e-05, "loss": 0.0064, "step": 17668 }, { "epoch": 3.4200851393188856, "grad_norm": 0.05905238911509514, "learning_rate": 7.669372498472073e-05, "loss": 0.008, "step": 17669 }, { "epoch": 3.420278637770898, "grad_norm": 0.0639621838927269, "learning_rate": 7.669132102876331e-05, "loss": 0.006, "step": 17670 }, { "epoch": 3.4204721362229105, "grad_norm": 0.04987364262342453, "learning_rate": 7.668891699216292e-05, "loss": 0.0071, "step": 17671 }, { "epoch": 3.4206656346749225, "grad_norm": 0.07647036761045456, "learning_rate": 7.668651287492847e-05, "loss": 0.0072, "step": 17672 }, { "epoch": 3.420859133126935, "grad_norm": 0.03697020187973976, "learning_rate": 7.668410867706892e-05, "loss": 0.0064, "step": 17673 }, { "epoch": 3.4210526315789473, "grad_norm": 0.06791181862354279, "learning_rate": 7.668170439859319e-05, "loss": 0.007, "step": 17674 }, { "epoch": 3.42124613003096, "grad_norm": 0.046485643833875656, "learning_rate": 7.667930003951022e-05, "loss": 0.0054, "step": 17675 }, { "epoch": 3.4214396284829722, "grad_norm": 0.06147472932934761, "learning_rate": 7.667689559982895e-05, "loss": 0.0057, "step": 17676 }, { "epoch": 3.4216331269349847, "grad_norm": 0.056898657232522964, "learning_rate": 7.667449107955834e-05, "loss": 0.0049, "step": 17677 }, { "epoch": 3.4218266253869967, "grad_norm": 0.04507824778556824, "learning_rate": 7.66720864787073e-05, "loss": 0.0065, "step": 17678 }, { "epoch": 3.422020123839009, "grad_norm": 0.06707418709993362, "learning_rate": 7.66696817972848e-05, "loss": 0.0058, "step": 17679 }, { "epoch": 3.4222136222910216, "grad_norm": 0.05393681675195694, "learning_rate": 
7.666727703529975e-05, "loss": 0.0073, "step": 17680 }, { "epoch": 3.422407120743034, "grad_norm": 0.0868346095085144, "learning_rate": 7.666487219276111e-05, "loss": 0.005, "step": 17681 }, { "epoch": 3.4226006191950464, "grad_norm": 0.05718820542097092, "learning_rate": 7.666246726967779e-05, "loss": 0.0068, "step": 17682 }, { "epoch": 3.422794117647059, "grad_norm": 0.06143924593925476, "learning_rate": 7.666006226605878e-05, "loss": 0.0059, "step": 17683 }, { "epoch": 3.4229876160990713, "grad_norm": 0.07348950952291489, "learning_rate": 7.6657657181913e-05, "loss": 0.006, "step": 17684 }, { "epoch": 3.4231811145510838, "grad_norm": 0.040525875985622406, "learning_rate": 7.665525201724937e-05, "loss": 0.0058, "step": 17685 }, { "epoch": 3.423374613003096, "grad_norm": 0.08133198320865631, "learning_rate": 7.665284677207685e-05, "loss": 0.0074, "step": 17686 }, { "epoch": 3.423568111455108, "grad_norm": 0.03435536473989487, "learning_rate": 7.665044144640438e-05, "loss": 0.0069, "step": 17687 }, { "epoch": 3.4237616099071206, "grad_norm": 0.08517426252365112, "learning_rate": 7.664803604024092e-05, "loss": 0.0066, "step": 17688 }, { "epoch": 3.423955108359133, "grad_norm": 0.02756480500102043, "learning_rate": 7.664563055359538e-05, "loss": 0.0084, "step": 17689 }, { "epoch": 3.4241486068111455, "grad_norm": 0.06639926880598068, "learning_rate": 7.664322498647672e-05, "loss": 0.0069, "step": 17690 }, { "epoch": 3.424342105263158, "grad_norm": 0.042296525090932846, "learning_rate": 7.66408193388939e-05, "loss": 0.0081, "step": 17691 }, { "epoch": 3.4245356037151704, "grad_norm": 0.07658042013645172, "learning_rate": 7.663841361085583e-05, "loss": 0.0062, "step": 17692 }, { "epoch": 3.424729102167183, "grad_norm": 0.052523981779813766, "learning_rate": 7.663600780237146e-05, "loss": 0.0082, "step": 17693 }, { "epoch": 3.424922600619195, "grad_norm": 0.07064469903707504, "learning_rate": 7.663360191344976e-05, "loss": 0.0076, "step": 17694 }, { "epoch": 3.4251160990712073, "grad_norm": 0.051001135259866714, "learning_rate": 7.663119594409966e-05, "loss": 0.0067, "step": 17695 }, { "epoch": 3.4253095975232197, "grad_norm": 0.05934486910700798, "learning_rate": 7.662878989433009e-05, "loss": 0.0067, "step": 17696 }, { "epoch": 3.425503095975232, "grad_norm": 0.06401378661394119, "learning_rate": 7.662638376415002e-05, "loss": 0.0064, "step": 17697 }, { "epoch": 3.4256965944272446, "grad_norm": 0.05490615963935852, "learning_rate": 7.662397755356838e-05, "loss": 0.007, "step": 17698 }, { "epoch": 3.425890092879257, "grad_norm": 0.06949488818645477, "learning_rate": 7.662157126259412e-05, "loss": 0.006, "step": 17699 }, { "epoch": 3.4260835913312695, "grad_norm": 0.0573817603290081, "learning_rate": 7.66191648912362e-05, "loss": 0.0066, "step": 17700 }, { "epoch": 3.426277089783282, "grad_norm": 0.05805688723921776, "learning_rate": 7.661675843950353e-05, "loss": 0.0068, "step": 17701 }, { "epoch": 3.426470588235294, "grad_norm": 0.0357816107571125, "learning_rate": 7.66143519074051e-05, "loss": 0.0062, "step": 17702 }, { "epoch": 3.4266640866873064, "grad_norm": 0.04908798262476921, "learning_rate": 7.661194529494982e-05, "loss": 0.0069, "step": 17703 }, { "epoch": 3.426857585139319, "grad_norm": 0.02553635463118553, "learning_rate": 7.660953860214667e-05, "loss": 0.005, "step": 17704 }, { "epoch": 3.4270510835913313, "grad_norm": 0.04674134403467178, "learning_rate": 7.660713182900455e-05, "loss": 0.007, "step": 17705 }, { "epoch": 3.4272445820433437, "grad_norm": 0.05060513690114021, 
"learning_rate": 7.660472497553246e-05, "loss": 0.005, "step": 17706 }, { "epoch": 3.427438080495356, "grad_norm": 0.04358704015612602, "learning_rate": 7.66023180417393e-05, "loss": 0.0063, "step": 17707 }, { "epoch": 3.4276315789473686, "grad_norm": 0.05749882757663727, "learning_rate": 7.659991102763407e-05, "loss": 0.0063, "step": 17708 }, { "epoch": 3.4278250773993806, "grad_norm": 0.03757021576166153, "learning_rate": 7.65975039332257e-05, "loss": 0.0051, "step": 17709 }, { "epoch": 3.428018575851393, "grad_norm": 0.04687783494591713, "learning_rate": 7.659509675852312e-05, "loss": 0.0063, "step": 17710 }, { "epoch": 3.4282120743034055, "grad_norm": 0.055339887738227844, "learning_rate": 7.659268950353527e-05, "loss": 0.0067, "step": 17711 }, { "epoch": 3.428405572755418, "grad_norm": 0.04087930917739868, "learning_rate": 7.659028216827115e-05, "loss": 0.0055, "step": 17712 }, { "epoch": 3.4285990712074303, "grad_norm": 0.03405693545937538, "learning_rate": 7.658787475273969e-05, "loss": 0.0069, "step": 17713 }, { "epoch": 3.428792569659443, "grad_norm": 0.04031829163432121, "learning_rate": 7.65854672569498e-05, "loss": 0.0064, "step": 17714 }, { "epoch": 3.4289860681114552, "grad_norm": 0.02610485441982746, "learning_rate": 7.658305968091049e-05, "loss": 0.0065, "step": 17715 }, { "epoch": 3.4291795665634677, "grad_norm": 0.03651438653469086, "learning_rate": 7.658065202463066e-05, "loss": 0.0059, "step": 17716 }, { "epoch": 3.42937306501548, "grad_norm": 0.046487703919410706, "learning_rate": 7.65782442881193e-05, "loss": 0.0074, "step": 17717 }, { "epoch": 3.429566563467492, "grad_norm": 0.039234261959791183, "learning_rate": 7.657583647138533e-05, "loss": 0.0055, "step": 17718 }, { "epoch": 3.4297600619195046, "grad_norm": 0.044515471905469894, "learning_rate": 7.657342857443773e-05, "loss": 0.0075, "step": 17719 }, { "epoch": 3.429953560371517, "grad_norm": 0.05629916861653328, "learning_rate": 7.657102059728542e-05, "loss": 0.0049, "step": 17720 }, { "epoch": 3.4301470588235294, "grad_norm": 0.03993247076869011, "learning_rate": 7.65686125399374e-05, "loss": 0.0065, "step": 17721 }, { "epoch": 3.430340557275542, "grad_norm": 0.07243512570858002, "learning_rate": 7.656620440240257e-05, "loss": 0.0057, "step": 17722 }, { "epoch": 3.4305340557275543, "grad_norm": 0.03212010860443115, "learning_rate": 7.656379618468989e-05, "loss": 0.0048, "step": 17723 }, { "epoch": 3.4307275541795663, "grad_norm": 0.06966539472341537, "learning_rate": 7.656138788680835e-05, "loss": 0.0081, "step": 17724 }, { "epoch": 3.4309210526315788, "grad_norm": 0.048905834555625916, "learning_rate": 7.655897950876691e-05, "loss": 0.0051, "step": 17725 }, { "epoch": 3.431114551083591, "grad_norm": 0.05281096696853638, "learning_rate": 7.655657105057447e-05, "loss": 0.0065, "step": 17726 }, { "epoch": 3.4313080495356036, "grad_norm": 0.037671077996492386, "learning_rate": 7.655416251223999e-05, "loss": 0.0061, "step": 17727 }, { "epoch": 3.431501547987616, "grad_norm": 0.058614909648895264, "learning_rate": 7.655175389377248e-05, "loss": 0.0073, "step": 17728 }, { "epoch": 3.4316950464396285, "grad_norm": 0.0421644002199173, "learning_rate": 7.654934519518083e-05, "loss": 0.0054, "step": 17729 }, { "epoch": 3.431888544891641, "grad_norm": 0.03961366042494774, "learning_rate": 7.654693641647403e-05, "loss": 0.0067, "step": 17730 }, { "epoch": 3.4320820433436534, "grad_norm": 0.023071860894560814, "learning_rate": 7.654452755766105e-05, "loss": 0.0052, "step": 17731 }, { "epoch": 3.432275541795666, "grad_norm": 
0.061894696205854416, "learning_rate": 7.654211861875081e-05, "loss": 0.0066, "step": 17732 }, { "epoch": 3.432469040247678, "grad_norm": 0.023291852325201035, "learning_rate": 7.653970959975228e-05, "loss": 0.0066, "step": 17733 }, { "epoch": 3.4326625386996903, "grad_norm": 0.0658951997756958, "learning_rate": 7.653730050067444e-05, "loss": 0.0064, "step": 17734 }, { "epoch": 3.4328560371517027, "grad_norm": 0.02819235995411873, "learning_rate": 7.65348913215262e-05, "loss": 0.0056, "step": 17735 }, { "epoch": 3.433049535603715, "grad_norm": 0.050592221319675446, "learning_rate": 7.653248206231654e-05, "loss": 0.0064, "step": 17736 }, { "epoch": 3.4332430340557276, "grad_norm": 0.027867425233125687, "learning_rate": 7.653007272305443e-05, "loss": 0.0068, "step": 17737 }, { "epoch": 3.43343653250774, "grad_norm": 0.03906083479523659, "learning_rate": 7.652766330374881e-05, "loss": 0.0078, "step": 17738 }, { "epoch": 3.4336300309597525, "grad_norm": 0.043089065700769424, "learning_rate": 7.652525380440863e-05, "loss": 0.0059, "step": 17739 }, { "epoch": 3.4338235294117645, "grad_norm": 0.05313378572463989, "learning_rate": 7.652284422504288e-05, "loss": 0.0058, "step": 17740 }, { "epoch": 3.434017027863777, "grad_norm": 0.060280974954366684, "learning_rate": 7.65204345656605e-05, "loss": 0.0057, "step": 17741 }, { "epoch": 3.4342105263157894, "grad_norm": 0.07826457917690277, "learning_rate": 7.651802482627044e-05, "loss": 0.006, "step": 17742 }, { "epoch": 3.434404024767802, "grad_norm": 0.06522616744041443, "learning_rate": 7.651561500688165e-05, "loss": 0.0059, "step": 17743 }, { "epoch": 3.4345975232198143, "grad_norm": 0.09056374430656433, "learning_rate": 7.651320510750312e-05, "loss": 0.0062, "step": 17744 }, { "epoch": 3.4347910216718267, "grad_norm": 0.06334051489830017, "learning_rate": 7.651079512814379e-05, "loss": 0.0068, "step": 17745 }, { "epoch": 3.434984520123839, "grad_norm": 0.08073122054338455, "learning_rate": 7.650838506881263e-05, "loss": 0.0063, "step": 17746 }, { "epoch": 3.4351780185758516, "grad_norm": 0.09241842478513718, "learning_rate": 7.65059749295186e-05, "loss": 0.0064, "step": 17747 }, { "epoch": 3.4353715170278636, "grad_norm": 0.028946880251169205, "learning_rate": 7.650356471027063e-05, "loss": 0.0062, "step": 17748 }, { "epoch": 3.435565015479876, "grad_norm": 0.1421540230512619, "learning_rate": 7.650115441107772e-05, "loss": 0.0068, "step": 17749 }, { "epoch": 3.4357585139318885, "grad_norm": 0.04275376722216606, "learning_rate": 7.649874403194882e-05, "loss": 0.0073, "step": 17750 }, { "epoch": 3.435952012383901, "grad_norm": 0.13224181532859802, "learning_rate": 7.649633357289288e-05, "loss": 0.0066, "step": 17751 }, { "epoch": 3.4361455108359134, "grad_norm": 0.08194594830274582, "learning_rate": 7.649392303391887e-05, "loss": 0.0073, "step": 17752 }, { "epoch": 3.436339009287926, "grad_norm": 0.11808095127344131, "learning_rate": 7.649151241503575e-05, "loss": 0.0064, "step": 17753 }, { "epoch": 3.4365325077399382, "grad_norm": 0.1023474708199501, "learning_rate": 7.648910171625249e-05, "loss": 0.0074, "step": 17754 }, { "epoch": 3.4367260061919502, "grad_norm": 0.09274230897426605, "learning_rate": 7.648669093757802e-05, "loss": 0.0055, "step": 17755 }, { "epoch": 3.4369195046439627, "grad_norm": 0.11719851940870285, "learning_rate": 7.648428007902136e-05, "loss": 0.0073, "step": 17756 }, { "epoch": 3.437113003095975, "grad_norm": 0.06251902133226395, "learning_rate": 7.648186914059142e-05, "loss": 0.0079, "step": 17757 }, { "epoch": 
3.4373065015479876, "grad_norm": 0.11830580234527588, "learning_rate": 7.64794581222972e-05, "loss": 0.0054, "step": 17758 }, { "epoch": 3.4375, "grad_norm": 0.03253013268113136, "learning_rate": 7.647704702414763e-05, "loss": 0.0053, "step": 17759 }, { "epoch": 3.4376934984520124, "grad_norm": 0.11850510537624359, "learning_rate": 7.64746358461517e-05, "loss": 0.0064, "step": 17760 }, { "epoch": 3.437886996904025, "grad_norm": 0.04078439250588417, "learning_rate": 7.647222458831837e-05, "loss": 0.0062, "step": 17761 }, { "epoch": 3.4380804953560373, "grad_norm": 0.07119421660900116, "learning_rate": 7.646981325065658e-05, "loss": 0.006, "step": 17762 }, { "epoch": 3.4382739938080498, "grad_norm": 0.044140562415122986, "learning_rate": 7.646740183317533e-05, "loss": 0.0077, "step": 17763 }, { "epoch": 3.4384674922600618, "grad_norm": 0.031046822667121887, "learning_rate": 7.646499033588358e-05, "loss": 0.0072, "step": 17764 }, { "epoch": 3.438660990712074, "grad_norm": 0.07497643679380417, "learning_rate": 7.646257875879027e-05, "loss": 0.008, "step": 17765 }, { "epoch": 3.4388544891640866, "grad_norm": 0.03775550797581673, "learning_rate": 7.646016710190439e-05, "loss": 0.0064, "step": 17766 }, { "epoch": 3.439047987616099, "grad_norm": 0.05787872523069382, "learning_rate": 7.645775536523488e-05, "loss": 0.0067, "step": 17767 }, { "epoch": 3.4392414860681115, "grad_norm": 0.02668830007314682, "learning_rate": 7.645534354879075e-05, "loss": 0.0074, "step": 17768 }, { "epoch": 3.439434984520124, "grad_norm": 0.04851663112640381, "learning_rate": 7.645293165258092e-05, "loss": 0.0057, "step": 17769 }, { "epoch": 3.4396284829721364, "grad_norm": 0.018883202224969864, "learning_rate": 7.645051967661438e-05, "loss": 0.0064, "step": 17770 }, { "epoch": 3.4398219814241484, "grad_norm": 0.03877681866288185, "learning_rate": 7.64481076209001e-05, "loss": 0.0065, "step": 17771 }, { "epoch": 3.440015479876161, "grad_norm": 0.06087224930524826, "learning_rate": 7.644569548544703e-05, "loss": 0.0071, "step": 17772 }, { "epoch": 3.4402089783281733, "grad_norm": 0.09022103250026703, "learning_rate": 7.644328327026415e-05, "loss": 0.0058, "step": 17773 }, { "epoch": 3.4404024767801857, "grad_norm": 0.05174883082509041, "learning_rate": 7.644087097536042e-05, "loss": 0.0059, "step": 17774 }, { "epoch": 3.440595975232198, "grad_norm": 0.07945156842470169, "learning_rate": 7.643845860074484e-05, "loss": 0.0071, "step": 17775 }, { "epoch": 3.4407894736842106, "grad_norm": 0.08473196625709534, "learning_rate": 7.643604614642633e-05, "loss": 0.0078, "step": 17776 }, { "epoch": 3.440982972136223, "grad_norm": 0.11614051461219788, "learning_rate": 7.643363361241388e-05, "loss": 0.0074, "step": 17777 }, { "epoch": 3.4411764705882355, "grad_norm": 0.036467295140028, "learning_rate": 7.643122099871648e-05, "loss": 0.0062, "step": 17778 }, { "epoch": 3.4413699690402475, "grad_norm": 0.08338715136051178, "learning_rate": 7.642880830534306e-05, "loss": 0.0068, "step": 17779 }, { "epoch": 3.44156346749226, "grad_norm": 0.04765968769788742, "learning_rate": 7.642639553230263e-05, "loss": 0.0074, "step": 17780 }, { "epoch": 3.4417569659442724, "grad_norm": 0.11047571897506714, "learning_rate": 7.642398267960414e-05, "loss": 0.0075, "step": 17781 }, { "epoch": 3.441950464396285, "grad_norm": 0.02350364811718464, "learning_rate": 7.642156974725655e-05, "loss": 0.0052, "step": 17782 }, { "epoch": 3.4421439628482973, "grad_norm": 0.08021025359630585, "learning_rate": 7.641915673526885e-05, "loss": 0.0058, "step": 17783 }, { 
"epoch": 3.4423374613003097, "grad_norm": 0.039713822305202484, "learning_rate": 7.641674364365e-05, "loss": 0.0075, "step": 17784 }, { "epoch": 3.442530959752322, "grad_norm": 0.08668091893196106, "learning_rate": 7.641433047240898e-05, "loss": 0.006, "step": 17785 }, { "epoch": 3.442724458204334, "grad_norm": 0.06797392666339874, "learning_rate": 7.641191722155475e-05, "loss": 0.006, "step": 17786 }, { "epoch": 3.4429179566563466, "grad_norm": 0.07605277746915817, "learning_rate": 7.64095038910963e-05, "loss": 0.0067, "step": 17787 }, { "epoch": 3.443111455108359, "grad_norm": 0.08229828625917435, "learning_rate": 7.640709048104257e-05, "loss": 0.0061, "step": 17788 }, { "epoch": 3.4433049535603715, "grad_norm": 0.03796909376978874, "learning_rate": 7.640467699140256e-05, "loss": 0.0076, "step": 17789 }, { "epoch": 3.443498452012384, "grad_norm": 0.15020917356014252, "learning_rate": 7.640226342218523e-05, "loss": 0.0057, "step": 17790 }, { "epoch": 3.4436919504643964, "grad_norm": 0.06631677597761154, "learning_rate": 7.639984977339957e-05, "loss": 0.0062, "step": 17791 }, { "epoch": 3.443885448916409, "grad_norm": 0.1335984766483307, "learning_rate": 7.639743604505455e-05, "loss": 0.0067, "step": 17792 }, { "epoch": 3.4440789473684212, "grad_norm": 0.10542599111795425, "learning_rate": 7.639502223715911e-05, "loss": 0.0064, "step": 17793 }, { "epoch": 3.4442724458204337, "grad_norm": 0.07008855044841766, "learning_rate": 7.639260834972226e-05, "loss": 0.0055, "step": 17794 }, { "epoch": 3.4444659442724457, "grad_norm": 0.12356322258710861, "learning_rate": 7.639019438275297e-05, "loss": 0.0058, "step": 17795 }, { "epoch": 3.444659442724458, "grad_norm": 0.04596167802810669, "learning_rate": 7.63877803362602e-05, "loss": 0.0056, "step": 17796 }, { "epoch": 3.4448529411764706, "grad_norm": 0.130493625998497, "learning_rate": 7.638536621025294e-05, "loss": 0.0093, "step": 17797 }, { "epoch": 3.445046439628483, "grad_norm": 0.0602404959499836, "learning_rate": 7.638295200474015e-05, "loss": 0.0074, "step": 17798 }, { "epoch": 3.4452399380804954, "grad_norm": 0.08513538539409637, "learning_rate": 7.638053771973083e-05, "loss": 0.0069, "step": 17799 }, { "epoch": 3.445433436532508, "grad_norm": 0.08466313779354095, "learning_rate": 7.63781233552339e-05, "loss": 0.0071, "step": 17800 }, { "epoch": 3.44562693498452, "grad_norm": 0.05911727622151375, "learning_rate": 7.63757089112584e-05, "loss": 0.0057, "step": 17801 }, { "epoch": 3.4458204334365323, "grad_norm": 0.07962781935930252, "learning_rate": 7.637329438781329e-05, "loss": 0.0062, "step": 17802 }, { "epoch": 3.4460139318885448, "grad_norm": 0.024802306666970253, "learning_rate": 7.637087978490754e-05, "loss": 0.0062, "step": 17803 }, { "epoch": 3.446207430340557, "grad_norm": 0.0809173509478569, "learning_rate": 7.63684651025501e-05, "loss": 0.006, "step": 17804 }, { "epoch": 3.4464009287925697, "grad_norm": 0.03690500184893608, "learning_rate": 7.636605034075e-05, "loss": 0.0058, "step": 17805 }, { "epoch": 3.446594427244582, "grad_norm": 0.09155267477035522, "learning_rate": 7.636363549951616e-05, "loss": 0.007, "step": 17806 }, { "epoch": 3.4467879256965945, "grad_norm": 0.04721016809344292, "learning_rate": 7.636122057885761e-05, "loss": 0.0077, "step": 17807 }, { "epoch": 3.446981424148607, "grad_norm": 0.09332259744405746, "learning_rate": 7.63588055787833e-05, "loss": 0.007, "step": 17808 }, { "epoch": 3.4471749226006194, "grad_norm": 0.03495440632104874, "learning_rate": 7.63563904993022e-05, "loss": 0.0067, "step": 17809 }, { 
"epoch": 3.4473684210526314, "grad_norm": 0.07183536142110825, "learning_rate": 7.635397534042334e-05, "loss": 0.0063, "step": 17810 }, { "epoch": 3.447561919504644, "grad_norm": 0.038134779781103134, "learning_rate": 7.635156010215563e-05, "loss": 0.0073, "step": 17811 }, { "epoch": 3.4477554179566563, "grad_norm": 0.044354815036058426, "learning_rate": 7.634914478450807e-05, "loss": 0.0058, "step": 17812 }, { "epoch": 3.4479489164086687, "grad_norm": 0.023836426436901093, "learning_rate": 7.634672938748967e-05, "loss": 0.0063, "step": 17813 }, { "epoch": 3.448142414860681, "grad_norm": 0.12075605988502502, "learning_rate": 7.634431391110939e-05, "loss": 0.0089, "step": 17814 }, { "epoch": 3.4483359133126936, "grad_norm": 0.05191599205136299, "learning_rate": 7.634189835537622e-05, "loss": 0.0067, "step": 17815 }, { "epoch": 3.448529411764706, "grad_norm": 0.10343784093856812, "learning_rate": 7.633948272029912e-05, "loss": 0.0057, "step": 17816 }, { "epoch": 3.448722910216718, "grad_norm": 0.03020302578806877, "learning_rate": 7.633706700588707e-05, "loss": 0.0057, "step": 17817 }, { "epoch": 3.4489164086687305, "grad_norm": 0.10057400912046432, "learning_rate": 7.633465121214908e-05, "loss": 0.0063, "step": 17818 }, { "epoch": 3.449109907120743, "grad_norm": 0.04103732481598854, "learning_rate": 7.633223533909409e-05, "loss": 0.0064, "step": 17819 }, { "epoch": 3.4493034055727554, "grad_norm": 0.055213745683431625, "learning_rate": 7.632981938673112e-05, "loss": 0.0082, "step": 17820 }, { "epoch": 3.449496904024768, "grad_norm": 0.08767377585172653, "learning_rate": 7.632740335506916e-05, "loss": 0.0063, "step": 17821 }, { "epoch": 3.4496904024767803, "grad_norm": 0.0335090309381485, "learning_rate": 7.632498724411714e-05, "loss": 0.0068, "step": 17822 }, { "epoch": 3.4498839009287927, "grad_norm": 0.10715831816196442, "learning_rate": 7.632257105388409e-05, "loss": 0.0065, "step": 17823 }, { "epoch": 3.450077399380805, "grad_norm": 0.06186579167842865, "learning_rate": 7.632015478437896e-05, "loss": 0.0059, "step": 17824 }, { "epoch": 3.450270897832817, "grad_norm": 0.08841469883918762, "learning_rate": 7.631773843561074e-05, "loss": 0.0068, "step": 17825 }, { "epoch": 3.4504643962848296, "grad_norm": 0.08474095165729523, "learning_rate": 7.631532200758846e-05, "loss": 0.0065, "step": 17826 }, { "epoch": 3.450657894736842, "grad_norm": 0.03850564733147621, "learning_rate": 7.631290550032102e-05, "loss": 0.0044, "step": 17827 }, { "epoch": 3.4508513931888545, "grad_norm": 0.10217326879501343, "learning_rate": 7.631048891381746e-05, "loss": 0.0058, "step": 17828 }, { "epoch": 3.451044891640867, "grad_norm": 0.04217609763145447, "learning_rate": 7.630807224808679e-05, "loss": 0.0067, "step": 17829 }, { "epoch": 3.4512383900928794, "grad_norm": 0.1020510122179985, "learning_rate": 7.630565550313792e-05, "loss": 0.0064, "step": 17830 }, { "epoch": 3.451431888544892, "grad_norm": 0.06686945259571075, "learning_rate": 7.630323867897988e-05, "loss": 0.0066, "step": 17831 }, { "epoch": 3.451625386996904, "grad_norm": 0.07758621871471405, "learning_rate": 7.630082177562164e-05, "loss": 0.0065, "step": 17832 }, { "epoch": 3.4518188854489162, "grad_norm": 0.07659610360860825, "learning_rate": 7.629840479307221e-05, "loss": 0.0054, "step": 17833 }, { "epoch": 3.4520123839009287, "grad_norm": 0.04571741074323654, "learning_rate": 7.629598773134054e-05, "loss": 0.0066, "step": 17834 }, { "epoch": 3.452205882352941, "grad_norm": 0.08185923099517822, "learning_rate": 7.629357059043565e-05, "loss": 
0.0063, "step": 17835 }, { "epoch": 3.4523993808049536, "grad_norm": 0.07129757106304169, "learning_rate": 7.629115337036652e-05, "loss": 0.0053, "step": 17836 }, { "epoch": 3.452592879256966, "grad_norm": 0.0729622095823288, "learning_rate": 7.628873607114212e-05, "loss": 0.0057, "step": 17837 }, { "epoch": 3.4527863777089784, "grad_norm": 0.08914580196142197, "learning_rate": 7.628631869277144e-05, "loss": 0.0064, "step": 17838 }, { "epoch": 3.452979876160991, "grad_norm": 0.05047745257616043, "learning_rate": 7.628390123526346e-05, "loss": 0.0071, "step": 17839 }, { "epoch": 3.4531733746130033, "grad_norm": 0.1037733256816864, "learning_rate": 7.628148369862721e-05, "loss": 0.0076, "step": 17840 }, { "epoch": 3.4533668730650153, "grad_norm": 0.07630249857902527, "learning_rate": 7.627906608287163e-05, "loss": 0.0066, "step": 17841 }, { "epoch": 3.4535603715170278, "grad_norm": 0.08362863957881927, "learning_rate": 7.627664838800572e-05, "loss": 0.0055, "step": 17842 }, { "epoch": 3.45375386996904, "grad_norm": 0.09793422371149063, "learning_rate": 7.62742306140385e-05, "loss": 0.0077, "step": 17843 }, { "epoch": 3.4539473684210527, "grad_norm": 0.03932207450270653, "learning_rate": 7.62718127609789e-05, "loss": 0.0061, "step": 17844 }, { "epoch": 3.454140866873065, "grad_norm": 0.11087261140346527, "learning_rate": 7.626939482883596e-05, "loss": 0.0065, "step": 17845 }, { "epoch": 3.4543343653250775, "grad_norm": 0.05721740797162056, "learning_rate": 7.626697681761865e-05, "loss": 0.0059, "step": 17846 }, { "epoch": 3.4545278637770895, "grad_norm": 0.052797429263591766, "learning_rate": 7.626455872733595e-05, "loss": 0.0062, "step": 17847 }, { "epoch": 3.454721362229102, "grad_norm": 0.14550377428531647, "learning_rate": 7.626214055799686e-05, "loss": 0.0064, "step": 17848 }, { "epoch": 3.4549148606811144, "grad_norm": 0.05953634902834892, "learning_rate": 7.625972230961039e-05, "loss": 0.0072, "step": 17849 }, { "epoch": 3.455108359133127, "grad_norm": 0.09301043301820755, "learning_rate": 7.62573039821855e-05, "loss": 0.0068, "step": 17850 }, { "epoch": 3.4553018575851393, "grad_norm": 0.10650533437728882, "learning_rate": 7.625488557573119e-05, "loss": 0.0058, "step": 17851 }, { "epoch": 3.4554953560371517, "grad_norm": 0.0928226187825203, "learning_rate": 7.625246709025643e-05, "loss": 0.0076, "step": 17852 }, { "epoch": 3.455688854489164, "grad_norm": 0.18638533353805542, "learning_rate": 7.625004852577027e-05, "loss": 0.007, "step": 17853 }, { "epoch": 3.4558823529411766, "grad_norm": 0.06873291730880737, "learning_rate": 7.624762988228164e-05, "loss": 0.0066, "step": 17854 }, { "epoch": 3.456075851393189, "grad_norm": 0.19293373823165894, "learning_rate": 7.624521115979958e-05, "loss": 0.0076, "step": 17855 }, { "epoch": 3.456269349845201, "grad_norm": 0.07562548667192459, "learning_rate": 7.624279235833306e-05, "loss": 0.0057, "step": 17856 }, { "epoch": 3.4564628482972135, "grad_norm": 0.16263440251350403, "learning_rate": 7.624037347789106e-05, "loss": 0.0072, "step": 17857 }, { "epoch": 3.456656346749226, "grad_norm": 0.1150532141327858, "learning_rate": 7.623795451848257e-05, "loss": 0.006, "step": 17858 }, { "epoch": 3.4568498452012384, "grad_norm": 0.07896637171506882, "learning_rate": 7.623553548011663e-05, "loss": 0.0063, "step": 17859 }, { "epoch": 3.457043343653251, "grad_norm": 0.1302533745765686, "learning_rate": 7.623311636280219e-05, "loss": 0.0061, "step": 17860 }, { "epoch": 3.4572368421052633, "grad_norm": 0.07984067499637604, "learning_rate": 
7.623069716654825e-05, "loss": 0.0079, "step": 17861 }, { "epoch": 3.4574303405572757, "grad_norm": 0.11960714310407639, "learning_rate": 7.622827789136381e-05, "loss": 0.0057, "step": 17862 }, { "epoch": 3.4576238390092877, "grad_norm": 0.09851633757352829, "learning_rate": 7.622585853725787e-05, "loss": 0.0063, "step": 17863 }, { "epoch": 3.4578173374613, "grad_norm": 0.0791010782122612, "learning_rate": 7.622343910423942e-05, "loss": 0.0068, "step": 17864 }, { "epoch": 3.4580108359133126, "grad_norm": 0.09163200855255127, "learning_rate": 7.622101959231745e-05, "loss": 0.006, "step": 17865 }, { "epoch": 3.458204334365325, "grad_norm": 0.051549799740314484, "learning_rate": 7.621860000150095e-05, "loss": 0.0057, "step": 17866 }, { "epoch": 3.4583978328173375, "grad_norm": 0.098985955119133, "learning_rate": 7.621618033179892e-05, "loss": 0.0062, "step": 17867 }, { "epoch": 3.45859133126935, "grad_norm": 0.06633996218442917, "learning_rate": 7.621376058322038e-05, "loss": 0.0082, "step": 17868 }, { "epoch": 3.4587848297213624, "grad_norm": 0.06265636533498764, "learning_rate": 7.621134075577428e-05, "loss": 0.0069, "step": 17869 }, { "epoch": 3.458978328173375, "grad_norm": 0.10066785663366318, "learning_rate": 7.620892084946966e-05, "loss": 0.0056, "step": 17870 }, { "epoch": 3.459171826625387, "grad_norm": 0.023731503635644913, "learning_rate": 7.62065008643155e-05, "loss": 0.0055, "step": 17871 }, { "epoch": 3.4593653250773992, "grad_norm": 0.08152510970830917, "learning_rate": 7.620408080032078e-05, "loss": 0.0067, "step": 17872 }, { "epoch": 3.4595588235294117, "grad_norm": 0.05253327265381813, "learning_rate": 7.620166065749453e-05, "loss": 0.006, "step": 17873 }, { "epoch": 3.459752321981424, "grad_norm": 0.08364870399236679, "learning_rate": 7.619924043584573e-05, "loss": 0.008, "step": 17874 }, { "epoch": 3.4599458204334366, "grad_norm": 0.0542791448533535, "learning_rate": 7.619682013538337e-05, "loss": 0.0072, "step": 17875 }, { "epoch": 3.460139318885449, "grad_norm": 0.04626402258872986, "learning_rate": 7.619439975611644e-05, "loss": 0.0071, "step": 17876 }, { "epoch": 3.4603328173374615, "grad_norm": 0.05512981116771698, "learning_rate": 7.619197929805397e-05, "loss": 0.0059, "step": 17877 }, { "epoch": 3.4605263157894735, "grad_norm": 0.03666296601295471, "learning_rate": 7.618955876120493e-05, "loss": 0.0068, "step": 17878 }, { "epoch": 3.460719814241486, "grad_norm": 0.05320196598768234, "learning_rate": 7.618713814557836e-05, "loss": 0.0062, "step": 17879 }, { "epoch": 3.4609133126934983, "grad_norm": 0.04579402506351471, "learning_rate": 7.618471745118321e-05, "loss": 0.0046, "step": 17880 }, { "epoch": 3.4611068111455108, "grad_norm": 0.044945310801267624, "learning_rate": 7.618229667802851e-05, "loss": 0.0048, "step": 17881 }, { "epoch": 3.461300309597523, "grad_norm": 0.07794380933046341, "learning_rate": 7.617987582612323e-05, "loss": 0.0068, "step": 17882 }, { "epoch": 3.4614938080495357, "grad_norm": 0.02251574583351612, "learning_rate": 7.61774548954764e-05, "loss": 0.0063, "step": 17883 }, { "epoch": 3.461687306501548, "grad_norm": 0.07250284403562546, "learning_rate": 7.617503388609702e-05, "loss": 0.0069, "step": 17884 }, { "epoch": 3.4618808049535605, "grad_norm": 0.08424318581819534, "learning_rate": 7.617261279799409e-05, "loss": 0.0073, "step": 17885 }, { "epoch": 3.462074303405573, "grad_norm": 0.12119152396917343, "learning_rate": 7.61701916311766e-05, "loss": 0.0053, "step": 17886 }, { "epoch": 3.462267801857585, "grad_norm": 0.09196396172046661, 
"learning_rate": 7.616777038565355e-05, "loss": 0.0071, "step": 17887 }, { "epoch": 3.4624613003095974, "grad_norm": 0.10116476565599442, "learning_rate": 7.616534906143393e-05, "loss": 0.0055, "step": 17888 }, { "epoch": 3.46265479876161, "grad_norm": 0.1330774575471878, "learning_rate": 7.616292765852676e-05, "loss": 0.0075, "step": 17889 }, { "epoch": 3.4628482972136223, "grad_norm": 0.07310029864311218, "learning_rate": 7.616050617694105e-05, "loss": 0.0061, "step": 17890 }, { "epoch": 3.4630417956656347, "grad_norm": 0.13977186381816864, "learning_rate": 7.615808461668578e-05, "loss": 0.0058, "step": 17891 }, { "epoch": 3.463235294117647, "grad_norm": 0.06586157530546188, "learning_rate": 7.615566297776999e-05, "loss": 0.0074, "step": 17892 }, { "epoch": 3.4634287925696596, "grad_norm": 0.084894098341465, "learning_rate": 7.615324126020264e-05, "loss": 0.006, "step": 17893 }, { "epoch": 3.4636222910216716, "grad_norm": 0.09052388370037079, "learning_rate": 7.615081946399275e-05, "loss": 0.007, "step": 17894 }, { "epoch": 3.463815789473684, "grad_norm": 0.06228082627058029, "learning_rate": 7.614839758914932e-05, "loss": 0.0062, "step": 17895 }, { "epoch": 3.4640092879256965, "grad_norm": 0.1058194488286972, "learning_rate": 7.614597563568137e-05, "loss": 0.0071, "step": 17896 }, { "epoch": 3.464202786377709, "grad_norm": 0.06665971875190735, "learning_rate": 7.614355360359789e-05, "loss": 0.0061, "step": 17897 }, { "epoch": 3.4643962848297214, "grad_norm": 0.11035780608654022, "learning_rate": 7.614113149290789e-05, "loss": 0.0084, "step": 17898 }, { "epoch": 3.464589783281734, "grad_norm": 0.04425390437245369, "learning_rate": 7.613870930362036e-05, "loss": 0.0056, "step": 17899 }, { "epoch": 3.4647832817337463, "grad_norm": 0.06938129663467407, "learning_rate": 7.613628703574435e-05, "loss": 0.0073, "step": 17900 }, { "epoch": 3.4649767801857587, "grad_norm": 0.05992000550031662, "learning_rate": 7.613386468928881e-05, "loss": 0.0067, "step": 17901 }, { "epoch": 3.4651702786377707, "grad_norm": 0.04968991130590439, "learning_rate": 7.613144226426276e-05, "loss": 0.0063, "step": 17902 }, { "epoch": 3.465363777089783, "grad_norm": 0.06580612063407898, "learning_rate": 7.612901976067524e-05, "loss": 0.0051, "step": 17903 }, { "epoch": 3.4655572755417956, "grad_norm": 0.03700089082121849, "learning_rate": 7.61265971785352e-05, "loss": 0.0058, "step": 17904 }, { "epoch": 3.465750773993808, "grad_norm": 0.08421149104833603, "learning_rate": 7.612417451785169e-05, "loss": 0.0075, "step": 17905 }, { "epoch": 3.4659442724458205, "grad_norm": 0.027355704456567764, "learning_rate": 7.612175177863371e-05, "loss": 0.005, "step": 17906 }, { "epoch": 3.466137770897833, "grad_norm": 0.06584171950817108, "learning_rate": 7.611932896089027e-05, "loss": 0.0064, "step": 17907 }, { "epoch": 3.4663312693498454, "grad_norm": 0.08326059579849243, "learning_rate": 7.611690606463035e-05, "loss": 0.0047, "step": 17908 }, { "epoch": 3.4665247678018574, "grad_norm": 0.03554566204547882, "learning_rate": 7.611448308986298e-05, "loss": 0.0063, "step": 17909 }, { "epoch": 3.46671826625387, "grad_norm": 0.05653561279177666, "learning_rate": 7.611206003659717e-05, "loss": 0.0071, "step": 17910 }, { "epoch": 3.4669117647058822, "grad_norm": 0.06071716174483299, "learning_rate": 7.610963690484194e-05, "loss": 0.0067, "step": 17911 }, { "epoch": 3.4671052631578947, "grad_norm": 0.03216089308261871, "learning_rate": 7.610721369460625e-05, "loss": 0.0059, "step": 17912 }, { "epoch": 3.467298761609907, "grad_norm": 
0.0390915609896183, "learning_rate": 7.610479040589915e-05, "loss": 0.0066, "step": 17913 }, { "epoch": 3.4674922600619196, "grad_norm": 0.04213748872280121, "learning_rate": 7.610236703872963e-05, "loss": 0.0063, "step": 17914 }, { "epoch": 3.467685758513932, "grad_norm": 0.03955767676234245, "learning_rate": 7.609994359310673e-05, "loss": 0.0072, "step": 17915 }, { "epoch": 3.4678792569659445, "grad_norm": 0.03102993778884411, "learning_rate": 7.609752006903944e-05, "loss": 0.0062, "step": 17916 }, { "epoch": 3.468072755417957, "grad_norm": 0.047391392290592194, "learning_rate": 7.609509646653675e-05, "loss": 0.0065, "step": 17917 }, { "epoch": 3.468266253869969, "grad_norm": 0.02746330015361309, "learning_rate": 7.60926727856077e-05, "loss": 0.0054, "step": 17918 }, { "epoch": 3.4684597523219813, "grad_norm": 0.04103251174092293, "learning_rate": 7.60902490262613e-05, "loss": 0.0058, "step": 17919 }, { "epoch": 3.468653250773994, "grad_norm": 0.04923008382320404, "learning_rate": 7.608782518850652e-05, "loss": 0.0069, "step": 17920 }, { "epoch": 3.468846749226006, "grad_norm": 0.06481172144412994, "learning_rate": 7.608540127235242e-05, "loss": 0.0059, "step": 17921 }, { "epoch": 3.4690402476780187, "grad_norm": 0.04228508844971657, "learning_rate": 7.608297727780802e-05, "loss": 0.0062, "step": 17922 }, { "epoch": 3.469233746130031, "grad_norm": 0.08060009777545929, "learning_rate": 7.608055320488227e-05, "loss": 0.0066, "step": 17923 }, { "epoch": 3.469427244582043, "grad_norm": 0.03361204266548157, "learning_rate": 7.607812905358422e-05, "loss": 0.0074, "step": 17924 }, { "epoch": 3.4696207430340555, "grad_norm": 0.07293397188186646, "learning_rate": 7.60757048239229e-05, "loss": 0.0062, "step": 17925 }, { "epoch": 3.469814241486068, "grad_norm": 0.029259562492370605, "learning_rate": 7.607328051590728e-05, "loss": 0.006, "step": 17926 }, { "epoch": 3.4700077399380804, "grad_norm": 0.055437639355659485, "learning_rate": 7.607085612954642e-05, "loss": 0.0064, "step": 17927 }, { "epoch": 3.470201238390093, "grad_norm": 0.03300835192203522, "learning_rate": 7.606843166484928e-05, "loss": 0.0056, "step": 17928 }, { "epoch": 3.4703947368421053, "grad_norm": 0.04533685743808746, "learning_rate": 7.606600712182493e-05, "loss": 0.0058, "step": 17929 }, { "epoch": 3.4705882352941178, "grad_norm": 0.028917701914906502, "learning_rate": 7.606358250048234e-05, "loss": 0.0079, "step": 17930 }, { "epoch": 3.47078173374613, "grad_norm": 0.05307383090257645, "learning_rate": 7.606115780083054e-05, "loss": 0.0076, "step": 17931 }, { "epoch": 3.4709752321981426, "grad_norm": 0.02446632832288742, "learning_rate": 7.605873302287856e-05, "loss": 0.0055, "step": 17932 }, { "epoch": 3.4711687306501546, "grad_norm": 0.05450430139899254, "learning_rate": 7.605630816663538e-05, "loss": 0.0072, "step": 17933 }, { "epoch": 3.471362229102167, "grad_norm": 0.045085713267326355, "learning_rate": 7.605388323211005e-05, "loss": 0.0066, "step": 17934 }, { "epoch": 3.4715557275541795, "grad_norm": 0.05301947891712189, "learning_rate": 7.605145821931157e-05, "loss": 0.0065, "step": 17935 }, { "epoch": 3.471749226006192, "grad_norm": 0.04206974059343338, "learning_rate": 7.604903312824895e-05, "loss": 0.0064, "step": 17936 }, { "epoch": 3.4719427244582044, "grad_norm": 0.04744143411517143, "learning_rate": 7.60466079589312e-05, "loss": 0.0063, "step": 17937 }, { "epoch": 3.472136222910217, "grad_norm": 0.03675612434744835, "learning_rate": 7.604418271136738e-05, "loss": 0.0076, "step": 17938 }, { "epoch": 
3.4723297213622293, "grad_norm": 0.037112757563591, "learning_rate": 7.604175738556643e-05, "loss": 0.0064, "step": 17939 }, { "epoch": 3.4725232198142413, "grad_norm": 0.03883994743227959, "learning_rate": 7.603933198153743e-05, "loss": 0.0068, "step": 17940 }, { "epoch": 3.4727167182662537, "grad_norm": 0.028243545442819595, "learning_rate": 7.603690649928938e-05, "loss": 0.0057, "step": 17941 }, { "epoch": 3.472910216718266, "grad_norm": 0.06325741112232208, "learning_rate": 7.60344809388313e-05, "loss": 0.0074, "step": 17942 }, { "epoch": 3.4731037151702786, "grad_norm": 0.03927065432071686, "learning_rate": 7.603205530017222e-05, "loss": 0.0067, "step": 17943 }, { "epoch": 3.473297213622291, "grad_norm": 0.06500199437141418, "learning_rate": 7.60296295833211e-05, "loss": 0.0067, "step": 17944 }, { "epoch": 3.4734907120743035, "grad_norm": 0.04164176806807518, "learning_rate": 7.602720378828702e-05, "loss": 0.0069, "step": 17945 }, { "epoch": 3.473684210526316, "grad_norm": 0.07072129845619202, "learning_rate": 7.602477791507898e-05, "loss": 0.0049, "step": 17946 }, { "epoch": 3.4738777089783284, "grad_norm": 0.03938177600502968, "learning_rate": 7.6022351963706e-05, "loss": 0.0064, "step": 17947 }, { "epoch": 3.4740712074303404, "grad_norm": 0.07434361428022385, "learning_rate": 7.601992593417709e-05, "loss": 0.0057, "step": 17948 }, { "epoch": 3.474264705882353, "grad_norm": 0.03264174610376358, "learning_rate": 7.601749982650127e-05, "loss": 0.0068, "step": 17949 }, { "epoch": 3.4744582043343653, "grad_norm": 0.04057168588042259, "learning_rate": 7.601507364068757e-05, "loss": 0.0052, "step": 17950 }, { "epoch": 3.4746517027863777, "grad_norm": 0.05216557905077934, "learning_rate": 7.601264737674499e-05, "loss": 0.0058, "step": 17951 }, { "epoch": 3.47484520123839, "grad_norm": 0.029481496661901474, "learning_rate": 7.601022103468258e-05, "loss": 0.006, "step": 17952 }, { "epoch": 3.4750386996904026, "grad_norm": 0.06258513033390045, "learning_rate": 7.600779461450934e-05, "loss": 0.0063, "step": 17953 }, { "epoch": 3.475232198142415, "grad_norm": 0.034800074994564056, "learning_rate": 7.60053681162343e-05, "loss": 0.0067, "step": 17954 }, { "epoch": 3.475425696594427, "grad_norm": 0.06489436328411102, "learning_rate": 7.600294153986648e-05, "loss": 0.0063, "step": 17955 }, { "epoch": 3.4756191950464395, "grad_norm": 0.036340128630399704, "learning_rate": 7.60005148854149e-05, "loss": 0.0067, "step": 17956 }, { "epoch": 3.475812693498452, "grad_norm": 0.04421365633606911, "learning_rate": 7.599808815288858e-05, "loss": 0.0073, "step": 17957 }, { "epoch": 3.4760061919504643, "grad_norm": 0.060622815042734146, "learning_rate": 7.599566134229653e-05, "loss": 0.0057, "step": 17958 }, { "epoch": 3.476199690402477, "grad_norm": 0.04514003172516823, "learning_rate": 7.599323445364778e-05, "loss": 0.0055, "step": 17959 }, { "epoch": 3.4763931888544892, "grad_norm": 0.06244629994034767, "learning_rate": 7.599080748695138e-05, "loss": 0.0062, "step": 17960 }, { "epoch": 3.4765866873065017, "grad_norm": 0.04373208060860634, "learning_rate": 7.598838044221632e-05, "loss": 0.0056, "step": 17961 }, { "epoch": 3.476780185758514, "grad_norm": 0.03432701528072357, "learning_rate": 7.598595331945163e-05, "loss": 0.007, "step": 17962 }, { "epoch": 3.4769736842105265, "grad_norm": 0.030964914709329605, "learning_rate": 7.598352611866634e-05, "loss": 0.0064, "step": 17963 }, { "epoch": 3.4771671826625385, "grad_norm": 0.03751058503985405, "learning_rate": 7.598109883986945e-05, "loss": 0.0068, "step": 
17964 }, { "epoch": 3.477360681114551, "grad_norm": 0.050873976200819016, "learning_rate": 7.597867148307003e-05, "loss": 0.0059, "step": 17965 }, { "epoch": 3.4775541795665634, "grad_norm": 0.05035838484764099, "learning_rate": 7.597624404827708e-05, "loss": 0.006, "step": 17966 }, { "epoch": 3.477747678018576, "grad_norm": 0.055778004229068756, "learning_rate": 7.597381653549961e-05, "loss": 0.0055, "step": 17967 }, { "epoch": 3.4779411764705883, "grad_norm": 0.08228939026594162, "learning_rate": 7.597138894474667e-05, "loss": 0.0065, "step": 17968 }, { "epoch": 3.4781346749226008, "grad_norm": 0.05618932098150253, "learning_rate": 7.596896127602726e-05, "loss": 0.0054, "step": 17969 }, { "epoch": 3.478328173374613, "grad_norm": 0.06370354443788528, "learning_rate": 7.596653352935043e-05, "loss": 0.0062, "step": 17970 }, { "epoch": 3.478521671826625, "grad_norm": 0.09483107179403305, "learning_rate": 7.596410570472519e-05, "loss": 0.0056, "step": 17971 }, { "epoch": 3.4787151702786376, "grad_norm": 0.050784870982170105, "learning_rate": 7.596167780216058e-05, "loss": 0.0054, "step": 17972 }, { "epoch": 3.47890866873065, "grad_norm": 0.11677591502666473, "learning_rate": 7.595924982166559e-05, "loss": 0.0062, "step": 17973 }, { "epoch": 3.4791021671826625, "grad_norm": 0.03900876268744469, "learning_rate": 7.59568217632493e-05, "loss": 0.0077, "step": 17974 }, { "epoch": 3.479295665634675, "grad_norm": 0.1104840338230133, "learning_rate": 7.59543936269207e-05, "loss": 0.006, "step": 17975 }, { "epoch": 3.4794891640866874, "grad_norm": 0.05400010943412781, "learning_rate": 7.595196541268884e-05, "loss": 0.0066, "step": 17976 }, { "epoch": 3.4796826625387, "grad_norm": 0.08110883086919785, "learning_rate": 7.594953712056272e-05, "loss": 0.0059, "step": 17977 }, { "epoch": 3.4798761609907123, "grad_norm": 0.11915391683578491, "learning_rate": 7.59471087505514e-05, "loss": 0.0064, "step": 17978 }, { "epoch": 3.4800696594427243, "grad_norm": 0.07868245244026184, "learning_rate": 7.594468030266386e-05, "loss": 0.0062, "step": 17979 }, { "epoch": 3.4802631578947367, "grad_norm": 0.11143780499696732, "learning_rate": 7.594225177690918e-05, "loss": 0.0064, "step": 17980 }, { "epoch": 3.480456656346749, "grad_norm": 0.02989918738603592, "learning_rate": 7.593982317329638e-05, "loss": 0.0061, "step": 17981 }, { "epoch": 3.4806501547987616, "grad_norm": 0.10869023203849792, "learning_rate": 7.593739449183446e-05, "loss": 0.0071, "step": 17982 }, { "epoch": 3.480843653250774, "grad_norm": 0.020360583439469337, "learning_rate": 7.593496573253247e-05, "loss": 0.0072, "step": 17983 }, { "epoch": 3.4810371517027865, "grad_norm": 0.08194731920957565, "learning_rate": 7.593253689539943e-05, "loss": 0.0065, "step": 17984 }, { "epoch": 3.481230650154799, "grad_norm": 0.03945399820804596, "learning_rate": 7.593010798044438e-05, "loss": 0.0055, "step": 17985 }, { "epoch": 3.481424148606811, "grad_norm": 0.055477771908044815, "learning_rate": 7.592767898767635e-05, "loss": 0.0054, "step": 17986 }, { "epoch": 3.4816176470588234, "grad_norm": 0.06534081697463989, "learning_rate": 7.592524991710437e-05, "loss": 0.0063, "step": 17987 }, { "epoch": 3.481811145510836, "grad_norm": 0.055705636739730835, "learning_rate": 7.592282076873746e-05, "loss": 0.0067, "step": 17988 }, { "epoch": 3.4820046439628483, "grad_norm": 0.08002690970897675, "learning_rate": 7.592039154258466e-05, "loss": 0.0064, "step": 17989 }, { "epoch": 3.4821981424148607, "grad_norm": 0.05641590431332588, "learning_rate": 7.5917962238655e-05, "loss": 
0.0062, "step": 17990 }, { "epoch": 3.482391640866873, "grad_norm": 0.0697837546467781, "learning_rate": 7.59155328569575e-05, "loss": 0.0056, "step": 17991 }, { "epoch": 3.4825851393188856, "grad_norm": 0.06441397964954376, "learning_rate": 7.591310339750122e-05, "loss": 0.0066, "step": 17992 }, { "epoch": 3.482778637770898, "grad_norm": 0.04852527752518654, "learning_rate": 7.591067386029517e-05, "loss": 0.0065, "step": 17993 }, { "epoch": 3.4829721362229105, "grad_norm": 0.04875066503882408, "learning_rate": 7.590824424534839e-05, "loss": 0.0061, "step": 17994 }, { "epoch": 3.4831656346749225, "grad_norm": 0.06069791689515114, "learning_rate": 7.590581455266989e-05, "loss": 0.0071, "step": 17995 }, { "epoch": 3.483359133126935, "grad_norm": 0.038545187562704086, "learning_rate": 7.590338478226873e-05, "loss": 0.006, "step": 17996 }, { "epoch": 3.4835526315789473, "grad_norm": 0.07805539667606354, "learning_rate": 7.590095493415394e-05, "loss": 0.0062, "step": 17997 }, { "epoch": 3.48374613003096, "grad_norm": 0.07041765749454498, "learning_rate": 7.589852500833455e-05, "loss": 0.0065, "step": 17998 }, { "epoch": 3.4839396284829722, "grad_norm": 0.10723520070314407, "learning_rate": 7.58960950048196e-05, "loss": 0.0057, "step": 17999 }, { "epoch": 3.4841331269349847, "grad_norm": 0.1427195817232132, "learning_rate": 7.589366492361812e-05, "loss": 0.0067, "step": 18000 }, { "epoch": 3.4843266253869967, "grad_norm": 0.08405856788158417, "learning_rate": 7.589123476473912e-05, "loss": 0.008, "step": 18001 }, { "epoch": 3.484520123839009, "grad_norm": 0.134914830327034, "learning_rate": 7.588880452819168e-05, "loss": 0.0057, "step": 18002 }, { "epoch": 3.4847136222910216, "grad_norm": 0.11732301861047745, "learning_rate": 7.58863742139848e-05, "loss": 0.0061, "step": 18003 }, { "epoch": 3.484907120743034, "grad_norm": 0.08064558357000351, "learning_rate": 7.588394382212754e-05, "loss": 0.0068, "step": 18004 }, { "epoch": 3.4851006191950464, "grad_norm": 0.1751149743795395, "learning_rate": 7.588151335262892e-05, "loss": 0.0051, "step": 18005 }, { "epoch": 3.485294117647059, "grad_norm": 0.07761693000793457, "learning_rate": 7.587908280549797e-05, "loss": 0.0067, "step": 18006 }, { "epoch": 3.4854876160990713, "grad_norm": 0.17313607037067413, "learning_rate": 7.587665218074374e-05, "loss": 0.0074, "step": 18007 }, { "epoch": 3.4856811145510838, "grad_norm": 0.16058428585529327, "learning_rate": 7.587422147837526e-05, "loss": 0.0069, "step": 18008 }, { "epoch": 3.485874613003096, "grad_norm": 0.08657942712306976, "learning_rate": 7.587179069840156e-05, "loss": 0.008, "step": 18009 }, { "epoch": 3.486068111455108, "grad_norm": 0.19091711938381195, "learning_rate": 7.58693598408317e-05, "loss": 0.0073, "step": 18010 }, { "epoch": 3.4862616099071206, "grad_norm": 0.054858870804309845, "learning_rate": 7.586692890567469e-05, "loss": 0.0067, "step": 18011 }, { "epoch": 3.486455108359133, "grad_norm": 0.14886629581451416, "learning_rate": 7.586449789293958e-05, "loss": 0.0064, "step": 18012 }, { "epoch": 3.4866486068111455, "grad_norm": 0.11250323057174683, "learning_rate": 7.586206680263542e-05, "loss": 0.0064, "step": 18013 }, { "epoch": 3.486842105263158, "grad_norm": 0.11275298148393631, "learning_rate": 7.585963563477122e-05, "loss": 0.0068, "step": 18014 }, { "epoch": 3.4870356037151704, "grad_norm": 0.11619171500205994, "learning_rate": 7.585720438935604e-05, "loss": 0.0064, "step": 18015 }, { "epoch": 3.487229102167183, "grad_norm": 0.11512656509876251, "learning_rate": 
7.585477306639892e-05, "loss": 0.0056, "step": 18016 }, { "epoch": 3.487422600619195, "grad_norm": 0.1015458032488823, "learning_rate": 7.58523416659089e-05, "loss": 0.0074, "step": 18017 }, { "epoch": 3.4876160990712073, "grad_norm": 0.08356859534978867, "learning_rate": 7.5849910187895e-05, "loss": 0.0055, "step": 18018 }, { "epoch": 3.4878095975232197, "grad_norm": 0.050839755684137344, "learning_rate": 7.584747863236627e-05, "loss": 0.0057, "step": 18019 }, { "epoch": 3.488003095975232, "grad_norm": 0.06408990174531937, "learning_rate": 7.584504699933174e-05, "loss": 0.0045, "step": 18020 }, { "epoch": 3.4881965944272446, "grad_norm": 0.039584945887327194, "learning_rate": 7.584261528880048e-05, "loss": 0.0079, "step": 18021 }, { "epoch": 3.488390092879257, "grad_norm": 0.06486999988555908, "learning_rate": 7.584018350078151e-05, "loss": 0.0057, "step": 18022 }, { "epoch": 3.4885835913312695, "grad_norm": 0.039267003536224365, "learning_rate": 7.583775163528387e-05, "loss": 0.0053, "step": 18023 }, { "epoch": 3.488777089783282, "grad_norm": 0.06892063468694687, "learning_rate": 7.583531969231661e-05, "loss": 0.0062, "step": 18024 }, { "epoch": 3.488970588235294, "grad_norm": 0.04691724479198456, "learning_rate": 7.583288767188875e-05, "loss": 0.0055, "step": 18025 }, { "epoch": 3.4891640866873064, "grad_norm": 0.06100752204656601, "learning_rate": 7.583045557400937e-05, "loss": 0.0071, "step": 18026 }, { "epoch": 3.489357585139319, "grad_norm": 0.04345303401350975, "learning_rate": 7.582802339868747e-05, "loss": 0.0066, "step": 18027 }, { "epoch": 3.4895510835913313, "grad_norm": 0.059923429042100906, "learning_rate": 7.582559114593212e-05, "loss": 0.0065, "step": 18028 }, { "epoch": 3.4897445820433437, "grad_norm": 0.03731891140341759, "learning_rate": 7.582315881575235e-05, "loss": 0.005, "step": 18029 }, { "epoch": 3.489938080495356, "grad_norm": 0.05088965967297554, "learning_rate": 7.58207264081572e-05, "loss": 0.0058, "step": 18030 }, { "epoch": 3.4901315789473686, "grad_norm": 0.027453433722257614, "learning_rate": 7.581829392315572e-05, "loss": 0.0055, "step": 18031 }, { "epoch": 3.4903250773993806, "grad_norm": 0.05288093537092209, "learning_rate": 7.581586136075698e-05, "loss": 0.0057, "step": 18032 }, { "epoch": 3.490518575851393, "grad_norm": 0.024156052619218826, "learning_rate": 7.581342872096997e-05, "loss": 0.0048, "step": 18033 }, { "epoch": 3.4907120743034055, "grad_norm": 0.06394453346729279, "learning_rate": 7.581099600380376e-05, "loss": 0.0071, "step": 18034 }, { "epoch": 3.490905572755418, "grad_norm": 0.025631457567214966, "learning_rate": 7.580856320926741e-05, "loss": 0.0055, "step": 18035 }, { "epoch": 3.4910990712074303, "grad_norm": 0.05712410435080528, "learning_rate": 7.580613033736992e-05, "loss": 0.0052, "step": 18036 }, { "epoch": 3.491292569659443, "grad_norm": 0.02815115824341774, "learning_rate": 7.58036973881204e-05, "loss": 0.0057, "step": 18037 }, { "epoch": 3.4914860681114552, "grad_norm": 0.05161997675895691, "learning_rate": 7.580126436152784e-05, "loss": 0.0059, "step": 18038 }, { "epoch": 3.4916795665634677, "grad_norm": 0.07578084617853165, "learning_rate": 7.579883125760132e-05, "loss": 0.0072, "step": 18039 }, { "epoch": 3.49187306501548, "grad_norm": 0.03600368648767471, "learning_rate": 7.579639807634986e-05, "loss": 0.0068, "step": 18040 }, { "epoch": 3.492066563467492, "grad_norm": 0.08184951543807983, "learning_rate": 7.579396481778251e-05, "loss": 0.006, "step": 18041 }, { "epoch": 3.4922600619195046, "grad_norm": 
0.05166575685143471, "learning_rate": 7.579153148190832e-05, "loss": 0.007, "step": 18042 }, { "epoch": 3.492453560371517, "grad_norm": 0.08877305686473846, "learning_rate": 7.578909806873635e-05, "loss": 0.0062, "step": 18043 }, { "epoch": 3.4926470588235294, "grad_norm": 0.0667499303817749, "learning_rate": 7.578666457827564e-05, "loss": 0.0069, "step": 18044 }, { "epoch": 3.492840557275542, "grad_norm": 0.08033236116170883, "learning_rate": 7.578423101053522e-05, "loss": 0.0068, "step": 18045 }, { "epoch": 3.4930340557275543, "grad_norm": 0.08945202082395554, "learning_rate": 7.578179736552416e-05, "loss": 0.0058, "step": 18046 }, { "epoch": 3.4932275541795663, "grad_norm": 0.05509618669748306, "learning_rate": 7.577936364325149e-05, "loss": 0.0059, "step": 18047 }, { "epoch": 3.4934210526315788, "grad_norm": 0.10812398791313171, "learning_rate": 7.577692984372627e-05, "loss": 0.0063, "step": 18048 }, { "epoch": 3.493614551083591, "grad_norm": 0.06168006360530853, "learning_rate": 7.577449596695754e-05, "loss": 0.0065, "step": 18049 }, { "epoch": 3.4938080495356036, "grad_norm": 0.0903545469045639, "learning_rate": 7.577206201295437e-05, "loss": 0.0066, "step": 18050 }, { "epoch": 3.494001547987616, "grad_norm": 0.10683195292949677, "learning_rate": 7.576962798172578e-05, "loss": 0.0053, "step": 18051 }, { "epoch": 3.4941950464396285, "grad_norm": 0.054400064051151276, "learning_rate": 7.576719387328083e-05, "loss": 0.0052, "step": 18052 }, { "epoch": 3.494388544891641, "grad_norm": 0.10555966198444366, "learning_rate": 7.576475968762856e-05, "loss": 0.0051, "step": 18053 }, { "epoch": 3.4945820433436534, "grad_norm": 0.061141181737184525, "learning_rate": 7.576232542477806e-05, "loss": 0.0044, "step": 18054 }, { "epoch": 3.494775541795666, "grad_norm": 0.061357270926237106, "learning_rate": 7.575989108473832e-05, "loss": 0.0056, "step": 18055 }, { "epoch": 3.494969040247678, "grad_norm": 0.06542858481407166, "learning_rate": 7.575745666751843e-05, "loss": 0.0058, "step": 18056 }, { "epoch": 3.4951625386996903, "grad_norm": 0.02562791295349598, "learning_rate": 7.575502217312744e-05, "loss": 0.0065, "step": 18057 }, { "epoch": 3.4953560371517027, "grad_norm": 0.05287117883563042, "learning_rate": 7.575258760157437e-05, "loss": 0.008, "step": 18058 }, { "epoch": 3.495549535603715, "grad_norm": 0.0424933061003685, "learning_rate": 7.575015295286832e-05, "loss": 0.0074, "step": 18059 }, { "epoch": 3.4957430340557276, "grad_norm": 0.019713392481207848, "learning_rate": 7.57477182270183e-05, "loss": 0.0058, "step": 18060 }, { "epoch": 3.49593653250774, "grad_norm": 0.0548601932823658, "learning_rate": 7.574528342403338e-05, "loss": 0.0063, "step": 18061 }, { "epoch": 3.4961300309597525, "grad_norm": 0.0257891733199358, "learning_rate": 7.574284854392261e-05, "loss": 0.0059, "step": 18062 }, { "epoch": 3.4963235294117645, "grad_norm": 0.024040693417191505, "learning_rate": 7.574041358669502e-05, "loss": 0.0063, "step": 18063 }, { "epoch": 3.496517027863777, "grad_norm": 0.03177658095955849, "learning_rate": 7.573797855235972e-05, "loss": 0.0073, "step": 18064 }, { "epoch": 3.4967105263157894, "grad_norm": 0.02167677879333496, "learning_rate": 7.573554344092569e-05, "loss": 0.0057, "step": 18065 }, { "epoch": 3.496904024767802, "grad_norm": 0.04252578690648079, "learning_rate": 7.573310825240203e-05, "loss": 0.0071, "step": 18066 }, { "epoch": 3.4970975232198143, "grad_norm": 0.0327678881585598, "learning_rate": 7.57306729867978e-05, "loss": 0.0063, "step": 18067 }, { "epoch": 
3.4972910216718267, "grad_norm": 0.046169180423021317, "learning_rate": 7.5728237644122e-05, "loss": 0.006, "step": 18068 }, { "epoch": 3.497484520123839, "grad_norm": 0.027825385332107544, "learning_rate": 7.572580222438375e-05, "loss": 0.0071, "step": 18069 }, { "epoch": 3.4976780185758516, "grad_norm": 0.05663688853383064, "learning_rate": 7.572336672759207e-05, "loss": 0.0067, "step": 18070 }, { "epoch": 3.4978715170278636, "grad_norm": 0.06394651532173157, "learning_rate": 7.572093115375602e-05, "loss": 0.0063, "step": 18071 }, { "epoch": 3.498065015479876, "grad_norm": 0.11398675292730331, "learning_rate": 7.571849550288465e-05, "loss": 0.0059, "step": 18072 }, { "epoch": 3.4982585139318885, "grad_norm": 0.07948693633079529, "learning_rate": 7.5716059774987e-05, "loss": 0.0067, "step": 18073 }, { "epoch": 3.498452012383901, "grad_norm": 0.09714939445257187, "learning_rate": 7.571362397007216e-05, "loss": 0.0076, "step": 18074 }, { "epoch": 3.4986455108359134, "grad_norm": 0.11866328865289688, "learning_rate": 7.571118808814919e-05, "loss": 0.0071, "step": 18075 }, { "epoch": 3.498839009287926, "grad_norm": 0.04488380253314972, "learning_rate": 7.57087521292271e-05, "loss": 0.007, "step": 18076 }, { "epoch": 3.4990325077399382, "grad_norm": 0.12909388542175293, "learning_rate": 7.570631609331498e-05, "loss": 0.0061, "step": 18077 }, { "epoch": 3.4992260061919502, "grad_norm": 0.06535875052213669, "learning_rate": 7.570387998042188e-05, "loss": 0.0071, "step": 18078 }, { "epoch": 3.4994195046439627, "grad_norm": 0.10229991376399994, "learning_rate": 7.570144379055685e-05, "loss": 0.0059, "step": 18079 }, { "epoch": 3.499613003095975, "grad_norm": 0.12647046148777008, "learning_rate": 7.569900752372896e-05, "loss": 0.006, "step": 18080 }, { "epoch": 3.4998065015479876, "grad_norm": 0.054134633392095566, "learning_rate": 7.569657117994725e-05, "loss": 0.0059, "step": 18081 }, { "epoch": 3.5, "grad_norm": 0.1427038311958313, "learning_rate": 7.56941347592208e-05, "loss": 0.0059, "step": 18082 }, { "epoch": 3.5001934984520124, "grad_norm": 0.03286708518862724, "learning_rate": 7.569169826155866e-05, "loss": 0.0062, "step": 18083 }, { "epoch": 3.500386996904025, "grad_norm": 0.10008670389652252, "learning_rate": 7.568926168696986e-05, "loss": 0.007, "step": 18084 }, { "epoch": 3.5005804953560373, "grad_norm": 0.10530297458171844, "learning_rate": 7.56868250354635e-05, "loss": 0.0058, "step": 18085 }, { "epoch": 3.5007739938080498, "grad_norm": 0.04700102284550667, "learning_rate": 7.568438830704864e-05, "loss": 0.0044, "step": 18086 }, { "epoch": 3.5009674922600618, "grad_norm": 0.13745935261249542, "learning_rate": 7.56819515017343e-05, "loss": 0.0082, "step": 18087 }, { "epoch": 3.501160990712074, "grad_norm": 0.038534391671419144, "learning_rate": 7.567951461952956e-05, "loss": 0.0074, "step": 18088 }, { "epoch": 3.5013544891640866, "grad_norm": 0.14076939225196838, "learning_rate": 7.567707766044348e-05, "loss": 0.0069, "step": 18089 }, { "epoch": 3.501547987616099, "grad_norm": 0.07388126850128174, "learning_rate": 7.567464062448512e-05, "loss": 0.0071, "step": 18090 }, { "epoch": 3.5017414860681115, "grad_norm": 0.1046372801065445, "learning_rate": 7.567220351166355e-05, "loss": 0.0068, "step": 18091 }, { "epoch": 3.501934984520124, "grad_norm": 0.11328081041574478, "learning_rate": 7.56697663219878e-05, "loss": 0.0062, "step": 18092 }, { "epoch": 3.502128482972136, "grad_norm": 0.05739063769578934, "learning_rate": 7.566732905546699e-05, "loss": 0.0067, "step": 18093 }, { "epoch": 
3.5023219814241484, "grad_norm": 0.12467138469219208, "learning_rate": 7.56648917121101e-05, "loss": 0.006, "step": 18094 }, { "epoch": 3.502515479876161, "grad_norm": 0.035783614963293076, "learning_rate": 7.566245429192627e-05, "loss": 0.0053, "step": 18095 }, { "epoch": 3.5027089783281733, "grad_norm": 0.09912090003490448, "learning_rate": 7.566001679492452e-05, "loss": 0.0066, "step": 18096 }, { "epoch": 3.5029024767801857, "grad_norm": 0.04196188971400261, "learning_rate": 7.56575792211139e-05, "loss": 0.0071, "step": 18097 }, { "epoch": 3.503095975232198, "grad_norm": 0.0507274828851223, "learning_rate": 7.565514157050348e-05, "loss": 0.0061, "step": 18098 }, { "epoch": 3.5032894736842106, "grad_norm": 0.05916787311434746, "learning_rate": 7.565270384310237e-05, "loss": 0.0055, "step": 18099 }, { "epoch": 3.503482972136223, "grad_norm": 0.025316162034869194, "learning_rate": 7.565026603891958e-05, "loss": 0.0071, "step": 18100 }, { "epoch": 3.5036764705882355, "grad_norm": 0.059084054082632065, "learning_rate": 7.564782815796418e-05, "loss": 0.0057, "step": 18101 }, { "epoch": 3.503869969040248, "grad_norm": 0.047588050365448, "learning_rate": 7.564539020024527e-05, "loss": 0.0072, "step": 18102 }, { "epoch": 3.50406346749226, "grad_norm": 0.02523891068994999, "learning_rate": 7.564295216577187e-05, "loss": 0.0072, "step": 18103 }, { "epoch": 3.5042569659442724, "grad_norm": 0.06889311969280243, "learning_rate": 7.564051405455305e-05, "loss": 0.0065, "step": 18104 }, { "epoch": 3.504450464396285, "grad_norm": 0.03310927003622055, "learning_rate": 7.563807586659789e-05, "loss": 0.0066, "step": 18105 }, { "epoch": 3.5046439628482973, "grad_norm": 0.06358686089515686, "learning_rate": 7.563563760191546e-05, "loss": 0.0055, "step": 18106 }, { "epoch": 3.5048374613003097, "grad_norm": 0.04685680568218231, "learning_rate": 7.563319926051478e-05, "loss": 0.0058, "step": 18107 }, { "epoch": 3.5050309597523217, "grad_norm": 0.07338442653417587, "learning_rate": 7.5630760842405e-05, "loss": 0.0062, "step": 18108 }, { "epoch": 3.505224458204334, "grad_norm": 0.04292796552181244, "learning_rate": 7.562832234759511e-05, "loss": 0.0068, "step": 18109 }, { "epoch": 3.5054179566563466, "grad_norm": 0.05872076749801636, "learning_rate": 7.562588377609419e-05, "loss": 0.0073, "step": 18110 }, { "epoch": 3.505611455108359, "grad_norm": 0.06123427674174309, "learning_rate": 7.562344512791133e-05, "loss": 0.0064, "step": 18111 }, { "epoch": 3.5058049535603715, "grad_norm": 0.041456904262304306, "learning_rate": 7.562100640305558e-05, "loss": 0.0075, "step": 18112 }, { "epoch": 3.505998452012384, "grad_norm": 0.07239842414855957, "learning_rate": 7.561856760153601e-05, "loss": 0.0061, "step": 18113 }, { "epoch": 3.5061919504643964, "grad_norm": 0.032014377415180206, "learning_rate": 7.561612872336168e-05, "loss": 0.0062, "step": 18114 }, { "epoch": 3.506385448916409, "grad_norm": 0.07054194808006287, "learning_rate": 7.561368976854166e-05, "loss": 0.0059, "step": 18115 }, { "epoch": 3.5065789473684212, "grad_norm": 0.035536348819732666, "learning_rate": 7.561125073708504e-05, "loss": 0.0068, "step": 18116 }, { "epoch": 3.5067724458204337, "grad_norm": 0.06314747780561447, "learning_rate": 7.560881162900084e-05, "loss": 0.0066, "step": 18117 }, { "epoch": 3.5069659442724457, "grad_norm": 0.03138670325279236, "learning_rate": 7.560637244429819e-05, "loss": 0.0058, "step": 18118 }, { "epoch": 3.507159442724458, "grad_norm": 0.05595772713422775, "learning_rate": 7.56039331829861e-05, "loss": 0.006, "step": 
18119 }, { "epoch": 3.5073529411764706, "grad_norm": 0.040793173015117645, "learning_rate": 7.560149384507368e-05, "loss": 0.0063, "step": 18120 }, { "epoch": 3.507546439628483, "grad_norm": 0.03456709533929825, "learning_rate": 7.559905443056997e-05, "loss": 0.0052, "step": 18121 }, { "epoch": 3.5077399380804954, "grad_norm": 0.045907363295555115, "learning_rate": 7.559661493948407e-05, "loss": 0.006, "step": 18122 }, { "epoch": 3.507933436532508, "grad_norm": 0.054409485310316086, "learning_rate": 7.559417537182501e-05, "loss": 0.0063, "step": 18123 }, { "epoch": 3.50812693498452, "grad_norm": 0.04888860881328583, "learning_rate": 7.559173572760189e-05, "loss": 0.0062, "step": 18124 }, { "epoch": 3.5083204334365323, "grad_norm": 0.04669671878218651, "learning_rate": 7.558929600682376e-05, "loss": 0.006, "step": 18125 }, { "epoch": 3.5085139318885448, "grad_norm": 0.0647052749991417, "learning_rate": 7.558685620949971e-05, "loss": 0.0063, "step": 18126 }, { "epoch": 3.508707430340557, "grad_norm": 0.03541174903512001, "learning_rate": 7.55844163356388e-05, "loss": 0.0062, "step": 18127 }, { "epoch": 3.5089009287925697, "grad_norm": 0.04210279881954193, "learning_rate": 7.558197638525011e-05, "loss": 0.0064, "step": 18128 }, { "epoch": 3.509094427244582, "grad_norm": 0.044030461460351944, "learning_rate": 7.557953635834268e-05, "loss": 0.0068, "step": 18129 }, { "epoch": 3.5092879256965945, "grad_norm": 0.021708304062485695, "learning_rate": 7.557709625492563e-05, "loss": 0.0049, "step": 18130 }, { "epoch": 3.509481424148607, "grad_norm": 0.05620511621236801, "learning_rate": 7.557465607500799e-05, "loss": 0.0069, "step": 18131 }, { "epoch": 3.5096749226006194, "grad_norm": 0.027327775955200195, "learning_rate": 7.557221581859887e-05, "loss": 0.0073, "step": 18132 }, { "epoch": 3.5098684210526314, "grad_norm": 0.04802163690328598, "learning_rate": 7.55697754857073e-05, "loss": 0.0064, "step": 18133 }, { "epoch": 3.510061919504644, "grad_norm": 0.030257448554039, "learning_rate": 7.556733507634238e-05, "loss": 0.008, "step": 18134 }, { "epoch": 3.5102554179566563, "grad_norm": 0.07740440964698792, "learning_rate": 7.556489459051318e-05, "loss": 0.0075, "step": 18135 }, { "epoch": 3.5104489164086687, "grad_norm": 0.02479644864797592, "learning_rate": 7.556245402822874e-05, "loss": 0.0054, "step": 18136 }, { "epoch": 3.510642414860681, "grad_norm": 0.05933431535959244, "learning_rate": 7.55600133894982e-05, "loss": 0.0083, "step": 18137 }, { "epoch": 3.5108359133126936, "grad_norm": 0.04176083952188492, "learning_rate": 7.555757267433057e-05, "loss": 0.0057, "step": 18138 }, { "epoch": 3.5110294117647056, "grad_norm": 0.05440301075577736, "learning_rate": 7.555513188273498e-05, "loss": 0.0073, "step": 18139 }, { "epoch": 3.511222910216718, "grad_norm": 0.06685277819633484, "learning_rate": 7.555269101472046e-05, "loss": 0.0058, "step": 18140 }, { "epoch": 3.5114164086687305, "grad_norm": 0.04358096420764923, "learning_rate": 7.555025007029609e-05, "loss": 0.0055, "step": 18141 }, { "epoch": 3.511609907120743, "grad_norm": 0.05833206698298454, "learning_rate": 7.554780904947096e-05, "loss": 0.0056, "step": 18142 }, { "epoch": 3.5118034055727554, "grad_norm": 0.058873046189546585, "learning_rate": 7.554536795225414e-05, "loss": 0.0071, "step": 18143 }, { "epoch": 3.511996904024768, "grad_norm": 0.04927985742688179, "learning_rate": 7.55429267786547e-05, "loss": 0.0065, "step": 18144 }, { "epoch": 3.5121904024767803, "grad_norm": 0.057478323578834534, "learning_rate": 7.554048552868174e-05, 
"loss": 0.0058, "step": 18145 }, { "epoch": 3.5123839009287927, "grad_norm": 0.0363539382815361, "learning_rate": 7.553804420234431e-05, "loss": 0.0054, "step": 18146 }, { "epoch": 3.512577399380805, "grad_norm": 0.050927698612213135, "learning_rate": 7.553560279965148e-05, "loss": 0.0055, "step": 18147 }, { "epoch": 3.5127708978328176, "grad_norm": 0.06404374539852142, "learning_rate": 7.553316132061233e-05, "loss": 0.0076, "step": 18148 }, { "epoch": 3.5129643962848296, "grad_norm": 0.04774247854948044, "learning_rate": 7.553071976523596e-05, "loss": 0.0067, "step": 18149 }, { "epoch": 3.513157894736842, "grad_norm": 0.08739209920167923, "learning_rate": 7.552827813353142e-05, "loss": 0.0052, "step": 18150 }, { "epoch": 3.5133513931888545, "grad_norm": 0.0312359556555748, "learning_rate": 7.552583642550783e-05, "loss": 0.0053, "step": 18151 }, { "epoch": 3.513544891640867, "grad_norm": 0.10036145150661469, "learning_rate": 7.55233946411742e-05, "loss": 0.0049, "step": 18152 }, { "epoch": 3.5137383900928794, "grad_norm": 0.02640342526137829, "learning_rate": 7.552095278053968e-05, "loss": 0.0049, "step": 18153 }, { "epoch": 3.513931888544892, "grad_norm": 0.09093338251113892, "learning_rate": 7.55185108436133e-05, "loss": 0.0066, "step": 18154 }, { "epoch": 3.514125386996904, "grad_norm": 0.08607538044452667, "learning_rate": 7.551606883040414e-05, "loss": 0.0062, "step": 18155 }, { "epoch": 3.5143188854489162, "grad_norm": 0.042620763182640076, "learning_rate": 7.551362674092131e-05, "loss": 0.0057, "step": 18156 }, { "epoch": 3.5145123839009287, "grad_norm": 0.10749579221010208, "learning_rate": 7.551118457517386e-05, "loss": 0.0055, "step": 18157 }, { "epoch": 3.514705882352941, "grad_norm": 0.041223492473363876, "learning_rate": 7.550874233317088e-05, "loss": 0.0052, "step": 18158 }, { "epoch": 3.5148993808049536, "grad_norm": 0.0497279167175293, "learning_rate": 7.550630001492147e-05, "loss": 0.0075, "step": 18159 }, { "epoch": 3.515092879256966, "grad_norm": 0.0764167457818985, "learning_rate": 7.550385762043466e-05, "loss": 0.007, "step": 18160 }, { "epoch": 3.5152863777089784, "grad_norm": 0.029336577281355858, "learning_rate": 7.550141514971955e-05, "loss": 0.0061, "step": 18161 }, { "epoch": 3.515479876160991, "grad_norm": 0.07420330494642258, "learning_rate": 7.549897260278527e-05, "loss": 0.0057, "step": 18162 }, { "epoch": 3.5156733746130033, "grad_norm": 0.04777205362915993, "learning_rate": 7.549652997964084e-05, "loss": 0.0064, "step": 18163 }, { "epoch": 3.5158668730650153, "grad_norm": 0.03446127846837044, "learning_rate": 7.549408728029537e-05, "loss": 0.0073, "step": 18164 }, { "epoch": 3.5160603715170278, "grad_norm": 0.044113174080848694, "learning_rate": 7.549164450475793e-05, "loss": 0.0059, "step": 18165 }, { "epoch": 3.51625386996904, "grad_norm": 0.03489472344517708, "learning_rate": 7.54892016530376e-05, "loss": 0.0054, "step": 18166 }, { "epoch": 3.5164473684210527, "grad_norm": 0.029451191425323486, "learning_rate": 7.548675872514346e-05, "loss": 0.0062, "step": 18167 }, { "epoch": 3.516640866873065, "grad_norm": 0.07530703395605087, "learning_rate": 7.54843157210846e-05, "loss": 0.0063, "step": 18168 }, { "epoch": 3.5168343653250775, "grad_norm": 0.03781628981232643, "learning_rate": 7.548187264087012e-05, "loss": 0.0073, "step": 18169 }, { "epoch": 3.5170278637770895, "grad_norm": 0.06298547983169556, "learning_rate": 7.547942948450907e-05, "loss": 0.0074, "step": 18170 }, { "epoch": 3.517221362229102, "grad_norm": 0.04924684390425682, "learning_rate": 
7.547698625201057e-05, "loss": 0.0049, "step": 18171 }, { "epoch": 3.5174148606811144, "grad_norm": 0.06114165857434273, "learning_rate": 7.547454294338367e-05, "loss": 0.0076, "step": 18172 }, { "epoch": 3.517608359133127, "grad_norm": 0.06449726969003677, "learning_rate": 7.547209955863744e-05, "loss": 0.0065, "step": 18173 }, { "epoch": 3.5178018575851393, "grad_norm": 0.04793323948979378, "learning_rate": 7.546965609778102e-05, "loss": 0.0068, "step": 18174 }, { "epoch": 3.5179953560371517, "grad_norm": 0.06026642024517059, "learning_rate": 7.546721256082344e-05, "loss": 0.0058, "step": 18175 }, { "epoch": 3.518188854489164, "grad_norm": 0.042745865881443024, "learning_rate": 7.546476894777382e-05, "loss": 0.0049, "step": 18176 }, { "epoch": 3.5183823529411766, "grad_norm": 0.07960754632949829, "learning_rate": 7.546232525864122e-05, "loss": 0.0058, "step": 18177 }, { "epoch": 3.518575851393189, "grad_norm": 0.04695438593626022, "learning_rate": 7.545988149343475e-05, "loss": 0.0056, "step": 18178 }, { "epoch": 3.5187693498452015, "grad_norm": 0.08376381546258926, "learning_rate": 7.545743765216347e-05, "loss": 0.0059, "step": 18179 }, { "epoch": 3.5189628482972135, "grad_norm": 0.03990005701780319, "learning_rate": 7.545499373483647e-05, "loss": 0.0048, "step": 18180 }, { "epoch": 3.519156346749226, "grad_norm": 0.09906361997127533, "learning_rate": 7.545254974146286e-05, "loss": 0.0061, "step": 18181 }, { "epoch": 3.5193498452012384, "grad_norm": 0.04748541861772537, "learning_rate": 7.545010567205171e-05, "loss": 0.0061, "step": 18182 }, { "epoch": 3.519543343653251, "grad_norm": 0.07107385247945786, "learning_rate": 7.54476615266121e-05, "loss": 0.0056, "step": 18183 }, { "epoch": 3.5197368421052633, "grad_norm": 0.04824803024530411, "learning_rate": 7.544521730515313e-05, "loss": 0.0056, "step": 18184 }, { "epoch": 3.5199303405572753, "grad_norm": 0.051318131387233734, "learning_rate": 7.544277300768388e-05, "loss": 0.0083, "step": 18185 }, { "epoch": 3.5201238390092877, "grad_norm": 0.05000097677111626, "learning_rate": 7.544032863421343e-05, "loss": 0.0063, "step": 18186 }, { "epoch": 3.5203173374613, "grad_norm": 0.04221991077065468, "learning_rate": 7.543788418475088e-05, "loss": 0.0054, "step": 18187 }, { "epoch": 3.5205108359133126, "grad_norm": 0.053095750510692596, "learning_rate": 7.543543965930531e-05, "loss": 0.0064, "step": 18188 }, { "epoch": 3.520704334365325, "grad_norm": 0.05536776781082153, "learning_rate": 7.54329950578858e-05, "loss": 0.0064, "step": 18189 }, { "epoch": 3.5208978328173375, "grad_norm": 0.04828648641705513, "learning_rate": 7.543055038050147e-05, "loss": 0.0064, "step": 18190 }, { "epoch": 3.52109133126935, "grad_norm": 0.043852243572473526, "learning_rate": 7.542810562716137e-05, "loss": 0.0048, "step": 18191 }, { "epoch": 3.5212848297213624, "grad_norm": 0.03540433198213577, "learning_rate": 7.542566079787462e-05, "loss": 0.0076, "step": 18192 }, { "epoch": 3.521478328173375, "grad_norm": 0.04359421133995056, "learning_rate": 7.542321589265029e-05, "loss": 0.0066, "step": 18193 }, { "epoch": 3.5216718266253872, "grad_norm": 0.032467082142829895, "learning_rate": 7.542077091149747e-05, "loss": 0.005, "step": 18194 }, { "epoch": 3.5218653250773992, "grad_norm": 0.04751857742667198, "learning_rate": 7.541832585442527e-05, "loss": 0.0053, "step": 18195 }, { "epoch": 3.5220588235294117, "grad_norm": 0.04439042881131172, "learning_rate": 7.541588072144275e-05, "loss": 0.0061, "step": 18196 }, { "epoch": 3.522252321981424, "grad_norm": 
0.042534153908491135, "learning_rate": 7.541343551255904e-05, "loss": 0.0077, "step": 18197 }, { "epoch": 3.5224458204334366, "grad_norm": 0.05546264350414276, "learning_rate": 7.541099022778319e-05, "loss": 0.0063, "step": 18198 }, { "epoch": 3.522639318885449, "grad_norm": 0.047602783888578415, "learning_rate": 7.54085448671243e-05, "loss": 0.0055, "step": 18199 }, { "epoch": 3.5228328173374615, "grad_norm": 0.06123075634241104, "learning_rate": 7.540609943059148e-05, "loss": 0.0063, "step": 18200 }, { "epoch": 3.5230263157894735, "grad_norm": 0.03577094152569771, "learning_rate": 7.54036539181938e-05, "loss": 0.007, "step": 18201 }, { "epoch": 3.523219814241486, "grad_norm": 0.039054032415151596, "learning_rate": 7.540120832994038e-05, "loss": 0.005, "step": 18202 }, { "epoch": 3.5234133126934983, "grad_norm": 0.038487739861011505, "learning_rate": 7.539876266584028e-05, "loss": 0.0048, "step": 18203 }, { "epoch": 3.5236068111455108, "grad_norm": 0.04652871564030647, "learning_rate": 7.539631692590262e-05, "loss": 0.0072, "step": 18204 }, { "epoch": 3.523800309597523, "grad_norm": 0.04490939900279045, "learning_rate": 7.539387111013645e-05, "loss": 0.0061, "step": 18205 }, { "epoch": 3.5239938080495357, "grad_norm": 0.05179164931178093, "learning_rate": 7.539142521855092e-05, "loss": 0.0048, "step": 18206 }, { "epoch": 3.524187306501548, "grad_norm": 0.05152837559580803, "learning_rate": 7.538897925115509e-05, "loss": 0.0065, "step": 18207 }, { "epoch": 3.5243808049535605, "grad_norm": 0.06858871877193451, "learning_rate": 7.538653320795805e-05, "loss": 0.0065, "step": 18208 }, { "epoch": 3.524574303405573, "grad_norm": 0.08127321302890778, "learning_rate": 7.538408708896891e-05, "loss": 0.0061, "step": 18209 }, { "epoch": 3.524767801857585, "grad_norm": 0.057240892201662064, "learning_rate": 7.538164089419676e-05, "loss": 0.0067, "step": 18210 }, { "epoch": 3.5249613003095974, "grad_norm": 0.09833300113677979, "learning_rate": 7.537919462365069e-05, "loss": 0.0059, "step": 18211 }, { "epoch": 3.52515479876161, "grad_norm": 0.03976469114422798, "learning_rate": 7.53767482773398e-05, "loss": 0.0075, "step": 18212 }, { "epoch": 3.5253482972136223, "grad_norm": 0.09143266081809998, "learning_rate": 7.537430185527317e-05, "loss": 0.0056, "step": 18213 }, { "epoch": 3.5255417956656347, "grad_norm": 0.025950511917471886, "learning_rate": 7.537185535745992e-05, "loss": 0.0072, "step": 18214 }, { "epoch": 3.525735294117647, "grad_norm": 0.09475652128458023, "learning_rate": 7.536940878390911e-05, "loss": 0.0062, "step": 18215 }, { "epoch": 3.525928792569659, "grad_norm": 0.039699431508779526, "learning_rate": 7.536696213462988e-05, "loss": 0.0065, "step": 18216 }, { "epoch": 3.5261222910216716, "grad_norm": 0.09091199934482574, "learning_rate": 7.53645154096313e-05, "loss": 0.0056, "step": 18217 }, { "epoch": 3.526315789473684, "grad_norm": 0.06908970326185226, "learning_rate": 7.536206860892246e-05, "loss": 0.0049, "step": 18218 }, { "epoch": 3.5265092879256965, "grad_norm": 0.07803363353013992, "learning_rate": 7.535962173251246e-05, "loss": 0.0061, "step": 18219 }, { "epoch": 3.526702786377709, "grad_norm": 0.07783443480730057, "learning_rate": 7.535717478041042e-05, "loss": 0.0057, "step": 18220 }, { "epoch": 3.5268962848297214, "grad_norm": 0.07062195241451263, "learning_rate": 7.535472775262543e-05, "loss": 0.0055, "step": 18221 }, { "epoch": 3.527089783281734, "grad_norm": 0.05765048786997795, "learning_rate": 7.535228064916655e-05, "loss": 0.0075, "step": 18222 }, { "epoch": 
3.5272832817337463, "grad_norm": 0.07629669457674026, "learning_rate": 7.534983347004291e-05, "loss": 0.0072, "step": 18223 }, { "epoch": 3.5274767801857587, "grad_norm": 0.04508292302489281, "learning_rate": 7.534738621526361e-05, "loss": 0.0075, "step": 18224 }, { "epoch": 3.527670278637771, "grad_norm": 0.0919913575053215, "learning_rate": 7.534493888483774e-05, "loss": 0.0069, "step": 18225 }, { "epoch": 3.527863777089783, "grad_norm": 0.046969201415777206, "learning_rate": 7.534249147877441e-05, "loss": 0.0064, "step": 18226 }, { "epoch": 3.5280572755417956, "grad_norm": 0.09520424157381058, "learning_rate": 7.53400439970827e-05, "loss": 0.005, "step": 18227 }, { "epoch": 3.528250773993808, "grad_norm": 0.06839514523744583, "learning_rate": 7.533759643977172e-05, "loss": 0.0069, "step": 18228 }, { "epoch": 3.5284442724458205, "grad_norm": 0.07831335067749023, "learning_rate": 7.533514880685057e-05, "loss": 0.0063, "step": 18229 }, { "epoch": 3.528637770897833, "grad_norm": 0.13089527189731598, "learning_rate": 7.533270109832834e-05, "loss": 0.0068, "step": 18230 }, { "epoch": 3.5288312693498454, "grad_norm": 0.06898379325866699, "learning_rate": 7.533025331421414e-05, "loss": 0.0069, "step": 18231 }, { "epoch": 3.5290247678018574, "grad_norm": 0.13117338716983795, "learning_rate": 7.532780545451707e-05, "loss": 0.0063, "step": 18232 }, { "epoch": 3.52921826625387, "grad_norm": 0.05917183682322502, "learning_rate": 7.532535751924624e-05, "loss": 0.0065, "step": 18233 }, { "epoch": 3.5294117647058822, "grad_norm": 0.1086677834391594, "learning_rate": 7.532290950841072e-05, "loss": 0.0063, "step": 18234 }, { "epoch": 3.5296052631578947, "grad_norm": 0.10327544063329697, "learning_rate": 7.532046142201963e-05, "loss": 0.007, "step": 18235 }, { "epoch": 3.529798761609907, "grad_norm": 0.03267110884189606, "learning_rate": 7.531801326008209e-05, "loss": 0.0059, "step": 18236 }, { "epoch": 3.5299922600619196, "grad_norm": 0.12065873295068741, "learning_rate": 7.531556502260717e-05, "loss": 0.006, "step": 18237 }, { "epoch": 3.530185758513932, "grad_norm": 0.03918362781405449, "learning_rate": 7.531311670960397e-05, "loss": 0.007, "step": 18238 }, { "epoch": 3.5303792569659445, "grad_norm": 0.06399212777614594, "learning_rate": 7.531066832108163e-05, "loss": 0.0064, "step": 18239 }, { "epoch": 3.530572755417957, "grad_norm": 0.1339045763015747, "learning_rate": 7.530821985704924e-05, "loss": 0.0061, "step": 18240 }, { "epoch": 3.530766253869969, "grad_norm": 0.044078338891267776, "learning_rate": 7.530577131751588e-05, "loss": 0.0059, "step": 18241 }, { "epoch": 3.5309597523219813, "grad_norm": 0.13806220889091492, "learning_rate": 7.530332270249066e-05, "loss": 0.0054, "step": 18242 }, { "epoch": 3.531153250773994, "grad_norm": 0.08640138804912567, "learning_rate": 7.530087401198269e-05, "loss": 0.0064, "step": 18243 }, { "epoch": 3.531346749226006, "grad_norm": 0.08948534727096558, "learning_rate": 7.529842524600107e-05, "loss": 0.0062, "step": 18244 }, { "epoch": 3.5315402476780187, "grad_norm": 0.12918883562088013, "learning_rate": 7.52959764045549e-05, "loss": 0.0056, "step": 18245 }, { "epoch": 3.531733746130031, "grad_norm": 0.028965650126338005, "learning_rate": 7.529352748765331e-05, "loss": 0.0065, "step": 18246 }, { "epoch": 3.531927244582043, "grad_norm": 0.1221105232834816, "learning_rate": 7.529107849530538e-05, "loss": 0.0066, "step": 18247 }, { "epoch": 3.5321207430340555, "grad_norm": 0.08253230899572372, "learning_rate": 7.528862942752022e-05, "loss": 0.0062, "step": 18248 
}, { "epoch": 3.532314241486068, "grad_norm": 0.05466992408037186, "learning_rate": 7.528618028430693e-05, "loss": 0.0063, "step": 18249 }, { "epoch": 3.5325077399380804, "grad_norm": 0.13311085104942322, "learning_rate": 7.528373106567461e-05, "loss": 0.0069, "step": 18250 }, { "epoch": 3.532701238390093, "grad_norm": 0.025493692606687546, "learning_rate": 7.52812817716324e-05, "loss": 0.0067, "step": 18251 }, { "epoch": 3.5328947368421053, "grad_norm": 0.10250760614871979, "learning_rate": 7.527883240218935e-05, "loss": 0.0054, "step": 18252 }, { "epoch": 3.5330882352941178, "grad_norm": 0.051486652344465256, "learning_rate": 7.527638295735463e-05, "loss": 0.0057, "step": 18253 }, { "epoch": 3.53328173374613, "grad_norm": 0.06834851205348969, "learning_rate": 7.527393343713731e-05, "loss": 0.0067, "step": 18254 }, { "epoch": 3.5334752321981426, "grad_norm": 0.0810205489397049, "learning_rate": 7.527148384154649e-05, "loss": 0.0068, "step": 18255 }, { "epoch": 3.5336687306501546, "grad_norm": 0.04483472555875778, "learning_rate": 7.526903417059128e-05, "loss": 0.0067, "step": 18256 }, { "epoch": 3.533862229102167, "grad_norm": 0.08854888379573822, "learning_rate": 7.526658442428081e-05, "loss": 0.0073, "step": 18257 }, { "epoch": 3.5340557275541795, "grad_norm": 0.022031143307685852, "learning_rate": 7.526413460262418e-05, "loss": 0.0064, "step": 18258 }, { "epoch": 3.534249226006192, "grad_norm": 0.07096030563116074, "learning_rate": 7.526168470563047e-05, "loss": 0.0066, "step": 18259 }, { "epoch": 3.5344427244582044, "grad_norm": 0.023964226245880127, "learning_rate": 7.525923473330882e-05, "loss": 0.0062, "step": 18260 }, { "epoch": 3.534636222910217, "grad_norm": 0.05296670272946358, "learning_rate": 7.525678468566833e-05, "loss": 0.0071, "step": 18261 }, { "epoch": 3.534829721362229, "grad_norm": 0.03915688395500183, "learning_rate": 7.525433456271811e-05, "loss": 0.0059, "step": 18262 }, { "epoch": 3.5350232198142413, "grad_norm": 0.06152620166540146, "learning_rate": 7.525188436446724e-05, "loss": 0.0088, "step": 18263 }, { "epoch": 3.5352167182662537, "grad_norm": 0.051866743713617325, "learning_rate": 7.524943409092489e-05, "loss": 0.0049, "step": 18264 }, { "epoch": 3.535410216718266, "grad_norm": 0.055482592433691025, "learning_rate": 7.524698374210011e-05, "loss": 0.0045, "step": 18265 }, { "epoch": 3.5356037151702786, "grad_norm": 0.036429937928915024, "learning_rate": 7.524453331800204e-05, "loss": 0.008, "step": 18266 }, { "epoch": 3.535797213622291, "grad_norm": 0.06479768455028534, "learning_rate": 7.524208281863979e-05, "loss": 0.0062, "step": 18267 }, { "epoch": 3.5359907120743035, "grad_norm": 0.02621743641793728, "learning_rate": 7.523963224402244e-05, "loss": 0.0068, "step": 18268 }, { "epoch": 3.536184210526316, "grad_norm": 0.062034253031015396, "learning_rate": 7.523718159415914e-05, "loss": 0.0067, "step": 18269 }, { "epoch": 3.5363777089783284, "grad_norm": 0.028440047055482864, "learning_rate": 7.5234730869059e-05, "loss": 0.0071, "step": 18270 }, { "epoch": 3.536571207430341, "grad_norm": 0.0622272714972496, "learning_rate": 7.523228006873109e-05, "loss": 0.006, "step": 18271 }, { "epoch": 3.536764705882353, "grad_norm": 0.025258950889110565, "learning_rate": 7.522982919318457e-05, "loss": 0.0061, "step": 18272 }, { "epoch": 3.5369582043343653, "grad_norm": 0.04331878945231438, "learning_rate": 7.522737824242851e-05, "loss": 0.0063, "step": 18273 }, { "epoch": 3.5371517027863777, "grad_norm": 0.041907887905836105, "learning_rate": 7.522492721647207e-05, 
"loss": 0.0066, "step": 18274 }, { "epoch": 3.53734520123839, "grad_norm": 0.036145348101854324, "learning_rate": 7.522247611532431e-05, "loss": 0.0048, "step": 18275 }, { "epoch": 3.5375386996904026, "grad_norm": 0.03853676840662956, "learning_rate": 7.522002493899437e-05, "loss": 0.0069, "step": 18276 }, { "epoch": 3.537732198142415, "grad_norm": 0.055411092936992645, "learning_rate": 7.521757368749137e-05, "loss": 0.0061, "step": 18277 }, { "epoch": 3.537925696594427, "grad_norm": 0.04426022991538048, "learning_rate": 7.52151223608244e-05, "loss": 0.0064, "step": 18278 }, { "epoch": 3.5381191950464395, "grad_norm": 0.07001196593046188, "learning_rate": 7.521267095900259e-05, "loss": 0.0075, "step": 18279 }, { "epoch": 3.538312693498452, "grad_norm": 0.07049839943647385, "learning_rate": 7.521021948203507e-05, "loss": 0.0061, "step": 18280 }, { "epoch": 3.5385061919504643, "grad_norm": 0.05322845280170441, "learning_rate": 7.52077679299309e-05, "loss": 0.0063, "step": 18281 }, { "epoch": 3.538699690402477, "grad_norm": 0.08874410390853882, "learning_rate": 7.520531630269924e-05, "loss": 0.0053, "step": 18282 }, { "epoch": 3.5388931888544892, "grad_norm": 0.05229548364877701, "learning_rate": 7.520286460034919e-05, "loss": 0.0069, "step": 18283 }, { "epoch": 3.5390866873065017, "grad_norm": 0.07213802635669708, "learning_rate": 7.520041282288987e-05, "loss": 0.0063, "step": 18284 }, { "epoch": 3.539280185758514, "grad_norm": 0.05657820776104927, "learning_rate": 7.51979609703304e-05, "loss": 0.0054, "step": 18285 }, { "epoch": 3.5394736842105265, "grad_norm": 0.049158837646245956, "learning_rate": 7.519550904267989e-05, "loss": 0.0058, "step": 18286 }, { "epoch": 3.5396671826625385, "grad_norm": 0.03748840466141701, "learning_rate": 7.519305703994746e-05, "loss": 0.0083, "step": 18287 }, { "epoch": 3.539860681114551, "grad_norm": 0.0201224684715271, "learning_rate": 7.519060496214219e-05, "loss": 0.0048, "step": 18288 }, { "epoch": 3.5400541795665634, "grad_norm": 0.07451929897069931, "learning_rate": 7.518815280927326e-05, "loss": 0.007, "step": 18289 }, { "epoch": 3.540247678018576, "grad_norm": 0.03680618852376938, "learning_rate": 7.518570058134972e-05, "loss": 0.0055, "step": 18290 }, { "epoch": 3.5404411764705883, "grad_norm": 0.06912712007761002, "learning_rate": 7.518324827838075e-05, "loss": 0.0051, "step": 18291 }, { "epoch": 3.5406346749226008, "grad_norm": 0.03503189980983734, "learning_rate": 7.518079590037543e-05, "loss": 0.0071, "step": 18292 }, { "epoch": 3.5408281733746128, "grad_norm": 0.09669298678636551, "learning_rate": 7.517834344734288e-05, "loss": 0.0062, "step": 18293 }, { "epoch": 3.541021671826625, "grad_norm": 0.03397577628493309, "learning_rate": 7.51758909192922e-05, "loss": 0.0066, "step": 18294 }, { "epoch": 3.5412151702786376, "grad_norm": 0.11362069845199585, "learning_rate": 7.517343831623253e-05, "loss": 0.0059, "step": 18295 }, { "epoch": 3.54140866873065, "grad_norm": 0.052597127854824066, "learning_rate": 7.517098563817302e-05, "loss": 0.0073, "step": 18296 }, { "epoch": 3.5416021671826625, "grad_norm": 0.08488284796476364, "learning_rate": 7.516853288512273e-05, "loss": 0.0058, "step": 18297 }, { "epoch": 3.541795665634675, "grad_norm": 0.0837065726518631, "learning_rate": 7.516608005709082e-05, "loss": 0.0063, "step": 18298 }, { "epoch": 3.5419891640866874, "grad_norm": 0.04869551956653595, "learning_rate": 7.516362715408638e-05, "loss": 0.0061, "step": 18299 }, { "epoch": 3.5421826625387, "grad_norm": 0.08546671271324158, "learning_rate": 
7.516117417611855e-05, "loss": 0.0074, "step": 18300 }, { "epoch": 3.5423761609907123, "grad_norm": 0.04691997170448303, "learning_rate": 7.515872112319644e-05, "loss": 0.005, "step": 18301 }, { "epoch": 3.5425696594427247, "grad_norm": 0.06395156681537628, "learning_rate": 7.515626799532917e-05, "loss": 0.0059, "step": 18302 }, { "epoch": 3.5427631578947367, "grad_norm": 0.0645851120352745, "learning_rate": 7.515381479252587e-05, "loss": 0.0063, "step": 18303 }, { "epoch": 3.542956656346749, "grad_norm": 0.06405507773160934, "learning_rate": 7.515136151479564e-05, "loss": 0.0066, "step": 18304 }, { "epoch": 3.5431501547987616, "grad_norm": 0.05715462565422058, "learning_rate": 7.514890816214762e-05, "loss": 0.0064, "step": 18305 }, { "epoch": 3.543343653250774, "grad_norm": 0.05923984572291374, "learning_rate": 7.514645473459093e-05, "loss": 0.0056, "step": 18306 }, { "epoch": 3.5435371517027865, "grad_norm": 0.06745880842208862, "learning_rate": 7.514400123213467e-05, "loss": 0.0079, "step": 18307 }, { "epoch": 3.5437306501547985, "grad_norm": 0.11868289113044739, "learning_rate": 7.514154765478798e-05, "loss": 0.0076, "step": 18308 }, { "epoch": 3.543924148606811, "grad_norm": 0.05956775322556496, "learning_rate": 7.513909400256e-05, "loss": 0.0059, "step": 18309 }, { "epoch": 3.5441176470588234, "grad_norm": 0.1026962548494339, "learning_rate": 7.513664027545981e-05, "loss": 0.0066, "step": 18310 }, { "epoch": 3.544311145510836, "grad_norm": 0.08404505997896194, "learning_rate": 7.513418647349657e-05, "loss": 0.007, "step": 18311 }, { "epoch": 3.5445046439628483, "grad_norm": 0.07586995512247086, "learning_rate": 7.513173259667939e-05, "loss": 0.0058, "step": 18312 }, { "epoch": 3.5446981424148607, "grad_norm": 0.12090027332305908, "learning_rate": 7.512927864501736e-05, "loss": 0.0053, "step": 18313 }, { "epoch": 3.544891640866873, "grad_norm": 0.07900770008563995, "learning_rate": 7.512682461851964e-05, "loss": 0.0062, "step": 18314 }, { "epoch": 3.5450851393188856, "grad_norm": 0.08600431680679321, "learning_rate": 7.512437051719537e-05, "loss": 0.0063, "step": 18315 }, { "epoch": 3.545278637770898, "grad_norm": 0.046739958226680756, "learning_rate": 7.512191634105362e-05, "loss": 0.007, "step": 18316 }, { "epoch": 3.5454721362229105, "grad_norm": 0.07516299933195114, "learning_rate": 7.511946209010355e-05, "loss": 0.0055, "step": 18317 }, { "epoch": 3.5456656346749225, "grad_norm": 0.08266766369342804, "learning_rate": 7.51170077643543e-05, "loss": 0.0069, "step": 18318 }, { "epoch": 3.545859133126935, "grad_norm": 0.05160922557115555, "learning_rate": 7.511455336381495e-05, "loss": 0.0069, "step": 18319 }, { "epoch": 3.5460526315789473, "grad_norm": 0.09733914583921432, "learning_rate": 7.511209888849464e-05, "loss": 0.0057, "step": 18320 }, { "epoch": 3.54624613003096, "grad_norm": 0.0735008716583252, "learning_rate": 7.510964433840251e-05, "loss": 0.0054, "step": 18321 }, { "epoch": 3.5464396284829722, "grad_norm": 0.0513642244040966, "learning_rate": 7.510718971354768e-05, "loss": 0.0061, "step": 18322 }, { "epoch": 3.5466331269349847, "grad_norm": 0.08107348531484604, "learning_rate": 7.510473501393928e-05, "loss": 0.0063, "step": 18323 }, { "epoch": 3.5468266253869967, "grad_norm": 0.06975778937339783, "learning_rate": 7.510228023958643e-05, "loss": 0.0054, "step": 18324 }, { "epoch": 3.547020123839009, "grad_norm": 0.06373558938503265, "learning_rate": 7.509982539049825e-05, "loss": 0.0063, "step": 18325 }, { "epoch": 3.5472136222910216, "grad_norm": 0.07401625066995621, 
"learning_rate": 7.509737046668386e-05, "loss": 0.0066, "step": 18326 }, { "epoch": 3.547407120743034, "grad_norm": 0.03953161463141441, "learning_rate": 7.509491546815241e-05, "loss": 0.0058, "step": 18327 }, { "epoch": 3.5476006191950464, "grad_norm": 0.06296997517347336, "learning_rate": 7.509246039491302e-05, "loss": 0.0072, "step": 18328 }, { "epoch": 3.547794117647059, "grad_norm": 0.05594223737716675, "learning_rate": 7.50900052469748e-05, "loss": 0.0076, "step": 18329 }, { "epoch": 3.5479876160990713, "grad_norm": 0.04380316287279129, "learning_rate": 7.50875500243469e-05, "loss": 0.0077, "step": 18330 }, { "epoch": 3.5481811145510838, "grad_norm": 0.047873884439468384, "learning_rate": 7.508509472703845e-05, "loss": 0.0063, "step": 18331 }, { "epoch": 3.548374613003096, "grad_norm": 0.04186159744858742, "learning_rate": 7.508263935505854e-05, "loss": 0.0044, "step": 18332 }, { "epoch": 3.548568111455108, "grad_norm": 0.040458884090185165, "learning_rate": 7.508018390841635e-05, "loss": 0.0063, "step": 18333 }, { "epoch": 3.5487616099071206, "grad_norm": 0.04796158894896507, "learning_rate": 7.507772838712096e-05, "loss": 0.0064, "step": 18334 }, { "epoch": 3.548955108359133, "grad_norm": 0.034131359308958054, "learning_rate": 7.507527279118157e-05, "loss": 0.0056, "step": 18335 }, { "epoch": 3.5491486068111455, "grad_norm": 0.05787206441164017, "learning_rate": 7.507281712060722e-05, "loss": 0.007, "step": 18336 }, { "epoch": 3.549342105263158, "grad_norm": 0.031283825635910034, "learning_rate": 7.50703613754071e-05, "loss": 0.0073, "step": 18337 }, { "epoch": 3.5495356037151704, "grad_norm": 0.052629269659519196, "learning_rate": 7.506790555559029e-05, "loss": 0.0058, "step": 18338 }, { "epoch": 3.5497291021671824, "grad_norm": 0.044240325689315796, "learning_rate": 7.506544966116598e-05, "loss": 0.0065, "step": 18339 }, { "epoch": 3.549922600619195, "grad_norm": 0.12229162454605103, "learning_rate": 7.506299369214327e-05, "loss": 0.0056, "step": 18340 }, { "epoch": 3.5501160990712073, "grad_norm": 0.06653259694576263, "learning_rate": 7.506053764853129e-05, "loss": 0.0069, "step": 18341 }, { "epoch": 3.5503095975232197, "grad_norm": 0.13988476991653442, "learning_rate": 7.505808153033919e-05, "loss": 0.0075, "step": 18342 }, { "epoch": 3.550503095975232, "grad_norm": 0.15322208404541016, "learning_rate": 7.505562533757605e-05, "loss": 0.0065, "step": 18343 }, { "epoch": 3.5506965944272446, "grad_norm": 0.17013101279735565, "learning_rate": 7.505316907025107e-05, "loss": 0.0071, "step": 18344 }, { "epoch": 3.550890092879257, "grad_norm": 0.17071695625782013, "learning_rate": 7.505071272837333e-05, "loss": 0.0068, "step": 18345 }, { "epoch": 3.5510835913312695, "grad_norm": 0.09369142353534698, "learning_rate": 7.504825631195199e-05, "loss": 0.0073, "step": 18346 }, { "epoch": 3.551277089783282, "grad_norm": 0.19951793551445007, "learning_rate": 7.504579982099616e-05, "loss": 0.0064, "step": 18347 }, { "epoch": 3.5514705882352944, "grad_norm": 0.04647039622068405, "learning_rate": 7.504334325551501e-05, "loss": 0.0069, "step": 18348 }, { "epoch": 3.5516640866873064, "grad_norm": 0.1550212800502777, "learning_rate": 7.504088661551763e-05, "loss": 0.0067, "step": 18349 }, { "epoch": 3.551857585139319, "grad_norm": 0.10301551222801208, "learning_rate": 7.503842990101318e-05, "loss": 0.0063, "step": 18350 }, { "epoch": 3.5520510835913313, "grad_norm": 0.08269052952528, "learning_rate": 7.503597311201076e-05, "loss": 0.0068, "step": 18351 }, { "epoch": 3.5522445820433437, "grad_norm": 
0.13716088235378265, "learning_rate": 7.503351624851956e-05, "loss": 0.0067, "step": 18352 }, { "epoch": 3.552438080495356, "grad_norm": 0.0580674409866333, "learning_rate": 7.503105931054867e-05, "loss": 0.008, "step": 18353 }, { "epoch": 3.5526315789473686, "grad_norm": 0.08416708558797836, "learning_rate": 7.502860229810724e-05, "loss": 0.0068, "step": 18354 }, { "epoch": 3.5528250773993806, "grad_norm": 0.06262586265802383, "learning_rate": 7.50261452112044e-05, "loss": 0.0071, "step": 18355 }, { "epoch": 3.553018575851393, "grad_norm": 0.04199325665831566, "learning_rate": 7.502368804984929e-05, "loss": 0.0054, "step": 18356 }, { "epoch": 3.5532120743034055, "grad_norm": 0.06358546018600464, "learning_rate": 7.502123081405104e-05, "loss": 0.0062, "step": 18357 }, { "epoch": 3.553405572755418, "grad_norm": 0.054550863802433014, "learning_rate": 7.50187735038188e-05, "loss": 0.0052, "step": 18358 }, { "epoch": 3.5535990712074303, "grad_norm": 0.05083802714943886, "learning_rate": 7.501631611916168e-05, "loss": 0.0067, "step": 18359 }, { "epoch": 3.553792569659443, "grad_norm": 0.06213022395968437, "learning_rate": 7.501385866008883e-05, "loss": 0.0067, "step": 18360 }, { "epoch": 3.5539860681114552, "grad_norm": 0.04443904384970665, "learning_rate": 7.501140112660938e-05, "loss": 0.0066, "step": 18361 }, { "epoch": 3.5541795665634677, "grad_norm": 0.034211449325084686, "learning_rate": 7.500894351873247e-05, "loss": 0.0057, "step": 18362 }, { "epoch": 3.55437306501548, "grad_norm": 0.031298454850912094, "learning_rate": 7.500648583646724e-05, "loss": 0.006, "step": 18363 }, { "epoch": 3.554566563467492, "grad_norm": 0.03841882944107056, "learning_rate": 7.500402807982283e-05, "loss": 0.0078, "step": 18364 }, { "epoch": 3.5547600619195046, "grad_norm": 0.029096385464072227, "learning_rate": 7.500157024880837e-05, "loss": 0.0065, "step": 18365 }, { "epoch": 3.554953560371517, "grad_norm": 0.06865373998880386, "learning_rate": 7.499911234343301e-05, "loss": 0.008, "step": 18366 }, { "epoch": 3.5551470588235294, "grad_norm": 0.04829441383481026, "learning_rate": 7.499665436370587e-05, "loss": 0.0064, "step": 18367 }, { "epoch": 3.555340557275542, "grad_norm": 0.0452653244137764, "learning_rate": 7.49941963096361e-05, "loss": 0.0065, "step": 18368 }, { "epoch": 3.5555340557275543, "grad_norm": 0.0910477340221405, "learning_rate": 7.499173818123283e-05, "loss": 0.008, "step": 18369 }, { "epoch": 3.5557275541795663, "grad_norm": 0.08896296471357346, "learning_rate": 7.49892799785052e-05, "loss": 0.0066, "step": 18370 }, { "epoch": 3.5559210526315788, "grad_norm": 0.04381313547492027, "learning_rate": 7.498682170146236e-05, "loss": 0.0061, "step": 18371 }, { "epoch": 3.556114551083591, "grad_norm": 0.0738212987780571, "learning_rate": 7.498436335011344e-05, "loss": 0.0078, "step": 18372 }, { "epoch": 3.5563080495356036, "grad_norm": 0.04842052608728409, "learning_rate": 7.498190492446759e-05, "loss": 0.0071, "step": 18373 }, { "epoch": 3.556501547987616, "grad_norm": 0.0706588551402092, "learning_rate": 7.497944642453392e-05, "loss": 0.0066, "step": 18374 }, { "epoch": 3.5566950464396285, "grad_norm": 0.05293954536318779, "learning_rate": 7.497698785032161e-05, "loss": 0.0073, "step": 18375 }, { "epoch": 3.556888544891641, "grad_norm": 0.07909562438726425, "learning_rate": 7.497452920183977e-05, "loss": 0.0062, "step": 18376 }, { "epoch": 3.5570820433436534, "grad_norm": 0.05155949667096138, "learning_rate": 7.497207047909756e-05, "loss": 0.0066, "step": 18377 }, { "epoch": 3.557275541795666, 
"grad_norm": 0.08859624713659286, "learning_rate": 7.49696116821041e-05, "loss": 0.0064, "step": 18378 }, { "epoch": 3.5574690402476783, "grad_norm": 0.0389089398086071, "learning_rate": 7.496715281086856e-05, "loss": 0.006, "step": 18379 }, { "epoch": 3.5576625386996903, "grad_norm": 0.1097523644566536, "learning_rate": 7.496469386540006e-05, "loss": 0.0061, "step": 18380 }, { "epoch": 3.5578560371517027, "grad_norm": 0.07736163586378098, "learning_rate": 7.496223484570776e-05, "loss": 0.0075, "step": 18381 }, { "epoch": 3.558049535603715, "grad_norm": 0.08854857832193375, "learning_rate": 7.495977575180077e-05, "loss": 0.0056, "step": 18382 }, { "epoch": 3.5582430340557276, "grad_norm": 0.10295742750167847, "learning_rate": 7.495731658368827e-05, "loss": 0.0069, "step": 18383 }, { "epoch": 3.55843653250774, "grad_norm": 0.02822837606072426, "learning_rate": 7.495485734137937e-05, "loss": 0.006, "step": 18384 }, { "epoch": 3.558630030959752, "grad_norm": 0.08726373314857483, "learning_rate": 7.495239802488323e-05, "loss": 0.0071, "step": 18385 }, { "epoch": 3.5588235294117645, "grad_norm": 0.04430317506194115, "learning_rate": 7.494993863420899e-05, "loss": 0.0066, "step": 18386 }, { "epoch": 3.559017027863777, "grad_norm": 0.04449035972356796, "learning_rate": 7.494747916936579e-05, "loss": 0.0087, "step": 18387 }, { "epoch": 3.5592105263157894, "grad_norm": 0.06885173916816711, "learning_rate": 7.49450196303628e-05, "loss": 0.0068, "step": 18388 }, { "epoch": 3.559404024767802, "grad_norm": 0.042419686913490295, "learning_rate": 7.494256001720912e-05, "loss": 0.0065, "step": 18389 }, { "epoch": 3.5595975232198143, "grad_norm": 0.048975974321365356, "learning_rate": 7.494010032991392e-05, "loss": 0.0069, "step": 18390 }, { "epoch": 3.5597910216718267, "grad_norm": 0.07488036900758743, "learning_rate": 7.493764056848635e-05, "loss": 0.006, "step": 18391 }, { "epoch": 3.559984520123839, "grad_norm": 0.12396934628486633, "learning_rate": 7.493518073293553e-05, "loss": 0.0079, "step": 18392 }, { "epoch": 3.5601780185758516, "grad_norm": 0.08099231868982315, "learning_rate": 7.493272082327063e-05, "loss": 0.0063, "step": 18393 }, { "epoch": 3.560371517027864, "grad_norm": 0.10918816179037094, "learning_rate": 7.493026083950077e-05, "loss": 0.0061, "step": 18394 }, { "epoch": 3.560565015479876, "grad_norm": 0.09811724722385406, "learning_rate": 7.492780078163514e-05, "loss": 0.0065, "step": 18395 }, { "epoch": 3.5607585139318885, "grad_norm": 0.06333158165216446, "learning_rate": 7.492534064968282e-05, "loss": 0.0074, "step": 18396 }, { "epoch": 3.560952012383901, "grad_norm": 0.12616360187530518, "learning_rate": 7.492288044365301e-05, "loss": 0.0055, "step": 18397 }, { "epoch": 3.5611455108359134, "grad_norm": 0.050609808415174484, "learning_rate": 7.492042016355486e-05, "loss": 0.0071, "step": 18398 }, { "epoch": 3.561339009287926, "grad_norm": 0.12446129322052002, "learning_rate": 7.491795980939748e-05, "loss": 0.0068, "step": 18399 }, { "epoch": 3.5615325077399382, "grad_norm": 0.08377160876989365, "learning_rate": 7.491549938119003e-05, "loss": 0.0075, "step": 18400 }, { "epoch": 3.5617260061919502, "grad_norm": 0.08650648593902588, "learning_rate": 7.491303887894167e-05, "loss": 0.0069, "step": 18401 }, { "epoch": 3.5619195046439627, "grad_norm": 0.12188725173473358, "learning_rate": 7.491057830266152e-05, "loss": 0.0063, "step": 18402 }, { "epoch": 3.562113003095975, "grad_norm": 0.028023846447467804, "learning_rate": 7.490811765235877e-05, "loss": 0.0069, "step": 18403 }, { "epoch": 
3.5623065015479876, "grad_norm": 0.08395760506391525, "learning_rate": 7.490565692804252e-05, "loss": 0.0058, "step": 18404 }, { "epoch": 3.5625, "grad_norm": 0.12461710721254349, "learning_rate": 7.490319612972196e-05, "loss": 0.0063, "step": 18405 }, { "epoch": 3.5626934984520124, "grad_norm": 0.04119938984513283, "learning_rate": 7.490073525740623e-05, "loss": 0.0067, "step": 18406 }, { "epoch": 3.562886996904025, "grad_norm": 0.1414700299501419, "learning_rate": 7.489827431110446e-05, "loss": 0.0051, "step": 18407 }, { "epoch": 3.5630804953560373, "grad_norm": 0.05395016819238663, "learning_rate": 7.489581329082579e-05, "loss": 0.0053, "step": 18408 }, { "epoch": 3.5632739938080498, "grad_norm": 0.10141674429178238, "learning_rate": 7.48933521965794e-05, "loss": 0.0068, "step": 18409 }, { "epoch": 3.5634674922600618, "grad_norm": 0.10658086091279984, "learning_rate": 7.489089102837444e-05, "loss": 0.007, "step": 18410 }, { "epoch": 3.563660990712074, "grad_norm": 0.04439426213502884, "learning_rate": 7.488842978622003e-05, "loss": 0.0064, "step": 18411 }, { "epoch": 3.5638544891640866, "grad_norm": 0.10909809172153473, "learning_rate": 7.488596847012536e-05, "loss": 0.006, "step": 18412 }, { "epoch": 3.564047987616099, "grad_norm": 0.0543474480509758, "learning_rate": 7.488350708009954e-05, "loss": 0.0073, "step": 18413 }, { "epoch": 3.5642414860681115, "grad_norm": 0.08468058705329895, "learning_rate": 7.488104561615174e-05, "loss": 0.0055, "step": 18414 }, { "epoch": 3.564434984520124, "grad_norm": 0.0791608914732933, "learning_rate": 7.487858407829113e-05, "loss": 0.0062, "step": 18415 }, { "epoch": 3.564628482972136, "grad_norm": 0.03163665905594826, "learning_rate": 7.487612246652683e-05, "loss": 0.0065, "step": 18416 }, { "epoch": 3.5648219814241484, "grad_norm": 0.07820753753185272, "learning_rate": 7.487366078086801e-05, "loss": 0.0064, "step": 18417 }, { "epoch": 3.565015479876161, "grad_norm": 0.038631368428468704, "learning_rate": 7.487119902132382e-05, "loss": 0.0082, "step": 18418 }, { "epoch": 3.5652089783281733, "grad_norm": 0.06466619670391083, "learning_rate": 7.486873718790339e-05, "loss": 0.006, "step": 18419 }, { "epoch": 3.5654024767801857, "grad_norm": 0.05850628763437271, "learning_rate": 7.48662752806159e-05, "loss": 0.0049, "step": 18420 }, { "epoch": 3.565595975232198, "grad_norm": 0.06084489822387695, "learning_rate": 7.486381329947049e-05, "loss": 0.005, "step": 18421 }, { "epoch": 3.5657894736842106, "grad_norm": 0.035663705319166183, "learning_rate": 7.486135124447634e-05, "loss": 0.0076, "step": 18422 }, { "epoch": 3.565982972136223, "grad_norm": 0.09116619825363159, "learning_rate": 7.485888911564255e-05, "loss": 0.0053, "step": 18423 }, { "epoch": 3.5661764705882355, "grad_norm": 0.04077910631895065, "learning_rate": 7.485642691297832e-05, "loss": 0.007, "step": 18424 }, { "epoch": 3.566369969040248, "grad_norm": 0.08495611697435379, "learning_rate": 7.485396463649276e-05, "loss": 0.0066, "step": 18425 }, { "epoch": 3.56656346749226, "grad_norm": 0.07793379575014114, "learning_rate": 7.485150228619509e-05, "loss": 0.0076, "step": 18426 }, { "epoch": 3.5667569659442724, "grad_norm": 0.0362139567732811, "learning_rate": 7.484903986209442e-05, "loss": 0.0057, "step": 18427 }, { "epoch": 3.566950464396285, "grad_norm": 0.12028101086616516, "learning_rate": 7.484657736419989e-05, "loss": 0.0078, "step": 18428 }, { "epoch": 3.5671439628482973, "grad_norm": 0.05866042152047157, "learning_rate": 7.484411479252071e-05, "loss": 0.0063, "step": 18429 }, { "epoch": 
3.5673374613003097, "grad_norm": 0.11561509966850281, "learning_rate": 7.484165214706596e-05, "loss": 0.0068, "step": 18430 }, { "epoch": 3.5675309597523217, "grad_norm": 0.08924659341573715, "learning_rate": 7.483918942784486e-05, "loss": 0.007, "step": 18431 }, { "epoch": 3.567724458204334, "grad_norm": 0.05144365876913071, "learning_rate": 7.483672663486653e-05, "loss": 0.0064, "step": 18432 }, { "epoch": 3.5679179566563466, "grad_norm": 0.0919174998998642, "learning_rate": 7.483426376814015e-05, "loss": 0.0085, "step": 18433 }, { "epoch": 3.568111455108359, "grad_norm": 0.04129854217171669, "learning_rate": 7.483180082767486e-05, "loss": 0.0064, "step": 18434 }, { "epoch": 3.5683049535603715, "grad_norm": 0.0648854449391365, "learning_rate": 7.482933781347981e-05, "loss": 0.0058, "step": 18435 }, { "epoch": 3.568498452012384, "grad_norm": 0.044721804559230804, "learning_rate": 7.482687472556419e-05, "loss": 0.0055, "step": 18436 }, { "epoch": 3.5686919504643964, "grad_norm": 0.06369674950838089, "learning_rate": 7.482441156393712e-05, "loss": 0.0058, "step": 18437 }, { "epoch": 3.568885448916409, "grad_norm": 0.048064880073070526, "learning_rate": 7.482194832860779e-05, "loss": 0.0074, "step": 18438 }, { "epoch": 3.5690789473684212, "grad_norm": 0.07569465786218643, "learning_rate": 7.481948501958532e-05, "loss": 0.0073, "step": 18439 }, { "epoch": 3.5692724458204337, "grad_norm": 0.0485651008784771, "learning_rate": 7.481702163687888e-05, "loss": 0.0069, "step": 18440 }, { "epoch": 3.5694659442724457, "grad_norm": 0.054097142070531845, "learning_rate": 7.481455818049765e-05, "loss": 0.0061, "step": 18441 }, { "epoch": 3.569659442724458, "grad_norm": 0.054177701473236084, "learning_rate": 7.481209465045078e-05, "loss": 0.006, "step": 18442 }, { "epoch": 3.5698529411764706, "grad_norm": 0.055668994784355164, "learning_rate": 7.480963104674743e-05, "loss": 0.0073, "step": 18443 }, { "epoch": 3.570046439628483, "grad_norm": 0.04353713616728783, "learning_rate": 7.480716736939675e-05, "loss": 0.0068, "step": 18444 }, { "epoch": 3.5702399380804954, "grad_norm": 0.04785269498825073, "learning_rate": 7.480470361840789e-05, "loss": 0.0076, "step": 18445 }, { "epoch": 3.570433436532508, "grad_norm": 0.033725619316101074, "learning_rate": 7.480223979379003e-05, "loss": 0.0052, "step": 18446 }, { "epoch": 3.57062693498452, "grad_norm": 0.05779169872403145, "learning_rate": 7.479977589555232e-05, "loss": 0.0067, "step": 18447 }, { "epoch": 3.5708204334365323, "grad_norm": 0.04686266556382179, "learning_rate": 7.479731192370392e-05, "loss": 0.0067, "step": 18448 }, { "epoch": 3.5710139318885448, "grad_norm": 0.038589369505643845, "learning_rate": 7.479484787825399e-05, "loss": 0.007, "step": 18449 }, { "epoch": 3.571207430340557, "grad_norm": 0.05731785669922829, "learning_rate": 7.479238375921169e-05, "loss": 0.0059, "step": 18450 }, { "epoch": 3.5714009287925697, "grad_norm": 0.04543726518750191, "learning_rate": 7.478991956658621e-05, "loss": 0.0067, "step": 18451 }, { "epoch": 3.571594427244582, "grad_norm": 0.06140659749507904, "learning_rate": 7.478745530038665e-05, "loss": 0.0058, "step": 18452 }, { "epoch": 3.5717879256965945, "grad_norm": 0.04198238253593445, "learning_rate": 7.478499096062224e-05, "loss": 0.0061, "step": 18453 }, { "epoch": 3.571981424148607, "grad_norm": 0.04146955907344818, "learning_rate": 7.478252654730209e-05, "loss": 0.0064, "step": 18454 }, { "epoch": 3.5721749226006194, "grad_norm": 0.04757623001933098, "learning_rate": 7.478006206043539e-05, "loss": 0.0068, 
"step": 18455 }, { "epoch": 3.5723684210526314, "grad_norm": 0.035064928233623505, "learning_rate": 7.477759750003128e-05, "loss": 0.0058, "step": 18456 }, { "epoch": 3.572561919504644, "grad_norm": 0.030514229089021683, "learning_rate": 7.477513286609896e-05, "loss": 0.0053, "step": 18457 }, { "epoch": 3.5727554179566563, "grad_norm": 0.03253266215324402, "learning_rate": 7.477266815864755e-05, "loss": 0.0065, "step": 18458 }, { "epoch": 3.5729489164086687, "grad_norm": 0.02063293196260929, "learning_rate": 7.477020337768622e-05, "loss": 0.0068, "step": 18459 }, { "epoch": 3.573142414860681, "grad_norm": 0.019040873274207115, "learning_rate": 7.476773852322417e-05, "loss": 0.0058, "step": 18460 }, { "epoch": 3.5733359133126936, "grad_norm": 0.02687789313495159, "learning_rate": 7.476527359527051e-05, "loss": 0.0063, "step": 18461 }, { "epoch": 3.5735294117647056, "grad_norm": 0.022610269486904144, "learning_rate": 7.476280859383447e-05, "loss": 0.0073, "step": 18462 }, { "epoch": 3.573722910216718, "grad_norm": 0.03151766583323479, "learning_rate": 7.476034351892515e-05, "loss": 0.0061, "step": 18463 }, { "epoch": 3.5739164086687305, "grad_norm": 0.044610701501369476, "learning_rate": 7.475787837055175e-05, "loss": 0.007, "step": 18464 }, { "epoch": 3.574109907120743, "grad_norm": 0.06864331662654877, "learning_rate": 7.475541314872342e-05, "loss": 0.0085, "step": 18465 }, { "epoch": 3.5743034055727554, "grad_norm": 0.031493619084358215, "learning_rate": 7.475294785344934e-05, "loss": 0.0049, "step": 18466 }, { "epoch": 3.574496904024768, "grad_norm": 0.05780242383480072, "learning_rate": 7.475048248473866e-05, "loss": 0.0064, "step": 18467 }, { "epoch": 3.5746904024767803, "grad_norm": 0.07292773574590683, "learning_rate": 7.474801704260056e-05, "loss": 0.0065, "step": 18468 }, { "epoch": 3.5748839009287927, "grad_norm": 0.04613741487264633, "learning_rate": 7.47455515270442e-05, "loss": 0.007, "step": 18469 }, { "epoch": 3.575077399380805, "grad_norm": 0.07948940247297287, "learning_rate": 7.474308593807874e-05, "loss": 0.0059, "step": 18470 }, { "epoch": 3.5752708978328176, "grad_norm": 0.05204262211918831, "learning_rate": 7.474062027571335e-05, "loss": 0.0064, "step": 18471 }, { "epoch": 3.5754643962848296, "grad_norm": 0.05522590130567551, "learning_rate": 7.473815453995721e-05, "loss": 0.005, "step": 18472 }, { "epoch": 3.575657894736842, "grad_norm": 0.07427331805229187, "learning_rate": 7.473568873081947e-05, "loss": 0.0063, "step": 18473 }, { "epoch": 3.5758513931888545, "grad_norm": 0.03686283528804779, "learning_rate": 7.47332228483093e-05, "loss": 0.0074, "step": 18474 }, { "epoch": 3.576044891640867, "grad_norm": 0.09207639843225479, "learning_rate": 7.473075689243586e-05, "loss": 0.0068, "step": 18475 }, { "epoch": 3.5762383900928794, "grad_norm": 0.031290989369153976, "learning_rate": 7.472829086320835e-05, "loss": 0.005, "step": 18476 }, { "epoch": 3.576431888544892, "grad_norm": 0.07632652670145035, "learning_rate": 7.472582476063589e-05, "loss": 0.0065, "step": 18477 }, { "epoch": 3.576625386996904, "grad_norm": 0.05495825409889221, "learning_rate": 7.472335858472769e-05, "loss": 0.0061, "step": 18478 }, { "epoch": 3.5768188854489162, "grad_norm": 0.034880682826042175, "learning_rate": 7.47208923354929e-05, "loss": 0.0078, "step": 18479 }, { "epoch": 3.5770123839009287, "grad_norm": 0.07221879065036774, "learning_rate": 7.471842601294069e-05, "loss": 0.0072, "step": 18480 }, { "epoch": 3.577205882352941, "grad_norm": 0.052283573895692825, "learning_rate": 
7.471595961708022e-05, "loss": 0.0072, "step": 18481 }, { "epoch": 3.5773993808049536, "grad_norm": 0.04692032188177109, "learning_rate": 7.47134931479207e-05, "loss": 0.0071, "step": 18482 }, { "epoch": 3.577592879256966, "grad_norm": 0.0715356394648552, "learning_rate": 7.471102660547124e-05, "loss": 0.007, "step": 18483 }, { "epoch": 3.5777863777089784, "grad_norm": 0.043782785534858704, "learning_rate": 7.470855998974105e-05, "loss": 0.0068, "step": 18484 }, { "epoch": 3.577979876160991, "grad_norm": 0.05173223093152046, "learning_rate": 7.470609330073929e-05, "loss": 0.0064, "step": 18485 }, { "epoch": 3.5781733746130033, "grad_norm": 0.08367720991373062, "learning_rate": 7.470362653847512e-05, "loss": 0.0056, "step": 18486 }, { "epoch": 3.5783668730650153, "grad_norm": 0.037635043263435364, "learning_rate": 7.470115970295774e-05, "loss": 0.007, "step": 18487 }, { "epoch": 3.5785603715170278, "grad_norm": 0.09250788390636444, "learning_rate": 7.46986927941963e-05, "loss": 0.0052, "step": 18488 }, { "epoch": 3.57875386996904, "grad_norm": 0.06109792739152908, "learning_rate": 7.469622581219998e-05, "loss": 0.0067, "step": 18489 }, { "epoch": 3.5789473684210527, "grad_norm": 0.06970767676830292, "learning_rate": 7.469375875697793e-05, "loss": 0.0059, "step": 18490 }, { "epoch": 3.579140866873065, "grad_norm": 0.09044366329908371, "learning_rate": 7.469129162853934e-05, "loss": 0.0069, "step": 18491 }, { "epoch": 3.5793343653250775, "grad_norm": 0.04318970814347267, "learning_rate": 7.468882442689337e-05, "loss": 0.0068, "step": 18492 }, { "epoch": 3.5795278637770895, "grad_norm": 0.09017185866832733, "learning_rate": 7.468635715204922e-05, "loss": 0.0062, "step": 18493 }, { "epoch": 3.579721362229102, "grad_norm": 0.04380880668759346, "learning_rate": 7.468388980401605e-05, "loss": 0.0078, "step": 18494 }, { "epoch": 3.5799148606811144, "grad_norm": 0.10077540576457977, "learning_rate": 7.468142238280302e-05, "loss": 0.0073, "step": 18495 }, { "epoch": 3.580108359133127, "grad_norm": 0.047415945678949356, "learning_rate": 7.467895488841932e-05, "loss": 0.0058, "step": 18496 }, { "epoch": 3.5803018575851393, "grad_norm": 0.09509176760911942, "learning_rate": 7.467648732087409e-05, "loss": 0.0065, "step": 18497 }, { "epoch": 3.5804953560371517, "grad_norm": 0.051611822098493576, "learning_rate": 7.467401968017654e-05, "loss": 0.0055, "step": 18498 }, { "epoch": 3.580688854489164, "grad_norm": 0.07564603537321091, "learning_rate": 7.467155196633584e-05, "loss": 0.006, "step": 18499 }, { "epoch": 3.5808823529411766, "grad_norm": 0.07856755703687668, "learning_rate": 7.466908417936115e-05, "loss": 0.0052, "step": 18500 }, { "epoch": 3.581075851393189, "grad_norm": 0.04879014566540718, "learning_rate": 7.466661631926168e-05, "loss": 0.0063, "step": 18501 }, { "epoch": 3.5812693498452015, "grad_norm": 0.1054585799574852, "learning_rate": 7.466414838604653e-05, "loss": 0.0056, "step": 18502 }, { "epoch": 3.5814628482972135, "grad_norm": 0.0328856036067009, "learning_rate": 7.466168037972495e-05, "loss": 0.0058, "step": 18503 }, { "epoch": 3.581656346749226, "grad_norm": 0.1137625128030777, "learning_rate": 7.465921230030608e-05, "loss": 0.0068, "step": 18504 }, { "epoch": 3.5818498452012384, "grad_norm": 0.02303411066532135, "learning_rate": 7.46567441477991e-05, "loss": 0.0052, "step": 18505 }, { "epoch": 3.582043343653251, "grad_norm": 0.09612088650465012, "learning_rate": 7.465427592221319e-05, "loss": 0.0074, "step": 18506 }, { "epoch": 3.5822368421052633, "grad_norm": 0.06219276040792465, 
"learning_rate": 7.465180762355754e-05, "loss": 0.008, "step": 18507 }, { "epoch": 3.5824303405572753, "grad_norm": 0.07006066292524338, "learning_rate": 7.464933925184128e-05, "loss": 0.0072, "step": 18508 }, { "epoch": 3.5826238390092877, "grad_norm": 0.08833177387714386, "learning_rate": 7.464687080707364e-05, "loss": 0.0068, "step": 18509 }, { "epoch": 3.5828173374613, "grad_norm": 0.03954358398914337, "learning_rate": 7.464440228926378e-05, "loss": 0.005, "step": 18510 }, { "epoch": 3.5830108359133126, "grad_norm": 0.09053459763526917, "learning_rate": 7.464193369842086e-05, "loss": 0.0054, "step": 18511 }, { "epoch": 3.583204334365325, "grad_norm": 0.04231644049286842, "learning_rate": 7.463946503455407e-05, "loss": 0.0073, "step": 18512 }, { "epoch": 3.5833978328173375, "grad_norm": 0.0789642482995987, "learning_rate": 7.46369962976726e-05, "loss": 0.0075, "step": 18513 }, { "epoch": 3.58359133126935, "grad_norm": 0.053940530866384506, "learning_rate": 7.46345274877856e-05, "loss": 0.0054, "step": 18514 }, { "epoch": 3.5837848297213624, "grad_norm": 0.05093823000788689, "learning_rate": 7.463205860490229e-05, "loss": 0.0068, "step": 18515 }, { "epoch": 3.583978328173375, "grad_norm": 0.08229531347751617, "learning_rate": 7.462958964903181e-05, "loss": 0.0079, "step": 18516 }, { "epoch": 3.5841718266253872, "grad_norm": 0.05446753650903702, "learning_rate": 7.462712062018333e-05, "loss": 0.0055, "step": 18517 }, { "epoch": 3.5843653250773992, "grad_norm": 0.05757732689380646, "learning_rate": 7.462465151836608e-05, "loss": 0.0051, "step": 18518 }, { "epoch": 3.5845588235294117, "grad_norm": 0.05724284052848816, "learning_rate": 7.46221823435892e-05, "loss": 0.0068, "step": 18519 }, { "epoch": 3.584752321981424, "grad_norm": 0.04333844408392906, "learning_rate": 7.46197130958619e-05, "loss": 0.0058, "step": 18520 }, { "epoch": 3.5849458204334366, "grad_norm": 0.05654418468475342, "learning_rate": 7.461724377519333e-05, "loss": 0.006, "step": 18521 }, { "epoch": 3.585139318885449, "grad_norm": 0.06804519891738892, "learning_rate": 7.461477438159266e-05, "loss": 0.0056, "step": 18522 }, { "epoch": 3.5853328173374615, "grad_norm": 0.029213452711701393, "learning_rate": 7.461230491506912e-05, "loss": 0.0076, "step": 18523 }, { "epoch": 3.5855263157894735, "grad_norm": 0.06194904074072838, "learning_rate": 7.460983537563186e-05, "loss": 0.006, "step": 18524 }, { "epoch": 3.585719814241486, "grad_norm": 0.038807280361652374, "learning_rate": 7.460736576329004e-05, "loss": 0.0063, "step": 18525 }, { "epoch": 3.5859133126934983, "grad_norm": 0.04844176396727562, "learning_rate": 7.46048960780529e-05, "loss": 0.008, "step": 18526 }, { "epoch": 3.5861068111455108, "grad_norm": 0.03047299198806286, "learning_rate": 7.460242631992957e-05, "loss": 0.0073, "step": 18527 }, { "epoch": 3.586300309597523, "grad_norm": 0.03052816167473793, "learning_rate": 7.459995648892923e-05, "loss": 0.0046, "step": 18528 }, { "epoch": 3.5864938080495357, "grad_norm": 0.020613713189959526, "learning_rate": 7.459748658506111e-05, "loss": 0.0055, "step": 18529 }, { "epoch": 3.586687306501548, "grad_norm": 0.045978281646966934, "learning_rate": 7.459501660833435e-05, "loss": 0.0066, "step": 18530 }, { "epoch": 3.5868808049535605, "grad_norm": 0.03577345609664917, "learning_rate": 7.459254655875815e-05, "loss": 0.0073, "step": 18531 }, { "epoch": 3.587074303405573, "grad_norm": 0.031938355416059494, "learning_rate": 7.459007643634169e-05, "loss": 0.0039, "step": 18532 }, { "epoch": 3.587267801857585, "grad_norm": 
0.051633693277835846, "learning_rate": 7.458760624109416e-05, "loss": 0.0079, "step": 18533 }, { "epoch": 3.5874613003095974, "grad_norm": 0.022157395258545876, "learning_rate": 7.458513597302473e-05, "loss": 0.0056, "step": 18534 }, { "epoch": 3.58765479876161, "grad_norm": 0.061525072902441025, "learning_rate": 7.458266563214258e-05, "loss": 0.006, "step": 18535 }, { "epoch": 3.5878482972136223, "grad_norm": 0.04423310235142708, "learning_rate": 7.45801952184569e-05, "loss": 0.0066, "step": 18536 }, { "epoch": 3.5880417956656347, "grad_norm": 0.03780834749341011, "learning_rate": 7.45777247319769e-05, "loss": 0.0065, "step": 18537 }, { "epoch": 3.588235294117647, "grad_norm": 0.07650479674339294, "learning_rate": 7.457525417271174e-05, "loss": 0.006, "step": 18538 }, { "epoch": 3.588428792569659, "grad_norm": 0.0405048169195652, "learning_rate": 7.457278354067061e-05, "loss": 0.0054, "step": 18539 }, { "epoch": 3.5886222910216716, "grad_norm": 0.06670338660478592, "learning_rate": 7.457031283586268e-05, "loss": 0.0051, "step": 18540 }, { "epoch": 3.588815789473684, "grad_norm": 0.06059376522898674, "learning_rate": 7.456784205829716e-05, "loss": 0.0066, "step": 18541 }, { "epoch": 3.5890092879256965, "grad_norm": 0.08306223154067993, "learning_rate": 7.456537120798323e-05, "loss": 0.0072, "step": 18542 }, { "epoch": 3.589202786377709, "grad_norm": 0.057899706065654755, "learning_rate": 7.456290028493006e-05, "loss": 0.0069, "step": 18543 }, { "epoch": 3.5893962848297214, "grad_norm": 0.059743136167526245, "learning_rate": 7.456042928914685e-05, "loss": 0.0064, "step": 18544 }, { "epoch": 3.589589783281734, "grad_norm": 0.0559733584523201, "learning_rate": 7.455795822064278e-05, "loss": 0.0062, "step": 18545 }, { "epoch": 3.5897832817337463, "grad_norm": 0.027681345120072365, "learning_rate": 7.455548707942705e-05, "loss": 0.007, "step": 18546 }, { "epoch": 3.5899767801857587, "grad_norm": 0.07150402665138245, "learning_rate": 7.455301586550882e-05, "loss": 0.0061, "step": 18547 }, { "epoch": 3.590170278637771, "grad_norm": 0.03458646684885025, "learning_rate": 7.455054457889731e-05, "loss": 0.0066, "step": 18548 }, { "epoch": 3.590363777089783, "grad_norm": 0.05277124047279358, "learning_rate": 7.45480732196017e-05, "loss": 0.0064, "step": 18549 }, { "epoch": 3.5905572755417956, "grad_norm": 0.05837381258606911, "learning_rate": 7.454560178763116e-05, "loss": 0.0072, "step": 18550 }, { "epoch": 3.590750773993808, "grad_norm": 0.04026069864630699, "learning_rate": 7.454313028299489e-05, "loss": 0.0072, "step": 18551 }, { "epoch": 3.5909442724458205, "grad_norm": 0.0761963427066803, "learning_rate": 7.454065870570209e-05, "loss": 0.0056, "step": 18552 }, { "epoch": 3.591137770897833, "grad_norm": 0.06384066492319107, "learning_rate": 7.453818705576191e-05, "loss": 0.0074, "step": 18553 }, { "epoch": 3.5913312693498454, "grad_norm": 0.06313551217317581, "learning_rate": 7.453571533318357e-05, "loss": 0.0065, "step": 18554 }, { "epoch": 3.5915247678018574, "grad_norm": 0.08264485001564026, "learning_rate": 7.453324353797626e-05, "loss": 0.0064, "step": 18555 }, { "epoch": 3.59171826625387, "grad_norm": 0.05828423053026199, "learning_rate": 7.453077167014917e-05, "loss": 0.0061, "step": 18556 }, { "epoch": 3.5919117647058822, "grad_norm": 0.10228528082370758, "learning_rate": 7.452829972971148e-05, "loss": 0.0065, "step": 18557 }, { "epoch": 3.5921052631578947, "grad_norm": 0.047860026359558105, "learning_rate": 7.452582771667238e-05, "loss": 0.005, "step": 18558 }, { "epoch": 
3.592298761609907, "grad_norm": 0.07259977608919144, "learning_rate": 7.452335563104106e-05, "loss": 0.0059, "step": 18559 }, { "epoch": 3.5924922600619196, "grad_norm": 0.08234228938817978, "learning_rate": 7.452088347282671e-05, "loss": 0.0076, "step": 18560 }, { "epoch": 3.592685758513932, "grad_norm": 0.027515675872564316, "learning_rate": 7.451841124203854e-05, "loss": 0.0077, "step": 18561 }, { "epoch": 3.5928792569659445, "grad_norm": 0.07594515383243561, "learning_rate": 7.451593893868573e-05, "loss": 0.0065, "step": 18562 }, { "epoch": 3.593072755417957, "grad_norm": 0.0302827600389719, "learning_rate": 7.451346656277746e-05, "loss": 0.0057, "step": 18563 }, { "epoch": 3.593266253869969, "grad_norm": 0.04756931960582733, "learning_rate": 7.451099411432293e-05, "loss": 0.0067, "step": 18564 }, { "epoch": 3.5934597523219813, "grad_norm": 0.05308513715863228, "learning_rate": 7.450852159333134e-05, "loss": 0.0046, "step": 18565 }, { "epoch": 3.593653250773994, "grad_norm": 0.039858508855104446, "learning_rate": 7.450604899981185e-05, "loss": 0.0057, "step": 18566 }, { "epoch": 3.593846749226006, "grad_norm": 0.0464649461209774, "learning_rate": 7.450357633377369e-05, "loss": 0.0081, "step": 18567 }, { "epoch": 3.5940402476780187, "grad_norm": 0.043153807520866394, "learning_rate": 7.450110359522604e-05, "loss": 0.0058, "step": 18568 }, { "epoch": 3.594233746130031, "grad_norm": 0.030563868582248688, "learning_rate": 7.44986307841781e-05, "loss": 0.0081, "step": 18569 }, { "epoch": 3.594427244582043, "grad_norm": 0.02575734071433544, "learning_rate": 7.449615790063903e-05, "loss": 0.0059, "step": 18570 }, { "epoch": 3.5946207430340555, "grad_norm": 0.039727531373500824, "learning_rate": 7.449368494461808e-05, "loss": 0.0048, "step": 18571 }, { "epoch": 3.594814241486068, "grad_norm": 0.0317794606089592, "learning_rate": 7.44912119161244e-05, "loss": 0.0071, "step": 18572 }, { "epoch": 3.5950077399380804, "grad_norm": 0.03295572102069855, "learning_rate": 7.448873881516719e-05, "loss": 0.0066, "step": 18573 }, { "epoch": 3.595201238390093, "grad_norm": 0.025959786027669907, "learning_rate": 7.448626564175565e-05, "loss": 0.0057, "step": 18574 }, { "epoch": 3.5953947368421053, "grad_norm": 0.01807548478245735, "learning_rate": 7.448379239589899e-05, "loss": 0.0042, "step": 18575 }, { "epoch": 3.5955882352941178, "grad_norm": 0.04334533214569092, "learning_rate": 7.448131907760639e-05, "loss": 0.0066, "step": 18576 }, { "epoch": 3.59578173374613, "grad_norm": 0.016244830563664436, "learning_rate": 7.447884568688703e-05, "loss": 0.0065, "step": 18577 }, { "epoch": 3.5959752321981426, "grad_norm": 0.04185736924409866, "learning_rate": 7.447637222375014e-05, "loss": 0.0063, "step": 18578 }, { "epoch": 3.5961687306501546, "grad_norm": 0.03357566148042679, "learning_rate": 7.447389868820488e-05, "loss": 0.006, "step": 18579 }, { "epoch": 3.596362229102167, "grad_norm": 0.04586989805102348, "learning_rate": 7.447142508026047e-05, "loss": 0.007, "step": 18580 }, { "epoch": 3.5965557275541795, "grad_norm": 0.04489648714661598, "learning_rate": 7.44689513999261e-05, "loss": 0.0056, "step": 18581 }, { "epoch": 3.596749226006192, "grad_norm": 0.053580284118652344, "learning_rate": 7.446647764721097e-05, "loss": 0.0068, "step": 18582 }, { "epoch": 3.5969427244582044, "grad_norm": 0.04234476014971733, "learning_rate": 7.446400382212427e-05, "loss": 0.0062, "step": 18583 }, { "epoch": 3.597136222910217, "grad_norm": 0.03647712618112564, "learning_rate": 7.44615299246752e-05, "loss": 0.0057, "step": 
18584 }, { "epoch": 3.597329721362229, "grad_norm": 0.03454233333468437, "learning_rate": 7.445905595487296e-05, "loss": 0.0069, "step": 18585 }, { "epoch": 3.5975232198142413, "grad_norm": 0.038808200508356094, "learning_rate": 7.445658191272672e-05, "loss": 0.0068, "step": 18586 }, { "epoch": 3.5977167182662537, "grad_norm": 0.04763620346784592, "learning_rate": 7.445410779824573e-05, "loss": 0.0055, "step": 18587 }, { "epoch": 3.597910216718266, "grad_norm": 0.053334806114435196, "learning_rate": 7.445163361143915e-05, "loss": 0.0057, "step": 18588 }, { "epoch": 3.5981037151702786, "grad_norm": 0.061576277017593384, "learning_rate": 7.44491593523162e-05, "loss": 0.0065, "step": 18589 }, { "epoch": 3.598297213622291, "grad_norm": 0.06773164123296738, "learning_rate": 7.444668502088607e-05, "loss": 0.0079, "step": 18590 }, { "epoch": 3.5984907120743035, "grad_norm": 0.07225386053323746, "learning_rate": 7.444421061715794e-05, "loss": 0.0056, "step": 18591 }, { "epoch": 3.598684210526316, "grad_norm": 0.06390702724456787, "learning_rate": 7.444173614114103e-05, "loss": 0.0071, "step": 18592 }, { "epoch": 3.5988777089783284, "grad_norm": 0.06159248203039169, "learning_rate": 7.443926159284455e-05, "loss": 0.0057, "step": 18593 }, { "epoch": 3.599071207430341, "grad_norm": 0.09812700003385544, "learning_rate": 7.443678697227769e-05, "loss": 0.0056, "step": 18594 }, { "epoch": 3.599264705882353, "grad_norm": 0.028845233842730522, "learning_rate": 7.443431227944963e-05, "loss": 0.0063, "step": 18595 }, { "epoch": 3.5994582043343653, "grad_norm": 0.11302053928375244, "learning_rate": 7.443183751436958e-05, "loss": 0.0069, "step": 18596 }, { "epoch": 3.5996517027863777, "grad_norm": 0.03171125426888466, "learning_rate": 7.442936267704675e-05, "loss": 0.0058, "step": 18597 }, { "epoch": 3.59984520123839, "grad_norm": 0.08863037824630737, "learning_rate": 7.442688776749032e-05, "loss": 0.0055, "step": 18598 }, { "epoch": 3.6000386996904026, "grad_norm": 0.06977304816246033, "learning_rate": 7.442441278570954e-05, "loss": 0.0067, "step": 18599 }, { "epoch": 3.600232198142415, "grad_norm": 0.06442361325025558, "learning_rate": 7.442193773171358e-05, "loss": 0.0067, "step": 18600 }, { "epoch": 3.600425696594427, "grad_norm": 0.0939580425620079, "learning_rate": 7.441946260551163e-05, "loss": 0.006, "step": 18601 }, { "epoch": 3.6006191950464395, "grad_norm": 0.06001696363091469, "learning_rate": 7.441698740711289e-05, "loss": 0.0055, "step": 18602 }, { "epoch": 3.600812693498452, "grad_norm": 0.08477574586868286, "learning_rate": 7.44145121365266e-05, "loss": 0.0051, "step": 18603 }, { "epoch": 3.6010061919504643, "grad_norm": 0.08590684086084366, "learning_rate": 7.441203679376192e-05, "loss": 0.0074, "step": 18604 }, { "epoch": 3.601199690402477, "grad_norm": 0.09079485386610031, "learning_rate": 7.440956137882807e-05, "loss": 0.0072, "step": 18605 }, { "epoch": 3.6013931888544892, "grad_norm": 0.07832279801368713, "learning_rate": 7.440708589173426e-05, "loss": 0.0067, "step": 18606 }, { "epoch": 3.6015866873065017, "grad_norm": 0.07491088658571243, "learning_rate": 7.440461033248968e-05, "loss": 0.0055, "step": 18607 }, { "epoch": 3.601780185758514, "grad_norm": 0.05092005431652069, "learning_rate": 7.440213470110354e-05, "loss": 0.005, "step": 18608 }, { "epoch": 3.6019736842105265, "grad_norm": 0.0902264192700386, "learning_rate": 7.439965899758505e-05, "loss": 0.006, "step": 18609 }, { "epoch": 3.6021671826625385, "grad_norm": 0.03830763325095177, "learning_rate": 7.439718322194341e-05, 
"loss": 0.0056, "step": 18610 }, { "epoch": 3.602360681114551, "grad_norm": 0.08199160546064377, "learning_rate": 7.439470737418782e-05, "loss": 0.0062, "step": 18611 }, { "epoch": 3.6025541795665634, "grad_norm": 0.07093063741922379, "learning_rate": 7.43922314543275e-05, "loss": 0.0059, "step": 18612 }, { "epoch": 3.602747678018576, "grad_norm": 0.04890977591276169, "learning_rate": 7.438975546237161e-05, "loss": 0.0049, "step": 18613 }, { "epoch": 3.6029411764705883, "grad_norm": 0.0880095362663269, "learning_rate": 7.438727939832941e-05, "loss": 0.0062, "step": 18614 }, { "epoch": 3.6031346749226008, "grad_norm": 0.03253784030675888, "learning_rate": 7.438480326221008e-05, "loss": 0.0066, "step": 18615 }, { "epoch": 3.6033281733746128, "grad_norm": 0.04632198438048363, "learning_rate": 7.438232705402284e-05, "loss": 0.0075, "step": 18616 }, { "epoch": 3.603521671826625, "grad_norm": 0.04366348311305046, "learning_rate": 7.437985077377685e-05, "loss": 0.0065, "step": 18617 }, { "epoch": 3.6037151702786376, "grad_norm": 0.043177757412195206, "learning_rate": 7.437737442148138e-05, "loss": 0.0056, "step": 18618 }, { "epoch": 3.60390866873065, "grad_norm": 0.051784444600343704, "learning_rate": 7.43748979971456e-05, "loss": 0.0065, "step": 18619 }, { "epoch": 3.6041021671826625, "grad_norm": 0.036902010440826416, "learning_rate": 7.437242150077872e-05, "loss": 0.0064, "step": 18620 }, { "epoch": 3.604295665634675, "grad_norm": 0.0691872239112854, "learning_rate": 7.436994493238995e-05, "loss": 0.0066, "step": 18621 }, { "epoch": 3.6044891640866874, "grad_norm": 0.03360767662525177, "learning_rate": 7.43674682919885e-05, "loss": 0.0063, "step": 18622 }, { "epoch": 3.6046826625387, "grad_norm": 0.060222748667001724, "learning_rate": 7.436499157958356e-05, "loss": 0.0059, "step": 18623 }, { "epoch": 3.6048761609907123, "grad_norm": 0.021350983530282974, "learning_rate": 7.436251479518436e-05, "loss": 0.0058, "step": 18624 }, { "epoch": 3.6050696594427247, "grad_norm": 0.07320785522460938, "learning_rate": 7.436003793880012e-05, "loss": 0.0065, "step": 18625 }, { "epoch": 3.6052631578947367, "grad_norm": 0.05496475100517273, "learning_rate": 7.435756101044003e-05, "loss": 0.0056, "step": 18626 }, { "epoch": 3.605456656346749, "grad_norm": 0.06194509565830231, "learning_rate": 7.435508401011328e-05, "loss": 0.0053, "step": 18627 }, { "epoch": 3.6056501547987616, "grad_norm": 0.07275306433439255, "learning_rate": 7.435260693782909e-05, "loss": 0.0066, "step": 18628 }, { "epoch": 3.605843653250774, "grad_norm": 0.04162410646677017, "learning_rate": 7.43501297935967e-05, "loss": 0.0069, "step": 18629 }, { "epoch": 3.6060371517027865, "grad_norm": 0.07180046290159225, "learning_rate": 7.434765257742527e-05, "loss": 0.0068, "step": 18630 }, { "epoch": 3.6062306501547985, "grad_norm": 0.052212752401828766, "learning_rate": 7.434517528932405e-05, "loss": 0.0053, "step": 18631 }, { "epoch": 3.606424148606811, "grad_norm": 0.04797132685780525, "learning_rate": 7.434269792930223e-05, "loss": 0.0062, "step": 18632 }, { "epoch": 3.6066176470588234, "grad_norm": 0.07603171467781067, "learning_rate": 7.434022049736903e-05, "loss": 0.0058, "step": 18633 }, { "epoch": 3.606811145510836, "grad_norm": 0.07321164757013321, "learning_rate": 7.433774299353367e-05, "loss": 0.0055, "step": 18634 }, { "epoch": 3.6070046439628483, "grad_norm": 0.07014573365449905, "learning_rate": 7.433526541780533e-05, "loss": 0.0063, "step": 18635 }, { "epoch": 3.6071981424148607, "grad_norm": 0.04756984859704971, "learning_rate": 
7.433278777019324e-05, "loss": 0.0055, "step": 18636 }, { "epoch": 3.607391640866873, "grad_norm": 0.07000327855348587, "learning_rate": 7.433031005070659e-05, "loss": 0.006, "step": 18637 }, { "epoch": 3.6075851393188856, "grad_norm": 0.04188859835267067, "learning_rate": 7.432783225935462e-05, "loss": 0.006, "step": 18638 }, { "epoch": 3.607778637770898, "grad_norm": 0.047385796904563904, "learning_rate": 7.432535439614653e-05, "loss": 0.0056, "step": 18639 }, { "epoch": 3.6079721362229105, "grad_norm": 0.06719445437192917, "learning_rate": 7.432287646109157e-05, "loss": 0.0053, "step": 18640 }, { "epoch": 3.6081656346749225, "grad_norm": 0.019961358979344368, "learning_rate": 7.43203984541989e-05, "loss": 0.0075, "step": 18641 }, { "epoch": 3.608359133126935, "grad_norm": 0.06061409413814545, "learning_rate": 7.431792037547772e-05, "loss": 0.0053, "step": 18642 }, { "epoch": 3.6085526315789473, "grad_norm": 0.024152856320142746, "learning_rate": 7.431544222493729e-05, "loss": 0.0056, "step": 18643 }, { "epoch": 3.60874613003096, "grad_norm": 0.03413909673690796, "learning_rate": 7.431296400258681e-05, "loss": 0.0045, "step": 18644 }, { "epoch": 3.6089396284829722, "grad_norm": 0.05741928145289421, "learning_rate": 7.431048570843548e-05, "loss": 0.0054, "step": 18645 }, { "epoch": 3.6091331269349847, "grad_norm": 0.04221449792385101, "learning_rate": 7.430800734249253e-05, "loss": 0.0063, "step": 18646 }, { "epoch": 3.6093266253869967, "grad_norm": 0.03949570655822754, "learning_rate": 7.430552890476717e-05, "loss": 0.0063, "step": 18647 }, { "epoch": 3.609520123839009, "grad_norm": 0.060177139937877655, "learning_rate": 7.430305039526862e-05, "loss": 0.0066, "step": 18648 }, { "epoch": 3.6097136222910216, "grad_norm": 0.03715957701206207, "learning_rate": 7.430057181400605e-05, "loss": 0.0066, "step": 18649 }, { "epoch": 3.609907120743034, "grad_norm": 0.06727760285139084, "learning_rate": 7.429809316098873e-05, "loss": 0.0067, "step": 18650 }, { "epoch": 3.6101006191950464, "grad_norm": 0.05921255052089691, "learning_rate": 7.429561443622585e-05, "loss": 0.0046, "step": 18651 }, { "epoch": 3.610294117647059, "grad_norm": 0.0380864217877388, "learning_rate": 7.429313563972664e-05, "loss": 0.006, "step": 18652 }, { "epoch": 3.6104876160990713, "grad_norm": 0.08504152297973633, "learning_rate": 7.42906567715003e-05, "loss": 0.0055, "step": 18653 }, { "epoch": 3.6106811145510838, "grad_norm": 0.035421501845121384, "learning_rate": 7.428817783155604e-05, "loss": 0.0075, "step": 18654 }, { "epoch": 3.610874613003096, "grad_norm": 0.05605531856417656, "learning_rate": 7.42856988199031e-05, "loss": 0.0058, "step": 18655 }, { "epoch": 3.611068111455108, "grad_norm": 0.059689588844776154, "learning_rate": 7.428321973655069e-05, "loss": 0.0065, "step": 18656 }, { "epoch": 3.6112616099071206, "grad_norm": 0.0590699203312397, "learning_rate": 7.428074058150801e-05, "loss": 0.0068, "step": 18657 }, { "epoch": 3.611455108359133, "grad_norm": 0.06786436587572098, "learning_rate": 7.42782613547843e-05, "loss": 0.006, "step": 18658 }, { "epoch": 3.6116486068111455, "grad_norm": 0.04028153792023659, "learning_rate": 7.427578205638874e-05, "loss": 0.0059, "step": 18659 }, { "epoch": 3.611842105263158, "grad_norm": 0.07914530485868454, "learning_rate": 7.427330268633059e-05, "loss": 0.0054, "step": 18660 }, { "epoch": 3.6120356037151704, "grad_norm": 0.025338832288980484, "learning_rate": 7.427082324461905e-05, "loss": 0.0068, "step": 18661 }, { "epoch": 3.6122291021671824, "grad_norm": 
0.08265119045972824, "learning_rate": 7.426834373126333e-05, "loss": 0.0065, "step": 18662 }, { "epoch": 3.612422600619195, "grad_norm": 0.06670721620321274, "learning_rate": 7.426586414627265e-05, "loss": 0.006, "step": 18663 }, { "epoch": 3.6126160990712073, "grad_norm": 0.06952285021543503, "learning_rate": 7.426338448965625e-05, "loss": 0.006, "step": 18664 }, { "epoch": 3.6128095975232197, "grad_norm": 0.08767063170671463, "learning_rate": 7.426090476142334e-05, "loss": 0.0062, "step": 18665 }, { "epoch": 3.613003095975232, "grad_norm": 0.04819277301430702, "learning_rate": 7.425842496158313e-05, "loss": 0.0056, "step": 18666 }, { "epoch": 3.6131965944272446, "grad_norm": 0.06440965831279755, "learning_rate": 7.425594509014482e-05, "loss": 0.0061, "step": 18667 }, { "epoch": 3.613390092879257, "grad_norm": 0.053190864622592926, "learning_rate": 7.425346514711766e-05, "loss": 0.0068, "step": 18668 }, { "epoch": 3.6135835913312695, "grad_norm": 0.041711192578077316, "learning_rate": 7.425098513251086e-05, "loss": 0.0065, "step": 18669 }, { "epoch": 3.613777089783282, "grad_norm": 0.04253585636615753, "learning_rate": 7.424850504633366e-05, "loss": 0.0069, "step": 18670 }, { "epoch": 3.6139705882352944, "grad_norm": 0.03592078760266304, "learning_rate": 7.424602488859523e-05, "loss": 0.0066, "step": 18671 }, { "epoch": 3.6141640866873064, "grad_norm": 0.04872050881385803, "learning_rate": 7.424354465930484e-05, "loss": 0.007, "step": 18672 }, { "epoch": 3.614357585139319, "grad_norm": 0.02362714149057865, "learning_rate": 7.42410643584717e-05, "loss": 0.0069, "step": 18673 }, { "epoch": 3.6145510835913313, "grad_norm": 0.06462156027555466, "learning_rate": 7.4238583986105e-05, "loss": 0.0074, "step": 18674 }, { "epoch": 3.6147445820433437, "grad_norm": 0.027511604130268097, "learning_rate": 7.423610354221401e-05, "loss": 0.0067, "step": 18675 }, { "epoch": 3.614938080495356, "grad_norm": 0.051740698516368866, "learning_rate": 7.423362302680791e-05, "loss": 0.0068, "step": 18676 }, { "epoch": 3.6151315789473686, "grad_norm": 0.04822719469666481, "learning_rate": 7.423114243989594e-05, "loss": 0.0077, "step": 18677 }, { "epoch": 3.6153250773993806, "grad_norm": 0.06398820132017136, "learning_rate": 7.422866178148732e-05, "loss": 0.0066, "step": 18678 }, { "epoch": 3.615518575851393, "grad_norm": 0.0401744619011879, "learning_rate": 7.422618105159128e-05, "loss": 0.0049, "step": 18679 }, { "epoch": 3.6157120743034055, "grad_norm": 0.07722284644842148, "learning_rate": 7.422370025021705e-05, "loss": 0.0072, "step": 18680 }, { "epoch": 3.615905572755418, "grad_norm": 0.027464503422379494, "learning_rate": 7.42212193773738e-05, "loss": 0.0058, "step": 18681 }, { "epoch": 3.6160990712074303, "grad_norm": 0.07855197042226791, "learning_rate": 7.421873843307082e-05, "loss": 0.0052, "step": 18682 }, { "epoch": 3.616292569659443, "grad_norm": 0.03165356442332268, "learning_rate": 7.421625741731732e-05, "loss": 0.0074, "step": 18683 }, { "epoch": 3.6164860681114552, "grad_norm": 0.06615998595952988, "learning_rate": 7.421377633012249e-05, "loss": 0.0048, "step": 18684 }, { "epoch": 3.6166795665634677, "grad_norm": 0.04813886433839798, "learning_rate": 7.421129517149557e-05, "loss": 0.0062, "step": 18685 }, { "epoch": 3.61687306501548, "grad_norm": 0.057443227618932724, "learning_rate": 7.42088139414458e-05, "loss": 0.0052, "step": 18686 }, { "epoch": 3.617066563467492, "grad_norm": 0.0598076656460762, "learning_rate": 7.420633263998239e-05, "loss": 0.0051, "step": 18687 }, { "epoch": 
3.6172600619195046, "grad_norm": 0.036940205842256546, "learning_rate": 7.420385126711456e-05, "loss": 0.006, "step": 18688 }, { "epoch": 3.617453560371517, "grad_norm": 0.06588885188102722, "learning_rate": 7.420136982285156e-05, "loss": 0.0068, "step": 18689 }, { "epoch": 3.6176470588235294, "grad_norm": 0.06669275462627411, "learning_rate": 7.419888830720259e-05, "loss": 0.005, "step": 18690 }, { "epoch": 3.617840557275542, "grad_norm": 0.05482237786054611, "learning_rate": 7.419640672017688e-05, "loss": 0.0056, "step": 18691 }, { "epoch": 3.6180340557275543, "grad_norm": 0.08178767561912537, "learning_rate": 7.419392506178366e-05, "loss": 0.0075, "step": 18692 }, { "epoch": 3.6182275541795663, "grad_norm": 0.035076629370450974, "learning_rate": 7.419144333203214e-05, "loss": 0.0056, "step": 18693 }, { "epoch": 3.6184210526315788, "grad_norm": 0.07571583241224289, "learning_rate": 7.418896153093159e-05, "loss": 0.0064, "step": 18694 }, { "epoch": 3.618614551083591, "grad_norm": 0.05580820515751839, "learning_rate": 7.41864796584912e-05, "loss": 0.0073, "step": 18695 }, { "epoch": 3.6188080495356036, "grad_norm": 0.05093053728342056, "learning_rate": 7.418399771472021e-05, "loss": 0.0064, "step": 18696 }, { "epoch": 3.619001547987616, "grad_norm": 0.07651611417531967, "learning_rate": 7.418151569962784e-05, "loss": 0.0059, "step": 18697 }, { "epoch": 3.6191950464396285, "grad_norm": 0.05697404593229294, "learning_rate": 7.417903361322331e-05, "loss": 0.0055, "step": 18698 }, { "epoch": 3.619388544891641, "grad_norm": 0.06792306154966354, "learning_rate": 7.417655145551588e-05, "loss": 0.0076, "step": 18699 }, { "epoch": 3.6195820433436534, "grad_norm": 0.04424815997481346, "learning_rate": 7.417406922651474e-05, "loss": 0.0075, "step": 18700 }, { "epoch": 3.619775541795666, "grad_norm": 0.03902053087949753, "learning_rate": 7.417158692622913e-05, "loss": 0.0067, "step": 18701 }, { "epoch": 3.6199690402476783, "grad_norm": 0.017102526500821114, "learning_rate": 7.416910455466831e-05, "loss": 0.0054, "step": 18702 }, { "epoch": 3.6201625386996903, "grad_norm": 0.04739375039935112, "learning_rate": 7.416662211184146e-05, "loss": 0.006, "step": 18703 }, { "epoch": 3.6203560371517027, "grad_norm": 0.02626568265259266, "learning_rate": 7.416413959775784e-05, "loss": 0.006, "step": 18704 }, { "epoch": 3.620549535603715, "grad_norm": 0.060808878391981125, "learning_rate": 7.416165701242668e-05, "loss": 0.0069, "step": 18705 }, { "epoch": 3.6207430340557276, "grad_norm": 0.037081003189086914, "learning_rate": 7.415917435585718e-05, "loss": 0.0066, "step": 18706 }, { "epoch": 3.62093653250774, "grad_norm": 0.07289503514766693, "learning_rate": 7.41566916280586e-05, "loss": 0.0069, "step": 18707 }, { "epoch": 3.621130030959752, "grad_norm": 0.06092410534620285, "learning_rate": 7.415420882904016e-05, "loss": 0.0067, "step": 18708 }, { "epoch": 3.6213235294117645, "grad_norm": 0.057149071246385574, "learning_rate": 7.41517259588111e-05, "loss": 0.0061, "step": 18709 }, { "epoch": 3.621517027863777, "grad_norm": 0.06747912615537643, "learning_rate": 7.414924301738064e-05, "loss": 0.0063, "step": 18710 }, { "epoch": 3.6217105263157894, "grad_norm": 0.05927390232682228, "learning_rate": 7.414676000475801e-05, "loss": 0.006, "step": 18711 }, { "epoch": 3.621904024767802, "grad_norm": 0.06794005632400513, "learning_rate": 7.414427692095245e-05, "loss": 0.0054, "step": 18712 }, { "epoch": 3.6220975232198143, "grad_norm": 0.06350127607584, "learning_rate": 7.414179376597316e-05, "loss": 0.0064, "step": 
18713 }, { "epoch": 3.6222910216718267, "grad_norm": 0.06424418836832047, "learning_rate": 7.413931053982944e-05, "loss": 0.007, "step": 18714 }, { "epoch": 3.622484520123839, "grad_norm": 0.07687386870384216, "learning_rate": 7.413682724253045e-05, "loss": 0.0053, "step": 18715 }, { "epoch": 3.6226780185758516, "grad_norm": 0.046453867107629776, "learning_rate": 7.413434387408546e-05, "loss": 0.0066, "step": 18716 }, { "epoch": 3.622871517027864, "grad_norm": 0.07436015456914902, "learning_rate": 7.413186043450369e-05, "loss": 0.0063, "step": 18717 }, { "epoch": 3.623065015479876, "grad_norm": 0.040861062705516815, "learning_rate": 7.412937692379436e-05, "loss": 0.0054, "step": 18718 }, { "epoch": 3.6232585139318885, "grad_norm": 0.06904637068510056, "learning_rate": 7.412689334196674e-05, "loss": 0.0069, "step": 18719 }, { "epoch": 3.623452012383901, "grad_norm": 0.03981047496199608, "learning_rate": 7.412440968903005e-05, "loss": 0.0055, "step": 18720 }, { "epoch": 3.6236455108359134, "grad_norm": 0.05242389440536499, "learning_rate": 7.412192596499351e-05, "loss": 0.0062, "step": 18721 }, { "epoch": 3.623839009287926, "grad_norm": 0.029026707634329796, "learning_rate": 7.411944216986637e-05, "loss": 0.0065, "step": 18722 }, { "epoch": 3.6240325077399382, "grad_norm": 0.06505091488361359, "learning_rate": 7.411695830365784e-05, "loss": 0.0056, "step": 18723 }, { "epoch": 3.6242260061919502, "grad_norm": 0.03779652714729309, "learning_rate": 7.411447436637718e-05, "loss": 0.0058, "step": 18724 }, { "epoch": 3.6244195046439627, "grad_norm": 0.05589579790830612, "learning_rate": 7.41119903580336e-05, "loss": 0.0066, "step": 18725 }, { "epoch": 3.624613003095975, "grad_norm": 0.07715438306331635, "learning_rate": 7.410950627863637e-05, "loss": 0.0062, "step": 18726 }, { "epoch": 3.6248065015479876, "grad_norm": 0.045737411826848984, "learning_rate": 7.410702212819468e-05, "loss": 0.0042, "step": 18727 }, { "epoch": 3.625, "grad_norm": 0.06064363569021225, "learning_rate": 7.41045379067178e-05, "loss": 0.0067, "step": 18728 }, { "epoch": 3.6251934984520124, "grad_norm": 0.03550620749592781, "learning_rate": 7.410205361421497e-05, "loss": 0.0056, "step": 18729 }, { "epoch": 3.625386996904025, "grad_norm": 0.062149856239557266, "learning_rate": 7.40995692506954e-05, "loss": 0.0064, "step": 18730 }, { "epoch": 3.6255804953560373, "grad_norm": 0.0575544498860836, "learning_rate": 7.409708481616834e-05, "loss": 0.006, "step": 18731 }, { "epoch": 3.6257739938080498, "grad_norm": 0.04505273699760437, "learning_rate": 7.4094600310643e-05, "loss": 0.0056, "step": 18732 }, { "epoch": 3.6259674922600618, "grad_norm": 0.06206810474395752, "learning_rate": 7.409211573412867e-05, "loss": 0.0052, "step": 18733 }, { "epoch": 3.626160990712074, "grad_norm": 0.030839253216981888, "learning_rate": 7.408963108663456e-05, "loss": 0.0059, "step": 18734 }, { "epoch": 3.6263544891640866, "grad_norm": 0.055203042924404144, "learning_rate": 7.40871463681699e-05, "loss": 0.0049, "step": 18735 }, { "epoch": 3.626547987616099, "grad_norm": 0.0327848456799984, "learning_rate": 7.408466157874393e-05, "loss": 0.0057, "step": 18736 }, { "epoch": 3.6267414860681115, "grad_norm": 0.03212539479136467, "learning_rate": 7.408217671836589e-05, "loss": 0.0055, "step": 18737 }, { "epoch": 3.626934984520124, "grad_norm": 0.04497769847512245, "learning_rate": 7.407969178704503e-05, "loss": 0.0071, "step": 18738 }, { "epoch": 3.627128482972136, "grad_norm": 0.039630185812711716, "learning_rate": 7.407720678479056e-05, "loss": 0.0061, 
"step": 18739 }, { "epoch": 3.6273219814241484, "grad_norm": 0.028216004371643066, "learning_rate": 7.407472171161173e-05, "loss": 0.0051, "step": 18740 }, { "epoch": 3.627515479876161, "grad_norm": 0.04994405061006546, "learning_rate": 7.407223656751782e-05, "loss": 0.0054, "step": 18741 }, { "epoch": 3.6277089783281733, "grad_norm": 0.02598278410732746, "learning_rate": 7.4069751352518e-05, "loss": 0.0079, "step": 18742 }, { "epoch": 3.6279024767801857, "grad_norm": 0.05541194602847099, "learning_rate": 7.406726606662156e-05, "loss": 0.0058, "step": 18743 }, { "epoch": 3.628095975232198, "grad_norm": 0.05062255635857582, "learning_rate": 7.406478070983771e-05, "loss": 0.0059, "step": 18744 }, { "epoch": 3.6282894736842106, "grad_norm": 0.07190095633268356, "learning_rate": 7.406229528217571e-05, "loss": 0.0062, "step": 18745 }, { "epoch": 3.628482972136223, "grad_norm": 0.06013483554124832, "learning_rate": 7.40598097836448e-05, "loss": 0.0061, "step": 18746 }, { "epoch": 3.6286764705882355, "grad_norm": 0.045500051230192184, "learning_rate": 7.40573242142542e-05, "loss": 0.0052, "step": 18747 }, { "epoch": 3.628869969040248, "grad_norm": 0.03549954667687416, "learning_rate": 7.405483857401317e-05, "loss": 0.0071, "step": 18748 }, { "epoch": 3.62906346749226, "grad_norm": 0.050256434828042984, "learning_rate": 7.405235286293095e-05, "loss": 0.0073, "step": 18749 }, { "epoch": 3.6292569659442724, "grad_norm": 0.03695862367749214, "learning_rate": 7.404986708101679e-05, "loss": 0.0068, "step": 18750 }, { "epoch": 3.629450464396285, "grad_norm": 0.04145044833421707, "learning_rate": 7.404738122827989e-05, "loss": 0.0057, "step": 18751 }, { "epoch": 3.6296439628482973, "grad_norm": 0.03726648539304733, "learning_rate": 7.404489530472952e-05, "loss": 0.0057, "step": 18752 }, { "epoch": 3.6298374613003097, "grad_norm": 0.032337285578250885, "learning_rate": 7.404240931037495e-05, "loss": 0.0058, "step": 18753 }, { "epoch": 3.6300309597523217, "grad_norm": 0.03988077864050865, "learning_rate": 7.403992324522537e-05, "loss": 0.0062, "step": 18754 }, { "epoch": 3.630224458204334, "grad_norm": 0.04009949788451195, "learning_rate": 7.403743710929007e-05, "loss": 0.0068, "step": 18755 }, { "epoch": 3.6304179566563466, "grad_norm": 0.048162464052438736, "learning_rate": 7.403495090257825e-05, "loss": 0.0077, "step": 18756 }, { "epoch": 3.630611455108359, "grad_norm": 0.03269180282950401, "learning_rate": 7.403246462509918e-05, "loss": 0.0063, "step": 18757 }, { "epoch": 3.6308049535603715, "grad_norm": 0.04903755709528923, "learning_rate": 7.402997827686208e-05, "loss": 0.005, "step": 18758 }, { "epoch": 3.630998452012384, "grad_norm": 0.04304616525769234, "learning_rate": 7.402749185787625e-05, "loss": 0.0059, "step": 18759 }, { "epoch": 3.6311919504643964, "grad_norm": 0.05766921862959862, "learning_rate": 7.402500536815087e-05, "loss": 0.0078, "step": 18760 }, { "epoch": 3.631385448916409, "grad_norm": 0.06618743389844894, "learning_rate": 7.40225188076952e-05, "loss": 0.0056, "step": 18761 }, { "epoch": 3.6315789473684212, "grad_norm": 0.04275484383106232, "learning_rate": 7.402003217651849e-05, "loss": 0.006, "step": 18762 }, { "epoch": 3.6317724458204337, "grad_norm": 0.05775181204080582, "learning_rate": 7.401754547462999e-05, "loss": 0.0057, "step": 18763 }, { "epoch": 3.6319659442724457, "grad_norm": 0.05105649679899216, "learning_rate": 7.401505870203896e-05, "loss": 0.0054, "step": 18764 }, { "epoch": 3.632159442724458, "grad_norm": 0.04787668585777283, "learning_rate": 
7.401257185875461e-05, "loss": 0.0065, "step": 18765 }, { "epoch": 3.6323529411764706, "grad_norm": 0.09007707983255386, "learning_rate": 7.401008494478621e-05, "loss": 0.0072, "step": 18766 }, { "epoch": 3.632546439628483, "grad_norm": 0.039548177272081375, "learning_rate": 7.400759796014299e-05, "loss": 0.0054, "step": 18767 }, { "epoch": 3.6327399380804954, "grad_norm": 0.06861059367656708, "learning_rate": 7.400511090483422e-05, "loss": 0.0058, "step": 18768 }, { "epoch": 3.632933436532508, "grad_norm": 0.1365147978067398, "learning_rate": 7.400262377886911e-05, "loss": 0.0066, "step": 18769 }, { "epoch": 3.63312693498452, "grad_norm": 0.04703272134065628, "learning_rate": 7.400013658225693e-05, "loss": 0.0062, "step": 18770 }, { "epoch": 3.6333204334365323, "grad_norm": 0.16226625442504883, "learning_rate": 7.399764931500693e-05, "loss": 0.0057, "step": 18771 }, { "epoch": 3.6335139318885448, "grad_norm": 0.10947884619235992, "learning_rate": 7.399516197712836e-05, "loss": 0.0058, "step": 18772 }, { "epoch": 3.633707430340557, "grad_norm": 0.16863477230072021, "learning_rate": 7.399267456863047e-05, "loss": 0.0078, "step": 18773 }, { "epoch": 3.6339009287925697, "grad_norm": 0.14230576157569885, "learning_rate": 7.399018708952246e-05, "loss": 0.0076, "step": 18774 }, { "epoch": 3.634094427244582, "grad_norm": 0.0563802644610405, "learning_rate": 7.398769953981364e-05, "loss": 0.007, "step": 18775 }, { "epoch": 3.6342879256965945, "grad_norm": 0.15825428068637848, "learning_rate": 7.39852119195132e-05, "loss": 0.0069, "step": 18776 }, { "epoch": 3.634481424148607, "grad_norm": 0.052297644317150116, "learning_rate": 7.398272422863045e-05, "loss": 0.0049, "step": 18777 }, { "epoch": 3.6346749226006194, "grad_norm": 0.12509436905384064, "learning_rate": 7.39802364671746e-05, "loss": 0.0059, "step": 18778 }, { "epoch": 3.6348684210526314, "grad_norm": 0.09417027235031128, "learning_rate": 7.397774863515491e-05, "loss": 0.0073, "step": 18779 }, { "epoch": 3.635061919504644, "grad_norm": 0.06438669562339783, "learning_rate": 7.397526073258064e-05, "loss": 0.0051, "step": 18780 }, { "epoch": 3.6352554179566563, "grad_norm": 0.12421339005231857, "learning_rate": 7.397277275946101e-05, "loss": 0.0061, "step": 18781 }, { "epoch": 3.6354489164086687, "grad_norm": 0.033204495906829834, "learning_rate": 7.397028471580528e-05, "loss": 0.0077, "step": 18782 }, { "epoch": 3.635642414860681, "grad_norm": 0.1412699967622757, "learning_rate": 7.396779660162272e-05, "loss": 0.0052, "step": 18783 }, { "epoch": 3.6358359133126936, "grad_norm": 0.04961719736456871, "learning_rate": 7.396530841692255e-05, "loss": 0.0052, "step": 18784 }, { "epoch": 3.6360294117647056, "grad_norm": 0.049693334847688675, "learning_rate": 7.396282016171405e-05, "loss": 0.0063, "step": 18785 }, { "epoch": 3.636222910216718, "grad_norm": 0.08975785225629807, "learning_rate": 7.396033183600646e-05, "loss": 0.006, "step": 18786 }, { "epoch": 3.6364164086687305, "grad_norm": 0.052425652742385864, "learning_rate": 7.395784343980901e-05, "loss": 0.0059, "step": 18787 }, { "epoch": 3.636609907120743, "grad_norm": 0.048149473965168, "learning_rate": 7.395535497313099e-05, "loss": 0.007, "step": 18788 }, { "epoch": 3.6368034055727554, "grad_norm": 0.07853715121746063, "learning_rate": 7.395286643598162e-05, "loss": 0.0055, "step": 18789 }, { "epoch": 3.636996904024768, "grad_norm": 0.02980153076350689, "learning_rate": 7.395037782837015e-05, "loss": 0.0068, "step": 18790 }, { "epoch": 3.6371904024767803, "grad_norm": 0.04784470796585083, 
"learning_rate": 7.394788915030587e-05, "loss": 0.0069, "step": 18791 }, { "epoch": 3.6373839009287927, "grad_norm": 0.04698789119720459, "learning_rate": 7.3945400401798e-05, "loss": 0.0057, "step": 18792 }, { "epoch": 3.637577399380805, "grad_norm": 0.027002764865756035, "learning_rate": 7.39429115828558e-05, "loss": 0.0062, "step": 18793 }, { "epoch": 3.6377708978328176, "grad_norm": 0.038778964430093765, "learning_rate": 7.394042269348851e-05, "loss": 0.0057, "step": 18794 }, { "epoch": 3.6379643962848296, "grad_norm": 0.020576344802975655, "learning_rate": 7.39379337337054e-05, "loss": 0.0061, "step": 18795 }, { "epoch": 3.638157894736842, "grad_norm": 0.03623484447598457, "learning_rate": 7.393544470351573e-05, "loss": 0.0064, "step": 18796 }, { "epoch": 3.6383513931888545, "grad_norm": 0.03194185718894005, "learning_rate": 7.393295560292873e-05, "loss": 0.0068, "step": 18797 }, { "epoch": 3.638544891640867, "grad_norm": 0.03725995868444443, "learning_rate": 7.393046643195368e-05, "loss": 0.0067, "step": 18798 }, { "epoch": 3.6387383900928794, "grad_norm": 0.05799822881817818, "learning_rate": 7.392797719059981e-05, "loss": 0.0061, "step": 18799 }, { "epoch": 3.638931888544892, "grad_norm": 0.07191461324691772, "learning_rate": 7.392548787887639e-05, "loss": 0.0064, "step": 18800 }, { "epoch": 3.639125386996904, "grad_norm": 0.06967363506555557, "learning_rate": 7.392299849679265e-05, "loss": 0.007, "step": 18801 }, { "epoch": 3.6393188854489162, "grad_norm": 0.07469088584184647, "learning_rate": 7.392050904435789e-05, "loss": 0.0068, "step": 18802 }, { "epoch": 3.6395123839009287, "grad_norm": 0.05872027948498726, "learning_rate": 7.391801952158132e-05, "loss": 0.0061, "step": 18803 }, { "epoch": 3.639705882352941, "grad_norm": 0.09826882928609848, "learning_rate": 7.391552992847222e-05, "loss": 0.0084, "step": 18804 }, { "epoch": 3.6398993808049536, "grad_norm": 0.09826979041099548, "learning_rate": 7.391304026503986e-05, "loss": 0.0066, "step": 18805 }, { "epoch": 3.640092879256966, "grad_norm": 0.11024405062198639, "learning_rate": 7.391055053129346e-05, "loss": 0.0064, "step": 18806 }, { "epoch": 3.6402863777089784, "grad_norm": 0.040859706699848175, "learning_rate": 7.39080607272423e-05, "loss": 0.0061, "step": 18807 }, { "epoch": 3.640479876160991, "grad_norm": 0.1015634536743164, "learning_rate": 7.390557085289561e-05, "loss": 0.0058, "step": 18808 }, { "epoch": 3.6406733746130033, "grad_norm": 0.10805882513523102, "learning_rate": 7.390308090826269e-05, "loss": 0.0054, "step": 18809 }, { "epoch": 3.6408668730650153, "grad_norm": 0.05418989807367325, "learning_rate": 7.390059089335277e-05, "loss": 0.0065, "step": 18810 }, { "epoch": 3.6410603715170278, "grad_norm": 0.15093587338924408, "learning_rate": 7.38981008081751e-05, "loss": 0.0064, "step": 18811 }, { "epoch": 3.64125386996904, "grad_norm": 0.04791257530450821, "learning_rate": 7.389561065273896e-05, "loss": 0.0059, "step": 18812 }, { "epoch": 3.6414473684210527, "grad_norm": 0.112850621342659, "learning_rate": 7.389312042705356e-05, "loss": 0.0062, "step": 18813 }, { "epoch": 3.641640866873065, "grad_norm": 0.09637978672981262, "learning_rate": 7.389063013112824e-05, "loss": 0.0071, "step": 18814 }, { "epoch": 3.6418343653250775, "grad_norm": 0.043087951838970184, "learning_rate": 7.388813976497217e-05, "loss": 0.0055, "step": 18815 }, { "epoch": 3.6420278637770895, "grad_norm": 0.09954842180013657, "learning_rate": 7.388564932859469e-05, "loss": 0.0066, "step": 18816 }, { "epoch": 3.642221362229102, "grad_norm": 
0.11155544221401215, "learning_rate": 7.3883158822005e-05, "loss": 0.0055, "step": 18817 }, { "epoch": 3.6424148606811144, "grad_norm": 0.054848119616508484, "learning_rate": 7.388066824521238e-05, "loss": 0.005, "step": 18818 }, { "epoch": 3.642608359133127, "grad_norm": 0.1248965710401535, "learning_rate": 7.38781775982261e-05, "loss": 0.0059, "step": 18819 }, { "epoch": 3.6428018575851393, "grad_norm": 0.07639256864786148, "learning_rate": 7.387568688105538e-05, "loss": 0.006, "step": 18820 }, { "epoch": 3.6429953560371517, "grad_norm": 0.08267056941986084, "learning_rate": 7.387319609370952e-05, "loss": 0.0068, "step": 18821 }, { "epoch": 3.643188854489164, "grad_norm": 0.10919167846441269, "learning_rate": 7.387070523619776e-05, "loss": 0.0072, "step": 18822 }, { "epoch": 3.6433823529411766, "grad_norm": 0.03527069091796875, "learning_rate": 7.38682143085294e-05, "loss": 0.0056, "step": 18823 }, { "epoch": 3.643575851393189, "grad_norm": 0.08317923545837402, "learning_rate": 7.386572331071364e-05, "loss": 0.0059, "step": 18824 }, { "epoch": 3.6437693498452015, "grad_norm": 0.026851452887058258, "learning_rate": 7.386323224275978e-05, "loss": 0.0052, "step": 18825 }, { "epoch": 3.6439628482972135, "grad_norm": 0.06510867178440094, "learning_rate": 7.386074110467707e-05, "loss": 0.007, "step": 18826 }, { "epoch": 3.644156346749226, "grad_norm": 0.044114187359809875, "learning_rate": 7.385824989647477e-05, "loss": 0.0064, "step": 18827 }, { "epoch": 3.6443498452012384, "grad_norm": 0.06022839993238449, "learning_rate": 7.385575861816212e-05, "loss": 0.0072, "step": 18828 }, { "epoch": 3.644543343653251, "grad_norm": 0.05060882493853569, "learning_rate": 7.385326726974843e-05, "loss": 0.0071, "step": 18829 }, { "epoch": 3.6447368421052633, "grad_norm": 0.032938722521066666, "learning_rate": 7.385077585124294e-05, "loss": 0.0054, "step": 18830 }, { "epoch": 3.6449303405572753, "grad_norm": 0.05624033883213997, "learning_rate": 7.384828436265491e-05, "loss": 0.0049, "step": 18831 }, { "epoch": 3.6451238390092877, "grad_norm": 0.0328405387699604, "learning_rate": 7.384579280399359e-05, "loss": 0.0074, "step": 18832 }, { "epoch": 3.6453173374613, "grad_norm": 0.05264581739902496, "learning_rate": 7.384330117526828e-05, "loss": 0.006, "step": 18833 }, { "epoch": 3.6455108359133126, "grad_norm": 0.0668964758515358, "learning_rate": 7.38408094764882e-05, "loss": 0.0076, "step": 18834 }, { "epoch": 3.645704334365325, "grad_norm": 0.08681868016719818, "learning_rate": 7.383831770766263e-05, "loss": 0.0061, "step": 18835 }, { "epoch": 3.6458978328173375, "grad_norm": 0.0398813858628273, "learning_rate": 7.383582586880085e-05, "loss": 0.0071, "step": 18836 }, { "epoch": 3.64609133126935, "grad_norm": 0.05740171670913696, "learning_rate": 7.383333395991212e-05, "loss": 0.0077, "step": 18837 }, { "epoch": 3.6462848297213624, "grad_norm": 0.054282911121845245, "learning_rate": 7.383084198100567e-05, "loss": 0.0063, "step": 18838 }, { "epoch": 3.646478328173375, "grad_norm": 0.08516903966665268, "learning_rate": 7.382834993209079e-05, "loss": 0.0075, "step": 18839 }, { "epoch": 3.6466718266253872, "grad_norm": 0.04482824355363846, "learning_rate": 7.382585781317675e-05, "loss": 0.0074, "step": 18840 }, { "epoch": 3.6468653250773992, "grad_norm": 0.10965634137392044, "learning_rate": 7.382336562427282e-05, "loss": 0.0071, "step": 18841 }, { "epoch": 3.6470588235294117, "grad_norm": 0.04454430192708969, "learning_rate": 7.382087336538824e-05, "loss": 0.0051, "step": 18842 }, { "epoch": 3.647252321981424, 
"grad_norm": 0.11355651170015335, "learning_rate": 7.381838103653231e-05, "loss": 0.0064, "step": 18843 }, { "epoch": 3.6474458204334366, "grad_norm": 0.07930579781532288, "learning_rate": 7.381588863771426e-05, "loss": 0.0066, "step": 18844 }, { "epoch": 3.647639318885449, "grad_norm": 0.07952125370502472, "learning_rate": 7.381339616894339e-05, "loss": 0.0057, "step": 18845 }, { "epoch": 3.6478328173374615, "grad_norm": 0.1250055879354477, "learning_rate": 7.381090363022891e-05, "loss": 0.0064, "step": 18846 }, { "epoch": 3.6480263157894735, "grad_norm": 0.04745151847600937, "learning_rate": 7.380841102158016e-05, "loss": 0.0057, "step": 18847 }, { "epoch": 3.648219814241486, "grad_norm": 0.1196867972612381, "learning_rate": 7.380591834300635e-05, "loss": 0.0059, "step": 18848 }, { "epoch": 3.6484133126934983, "grad_norm": 0.09072818607091904, "learning_rate": 7.38034255945168e-05, "loss": 0.007, "step": 18849 }, { "epoch": 3.6486068111455108, "grad_norm": 0.07148832082748413, "learning_rate": 7.380093277612073e-05, "loss": 0.0052, "step": 18850 }, { "epoch": 3.648800309597523, "grad_norm": 0.13087058067321777, "learning_rate": 7.379843988782742e-05, "loss": 0.0071, "step": 18851 }, { "epoch": 3.6489938080495357, "grad_norm": 0.033592384308576584, "learning_rate": 7.379594692964613e-05, "loss": 0.0054, "step": 18852 }, { "epoch": 3.649187306501548, "grad_norm": 0.11067169159650803, "learning_rate": 7.379345390158616e-05, "loss": 0.005, "step": 18853 }, { "epoch": 3.6493808049535605, "grad_norm": 0.08472391217947006, "learning_rate": 7.379096080365678e-05, "loss": 0.0069, "step": 18854 }, { "epoch": 3.649574303405573, "grad_norm": 0.06862015277147293, "learning_rate": 7.378846763586721e-05, "loss": 0.0066, "step": 18855 }, { "epoch": 3.649767801857585, "grad_norm": 0.1208220049738884, "learning_rate": 7.378597439822675e-05, "loss": 0.0071, "step": 18856 }, { "epoch": 3.6499613003095974, "grad_norm": 0.035856179893016815, "learning_rate": 7.378348109074468e-05, "loss": 0.0063, "step": 18857 }, { "epoch": 3.65015479876161, "grad_norm": 0.0946345403790474, "learning_rate": 7.378098771343021e-05, "loss": 0.0053, "step": 18858 }, { "epoch": 3.6503482972136223, "grad_norm": 0.06036968529224396, "learning_rate": 7.377849426629271e-05, "loss": 0.0055, "step": 18859 }, { "epoch": 3.6505417956656347, "grad_norm": 0.06735758483409882, "learning_rate": 7.377600074934136e-05, "loss": 0.0059, "step": 18860 }, { "epoch": 3.650735294117647, "grad_norm": 0.07795953750610352, "learning_rate": 7.37735071625855e-05, "loss": 0.0055, "step": 18861 }, { "epoch": 3.650928792569659, "grad_norm": 0.06007549911737442, "learning_rate": 7.377101350603434e-05, "loss": 0.0065, "step": 18862 }, { "epoch": 3.6511222910216716, "grad_norm": 0.07586230337619781, "learning_rate": 7.376851977969718e-05, "loss": 0.0066, "step": 18863 }, { "epoch": 3.651315789473684, "grad_norm": 0.05182245746254921, "learning_rate": 7.376602598358328e-05, "loss": 0.0062, "step": 18864 }, { "epoch": 3.6515092879256965, "grad_norm": 0.0821814239025116, "learning_rate": 7.376353211770194e-05, "loss": 0.0054, "step": 18865 }, { "epoch": 3.651702786377709, "grad_norm": 0.02632501721382141, "learning_rate": 7.37610381820624e-05, "loss": 0.0065, "step": 18866 }, { "epoch": 3.6518962848297214, "grad_norm": 0.09106574207544327, "learning_rate": 7.375854417667394e-05, "loss": 0.0074, "step": 18867 }, { "epoch": 3.652089783281734, "grad_norm": 0.030964817851781845, "learning_rate": 7.375605010154585e-05, "loss": 0.0058, "step": 18868 }, { "epoch": 
3.6522832817337463, "grad_norm": 0.05854927748441696, "learning_rate": 7.375355595668737e-05, "loss": 0.0062, "step": 18869 }, { "epoch": 3.6524767801857587, "grad_norm": 0.07140413671731949, "learning_rate": 7.37510617421078e-05, "loss": 0.0065, "step": 18870 }, { "epoch": 3.652670278637771, "grad_norm": 0.03740968927741051, "learning_rate": 7.37485674578164e-05, "loss": 0.0065, "step": 18871 }, { "epoch": 3.652863777089783, "grad_norm": 0.08089271187782288, "learning_rate": 7.374607310382244e-05, "loss": 0.0065, "step": 18872 }, { "epoch": 3.6530572755417956, "grad_norm": 0.04217987507581711, "learning_rate": 7.374357868013521e-05, "loss": 0.0063, "step": 18873 }, { "epoch": 3.653250773993808, "grad_norm": 0.05295790359377861, "learning_rate": 7.374108418676396e-05, "loss": 0.0066, "step": 18874 }, { "epoch": 3.6534442724458205, "grad_norm": 0.07508337497711182, "learning_rate": 7.373858962371799e-05, "loss": 0.0067, "step": 18875 }, { "epoch": 3.653637770897833, "grad_norm": 0.0362018384039402, "learning_rate": 7.373609499100657e-05, "loss": 0.007, "step": 18876 }, { "epoch": 3.6538312693498454, "grad_norm": 0.0687311589717865, "learning_rate": 7.373360028863894e-05, "loss": 0.007, "step": 18877 }, { "epoch": 3.6540247678018574, "grad_norm": 0.047285668551921844, "learning_rate": 7.373110551662441e-05, "loss": 0.0056, "step": 18878 }, { "epoch": 3.65421826625387, "grad_norm": 0.053961656987667084, "learning_rate": 7.372861067497226e-05, "loss": 0.0055, "step": 18879 }, { "epoch": 3.6544117647058822, "grad_norm": 0.04815206676721573, "learning_rate": 7.372611576369173e-05, "loss": 0.0062, "step": 18880 }, { "epoch": 3.6546052631578947, "grad_norm": 0.04007748141884804, "learning_rate": 7.372362078279214e-05, "loss": 0.0061, "step": 18881 }, { "epoch": 3.654798761609907, "grad_norm": 0.04207415133714676, "learning_rate": 7.372112573228272e-05, "loss": 0.0067, "step": 18882 }, { "epoch": 3.6549922600619196, "grad_norm": 0.04680090397596359, "learning_rate": 7.371863061217275e-05, "loss": 0.0063, "step": 18883 }, { "epoch": 3.655185758513932, "grad_norm": 0.034289997071027756, "learning_rate": 7.371613542247156e-05, "loss": 0.0066, "step": 18884 }, { "epoch": 3.6553792569659445, "grad_norm": 0.03729899227619171, "learning_rate": 7.371364016318837e-05, "loss": 0.0062, "step": 18885 }, { "epoch": 3.655572755417957, "grad_norm": 0.040943827480077744, "learning_rate": 7.371114483433249e-05, "loss": 0.0055, "step": 18886 }, { "epoch": 3.655766253869969, "grad_norm": 0.03129186853766441, "learning_rate": 7.370864943591318e-05, "loss": 0.0063, "step": 18887 }, { "epoch": 3.6559597523219813, "grad_norm": 0.038360919803380966, "learning_rate": 7.37061539679397e-05, "loss": 0.0068, "step": 18888 }, { "epoch": 3.656153250773994, "grad_norm": 0.030911885201931, "learning_rate": 7.370365843042139e-05, "loss": 0.0064, "step": 18889 }, { "epoch": 3.656346749226006, "grad_norm": 0.04232773929834366, "learning_rate": 7.370116282336744e-05, "loss": 0.0074, "step": 18890 }, { "epoch": 3.6565402476780187, "grad_norm": 0.033776454627513885, "learning_rate": 7.369866714678722e-05, "loss": 0.0061, "step": 18891 }, { "epoch": 3.656733746130031, "grad_norm": 0.05076628178358078, "learning_rate": 7.369617140068993e-05, "loss": 0.0065, "step": 18892 }, { "epoch": 3.656927244582043, "grad_norm": 0.03384573012590408, "learning_rate": 7.369367558508489e-05, "loss": 0.006, "step": 18893 }, { "epoch": 3.6571207430340555, "grad_norm": 0.05136756971478462, "learning_rate": 7.369117969998137e-05, "loss": 0.0073, "step": 
18894 }, { "epoch": 3.657314241486068, "grad_norm": 0.041401635855436325, "learning_rate": 7.368868374538865e-05, "loss": 0.0064, "step": 18895 }, { "epoch": 3.6575077399380804, "grad_norm": 0.05890984088182449, "learning_rate": 7.368618772131603e-05, "loss": 0.0068, "step": 18896 }, { "epoch": 3.657701238390093, "grad_norm": 0.052780259400606155, "learning_rate": 7.368369162777275e-05, "loss": 0.0059, "step": 18897 }, { "epoch": 3.6578947368421053, "grad_norm": 0.04696411266922951, "learning_rate": 7.36811954647681e-05, "loss": 0.0061, "step": 18898 }, { "epoch": 3.6580882352941178, "grad_norm": 0.0651930570602417, "learning_rate": 7.367869923231138e-05, "loss": 0.0056, "step": 18899 }, { "epoch": 3.65828173374613, "grad_norm": 0.03928526118397713, "learning_rate": 7.367620293041186e-05, "loss": 0.0069, "step": 18900 }, { "epoch": 3.6584752321981426, "grad_norm": 0.07540667057037354, "learning_rate": 7.367370655907882e-05, "loss": 0.0067, "step": 18901 }, { "epoch": 3.6586687306501546, "grad_norm": 0.04348977655172348, "learning_rate": 7.367121011832153e-05, "loss": 0.0067, "step": 18902 }, { "epoch": 3.658862229102167, "grad_norm": 0.0866890624165535, "learning_rate": 7.366871360814928e-05, "loss": 0.0047, "step": 18903 }, { "epoch": 3.6590557275541795, "grad_norm": 0.05135367065668106, "learning_rate": 7.366621702857137e-05, "loss": 0.0071, "step": 18904 }, { "epoch": 3.659249226006192, "grad_norm": 0.07453680783510208, "learning_rate": 7.366372037959704e-05, "loss": 0.0065, "step": 18905 }, { "epoch": 3.6594427244582044, "grad_norm": 0.03206522390246391, "learning_rate": 7.366122366123562e-05, "loss": 0.0057, "step": 18906 }, { "epoch": 3.659636222910217, "grad_norm": 0.059547584503889084, "learning_rate": 7.365872687349636e-05, "loss": 0.0047, "step": 18907 }, { "epoch": 3.659829721362229, "grad_norm": 0.06144071742892265, "learning_rate": 7.365623001638856e-05, "loss": 0.0073, "step": 18908 }, { "epoch": 3.6600232198142413, "grad_norm": 0.055435001850128174, "learning_rate": 7.365373308992147e-05, "loss": 0.0063, "step": 18909 }, { "epoch": 3.6602167182662537, "grad_norm": 0.06956543028354645, "learning_rate": 7.365123609410442e-05, "loss": 0.0062, "step": 18910 }, { "epoch": 3.660410216718266, "grad_norm": 0.050727277994155884, "learning_rate": 7.364873902894665e-05, "loss": 0.0066, "step": 18911 }, { "epoch": 3.6606037151702786, "grad_norm": 0.06087997555732727, "learning_rate": 7.364624189445749e-05, "loss": 0.0071, "step": 18912 }, { "epoch": 3.660797213622291, "grad_norm": 0.05537647381424904, "learning_rate": 7.364374469064618e-05, "loss": 0.0054, "step": 18913 }, { "epoch": 3.6609907120743035, "grad_norm": 0.04890977218747139, "learning_rate": 7.364124741752203e-05, "loss": 0.0058, "step": 18914 }, { "epoch": 3.661184210526316, "grad_norm": 0.043338991701602936, "learning_rate": 7.36387500750943e-05, "loss": 0.0065, "step": 18915 }, { "epoch": 3.6613777089783284, "grad_norm": 0.0578983835875988, "learning_rate": 7.363625266337229e-05, "loss": 0.0058, "step": 18916 }, { "epoch": 3.661571207430341, "grad_norm": 0.02627113275229931, "learning_rate": 7.363375518236531e-05, "loss": 0.0055, "step": 18917 }, { "epoch": 3.661764705882353, "grad_norm": 0.08672195672988892, "learning_rate": 7.363125763208259e-05, "loss": 0.0058, "step": 18918 }, { "epoch": 3.6619582043343653, "grad_norm": 0.05698500573635101, "learning_rate": 7.362876001253346e-05, "loss": 0.0065, "step": 18919 }, { "epoch": 3.6621517027863777, "grad_norm": 0.05649995803833008, "learning_rate": 7.36262623237272e-05, 
"loss": 0.0055, "step": 18920 }, { "epoch": 3.66234520123839, "grad_norm": 0.06519778817892075, "learning_rate": 7.362376456567307e-05, "loss": 0.0074, "step": 18921 }, { "epoch": 3.6625386996904026, "grad_norm": 0.04867066815495491, "learning_rate": 7.362126673838037e-05, "loss": 0.0058, "step": 18922 }, { "epoch": 3.662732198142415, "grad_norm": 0.05096091702580452, "learning_rate": 7.361876884185838e-05, "loss": 0.0067, "step": 18923 }, { "epoch": 3.662925696594427, "grad_norm": 0.06566309928894043, "learning_rate": 7.361627087611643e-05, "loss": 0.0046, "step": 18924 }, { "epoch": 3.6631191950464395, "grad_norm": 0.06333906948566437, "learning_rate": 7.361377284116376e-05, "loss": 0.0061, "step": 18925 }, { "epoch": 3.663312693498452, "grad_norm": 0.04054320231080055, "learning_rate": 7.361127473700967e-05, "loss": 0.0057, "step": 18926 }, { "epoch": 3.6635061919504643, "grad_norm": 0.1124328076839447, "learning_rate": 7.360877656366342e-05, "loss": 0.0054, "step": 18927 }, { "epoch": 3.663699690402477, "grad_norm": 0.03924517333507538, "learning_rate": 7.360627832113434e-05, "loss": 0.0066, "step": 18928 }, { "epoch": 3.6638931888544892, "grad_norm": 0.10654447227716446, "learning_rate": 7.360378000943173e-05, "loss": 0.0061, "step": 18929 }, { "epoch": 3.6640866873065017, "grad_norm": 0.08692844957113266, "learning_rate": 7.36012816285648e-05, "loss": 0.0058, "step": 18930 }, { "epoch": 3.664280185758514, "grad_norm": 0.04367578774690628, "learning_rate": 7.359878317854291e-05, "loss": 0.0057, "step": 18931 }, { "epoch": 3.6644736842105265, "grad_norm": 0.13139024376869202, "learning_rate": 7.359628465937534e-05, "loss": 0.0059, "step": 18932 }, { "epoch": 3.6646671826625385, "grad_norm": 0.0789717584848404, "learning_rate": 7.359378607107135e-05, "loss": 0.0076, "step": 18933 }, { "epoch": 3.664860681114551, "grad_norm": 0.09293297678232193, "learning_rate": 7.359128741364026e-05, "loss": 0.0045, "step": 18934 }, { "epoch": 3.6650541795665634, "grad_norm": 0.09490721672773361, "learning_rate": 7.358878868709133e-05, "loss": 0.0061, "step": 18935 }, { "epoch": 3.665247678018576, "grad_norm": 0.07712703198194504, "learning_rate": 7.358628989143387e-05, "loss": 0.0077, "step": 18936 }, { "epoch": 3.6654411764705883, "grad_norm": 0.12261368334293365, "learning_rate": 7.358379102667716e-05, "loss": 0.0068, "step": 18937 }, { "epoch": 3.6656346749226008, "grad_norm": 0.12200989574193954, "learning_rate": 7.35812920928305e-05, "loss": 0.0059, "step": 18938 }, { "epoch": 3.6658281733746128, "grad_norm": 0.06406627595424652, "learning_rate": 7.357879308990318e-05, "loss": 0.0072, "step": 18939 }, { "epoch": 3.666021671826625, "grad_norm": 0.11549297720193863, "learning_rate": 7.357629401790448e-05, "loss": 0.0061, "step": 18940 }, { "epoch": 3.6662151702786376, "grad_norm": 0.08293047547340393, "learning_rate": 7.357379487684368e-05, "loss": 0.0063, "step": 18941 }, { "epoch": 3.66640866873065, "grad_norm": 0.0663982555270195, "learning_rate": 7.35712956667301e-05, "loss": 0.0064, "step": 18942 }, { "epoch": 3.6666021671826625, "grad_norm": 0.12668070197105408, "learning_rate": 7.356879638757302e-05, "loss": 0.0066, "step": 18943 }, { "epoch": 3.666795665634675, "grad_norm": 0.05599179491400719, "learning_rate": 7.356629703938173e-05, "loss": 0.0091, "step": 18944 }, { "epoch": 3.6669891640866874, "grad_norm": 0.10782665014266968, "learning_rate": 7.356379762216553e-05, "loss": 0.0071, "step": 18945 }, { "epoch": 3.6671826625387, "grad_norm": 0.048170603811740875, "learning_rate": 
7.35612981359337e-05, "loss": 0.0049, "step": 18946 }, { "epoch": 3.6673761609907123, "grad_norm": 0.08379538357257843, "learning_rate": 7.355879858069552e-05, "loss": 0.0081, "step": 18947 }, { "epoch": 3.6675696594427247, "grad_norm": 0.04127878323197365, "learning_rate": 7.355629895646032e-05, "loss": 0.0066, "step": 18948 }, { "epoch": 3.6677631578947367, "grad_norm": 0.047198038548231125, "learning_rate": 7.355379926323737e-05, "loss": 0.0057, "step": 18949 }, { "epoch": 3.667956656346749, "grad_norm": 0.05182456970214844, "learning_rate": 7.355129950103597e-05, "loss": 0.0067, "step": 18950 }, { "epoch": 3.6681501547987616, "grad_norm": 0.03320440277457237, "learning_rate": 7.35487996698654e-05, "loss": 0.0056, "step": 18951 }, { "epoch": 3.668343653250774, "grad_norm": 0.09372209012508392, "learning_rate": 7.354629976973497e-05, "loss": 0.0065, "step": 18952 }, { "epoch": 3.6685371517027865, "grad_norm": 0.11614853143692017, "learning_rate": 7.354379980065395e-05, "loss": 0.0062, "step": 18953 }, { "epoch": 3.6687306501547985, "grad_norm": 0.07564747333526611, "learning_rate": 7.354129976263167e-05, "loss": 0.0064, "step": 18954 }, { "epoch": 3.668924148606811, "grad_norm": 0.09434445202350616, "learning_rate": 7.353879965567741e-05, "loss": 0.0065, "step": 18955 }, { "epoch": 3.6691176470588234, "grad_norm": 0.0755322054028511, "learning_rate": 7.353629947980045e-05, "loss": 0.0061, "step": 18956 }, { "epoch": 3.669311145510836, "grad_norm": 0.05308229848742485, "learning_rate": 7.35337992350101e-05, "loss": 0.0057, "step": 18957 }, { "epoch": 3.6695046439628483, "grad_norm": 0.08446941524744034, "learning_rate": 7.353129892131564e-05, "loss": 0.0064, "step": 18958 }, { "epoch": 3.6696981424148607, "grad_norm": 0.0743497982621193, "learning_rate": 7.352879853872639e-05, "loss": 0.0065, "step": 18959 }, { "epoch": 3.669891640866873, "grad_norm": 0.06942689418792725, "learning_rate": 7.352629808725163e-05, "loss": 0.006, "step": 18960 }, { "epoch": 3.6700851393188856, "grad_norm": 0.09472738206386566, "learning_rate": 7.352379756690066e-05, "loss": 0.0064, "step": 18961 }, { "epoch": 3.670278637770898, "grad_norm": 0.024278050288558006, "learning_rate": 7.352129697768276e-05, "loss": 0.0056, "step": 18962 }, { "epoch": 3.6704721362229105, "grad_norm": 0.10746597498655319, "learning_rate": 7.351879631960727e-05, "loss": 0.0055, "step": 18963 }, { "epoch": 3.6706656346749225, "grad_norm": 0.06053061783313751, "learning_rate": 7.351629559268343e-05, "loss": 0.0064, "step": 18964 }, { "epoch": 3.670859133126935, "grad_norm": 0.08145876228809357, "learning_rate": 7.351379479692058e-05, "loss": 0.0074, "step": 18965 }, { "epoch": 3.6710526315789473, "grad_norm": 0.09863340109586716, "learning_rate": 7.3511293932328e-05, "loss": 0.0062, "step": 18966 }, { "epoch": 3.67124613003096, "grad_norm": 0.019251318648457527, "learning_rate": 7.350879299891499e-05, "loss": 0.0051, "step": 18967 }, { "epoch": 3.6714396284829722, "grad_norm": 0.09990540146827698, "learning_rate": 7.350629199669086e-05, "loss": 0.0074, "step": 18968 }, { "epoch": 3.6716331269349847, "grad_norm": 0.052120551466941833, "learning_rate": 7.350379092566488e-05, "loss": 0.0056, "step": 18969 }, { "epoch": 3.6718266253869967, "grad_norm": 0.04114356264472008, "learning_rate": 7.350128978584637e-05, "loss": 0.0051, "step": 18970 }, { "epoch": 3.672020123839009, "grad_norm": 0.061078991740942, "learning_rate": 7.349878857724462e-05, "loss": 0.006, "step": 18971 }, { "epoch": 3.6722136222910216, "grad_norm": 0.025741884484887123, 
"learning_rate": 7.349628729986894e-05, "loss": 0.0055, "step": 18972 }, { "epoch": 3.672407120743034, "grad_norm": 0.13166533410549164, "learning_rate": 7.349378595372862e-05, "loss": 0.0065, "step": 18973 }, { "epoch": 3.6726006191950464, "grad_norm": 0.07490742951631546, "learning_rate": 7.349128453883296e-05, "loss": 0.0067, "step": 18974 }, { "epoch": 3.672794117647059, "grad_norm": 0.10202935338020325, "learning_rate": 7.348878305519127e-05, "loss": 0.0067, "step": 18975 }, { "epoch": 3.6729876160990713, "grad_norm": 0.043068770319223404, "learning_rate": 7.348628150281282e-05, "loss": 0.0067, "step": 18976 }, { "epoch": 3.6731811145510838, "grad_norm": 0.13932748138904572, "learning_rate": 7.348377988170694e-05, "loss": 0.0048, "step": 18977 }, { "epoch": 3.673374613003096, "grad_norm": 0.08220487833023071, "learning_rate": 7.348127819188292e-05, "loss": 0.0061, "step": 18978 }, { "epoch": 3.673568111455108, "grad_norm": 0.11738138645887375, "learning_rate": 7.347877643335007e-05, "loss": 0.0065, "step": 18979 }, { "epoch": 3.6737616099071206, "grad_norm": 0.11799012869596481, "learning_rate": 7.347627460611767e-05, "loss": 0.0054, "step": 18980 }, { "epoch": 3.673955108359133, "grad_norm": 0.08062602579593658, "learning_rate": 7.347377271019504e-05, "loss": 0.0069, "step": 18981 }, { "epoch": 3.6741486068111455, "grad_norm": 0.14038242399692535, "learning_rate": 7.347127074559148e-05, "loss": 0.0063, "step": 18982 }, { "epoch": 3.674342105263158, "grad_norm": 0.04742061719298363, "learning_rate": 7.346876871231628e-05, "loss": 0.0067, "step": 18983 }, { "epoch": 3.6745356037151704, "grad_norm": 0.1011914610862732, "learning_rate": 7.346626661037876e-05, "loss": 0.0057, "step": 18984 }, { "epoch": 3.6747291021671824, "grad_norm": 0.0889580100774765, "learning_rate": 7.346376443978819e-05, "loss": 0.0064, "step": 18985 }, { "epoch": 3.674922600619195, "grad_norm": 0.04733050987124443, "learning_rate": 7.346126220055389e-05, "loss": 0.0078, "step": 18986 }, { "epoch": 3.6751160990712073, "grad_norm": 0.11055169999599457, "learning_rate": 7.34587598926852e-05, "loss": 0.0066, "step": 18987 }, { "epoch": 3.6753095975232197, "grad_norm": 0.06334274262189865, "learning_rate": 7.345625751619136e-05, "loss": 0.0078, "step": 18988 }, { "epoch": 3.675503095975232, "grad_norm": 0.08595656603574753, "learning_rate": 7.345375507108171e-05, "loss": 0.0078, "step": 18989 }, { "epoch": 3.6756965944272446, "grad_norm": 0.07820794731378555, "learning_rate": 7.345125255736555e-05, "loss": 0.006, "step": 18990 }, { "epoch": 3.675890092879257, "grad_norm": 0.03770485520362854, "learning_rate": 7.344874997505217e-05, "loss": 0.0074, "step": 18991 }, { "epoch": 3.6760835913312695, "grad_norm": 0.0611555278301239, "learning_rate": 7.344624732415088e-05, "loss": 0.006, "step": 18992 }, { "epoch": 3.676277089783282, "grad_norm": 0.04744226112961769, "learning_rate": 7.3443744604671e-05, "loss": 0.0055, "step": 18993 }, { "epoch": 3.6764705882352944, "grad_norm": 0.04517992585897446, "learning_rate": 7.344124181662182e-05, "loss": 0.0063, "step": 18994 }, { "epoch": 3.6766640866873064, "grad_norm": 0.04811742901802063, "learning_rate": 7.343873896001267e-05, "loss": 0.0063, "step": 18995 }, { "epoch": 3.676857585139319, "grad_norm": 0.05072590336203575, "learning_rate": 7.34362360348528e-05, "loss": 0.0084, "step": 18996 }, { "epoch": 3.6770510835913313, "grad_norm": 0.053272947669029236, "learning_rate": 7.343373304115154e-05, "loss": 0.0056, "step": 18997 }, { "epoch": 3.6772445820433437, "grad_norm": 
0.04246731474995613, "learning_rate": 7.343122997891824e-05, "loss": 0.0061, "step": 18998 }, { "epoch": 3.677438080495356, "grad_norm": 0.043886326253414154, "learning_rate": 7.342872684816215e-05, "loss": 0.0059, "step": 18999 }, { "epoch": 3.6776315789473686, "grad_norm": 0.06076343357563019, "learning_rate": 7.342622364889258e-05, "loss": 0.0054, "step": 19000 }, { "epoch": 3.6778250773993806, "grad_norm": 0.03451211005449295, "learning_rate": 7.342372038111887e-05, "loss": 0.0056, "step": 19001 }, { "epoch": 3.678018575851393, "grad_norm": 0.057039979845285416, "learning_rate": 7.342121704485029e-05, "loss": 0.0059, "step": 19002 }, { "epoch": 3.6782120743034055, "grad_norm": 0.035700563341379166, "learning_rate": 7.341871364009618e-05, "loss": 0.0062, "step": 19003 }, { "epoch": 3.678405572755418, "grad_norm": 0.05295286700129509, "learning_rate": 7.341621016686582e-05, "loss": 0.0061, "step": 19004 }, { "epoch": 3.6785990712074303, "grad_norm": 0.04512648656964302, "learning_rate": 7.341370662516853e-05, "loss": 0.0061, "step": 19005 }, { "epoch": 3.678792569659443, "grad_norm": 0.059377968311309814, "learning_rate": 7.341120301501362e-05, "loss": 0.0069, "step": 19006 }, { "epoch": 3.6789860681114552, "grad_norm": 0.03675038740038872, "learning_rate": 7.34086993364104e-05, "loss": 0.0066, "step": 19007 }, { "epoch": 3.6791795665634677, "grad_norm": 0.09023504704236984, "learning_rate": 7.340619558936817e-05, "loss": 0.0065, "step": 19008 }, { "epoch": 3.67937306501548, "grad_norm": 0.020891137421131134, "learning_rate": 7.340369177389624e-05, "loss": 0.0067, "step": 19009 }, { "epoch": 3.679566563467492, "grad_norm": 0.06263735890388489, "learning_rate": 7.340118789000391e-05, "loss": 0.0083, "step": 19010 }, { "epoch": 3.6797600619195046, "grad_norm": 0.06730971485376358, "learning_rate": 7.339868393770049e-05, "loss": 0.0054, "step": 19011 }, { "epoch": 3.679953560371517, "grad_norm": 0.06949169188737869, "learning_rate": 7.339617991699532e-05, "loss": 0.0069, "step": 19012 }, { "epoch": 3.6801470588235294, "grad_norm": 0.10120052099227905, "learning_rate": 7.339367582789767e-05, "loss": 0.0064, "step": 19013 }, { "epoch": 3.680340557275542, "grad_norm": 0.08068783581256866, "learning_rate": 7.339117167041687e-05, "loss": 0.006, "step": 19014 }, { "epoch": 3.6805340557275543, "grad_norm": 0.06557134538888931, "learning_rate": 7.338866744456222e-05, "loss": 0.0053, "step": 19015 }, { "epoch": 3.6807275541795663, "grad_norm": 0.09323984384536743, "learning_rate": 7.338616315034304e-05, "loss": 0.0061, "step": 19016 }, { "epoch": 3.6809210526315788, "grad_norm": 0.03238862380385399, "learning_rate": 7.338365878776864e-05, "loss": 0.0057, "step": 19017 }, { "epoch": 3.681114551083591, "grad_norm": 0.061911217868328094, "learning_rate": 7.338115435684833e-05, "loss": 0.006, "step": 19018 }, { "epoch": 3.6813080495356036, "grad_norm": 0.045546937733888626, "learning_rate": 7.337864985759142e-05, "loss": 0.0059, "step": 19019 }, { "epoch": 3.681501547987616, "grad_norm": 0.05423060059547424, "learning_rate": 7.337614529000721e-05, "loss": 0.0055, "step": 19020 }, { "epoch": 3.6816950464396285, "grad_norm": 0.042061809450387955, "learning_rate": 7.337364065410502e-05, "loss": 0.0067, "step": 19021 }, { "epoch": 3.681888544891641, "grad_norm": 0.052792325615882874, "learning_rate": 7.337113594989415e-05, "loss": 0.0057, "step": 19022 }, { "epoch": 3.6820820433436534, "grad_norm": 0.0424625501036644, "learning_rate": 7.336863117738392e-05, "loss": 0.0069, "step": 19023 }, { "epoch": 
3.682275541795666, "grad_norm": 0.06116507947444916, "learning_rate": 7.336612633658368e-05, "loss": 0.0067, "step": 19024 }, { "epoch": 3.6824690402476783, "grad_norm": 0.057318978011608124, "learning_rate": 7.336362142750269e-05, "loss": 0.006, "step": 19025 }, { "epoch": 3.6826625386996903, "grad_norm": 0.04563667252659798, "learning_rate": 7.336111645015027e-05, "loss": 0.0075, "step": 19026 }, { "epoch": 3.6828560371517027, "grad_norm": 0.06478162854909897, "learning_rate": 7.335861140453575e-05, "loss": 0.007, "step": 19027 }, { "epoch": 3.683049535603715, "grad_norm": 0.04280544072389603, "learning_rate": 7.335610629066844e-05, "loss": 0.0062, "step": 19028 }, { "epoch": 3.6832430340557276, "grad_norm": 0.0668649896979332, "learning_rate": 7.335360110855765e-05, "loss": 0.0073, "step": 19029 }, { "epoch": 3.68343653250774, "grad_norm": 0.06274408847093582, "learning_rate": 7.335109585821268e-05, "loss": 0.0053, "step": 19030 }, { "epoch": 3.683630030959752, "grad_norm": 0.03335060551762581, "learning_rate": 7.334859053964286e-05, "loss": 0.0055, "step": 19031 }, { "epoch": 3.6838235294117645, "grad_norm": 0.0713789314031601, "learning_rate": 7.334608515285753e-05, "loss": 0.007, "step": 19032 }, { "epoch": 3.684017027863777, "grad_norm": 0.05715309828519821, "learning_rate": 7.334357969786596e-05, "loss": 0.0058, "step": 19033 }, { "epoch": 3.6842105263157894, "grad_norm": 0.03395148366689682, "learning_rate": 7.334107417467746e-05, "loss": 0.006, "step": 19034 }, { "epoch": 3.684404024767802, "grad_norm": 0.05306077376008034, "learning_rate": 7.333856858330138e-05, "loss": 0.0065, "step": 19035 }, { "epoch": 3.6845975232198143, "grad_norm": 0.03808233141899109, "learning_rate": 7.333606292374703e-05, "loss": 0.0066, "step": 19036 }, { "epoch": 3.6847910216718267, "grad_norm": 0.05803653597831726, "learning_rate": 7.33335571960237e-05, "loss": 0.007, "step": 19037 }, { "epoch": 3.684984520123839, "grad_norm": 0.04785391315817833, "learning_rate": 7.333105140014074e-05, "loss": 0.006, "step": 19038 }, { "epoch": 3.6851780185758516, "grad_norm": 0.06435522437095642, "learning_rate": 7.332854553610744e-05, "loss": 0.0073, "step": 19039 }, { "epoch": 3.685371517027864, "grad_norm": 0.029056694358587265, "learning_rate": 7.332603960393311e-05, "loss": 0.0065, "step": 19040 }, { "epoch": 3.685565015479876, "grad_norm": 0.06939727067947388, "learning_rate": 7.332353360362711e-05, "loss": 0.0065, "step": 19041 }, { "epoch": 3.6857585139318885, "grad_norm": 0.041221991181373596, "learning_rate": 7.332102753519869e-05, "loss": 0.0057, "step": 19042 }, { "epoch": 3.685952012383901, "grad_norm": 0.060809049755334854, "learning_rate": 7.331852139865723e-05, "loss": 0.0061, "step": 19043 }, { "epoch": 3.6861455108359134, "grad_norm": 0.057066284120082855, "learning_rate": 7.331601519401202e-05, "loss": 0.006, "step": 19044 }, { "epoch": 3.686339009287926, "grad_norm": 0.06835424154996872, "learning_rate": 7.331350892127237e-05, "loss": 0.0063, "step": 19045 }, { "epoch": 3.6865325077399382, "grad_norm": 0.05002540349960327, "learning_rate": 7.331100258044762e-05, "loss": 0.0067, "step": 19046 }, { "epoch": 3.6867260061919502, "grad_norm": 0.038194190710783005, "learning_rate": 7.330849617154707e-05, "loss": 0.0055, "step": 19047 }, { "epoch": 3.6869195046439627, "grad_norm": 0.03450443595647812, "learning_rate": 7.330598969458002e-05, "loss": 0.0069, "step": 19048 }, { "epoch": 3.687113003095975, "grad_norm": 0.039541322737932205, "learning_rate": 7.330348314955583e-05, "loss": 0.0063, "step": 
19049 }, { "epoch": 3.6873065015479876, "grad_norm": 0.01936512254178524, "learning_rate": 7.33009765364838e-05, "loss": 0.0056, "step": 19050 }, { "epoch": 3.6875, "grad_norm": 0.037135716527700424, "learning_rate": 7.329846985537325e-05, "loss": 0.0069, "step": 19051 }, { "epoch": 3.6876934984520124, "grad_norm": 0.028993936255574226, "learning_rate": 7.32959631062335e-05, "loss": 0.0067, "step": 19052 }, { "epoch": 3.687886996904025, "grad_norm": 0.0350411981344223, "learning_rate": 7.329345628907387e-05, "loss": 0.0058, "step": 19053 }, { "epoch": 3.6880804953560373, "grad_norm": 0.035129692405462265, "learning_rate": 7.329094940390365e-05, "loss": 0.0056, "step": 19054 }, { "epoch": 3.6882739938080498, "grad_norm": 0.025251029059290886, "learning_rate": 7.328844245073221e-05, "loss": 0.0057, "step": 19055 }, { "epoch": 3.6884674922600618, "grad_norm": 0.07677849382162094, "learning_rate": 7.328593542956885e-05, "loss": 0.0071, "step": 19056 }, { "epoch": 3.688660990712074, "grad_norm": 0.05535827577114105, "learning_rate": 7.328342834042287e-05, "loss": 0.0049, "step": 19057 }, { "epoch": 3.6888544891640866, "grad_norm": 0.06095552071928978, "learning_rate": 7.328092118330363e-05, "loss": 0.0052, "step": 19058 }, { "epoch": 3.689047987616099, "grad_norm": 0.03373538702726364, "learning_rate": 7.327841395822042e-05, "loss": 0.0065, "step": 19059 }, { "epoch": 3.6892414860681115, "grad_norm": 0.06530274450778961, "learning_rate": 7.327590666518257e-05, "loss": 0.0057, "step": 19060 }, { "epoch": 3.689434984520124, "grad_norm": 0.029014766216278076, "learning_rate": 7.32733993041994e-05, "loss": 0.0056, "step": 19061 }, { "epoch": 3.689628482972136, "grad_norm": 0.061453185975551605, "learning_rate": 7.327089187528022e-05, "loss": 0.0067, "step": 19062 }, { "epoch": 3.6898219814241484, "grad_norm": 0.07581828534603119, "learning_rate": 7.32683843784344e-05, "loss": 0.0064, "step": 19063 }, { "epoch": 3.690015479876161, "grad_norm": 0.07048345357179642, "learning_rate": 7.326587681367121e-05, "loss": 0.0072, "step": 19064 }, { "epoch": 3.6902089783281733, "grad_norm": 0.06008945405483246, "learning_rate": 7.3263369181e-05, "loss": 0.0062, "step": 19065 }, { "epoch": 3.6904024767801857, "grad_norm": 0.11583150923252106, "learning_rate": 7.326086148043008e-05, "loss": 0.0056, "step": 19066 }, { "epoch": 3.690595975232198, "grad_norm": 0.048824068158864975, "learning_rate": 7.325835371197076e-05, "loss": 0.007, "step": 19067 }, { "epoch": 3.6907894736842106, "grad_norm": 0.18026861548423767, "learning_rate": 7.32558458756314e-05, "loss": 0.007, "step": 19068 }, { "epoch": 3.690982972136223, "grad_norm": 0.059500355273485184, "learning_rate": 7.325333797142129e-05, "loss": 0.0052, "step": 19069 }, { "epoch": 3.6911764705882355, "grad_norm": 0.1354849636554718, "learning_rate": 7.325082999934977e-05, "loss": 0.0066, "step": 19070 }, { "epoch": 3.691369969040248, "grad_norm": 0.1177859902381897, "learning_rate": 7.324832195942617e-05, "loss": 0.0068, "step": 19071 }, { "epoch": 3.69156346749226, "grad_norm": 0.05152095854282379, "learning_rate": 7.324581385165979e-05, "loss": 0.0065, "step": 19072 }, { "epoch": 3.6917569659442724, "grad_norm": 0.10226677358150482, "learning_rate": 7.324330567605997e-05, "loss": 0.0069, "step": 19073 }, { "epoch": 3.691950464396285, "grad_norm": 0.09410473704338074, "learning_rate": 7.324079743263606e-05, "loss": 0.0061, "step": 19074 }, { "epoch": 3.6921439628482973, "grad_norm": 0.06915289908647537, "learning_rate": 7.323828912139733e-05, "loss": 0.007, 
"step": 19075 }, { "epoch": 3.6923374613003097, "grad_norm": 0.12217642366886139, "learning_rate": 7.323578074235316e-05, "loss": 0.0052, "step": 19076 }, { "epoch": 3.6925309597523217, "grad_norm": 0.05160725489258766, "learning_rate": 7.323327229551285e-05, "loss": 0.0066, "step": 19077 }, { "epoch": 3.692724458204334, "grad_norm": 0.07775944471359253, "learning_rate": 7.323076378088571e-05, "loss": 0.005, "step": 19078 }, { "epoch": 3.6929179566563466, "grad_norm": 0.07570591568946838, "learning_rate": 7.322825519848108e-05, "loss": 0.0058, "step": 19079 }, { "epoch": 3.693111455108359, "grad_norm": 0.04677502438426018, "learning_rate": 7.32257465483083e-05, "loss": 0.0059, "step": 19080 }, { "epoch": 3.6933049535603715, "grad_norm": 0.08523911237716675, "learning_rate": 7.322323783037668e-05, "loss": 0.0065, "step": 19081 }, { "epoch": 3.693498452012384, "grad_norm": 0.037084370851516724, "learning_rate": 7.322072904469555e-05, "loss": 0.0063, "step": 19082 }, { "epoch": 3.6936919504643964, "grad_norm": 0.043195996433496475, "learning_rate": 7.321822019127425e-05, "loss": 0.0068, "step": 19083 }, { "epoch": 3.693885448916409, "grad_norm": 0.049300868064165115, "learning_rate": 7.321571127012209e-05, "loss": 0.0068, "step": 19084 }, { "epoch": 3.6940789473684212, "grad_norm": 0.034990329295396805, "learning_rate": 7.321320228124841e-05, "loss": 0.0064, "step": 19085 }, { "epoch": 3.6942724458204337, "grad_norm": 0.03355484828352928, "learning_rate": 7.321069322466251e-05, "loss": 0.0043, "step": 19086 }, { "epoch": 3.6944659442724457, "grad_norm": 0.03509146347641945, "learning_rate": 7.320818410037376e-05, "loss": 0.0062, "step": 19087 }, { "epoch": 3.694659442724458, "grad_norm": 0.05860532820224762, "learning_rate": 7.320567490839145e-05, "loss": 0.0056, "step": 19088 }, { "epoch": 3.6948529411764706, "grad_norm": 0.04696708172559738, "learning_rate": 7.320316564872494e-05, "loss": 0.0053, "step": 19089 }, { "epoch": 3.695046439628483, "grad_norm": 0.04269697889685631, "learning_rate": 7.320065632138357e-05, "loss": 0.007, "step": 19090 }, { "epoch": 3.6952399380804954, "grad_norm": 0.08964019268751144, "learning_rate": 7.319814692637662e-05, "loss": 0.0059, "step": 19091 }, { "epoch": 3.695433436532508, "grad_norm": 0.03084762580692768, "learning_rate": 7.319563746371343e-05, "loss": 0.0062, "step": 19092 }, { "epoch": 3.69562693498452, "grad_norm": 0.0769527405500412, "learning_rate": 7.319312793340336e-05, "loss": 0.0057, "step": 19093 }, { "epoch": 3.6958204334365323, "grad_norm": 0.07520057260990143, "learning_rate": 7.319061833545572e-05, "loss": 0.0074, "step": 19094 }, { "epoch": 3.6960139318885448, "grad_norm": 0.035813432186841965, "learning_rate": 7.318810866987985e-05, "loss": 0.0057, "step": 19095 }, { "epoch": 3.696207430340557, "grad_norm": 0.09642095118761063, "learning_rate": 7.318559893668508e-05, "loss": 0.0063, "step": 19096 }, { "epoch": 3.6964009287925697, "grad_norm": 0.038400836288928986, "learning_rate": 7.318308913588072e-05, "loss": 0.0054, "step": 19097 }, { "epoch": 3.696594427244582, "grad_norm": 0.06836465746164322, "learning_rate": 7.31805792674761e-05, "loss": 0.0061, "step": 19098 }, { "epoch": 3.6967879256965945, "grad_norm": 0.04065422713756561, "learning_rate": 7.317806933148058e-05, "loss": 0.0067, "step": 19099 }, { "epoch": 3.696981424148607, "grad_norm": 0.03263382986187935, "learning_rate": 7.317555932790348e-05, "loss": 0.0056, "step": 19100 }, { "epoch": 3.6971749226006194, "grad_norm": 0.052206505089998245, "learning_rate": 
7.317304925675414e-05, "loss": 0.0054, "step": 19101 }, { "epoch": 3.6973684210526314, "grad_norm": 0.026219813153147697, "learning_rate": 7.317053911804185e-05, "loss": 0.0064, "step": 19102 }, { "epoch": 3.697561919504644, "grad_norm": 0.0329182967543602, "learning_rate": 7.316802891177601e-05, "loss": 0.0058, "step": 19103 }, { "epoch": 3.6977554179566563, "grad_norm": 0.03321800008416176, "learning_rate": 7.31655186379659e-05, "loss": 0.0066, "step": 19104 }, { "epoch": 3.6979489164086687, "grad_norm": 0.029621567577123642, "learning_rate": 7.316300829662085e-05, "loss": 0.0057, "step": 19105 }, { "epoch": 3.698142414860681, "grad_norm": 0.041820887476205826, "learning_rate": 7.316049788775023e-05, "loss": 0.0052, "step": 19106 }, { "epoch": 3.6983359133126936, "grad_norm": 0.025947434827685356, "learning_rate": 7.315798741136336e-05, "loss": 0.0059, "step": 19107 }, { "epoch": 3.6985294117647056, "grad_norm": 0.04250944033265114, "learning_rate": 7.315547686746956e-05, "loss": 0.0054, "step": 19108 }, { "epoch": 3.698722910216718, "grad_norm": 0.02944021485745907, "learning_rate": 7.315296625607817e-05, "loss": 0.0066, "step": 19109 }, { "epoch": 3.6989164086687305, "grad_norm": 0.05240761116147041, "learning_rate": 7.315045557719851e-05, "loss": 0.0067, "step": 19110 }, { "epoch": 3.699109907120743, "grad_norm": 0.025295084342360497, "learning_rate": 7.314794483083996e-05, "loss": 0.006, "step": 19111 }, { "epoch": 3.6993034055727554, "grad_norm": 0.0589522160589695, "learning_rate": 7.314543401701179e-05, "loss": 0.0061, "step": 19112 }, { "epoch": 3.699496904024768, "grad_norm": 0.03582843765616417, "learning_rate": 7.314292313572339e-05, "loss": 0.0079, "step": 19113 }, { "epoch": 3.6996904024767803, "grad_norm": 0.044910408556461334, "learning_rate": 7.314041218698407e-05, "loss": 0.0075, "step": 19114 }, { "epoch": 3.6998839009287927, "grad_norm": 0.059792403131723404, "learning_rate": 7.313790117080316e-05, "loss": 0.0054, "step": 19115 }, { "epoch": 3.700077399380805, "grad_norm": 0.038259945809841156, "learning_rate": 7.313539008719e-05, "loss": 0.005, "step": 19116 }, { "epoch": 3.7002708978328176, "grad_norm": 0.0438859649002552, "learning_rate": 7.313287893615392e-05, "loss": 0.0059, "step": 19117 }, { "epoch": 3.7004643962848296, "grad_norm": 0.05353507399559021, "learning_rate": 7.313036771770427e-05, "loss": 0.0063, "step": 19118 }, { "epoch": 3.700657894736842, "grad_norm": 0.033796995878219604, "learning_rate": 7.31278564318504e-05, "loss": 0.0084, "step": 19119 }, { "epoch": 3.7008513931888545, "grad_norm": 0.0775352194905281, "learning_rate": 7.312534507860161e-05, "loss": 0.0056, "step": 19120 }, { "epoch": 3.701044891640867, "grad_norm": 0.052329763770103455, "learning_rate": 7.312283365796727e-05, "loss": 0.0057, "step": 19121 }, { "epoch": 3.7012383900928794, "grad_norm": 0.0663415864109993, "learning_rate": 7.312032216995668e-05, "loss": 0.0072, "step": 19122 }, { "epoch": 3.701431888544892, "grad_norm": 0.08266380429267883, "learning_rate": 7.31178106145792e-05, "loss": 0.0059, "step": 19123 }, { "epoch": 3.701625386996904, "grad_norm": 0.0497552827000618, "learning_rate": 7.311529899184416e-05, "loss": 0.0058, "step": 19124 }, { "epoch": 3.7018188854489162, "grad_norm": 0.10142435878515244, "learning_rate": 7.311278730176092e-05, "loss": 0.0061, "step": 19125 }, { "epoch": 3.7020123839009287, "grad_norm": 0.04457622766494751, "learning_rate": 7.31102755443388e-05, "loss": 0.0062, "step": 19126 }, { "epoch": 3.702205882352941, "grad_norm": 0.09236080199480057, 
"learning_rate": 7.310776371958711e-05, "loss": 0.0061, "step": 19127 }, { "epoch": 3.7023993808049536, "grad_norm": 0.04960666969418526, "learning_rate": 7.310525182751525e-05, "loss": 0.0062, "step": 19128 }, { "epoch": 3.702592879256966, "grad_norm": 0.07506757229566574, "learning_rate": 7.310273986813249e-05, "loss": 0.0047, "step": 19129 }, { "epoch": 3.7027863777089784, "grad_norm": 0.053652867674827576, "learning_rate": 7.310022784144824e-05, "loss": 0.0071, "step": 19130 }, { "epoch": 3.702979876160991, "grad_norm": 0.08854856342077255, "learning_rate": 7.309771574747176e-05, "loss": 0.0066, "step": 19131 }, { "epoch": 3.7031733746130033, "grad_norm": 0.04107389971613884, "learning_rate": 7.309520358621247e-05, "loss": 0.0061, "step": 19132 }, { "epoch": 3.7033668730650153, "grad_norm": 0.08369656652212143, "learning_rate": 7.309269135767967e-05, "loss": 0.0071, "step": 19133 }, { "epoch": 3.7035603715170278, "grad_norm": 0.07363280653953552, "learning_rate": 7.309017906188269e-05, "loss": 0.0056, "step": 19134 }, { "epoch": 3.70375386996904, "grad_norm": 0.05102964863181114, "learning_rate": 7.308766669883088e-05, "loss": 0.0077, "step": 19135 }, { "epoch": 3.7039473684210527, "grad_norm": 0.12713491916656494, "learning_rate": 7.308515426853359e-05, "loss": 0.0068, "step": 19136 }, { "epoch": 3.704140866873065, "grad_norm": 0.05318770930171013, "learning_rate": 7.308264177100014e-05, "loss": 0.0069, "step": 19137 }, { "epoch": 3.7043343653250775, "grad_norm": 0.12522153556346893, "learning_rate": 7.30801292062399e-05, "loss": 0.0067, "step": 19138 }, { "epoch": 3.7045278637770895, "grad_norm": 0.04086577892303467, "learning_rate": 7.307761657426219e-05, "loss": 0.0062, "step": 19139 }, { "epoch": 3.704721362229102, "grad_norm": 0.09907796233892441, "learning_rate": 7.307510387507636e-05, "loss": 0.0063, "step": 19140 }, { "epoch": 3.7049148606811144, "grad_norm": 0.07856284826993942, "learning_rate": 7.307259110869172e-05, "loss": 0.0065, "step": 19141 }, { "epoch": 3.705108359133127, "grad_norm": 0.06836492568254471, "learning_rate": 7.307007827511767e-05, "loss": 0.0059, "step": 19142 }, { "epoch": 3.7053018575851393, "grad_norm": 0.08283291012048721, "learning_rate": 7.30675653743635e-05, "loss": 0.0064, "step": 19143 }, { "epoch": 3.7054953560371517, "grad_norm": 0.04895750805735588, "learning_rate": 7.30650524064386e-05, "loss": 0.0068, "step": 19144 }, { "epoch": 3.705688854489164, "grad_norm": 0.08275509625673294, "learning_rate": 7.306253937135227e-05, "loss": 0.0057, "step": 19145 }, { "epoch": 3.7058823529411766, "grad_norm": 0.03328235074877739, "learning_rate": 7.306002626911387e-05, "loss": 0.006, "step": 19146 }, { "epoch": 3.706075851393189, "grad_norm": 0.07143080234527588, "learning_rate": 7.305751309973273e-05, "loss": 0.0063, "step": 19147 }, { "epoch": 3.7062693498452015, "grad_norm": 0.040154844522476196, "learning_rate": 7.305499986321823e-05, "loss": 0.005, "step": 19148 }, { "epoch": 3.7064628482972135, "grad_norm": 0.0756637305021286, "learning_rate": 7.305248655957969e-05, "loss": 0.0057, "step": 19149 }, { "epoch": 3.706656346749226, "grad_norm": 0.05505826696753502, "learning_rate": 7.304997318882643e-05, "loss": 0.0055, "step": 19150 }, { "epoch": 3.7068498452012384, "grad_norm": 0.08888047933578491, "learning_rate": 7.304745975096783e-05, "loss": 0.0063, "step": 19151 }, { "epoch": 3.707043343653251, "grad_norm": 0.0635196641087532, "learning_rate": 7.304494624601321e-05, "loss": 0.0059, "step": 19152 }, { "epoch": 3.7072368421052633, "grad_norm": 
0.07839222997426987, "learning_rate": 7.304243267397195e-05, "loss": 0.0074, "step": 19153 }, { "epoch": 3.7074303405572753, "grad_norm": 0.07958372682332993, "learning_rate": 7.303991903485335e-05, "loss": 0.0063, "step": 19154 }, { "epoch": 3.7076238390092877, "grad_norm": 0.04659831151366234, "learning_rate": 7.30374053286668e-05, "loss": 0.0069, "step": 19155 }, { "epoch": 3.7078173374613, "grad_norm": 0.06396867334842682, "learning_rate": 7.303489155542158e-05, "loss": 0.005, "step": 19156 }, { "epoch": 3.7080108359133126, "grad_norm": 0.036103539168834686, "learning_rate": 7.303237771512711e-05, "loss": 0.0074, "step": 19157 }, { "epoch": 3.708204334365325, "grad_norm": 0.04372178390622139, "learning_rate": 7.302986380779269e-05, "loss": 0.0063, "step": 19158 }, { "epoch": 3.7083978328173375, "grad_norm": 0.03236856311559677, "learning_rate": 7.302734983342769e-05, "loss": 0.0064, "step": 19159 }, { "epoch": 3.70859133126935, "grad_norm": 0.06565851718187332, "learning_rate": 7.302483579204145e-05, "loss": 0.008, "step": 19160 }, { "epoch": 3.7087848297213624, "grad_norm": 0.032251156866550446, "learning_rate": 7.30223216836433e-05, "loss": 0.0056, "step": 19161 }, { "epoch": 3.708978328173375, "grad_norm": 0.06574258953332901, "learning_rate": 7.30198075082426e-05, "loss": 0.0067, "step": 19162 }, { "epoch": 3.7091718266253872, "grad_norm": 0.03858024999499321, "learning_rate": 7.30172932658487e-05, "loss": 0.0065, "step": 19163 }, { "epoch": 3.7093653250773992, "grad_norm": 0.04769470915198326, "learning_rate": 7.301477895647094e-05, "loss": 0.0066, "step": 19164 }, { "epoch": 3.7095588235294117, "grad_norm": 0.057460613548755646, "learning_rate": 7.301226458011868e-05, "loss": 0.0058, "step": 19165 }, { "epoch": 3.709752321981424, "grad_norm": 0.03802580386400223, "learning_rate": 7.300975013680123e-05, "loss": 0.0056, "step": 19166 }, { "epoch": 3.7099458204334366, "grad_norm": 0.05407680943608284, "learning_rate": 7.300723562652799e-05, "loss": 0.0063, "step": 19167 }, { "epoch": 3.710139318885449, "grad_norm": 0.05255100876092911, "learning_rate": 7.300472104930828e-05, "loss": 0.0058, "step": 19168 }, { "epoch": 3.7103328173374615, "grad_norm": 0.03735765069723129, "learning_rate": 7.300220640515145e-05, "loss": 0.0063, "step": 19169 }, { "epoch": 3.7105263157894735, "grad_norm": 0.05868026614189148, "learning_rate": 7.299969169406685e-05, "loss": 0.0059, "step": 19170 }, { "epoch": 3.710719814241486, "grad_norm": 0.05705995112657547, "learning_rate": 7.299717691606385e-05, "loss": 0.0045, "step": 19171 }, { "epoch": 3.7109133126934983, "grad_norm": 0.03466883301734924, "learning_rate": 7.299466207115178e-05, "loss": 0.0063, "step": 19172 }, { "epoch": 3.7111068111455108, "grad_norm": 0.054204877465963364, "learning_rate": 7.299214715933998e-05, "loss": 0.0055, "step": 19173 }, { "epoch": 3.711300309597523, "grad_norm": 0.027758818119764328, "learning_rate": 7.29896321806378e-05, "loss": 0.0068, "step": 19174 }, { "epoch": 3.7114938080495357, "grad_norm": 0.03161383792757988, "learning_rate": 7.298711713505461e-05, "loss": 0.0058, "step": 19175 }, { "epoch": 3.711687306501548, "grad_norm": 0.04873301833868027, "learning_rate": 7.298460202259974e-05, "loss": 0.0066, "step": 19176 }, { "epoch": 3.7118808049535605, "grad_norm": 0.02840040624141693, "learning_rate": 7.298208684328257e-05, "loss": 0.0061, "step": 19177 }, { "epoch": 3.712074303405573, "grad_norm": 0.03330331668257713, "learning_rate": 7.297957159711242e-05, "loss": 0.0057, "step": 19178 }, { "epoch": 
3.712267801857585, "grad_norm": 0.03089062124490738, "learning_rate": 7.297705628409867e-05, "loss": 0.0067, "step": 19179 }, { "epoch": 3.7124613003095974, "grad_norm": 0.05169025436043739, "learning_rate": 7.297454090425064e-05, "loss": 0.0053, "step": 19180 }, { "epoch": 3.71265479876161, "grad_norm": 0.0334152951836586, "learning_rate": 7.29720254575777e-05, "loss": 0.0053, "step": 19181 }, { "epoch": 3.7128482972136223, "grad_norm": 0.04333401843905449, "learning_rate": 7.296950994408918e-05, "loss": 0.0071, "step": 19182 }, { "epoch": 3.7130417956656347, "grad_norm": 0.04070416837930679, "learning_rate": 7.296699436379448e-05, "loss": 0.0059, "step": 19183 }, { "epoch": 3.713235294117647, "grad_norm": 0.052700627595186234, "learning_rate": 7.29644787167029e-05, "loss": 0.0059, "step": 19184 }, { "epoch": 3.713428792569659, "grad_norm": 0.032089751213788986, "learning_rate": 7.296196300282383e-05, "loss": 0.006, "step": 19185 }, { "epoch": 3.7136222910216716, "grad_norm": 0.035208459943532944, "learning_rate": 7.29594472221666e-05, "loss": 0.0058, "step": 19186 }, { "epoch": 3.713815789473684, "grad_norm": 0.035905905067920685, "learning_rate": 7.295693137474057e-05, "loss": 0.0056, "step": 19187 }, { "epoch": 3.7140092879256965, "grad_norm": 0.05048217996954918, "learning_rate": 7.295441546055508e-05, "loss": 0.0066, "step": 19188 }, { "epoch": 3.714202786377709, "grad_norm": 0.034793347120285034, "learning_rate": 7.295189947961952e-05, "loss": 0.0062, "step": 19189 }, { "epoch": 3.7143962848297214, "grad_norm": 0.04034029319882393, "learning_rate": 7.29493834319432e-05, "loss": 0.0066, "step": 19190 }, { "epoch": 3.714589783281734, "grad_norm": 0.042557235807180405, "learning_rate": 7.294686731753552e-05, "loss": 0.0054, "step": 19191 }, { "epoch": 3.7147832817337463, "grad_norm": 0.04162980988621712, "learning_rate": 7.294435113640581e-05, "loss": 0.0063, "step": 19192 }, { "epoch": 3.7149767801857587, "grad_norm": 0.03251770883798599, "learning_rate": 7.294183488856341e-05, "loss": 0.0064, "step": 19193 }, { "epoch": 3.715170278637771, "grad_norm": 0.03362147882580757, "learning_rate": 7.293931857401768e-05, "loss": 0.0059, "step": 19194 }, { "epoch": 3.715363777089783, "grad_norm": 0.07970165461301804, "learning_rate": 7.293680219277799e-05, "loss": 0.0077, "step": 19195 }, { "epoch": 3.7155572755417956, "grad_norm": 0.04710225388407707, "learning_rate": 7.293428574485369e-05, "loss": 0.0055, "step": 19196 }, { "epoch": 3.715750773993808, "grad_norm": 0.07132858783006668, "learning_rate": 7.293176923025414e-05, "loss": 0.0061, "step": 19197 }, { "epoch": 3.7159442724458205, "grad_norm": 0.046521469950675964, "learning_rate": 7.292925264898869e-05, "loss": 0.0063, "step": 19198 }, { "epoch": 3.716137770897833, "grad_norm": 0.03955652564764023, "learning_rate": 7.292673600106669e-05, "loss": 0.0062, "step": 19199 }, { "epoch": 3.7163312693498454, "grad_norm": 0.08557737618684769, "learning_rate": 7.292421928649751e-05, "loss": 0.0062, "step": 19200 }, { "epoch": 3.7165247678018574, "grad_norm": 0.032559048384428024, "learning_rate": 7.29217025052905e-05, "loss": 0.0076, "step": 19201 }, { "epoch": 3.71671826625387, "grad_norm": 0.10825304687023163, "learning_rate": 7.2919185657455e-05, "loss": 0.0063, "step": 19202 }, { "epoch": 3.7169117647058822, "grad_norm": 0.04088567569851875, "learning_rate": 7.29166687430004e-05, "loss": 0.0058, "step": 19203 }, { "epoch": 3.7171052631578947, "grad_norm": 0.09276355057954788, "learning_rate": 7.291415176193604e-05, "loss": 0.0069, "step": 
19204 }, { "epoch": 3.717298761609907, "grad_norm": 0.09433691948652267, "learning_rate": 7.291163471427129e-05, "loss": 0.0078, "step": 19205 }, { "epoch": 3.7174922600619196, "grad_norm": 0.04871022701263428, "learning_rate": 7.290911760001546e-05, "loss": 0.0064, "step": 19206 }, { "epoch": 3.717685758513932, "grad_norm": 0.10407139360904694, "learning_rate": 7.290660041917797e-05, "loss": 0.0059, "step": 19207 }, { "epoch": 3.7178792569659445, "grad_norm": 0.048223841935396194, "learning_rate": 7.290408317176816e-05, "loss": 0.0066, "step": 19208 }, { "epoch": 3.718072755417957, "grad_norm": 0.07340565323829651, "learning_rate": 7.290156585779537e-05, "loss": 0.0068, "step": 19209 }, { "epoch": 3.718266253869969, "grad_norm": 0.06741047650575638, "learning_rate": 7.289904847726896e-05, "loss": 0.0052, "step": 19210 }, { "epoch": 3.7184597523219813, "grad_norm": 0.048625148832798004, "learning_rate": 7.28965310301983e-05, "loss": 0.0063, "step": 19211 }, { "epoch": 3.718653250773994, "grad_norm": 0.06217797473073006, "learning_rate": 7.289401351659275e-05, "loss": 0.0055, "step": 19212 }, { "epoch": 3.718846749226006, "grad_norm": 0.02243758924305439, "learning_rate": 7.289149593646166e-05, "loss": 0.0067, "step": 19213 }, { "epoch": 3.7190402476780187, "grad_norm": 0.060846101492643356, "learning_rate": 7.288897828981442e-05, "loss": 0.0067, "step": 19214 }, { "epoch": 3.719233746130031, "grad_norm": 0.04816782474517822, "learning_rate": 7.288646057666034e-05, "loss": 0.0075, "step": 19215 }, { "epoch": 3.719427244582043, "grad_norm": 0.05674475058913231, "learning_rate": 7.288394279700882e-05, "loss": 0.0065, "step": 19216 }, { "epoch": 3.7196207430340555, "grad_norm": 0.041283056139945984, "learning_rate": 7.288142495086922e-05, "loss": 0.0065, "step": 19217 }, { "epoch": 3.719814241486068, "grad_norm": 0.021543730050325394, "learning_rate": 7.287890703825087e-05, "loss": 0.007, "step": 19218 }, { "epoch": 3.7200077399380804, "grad_norm": 0.056912243366241455, "learning_rate": 7.287638905916314e-05, "loss": 0.0077, "step": 19219 }, { "epoch": 3.720201238390093, "grad_norm": 0.0562894381582737, "learning_rate": 7.287387101361542e-05, "loss": 0.0058, "step": 19220 }, { "epoch": 3.7203947368421053, "grad_norm": 0.06866418570280075, "learning_rate": 7.287135290161704e-05, "loss": 0.0066, "step": 19221 }, { "epoch": 3.7205882352941178, "grad_norm": 0.06547040492296219, "learning_rate": 7.286883472317739e-05, "loss": 0.0051, "step": 19222 }, { "epoch": 3.72078173374613, "grad_norm": 0.04202864319086075, "learning_rate": 7.286631647830579e-05, "loss": 0.0074, "step": 19223 }, { "epoch": 3.7209752321981426, "grad_norm": 0.053038932383060455, "learning_rate": 7.286379816701165e-05, "loss": 0.0056, "step": 19224 }, { "epoch": 3.7211687306501546, "grad_norm": 0.037360481917858124, "learning_rate": 7.286127978930431e-05, "loss": 0.0059, "step": 19225 }, { "epoch": 3.721362229102167, "grad_norm": 0.07112576067447662, "learning_rate": 7.28587613451931e-05, "loss": 0.0062, "step": 19226 }, { "epoch": 3.7215557275541795, "grad_norm": 0.02700703777372837, "learning_rate": 7.285624283468745e-05, "loss": 0.0059, "step": 19227 }, { "epoch": 3.721749226006192, "grad_norm": 0.06625423580408096, "learning_rate": 7.285372425779668e-05, "loss": 0.0067, "step": 19228 }, { "epoch": 3.7219427244582044, "grad_norm": 0.090166375041008, "learning_rate": 7.285120561453016e-05, "loss": 0.0068, "step": 19229 }, { "epoch": 3.722136222910217, "grad_norm": 0.06453343480825424, "learning_rate": 7.284868690489727e-05, 
"loss": 0.0061, "step": 19230 }, { "epoch": 3.722329721362229, "grad_norm": 0.11952054500579834, "learning_rate": 7.284616812890735e-05, "loss": 0.0069, "step": 19231 }, { "epoch": 3.7225232198142413, "grad_norm": 0.07114670425653458, "learning_rate": 7.284364928656977e-05, "loss": 0.0068, "step": 19232 }, { "epoch": 3.7227167182662537, "grad_norm": 0.09861932694911957, "learning_rate": 7.28411303778939e-05, "loss": 0.0063, "step": 19233 }, { "epoch": 3.722910216718266, "grad_norm": 0.08514931052923203, "learning_rate": 7.28386114028891e-05, "loss": 0.0065, "step": 19234 }, { "epoch": 3.7231037151702786, "grad_norm": 0.07224830240011215, "learning_rate": 7.283609236156475e-05, "loss": 0.0061, "step": 19235 }, { "epoch": 3.723297213622291, "grad_norm": 0.10586917400360107, "learning_rate": 7.28335732539302e-05, "loss": 0.0065, "step": 19236 }, { "epoch": 3.7234907120743035, "grad_norm": 0.046424008905887604, "learning_rate": 7.28310540799948e-05, "loss": 0.0059, "step": 19237 }, { "epoch": 3.723684210526316, "grad_norm": 0.09895309060811996, "learning_rate": 7.282853483976796e-05, "loss": 0.0078, "step": 19238 }, { "epoch": 3.7238777089783284, "grad_norm": 0.06443293392658234, "learning_rate": 7.2826015533259e-05, "loss": 0.006, "step": 19239 }, { "epoch": 3.724071207430341, "grad_norm": 0.08633515238761902, "learning_rate": 7.282349616047732e-05, "loss": 0.0068, "step": 19240 }, { "epoch": 3.724264705882353, "grad_norm": 0.08339180797338486, "learning_rate": 7.282097672143227e-05, "loss": 0.0063, "step": 19241 }, { "epoch": 3.7244582043343653, "grad_norm": 0.05439240485429764, "learning_rate": 7.281845721613323e-05, "loss": 0.0061, "step": 19242 }, { "epoch": 3.7246517027863777, "grad_norm": 0.08875172585248947, "learning_rate": 7.281593764458955e-05, "loss": 0.0068, "step": 19243 }, { "epoch": 3.72484520123839, "grad_norm": 0.04184288904070854, "learning_rate": 7.28134180068106e-05, "loss": 0.0057, "step": 19244 }, { "epoch": 3.7250386996904026, "grad_norm": 0.04755778610706329, "learning_rate": 7.281089830280575e-05, "loss": 0.0063, "step": 19245 }, { "epoch": 3.725232198142415, "grad_norm": 0.05444585159420967, "learning_rate": 7.280837853258436e-05, "loss": 0.0071, "step": 19246 }, { "epoch": 3.725425696594427, "grad_norm": 0.042695023119449615, "learning_rate": 7.280585869615583e-05, "loss": 0.0064, "step": 19247 }, { "epoch": 3.7256191950464395, "grad_norm": 0.05207269266247749, "learning_rate": 7.280333879352949e-05, "loss": 0.006, "step": 19248 }, { "epoch": 3.725812693498452, "grad_norm": 0.04708635434508324, "learning_rate": 7.280081882471472e-05, "loss": 0.0057, "step": 19249 }, { "epoch": 3.7260061919504643, "grad_norm": 0.05002320557832718, "learning_rate": 7.27982987897209e-05, "loss": 0.006, "step": 19250 }, { "epoch": 3.726199690402477, "grad_norm": 0.03516180068254471, "learning_rate": 7.27957786885574e-05, "loss": 0.0057, "step": 19251 }, { "epoch": 3.7263931888544892, "grad_norm": 0.049753911793231964, "learning_rate": 7.279325852123356e-05, "loss": 0.0063, "step": 19252 }, { "epoch": 3.7265866873065017, "grad_norm": 0.03996531292796135, "learning_rate": 7.279073828775878e-05, "loss": 0.0069, "step": 19253 }, { "epoch": 3.726780185758514, "grad_norm": 0.05479230731725693, "learning_rate": 7.278821798814241e-05, "loss": 0.0056, "step": 19254 }, { "epoch": 3.7269736842105265, "grad_norm": 0.05281780660152435, "learning_rate": 7.278569762239384e-05, "loss": 0.0061, "step": 19255 }, { "epoch": 3.7271671826625385, "grad_norm": 0.07219376415014267, "learning_rate": 
7.278317719052244e-05, "loss": 0.0073, "step": 19256 }, { "epoch": 3.727360681114551, "grad_norm": 0.04362038895487785, "learning_rate": 7.278065669253752e-05, "loss": 0.0058, "step": 19257 }, { "epoch": 3.7275541795665634, "grad_norm": 0.05235842242836952, "learning_rate": 7.277813612844855e-05, "loss": 0.0064, "step": 19258 }, { "epoch": 3.727747678018576, "grad_norm": 0.0769466683268547, "learning_rate": 7.277561549826483e-05, "loss": 0.0062, "step": 19259 }, { "epoch": 3.7279411764705883, "grad_norm": 0.06961517781019211, "learning_rate": 7.277309480199575e-05, "loss": 0.0056, "step": 19260 }, { "epoch": 3.7281346749226008, "grad_norm": 0.09628665447235107, "learning_rate": 7.27705740396507e-05, "loss": 0.0064, "step": 19261 }, { "epoch": 3.7283281733746128, "grad_norm": 0.06813551485538483, "learning_rate": 7.276805321123902e-05, "loss": 0.0058, "step": 19262 }, { "epoch": 3.728521671826625, "grad_norm": 0.09766657650470734, "learning_rate": 7.276553231677008e-05, "loss": 0.0063, "step": 19263 }, { "epoch": 3.7287151702786376, "grad_norm": 0.0972553938627243, "learning_rate": 7.27630113562533e-05, "loss": 0.007, "step": 19264 }, { "epoch": 3.72890866873065, "grad_norm": 0.0731511190533638, "learning_rate": 7.276049032969799e-05, "loss": 0.0051, "step": 19265 }, { "epoch": 3.7291021671826625, "grad_norm": 0.06511504203081131, "learning_rate": 7.275796923711358e-05, "loss": 0.0066, "step": 19266 }, { "epoch": 3.729295665634675, "grad_norm": 0.06385796517133713, "learning_rate": 7.275544807850941e-05, "loss": 0.006, "step": 19267 }, { "epoch": 3.7294891640866874, "grad_norm": 0.047985028475522995, "learning_rate": 7.275292685389486e-05, "loss": 0.0068, "step": 19268 }, { "epoch": 3.7296826625387, "grad_norm": 0.0640835165977478, "learning_rate": 7.275040556327928e-05, "loss": 0.0085, "step": 19269 }, { "epoch": 3.7298761609907123, "grad_norm": 0.07722994685173035, "learning_rate": 7.274788420667209e-05, "loss": 0.0063, "step": 19270 }, { "epoch": 3.7300696594427247, "grad_norm": 0.048104867339134216, "learning_rate": 7.274536278408262e-05, "loss": 0.0059, "step": 19271 }, { "epoch": 3.7302631578947367, "grad_norm": 0.09778144955635071, "learning_rate": 7.27428412955203e-05, "loss": 0.006, "step": 19272 }, { "epoch": 3.730456656346749, "grad_norm": 0.05760154128074646, "learning_rate": 7.274031974099443e-05, "loss": 0.0078, "step": 19273 }, { "epoch": 3.7306501547987616, "grad_norm": 0.07231868803501129, "learning_rate": 7.273779812051444e-05, "loss": 0.0066, "step": 19274 }, { "epoch": 3.730843653250774, "grad_norm": 0.06854373216629028, "learning_rate": 7.273527643408969e-05, "loss": 0.0059, "step": 19275 }, { "epoch": 3.7310371517027865, "grad_norm": 0.06256987154483795, "learning_rate": 7.273275468172955e-05, "loss": 0.0071, "step": 19276 }, { "epoch": 3.7312306501547985, "grad_norm": 0.05201749503612518, "learning_rate": 7.273023286344337e-05, "loss": 0.0067, "step": 19277 }, { "epoch": 3.731424148606811, "grad_norm": 0.04539898410439491, "learning_rate": 7.272771097924062e-05, "loss": 0.0061, "step": 19278 }, { "epoch": 3.7316176470588234, "grad_norm": 0.05745686963200569, "learning_rate": 7.272518902913056e-05, "loss": 0.0071, "step": 19279 }, { "epoch": 3.731811145510836, "grad_norm": 0.048602327704429626, "learning_rate": 7.272266701312262e-05, "loss": 0.0068, "step": 19280 }, { "epoch": 3.7320046439628483, "grad_norm": 0.046940289437770844, "learning_rate": 7.272014493122617e-05, "loss": 0.007, "step": 19281 }, { "epoch": 3.7321981424148607, "grad_norm": 0.044458843767642975, 
"learning_rate": 7.271762278345059e-05, "loss": 0.0079, "step": 19282 }, { "epoch": 3.732391640866873, "grad_norm": 0.055636052042245865, "learning_rate": 7.271510056980523e-05, "loss": 0.0066, "step": 19283 }, { "epoch": 3.7325851393188856, "grad_norm": 0.02974073588848114, "learning_rate": 7.271257829029954e-05, "loss": 0.006, "step": 19284 }, { "epoch": 3.732778637770898, "grad_norm": 0.05333460494875908, "learning_rate": 7.271005594494281e-05, "loss": 0.0065, "step": 19285 }, { "epoch": 3.7329721362229105, "grad_norm": 0.027916783466935158, "learning_rate": 7.270753353374448e-05, "loss": 0.0068, "step": 19286 }, { "epoch": 3.7331656346749225, "grad_norm": 0.05075071007013321, "learning_rate": 7.270501105671388e-05, "loss": 0.0058, "step": 19287 }, { "epoch": 3.733359133126935, "grad_norm": 0.023760342970490456, "learning_rate": 7.270248851386044e-05, "loss": 0.0063, "step": 19288 }, { "epoch": 3.7335526315789473, "grad_norm": 0.041848067194223404, "learning_rate": 7.269996590519348e-05, "loss": 0.0054, "step": 19289 }, { "epoch": 3.73374613003096, "grad_norm": 0.03937962278723717, "learning_rate": 7.269744323072243e-05, "loss": 0.0063, "step": 19290 }, { "epoch": 3.7339396284829722, "grad_norm": 0.05060867220163345, "learning_rate": 7.269492049045664e-05, "loss": 0.006, "step": 19291 }, { "epoch": 3.7341331269349847, "grad_norm": 0.038343820720911026, "learning_rate": 7.26923976844055e-05, "loss": 0.0057, "step": 19292 }, { "epoch": 3.7343266253869967, "grad_norm": 0.04683387652039528, "learning_rate": 7.268987481257837e-05, "loss": 0.0058, "step": 19293 }, { "epoch": 3.734520123839009, "grad_norm": 0.06035992130637169, "learning_rate": 7.268735187498465e-05, "loss": 0.0057, "step": 19294 }, { "epoch": 3.7347136222910216, "grad_norm": 0.04743300750851631, "learning_rate": 7.268482887163372e-05, "loss": 0.0064, "step": 19295 }, { "epoch": 3.734907120743034, "grad_norm": 0.05278598889708519, "learning_rate": 7.268230580253494e-05, "loss": 0.007, "step": 19296 }, { "epoch": 3.7351006191950464, "grad_norm": 0.03622673824429512, "learning_rate": 7.267978266769773e-05, "loss": 0.005, "step": 19297 }, { "epoch": 3.735294117647059, "grad_norm": 0.0470430962741375, "learning_rate": 7.267725946713144e-05, "loss": 0.0059, "step": 19298 }, { "epoch": 3.7354876160990713, "grad_norm": 0.03361613303422928, "learning_rate": 7.267473620084543e-05, "loss": 0.0062, "step": 19299 }, { "epoch": 3.7356811145510838, "grad_norm": 0.05196913331747055, "learning_rate": 7.267221286884913e-05, "loss": 0.0073, "step": 19300 }, { "epoch": 3.735874613003096, "grad_norm": 0.031064804643392563, "learning_rate": 7.266968947115189e-05, "loss": 0.0065, "step": 19301 }, { "epoch": 3.736068111455108, "grad_norm": 0.0642932578921318, "learning_rate": 7.266716600776309e-05, "loss": 0.0059, "step": 19302 }, { "epoch": 3.7362616099071206, "grad_norm": 0.028645217418670654, "learning_rate": 7.266464247869215e-05, "loss": 0.0057, "step": 19303 }, { "epoch": 3.736455108359133, "grad_norm": 0.06577698141336441, "learning_rate": 7.26621188839484e-05, "loss": 0.0065, "step": 19304 }, { "epoch": 3.7366486068111455, "grad_norm": 0.02766604535281658, "learning_rate": 7.265959522354124e-05, "loss": 0.0056, "step": 19305 }, { "epoch": 3.736842105263158, "grad_norm": 0.05269927904009819, "learning_rate": 7.265707149748006e-05, "loss": 0.0065, "step": 19306 }, { "epoch": 3.7370356037151704, "grad_norm": 0.053489841520786285, "learning_rate": 7.265454770577423e-05, "loss": 0.0065, "step": 19307 }, { "epoch": 3.7372291021671824, "grad_norm": 
0.05854476988315582, "learning_rate": 7.265202384843315e-05, "loss": 0.0054, "step": 19308 }, { "epoch": 3.737422600619195, "grad_norm": 0.06204522028565407, "learning_rate": 7.264949992546619e-05, "loss": 0.0064, "step": 19309 }, { "epoch": 3.7376160990712073, "grad_norm": 0.06166164577007294, "learning_rate": 7.264697593688275e-05, "loss": 0.0072, "step": 19310 }, { "epoch": 3.7378095975232197, "grad_norm": 0.05770431458950043, "learning_rate": 7.264445188269219e-05, "loss": 0.0062, "step": 19311 }, { "epoch": 3.738003095975232, "grad_norm": 0.03119901940226555, "learning_rate": 7.264192776290392e-05, "loss": 0.005, "step": 19312 }, { "epoch": 3.7381965944272446, "grad_norm": 0.06630728393793106, "learning_rate": 7.26394035775273e-05, "loss": 0.0051, "step": 19313 }, { "epoch": 3.738390092879257, "grad_norm": 0.02519155852496624, "learning_rate": 7.263687932657172e-05, "loss": 0.0058, "step": 19314 }, { "epoch": 3.7385835913312695, "grad_norm": 0.05825864151120186, "learning_rate": 7.263435501004657e-05, "loss": 0.0066, "step": 19315 }, { "epoch": 3.738777089783282, "grad_norm": 0.05200700834393501, "learning_rate": 7.263183062796123e-05, "loss": 0.0056, "step": 19316 }, { "epoch": 3.7389705882352944, "grad_norm": 0.03360624983906746, "learning_rate": 7.26293061803251e-05, "loss": 0.0062, "step": 19317 }, { "epoch": 3.7391640866873064, "grad_norm": 0.08010498434305191, "learning_rate": 7.262678166714756e-05, "loss": 0.0057, "step": 19318 }, { "epoch": 3.739357585139319, "grad_norm": 0.026825305074453354, "learning_rate": 7.262425708843797e-05, "loss": 0.0065, "step": 19319 }, { "epoch": 3.7395510835913313, "grad_norm": 0.07912642508745193, "learning_rate": 7.262173244420573e-05, "loss": 0.0073, "step": 19320 }, { "epoch": 3.7397445820433437, "grad_norm": 0.02907412312924862, "learning_rate": 7.261920773446027e-05, "loss": 0.007, "step": 19321 }, { "epoch": 3.739938080495356, "grad_norm": 0.0726371631026268, "learning_rate": 7.26166829592109e-05, "loss": 0.0062, "step": 19322 }, { "epoch": 3.7401315789473686, "grad_norm": 0.05268936604261398, "learning_rate": 7.261415811846704e-05, "loss": 0.0058, "step": 19323 }, { "epoch": 3.7403250773993806, "grad_norm": 0.06012639030814171, "learning_rate": 7.261163321223811e-05, "loss": 0.0065, "step": 19324 }, { "epoch": 3.740518575851393, "grad_norm": 0.0776737779378891, "learning_rate": 7.260910824053343e-05, "loss": 0.0058, "step": 19325 }, { "epoch": 3.7407120743034055, "grad_norm": 0.05016845464706421, "learning_rate": 7.260658320336245e-05, "loss": 0.0059, "step": 19326 }, { "epoch": 3.740905572755418, "grad_norm": 0.0776829868555069, "learning_rate": 7.260405810073455e-05, "loss": 0.0074, "step": 19327 }, { "epoch": 3.7410990712074303, "grad_norm": 0.06329849362373352, "learning_rate": 7.260153293265907e-05, "loss": 0.0059, "step": 19328 }, { "epoch": 3.741292569659443, "grad_norm": 0.08021685481071472, "learning_rate": 7.259900769914543e-05, "loss": 0.0066, "step": 19329 }, { "epoch": 3.7414860681114552, "grad_norm": 0.06245378404855728, "learning_rate": 7.259648240020304e-05, "loss": 0.0059, "step": 19330 }, { "epoch": 3.7416795665634677, "grad_norm": 0.06618436425924301, "learning_rate": 7.259395703584123e-05, "loss": 0.0056, "step": 19331 }, { "epoch": 3.74187306501548, "grad_norm": 0.06509627401828766, "learning_rate": 7.259143160606944e-05, "loss": 0.0075, "step": 19332 }, { "epoch": 3.742066563467492, "grad_norm": 0.05395794287323952, "learning_rate": 7.258890611089704e-05, "loss": 0.0058, "step": 19333 }, { "epoch": 
3.7422600619195046, "grad_norm": 0.06760439276695251, "learning_rate": 7.258638055033342e-05, "loss": 0.0067, "step": 19334 }, { "epoch": 3.742453560371517, "grad_norm": 0.06459774821996689, "learning_rate": 7.258385492438798e-05, "loss": 0.0053, "step": 19335 }, { "epoch": 3.7426470588235294, "grad_norm": 0.04959220439195633, "learning_rate": 7.258132923307009e-05, "loss": 0.0064, "step": 19336 }, { "epoch": 3.742840557275542, "grad_norm": 0.06640393286943436, "learning_rate": 7.257880347638916e-05, "loss": 0.0066, "step": 19337 }, { "epoch": 3.7430340557275543, "grad_norm": 0.06282351166009903, "learning_rate": 7.257627765435455e-05, "loss": 0.0063, "step": 19338 }, { "epoch": 3.7432275541795663, "grad_norm": 0.036599356681108475, "learning_rate": 7.25737517669757e-05, "loss": 0.0062, "step": 19339 }, { "epoch": 3.7434210526315788, "grad_norm": 0.024270202964544296, "learning_rate": 7.257122581426194e-05, "loss": 0.0061, "step": 19340 }, { "epoch": 3.743614551083591, "grad_norm": 0.03298189491033554, "learning_rate": 7.25686997962227e-05, "loss": 0.0056, "step": 19341 }, { "epoch": 3.7438080495356036, "grad_norm": 0.028992367908358574, "learning_rate": 7.256617371286737e-05, "loss": 0.0073, "step": 19342 }, { "epoch": 3.744001547987616, "grad_norm": 0.03419893607497215, "learning_rate": 7.256364756420535e-05, "loss": 0.0064, "step": 19343 }, { "epoch": 3.7441950464396285, "grad_norm": 0.03811623528599739, "learning_rate": 7.2561121350246e-05, "loss": 0.0059, "step": 19344 }, { "epoch": 3.744388544891641, "grad_norm": 0.038716863840818405, "learning_rate": 7.255859507099871e-05, "loss": 0.0062, "step": 19345 }, { "epoch": 3.7445820433436534, "grad_norm": 0.0530557706952095, "learning_rate": 7.255606872647292e-05, "loss": 0.0058, "step": 19346 }, { "epoch": 3.744775541795666, "grad_norm": 0.0492781437933445, "learning_rate": 7.255354231667797e-05, "loss": 0.0057, "step": 19347 }, { "epoch": 3.7449690402476783, "grad_norm": 0.047234755009412766, "learning_rate": 7.255101584162329e-05, "loss": 0.0063, "step": 19348 }, { "epoch": 3.7451625386996903, "grad_norm": 0.06316009908914566, "learning_rate": 7.254848930131826e-05, "loss": 0.0071, "step": 19349 }, { "epoch": 3.7453560371517027, "grad_norm": 0.04393564164638519, "learning_rate": 7.254596269577228e-05, "loss": 0.0065, "step": 19350 }, { "epoch": 3.745549535603715, "grad_norm": 0.05459900572896004, "learning_rate": 7.254343602499471e-05, "loss": 0.006, "step": 19351 }, { "epoch": 3.7457430340557276, "grad_norm": 0.04232717305421829, "learning_rate": 7.254090928899496e-05, "loss": 0.0064, "step": 19352 }, { "epoch": 3.74593653250774, "grad_norm": 0.04278576001524925, "learning_rate": 7.253838248778246e-05, "loss": 0.0064, "step": 19353 }, { "epoch": 3.746130030959752, "grad_norm": 0.08649829030036926, "learning_rate": 7.253585562136656e-05, "loss": 0.007, "step": 19354 }, { "epoch": 3.7463235294117645, "grad_norm": 0.05744393169879913, "learning_rate": 7.253332868975668e-05, "loss": 0.005, "step": 19355 }, { "epoch": 3.746517027863777, "grad_norm": 0.055686064064502716, "learning_rate": 7.253080169296219e-05, "loss": 0.0051, "step": 19356 }, { "epoch": 3.7467105263157894, "grad_norm": 0.05131039395928383, "learning_rate": 7.252827463099252e-05, "loss": 0.0067, "step": 19357 }, { "epoch": 3.746904024767802, "grad_norm": 0.06319942325353622, "learning_rate": 7.252574750385702e-05, "loss": 0.0055, "step": 19358 }, { "epoch": 3.7470975232198143, "grad_norm": 0.03108956664800644, "learning_rate": 7.25232203115651e-05, "loss": 0.0054, "step": 
19359 }, { "epoch": 3.7472910216718267, "grad_norm": 0.07973508536815643, "learning_rate": 7.25206930541262e-05, "loss": 0.006, "step": 19360 }, { "epoch": 3.747484520123839, "grad_norm": 0.03496763855218887, "learning_rate": 7.251816573154966e-05, "loss": 0.0055, "step": 19361 }, { "epoch": 3.7476780185758516, "grad_norm": 0.0779939740896225, "learning_rate": 7.25156383438449e-05, "loss": 0.0069, "step": 19362 }, { "epoch": 3.747871517027864, "grad_norm": 0.053510960191488266, "learning_rate": 7.251311089102132e-05, "loss": 0.0059, "step": 19363 }, { "epoch": 3.748065015479876, "grad_norm": 0.0734441876411438, "learning_rate": 7.25105833730883e-05, "loss": 0.0053, "step": 19364 }, { "epoch": 3.7482585139318885, "grad_norm": 0.05730925500392914, "learning_rate": 7.250805579005525e-05, "loss": 0.0066, "step": 19365 }, { "epoch": 3.748452012383901, "grad_norm": 0.07529006898403168, "learning_rate": 7.250552814193157e-05, "loss": 0.0054, "step": 19366 }, { "epoch": 3.7486455108359134, "grad_norm": 0.0537918359041214, "learning_rate": 7.250300042872663e-05, "loss": 0.0079, "step": 19367 }, { "epoch": 3.748839009287926, "grad_norm": 0.08411507308483124, "learning_rate": 7.250047265044985e-05, "loss": 0.0064, "step": 19368 }, { "epoch": 3.7490325077399382, "grad_norm": 0.04549502953886986, "learning_rate": 7.249794480711063e-05, "loss": 0.0062, "step": 19369 }, { "epoch": 3.7492260061919502, "grad_norm": 0.08445610105991364, "learning_rate": 7.249541689871838e-05, "loss": 0.0063, "step": 19370 }, { "epoch": 3.7494195046439627, "grad_norm": 0.05046309903264046, "learning_rate": 7.249288892528247e-05, "loss": 0.0061, "step": 19371 }, { "epoch": 3.749613003095975, "grad_norm": 0.04442393407225609, "learning_rate": 7.249036088681229e-05, "loss": 0.0071, "step": 19372 }, { "epoch": 3.7498065015479876, "grad_norm": 0.061451490968465805, "learning_rate": 7.24878327833173e-05, "loss": 0.0072, "step": 19373 }, { "epoch": 3.75, "grad_norm": 0.03736349567770958, "learning_rate": 7.248530461480683e-05, "loss": 0.0052, "step": 19374 }, { "epoch": 3.7501934984520124, "grad_norm": 0.07684621214866638, "learning_rate": 7.24827763812903e-05, "loss": 0.0057, "step": 19375 }, { "epoch": 3.750386996904025, "grad_norm": 0.05032860115170479, "learning_rate": 7.248024808277712e-05, "loss": 0.0056, "step": 19376 }, { "epoch": 3.7505804953560373, "grad_norm": 0.07808355987071991, "learning_rate": 7.247771971927668e-05, "loss": 0.0064, "step": 19377 }, { "epoch": 3.7507739938080498, "grad_norm": 0.04218420013785362, "learning_rate": 7.24751912907984e-05, "loss": 0.005, "step": 19378 }, { "epoch": 3.7509674922600618, "grad_norm": 0.08050034195184708, "learning_rate": 7.247266279735165e-05, "loss": 0.0047, "step": 19379 }, { "epoch": 3.751160990712074, "grad_norm": 0.039173442870378494, "learning_rate": 7.247013423894585e-05, "loss": 0.0071, "step": 19380 }, { "epoch": 3.7513544891640866, "grad_norm": 0.05725123733282089, "learning_rate": 7.24676056155904e-05, "loss": 0.0056, "step": 19381 }, { "epoch": 3.751547987616099, "grad_norm": 0.038206156343221664, "learning_rate": 7.24650769272947e-05, "loss": 0.0074, "step": 19382 }, { "epoch": 3.7517414860681115, "grad_norm": 0.05161953717470169, "learning_rate": 7.246254817406813e-05, "loss": 0.0075, "step": 19383 }, { "epoch": 3.751934984520124, "grad_norm": 0.03395194187760353, "learning_rate": 7.246001935592013e-05, "loss": 0.0052, "step": 19384 }, { "epoch": 3.752128482972136, "grad_norm": 0.05522293969988823, "learning_rate": 7.245749047286007e-05, "loss": 0.0076, "step": 
19385 }, { "epoch": 3.7523219814241484, "grad_norm": 0.05039912834763527, "learning_rate": 7.245496152489735e-05, "loss": 0.0064, "step": 19386 }, { "epoch": 3.752515479876161, "grad_norm": 0.03651578724384308, "learning_rate": 7.24524325120414e-05, "loss": 0.0055, "step": 19387 }, { "epoch": 3.7527089783281733, "grad_norm": 0.05368201434612274, "learning_rate": 7.244990343430161e-05, "loss": 0.0066, "step": 19388 }, { "epoch": 3.7529024767801857, "grad_norm": 0.02338518761098385, "learning_rate": 7.244737429168736e-05, "loss": 0.0057, "step": 19389 }, { "epoch": 3.753095975232198, "grad_norm": 0.06382642686367035, "learning_rate": 7.24448450842081e-05, "loss": 0.008, "step": 19390 }, { "epoch": 3.7532894736842106, "grad_norm": 0.031117653474211693, "learning_rate": 7.244231581187318e-05, "loss": 0.0072, "step": 19391 }, { "epoch": 3.753482972136223, "grad_norm": 0.05680404603481293, "learning_rate": 7.243978647469201e-05, "loss": 0.0056, "step": 19392 }, { "epoch": 3.7536764705882355, "grad_norm": 0.045484255999326706, "learning_rate": 7.243725707267406e-05, "loss": 0.0059, "step": 19393 }, { "epoch": 3.753869969040248, "grad_norm": 0.0791618600487709, "learning_rate": 7.243472760582866e-05, "loss": 0.0062, "step": 19394 }, { "epoch": 3.75406346749226, "grad_norm": 0.041181981563568115, "learning_rate": 7.243219807416524e-05, "loss": 0.0062, "step": 19395 }, { "epoch": 3.7542569659442724, "grad_norm": 0.06419562548398972, "learning_rate": 7.24296684776932e-05, "loss": 0.0072, "step": 19396 }, { "epoch": 3.754450464396285, "grad_norm": 0.06982792168855667, "learning_rate": 7.242713881642194e-05, "loss": 0.0081, "step": 19397 }, { "epoch": 3.7546439628482973, "grad_norm": 0.09353617578744888, "learning_rate": 7.24246090903609e-05, "loss": 0.0057, "step": 19398 }, { "epoch": 3.7548374613003097, "grad_norm": 0.0857766717672348, "learning_rate": 7.242207929951944e-05, "loss": 0.007, "step": 19399 }, { "epoch": 3.7550309597523217, "grad_norm": 0.10777057707309723, "learning_rate": 7.241954944390698e-05, "loss": 0.0064, "step": 19400 }, { "epoch": 3.755224458204334, "grad_norm": 0.08717010915279388, "learning_rate": 7.241701952353293e-05, "loss": 0.0071, "step": 19401 }, { "epoch": 3.7554179566563466, "grad_norm": 0.0971096009016037, "learning_rate": 7.24144895384067e-05, "loss": 0.0054, "step": 19402 }, { "epoch": 3.755611455108359, "grad_norm": 0.09444820880889893, "learning_rate": 7.241195948853766e-05, "loss": 0.0066, "step": 19403 }, { "epoch": 3.7558049535603715, "grad_norm": 0.05444776639342308, "learning_rate": 7.240942937393527e-05, "loss": 0.0059, "step": 19404 }, { "epoch": 3.755998452012384, "grad_norm": 0.10725046694278717, "learning_rate": 7.24068991946089e-05, "loss": 0.0067, "step": 19405 }, { "epoch": 3.7561919504643964, "grad_norm": 0.035573359578847885, "learning_rate": 7.2404368950568e-05, "loss": 0.0056, "step": 19406 }, { "epoch": 3.756385448916409, "grad_norm": 0.09639646857976913, "learning_rate": 7.24018386418219e-05, "loss": 0.0064, "step": 19407 }, { "epoch": 3.7565789473684212, "grad_norm": 0.04712848737835884, "learning_rate": 7.239930826838008e-05, "loss": 0.0079, "step": 19408 }, { "epoch": 3.7567724458204337, "grad_norm": 0.07861169427633286, "learning_rate": 7.23967778302519e-05, "loss": 0.0071, "step": 19409 }, { "epoch": 3.7569659442724457, "grad_norm": 0.06544648110866547, "learning_rate": 7.23942473274468e-05, "loss": 0.0071, "step": 19410 }, { "epoch": 3.757159442724458, "grad_norm": 0.08694002032279968, "learning_rate": 7.239171675997417e-05, "loss": 
0.0072, "step": 19411 }, { "epoch": 3.7573529411764706, "grad_norm": 0.10022673010826111, "learning_rate": 7.238918612784343e-05, "loss": 0.0055, "step": 19412 }, { "epoch": 3.757546439628483, "grad_norm": 0.10324141383171082, "learning_rate": 7.238665543106398e-05, "loss": 0.0061, "step": 19413 }, { "epoch": 3.7577399380804954, "grad_norm": 0.07872281968593597, "learning_rate": 7.238412466964522e-05, "loss": 0.0067, "step": 19414 }, { "epoch": 3.757933436532508, "grad_norm": 0.1356900930404663, "learning_rate": 7.238159384359657e-05, "loss": 0.0058, "step": 19415 }, { "epoch": 3.75812693498452, "grad_norm": 0.05097179859876633, "learning_rate": 7.237906295292744e-05, "loss": 0.0052, "step": 19416 }, { "epoch": 3.7583204334365323, "grad_norm": 0.1219908595085144, "learning_rate": 7.237653199764723e-05, "loss": 0.0051, "step": 19417 }, { "epoch": 3.7585139318885448, "grad_norm": 0.0990632176399231, "learning_rate": 7.237400097776536e-05, "loss": 0.0063, "step": 19418 }, { "epoch": 3.758707430340557, "grad_norm": 0.05972488224506378, "learning_rate": 7.237146989329124e-05, "loss": 0.0048, "step": 19419 }, { "epoch": 3.7589009287925697, "grad_norm": 0.11768697947263718, "learning_rate": 7.236893874423428e-05, "loss": 0.006, "step": 19420 }, { "epoch": 3.759094427244582, "grad_norm": 0.07310526072978973, "learning_rate": 7.236640753060387e-05, "loss": 0.007, "step": 19421 }, { "epoch": 3.7592879256965945, "grad_norm": 0.1275898516178131, "learning_rate": 7.236387625240944e-05, "loss": 0.0064, "step": 19422 }, { "epoch": 3.759481424148607, "grad_norm": 0.09453465789556503, "learning_rate": 7.236134490966041e-05, "loss": 0.007, "step": 19423 }, { "epoch": 3.7596749226006194, "grad_norm": 0.04795331880450249, "learning_rate": 7.235881350236618e-05, "loss": 0.0057, "step": 19424 }, { "epoch": 3.7598684210526314, "grad_norm": 0.10860693454742432, "learning_rate": 7.235628203053614e-05, "loss": 0.006, "step": 19425 }, { "epoch": 3.760061919504644, "grad_norm": 0.054734967648983, "learning_rate": 7.235375049417972e-05, "loss": 0.0053, "step": 19426 }, { "epoch": 3.7602554179566563, "grad_norm": 0.06629625707864761, "learning_rate": 7.235121889330635e-05, "loss": 0.0054, "step": 19427 }, { "epoch": 3.7604489164086687, "grad_norm": 0.08651122450828552, "learning_rate": 7.234868722792541e-05, "loss": 0.0051, "step": 19428 }, { "epoch": 3.760642414860681, "grad_norm": 0.04062147065997124, "learning_rate": 7.234615549804631e-05, "loss": 0.0059, "step": 19429 }, { "epoch": 3.7608359133126936, "grad_norm": 0.05412616953253746, "learning_rate": 7.234362370367852e-05, "loss": 0.0059, "step": 19430 }, { "epoch": 3.7610294117647056, "grad_norm": 0.044038932770490646, "learning_rate": 7.234109184483136e-05, "loss": 0.0057, "step": 19431 }, { "epoch": 3.761222910216718, "grad_norm": 0.05355722829699516, "learning_rate": 7.233855992151434e-05, "loss": 0.008, "step": 19432 }, { "epoch": 3.7614164086687305, "grad_norm": 0.09250155091285706, "learning_rate": 7.233602793373679e-05, "loss": 0.0063, "step": 19433 }, { "epoch": 3.761609907120743, "grad_norm": 0.05218872055411339, "learning_rate": 7.233349588150818e-05, "loss": 0.0069, "step": 19434 }, { "epoch": 3.7618034055727554, "grad_norm": 0.06302178651094437, "learning_rate": 7.23309637648379e-05, "loss": 0.0059, "step": 19435 }, { "epoch": 3.761996904024768, "grad_norm": 0.11471264064311981, "learning_rate": 7.232843158373537e-05, "loss": 0.0065, "step": 19436 }, { "epoch": 3.7621904024767803, "grad_norm": 0.03635565564036369, "learning_rate": 7.232589933821e-05, 
"loss": 0.0066, "step": 19437 }, { "epoch": 3.7623839009287927, "grad_norm": 0.12382853776216507, "learning_rate": 7.23233670282712e-05, "loss": 0.0058, "step": 19438 }, { "epoch": 3.762577399380805, "grad_norm": 0.13264212012290955, "learning_rate": 7.23208346539284e-05, "loss": 0.005, "step": 19439 }, { "epoch": 3.7627708978328176, "grad_norm": 0.10579752177000046, "learning_rate": 7.2318302215191e-05, "loss": 0.007, "step": 19440 }, { "epoch": 3.7629643962848296, "grad_norm": 0.16696543991565704, "learning_rate": 7.231576971206842e-05, "loss": 0.007, "step": 19441 }, { "epoch": 3.763157894736842, "grad_norm": 0.03225350379943848, "learning_rate": 7.231323714457005e-05, "loss": 0.0063, "step": 19442 }, { "epoch": 3.7633513931888545, "grad_norm": 0.12635420262813568, "learning_rate": 7.231070451270538e-05, "loss": 0.0053, "step": 19443 }, { "epoch": 3.763544891640867, "grad_norm": 0.08943942189216614, "learning_rate": 7.230817181648374e-05, "loss": 0.0053, "step": 19444 }, { "epoch": 3.7637383900928794, "grad_norm": 0.07053825259208679, "learning_rate": 7.23056390559146e-05, "loss": 0.0074, "step": 19445 }, { "epoch": 3.763931888544892, "grad_norm": 0.09323052316904068, "learning_rate": 7.230310623100735e-05, "loss": 0.0073, "step": 19446 }, { "epoch": 3.764125386996904, "grad_norm": 0.03916312754154205, "learning_rate": 7.230057334177141e-05, "loss": 0.0068, "step": 19447 }, { "epoch": 3.7643188854489162, "grad_norm": 0.07993702590465546, "learning_rate": 7.229804038821619e-05, "loss": 0.0063, "step": 19448 }, { "epoch": 3.7645123839009287, "grad_norm": 0.07118459045886993, "learning_rate": 7.229550737035115e-05, "loss": 0.0077, "step": 19449 }, { "epoch": 3.764705882352941, "grad_norm": 0.05839671194553375, "learning_rate": 7.229297428818566e-05, "loss": 0.0071, "step": 19450 }, { "epoch": 3.7648993808049536, "grad_norm": 0.0802302286028862, "learning_rate": 7.229044114172914e-05, "loss": 0.0062, "step": 19451 }, { "epoch": 3.765092879256966, "grad_norm": 0.02940964139997959, "learning_rate": 7.228790793099104e-05, "loss": 0.0064, "step": 19452 }, { "epoch": 3.7652863777089784, "grad_norm": 0.061902180314064026, "learning_rate": 7.228537465598075e-05, "loss": 0.0067, "step": 19453 }, { "epoch": 3.765479876160991, "grad_norm": 0.039523690938949585, "learning_rate": 7.228284131670769e-05, "loss": 0.0072, "step": 19454 }, { "epoch": 3.7656733746130033, "grad_norm": 0.04679916054010391, "learning_rate": 7.228030791318129e-05, "loss": 0.0056, "step": 19455 }, { "epoch": 3.7658668730650153, "grad_norm": 0.05663420632481575, "learning_rate": 7.227777444541097e-05, "loss": 0.005, "step": 19456 }, { "epoch": 3.7660603715170278, "grad_norm": 0.056650932878255844, "learning_rate": 7.227524091340612e-05, "loss": 0.005, "step": 19457 }, { "epoch": 3.76625386996904, "grad_norm": 0.04254080355167389, "learning_rate": 7.227270731717621e-05, "loss": 0.0073, "step": 19458 }, { "epoch": 3.7664473684210527, "grad_norm": 0.058207765221595764, "learning_rate": 7.227017365673064e-05, "loss": 0.0061, "step": 19459 }, { "epoch": 3.766640866873065, "grad_norm": 0.043125417083501816, "learning_rate": 7.226763993207877e-05, "loss": 0.0068, "step": 19460 }, { "epoch": 3.7668343653250775, "grad_norm": 0.05544691160321236, "learning_rate": 7.226510614323011e-05, "loss": 0.006, "step": 19461 }, { "epoch": 3.7670278637770895, "grad_norm": 0.06581920385360718, "learning_rate": 7.226257229019401e-05, "loss": 0.0072, "step": 19462 }, { "epoch": 3.767221362229102, "grad_norm": 0.10978296399116516, "learning_rate": 
7.226003837297994e-05, "loss": 0.0071, "step": 19463 }, { "epoch": 3.7674148606811144, "grad_norm": 0.0264876838773489, "learning_rate": 7.225750439159729e-05, "loss": 0.0062, "step": 19464 }, { "epoch": 3.767608359133127, "grad_norm": 0.10188642889261246, "learning_rate": 7.225497034605552e-05, "loss": 0.0066, "step": 19465 }, { "epoch": 3.7678018575851393, "grad_norm": 0.06226552277803421, "learning_rate": 7.225243623636398e-05, "loss": 0.0055, "step": 19466 }, { "epoch": 3.7679953560371517, "grad_norm": 0.06165903061628342, "learning_rate": 7.224990206253217e-05, "loss": 0.0052, "step": 19467 }, { "epoch": 3.768188854489164, "grad_norm": 0.1003490686416626, "learning_rate": 7.224736782456946e-05, "loss": 0.0056, "step": 19468 }, { "epoch": 3.7683823529411766, "grad_norm": 0.042517952620983124, "learning_rate": 7.224483352248528e-05, "loss": 0.0054, "step": 19469 }, { "epoch": 3.768575851393189, "grad_norm": 0.1050228476524353, "learning_rate": 7.224229915628906e-05, "loss": 0.0051, "step": 19470 }, { "epoch": 3.7687693498452015, "grad_norm": 0.07099646329879761, "learning_rate": 7.223976472599023e-05, "loss": 0.0067, "step": 19471 }, { "epoch": 3.7689628482972135, "grad_norm": 0.0793914720416069, "learning_rate": 7.223723023159818e-05, "loss": 0.0066, "step": 19472 }, { "epoch": 3.769156346749226, "grad_norm": 0.08881201595067978, "learning_rate": 7.223469567312238e-05, "loss": 0.0078, "step": 19473 }, { "epoch": 3.7693498452012384, "grad_norm": 0.044632572680711746, "learning_rate": 7.22321610505722e-05, "loss": 0.0051, "step": 19474 }, { "epoch": 3.769543343653251, "grad_norm": 0.07004550099372864, "learning_rate": 7.222962636395712e-05, "loss": 0.0069, "step": 19475 }, { "epoch": 3.7697368421052633, "grad_norm": 0.08837004005908966, "learning_rate": 7.222709161328652e-05, "loss": 0.0071, "step": 19476 }, { "epoch": 3.7699303405572753, "grad_norm": 0.029395699501037598, "learning_rate": 7.222455679856985e-05, "loss": 0.0054, "step": 19477 }, { "epoch": 3.7701238390092877, "grad_norm": 0.08615996688604355, "learning_rate": 7.222202191981652e-05, "loss": 0.0062, "step": 19478 }, { "epoch": 3.7703173374613, "grad_norm": 0.03771919384598732, "learning_rate": 7.221948697703593e-05, "loss": 0.006, "step": 19479 }, { "epoch": 3.7705108359133126, "grad_norm": 0.07460091263055801, "learning_rate": 7.221695197023756e-05, "loss": 0.0054, "step": 19480 }, { "epoch": 3.770704334365325, "grad_norm": 0.05904518440365791, "learning_rate": 7.22144168994308e-05, "loss": 0.0051, "step": 19481 }, { "epoch": 3.7708978328173375, "grad_norm": 0.045022767037153244, "learning_rate": 7.221188176462508e-05, "loss": 0.0069, "step": 19482 }, { "epoch": 3.77109133126935, "grad_norm": 0.061671532690525055, "learning_rate": 7.220934656582981e-05, "loss": 0.0057, "step": 19483 }, { "epoch": 3.7712848297213624, "grad_norm": 0.03198517858982086, "learning_rate": 7.220681130305446e-05, "loss": 0.0058, "step": 19484 }, { "epoch": 3.771478328173375, "grad_norm": 0.046568404883146286, "learning_rate": 7.22042759763084e-05, "loss": 0.0056, "step": 19485 }, { "epoch": 3.7716718266253872, "grad_norm": 0.055489495396614075, "learning_rate": 7.220174058560107e-05, "loss": 0.007, "step": 19486 }, { "epoch": 3.7718653250773992, "grad_norm": 0.03722755238413811, "learning_rate": 7.219920513094193e-05, "loss": 0.0069, "step": 19487 }, { "epoch": 3.7720588235294117, "grad_norm": 0.028347022831439972, "learning_rate": 7.219666961234039e-05, "loss": 0.0067, "step": 19488 }, { "epoch": 3.772252321981424, "grad_norm": 
0.033014342188835144, "learning_rate": 7.219413402980584e-05, "loss": 0.0061, "step": 19489 }, { "epoch": 3.7724458204334366, "grad_norm": 0.04249424859881401, "learning_rate": 7.219159838334777e-05, "loss": 0.0066, "step": 19490 }, { "epoch": 3.772639318885449, "grad_norm": 0.017226049676537514, "learning_rate": 7.218906267297554e-05, "loss": 0.006, "step": 19491 }, { "epoch": 3.7728328173374615, "grad_norm": 0.03782530874013901, "learning_rate": 7.218652689869865e-05, "loss": 0.0051, "step": 19492 }, { "epoch": 3.7730263157894735, "grad_norm": 0.02102765254676342, "learning_rate": 7.218399106052646e-05, "loss": 0.0057, "step": 19493 }, { "epoch": 3.773219814241486, "grad_norm": 0.037579648196697235, "learning_rate": 7.218145515846843e-05, "loss": 0.0069, "step": 19494 }, { "epoch": 3.7734133126934983, "grad_norm": 0.02830170840024948, "learning_rate": 7.217891919253399e-05, "loss": 0.0054, "step": 19495 }, { "epoch": 3.7736068111455108, "grad_norm": 0.03466294705867767, "learning_rate": 7.217638316273255e-05, "loss": 0.0055, "step": 19496 }, { "epoch": 3.773800309597523, "grad_norm": 0.022220879793167114, "learning_rate": 7.217384706907356e-05, "loss": 0.0057, "step": 19497 }, { "epoch": 3.7739938080495357, "grad_norm": 0.03798571601510048, "learning_rate": 7.217131091156643e-05, "loss": 0.0064, "step": 19498 }, { "epoch": 3.774187306501548, "grad_norm": 0.022005446255207062, "learning_rate": 7.21687746902206e-05, "loss": 0.0049, "step": 19499 }, { "epoch": 3.7743808049535605, "grad_norm": 0.03214815631508827, "learning_rate": 7.216623840504549e-05, "loss": 0.0051, "step": 19500 }, { "epoch": 3.774574303405573, "grad_norm": 0.019856812432408333, "learning_rate": 7.216370205605054e-05, "loss": 0.0056, "step": 19501 }, { "epoch": 3.774767801857585, "grad_norm": 0.03857367858290672, "learning_rate": 7.216116564324519e-05, "loss": 0.0045, "step": 19502 }, { "epoch": 3.7749613003095974, "grad_norm": 0.026511300355196, "learning_rate": 7.215862916663884e-05, "loss": 0.0068, "step": 19503 }, { "epoch": 3.77515479876161, "grad_norm": 0.03052249550819397, "learning_rate": 7.215609262624092e-05, "loss": 0.0061, "step": 19504 }, { "epoch": 3.7753482972136223, "grad_norm": 0.06098884344100952, "learning_rate": 7.21535560220609e-05, "loss": 0.006, "step": 19505 }, { "epoch": 3.7755417956656347, "grad_norm": 0.040949415415525436, "learning_rate": 7.215101935410817e-05, "loss": 0.0065, "step": 19506 }, { "epoch": 3.775735294117647, "grad_norm": 0.06050427630543709, "learning_rate": 7.214848262239219e-05, "loss": 0.0064, "step": 19507 }, { "epoch": 3.775928792569659, "grad_norm": 0.05092087760567665, "learning_rate": 7.214594582692238e-05, "loss": 0.0063, "step": 19508 }, { "epoch": 3.7761222910216716, "grad_norm": 0.05310077965259552, "learning_rate": 7.214340896770816e-05, "loss": 0.0048, "step": 19509 }, { "epoch": 3.776315789473684, "grad_norm": 0.08591093868017197, "learning_rate": 7.214087204475896e-05, "loss": 0.0055, "step": 19510 }, { "epoch": 3.7765092879256965, "grad_norm": 0.03516080603003502, "learning_rate": 7.213833505808423e-05, "loss": 0.0065, "step": 19511 }, { "epoch": 3.776702786377709, "grad_norm": 0.07923436164855957, "learning_rate": 7.213579800769339e-05, "loss": 0.0063, "step": 19512 }, { "epoch": 3.7768962848297214, "grad_norm": 0.05724848061800003, "learning_rate": 7.213326089359588e-05, "loss": 0.0052, "step": 19513 }, { "epoch": 3.777089783281734, "grad_norm": 0.06506527215242386, "learning_rate": 7.213072371580114e-05, "loss": 0.0052, "step": 19514 }, { "epoch": 
3.7772832817337463, "grad_norm": 0.05988534167408943, "learning_rate": 7.212818647431858e-05, "loss": 0.0066, "step": 19515 }, { "epoch": 3.7774767801857587, "grad_norm": 0.06554019451141357, "learning_rate": 7.212564916915763e-05, "loss": 0.0057, "step": 19516 }, { "epoch": 3.777670278637771, "grad_norm": 0.04761819168925285, "learning_rate": 7.212311180032774e-05, "loss": 0.0062, "step": 19517 }, { "epoch": 3.777863777089783, "grad_norm": 0.0711677297949791, "learning_rate": 7.212057436783835e-05, "loss": 0.0051, "step": 19518 }, { "epoch": 3.7780572755417956, "grad_norm": 0.042397089302539825, "learning_rate": 7.211803687169888e-05, "loss": 0.0057, "step": 19519 }, { "epoch": 3.778250773993808, "grad_norm": 0.0662657618522644, "learning_rate": 7.211549931191877e-05, "loss": 0.0059, "step": 19520 }, { "epoch": 3.7784442724458205, "grad_norm": 0.040656160563230515, "learning_rate": 7.211296168850744e-05, "loss": 0.0052, "step": 19521 }, { "epoch": 3.778637770897833, "grad_norm": 0.04245493561029434, "learning_rate": 7.211042400147435e-05, "loss": 0.0051, "step": 19522 }, { "epoch": 3.7788312693498454, "grad_norm": 0.040025945752859116, "learning_rate": 7.21078862508289e-05, "loss": 0.0059, "step": 19523 }, { "epoch": 3.7790247678018574, "grad_norm": 0.08796769380569458, "learning_rate": 7.210534843658056e-05, "loss": 0.0057, "step": 19524 }, { "epoch": 3.77921826625387, "grad_norm": 0.022106027230620384, "learning_rate": 7.210281055873875e-05, "loss": 0.0059, "step": 19525 }, { "epoch": 3.7794117647058822, "grad_norm": 0.10240139067173004, "learning_rate": 7.21002726173129e-05, "loss": 0.0062, "step": 19526 }, { "epoch": 3.7796052631578947, "grad_norm": 0.031645141541957855, "learning_rate": 7.209773461231245e-05, "loss": 0.006, "step": 19527 }, { "epoch": 3.779798761609907, "grad_norm": 0.08787935972213745, "learning_rate": 7.209519654374683e-05, "loss": 0.0069, "step": 19528 }, { "epoch": 3.7799922600619196, "grad_norm": 0.05552225559949875, "learning_rate": 7.209265841162549e-05, "loss": 0.0057, "step": 19529 }, { "epoch": 3.780185758513932, "grad_norm": 0.07068862020969391, "learning_rate": 7.209012021595784e-05, "loss": 0.0063, "step": 19530 }, { "epoch": 3.7803792569659445, "grad_norm": 0.05857550725340843, "learning_rate": 7.208758195675335e-05, "loss": 0.0062, "step": 19531 }, { "epoch": 3.780572755417957, "grad_norm": 0.0775938406586647, "learning_rate": 7.208504363402146e-05, "loss": 0.0046, "step": 19532 }, { "epoch": 3.780766253869969, "grad_norm": 0.05888265743851662, "learning_rate": 7.208250524777156e-05, "loss": 0.0049, "step": 19533 }, { "epoch": 3.7809597523219813, "grad_norm": 0.08194084465503693, "learning_rate": 7.207996679801313e-05, "loss": 0.0068, "step": 19534 }, { "epoch": 3.781153250773994, "grad_norm": 0.0633184090256691, "learning_rate": 7.207742828475558e-05, "loss": 0.0076, "step": 19535 }, { "epoch": 3.781346749226006, "grad_norm": 0.07345958799123764, "learning_rate": 7.207488970800836e-05, "loss": 0.0064, "step": 19536 }, { "epoch": 3.7815402476780187, "grad_norm": 0.044276490807533264, "learning_rate": 7.207235106778091e-05, "loss": 0.0061, "step": 19537 }, { "epoch": 3.781733746130031, "grad_norm": 0.03599421679973602, "learning_rate": 7.206981236408268e-05, "loss": 0.0058, "step": 19538 }, { "epoch": 3.781927244582043, "grad_norm": 0.06438955664634705, "learning_rate": 7.206727359692308e-05, "loss": 0.0072, "step": 19539 }, { "epoch": 3.7821207430340555, "grad_norm": 0.02896890416741371, "learning_rate": 7.206473476631156e-05, "loss": 0.0074, "step": 
19540 }, { "epoch": 3.782314241486068, "grad_norm": 0.04376707598567009, "learning_rate": 7.206219587225757e-05, "loss": 0.0061, "step": 19541 }, { "epoch": 3.7825077399380804, "grad_norm": 0.05610676109790802, "learning_rate": 7.205965691477053e-05, "loss": 0.0056, "step": 19542 }, { "epoch": 3.782701238390093, "grad_norm": 0.03156707435846329, "learning_rate": 7.20571178938599e-05, "loss": 0.0062, "step": 19543 }, { "epoch": 3.7828947368421053, "grad_norm": 0.06487387418746948, "learning_rate": 7.205457880953509e-05, "loss": 0.0053, "step": 19544 }, { "epoch": 3.7830882352941178, "grad_norm": 0.038055263459682465, "learning_rate": 7.205203966180556e-05, "loss": 0.0066, "step": 19545 }, { "epoch": 3.78328173374613, "grad_norm": 0.07002776116132736, "learning_rate": 7.204950045068077e-05, "loss": 0.0078, "step": 19546 }, { "epoch": 3.7834752321981426, "grad_norm": 0.04002377390861511, "learning_rate": 7.204696117617013e-05, "loss": 0.0053, "step": 19547 }, { "epoch": 3.7836687306501546, "grad_norm": 0.10910338163375854, "learning_rate": 7.204442183828309e-05, "loss": 0.0058, "step": 19548 }, { "epoch": 3.783862229102167, "grad_norm": 0.04914065822958946, "learning_rate": 7.204188243702907e-05, "loss": 0.0061, "step": 19549 }, { "epoch": 3.7840557275541795, "grad_norm": 0.07643014937639236, "learning_rate": 7.203934297241755e-05, "loss": 0.0071, "step": 19550 }, { "epoch": 3.784249226006192, "grad_norm": 0.050739843398332596, "learning_rate": 7.203680344445795e-05, "loss": 0.0066, "step": 19551 }, { "epoch": 3.7844427244582044, "grad_norm": 0.0786048099398613, "learning_rate": 7.203426385315971e-05, "loss": 0.0065, "step": 19552 }, { "epoch": 3.784636222910217, "grad_norm": 0.050336357206106186, "learning_rate": 7.203172419853227e-05, "loss": 0.006, "step": 19553 }, { "epoch": 3.784829721362229, "grad_norm": 0.06481637060642242, "learning_rate": 7.202918448058508e-05, "loss": 0.0058, "step": 19554 }, { "epoch": 3.7850232198142413, "grad_norm": 0.043129149824380875, "learning_rate": 7.202664469932757e-05, "loss": 0.0063, "step": 19555 }, { "epoch": 3.7852167182662537, "grad_norm": 0.05864286795258522, "learning_rate": 7.20241048547692e-05, "loss": 0.0076, "step": 19556 }, { "epoch": 3.785410216718266, "grad_norm": 0.05389969050884247, "learning_rate": 7.202156494691941e-05, "loss": 0.0068, "step": 19557 }, { "epoch": 3.7856037151702786, "grad_norm": 0.07955154776573181, "learning_rate": 7.201902497578764e-05, "loss": 0.0055, "step": 19558 }, { "epoch": 3.785797213622291, "grad_norm": 0.054600201547145844, "learning_rate": 7.20164849413833e-05, "loss": 0.0073, "step": 19559 }, { "epoch": 3.7859907120743035, "grad_norm": 0.0649031326174736, "learning_rate": 7.201394484371587e-05, "loss": 0.007, "step": 19560 }, { "epoch": 3.786184210526316, "grad_norm": 0.07487665861845016, "learning_rate": 7.20114046827948e-05, "loss": 0.0062, "step": 19561 }, { "epoch": 3.7863777089783284, "grad_norm": 0.04061059653759003, "learning_rate": 7.200886445862951e-05, "loss": 0.0063, "step": 19562 }, { "epoch": 3.786571207430341, "grad_norm": 0.07460375875234604, "learning_rate": 7.200632417122947e-05, "loss": 0.0073, "step": 19563 }, { "epoch": 3.786764705882353, "grad_norm": 0.02725452557206154, "learning_rate": 7.200378382060409e-05, "loss": 0.0071, "step": 19564 }, { "epoch": 3.7869582043343653, "grad_norm": 0.058284942060709, "learning_rate": 7.200124340676283e-05, "loss": 0.0065, "step": 19565 }, { "epoch": 3.7871517027863777, "grad_norm": 0.04922138527035713, "learning_rate": 7.199870292971515e-05, "loss": 
0.0057, "step": 19566 }, { "epoch": 3.78734520123839, "grad_norm": 0.056060127913951874, "learning_rate": 7.199616238947046e-05, "loss": 0.0065, "step": 19567 }, { "epoch": 3.7875386996904026, "grad_norm": 0.05396308749914169, "learning_rate": 7.199362178603824e-05, "loss": 0.0072, "step": 19568 }, { "epoch": 3.787732198142415, "grad_norm": 0.048075612634420395, "learning_rate": 7.199108111942792e-05, "loss": 0.0055, "step": 19569 }, { "epoch": 3.787925696594427, "grad_norm": 0.05911741778254509, "learning_rate": 7.198854038964894e-05, "loss": 0.0067, "step": 19570 }, { "epoch": 3.7881191950464395, "grad_norm": 0.04706413298845291, "learning_rate": 7.198599959671077e-05, "loss": 0.0057, "step": 19571 }, { "epoch": 3.788312693498452, "grad_norm": 0.04821465536952019, "learning_rate": 7.198345874062282e-05, "loss": 0.0056, "step": 19572 }, { "epoch": 3.7885061919504643, "grad_norm": 0.053814686834812164, "learning_rate": 7.198091782139457e-05, "loss": 0.0071, "step": 19573 }, { "epoch": 3.788699690402477, "grad_norm": 0.05015627667307854, "learning_rate": 7.197837683903543e-05, "loss": 0.0062, "step": 19574 }, { "epoch": 3.7888931888544892, "grad_norm": 0.05666385963559151, "learning_rate": 7.197583579355488e-05, "loss": 0.0076, "step": 19575 }, { "epoch": 3.7890866873065017, "grad_norm": 0.05022671818733215, "learning_rate": 7.197329468496237e-05, "loss": 0.0074, "step": 19576 }, { "epoch": 3.789280185758514, "grad_norm": 0.05577714368700981, "learning_rate": 7.197075351326732e-05, "loss": 0.0075, "step": 19577 }, { "epoch": 3.7894736842105265, "grad_norm": 0.04028419405221939, "learning_rate": 7.196821227847919e-05, "loss": 0.0059, "step": 19578 }, { "epoch": 3.7896671826625385, "grad_norm": 0.06726133078336716, "learning_rate": 7.196567098060743e-05, "loss": 0.0081, "step": 19579 }, { "epoch": 3.789860681114551, "grad_norm": 0.04333782196044922, "learning_rate": 7.19631296196615e-05, "loss": 0.0045, "step": 19580 }, { "epoch": 3.7900541795665634, "grad_norm": 0.05318093299865723, "learning_rate": 7.19605881956508e-05, "loss": 0.0051, "step": 19581 }, { "epoch": 3.790247678018576, "grad_norm": 0.053988803178071976, "learning_rate": 7.195804670858484e-05, "loss": 0.006, "step": 19582 }, { "epoch": 3.7904411764705883, "grad_norm": 0.07000748068094254, "learning_rate": 7.195550515847302e-05, "loss": 0.0052, "step": 19583 }, { "epoch": 3.7906346749226008, "grad_norm": 0.04645967856049538, "learning_rate": 7.195296354532483e-05, "loss": 0.0061, "step": 19584 }, { "epoch": 3.7908281733746128, "grad_norm": 0.0736834928393364, "learning_rate": 7.195042186914971e-05, "loss": 0.0047, "step": 19585 }, { "epoch": 3.791021671826625, "grad_norm": 0.0773056223988533, "learning_rate": 7.194788012995707e-05, "loss": 0.0072, "step": 19586 }, { "epoch": 3.7912151702786376, "grad_norm": 0.05325866863131523, "learning_rate": 7.19453383277564e-05, "loss": 0.0052, "step": 19587 }, { "epoch": 3.79140866873065, "grad_norm": 0.08551890403032303, "learning_rate": 7.194279646255713e-05, "loss": 0.0053, "step": 19588 }, { "epoch": 3.7916021671826625, "grad_norm": 0.03522248566150665, "learning_rate": 7.194025453436873e-05, "loss": 0.0062, "step": 19589 }, { "epoch": 3.791795665634675, "grad_norm": 0.04900158941745758, "learning_rate": 7.193771254320062e-05, "loss": 0.007, "step": 19590 }, { "epoch": 3.7919891640866874, "grad_norm": 0.08279945701360703, "learning_rate": 7.193517048906229e-05, "loss": 0.0098, "step": 19591 }, { "epoch": 3.7921826625387, "grad_norm": 0.04528689384460449, "learning_rate": 
7.193262837196315e-05, "loss": 0.0057, "step": 19592 }, { "epoch": 3.7923761609907123, "grad_norm": 0.07882750034332275, "learning_rate": 7.193008619191267e-05, "loss": 0.0064, "step": 19593 }, { "epoch": 3.7925696594427247, "grad_norm": 0.07699888199567795, "learning_rate": 7.192754394892031e-05, "loss": 0.0046, "step": 19594 }, { "epoch": 3.7927631578947367, "grad_norm": 0.056643322110176086, "learning_rate": 7.192500164299551e-05, "loss": 0.007, "step": 19595 }, { "epoch": 3.792956656346749, "grad_norm": 0.10310667008161545, "learning_rate": 7.192245927414772e-05, "loss": 0.0064, "step": 19596 }, { "epoch": 3.7931501547987616, "grad_norm": 0.05289892852306366, "learning_rate": 7.191991684238641e-05, "loss": 0.0054, "step": 19597 }, { "epoch": 3.793343653250774, "grad_norm": 0.09956352412700653, "learning_rate": 7.191737434772102e-05, "loss": 0.0048, "step": 19598 }, { "epoch": 3.7935371517027865, "grad_norm": 0.10310190916061401, "learning_rate": 7.191483179016099e-05, "loss": 0.0061, "step": 19599 }, { "epoch": 3.7937306501547985, "grad_norm": 0.058623261749744415, "learning_rate": 7.191228916971576e-05, "loss": 0.0064, "step": 19600 }, { "epoch": 3.793924148606811, "grad_norm": 0.09044846892356873, "learning_rate": 7.190974648639485e-05, "loss": 0.0058, "step": 19601 }, { "epoch": 3.7941176470588234, "grad_norm": 0.026719918474555016, "learning_rate": 7.190720374020764e-05, "loss": 0.0059, "step": 19602 }, { "epoch": 3.794311145510836, "grad_norm": 0.06465326249599457, "learning_rate": 7.190466093116363e-05, "loss": 0.0068, "step": 19603 }, { "epoch": 3.7945046439628483, "grad_norm": 0.03928523510694504, "learning_rate": 7.190211805927226e-05, "loss": 0.0064, "step": 19604 }, { "epoch": 3.7946981424148607, "grad_norm": 0.06699343025684357, "learning_rate": 7.189957512454294e-05, "loss": 0.0067, "step": 19605 }, { "epoch": 3.794891640866873, "grad_norm": 0.02438165247440338, "learning_rate": 7.18970321269852e-05, "loss": 0.006, "step": 19606 }, { "epoch": 3.7950851393188856, "grad_norm": 0.05880911648273468, "learning_rate": 7.189448906660845e-05, "loss": 0.0057, "step": 19607 }, { "epoch": 3.795278637770898, "grad_norm": 0.030496535822749138, "learning_rate": 7.189194594342215e-05, "loss": 0.0072, "step": 19608 }, { "epoch": 3.7954721362229105, "grad_norm": 0.07365729659795761, "learning_rate": 7.188940275743578e-05, "loss": 0.0059, "step": 19609 }, { "epoch": 3.7956656346749225, "grad_norm": 0.026455596089363098, "learning_rate": 7.188685950865874e-05, "loss": 0.0062, "step": 19610 }, { "epoch": 3.795859133126935, "grad_norm": 0.07143578678369522, "learning_rate": 7.188431619710053e-05, "loss": 0.0052, "step": 19611 }, { "epoch": 3.7960526315789473, "grad_norm": 0.04802331328392029, "learning_rate": 7.18817728227706e-05, "loss": 0.0067, "step": 19612 }, { "epoch": 3.79624613003096, "grad_norm": 0.0938355103135109, "learning_rate": 7.187922938567839e-05, "loss": 0.0053, "step": 19613 }, { "epoch": 3.7964396284829722, "grad_norm": 0.057826679199934006, "learning_rate": 7.187668588583336e-05, "loss": 0.0065, "step": 19614 }, { "epoch": 3.7966331269349847, "grad_norm": 0.10658900439739227, "learning_rate": 7.187414232324497e-05, "loss": 0.0064, "step": 19615 }, { "epoch": 3.7968266253869967, "grad_norm": 0.0651872530579567, "learning_rate": 7.187159869792268e-05, "loss": 0.0042, "step": 19616 }, { "epoch": 3.797020123839009, "grad_norm": 0.09644940495491028, "learning_rate": 7.186905500987597e-05, "loss": 0.0064, "step": 19617 }, { "epoch": 3.7972136222910216, "grad_norm": 
0.07285168766975403, "learning_rate": 7.186651125911422e-05, "loss": 0.007, "step": 19618 }, { "epoch": 3.797407120743034, "grad_norm": 0.064029760658741, "learning_rate": 7.186396744564696e-05, "loss": 0.0062, "step": 19619 }, { "epoch": 3.7976006191950464, "grad_norm": 0.07520415633916855, "learning_rate": 7.186142356948364e-05, "loss": 0.0057, "step": 19620 }, { "epoch": 3.797794117647059, "grad_norm": 0.05924978107213974, "learning_rate": 7.185887963063367e-05, "loss": 0.0059, "step": 19621 }, { "epoch": 3.7979876160990713, "grad_norm": 0.06599746644496918, "learning_rate": 7.185633562910658e-05, "loss": 0.0068, "step": 19622 }, { "epoch": 3.7981811145510838, "grad_norm": 0.062359314411878586, "learning_rate": 7.185379156491175e-05, "loss": 0.0073, "step": 19623 }, { "epoch": 3.798374613003096, "grad_norm": 0.07229993492364883, "learning_rate": 7.185124743805872e-05, "loss": 0.009, "step": 19624 }, { "epoch": 3.798568111455108, "grad_norm": 0.08193463832139969, "learning_rate": 7.184870324855686e-05, "loss": 0.0057, "step": 19625 }, { "epoch": 3.7987616099071206, "grad_norm": 0.0680396780371666, "learning_rate": 7.184615899641569e-05, "loss": 0.0072, "step": 19626 }, { "epoch": 3.798955108359133, "grad_norm": 0.10063681751489639, "learning_rate": 7.184361468164464e-05, "loss": 0.0081, "step": 19627 }, { "epoch": 3.7991486068111455, "grad_norm": 0.06622166931629181, "learning_rate": 7.18410703042532e-05, "loss": 0.0066, "step": 19628 }, { "epoch": 3.799342105263158, "grad_norm": 0.08362893015146255, "learning_rate": 7.183852586425081e-05, "loss": 0.0056, "step": 19629 }, { "epoch": 3.7995356037151704, "grad_norm": 0.0818880945444107, "learning_rate": 7.183598136164691e-05, "loss": 0.0083, "step": 19630 }, { "epoch": 3.7997291021671824, "grad_norm": 0.0699034258723259, "learning_rate": 7.1833436796451e-05, "loss": 0.006, "step": 19631 }, { "epoch": 3.799922600619195, "grad_norm": 0.14160919189453125, "learning_rate": 7.18308921686725e-05, "loss": 0.006, "step": 19632 }, { "epoch": 3.8001160990712073, "grad_norm": 0.10293842107057571, "learning_rate": 7.182834747832091e-05, "loss": 0.0064, "step": 19633 }, { "epoch": 3.8003095975232197, "grad_norm": 0.10566557198762894, "learning_rate": 7.182580272540566e-05, "loss": 0.0065, "step": 19634 }, { "epoch": 3.800503095975232, "grad_norm": 0.1127944067120552, "learning_rate": 7.182325790993623e-05, "loss": 0.0064, "step": 19635 }, { "epoch": 3.8006965944272446, "grad_norm": 0.07337276637554169, "learning_rate": 7.182071303192207e-05, "loss": 0.0066, "step": 19636 }, { "epoch": 3.800890092879257, "grad_norm": 0.10818591713905334, "learning_rate": 7.181816809137263e-05, "loss": 0.0067, "step": 19637 }, { "epoch": 3.8010835913312695, "grad_norm": 0.10674915462732315, "learning_rate": 7.18156230882974e-05, "loss": 0.0077, "step": 19638 }, { "epoch": 3.801277089783282, "grad_norm": 0.07108680158853531, "learning_rate": 7.181307802270582e-05, "loss": 0.0063, "step": 19639 }, { "epoch": 3.8014705882352944, "grad_norm": 0.13694265484809875, "learning_rate": 7.181053289460737e-05, "loss": 0.007, "step": 19640 }, { "epoch": 3.8016640866873064, "grad_norm": 0.03713472560048103, "learning_rate": 7.180798770401149e-05, "loss": 0.0057, "step": 19641 }, { "epoch": 3.801857585139319, "grad_norm": 0.12864145636558533, "learning_rate": 7.180544245092766e-05, "loss": 0.0068, "step": 19642 }, { "epoch": 3.8020510835913313, "grad_norm": 0.0509890541434288, "learning_rate": 7.180289713536532e-05, "loss": 0.0056, "step": 19643 }, { "epoch": 3.8022445820433437, 
"grad_norm": 0.10833793133497238, "learning_rate": 7.180035175733395e-05, "loss": 0.0078, "step": 19644 }, { "epoch": 3.802438080495356, "grad_norm": 0.0767962783575058, "learning_rate": 7.179780631684304e-05, "loss": 0.0057, "step": 19645 }, { "epoch": 3.8026315789473686, "grad_norm": 0.07490038126707077, "learning_rate": 7.1795260813902e-05, "loss": 0.0066, "step": 19646 }, { "epoch": 3.8028250773993806, "grad_norm": 0.060767143964767456, "learning_rate": 7.179271524852033e-05, "loss": 0.0059, "step": 19647 }, { "epoch": 3.803018575851393, "grad_norm": 0.05993494763970375, "learning_rate": 7.179016962070747e-05, "loss": 0.0067, "step": 19648 }, { "epoch": 3.8032120743034055, "grad_norm": 0.05029423162341118, "learning_rate": 7.178762393047292e-05, "loss": 0.0052, "step": 19649 }, { "epoch": 3.803405572755418, "grad_norm": 0.036210257560014725, "learning_rate": 7.178507817782609e-05, "loss": 0.0061, "step": 19650 }, { "epoch": 3.8035990712074303, "grad_norm": 0.042797721922397614, "learning_rate": 7.17825323627765e-05, "loss": 0.007, "step": 19651 }, { "epoch": 3.803792569659443, "grad_norm": 0.059751033782958984, "learning_rate": 7.177998648533357e-05, "loss": 0.0055, "step": 19652 }, { "epoch": 3.8039860681114552, "grad_norm": 0.056302983313798904, "learning_rate": 7.177744054550679e-05, "loss": 0.0074, "step": 19653 }, { "epoch": 3.8041795665634677, "grad_norm": 0.05938008427619934, "learning_rate": 7.177489454330562e-05, "loss": 0.0049, "step": 19654 }, { "epoch": 3.80437306501548, "grad_norm": 0.03940051794052124, "learning_rate": 7.177234847873955e-05, "loss": 0.0058, "step": 19655 }, { "epoch": 3.804566563467492, "grad_norm": 0.06349337846040726, "learning_rate": 7.176980235181801e-05, "loss": 0.0059, "step": 19656 }, { "epoch": 3.8047600619195046, "grad_norm": 0.058260198682546616, "learning_rate": 7.176725616255046e-05, "loss": 0.0064, "step": 19657 }, { "epoch": 3.804953560371517, "grad_norm": 0.047460444271564484, "learning_rate": 7.176470991094639e-05, "loss": 0.0063, "step": 19658 }, { "epoch": 3.8051470588235294, "grad_norm": 0.07603993266820908, "learning_rate": 7.176216359701527e-05, "loss": 0.0059, "step": 19659 }, { "epoch": 3.805340557275542, "grad_norm": 0.05972722917795181, "learning_rate": 7.175961722076655e-05, "loss": 0.006, "step": 19660 }, { "epoch": 3.8055340557275543, "grad_norm": 0.04480352625250816, "learning_rate": 7.17570707822097e-05, "loss": 0.0058, "step": 19661 }, { "epoch": 3.8057275541795663, "grad_norm": 0.09241895377635956, "learning_rate": 7.175452428135419e-05, "loss": 0.0087, "step": 19662 }, { "epoch": 3.8059210526315788, "grad_norm": 0.024385934695601463, "learning_rate": 7.175197771820948e-05, "loss": 0.0068, "step": 19663 }, { "epoch": 3.806114551083591, "grad_norm": 0.08267784118652344, "learning_rate": 7.174943109278505e-05, "loss": 0.0066, "step": 19664 }, { "epoch": 3.8063080495356036, "grad_norm": 0.04492044821381569, "learning_rate": 7.174688440509038e-05, "loss": 0.0061, "step": 19665 }, { "epoch": 3.806501547987616, "grad_norm": 0.06905090063810349, "learning_rate": 7.17443376551349e-05, "loss": 0.0052, "step": 19666 }, { "epoch": 3.8066950464396285, "grad_norm": 0.05731217935681343, "learning_rate": 7.174179084292811e-05, "loss": 0.0068, "step": 19667 }, { "epoch": 3.806888544891641, "grad_norm": 0.07956044375896454, "learning_rate": 7.173924396847946e-05, "loss": 0.0066, "step": 19668 }, { "epoch": 3.8070820433436534, "grad_norm": 0.045223888009786606, "learning_rate": 7.173669703179842e-05, "loss": 0.0065, "step": 19669 }, { 
"epoch": 3.807275541795666, "grad_norm": 0.07383216917514801, "learning_rate": 7.173415003289446e-05, "loss": 0.0068, "step": 19670 }, { "epoch": 3.8074690402476783, "grad_norm": 0.051306258887052536, "learning_rate": 7.173160297177708e-05, "loss": 0.0071, "step": 19671 }, { "epoch": 3.8076625386996903, "grad_norm": 0.061913326382637024, "learning_rate": 7.17290558484557e-05, "loss": 0.007, "step": 19672 }, { "epoch": 3.8078560371517027, "grad_norm": 0.07474419474601746, "learning_rate": 7.172650866293982e-05, "loss": 0.0061, "step": 19673 }, { "epoch": 3.808049535603715, "grad_norm": 0.0644516795873642, "learning_rate": 7.17239614152389e-05, "loss": 0.0058, "step": 19674 }, { "epoch": 3.8082430340557276, "grad_norm": 0.07186706364154816, "learning_rate": 7.172141410536243e-05, "loss": 0.0071, "step": 19675 }, { "epoch": 3.80843653250774, "grad_norm": 0.04283706471323967, "learning_rate": 7.171886673331983e-05, "loss": 0.0073, "step": 19676 }, { "epoch": 3.808630030959752, "grad_norm": 0.06712592393159866, "learning_rate": 7.171631929912062e-05, "loss": 0.0068, "step": 19677 }, { "epoch": 3.8088235294117645, "grad_norm": 0.04406776279211044, "learning_rate": 7.171377180277426e-05, "loss": 0.0068, "step": 19678 }, { "epoch": 3.809017027863777, "grad_norm": 0.06291036307811737, "learning_rate": 7.171122424429022e-05, "loss": 0.0065, "step": 19679 }, { "epoch": 3.8092105263157894, "grad_norm": 0.05370014160871506, "learning_rate": 7.170867662367796e-05, "loss": 0.0067, "step": 19680 }, { "epoch": 3.809404024767802, "grad_norm": 0.0682973638176918, "learning_rate": 7.170612894094694e-05, "loss": 0.0069, "step": 19681 }, { "epoch": 3.8095975232198143, "grad_norm": 0.05390362814068794, "learning_rate": 7.170358119610666e-05, "loss": 0.0056, "step": 19682 }, { "epoch": 3.8097910216718267, "grad_norm": 0.10452049225568771, "learning_rate": 7.170103338916658e-05, "loss": 0.005, "step": 19683 }, { "epoch": 3.809984520123839, "grad_norm": 0.0579850971698761, "learning_rate": 7.169848552013618e-05, "loss": 0.0057, "step": 19684 }, { "epoch": 3.8101780185758516, "grad_norm": 0.09251733869314194, "learning_rate": 7.169593758902492e-05, "loss": 0.0051, "step": 19685 }, { "epoch": 3.810371517027864, "grad_norm": 0.02172037772834301, "learning_rate": 7.169338959584229e-05, "loss": 0.0068, "step": 19686 }, { "epoch": 3.810565015479876, "grad_norm": 0.08384395390748978, "learning_rate": 7.169084154059775e-05, "loss": 0.0056, "step": 19687 }, { "epoch": 3.8107585139318885, "grad_norm": 0.03879883140325546, "learning_rate": 7.168829342330075e-05, "loss": 0.0078, "step": 19688 }, { "epoch": 3.810952012383901, "grad_norm": 0.040296196937561035, "learning_rate": 7.168574524396078e-05, "loss": 0.0054, "step": 19689 }, { "epoch": 3.8111455108359134, "grad_norm": 0.047231003642082214, "learning_rate": 7.168319700258735e-05, "loss": 0.0066, "step": 19690 }, { "epoch": 3.811339009287926, "grad_norm": 0.02939116396009922, "learning_rate": 7.16806486991899e-05, "loss": 0.0064, "step": 19691 }, { "epoch": 3.8115325077399382, "grad_norm": 0.03450005128979683, "learning_rate": 7.167810033377791e-05, "loss": 0.0072, "step": 19692 }, { "epoch": 3.8117260061919502, "grad_norm": 0.024926407262682915, "learning_rate": 7.167555190636084e-05, "loss": 0.0064, "step": 19693 }, { "epoch": 3.8119195046439627, "grad_norm": 0.039862677454948425, "learning_rate": 7.167300341694819e-05, "loss": 0.0059, "step": 19694 }, { "epoch": 3.812113003095975, "grad_norm": 0.015628065913915634, "learning_rate": 7.167045486554941e-05, "loss": 0.0057, 
"step": 19695 }, { "epoch": 3.8123065015479876, "grad_norm": 0.03605267405509949, "learning_rate": 7.166790625217399e-05, "loss": 0.0072, "step": 19696 }, { "epoch": 3.8125, "grad_norm": 0.03249900043010712, "learning_rate": 7.166535757683141e-05, "loss": 0.0061, "step": 19697 }, { "epoch": 3.8126934984520124, "grad_norm": 0.020829439163208008, "learning_rate": 7.166280883953112e-05, "loss": 0.0063, "step": 19698 }, { "epoch": 3.812886996904025, "grad_norm": 0.0477752760052681, "learning_rate": 7.166026004028263e-05, "loss": 0.0057, "step": 19699 }, { "epoch": 3.8130804953560373, "grad_norm": 0.02689444087445736, "learning_rate": 7.165771117909539e-05, "loss": 0.0061, "step": 19700 }, { "epoch": 3.8132739938080498, "grad_norm": 0.045172907412052155, "learning_rate": 7.165516225597888e-05, "loss": 0.0076, "step": 19701 }, { "epoch": 3.8134674922600618, "grad_norm": 0.045533135533332825, "learning_rate": 7.16526132709426e-05, "loss": 0.0062, "step": 19702 }, { "epoch": 3.813660990712074, "grad_norm": 0.03829839080572128, "learning_rate": 7.165006422399599e-05, "loss": 0.0066, "step": 19703 }, { "epoch": 3.8138544891640866, "grad_norm": 0.05329594761133194, "learning_rate": 7.164751511514856e-05, "loss": 0.0057, "step": 19704 }, { "epoch": 3.814047987616099, "grad_norm": 0.021635854616761208, "learning_rate": 7.164496594440975e-05, "loss": 0.0066, "step": 19705 }, { "epoch": 3.8142414860681115, "grad_norm": 0.03469665348529816, "learning_rate": 7.164241671178907e-05, "loss": 0.008, "step": 19706 }, { "epoch": 3.814434984520124, "grad_norm": 0.055576641112565994, "learning_rate": 7.163986741729597e-05, "loss": 0.0062, "step": 19707 }, { "epoch": 3.814628482972136, "grad_norm": 0.03929128870368004, "learning_rate": 7.163731806093996e-05, "loss": 0.0065, "step": 19708 }, { "epoch": 3.8148219814241484, "grad_norm": 0.04836459830403328, "learning_rate": 7.16347686427305e-05, "loss": 0.0058, "step": 19709 }, { "epoch": 3.815015479876161, "grad_norm": 0.04936638101935387, "learning_rate": 7.163221916267706e-05, "loss": 0.0066, "step": 19710 }, { "epoch": 3.8152089783281733, "grad_norm": 0.07031437009572983, "learning_rate": 7.162966962078914e-05, "loss": 0.0056, "step": 19711 }, { "epoch": 3.8154024767801857, "grad_norm": 0.06305033713579178, "learning_rate": 7.16271200170762e-05, "loss": 0.0049, "step": 19712 }, { "epoch": 3.815595975232198, "grad_norm": 0.06046698987483978, "learning_rate": 7.16245703515477e-05, "loss": 0.006, "step": 19713 }, { "epoch": 3.8157894736842106, "grad_norm": 0.06844952702522278, "learning_rate": 7.162202062421318e-05, "loss": 0.006, "step": 19714 }, { "epoch": 3.815982972136223, "grad_norm": 0.044310927391052246, "learning_rate": 7.161947083508207e-05, "loss": 0.0068, "step": 19715 }, { "epoch": 3.8161764705882355, "grad_norm": 0.07450415194034576, "learning_rate": 7.161692098416386e-05, "loss": 0.0059, "step": 19716 }, { "epoch": 3.816369969040248, "grad_norm": 0.048524171113967896, "learning_rate": 7.161437107146805e-05, "loss": 0.0063, "step": 19717 }, { "epoch": 3.81656346749226, "grad_norm": 0.0411563478410244, "learning_rate": 7.161182109700409e-05, "loss": 0.0063, "step": 19718 }, { "epoch": 3.8167569659442724, "grad_norm": 0.08878692239522934, "learning_rate": 7.160927106078146e-05, "loss": 0.0062, "step": 19719 }, { "epoch": 3.816950464396285, "grad_norm": 0.03923087939620018, "learning_rate": 7.160672096280965e-05, "loss": 0.0056, "step": 19720 }, { "epoch": 3.8171439628482973, "grad_norm": 0.05925365537405014, "learning_rate": 7.160417080309816e-05, "loss": 
0.0057, "step": 19721 }, { "epoch": 3.8173374613003097, "grad_norm": 0.03498798981308937, "learning_rate": 7.160162058165645e-05, "loss": 0.0078, "step": 19722 }, { "epoch": 3.8175309597523217, "grad_norm": 0.07646899670362473, "learning_rate": 7.159907029849401e-05, "loss": 0.0064, "step": 19723 }, { "epoch": 3.817724458204334, "grad_norm": 0.03957643732428551, "learning_rate": 7.159651995362032e-05, "loss": 0.0063, "step": 19724 }, { "epoch": 3.8179179566563466, "grad_norm": 0.08827918022871017, "learning_rate": 7.159396954704485e-05, "loss": 0.0067, "step": 19725 }, { "epoch": 3.818111455108359, "grad_norm": 0.07426470518112183, "learning_rate": 7.159141907877708e-05, "loss": 0.0068, "step": 19726 }, { "epoch": 3.8183049535603715, "grad_norm": 0.08524215966463089, "learning_rate": 7.158886854882652e-05, "loss": 0.0054, "step": 19727 }, { "epoch": 3.818498452012384, "grad_norm": 0.0886237621307373, "learning_rate": 7.158631795720263e-05, "loss": 0.0071, "step": 19728 }, { "epoch": 3.8186919504643964, "grad_norm": 0.06602012366056442, "learning_rate": 7.15837673039149e-05, "loss": 0.0065, "step": 19729 }, { "epoch": 3.818885448916409, "grad_norm": 0.10343831032514572, "learning_rate": 7.15812165889728e-05, "loss": 0.0063, "step": 19730 }, { "epoch": 3.8190789473684212, "grad_norm": 0.04793443903326988, "learning_rate": 7.157866581238583e-05, "loss": 0.0059, "step": 19731 }, { "epoch": 3.8192724458204337, "grad_norm": 0.10362732410430908, "learning_rate": 7.157611497416346e-05, "loss": 0.0063, "step": 19732 }, { "epoch": 3.8194659442724457, "grad_norm": 0.0856947973370552, "learning_rate": 7.157356407431517e-05, "loss": 0.0067, "step": 19733 }, { "epoch": 3.819659442724458, "grad_norm": 0.07940678298473358, "learning_rate": 7.157101311285046e-05, "loss": 0.0061, "step": 19734 }, { "epoch": 3.8198529411764706, "grad_norm": 0.11675330251455307, "learning_rate": 7.156846208977881e-05, "loss": 0.0063, "step": 19735 }, { "epoch": 3.820046439628483, "grad_norm": 0.03936824947595596, "learning_rate": 7.156591100510971e-05, "loss": 0.0062, "step": 19736 }, { "epoch": 3.8202399380804954, "grad_norm": 0.07929088920354843, "learning_rate": 7.156335985885262e-05, "loss": 0.0058, "step": 19737 }, { "epoch": 3.820433436532508, "grad_norm": 0.07512299716472626, "learning_rate": 7.156080865101704e-05, "loss": 0.0062, "step": 19738 }, { "epoch": 3.82062693498452, "grad_norm": 0.05780826136469841, "learning_rate": 7.155825738161246e-05, "loss": 0.007, "step": 19739 }, { "epoch": 3.8208204334365323, "grad_norm": 0.07566998898983002, "learning_rate": 7.155570605064836e-05, "loss": 0.0052, "step": 19740 }, { "epoch": 3.8210139318885448, "grad_norm": 0.04819396883249283, "learning_rate": 7.155315465813422e-05, "loss": 0.0062, "step": 19741 }, { "epoch": 3.821207430340557, "grad_norm": 0.07565385848283768, "learning_rate": 7.155060320407953e-05, "loss": 0.0065, "step": 19742 }, { "epoch": 3.8214009287925697, "grad_norm": 0.044196005910634995, "learning_rate": 7.154805168849379e-05, "loss": 0.006, "step": 19743 }, { "epoch": 3.821594427244582, "grad_norm": 0.06702234596014023, "learning_rate": 7.154550011138646e-05, "loss": 0.0063, "step": 19744 }, { "epoch": 3.8217879256965945, "grad_norm": 0.07496870309114456, "learning_rate": 7.154294847276703e-05, "loss": 0.0051, "step": 19745 }, { "epoch": 3.821981424148607, "grad_norm": 0.08085216581821442, "learning_rate": 7.1540396772645e-05, "loss": 0.0059, "step": 19746 }, { "epoch": 3.8221749226006194, "grad_norm": 0.06736746430397034, "learning_rate": 
7.153784501102983e-05, "loss": 0.006, "step": 19747 }, { "epoch": 3.8223684210526314, "grad_norm": 0.05625949800014496, "learning_rate": 7.153529318793106e-05, "loss": 0.0054, "step": 19748 }, { "epoch": 3.822561919504644, "grad_norm": 0.046819109469652176, "learning_rate": 7.153274130335813e-05, "loss": 0.0065, "step": 19749 }, { "epoch": 3.8227554179566563, "grad_norm": 0.07310216128826141, "learning_rate": 7.153018935732056e-05, "loss": 0.0068, "step": 19750 }, { "epoch": 3.8229489164086687, "grad_norm": 0.03219091519713402, "learning_rate": 7.152763734982779e-05, "loss": 0.0063, "step": 19751 }, { "epoch": 3.823142414860681, "grad_norm": 0.07824769616127014, "learning_rate": 7.152508528088934e-05, "loss": 0.006, "step": 19752 }, { "epoch": 3.8233359133126936, "grad_norm": 0.03573285788297653, "learning_rate": 7.152253315051471e-05, "loss": 0.0052, "step": 19753 }, { "epoch": 3.8235294117647056, "grad_norm": 0.05248483270406723, "learning_rate": 7.151998095871338e-05, "loss": 0.005, "step": 19754 }, { "epoch": 3.823722910216718, "grad_norm": 0.03974050283432007, "learning_rate": 7.151742870549481e-05, "loss": 0.0075, "step": 19755 }, { "epoch": 3.8239164086687305, "grad_norm": 0.03962499648332596, "learning_rate": 7.151487639086852e-05, "loss": 0.0057, "step": 19756 }, { "epoch": 3.824109907120743, "grad_norm": 0.04433732479810715, "learning_rate": 7.1512324014844e-05, "loss": 0.0057, "step": 19757 }, { "epoch": 3.8243034055727554, "grad_norm": 0.03443646803498268, "learning_rate": 7.15097715774307e-05, "loss": 0.0067, "step": 19758 }, { "epoch": 3.824496904024768, "grad_norm": 0.033230893313884735, "learning_rate": 7.150721907863817e-05, "loss": 0.0066, "step": 19759 }, { "epoch": 3.8246904024767803, "grad_norm": 0.0684761330485344, "learning_rate": 7.150466651847584e-05, "loss": 0.0074, "step": 19760 }, { "epoch": 3.8248839009287927, "grad_norm": 0.05425887554883957, "learning_rate": 7.150211389695324e-05, "loss": 0.0066, "step": 19761 }, { "epoch": 3.825077399380805, "grad_norm": 0.07192645967006683, "learning_rate": 7.149956121407983e-05, "loss": 0.0059, "step": 19762 }, { "epoch": 3.8252708978328176, "grad_norm": 0.05261404439806938, "learning_rate": 7.149700846986515e-05, "loss": 0.0072, "step": 19763 }, { "epoch": 3.8254643962848296, "grad_norm": 0.08077412098646164, "learning_rate": 7.149445566431863e-05, "loss": 0.0051, "step": 19764 }, { "epoch": 3.825657894736842, "grad_norm": 0.05689533054828644, "learning_rate": 7.149190279744978e-05, "loss": 0.0065, "step": 19765 }, { "epoch": 3.8258513931888545, "grad_norm": 0.0822548195719719, "learning_rate": 7.148934986926813e-05, "loss": 0.0059, "step": 19766 }, { "epoch": 3.826044891640867, "grad_norm": 0.065532386302948, "learning_rate": 7.148679687978312e-05, "loss": 0.0067, "step": 19767 }, { "epoch": 3.8262383900928794, "grad_norm": 0.060055386275053024, "learning_rate": 7.148424382900428e-05, "loss": 0.005, "step": 19768 }, { "epoch": 3.826431888544892, "grad_norm": 0.06587658822536469, "learning_rate": 7.148169071694107e-05, "loss": 0.0072, "step": 19769 }, { "epoch": 3.826625386996904, "grad_norm": 0.05409035459160805, "learning_rate": 7.1479137543603e-05, "loss": 0.0072, "step": 19770 }, { "epoch": 3.8268188854489162, "grad_norm": 0.033928681164979935, "learning_rate": 7.147658430899953e-05, "loss": 0.0056, "step": 19771 }, { "epoch": 3.8270123839009287, "grad_norm": 0.041572630405426025, "learning_rate": 7.14740310131402e-05, "loss": 0.0051, "step": 19772 }, { "epoch": 3.827205882352941, "grad_norm": 0.043392881751060486, 
"learning_rate": 7.14714776560345e-05, "loss": 0.0075, "step": 19773 }, { "epoch": 3.8273993808049536, "grad_norm": 0.06394822150468826, "learning_rate": 7.146892423769189e-05, "loss": 0.0061, "step": 19774 }, { "epoch": 3.827592879256966, "grad_norm": 0.05027463659644127, "learning_rate": 7.146637075812187e-05, "loss": 0.0071, "step": 19775 }, { "epoch": 3.8277863777089784, "grad_norm": 0.07595068961381912, "learning_rate": 7.146381721733395e-05, "loss": 0.0063, "step": 19776 }, { "epoch": 3.827979876160991, "grad_norm": 0.049689069390296936, "learning_rate": 7.14612636153376e-05, "loss": 0.0075, "step": 19777 }, { "epoch": 3.8281733746130033, "grad_norm": 0.058541249483823776, "learning_rate": 7.145870995214236e-05, "loss": 0.0066, "step": 19778 }, { "epoch": 3.8283668730650153, "grad_norm": 0.057143863290548325, "learning_rate": 7.145615622775766e-05, "loss": 0.0064, "step": 19779 }, { "epoch": 3.8285603715170278, "grad_norm": 0.059390839189291, "learning_rate": 7.145360244219305e-05, "loss": 0.007, "step": 19780 }, { "epoch": 3.82875386996904, "grad_norm": 0.052423980087041855, "learning_rate": 7.145104859545798e-05, "loss": 0.0056, "step": 19781 }, { "epoch": 3.8289473684210527, "grad_norm": 0.06365416944026947, "learning_rate": 7.144849468756198e-05, "loss": 0.0061, "step": 19782 }, { "epoch": 3.829140866873065, "grad_norm": 0.06231922283768654, "learning_rate": 7.144594071851451e-05, "loss": 0.0064, "step": 19783 }, { "epoch": 3.8293343653250775, "grad_norm": 0.06819993257522583, "learning_rate": 7.144338668832509e-05, "loss": 0.0049, "step": 19784 }, { "epoch": 3.8295278637770895, "grad_norm": 0.06261847913265228, "learning_rate": 7.144083259700324e-05, "loss": 0.0065, "step": 19785 }, { "epoch": 3.829721362229102, "grad_norm": 0.07672518491744995, "learning_rate": 7.143827844455839e-05, "loss": 0.0061, "step": 19786 }, { "epoch": 3.8299148606811144, "grad_norm": 0.03714047372341156, "learning_rate": 7.143572423100009e-05, "loss": 0.0053, "step": 19787 }, { "epoch": 3.830108359133127, "grad_norm": 0.057451896369457245, "learning_rate": 7.143316995633781e-05, "loss": 0.0052, "step": 19788 }, { "epoch": 3.8303018575851393, "grad_norm": 0.05305555462837219, "learning_rate": 7.143061562058105e-05, "loss": 0.0065, "step": 19789 }, { "epoch": 3.8304953560371517, "grad_norm": 0.05248872935771942, "learning_rate": 7.14280612237393e-05, "loss": 0.0079, "step": 19790 }, { "epoch": 3.830688854489164, "grad_norm": 0.061212748289108276, "learning_rate": 7.14255067658221e-05, "loss": 0.0051, "step": 19791 }, { "epoch": 3.8308823529411766, "grad_norm": 0.0538930669426918, "learning_rate": 7.14229522468389e-05, "loss": 0.0061, "step": 19792 }, { "epoch": 3.831075851393189, "grad_norm": 0.06866588443517685, "learning_rate": 7.142039766679919e-05, "loss": 0.0063, "step": 19793 }, { "epoch": 3.8312693498452015, "grad_norm": 0.06446059048175812, "learning_rate": 7.141784302571252e-05, "loss": 0.0058, "step": 19794 }, { "epoch": 3.8314628482972135, "grad_norm": 0.06276294589042664, "learning_rate": 7.141528832358833e-05, "loss": 0.0054, "step": 19795 }, { "epoch": 3.831656346749226, "grad_norm": 0.04791621118783951, "learning_rate": 7.141273356043614e-05, "loss": 0.0048, "step": 19796 }, { "epoch": 3.8318498452012384, "grad_norm": 0.059655673801898956, "learning_rate": 7.141017873626548e-05, "loss": 0.0073, "step": 19797 }, { "epoch": 3.832043343653251, "grad_norm": 0.0594605915248394, "learning_rate": 7.14076238510858e-05, "loss": 0.0055, "step": 19798 }, { "epoch": 3.8322368421052633, "grad_norm": 
0.054633647203445435, "learning_rate": 7.140506890490661e-05, "loss": 0.0054, "step": 19799 }, { "epoch": 3.8324303405572753, "grad_norm": 0.0345524325966835, "learning_rate": 7.140251389773743e-05, "loss": 0.0059, "step": 19800 }, { "epoch": 3.8326238390092877, "grad_norm": 0.06397800147533417, "learning_rate": 7.139995882958775e-05, "loss": 0.0054, "step": 19801 }, { "epoch": 3.8328173374613, "grad_norm": 0.05611136183142662, "learning_rate": 7.139740370046704e-05, "loss": 0.0058, "step": 19802 }, { "epoch": 3.8330108359133126, "grad_norm": 0.06191866099834442, "learning_rate": 7.139484851038484e-05, "loss": 0.0066, "step": 19803 }, { "epoch": 3.833204334365325, "grad_norm": 0.058538321405649185, "learning_rate": 7.139229325935062e-05, "loss": 0.0072, "step": 19804 }, { "epoch": 3.8333978328173375, "grad_norm": 0.042487531900405884, "learning_rate": 7.13897379473739e-05, "loss": 0.006, "step": 19805 }, { "epoch": 3.83359133126935, "grad_norm": 0.050454381853342056, "learning_rate": 7.138718257446419e-05, "loss": 0.0059, "step": 19806 }, { "epoch": 3.8337848297213624, "grad_norm": 0.035995569080114365, "learning_rate": 7.138462714063095e-05, "loss": 0.0062, "step": 19807 }, { "epoch": 3.833978328173375, "grad_norm": 0.06768707931041718, "learning_rate": 7.13820716458837e-05, "loss": 0.0063, "step": 19808 }, { "epoch": 3.8341718266253872, "grad_norm": 0.0515814907848835, "learning_rate": 7.137951609023196e-05, "loss": 0.0061, "step": 19809 }, { "epoch": 3.8343653250773992, "grad_norm": 0.06778503954410553, "learning_rate": 7.137696047368522e-05, "loss": 0.0068, "step": 19810 }, { "epoch": 3.8345588235294117, "grad_norm": 0.05789037421345711, "learning_rate": 7.137440479625295e-05, "loss": 0.0061, "step": 19811 }, { "epoch": 3.834752321981424, "grad_norm": 0.05498635023832321, "learning_rate": 7.137184905794471e-05, "loss": 0.0072, "step": 19812 }, { "epoch": 3.8349458204334366, "grad_norm": 0.06289191544055939, "learning_rate": 7.136929325876996e-05, "loss": 0.0075, "step": 19813 }, { "epoch": 3.835139318885449, "grad_norm": 0.04151647910475731, "learning_rate": 7.136673739873819e-05, "loss": 0.0058, "step": 19814 }, { "epoch": 3.8353328173374615, "grad_norm": 0.054850850254297256, "learning_rate": 7.136418147785893e-05, "loss": 0.0055, "step": 19815 }, { "epoch": 3.8355263157894735, "grad_norm": 0.05366101115942001, "learning_rate": 7.13616254961417e-05, "loss": 0.0062, "step": 19816 }, { "epoch": 3.835719814241486, "grad_norm": 0.06156128644943237, "learning_rate": 7.135906945359596e-05, "loss": 0.0081, "step": 19817 }, { "epoch": 3.8359133126934983, "grad_norm": 0.09378110617399216, "learning_rate": 7.135651335023122e-05, "loss": 0.0064, "step": 19818 }, { "epoch": 3.8361068111455108, "grad_norm": 0.04336775839328766, "learning_rate": 7.135395718605703e-05, "loss": 0.0065, "step": 19819 }, { "epoch": 3.836300309597523, "grad_norm": 0.10425686836242676, "learning_rate": 7.135140096108283e-05, "loss": 0.0052, "step": 19820 }, { "epoch": 3.8364938080495357, "grad_norm": 0.07535725086927414, "learning_rate": 7.134884467531814e-05, "loss": 0.0075, "step": 19821 }, { "epoch": 3.836687306501548, "grad_norm": 0.08685845881700516, "learning_rate": 7.13462883287725e-05, "loss": 0.0056, "step": 19822 }, { "epoch": 3.8368808049535605, "grad_norm": 0.08975513279438019, "learning_rate": 7.134373192145538e-05, "loss": 0.006, "step": 19823 }, { "epoch": 3.837074303405573, "grad_norm": 0.07799182832241058, "learning_rate": 7.134117545337627e-05, "loss": 0.0065, "step": 19824 }, { "epoch": 
3.837267801857585, "grad_norm": 0.06608089804649353, "learning_rate": 7.133861892454473e-05, "loss": 0.0065, "step": 19825 }, { "epoch": 3.8374613003095974, "grad_norm": 0.08480330556631088, "learning_rate": 7.133606233497022e-05, "loss": 0.0059, "step": 19826 }, { "epoch": 3.83765479876161, "grad_norm": 0.03088155761361122, "learning_rate": 7.133350568466224e-05, "loss": 0.0062, "step": 19827 }, { "epoch": 3.8378482972136223, "grad_norm": 0.054985519498586655, "learning_rate": 7.133094897363032e-05, "loss": 0.0066, "step": 19828 }, { "epoch": 3.8380417956656347, "grad_norm": 0.03990872949361801, "learning_rate": 7.132839220188396e-05, "loss": 0.0064, "step": 19829 }, { "epoch": 3.838235294117647, "grad_norm": 0.03739739954471588, "learning_rate": 7.132583536943266e-05, "loss": 0.0067, "step": 19830 }, { "epoch": 3.838428792569659, "grad_norm": 0.059272799640893936, "learning_rate": 7.132327847628592e-05, "loss": 0.0074, "step": 19831 }, { "epoch": 3.8386222910216716, "grad_norm": 0.02929845079779625, "learning_rate": 7.132072152245326e-05, "loss": 0.006, "step": 19832 }, { "epoch": 3.838815789473684, "grad_norm": 0.02816660888493061, "learning_rate": 7.131816450794417e-05, "loss": 0.0064, "step": 19833 }, { "epoch": 3.8390092879256965, "grad_norm": 0.03249882161617279, "learning_rate": 7.131560743276817e-05, "loss": 0.0068, "step": 19834 }, { "epoch": 3.839202786377709, "grad_norm": 0.04375579208135605, "learning_rate": 7.131305029693476e-05, "loss": 0.0062, "step": 19835 }, { "epoch": 3.8393962848297214, "grad_norm": 0.07192104309797287, "learning_rate": 7.131049310045345e-05, "loss": 0.0068, "step": 19836 }, { "epoch": 3.839589783281734, "grad_norm": 0.05831003934144974, "learning_rate": 7.130793584333375e-05, "loss": 0.0058, "step": 19837 }, { "epoch": 3.8397832817337463, "grad_norm": 0.08361823111772537, "learning_rate": 7.130537852558517e-05, "loss": 0.0066, "step": 19838 }, { "epoch": 3.8399767801857587, "grad_norm": 0.09184051305055618, "learning_rate": 7.13028211472172e-05, "loss": 0.0064, "step": 19839 }, { "epoch": 3.840170278637771, "grad_norm": 0.08444592356681824, "learning_rate": 7.130026370823935e-05, "loss": 0.006, "step": 19840 }, { "epoch": 3.840363777089783, "grad_norm": 0.08095517754554749, "learning_rate": 7.129770620866114e-05, "loss": 0.0057, "step": 19841 }, { "epoch": 3.8405572755417956, "grad_norm": 0.09094130992889404, "learning_rate": 7.129514864849209e-05, "loss": 0.0056, "step": 19842 }, { "epoch": 3.840750773993808, "grad_norm": 0.08168523013591766, "learning_rate": 7.129259102774168e-05, "loss": 0.0063, "step": 19843 }, { "epoch": 3.8409442724458205, "grad_norm": 0.07812443375587463, "learning_rate": 7.129003334641943e-05, "loss": 0.0065, "step": 19844 }, { "epoch": 3.841137770897833, "grad_norm": 0.05415688082575798, "learning_rate": 7.128747560453486e-05, "loss": 0.006, "step": 19845 }, { "epoch": 3.8413312693498454, "grad_norm": 0.05053190141916275, "learning_rate": 7.128491780209746e-05, "loss": 0.006, "step": 19846 }, { "epoch": 3.8415247678018574, "grad_norm": 0.06442199647426605, "learning_rate": 7.128235993911675e-05, "loss": 0.0064, "step": 19847 }, { "epoch": 3.84171826625387, "grad_norm": 0.04044022783637047, "learning_rate": 7.127980201560224e-05, "loss": 0.0067, "step": 19848 }, { "epoch": 3.8419117647058822, "grad_norm": 0.03983943164348602, "learning_rate": 7.127724403156343e-05, "loss": 0.0062, "step": 19849 }, { "epoch": 3.8421052631578947, "grad_norm": 0.055959705263376236, "learning_rate": 7.127468598700985e-05, "loss": 0.0065, "step": 
19850 }, { "epoch": 3.842298761609907, "grad_norm": 0.01776054874062538, "learning_rate": 7.127212788195099e-05, "loss": 0.006, "step": 19851 }, { "epoch": 3.8424922600619196, "grad_norm": 0.03910671919584274, "learning_rate": 7.126956971639637e-05, "loss": 0.0057, "step": 19852 }, { "epoch": 3.842685758513932, "grad_norm": 0.05464870482683182, "learning_rate": 7.126701149035548e-05, "loss": 0.006, "step": 19853 }, { "epoch": 3.8428792569659445, "grad_norm": 0.020660677924752235, "learning_rate": 7.126445320383788e-05, "loss": 0.0059, "step": 19854 }, { "epoch": 3.843072755417957, "grad_norm": 0.05711277574300766, "learning_rate": 7.126189485685304e-05, "loss": 0.0063, "step": 19855 }, { "epoch": 3.843266253869969, "grad_norm": 0.046292781829833984, "learning_rate": 7.125933644941048e-05, "loss": 0.006, "step": 19856 }, { "epoch": 3.8434597523219813, "grad_norm": 0.02500193379819393, "learning_rate": 7.125677798151971e-05, "loss": 0.0061, "step": 19857 }, { "epoch": 3.843653250773994, "grad_norm": 0.03385195881128311, "learning_rate": 7.125421945319026e-05, "loss": 0.0058, "step": 19858 }, { "epoch": 3.843846749226006, "grad_norm": 0.09587506204843521, "learning_rate": 7.125166086443159e-05, "loss": 0.0073, "step": 19859 }, { "epoch": 3.8440402476780187, "grad_norm": 0.030860044062137604, "learning_rate": 7.124910221525328e-05, "loss": 0.0064, "step": 19860 }, { "epoch": 3.844233746130031, "grad_norm": 0.09090541303157806, "learning_rate": 7.12465435056648e-05, "loss": 0.0056, "step": 19861 }, { "epoch": 3.844427244582043, "grad_norm": 0.057680435478687286, "learning_rate": 7.12439847356757e-05, "loss": 0.0056, "step": 19862 }, { "epoch": 3.8446207430340555, "grad_norm": 0.06295079737901688, "learning_rate": 7.124142590529544e-05, "loss": 0.0052, "step": 19863 }, { "epoch": 3.844814241486068, "grad_norm": 0.06855404376983643, "learning_rate": 7.123886701453357e-05, "loss": 0.006, "step": 19864 }, { "epoch": 3.8450077399380804, "grad_norm": 0.03007546253502369, "learning_rate": 7.12363080633996e-05, "loss": 0.0062, "step": 19865 }, { "epoch": 3.845201238390093, "grad_norm": 0.07301463931798935, "learning_rate": 7.123374905190302e-05, "loss": 0.0057, "step": 19866 }, { "epoch": 3.8453947368421053, "grad_norm": 0.02898344397544861, "learning_rate": 7.123118998005337e-05, "loss": 0.0055, "step": 19867 }, { "epoch": 3.8455882352941178, "grad_norm": 0.06224897876381874, "learning_rate": 7.122863084786017e-05, "loss": 0.0058, "step": 19868 }, { "epoch": 3.84578173374613, "grad_norm": 0.07647889852523804, "learning_rate": 7.122607165533291e-05, "loss": 0.0063, "step": 19869 }, { "epoch": 3.8459752321981426, "grad_norm": 0.08944887667894363, "learning_rate": 7.12235124024811e-05, "loss": 0.0062, "step": 19870 }, { "epoch": 3.8461687306501546, "grad_norm": 0.06894344836473465, "learning_rate": 7.122095308931428e-05, "loss": 0.0061, "step": 19871 }, { "epoch": 3.846362229102167, "grad_norm": 0.05577516183257103, "learning_rate": 7.121839371584195e-05, "loss": 0.0068, "step": 19872 }, { "epoch": 3.8465557275541795, "grad_norm": 0.07186566293239594, "learning_rate": 7.121583428207363e-05, "loss": 0.0068, "step": 19873 }, { "epoch": 3.846749226006192, "grad_norm": 0.03209985792636871, "learning_rate": 7.121327478801883e-05, "loss": 0.0056, "step": 19874 }, { "epoch": 3.8469427244582044, "grad_norm": 0.10408104956150055, "learning_rate": 7.121071523368707e-05, "loss": 0.0068, "step": 19875 }, { "epoch": 3.847136222910217, "grad_norm": 0.06360357999801636, "learning_rate": 7.120815561908787e-05, "loss": 
0.0059, "step": 19876 }, { "epoch": 3.847329721362229, "grad_norm": 0.10276561975479126, "learning_rate": 7.120559594423074e-05, "loss": 0.0068, "step": 19877 }, { "epoch": 3.8475232198142413, "grad_norm": 0.043335799127817154, "learning_rate": 7.120303620912519e-05, "loss": 0.0064, "step": 19878 }, { "epoch": 3.8477167182662537, "grad_norm": 0.1188722550868988, "learning_rate": 7.120047641378076e-05, "loss": 0.0061, "step": 19879 }, { "epoch": 3.847910216718266, "grad_norm": 0.025554098188877106, "learning_rate": 7.119791655820694e-05, "loss": 0.0052, "step": 19880 }, { "epoch": 3.8481037151702786, "grad_norm": 0.09202201664447784, "learning_rate": 7.119535664241326e-05, "loss": 0.0067, "step": 19881 }, { "epoch": 3.848297213622291, "grad_norm": 0.05948024243116379, "learning_rate": 7.119279666640924e-05, "loss": 0.0056, "step": 19882 }, { "epoch": 3.8484907120743035, "grad_norm": 0.053481802344322205, "learning_rate": 7.119023663020439e-05, "loss": 0.0055, "step": 19883 }, { "epoch": 3.848684210526316, "grad_norm": 0.0687466636300087, "learning_rate": 7.118767653380822e-05, "loss": 0.0063, "step": 19884 }, { "epoch": 3.8488777089783284, "grad_norm": 0.06664711236953735, "learning_rate": 7.118511637723026e-05, "loss": 0.0072, "step": 19885 }, { "epoch": 3.849071207430341, "grad_norm": 0.049132730811834335, "learning_rate": 7.118255616048001e-05, "loss": 0.0054, "step": 19886 }, { "epoch": 3.849264705882353, "grad_norm": 0.08179297298192978, "learning_rate": 7.117999588356704e-05, "loss": 0.0056, "step": 19887 }, { "epoch": 3.8494582043343653, "grad_norm": 0.0516439713537693, "learning_rate": 7.117743554650082e-05, "loss": 0.0064, "step": 19888 }, { "epoch": 3.8496517027863777, "grad_norm": 0.06792858988046646, "learning_rate": 7.117487514929086e-05, "loss": 0.0052, "step": 19889 }, { "epoch": 3.84984520123839, "grad_norm": 0.042215198278427124, "learning_rate": 7.117231469194671e-05, "loss": 0.008, "step": 19890 }, { "epoch": 3.8500386996904026, "grad_norm": 0.05774496868252754, "learning_rate": 7.116975417447788e-05, "loss": 0.0067, "step": 19891 }, { "epoch": 3.850232198142415, "grad_norm": 0.04232599958777428, "learning_rate": 7.11671935968939e-05, "loss": 0.0054, "step": 19892 }, { "epoch": 3.850425696594427, "grad_norm": 0.0345149002969265, "learning_rate": 7.116463295920427e-05, "loss": 0.0044, "step": 19893 }, { "epoch": 3.8506191950464395, "grad_norm": 0.05188232287764549, "learning_rate": 7.116207226141852e-05, "loss": 0.0069, "step": 19894 }, { "epoch": 3.850812693498452, "grad_norm": 0.05929451808333397, "learning_rate": 7.115951150354617e-05, "loss": 0.0066, "step": 19895 }, { "epoch": 3.8510061919504643, "grad_norm": 0.026423465460538864, "learning_rate": 7.115695068559674e-05, "loss": 0.0049, "step": 19896 }, { "epoch": 3.851199690402477, "grad_norm": 0.08023092150688171, "learning_rate": 7.115438980757973e-05, "loss": 0.0069, "step": 19897 }, { "epoch": 3.8513931888544892, "grad_norm": 0.04883478209376335, "learning_rate": 7.115182886950469e-05, "loss": 0.0068, "step": 19898 }, { "epoch": 3.8515866873065017, "grad_norm": 0.07627618312835693, "learning_rate": 7.114926787138113e-05, "loss": 0.005, "step": 19899 }, { "epoch": 3.851780185758514, "grad_norm": 0.029759036377072334, "learning_rate": 7.114670681321858e-05, "loss": 0.0052, "step": 19900 }, { "epoch": 3.8519736842105265, "grad_norm": 0.022359509021043777, "learning_rate": 7.114414569502654e-05, "loss": 0.0057, "step": 19901 }, { "epoch": 3.8521671826625385, "grad_norm": 0.06541671603918076, "learning_rate": 
7.114158451681455e-05, "loss": 0.0061, "step": 19902 }, { "epoch": 3.852360681114551, "grad_norm": 0.036315493285655975, "learning_rate": 7.113902327859213e-05, "loss": 0.0053, "step": 19903 }, { "epoch": 3.8525541795665634, "grad_norm": 0.05756297707557678, "learning_rate": 7.113646198036879e-05, "loss": 0.0066, "step": 19904 }, { "epoch": 3.852747678018576, "grad_norm": 0.06794951856136322, "learning_rate": 7.113390062215405e-05, "loss": 0.0069, "step": 19905 }, { "epoch": 3.8529411764705883, "grad_norm": 0.07659684866666794, "learning_rate": 7.113133920395746e-05, "loss": 0.007, "step": 19906 }, { "epoch": 3.8531346749226008, "grad_norm": 0.05634048208594322, "learning_rate": 7.112877772578853e-05, "loss": 0.0061, "step": 19907 }, { "epoch": 3.8533281733746128, "grad_norm": 0.02512258291244507, "learning_rate": 7.112621618765678e-05, "loss": 0.0064, "step": 19908 }, { "epoch": 3.853521671826625, "grad_norm": 0.07356185466051102, "learning_rate": 7.112365458957172e-05, "loss": 0.0084, "step": 19909 }, { "epoch": 3.8537151702786376, "grad_norm": 0.018822744488716125, "learning_rate": 7.112109293154287e-05, "loss": 0.0055, "step": 19910 }, { "epoch": 3.85390866873065, "grad_norm": 0.042044177651405334, "learning_rate": 7.11185312135798e-05, "loss": 0.0069, "step": 19911 }, { "epoch": 3.8541021671826625, "grad_norm": 0.032683804631233215, "learning_rate": 7.111596943569199e-05, "loss": 0.0049, "step": 19912 }, { "epoch": 3.854295665634675, "grad_norm": 0.025615356862545013, "learning_rate": 7.111340759788897e-05, "loss": 0.0056, "step": 19913 }, { "epoch": 3.8544891640866874, "grad_norm": 0.03881185129284859, "learning_rate": 7.111084570018028e-05, "loss": 0.0062, "step": 19914 }, { "epoch": 3.8546826625387, "grad_norm": 0.04123445227742195, "learning_rate": 7.110828374257545e-05, "loss": 0.0062, "step": 19915 }, { "epoch": 3.8548761609907123, "grad_norm": 0.02896689437329769, "learning_rate": 7.110572172508396e-05, "loss": 0.0061, "step": 19916 }, { "epoch": 3.8550696594427247, "grad_norm": 0.02736804448068142, "learning_rate": 7.110315964771538e-05, "loss": 0.0062, "step": 19917 }, { "epoch": 3.8552631578947367, "grad_norm": 0.018285278230905533, "learning_rate": 7.110059751047923e-05, "loss": 0.0075, "step": 19918 }, { "epoch": 3.855456656346749, "grad_norm": 0.024554524570703506, "learning_rate": 7.1098035313385e-05, "loss": 0.0051, "step": 19919 }, { "epoch": 3.8556501547987616, "grad_norm": 0.029215479269623756, "learning_rate": 7.109547305644227e-05, "loss": 0.0049, "step": 19920 }, { "epoch": 3.855843653250774, "grad_norm": 0.04246198758482933, "learning_rate": 7.109291073966052e-05, "loss": 0.006, "step": 19921 }, { "epoch": 3.8560371517027865, "grad_norm": 0.0407252237200737, "learning_rate": 7.109034836304929e-05, "loss": 0.0057, "step": 19922 }, { "epoch": 3.8562306501547985, "grad_norm": 0.04592755436897278, "learning_rate": 7.108778592661812e-05, "loss": 0.0073, "step": 19923 }, { "epoch": 3.856424148606811, "grad_norm": 0.06458403170108795, "learning_rate": 7.108522343037652e-05, "loss": 0.0064, "step": 19924 }, { "epoch": 3.8566176470588234, "grad_norm": 0.020765570923686028, "learning_rate": 7.108266087433401e-05, "loss": 0.0049, "step": 19925 }, { "epoch": 3.856811145510836, "grad_norm": 0.06692568212747574, "learning_rate": 7.108009825850016e-05, "loss": 0.0059, "step": 19926 }, { "epoch": 3.8570046439628483, "grad_norm": 0.037860240787267685, "learning_rate": 7.107753558288446e-05, "loss": 0.0064, "step": 19927 }, { "epoch": 3.8571981424148607, "grad_norm": 
0.057405125349760056, "learning_rate": 7.107497284749644e-05, "loss": 0.0062, "step": 19928 }, { "epoch": 3.857391640866873, "grad_norm": 0.048107266426086426, "learning_rate": 7.107241005234561e-05, "loss": 0.0067, "step": 19929 }, { "epoch": 3.8575851393188856, "grad_norm": 0.04799283668398857, "learning_rate": 7.106984719744155e-05, "loss": 0.0059, "step": 19930 }, { "epoch": 3.857778637770898, "grad_norm": 0.05355503782629967, "learning_rate": 7.106728428279373e-05, "loss": 0.0064, "step": 19931 }, { "epoch": 3.8579721362229105, "grad_norm": 0.03815845772624016, "learning_rate": 7.106472130841173e-05, "loss": 0.0067, "step": 19932 }, { "epoch": 3.8581656346749225, "grad_norm": 0.055933382362127304, "learning_rate": 7.106215827430504e-05, "loss": 0.0067, "step": 19933 }, { "epoch": 3.858359133126935, "grad_norm": 0.03634775057435036, "learning_rate": 7.105959518048321e-05, "loss": 0.0058, "step": 19934 }, { "epoch": 3.8585526315789473, "grad_norm": 0.05673063546419144, "learning_rate": 7.105703202695577e-05, "loss": 0.0051, "step": 19935 }, { "epoch": 3.85874613003096, "grad_norm": 0.05155600979924202, "learning_rate": 7.105446881373221e-05, "loss": 0.0062, "step": 19936 }, { "epoch": 3.8589396284829722, "grad_norm": 0.06140565499663353, "learning_rate": 7.105190554082213e-05, "loss": 0.0069, "step": 19937 }, { "epoch": 3.8591331269349847, "grad_norm": 0.07083527743816376, "learning_rate": 7.104934220823499e-05, "loss": 0.0066, "step": 19938 }, { "epoch": 3.8593266253869967, "grad_norm": 0.07309672981500626, "learning_rate": 7.104677881598039e-05, "loss": 0.0054, "step": 19939 }, { "epoch": 3.859520123839009, "grad_norm": 0.06905828416347504, "learning_rate": 7.104421536406779e-05, "loss": 0.0056, "step": 19940 }, { "epoch": 3.8597136222910216, "grad_norm": 0.08175835758447647, "learning_rate": 7.104165185250675e-05, "loss": 0.0055, "step": 19941 }, { "epoch": 3.859907120743034, "grad_norm": 0.044771093875169754, "learning_rate": 7.103908828130681e-05, "loss": 0.0061, "step": 19942 }, { "epoch": 3.8601006191950464, "grad_norm": 0.09528182446956635, "learning_rate": 7.103652465047748e-05, "loss": 0.0064, "step": 19943 }, { "epoch": 3.860294117647059, "grad_norm": 0.062277328222990036, "learning_rate": 7.103396096002832e-05, "loss": 0.0062, "step": 19944 }, { "epoch": 3.8604876160990713, "grad_norm": 0.049683112651109695, "learning_rate": 7.103139720996883e-05, "loss": 0.0041, "step": 19945 }, { "epoch": 3.8606811145510838, "grad_norm": 0.05471594259142876, "learning_rate": 7.102883340030857e-05, "loss": 0.0058, "step": 19946 }, { "epoch": 3.860874613003096, "grad_norm": 0.03469213470816612, "learning_rate": 7.102626953105705e-05, "loss": 0.0055, "step": 19947 }, { "epoch": 3.861068111455108, "grad_norm": 0.05318388342857361, "learning_rate": 7.10237056022238e-05, "loss": 0.0055, "step": 19948 }, { "epoch": 3.8612616099071206, "grad_norm": 0.04929802194237709, "learning_rate": 7.102114161381837e-05, "loss": 0.0056, "step": 19949 }, { "epoch": 3.861455108359133, "grad_norm": 0.037466637790203094, "learning_rate": 7.101857756585028e-05, "loss": 0.008, "step": 19950 }, { "epoch": 3.8616486068111455, "grad_norm": 0.052109576761722565, "learning_rate": 7.101601345832907e-05, "loss": 0.0057, "step": 19951 }, { "epoch": 3.861842105263158, "grad_norm": 0.05572633072733879, "learning_rate": 7.101344929126426e-05, "loss": 0.0073, "step": 19952 }, { "epoch": 3.8620356037151704, "grad_norm": 0.025837603956460953, "learning_rate": 7.10108850646654e-05, "loss": 0.0064, "step": 19953 }, { "epoch": 
3.8622291021671824, "grad_norm": 0.050899069756269455, "learning_rate": 7.1008320778542e-05, "loss": 0.0062, "step": 19954 }, { "epoch": 3.862422600619195, "grad_norm": 0.033673644065856934, "learning_rate": 7.100575643290362e-05, "loss": 0.0058, "step": 19955 }, { "epoch": 3.8626160990712073, "grad_norm": 0.03878739848732948, "learning_rate": 7.10031920277598e-05, "loss": 0.0058, "step": 19956 }, { "epoch": 3.8628095975232197, "grad_norm": 0.05624610558152199, "learning_rate": 7.100062756312003e-05, "loss": 0.0064, "step": 19957 }, { "epoch": 3.863003095975232, "grad_norm": 0.058078039437532425, "learning_rate": 7.099806303899387e-05, "loss": 0.007, "step": 19958 }, { "epoch": 3.8631965944272446, "grad_norm": 0.05467744171619415, "learning_rate": 7.099549845539085e-05, "loss": 0.0068, "step": 19959 }, { "epoch": 3.863390092879257, "grad_norm": 0.07997578382492065, "learning_rate": 7.099293381232053e-05, "loss": 0.0065, "step": 19960 }, { "epoch": 3.8635835913312695, "grad_norm": 0.060292910784482956, "learning_rate": 7.09903691097924e-05, "loss": 0.0055, "step": 19961 }, { "epoch": 3.863777089783282, "grad_norm": 0.08787183463573456, "learning_rate": 7.098780434781603e-05, "loss": 0.0056, "step": 19962 }, { "epoch": 3.8639705882352944, "grad_norm": 0.06917392462491989, "learning_rate": 7.098523952640095e-05, "loss": 0.0069, "step": 19963 }, { "epoch": 3.8641640866873064, "grad_norm": 0.057774171233177185, "learning_rate": 7.098267464555668e-05, "loss": 0.0074, "step": 19964 }, { "epoch": 3.864357585139319, "grad_norm": 0.08729442209005356, "learning_rate": 7.098010970529276e-05, "loss": 0.0063, "step": 19965 }, { "epoch": 3.8645510835913313, "grad_norm": 0.05072786286473274, "learning_rate": 7.097754470561873e-05, "loss": 0.0067, "step": 19966 }, { "epoch": 3.8647445820433437, "grad_norm": 0.09107672423124313, "learning_rate": 7.097497964654413e-05, "loss": 0.0071, "step": 19967 }, { "epoch": 3.864938080495356, "grad_norm": 0.08501064777374268, "learning_rate": 7.097241452807849e-05, "loss": 0.0059, "step": 19968 }, { "epoch": 3.8651315789473686, "grad_norm": 0.08220308274030685, "learning_rate": 7.096984935023136e-05, "loss": 0.0066, "step": 19969 }, { "epoch": 3.8653250773993806, "grad_norm": 0.09660026431083679, "learning_rate": 7.096728411301225e-05, "loss": 0.007, "step": 19970 }, { "epoch": 3.865518575851393, "grad_norm": 0.05045658349990845, "learning_rate": 7.096471881643072e-05, "loss": 0.0061, "step": 19971 }, { "epoch": 3.8657120743034055, "grad_norm": 0.08690595626831055, "learning_rate": 7.096215346049628e-05, "loss": 0.0058, "step": 19972 }, { "epoch": 3.865905572755418, "grad_norm": 0.04734829068183899, "learning_rate": 7.09595880452185e-05, "loss": 0.0052, "step": 19973 }, { "epoch": 3.8660990712074303, "grad_norm": 0.05898480489850044, "learning_rate": 7.095702257060691e-05, "loss": 0.0053, "step": 19974 }, { "epoch": 3.866292569659443, "grad_norm": 0.06124650314450264, "learning_rate": 7.095445703667103e-05, "loss": 0.0063, "step": 19975 }, { "epoch": 3.8664860681114552, "grad_norm": 0.030849488452076912, "learning_rate": 7.095189144342043e-05, "loss": 0.0063, "step": 19976 }, { "epoch": 3.8666795665634677, "grad_norm": 0.054741740226745605, "learning_rate": 7.094932579086461e-05, "loss": 0.0068, "step": 19977 }, { "epoch": 3.86687306501548, "grad_norm": 0.03396136313676834, "learning_rate": 7.094676007901312e-05, "loss": 0.0065, "step": 19978 }, { "epoch": 3.867066563467492, "grad_norm": 0.03951563313603401, "learning_rate": 7.094419430787554e-05, "loss": 0.0063, 
"step": 19979 }, { "epoch": 3.8672600619195046, "grad_norm": 0.06205751374363899, "learning_rate": 7.094162847746133e-05, "loss": 0.0058, "step": 19980 }, { "epoch": 3.867453560371517, "grad_norm": 0.05395437777042389, "learning_rate": 7.09390625877801e-05, "loss": 0.0074, "step": 19981 }, { "epoch": 3.8676470588235294, "grad_norm": 0.03782555088400841, "learning_rate": 7.093649663884135e-05, "loss": 0.007, "step": 19982 }, { "epoch": 3.867840557275542, "grad_norm": 0.028018087148666382, "learning_rate": 7.093393063065466e-05, "loss": 0.0048, "step": 19983 }, { "epoch": 3.8680340557275543, "grad_norm": 0.049337711185216904, "learning_rate": 7.093136456322951e-05, "loss": 0.0065, "step": 19984 }, { "epoch": 3.8682275541795663, "grad_norm": 0.028744826093316078, "learning_rate": 7.092879843657549e-05, "loss": 0.0053, "step": 19985 }, { "epoch": 3.8684210526315788, "grad_norm": 0.05238990858197212, "learning_rate": 7.092623225070213e-05, "loss": 0.0059, "step": 19986 }, { "epoch": 3.868614551083591, "grad_norm": 0.027738068252801895, "learning_rate": 7.092366600561895e-05, "loss": 0.0064, "step": 19987 }, { "epoch": 3.8688080495356036, "grad_norm": 0.054128117859363556, "learning_rate": 7.09210997013355e-05, "loss": 0.0077, "step": 19988 }, { "epoch": 3.869001547987616, "grad_norm": 0.030908318236470222, "learning_rate": 7.091853333786134e-05, "loss": 0.0068, "step": 19989 }, { "epoch": 3.8691950464396285, "grad_norm": 0.04931889846920967, "learning_rate": 7.091596691520598e-05, "loss": 0.0068, "step": 19990 }, { "epoch": 3.869388544891641, "grad_norm": 0.05706334859132767, "learning_rate": 7.091340043337899e-05, "loss": 0.0069, "step": 19991 }, { "epoch": 3.8695820433436534, "grad_norm": 0.039063721895217896, "learning_rate": 7.091083389238987e-05, "loss": 0.0055, "step": 19992 }, { "epoch": 3.869775541795666, "grad_norm": 0.063198521733284, "learning_rate": 7.090826729224821e-05, "loss": 0.0062, "step": 19993 }, { "epoch": 3.8699690402476783, "grad_norm": 0.02873852849006653, "learning_rate": 7.090570063296354e-05, "loss": 0.005, "step": 19994 }, { "epoch": 3.8701625386996903, "grad_norm": 0.0604250393807888, "learning_rate": 7.090313391454538e-05, "loss": 0.0054, "step": 19995 }, { "epoch": 3.8703560371517027, "grad_norm": 0.0672597587108612, "learning_rate": 7.09005671370033e-05, "loss": 0.0053, "step": 19996 }, { "epoch": 3.870549535603715, "grad_norm": 0.022438790649175644, "learning_rate": 7.089800030034681e-05, "loss": 0.0056, "step": 19997 }, { "epoch": 3.8707430340557276, "grad_norm": 0.071227066218853, "learning_rate": 7.089543340458549e-05, "loss": 0.0057, "step": 19998 }, { "epoch": 3.87093653250774, "grad_norm": 0.06714086979627609, "learning_rate": 7.089286644972886e-05, "loss": 0.0054, "step": 19999 }, { "epoch": 3.871130030959752, "grad_norm": 0.029072167351841927, "learning_rate": 7.089029943578645e-05, "loss": 0.0057, "step": 20000 }, { "epoch": 3.8713235294117645, "grad_norm": 0.08636089414358139, "learning_rate": 7.088773236276786e-05, "loss": 0.0064, "step": 20001 }, { "epoch": 3.871517027863777, "grad_norm": 0.034190353006124496, "learning_rate": 7.088516523068257e-05, "loss": 0.0063, "step": 20002 }, { "epoch": 3.8717105263157894, "grad_norm": 0.045373864471912384, "learning_rate": 7.088259803954016e-05, "loss": 0.0066, "step": 20003 }, { "epoch": 3.871904024767802, "grad_norm": 0.07636953145265579, "learning_rate": 7.088003078935015e-05, "loss": 0.0056, "step": 20004 }, { "epoch": 3.8720975232198143, "grad_norm": 0.02304459549486637, "learning_rate": 
7.087746348012211e-05, "loss": 0.0065, "step": 20005 }, { "epoch": 3.8722910216718267, "grad_norm": 0.0527498684823513, "learning_rate": 7.087489611186558e-05, "loss": 0.0071, "step": 20006 }, { "epoch": 3.872484520123839, "grad_norm": 0.03749862685799599, "learning_rate": 7.087232868459008e-05, "loss": 0.0061, "step": 20007 }, { "epoch": 3.8726780185758516, "grad_norm": 0.041983991861343384, "learning_rate": 7.086976119830518e-05, "loss": 0.0054, "step": 20008 }, { "epoch": 3.872871517027864, "grad_norm": 0.039871275424957275, "learning_rate": 7.086719365302043e-05, "loss": 0.0065, "step": 20009 }, { "epoch": 3.873065015479876, "grad_norm": 0.028210582211613655, "learning_rate": 7.086462604874536e-05, "loss": 0.0052, "step": 20010 }, { "epoch": 3.8732585139318885, "grad_norm": 0.048501621931791306, "learning_rate": 7.08620583854895e-05, "loss": 0.0052, "step": 20011 }, { "epoch": 3.873452012383901, "grad_norm": 0.03750704228878021, "learning_rate": 7.085949066326244e-05, "loss": 0.0052, "step": 20012 }, { "epoch": 3.8736455108359134, "grad_norm": 0.04449020326137543, "learning_rate": 7.085692288207369e-05, "loss": 0.0061, "step": 20013 }, { "epoch": 3.873839009287926, "grad_norm": 0.03601554408669472, "learning_rate": 7.085435504193281e-05, "loss": 0.0062, "step": 20014 }, { "epoch": 3.8740325077399382, "grad_norm": 0.03555903211236, "learning_rate": 7.085178714284933e-05, "loss": 0.0058, "step": 20015 }, { "epoch": 3.8742260061919502, "grad_norm": 0.04322483018040657, "learning_rate": 7.084921918483284e-05, "loss": 0.0057, "step": 20016 }, { "epoch": 3.8744195046439627, "grad_norm": 0.030119135975837708, "learning_rate": 7.084665116789283e-05, "loss": 0.0048, "step": 20017 }, { "epoch": 3.874613003095975, "grad_norm": 0.04123555123806, "learning_rate": 7.084408309203889e-05, "loss": 0.0063, "step": 20018 }, { "epoch": 3.8748065015479876, "grad_norm": 0.02959645539522171, "learning_rate": 7.084151495728056e-05, "loss": 0.0065, "step": 20019 }, { "epoch": 3.875, "grad_norm": 0.04738044738769531, "learning_rate": 7.083894676362736e-05, "loss": 0.0054, "step": 20020 }, { "epoch": 3.8751934984520124, "grad_norm": 0.033702220767736435, "learning_rate": 7.083637851108888e-05, "loss": 0.0062, "step": 20021 }, { "epoch": 3.875386996904025, "grad_norm": 0.05805858597159386, "learning_rate": 7.083381019967465e-05, "loss": 0.0066, "step": 20022 }, { "epoch": 3.8755804953560373, "grad_norm": 0.024168509989976883, "learning_rate": 7.08312418293942e-05, "loss": 0.0047, "step": 20023 }, { "epoch": 3.8757739938080498, "grad_norm": 0.05131852999329567, "learning_rate": 7.082867340025709e-05, "loss": 0.0063, "step": 20024 }, { "epoch": 3.8759674922600618, "grad_norm": 0.03702579811215401, "learning_rate": 7.082610491227287e-05, "loss": 0.006, "step": 20025 }, { "epoch": 3.876160990712074, "grad_norm": 0.040111642330884933, "learning_rate": 7.08235363654511e-05, "loss": 0.0053, "step": 20026 }, { "epoch": 3.8763544891640866, "grad_norm": 0.05306366831064224, "learning_rate": 7.082096775980132e-05, "loss": 0.0076, "step": 20027 }, { "epoch": 3.876547987616099, "grad_norm": 0.041895706206560135, "learning_rate": 7.08183990953331e-05, "loss": 0.008, "step": 20028 }, { "epoch": 3.8767414860681115, "grad_norm": 0.053544193506240845, "learning_rate": 7.081583037205595e-05, "loss": 0.0058, "step": 20029 }, { "epoch": 3.876934984520124, "grad_norm": 0.05764540657401085, "learning_rate": 7.081326158997946e-05, "loss": 0.0074, "step": 20030 }, { "epoch": 3.877128482972136, "grad_norm": 0.03660332038998604, 
"learning_rate": 7.081069274911311e-05, "loss": 0.0055, "step": 20031 }, { "epoch": 3.8773219814241484, "grad_norm": 0.07962024211883545, "learning_rate": 7.080812384946655e-05, "loss": 0.0061, "step": 20032 }, { "epoch": 3.877515479876161, "grad_norm": 0.04302702471613884, "learning_rate": 7.080555489104926e-05, "loss": 0.0071, "step": 20033 }, { "epoch": 3.8777089783281733, "grad_norm": 0.08692902326583862, "learning_rate": 7.080298587387083e-05, "loss": 0.0054, "step": 20034 }, { "epoch": 3.8779024767801857, "grad_norm": 0.052129074931144714, "learning_rate": 7.080041679794077e-05, "loss": 0.0059, "step": 20035 }, { "epoch": 3.878095975232198, "grad_norm": 0.07419249415397644, "learning_rate": 7.079784766326866e-05, "loss": 0.0061, "step": 20036 }, { "epoch": 3.8782894736842106, "grad_norm": 0.05525031313300133, "learning_rate": 7.079527846986403e-05, "loss": 0.0055, "step": 20037 }, { "epoch": 3.878482972136223, "grad_norm": 0.06664375960826874, "learning_rate": 7.079270921773648e-05, "loss": 0.0057, "step": 20038 }, { "epoch": 3.8786764705882355, "grad_norm": 0.07423882931470871, "learning_rate": 7.079013990689551e-05, "loss": 0.0056, "step": 20039 }, { "epoch": 3.878869969040248, "grad_norm": 0.04342074692249298, "learning_rate": 7.078757053735069e-05, "loss": 0.0064, "step": 20040 }, { "epoch": 3.87906346749226, "grad_norm": 0.07670275866985321, "learning_rate": 7.078500110911158e-05, "loss": 0.0058, "step": 20041 }, { "epoch": 3.8792569659442724, "grad_norm": 0.04770850017666817, "learning_rate": 7.078243162218771e-05, "loss": 0.0068, "step": 20042 }, { "epoch": 3.879450464396285, "grad_norm": 0.05822855979204178, "learning_rate": 7.077986207658866e-05, "loss": 0.0057, "step": 20043 }, { "epoch": 3.8796439628482973, "grad_norm": 0.05739501118659973, "learning_rate": 7.077729247232396e-05, "loss": 0.0068, "step": 20044 }, { "epoch": 3.8798374613003097, "grad_norm": 0.03834602236747742, "learning_rate": 7.077472280940319e-05, "loss": 0.0064, "step": 20045 }, { "epoch": 3.8800309597523217, "grad_norm": 0.02955801412463188, "learning_rate": 7.077215308783588e-05, "loss": 0.0072, "step": 20046 }, { "epoch": 3.880224458204334, "grad_norm": 0.03531184792518616, "learning_rate": 7.076958330763159e-05, "loss": 0.0059, "step": 20047 }, { "epoch": 3.8804179566563466, "grad_norm": 0.040766265243291855, "learning_rate": 7.076701346879988e-05, "loss": 0.0075, "step": 20048 }, { "epoch": 3.880611455108359, "grad_norm": 0.035378336906433105, "learning_rate": 7.07644435713503e-05, "loss": 0.0068, "step": 20049 }, { "epoch": 3.8808049535603715, "grad_norm": 0.05038927122950554, "learning_rate": 7.07618736152924e-05, "loss": 0.0061, "step": 20050 }, { "epoch": 3.880998452012384, "grad_norm": 0.04053882136940956, "learning_rate": 7.075930360063574e-05, "loss": 0.0065, "step": 20051 }, { "epoch": 3.8811919504643964, "grad_norm": 0.04743925482034683, "learning_rate": 7.075673352738987e-05, "loss": 0.0061, "step": 20052 }, { "epoch": 3.881385448916409, "grad_norm": 0.056839656084775925, "learning_rate": 7.075416339556434e-05, "loss": 0.0059, "step": 20053 }, { "epoch": 3.8815789473684212, "grad_norm": 0.04561454802751541, "learning_rate": 7.075159320516872e-05, "loss": 0.0071, "step": 20054 }, { "epoch": 3.8817724458204337, "grad_norm": 0.05618353933095932, "learning_rate": 7.074902295621256e-05, "loss": 0.0063, "step": 20055 }, { "epoch": 3.8819659442724457, "grad_norm": 0.03473716601729393, "learning_rate": 7.07464526487054e-05, "loss": 0.0046, "step": 20056 }, { "epoch": 3.882159442724458, 
"grad_norm": 0.03726041689515114, "learning_rate": 7.074388228265684e-05, "loss": 0.007, "step": 20057 }, { "epoch": 3.8823529411764706, "grad_norm": 0.04560364782810211, "learning_rate": 7.074131185807637e-05, "loss": 0.0078, "step": 20058 }, { "epoch": 3.882546439628483, "grad_norm": 0.034922752529382706, "learning_rate": 7.07387413749736e-05, "loss": 0.0058, "step": 20059 }, { "epoch": 3.8827399380804954, "grad_norm": 0.038209427148103714, "learning_rate": 7.073617083335807e-05, "loss": 0.0061, "step": 20060 }, { "epoch": 3.882933436532508, "grad_norm": 0.028866812586784363, "learning_rate": 7.073360023323932e-05, "loss": 0.0048, "step": 20061 }, { "epoch": 3.88312693498452, "grad_norm": 0.026876620948314667, "learning_rate": 7.073102957462692e-05, "loss": 0.0059, "step": 20062 }, { "epoch": 3.8833204334365323, "grad_norm": 0.030690236017107964, "learning_rate": 7.072845885753044e-05, "loss": 0.0061, "step": 20063 }, { "epoch": 3.8835139318885448, "grad_norm": 0.030353274196386337, "learning_rate": 7.072588808195944e-05, "loss": 0.0076, "step": 20064 }, { "epoch": 3.883707430340557, "grad_norm": 0.03734230622649193, "learning_rate": 7.072331724792344e-05, "loss": 0.0041, "step": 20065 }, { "epoch": 3.8839009287925697, "grad_norm": 0.04528304934501648, "learning_rate": 7.072074635543203e-05, "loss": 0.005, "step": 20066 }, { "epoch": 3.884094427244582, "grad_norm": 0.03830181062221527, "learning_rate": 7.071817540449474e-05, "loss": 0.0066, "step": 20067 }, { "epoch": 3.8842879256965945, "grad_norm": 0.05665124952793121, "learning_rate": 7.071560439512117e-05, "loss": 0.0067, "step": 20068 }, { "epoch": 3.884481424148607, "grad_norm": 0.051180120557546616, "learning_rate": 7.071303332732084e-05, "loss": 0.0054, "step": 20069 }, { "epoch": 3.8846749226006194, "grad_norm": 0.08296854794025421, "learning_rate": 7.071046220110334e-05, "loss": 0.0057, "step": 20070 }, { "epoch": 3.8848684210526314, "grad_norm": 0.06363844126462936, "learning_rate": 7.07078910164782e-05, "loss": 0.0058, "step": 20071 }, { "epoch": 3.885061919504644, "grad_norm": 0.03998358175158501, "learning_rate": 7.0705319773455e-05, "loss": 0.006, "step": 20072 }, { "epoch": 3.8852554179566563, "grad_norm": 0.10250364989042282, "learning_rate": 7.070274847204328e-05, "loss": 0.0052, "step": 20073 }, { "epoch": 3.8854489164086687, "grad_norm": 0.0551958903670311, "learning_rate": 7.070017711225262e-05, "loss": 0.0066, "step": 20074 }, { "epoch": 3.885642414860681, "grad_norm": 0.0712190493941307, "learning_rate": 7.069760569409256e-05, "loss": 0.0059, "step": 20075 }, { "epoch": 3.8858359133126936, "grad_norm": 0.08609607815742493, "learning_rate": 7.069503421757269e-05, "loss": 0.0058, "step": 20076 }, { "epoch": 3.8860294117647056, "grad_norm": 0.03194615617394447, "learning_rate": 7.069246268270254e-05, "loss": 0.0051, "step": 20077 }, { "epoch": 3.886222910216718, "grad_norm": 0.09193738549947739, "learning_rate": 7.068989108949166e-05, "loss": 0.0066, "step": 20078 }, { "epoch": 3.8864164086687305, "grad_norm": 0.04539461061358452, "learning_rate": 7.068731943794967e-05, "loss": 0.0051, "step": 20079 }, { "epoch": 3.886609907120743, "grad_norm": 0.05758645758032799, "learning_rate": 7.068474772808606e-05, "loss": 0.0059, "step": 20080 }, { "epoch": 3.8868034055727554, "grad_norm": 0.06670290231704712, "learning_rate": 7.068217595991045e-05, "loss": 0.0074, "step": 20081 }, { "epoch": 3.886996904024768, "grad_norm": 0.03530542552471161, "learning_rate": 7.067960413343235e-05, "loss": 0.0071, "step": 20082 }, { "epoch": 
3.8871904024767803, "grad_norm": 0.047859687358140945, "learning_rate": 7.067703224866134e-05, "loss": 0.0052, "step": 20083 }, { "epoch": 3.8873839009287927, "grad_norm": 0.038477618247270584, "learning_rate": 7.067446030560701e-05, "loss": 0.0047, "step": 20084 }, { "epoch": 3.887577399380805, "grad_norm": 0.03611251339316368, "learning_rate": 7.067188830427889e-05, "loss": 0.0053, "step": 20085 }, { "epoch": 3.8877708978328176, "grad_norm": 0.0896969586610794, "learning_rate": 7.066931624468656e-05, "loss": 0.0069, "step": 20086 }, { "epoch": 3.8879643962848296, "grad_norm": 0.02784014865756035, "learning_rate": 7.066674412683955e-05, "loss": 0.008, "step": 20087 }, { "epoch": 3.888157894736842, "grad_norm": 0.09124767780303955, "learning_rate": 7.066417195074745e-05, "loss": 0.0056, "step": 20088 }, { "epoch": 3.8883513931888545, "grad_norm": 0.03806564584374428, "learning_rate": 7.066159971641983e-05, "loss": 0.0053, "step": 20089 }, { "epoch": 3.888544891640867, "grad_norm": 0.0851980671286583, "learning_rate": 7.065902742386625e-05, "loss": 0.0057, "step": 20090 }, { "epoch": 3.8887383900928794, "grad_norm": 0.051557451486587524, "learning_rate": 7.065645507309625e-05, "loss": 0.0062, "step": 20091 }, { "epoch": 3.888931888544892, "grad_norm": 0.054456163197755814, "learning_rate": 7.065388266411941e-05, "loss": 0.0069, "step": 20092 }, { "epoch": 3.889125386996904, "grad_norm": 0.11731462925672531, "learning_rate": 7.065131019694529e-05, "loss": 0.0066, "step": 20093 }, { "epoch": 3.8893188854489162, "grad_norm": 0.054090168327093124, "learning_rate": 7.064873767158345e-05, "loss": 0.006, "step": 20094 }, { "epoch": 3.8895123839009287, "grad_norm": 0.09468961507081985, "learning_rate": 7.064616508804347e-05, "loss": 0.0062, "step": 20095 }, { "epoch": 3.889705882352941, "grad_norm": 0.08643447607755661, "learning_rate": 7.064359244633489e-05, "loss": 0.0077, "step": 20096 }, { "epoch": 3.8898993808049536, "grad_norm": 0.06869738548994064, "learning_rate": 7.064101974646732e-05, "loss": 0.0056, "step": 20097 }, { "epoch": 3.890092879256966, "grad_norm": 0.11791741102933884, "learning_rate": 7.063844698845027e-05, "loss": 0.007, "step": 20098 }, { "epoch": 3.8902863777089784, "grad_norm": 0.06942193955183029, "learning_rate": 7.063587417229333e-05, "loss": 0.0064, "step": 20099 }, { "epoch": 3.890479876160991, "grad_norm": 0.09429405629634857, "learning_rate": 7.063330129800605e-05, "loss": 0.0061, "step": 20100 }, { "epoch": 3.8906733746130033, "grad_norm": 0.10207511484622955, "learning_rate": 7.063072836559803e-05, "loss": 0.0064, "step": 20101 }, { "epoch": 3.8908668730650153, "grad_norm": 0.07401925325393677, "learning_rate": 7.062815537507879e-05, "loss": 0.0063, "step": 20102 }, { "epoch": 3.8910603715170278, "grad_norm": 0.1182950884103775, "learning_rate": 7.062558232645792e-05, "loss": 0.0055, "step": 20103 }, { "epoch": 3.89125386996904, "grad_norm": 0.06179551035165787, "learning_rate": 7.062300921974502e-05, "loss": 0.0054, "step": 20104 }, { "epoch": 3.8914473684210527, "grad_norm": 0.10204512625932693, "learning_rate": 7.06204360549496e-05, "loss": 0.0082, "step": 20105 }, { "epoch": 3.891640866873065, "grad_norm": 0.08083368837833405, "learning_rate": 7.061786283208123e-05, "loss": 0.0054, "step": 20106 }, { "epoch": 3.8918343653250775, "grad_norm": 0.04877008870244026, "learning_rate": 7.061528955114952e-05, "loss": 0.0068, "step": 20107 }, { "epoch": 3.8920278637770895, "grad_norm": 0.07917793095111847, "learning_rate": 7.061271621216399e-05, "loss": 0.0072, "step": 
20108 }, { "epoch": 3.892221362229102, "grad_norm": 0.029105203226208687, "learning_rate": 7.061014281513425e-05, "loss": 0.005, "step": 20109 }, { "epoch": 3.8924148606811144, "grad_norm": 0.08026999235153198, "learning_rate": 7.060756936006983e-05, "loss": 0.0067, "step": 20110 }, { "epoch": 3.892608359133127, "grad_norm": 0.024713966995477676, "learning_rate": 7.060499584698032e-05, "loss": 0.0055, "step": 20111 }, { "epoch": 3.8928018575851393, "grad_norm": 0.04988839477300644, "learning_rate": 7.060242227587526e-05, "loss": 0.0066, "step": 20112 }, { "epoch": 3.8929953560371517, "grad_norm": 0.02429155260324478, "learning_rate": 7.059984864676426e-05, "loss": 0.0071, "step": 20113 }, { "epoch": 3.893188854489164, "grad_norm": 0.048593468964099884, "learning_rate": 7.059727495965688e-05, "loss": 0.0057, "step": 20114 }, { "epoch": 3.8933823529411766, "grad_norm": 0.040806904435157776, "learning_rate": 7.059470121456266e-05, "loss": 0.0076, "step": 20115 }, { "epoch": 3.893575851393189, "grad_norm": 0.05744662880897522, "learning_rate": 7.059212741149118e-05, "loss": 0.0063, "step": 20116 }, { "epoch": 3.8937693498452015, "grad_norm": 0.06410235166549683, "learning_rate": 7.058955355045202e-05, "loss": 0.0063, "step": 20117 }, { "epoch": 3.8939628482972135, "grad_norm": 0.07109884917736053, "learning_rate": 7.058697963145474e-05, "loss": 0.0055, "step": 20118 }, { "epoch": 3.894156346749226, "grad_norm": 0.06441523134708405, "learning_rate": 7.058440565450887e-05, "loss": 0.0065, "step": 20119 }, { "epoch": 3.8943498452012384, "grad_norm": 0.04968903213739395, "learning_rate": 7.058183161962407e-05, "loss": 0.0062, "step": 20120 }, { "epoch": 3.894543343653251, "grad_norm": 0.07410665601491928, "learning_rate": 7.057925752680984e-05, "loss": 0.0061, "step": 20121 }, { "epoch": 3.8947368421052633, "grad_norm": 0.0354275219142437, "learning_rate": 7.057668337607579e-05, "loss": 0.0042, "step": 20122 }, { "epoch": 3.8949303405572753, "grad_norm": 0.11720193177461624, "learning_rate": 7.057410916743145e-05, "loss": 0.0057, "step": 20123 }, { "epoch": 3.8951238390092877, "grad_norm": 0.04663648083806038, "learning_rate": 7.057153490088641e-05, "loss": 0.0053, "step": 20124 }, { "epoch": 3.8953173374613, "grad_norm": 0.12612135708332062, "learning_rate": 7.056896057645026e-05, "loss": 0.0069, "step": 20125 }, { "epoch": 3.8955108359133126, "grad_norm": 0.049231868237257004, "learning_rate": 7.056638619413252e-05, "loss": 0.0077, "step": 20126 }, { "epoch": 3.895704334365325, "grad_norm": 0.06396231055259705, "learning_rate": 7.056381175394281e-05, "loss": 0.0059, "step": 20127 }, { "epoch": 3.8958978328173375, "grad_norm": 0.06389755755662918, "learning_rate": 7.056123725589068e-05, "loss": 0.0059, "step": 20128 }, { "epoch": 3.89609133126935, "grad_norm": 0.038341280072927475, "learning_rate": 7.05586626999857e-05, "loss": 0.0053, "step": 20129 }, { "epoch": 3.8962848297213624, "grad_norm": 0.05777394771575928, "learning_rate": 7.055608808623746e-05, "loss": 0.0046, "step": 20130 }, { "epoch": 3.896478328173375, "grad_norm": 0.048831064254045486, "learning_rate": 7.05535134146555e-05, "loss": 0.0058, "step": 20131 }, { "epoch": 3.8966718266253872, "grad_norm": 0.05757449194788933, "learning_rate": 7.055093868524941e-05, "loss": 0.0047, "step": 20132 }, { "epoch": 3.8968653250773992, "grad_norm": 0.051714055240154266, "learning_rate": 7.054836389802876e-05, "loss": 0.0069, "step": 20133 }, { "epoch": 3.8970588235294117, "grad_norm": 0.050310321152210236, "learning_rate": 
7.054578905300315e-05, "loss": 0.0061, "step": 20134 }, { "epoch": 3.897252321981424, "grad_norm": 0.04192091524600983, "learning_rate": 7.054321415018212e-05, "loss": 0.0054, "step": 20135 }, { "epoch": 3.8974458204334366, "grad_norm": 0.04853068292140961, "learning_rate": 7.054063918957524e-05, "loss": 0.0065, "step": 20136 }, { "epoch": 3.897639318885449, "grad_norm": 0.04035921394824982, "learning_rate": 7.053806417119209e-05, "loss": 0.0072, "step": 20137 }, { "epoch": 3.8978328173374615, "grad_norm": 0.06177471578121185, "learning_rate": 7.053548909504223e-05, "loss": 0.0081, "step": 20138 }, { "epoch": 3.8980263157894735, "grad_norm": 0.06434411555528641, "learning_rate": 7.053291396113527e-05, "loss": 0.0055, "step": 20139 }, { "epoch": 3.898219814241486, "grad_norm": 0.03984615579247475, "learning_rate": 7.053033876948077e-05, "loss": 0.0048, "step": 20140 }, { "epoch": 3.8984133126934983, "grad_norm": 0.06250424683094025, "learning_rate": 7.052776352008828e-05, "loss": 0.0073, "step": 20141 }, { "epoch": 3.8986068111455108, "grad_norm": 0.03938143327832222, "learning_rate": 7.052518821296741e-05, "loss": 0.0055, "step": 20142 }, { "epoch": 3.898800309597523, "grad_norm": 0.06771507114171982, "learning_rate": 7.052261284812771e-05, "loss": 0.0066, "step": 20143 }, { "epoch": 3.8989938080495357, "grad_norm": 0.051824335008859634, "learning_rate": 7.052003742557876e-05, "loss": 0.0051, "step": 20144 }, { "epoch": 3.899187306501548, "grad_norm": 0.04282427579164505, "learning_rate": 7.051746194533012e-05, "loss": 0.0073, "step": 20145 }, { "epoch": 3.8993808049535605, "grad_norm": 0.05337114259600639, "learning_rate": 7.051488640739139e-05, "loss": 0.0063, "step": 20146 }, { "epoch": 3.899574303405573, "grad_norm": 0.06557715684175491, "learning_rate": 7.051231081177213e-05, "loss": 0.0073, "step": 20147 }, { "epoch": 3.899767801857585, "grad_norm": 0.045337971299886703, "learning_rate": 7.050973515848194e-05, "loss": 0.0079, "step": 20148 }, { "epoch": 3.8999613003095974, "grad_norm": 0.057741399854421616, "learning_rate": 7.050715944753036e-05, "loss": 0.0059, "step": 20149 }, { "epoch": 3.90015479876161, "grad_norm": 0.035936806350946426, "learning_rate": 7.050458367892698e-05, "loss": 0.007, "step": 20150 }, { "epoch": 3.9003482972136223, "grad_norm": 0.0628664568066597, "learning_rate": 7.050200785268138e-05, "loss": 0.0058, "step": 20151 }, { "epoch": 3.9005417956656347, "grad_norm": 0.029671361669898033, "learning_rate": 7.049943196880315e-05, "loss": 0.0056, "step": 20152 }, { "epoch": 3.900735294117647, "grad_norm": 0.04895917326211929, "learning_rate": 7.049685602730184e-05, "loss": 0.0072, "step": 20153 }, { "epoch": 3.900928792569659, "grad_norm": 0.044647689908742905, "learning_rate": 7.049428002818703e-05, "loss": 0.0055, "step": 20154 }, { "epoch": 3.9011222910216716, "grad_norm": 0.032742906361818314, "learning_rate": 7.049170397146832e-05, "loss": 0.0073, "step": 20155 }, { "epoch": 3.901315789473684, "grad_norm": 0.05517172813415527, "learning_rate": 7.048912785715525e-05, "loss": 0.0055, "step": 20156 }, { "epoch": 3.9015092879256965, "grad_norm": 0.022379305213689804, "learning_rate": 7.048655168525744e-05, "loss": 0.0055, "step": 20157 }, { "epoch": 3.901702786377709, "grad_norm": 0.062213391065597534, "learning_rate": 7.048397545578445e-05, "loss": 0.0064, "step": 20158 }, { "epoch": 3.9018962848297214, "grad_norm": 0.03367840498685837, "learning_rate": 7.048139916874585e-05, "loss": 0.0065, "step": 20159 }, { "epoch": 3.902089783281734, "grad_norm": 
0.042603474110364914, "learning_rate": 7.04788228241512e-05, "loss": 0.006, "step": 20160 }, { "epoch": 3.9022832817337463, "grad_norm": 0.05611274018883705, "learning_rate": 7.047624642201012e-05, "loss": 0.0061, "step": 20161 }, { "epoch": 3.9024767801857587, "grad_norm": 0.018001502379775047, "learning_rate": 7.047366996233217e-05, "loss": 0.0057, "step": 20162 }, { "epoch": 3.902670278637771, "grad_norm": 0.04831403121352196, "learning_rate": 7.047109344512692e-05, "loss": 0.0051, "step": 20163 }, { "epoch": 3.902863777089783, "grad_norm": 0.024928515776991844, "learning_rate": 7.046851687040397e-05, "loss": 0.0078, "step": 20164 }, { "epoch": 3.9030572755417956, "grad_norm": 0.02616717852652073, "learning_rate": 7.046594023817287e-05, "loss": 0.0062, "step": 20165 }, { "epoch": 3.903250773993808, "grad_norm": 0.03197064995765686, "learning_rate": 7.046336354844323e-05, "loss": 0.0065, "step": 20166 }, { "epoch": 3.9034442724458205, "grad_norm": 0.026184653863310814, "learning_rate": 7.04607868012246e-05, "loss": 0.0043, "step": 20167 }, { "epoch": 3.903637770897833, "grad_norm": 0.03199600428342819, "learning_rate": 7.045820999652659e-05, "loss": 0.0061, "step": 20168 }, { "epoch": 3.9038312693498454, "grad_norm": 0.025504637509584427, "learning_rate": 7.045563313435876e-05, "loss": 0.0048, "step": 20169 }, { "epoch": 3.9040247678018574, "grad_norm": 0.028972774744033813, "learning_rate": 7.045305621473068e-05, "loss": 0.0057, "step": 20170 }, { "epoch": 3.90421826625387, "grad_norm": 0.02631639502942562, "learning_rate": 7.045047923765195e-05, "loss": 0.006, "step": 20171 }, { "epoch": 3.9044117647058822, "grad_norm": 0.04056786000728607, "learning_rate": 7.044790220313216e-05, "loss": 0.007, "step": 20172 }, { "epoch": 3.9046052631578947, "grad_norm": 0.09107380360364914, "learning_rate": 7.044532511118086e-05, "loss": 0.0064, "step": 20173 }, { "epoch": 3.904798761609907, "grad_norm": 0.060114990919828415, "learning_rate": 7.044274796180766e-05, "loss": 0.006, "step": 20174 }, { "epoch": 3.9049922600619196, "grad_norm": 0.07657919079065323, "learning_rate": 7.044017075502211e-05, "loss": 0.0068, "step": 20175 }, { "epoch": 3.905185758513932, "grad_norm": 0.05240928754210472, "learning_rate": 7.043759349083383e-05, "loss": 0.0056, "step": 20176 }, { "epoch": 3.9053792569659445, "grad_norm": 0.07264742255210876, "learning_rate": 7.043501616925238e-05, "loss": 0.0052, "step": 20177 }, { "epoch": 3.905572755417957, "grad_norm": 0.03067280910909176, "learning_rate": 7.043243879028733e-05, "loss": 0.0057, "step": 20178 }, { "epoch": 3.905766253869969, "grad_norm": 0.08287299424409866, "learning_rate": 7.042986135394829e-05, "loss": 0.0077, "step": 20179 }, { "epoch": 3.9059597523219813, "grad_norm": 0.04522261023521423, "learning_rate": 7.042728386024482e-05, "loss": 0.0056, "step": 20180 }, { "epoch": 3.906153250773994, "grad_norm": 0.08804817497730255, "learning_rate": 7.04247063091865e-05, "loss": 0.0071, "step": 20181 }, { "epoch": 3.906346749226006, "grad_norm": 0.04306775704026222, "learning_rate": 7.042212870078293e-05, "loss": 0.0063, "step": 20182 }, { "epoch": 3.9065402476780187, "grad_norm": 0.0630825087428093, "learning_rate": 7.041955103504369e-05, "loss": 0.0059, "step": 20183 }, { "epoch": 3.906733746130031, "grad_norm": 0.051528267562389374, "learning_rate": 7.041697331197835e-05, "loss": 0.0062, "step": 20184 }, { "epoch": 3.906927244582043, "grad_norm": 0.050302669405937195, "learning_rate": 7.041439553159651e-05, "loss": 0.005, "step": 20185 }, { "epoch": 
3.9071207430340555, "grad_norm": 0.062003448605537415, "learning_rate": 7.041181769390774e-05, "loss": 0.006, "step": 20186 }, { "epoch": 3.907314241486068, "grad_norm": 0.042308591306209564, "learning_rate": 7.040923979892164e-05, "loss": 0.0066, "step": 20187 }, { "epoch": 3.9075077399380804, "grad_norm": 0.06759792566299438, "learning_rate": 7.040666184664778e-05, "loss": 0.0068, "step": 20188 }, { "epoch": 3.907701238390093, "grad_norm": 0.03368259593844414, "learning_rate": 7.040408383709573e-05, "loss": 0.006, "step": 20189 }, { "epoch": 3.9078947368421053, "grad_norm": 0.04584081098437309, "learning_rate": 7.040150577027511e-05, "loss": 0.0044, "step": 20190 }, { "epoch": 3.9080882352941178, "grad_norm": 0.06451774388551712, "learning_rate": 7.039892764619548e-05, "loss": 0.0059, "step": 20191 }, { "epoch": 3.90828173374613, "grad_norm": 0.026055559515953064, "learning_rate": 7.039634946486642e-05, "loss": 0.0072, "step": 20192 }, { "epoch": 3.9084752321981426, "grad_norm": 0.060392457991838455, "learning_rate": 7.039377122629756e-05, "loss": 0.0057, "step": 20193 }, { "epoch": 3.9086687306501546, "grad_norm": 0.026024499908089638, "learning_rate": 7.039119293049842e-05, "loss": 0.0066, "step": 20194 }, { "epoch": 3.908862229102167, "grad_norm": 0.05625323951244354, "learning_rate": 7.038861457747863e-05, "loss": 0.0063, "step": 20195 }, { "epoch": 3.9090557275541795, "grad_norm": 0.04067853093147278, "learning_rate": 7.038603616724776e-05, "loss": 0.0065, "step": 20196 }, { "epoch": 3.909249226006192, "grad_norm": 0.03599395230412483, "learning_rate": 7.038345769981542e-05, "loss": 0.005, "step": 20197 }, { "epoch": 3.9094427244582044, "grad_norm": 0.0626337081193924, "learning_rate": 7.038087917519115e-05, "loss": 0.0066, "step": 20198 }, { "epoch": 3.909636222910217, "grad_norm": 0.02629222348332405, "learning_rate": 7.037830059338457e-05, "loss": 0.006, "step": 20199 }, { "epoch": 3.909829721362229, "grad_norm": 0.049931664019823074, "learning_rate": 7.037572195440525e-05, "loss": 0.0053, "step": 20200 }, { "epoch": 3.9100232198142413, "grad_norm": 0.03195636346936226, "learning_rate": 7.037314325826278e-05, "loss": 0.0059, "step": 20201 }, { "epoch": 3.9102167182662537, "grad_norm": 0.023576628416776657, "learning_rate": 7.037056450496676e-05, "loss": 0.0051, "step": 20202 }, { "epoch": 3.910410216718266, "grad_norm": 0.027429528534412384, "learning_rate": 7.036798569452677e-05, "loss": 0.0055, "step": 20203 }, { "epoch": 3.9106037151702786, "grad_norm": 0.02757442183792591, "learning_rate": 7.03654068269524e-05, "loss": 0.0063, "step": 20204 }, { "epoch": 3.910797213622291, "grad_norm": 0.046167802065610886, "learning_rate": 7.036282790225322e-05, "loss": 0.0059, "step": 20205 }, { "epoch": 3.9109907120743035, "grad_norm": 0.06410051137208939, "learning_rate": 7.036024892043885e-05, "loss": 0.0067, "step": 20206 }, { "epoch": 3.911184210526316, "grad_norm": 0.05601111799478531, "learning_rate": 7.035766988151884e-05, "loss": 0.0067, "step": 20207 }, { "epoch": 3.9113777089783284, "grad_norm": 0.07835657894611359, "learning_rate": 7.03550907855028e-05, "loss": 0.0073, "step": 20208 }, { "epoch": 3.911571207430341, "grad_norm": 0.08254988491535187, "learning_rate": 7.035251163240033e-05, "loss": 0.0063, "step": 20209 }, { "epoch": 3.911764705882353, "grad_norm": 0.050754364579916, "learning_rate": 7.034993242222099e-05, "loss": 0.0064, "step": 20210 }, { "epoch": 3.9119582043343653, "grad_norm": 0.07742263376712799, "learning_rate": 7.034735315497439e-05, "loss": 0.0058, "step": 
20211 }, { "epoch": 3.9121517027863777, "grad_norm": 0.05339338630437851, "learning_rate": 7.034477383067013e-05, "loss": 0.0069, "step": 20212 }, { "epoch": 3.91234520123839, "grad_norm": 0.08300649374723434, "learning_rate": 7.034219444931776e-05, "loss": 0.0055, "step": 20213 }, { "epoch": 3.9125386996904026, "grad_norm": 0.048433441668748856, "learning_rate": 7.033961501092689e-05, "loss": 0.0057, "step": 20214 }, { "epoch": 3.912732198142415, "grad_norm": 0.05755166709423065, "learning_rate": 7.033703551550711e-05, "loss": 0.0073, "step": 20215 }, { "epoch": 3.912925696594427, "grad_norm": 0.07610590010881424, "learning_rate": 7.033445596306802e-05, "loss": 0.0048, "step": 20216 }, { "epoch": 3.9131191950464395, "grad_norm": 0.03880075365304947, "learning_rate": 7.03318763536192e-05, "loss": 0.0056, "step": 20217 }, { "epoch": 3.913312693498452, "grad_norm": 0.07950456440448761, "learning_rate": 7.032929668717024e-05, "loss": 0.0079, "step": 20218 }, { "epoch": 3.9135061919504643, "grad_norm": 0.025089206174016, "learning_rate": 7.032671696373074e-05, "loss": 0.0064, "step": 20219 }, { "epoch": 3.913699690402477, "grad_norm": 0.1385296732187271, "learning_rate": 7.032413718331028e-05, "loss": 0.0049, "step": 20220 }, { "epoch": 3.9138931888544892, "grad_norm": 0.08471015840768814, "learning_rate": 7.032155734591846e-05, "loss": 0.0066, "step": 20221 }, { "epoch": 3.9140866873065017, "grad_norm": 0.10891158878803253, "learning_rate": 7.031897745156485e-05, "loss": 0.0071, "step": 20222 }, { "epoch": 3.914280185758514, "grad_norm": 0.05156676471233368, "learning_rate": 7.031639750025906e-05, "loss": 0.0063, "step": 20223 }, { "epoch": 3.9144736842105265, "grad_norm": 0.0999743863940239, "learning_rate": 7.031381749201069e-05, "loss": 0.0057, "step": 20224 }, { "epoch": 3.9146671826625385, "grad_norm": 0.08197793364524841, "learning_rate": 7.031123742682931e-05, "loss": 0.0065, "step": 20225 }, { "epoch": 3.914860681114551, "grad_norm": 0.08358150720596313, "learning_rate": 7.030865730472453e-05, "loss": 0.0072, "step": 20226 }, { "epoch": 3.9150541795665634, "grad_norm": 0.11079584062099457, "learning_rate": 7.03060771257059e-05, "loss": 0.007, "step": 20227 }, { "epoch": 3.915247678018576, "grad_norm": 0.06865765154361725, "learning_rate": 7.030349688978308e-05, "loss": 0.0079, "step": 20228 }, { "epoch": 3.9154411764705883, "grad_norm": 0.06516861170530319, "learning_rate": 7.030091659696563e-05, "loss": 0.0051, "step": 20229 }, { "epoch": 3.9156346749226008, "grad_norm": 0.07504508644342422, "learning_rate": 7.029833624726314e-05, "loss": 0.0071, "step": 20230 }, { "epoch": 3.9158281733746128, "grad_norm": 0.07247281819581985, "learning_rate": 7.02957558406852e-05, "loss": 0.0068, "step": 20231 }, { "epoch": 3.916021671826625, "grad_norm": 0.11182260513305664, "learning_rate": 7.02931753772414e-05, "loss": 0.0051, "step": 20232 }, { "epoch": 3.9162151702786376, "grad_norm": 0.09959301352500916, "learning_rate": 7.029059485694137e-05, "loss": 0.0063, "step": 20233 }, { "epoch": 3.91640866873065, "grad_norm": 0.0835549384355545, "learning_rate": 7.028801427979465e-05, "loss": 0.0063, "step": 20234 }, { "epoch": 3.9166021671826625, "grad_norm": 0.07355889678001404, "learning_rate": 7.028543364581087e-05, "loss": 0.0054, "step": 20235 }, { "epoch": 3.916795665634675, "grad_norm": 0.06705989688634872, "learning_rate": 7.02828529549996e-05, "loss": 0.005, "step": 20236 }, { "epoch": 3.9169891640866874, "grad_norm": 0.06607870757579803, "learning_rate": 7.028027220737047e-05, "loss": 
0.0055, "step": 20237 }, { "epoch": 3.9171826625387, "grad_norm": 0.05622951313853264, "learning_rate": 7.027769140293303e-05, "loss": 0.0064, "step": 20238 }, { "epoch": 3.9173761609907123, "grad_norm": 0.0638025775551796, "learning_rate": 7.027511054169692e-05, "loss": 0.0058, "step": 20239 }, { "epoch": 3.9175696594427247, "grad_norm": 0.07782430201768875, "learning_rate": 7.02725296236717e-05, "loss": 0.0063, "step": 20240 }, { "epoch": 3.9177631578947367, "grad_norm": 0.0816856250166893, "learning_rate": 7.026994864886698e-05, "loss": 0.0067, "step": 20241 }, { "epoch": 3.917956656346749, "grad_norm": 0.05882108584046364, "learning_rate": 7.026736761729236e-05, "loss": 0.0066, "step": 20242 }, { "epoch": 3.9181501547987616, "grad_norm": 0.07350657135248184, "learning_rate": 7.026478652895743e-05, "loss": 0.0067, "step": 20243 }, { "epoch": 3.918343653250774, "grad_norm": 0.055807605385780334, "learning_rate": 7.026220538387179e-05, "loss": 0.0062, "step": 20244 }, { "epoch": 3.9185371517027865, "grad_norm": 0.05434843525290489, "learning_rate": 7.0259624182045e-05, "loss": 0.006, "step": 20245 }, { "epoch": 3.9187306501547985, "grad_norm": 0.06523029506206512, "learning_rate": 7.025704292348672e-05, "loss": 0.0063, "step": 20246 }, { "epoch": 3.918924148606811, "grad_norm": 0.033628951758146286, "learning_rate": 7.025446160820649e-05, "loss": 0.0066, "step": 20247 }, { "epoch": 3.9191176470588234, "grad_norm": 0.07770073413848877, "learning_rate": 7.025188023621396e-05, "loss": 0.006, "step": 20248 }, { "epoch": 3.919311145510836, "grad_norm": 0.024883301928639412, "learning_rate": 7.024929880751867e-05, "loss": 0.0075, "step": 20249 }, { "epoch": 3.9195046439628483, "grad_norm": 0.06123695150017738, "learning_rate": 7.024671732213026e-05, "loss": 0.0074, "step": 20250 }, { "epoch": 3.9196981424148607, "grad_norm": 0.0442340224981308, "learning_rate": 7.024413578005831e-05, "loss": 0.0051, "step": 20251 }, { "epoch": 3.919891640866873, "grad_norm": 0.040258463472127914, "learning_rate": 7.02415541813124e-05, "loss": 0.0058, "step": 20252 }, { "epoch": 3.9200851393188856, "grad_norm": 0.04832844063639641, "learning_rate": 7.023897252590216e-05, "loss": 0.0064, "step": 20253 }, { "epoch": 3.920278637770898, "grad_norm": 0.03019348904490471, "learning_rate": 7.02363908138372e-05, "loss": 0.0061, "step": 20254 }, { "epoch": 3.9204721362229105, "grad_norm": 0.05041324719786644, "learning_rate": 7.023380904512707e-05, "loss": 0.006, "step": 20255 }, { "epoch": 3.9206656346749225, "grad_norm": 0.026393163949251175, "learning_rate": 7.023122721978139e-05, "loss": 0.0052, "step": 20256 }, { "epoch": 3.920859133126935, "grad_norm": 0.03540336713194847, "learning_rate": 7.022864533780977e-05, "loss": 0.0043, "step": 20257 }, { "epoch": 3.9210526315789473, "grad_norm": 0.06374834477901459, "learning_rate": 7.022606339922177e-05, "loss": 0.0057, "step": 20258 }, { "epoch": 3.92124613003096, "grad_norm": 0.026361333206295967, "learning_rate": 7.022348140402706e-05, "loss": 0.0065, "step": 20259 }, { "epoch": 3.9214396284829722, "grad_norm": 0.055761341005563736, "learning_rate": 7.022089935223517e-05, "loss": 0.0055, "step": 20260 }, { "epoch": 3.9216331269349847, "grad_norm": 0.03013632260262966, "learning_rate": 7.021831724385574e-05, "loss": 0.006, "step": 20261 }, { "epoch": 3.9218266253869967, "grad_norm": 0.029648471623659134, "learning_rate": 7.021573507889835e-05, "loss": 0.0062, "step": 20262 }, { "epoch": 3.922020123839009, "grad_norm": 0.04885442182421684, "learning_rate": 
7.02131528573726e-05, "loss": 0.0059, "step": 20263 }, { "epoch": 3.9222136222910216, "grad_norm": 0.02288469485938549, "learning_rate": 7.02105705792881e-05, "loss": 0.0051, "step": 20264 }, { "epoch": 3.922407120743034, "grad_norm": 0.044115498661994934, "learning_rate": 7.020798824465446e-05, "loss": 0.0069, "step": 20265 }, { "epoch": 3.9226006191950464, "grad_norm": 0.028196997940540314, "learning_rate": 7.020540585348124e-05, "loss": 0.0057, "step": 20266 }, { "epoch": 3.922794117647059, "grad_norm": 0.027585316449403763, "learning_rate": 7.02028234057781e-05, "loss": 0.0049, "step": 20267 }, { "epoch": 3.9229876160990713, "grad_norm": 0.05270618945360184, "learning_rate": 7.020024090155458e-05, "loss": 0.0067, "step": 20268 }, { "epoch": 3.9231811145510838, "grad_norm": 0.025693150237202644, "learning_rate": 7.019765834082034e-05, "loss": 0.0066, "step": 20269 }, { "epoch": 3.923374613003096, "grad_norm": 0.05476580560207367, "learning_rate": 7.019507572358493e-05, "loss": 0.0076, "step": 20270 }, { "epoch": 3.923568111455108, "grad_norm": 0.026070065796375275, "learning_rate": 7.019249304985798e-05, "loss": 0.0072, "step": 20271 }, { "epoch": 3.9237616099071206, "grad_norm": 0.052272096276283264, "learning_rate": 7.018991031964908e-05, "loss": 0.0065, "step": 20272 }, { "epoch": 3.923955108359133, "grad_norm": 0.02625451423227787, "learning_rate": 7.018732753296784e-05, "loss": 0.0076, "step": 20273 }, { "epoch": 3.9241486068111455, "grad_norm": 0.04845809563994408, "learning_rate": 7.018474468982386e-05, "loss": 0.0064, "step": 20274 }, { "epoch": 3.924342105263158, "grad_norm": 0.04001960530877113, "learning_rate": 7.018216179022674e-05, "loss": 0.0069, "step": 20275 }, { "epoch": 3.9245356037151704, "grad_norm": 0.0506385937333107, "learning_rate": 7.017957883418607e-05, "loss": 0.0063, "step": 20276 }, { "epoch": 3.9247291021671824, "grad_norm": 0.024039387702941895, "learning_rate": 7.017699582171148e-05, "loss": 0.0048, "step": 20277 }, { "epoch": 3.924922600619195, "grad_norm": 0.05788707360625267, "learning_rate": 7.017441275281256e-05, "loss": 0.0056, "step": 20278 }, { "epoch": 3.9251160990712073, "grad_norm": 0.02898249588906765, "learning_rate": 7.017182962749889e-05, "loss": 0.0069, "step": 20279 }, { "epoch": 3.9253095975232197, "grad_norm": 0.062443967908620834, "learning_rate": 7.016924644578014e-05, "loss": 0.0055, "step": 20280 }, { "epoch": 3.925503095975232, "grad_norm": 0.040969736874103546, "learning_rate": 7.016666320766583e-05, "loss": 0.0057, "step": 20281 }, { "epoch": 3.9256965944272446, "grad_norm": 0.0740068107843399, "learning_rate": 7.016407991316562e-05, "loss": 0.0042, "step": 20282 }, { "epoch": 3.925890092879257, "grad_norm": 0.043371107429265976, "learning_rate": 7.01614965622891e-05, "loss": 0.0062, "step": 20283 }, { "epoch": 3.9260835913312695, "grad_norm": 0.0707545205950737, "learning_rate": 7.015891315504586e-05, "loss": 0.0057, "step": 20284 }, { "epoch": 3.926277089783282, "grad_norm": 0.04007195681333542, "learning_rate": 7.015632969144553e-05, "loss": 0.0063, "step": 20285 }, { "epoch": 3.9264705882352944, "grad_norm": 0.048460979014635086, "learning_rate": 7.015374617149768e-05, "loss": 0.0068, "step": 20286 }, { "epoch": 3.9266640866873064, "grad_norm": 0.03528350591659546, "learning_rate": 7.015116259521195e-05, "loss": 0.0063, "step": 20287 }, { "epoch": 3.926857585139319, "grad_norm": 0.048293501138687134, "learning_rate": 7.014857896259794e-05, "loss": 0.0056, "step": 20288 }, { "epoch": 3.9270510835913313, "grad_norm": 
0.04187393933534622, "learning_rate": 7.014599527366523e-05, "loss": 0.0065, "step": 20289 }, { "epoch": 3.9272445820433437, "grad_norm": 0.04673805832862854, "learning_rate": 7.014341152842344e-05, "loss": 0.0055, "step": 20290 }, { "epoch": 3.927438080495356, "grad_norm": 0.023537691682577133, "learning_rate": 7.014082772688218e-05, "loss": 0.0045, "step": 20291 }, { "epoch": 3.9276315789473686, "grad_norm": 0.05258498340845108, "learning_rate": 7.013824386905106e-05, "loss": 0.0066, "step": 20292 }, { "epoch": 3.9278250773993806, "grad_norm": 0.039492521435022354, "learning_rate": 7.013565995493969e-05, "loss": 0.005, "step": 20293 }, { "epoch": 3.928018575851393, "grad_norm": 0.0408921092748642, "learning_rate": 7.013307598455766e-05, "loss": 0.0063, "step": 20294 }, { "epoch": 3.9282120743034055, "grad_norm": 0.05108769237995148, "learning_rate": 7.013049195791459e-05, "loss": 0.0066, "step": 20295 }, { "epoch": 3.928405572755418, "grad_norm": 0.06941656768321991, "learning_rate": 7.012790787502005e-05, "loss": 0.0059, "step": 20296 }, { "epoch": 3.9285990712074303, "grad_norm": 0.045009735971689224, "learning_rate": 7.012532373588369e-05, "loss": 0.0061, "step": 20297 }, { "epoch": 3.928792569659443, "grad_norm": 0.07339458167552948, "learning_rate": 7.01227395405151e-05, "loss": 0.0072, "step": 20298 }, { "epoch": 3.9289860681114552, "grad_norm": 0.04523870721459389, "learning_rate": 7.012015528892389e-05, "loss": 0.007, "step": 20299 }, { "epoch": 3.9291795665634677, "grad_norm": 0.05610157176852226, "learning_rate": 7.011757098111968e-05, "loss": 0.0054, "step": 20300 }, { "epoch": 3.92937306501548, "grad_norm": 0.04303843900561333, "learning_rate": 7.011498661711206e-05, "loss": 0.0072, "step": 20301 }, { "epoch": 3.929566563467492, "grad_norm": 0.0640818402171135, "learning_rate": 7.011240219691066e-05, "loss": 0.0049, "step": 20302 }, { "epoch": 3.9297600619195046, "grad_norm": 0.05766461417078972, "learning_rate": 7.010981772052504e-05, "loss": 0.0069, "step": 20303 }, { "epoch": 3.929953560371517, "grad_norm": 0.07097645848989487, "learning_rate": 7.010723318796485e-05, "loss": 0.0048, "step": 20304 }, { "epoch": 3.9301470588235294, "grad_norm": 0.06695401668548584, "learning_rate": 7.01046485992397e-05, "loss": 0.0064, "step": 20305 }, { "epoch": 3.930340557275542, "grad_norm": 0.08097133785486221, "learning_rate": 7.010206395435918e-05, "loss": 0.006, "step": 20306 }, { "epoch": 3.9305340557275543, "grad_norm": 0.04421905800700188, "learning_rate": 7.009947925333292e-05, "loss": 0.0065, "step": 20307 }, { "epoch": 3.9307275541795663, "grad_norm": 0.0368858240544796, "learning_rate": 7.009689449617052e-05, "loss": 0.0065, "step": 20308 }, { "epoch": 3.9309210526315788, "grad_norm": 0.038320060819387436, "learning_rate": 7.009430968288155e-05, "loss": 0.0064, "step": 20309 }, { "epoch": 3.931114551083591, "grad_norm": 0.05393772944808006, "learning_rate": 7.00917248134757e-05, "loss": 0.0074, "step": 20310 }, { "epoch": 3.9313080495356036, "grad_norm": 0.0736820176243782, "learning_rate": 7.008913988796251e-05, "loss": 0.0057, "step": 20311 }, { "epoch": 3.931501547987616, "grad_norm": 0.060723260045051575, "learning_rate": 7.008655490635161e-05, "loss": 0.0071, "step": 20312 }, { "epoch": 3.9316950464396285, "grad_norm": 0.07866939902305603, "learning_rate": 7.008396986865265e-05, "loss": 0.005, "step": 20313 }, { "epoch": 3.931888544891641, "grad_norm": 0.07158922404050827, "learning_rate": 7.008138477487518e-05, "loss": 0.0075, "step": 20314 }, { "epoch": 
3.9320820433436534, "grad_norm": 0.06151753291487694, "learning_rate": 7.007879962502885e-05, "loss": 0.0048, "step": 20315 }, { "epoch": 3.932275541795666, "grad_norm": 0.09634152054786682, "learning_rate": 7.007621441912324e-05, "loss": 0.0065, "step": 20316 }, { "epoch": 3.9324690402476783, "grad_norm": 0.075218565762043, "learning_rate": 7.007362915716799e-05, "loss": 0.0061, "step": 20317 }, { "epoch": 3.9326625386996903, "grad_norm": 0.07652046531438828, "learning_rate": 7.007104383917272e-05, "loss": 0.0068, "step": 20318 }, { "epoch": 3.9328560371517027, "grad_norm": 0.10179079324007034, "learning_rate": 7.0068458465147e-05, "loss": 0.0057, "step": 20319 }, { "epoch": 3.933049535603715, "grad_norm": 0.09248532354831696, "learning_rate": 7.006587303510047e-05, "loss": 0.0059, "step": 20320 }, { "epoch": 3.9332430340557276, "grad_norm": 0.08539120852947235, "learning_rate": 7.006328754904273e-05, "loss": 0.0068, "step": 20321 }, { "epoch": 3.93343653250774, "grad_norm": 0.03482604771852493, "learning_rate": 7.00607020069834e-05, "loss": 0.0065, "step": 20322 }, { "epoch": 3.933630030959752, "grad_norm": 0.07729917019605637, "learning_rate": 7.005811640893211e-05, "loss": 0.0061, "step": 20323 }, { "epoch": 3.9338235294117645, "grad_norm": 0.04275960847735405, "learning_rate": 7.005553075489845e-05, "loss": 0.0052, "step": 20324 }, { "epoch": 3.934017027863777, "grad_norm": 0.07784814387559891, "learning_rate": 7.005294504489202e-05, "loss": 0.0066, "step": 20325 }, { "epoch": 3.9342105263157894, "grad_norm": 0.06852307170629501, "learning_rate": 7.005035927892244e-05, "loss": 0.0066, "step": 20326 }, { "epoch": 3.934404024767802, "grad_norm": 0.07290929555892944, "learning_rate": 7.004777345699935e-05, "loss": 0.0061, "step": 20327 }, { "epoch": 3.9345975232198143, "grad_norm": 0.10176965594291687, "learning_rate": 7.004518757913235e-05, "loss": 0.0064, "step": 20328 }, { "epoch": 3.9347910216718267, "grad_norm": 0.08003508299589157, "learning_rate": 7.004260164533104e-05, "loss": 0.0046, "step": 20329 }, { "epoch": 3.934984520123839, "grad_norm": 0.06588736921548843, "learning_rate": 7.004001565560504e-05, "loss": 0.0065, "step": 20330 }, { "epoch": 3.9351780185758516, "grad_norm": 0.10568644106388092, "learning_rate": 7.003742960996398e-05, "loss": 0.006, "step": 20331 }, { "epoch": 3.935371517027864, "grad_norm": 0.040701497346162796, "learning_rate": 7.003484350841747e-05, "loss": 0.0056, "step": 20332 }, { "epoch": 3.935565015479876, "grad_norm": 0.10493296384811401, "learning_rate": 7.00322573509751e-05, "loss": 0.0056, "step": 20333 }, { "epoch": 3.9357585139318885, "grad_norm": 0.05286772549152374, "learning_rate": 7.00296711376465e-05, "loss": 0.0078, "step": 20334 }, { "epoch": 3.935952012383901, "grad_norm": 0.08991473913192749, "learning_rate": 7.002708486844128e-05, "loss": 0.0052, "step": 20335 }, { "epoch": 3.9361455108359134, "grad_norm": 0.05266944319009781, "learning_rate": 7.002449854336908e-05, "loss": 0.0066, "step": 20336 }, { "epoch": 3.936339009287926, "grad_norm": 0.06797026097774506, "learning_rate": 7.002191216243948e-05, "loss": 0.0052, "step": 20337 }, { "epoch": 3.9365325077399382, "grad_norm": 0.046587832272052765, "learning_rate": 7.001932572566214e-05, "loss": 0.0063, "step": 20338 }, { "epoch": 3.9367260061919502, "grad_norm": 0.04919285699725151, "learning_rate": 7.001673923304662e-05, "loss": 0.0068, "step": 20339 }, { "epoch": 3.9369195046439627, "grad_norm": 0.04189716652035713, "learning_rate": 7.001415268460259e-05, "loss": 0.0061, "step": 
20340 }, { "epoch": 3.937113003095975, "grad_norm": 0.04948046803474426, "learning_rate": 7.00115660803396e-05, "loss": 0.0061, "step": 20341 }, { "epoch": 3.9373065015479876, "grad_norm": 0.04968106001615524, "learning_rate": 7.000897942026734e-05, "loss": 0.0064, "step": 20342 }, { "epoch": 3.9375, "grad_norm": 0.05237847939133644, "learning_rate": 7.000639270439539e-05, "loss": 0.0057, "step": 20343 }, { "epoch": 3.9376934984520124, "grad_norm": 0.05354071035981178, "learning_rate": 7.000380593273335e-05, "loss": 0.0055, "step": 20344 }, { "epoch": 3.937886996904025, "grad_norm": 0.07013565301895142, "learning_rate": 7.000121910529087e-05, "loss": 0.0074, "step": 20345 }, { "epoch": 3.9380804953560373, "grad_norm": 0.04360537976026535, "learning_rate": 6.999863222207755e-05, "loss": 0.0077, "step": 20346 }, { "epoch": 3.9382739938080498, "grad_norm": 0.0737403854727745, "learning_rate": 6.9996045283103e-05, "loss": 0.0058, "step": 20347 }, { "epoch": 3.9384674922600618, "grad_norm": 0.034775860607624054, "learning_rate": 6.999345828837687e-05, "loss": 0.0058, "step": 20348 }, { "epoch": 3.938660990712074, "grad_norm": 0.06233838573098183, "learning_rate": 6.999087123790875e-05, "loss": 0.0052, "step": 20349 }, { "epoch": 3.9388544891640866, "grad_norm": 0.0462249331176281, "learning_rate": 6.998828413170827e-05, "loss": 0.0054, "step": 20350 }, { "epoch": 3.939047987616099, "grad_norm": 0.03089560568332672, "learning_rate": 6.998569696978504e-05, "loss": 0.0065, "step": 20351 }, { "epoch": 3.9392414860681115, "grad_norm": 0.038806937634944916, "learning_rate": 6.998310975214866e-05, "loss": 0.0065, "step": 20352 }, { "epoch": 3.939434984520124, "grad_norm": 0.035762641578912735, "learning_rate": 6.998052247880878e-05, "loss": 0.006, "step": 20353 }, { "epoch": 3.939628482972136, "grad_norm": 0.027671216055750847, "learning_rate": 6.997793514977502e-05, "loss": 0.0062, "step": 20354 }, { "epoch": 3.9398219814241484, "grad_norm": 0.0453542023897171, "learning_rate": 6.997534776505699e-05, "loss": 0.0078, "step": 20355 }, { "epoch": 3.940015479876161, "grad_norm": 0.031132113188505173, "learning_rate": 6.99727603246643e-05, "loss": 0.0054, "step": 20356 }, { "epoch": 3.9402089783281733, "grad_norm": 0.039378680288791656, "learning_rate": 6.997017282860658e-05, "loss": 0.0059, "step": 20357 }, { "epoch": 3.9404024767801857, "grad_norm": 0.03457273170351982, "learning_rate": 6.996758527689344e-05, "loss": 0.0062, "step": 20358 }, { "epoch": 3.940595975232198, "grad_norm": 0.03561507910490036, "learning_rate": 6.99649976695345e-05, "loss": 0.0052, "step": 20359 }, { "epoch": 3.9407894736842106, "grad_norm": 0.04414993152022362, "learning_rate": 6.99624100065394e-05, "loss": 0.0072, "step": 20360 }, { "epoch": 3.940982972136223, "grad_norm": 0.032634541392326355, "learning_rate": 6.995982228791774e-05, "loss": 0.0064, "step": 20361 }, { "epoch": 3.9411764705882355, "grad_norm": 0.0516144335269928, "learning_rate": 6.995723451367915e-05, "loss": 0.0071, "step": 20362 }, { "epoch": 3.941369969040248, "grad_norm": 0.043628349900245667, "learning_rate": 6.995464668383325e-05, "loss": 0.0058, "step": 20363 }, { "epoch": 3.94156346749226, "grad_norm": 0.05205341801047325, "learning_rate": 6.995205879838967e-05, "loss": 0.0064, "step": 20364 }, { "epoch": 3.9417569659442724, "grad_norm": 0.04626822471618652, "learning_rate": 6.994947085735799e-05, "loss": 0.0062, "step": 20365 }, { "epoch": 3.941950464396285, "grad_norm": 0.06593085825443268, "learning_rate": 6.994688286074787e-05, "loss": 0.0062, 
"step": 20366 }, { "epoch": 3.9421439628482973, "grad_norm": 0.03868240863084793, "learning_rate": 6.994429480856893e-05, "loss": 0.0056, "step": 20367 }, { "epoch": 3.9423374613003097, "grad_norm": 0.09159465879201889, "learning_rate": 6.994170670083079e-05, "loss": 0.0069, "step": 20368 }, { "epoch": 3.9425309597523217, "grad_norm": 0.04008787125349045, "learning_rate": 6.993911853754304e-05, "loss": 0.0062, "step": 20369 }, { "epoch": 3.942724458204334, "grad_norm": 0.0690755844116211, "learning_rate": 6.993653031871536e-05, "loss": 0.0073, "step": 20370 }, { "epoch": 3.9429179566563466, "grad_norm": 0.05145697295665741, "learning_rate": 6.993394204435734e-05, "loss": 0.0069, "step": 20371 }, { "epoch": 3.943111455108359, "grad_norm": 0.03954358398914337, "learning_rate": 6.993135371447857e-05, "loss": 0.0061, "step": 20372 }, { "epoch": 3.9433049535603715, "grad_norm": 0.05958249047398567, "learning_rate": 6.992876532908873e-05, "loss": 0.0056, "step": 20373 }, { "epoch": 3.943498452012384, "grad_norm": 0.03609209507703781, "learning_rate": 6.992617688819742e-05, "loss": 0.0058, "step": 20374 }, { "epoch": 3.9436919504643964, "grad_norm": 0.047872625291347504, "learning_rate": 6.992358839181425e-05, "loss": 0.0065, "step": 20375 }, { "epoch": 3.943885448916409, "grad_norm": 0.04047152027487755, "learning_rate": 6.992099983994887e-05, "loss": 0.0068, "step": 20376 }, { "epoch": 3.9440789473684212, "grad_norm": 0.044366128742694855, "learning_rate": 6.991841123261087e-05, "loss": 0.0053, "step": 20377 }, { "epoch": 3.9442724458204337, "grad_norm": 0.044458698481321335, "learning_rate": 6.991582256980993e-05, "loss": 0.0057, "step": 20378 }, { "epoch": 3.9444659442724457, "grad_norm": 0.0457552932202816, "learning_rate": 6.99132338515556e-05, "loss": 0.0056, "step": 20379 }, { "epoch": 3.944659442724458, "grad_norm": 0.05178040638566017, "learning_rate": 6.991064507785754e-05, "loss": 0.0055, "step": 20380 }, { "epoch": 3.9448529411764706, "grad_norm": 0.05065307021141052, "learning_rate": 6.990805624872541e-05, "loss": 0.0065, "step": 20381 }, { "epoch": 3.945046439628483, "grad_norm": 0.05862841010093689, "learning_rate": 6.990546736416877e-05, "loss": 0.0056, "step": 20382 }, { "epoch": 3.9452399380804954, "grad_norm": 0.04923880472779274, "learning_rate": 6.99028784241973e-05, "loss": 0.0052, "step": 20383 }, { "epoch": 3.945433436532508, "grad_norm": 0.0395718552172184, "learning_rate": 6.990028942882057e-05, "loss": 0.0063, "step": 20384 }, { "epoch": 3.94562693498452, "grad_norm": 0.058988142758607864, "learning_rate": 6.989770037804825e-05, "loss": 0.0058, "step": 20385 }, { "epoch": 3.9458204334365323, "grad_norm": 0.02007875218987465, "learning_rate": 6.989511127188994e-05, "loss": 0.0058, "step": 20386 }, { "epoch": 3.9460139318885448, "grad_norm": 0.050094470381736755, "learning_rate": 6.98925221103553e-05, "loss": 0.006, "step": 20387 }, { "epoch": 3.946207430340557, "grad_norm": 0.03012753278017044, "learning_rate": 6.988993289345393e-05, "loss": 0.0056, "step": 20388 }, { "epoch": 3.9464009287925697, "grad_norm": 0.051275528967380524, "learning_rate": 6.988734362119544e-05, "loss": 0.0064, "step": 20389 }, { "epoch": 3.946594427244582, "grad_norm": 0.03513948991894722, "learning_rate": 6.98847542935895e-05, "loss": 0.006, "step": 20390 }, { "epoch": 3.9467879256965945, "grad_norm": 0.0287238247692585, "learning_rate": 6.988216491064568e-05, "loss": 0.0054, "step": 20391 }, { "epoch": 3.946981424148607, "grad_norm": 0.028719427064061165, "learning_rate": 
6.987957547237365e-05, "loss": 0.0058, "step": 20392 }, { "epoch": 3.9471749226006194, "grad_norm": 0.03465605154633522, "learning_rate": 6.987698597878304e-05, "loss": 0.0054, "step": 20393 }, { "epoch": 3.9473684210526314, "grad_norm": 0.029281137511134148, "learning_rate": 6.987439642988344e-05, "loss": 0.0054, "step": 20394 }, { "epoch": 3.947561919504644, "grad_norm": 0.029608270153403282, "learning_rate": 6.98718068256845e-05, "loss": 0.0065, "step": 20395 }, { "epoch": 3.9477554179566563, "grad_norm": 0.04808221012353897, "learning_rate": 6.986921716619587e-05, "loss": 0.0071, "step": 20396 }, { "epoch": 3.9479489164086687, "grad_norm": 0.040750112384557724, "learning_rate": 6.986662745142714e-05, "loss": 0.0068, "step": 20397 }, { "epoch": 3.948142414860681, "grad_norm": 0.062150079756975174, "learning_rate": 6.986403768138795e-05, "loss": 0.0067, "step": 20398 }, { "epoch": 3.9483359133126936, "grad_norm": 0.034693822264671326, "learning_rate": 6.986144785608794e-05, "loss": 0.0053, "step": 20399 }, { "epoch": 3.9485294117647056, "grad_norm": 0.054810456931591034, "learning_rate": 6.985885797553672e-05, "loss": 0.0066, "step": 20400 }, { "epoch": 3.948722910216718, "grad_norm": 0.04851232096552849, "learning_rate": 6.985626803974393e-05, "loss": 0.0071, "step": 20401 }, { "epoch": 3.9489164086687305, "grad_norm": 0.04304933547973633, "learning_rate": 6.985367804871918e-05, "loss": 0.0055, "step": 20402 }, { "epoch": 3.949109907120743, "grad_norm": 0.05021115764975548, "learning_rate": 6.985108800247214e-05, "loss": 0.0069, "step": 20403 }, { "epoch": 3.9493034055727554, "grad_norm": 0.05412565916776657, "learning_rate": 6.984849790101238e-05, "loss": 0.0057, "step": 20404 }, { "epoch": 3.949496904024768, "grad_norm": 0.04747943952679634, "learning_rate": 6.98459077443496e-05, "loss": 0.0072, "step": 20405 }, { "epoch": 3.9496904024767803, "grad_norm": 0.03170578181743622, "learning_rate": 6.984331753249338e-05, "loss": 0.0078, "step": 20406 }, { "epoch": 3.9498839009287927, "grad_norm": 0.041868001222610474, "learning_rate": 6.984072726545337e-05, "loss": 0.0056, "step": 20407 }, { "epoch": 3.950077399380805, "grad_norm": 0.03619346022605896, "learning_rate": 6.983813694323918e-05, "loss": 0.0062, "step": 20408 }, { "epoch": 3.9502708978328176, "grad_norm": 0.03689716011285782, "learning_rate": 6.983554656586047e-05, "loss": 0.0063, "step": 20409 }, { "epoch": 3.9504643962848296, "grad_norm": 0.03504448011517525, "learning_rate": 6.983295613332684e-05, "loss": 0.0076, "step": 20410 }, { "epoch": 3.950657894736842, "grad_norm": 0.0594744011759758, "learning_rate": 6.983036564564794e-05, "loss": 0.0065, "step": 20411 }, { "epoch": 3.9508513931888545, "grad_norm": 0.026274409145116806, "learning_rate": 6.982777510283339e-05, "loss": 0.0049, "step": 20412 }, { "epoch": 3.951044891640867, "grad_norm": 0.06117083504796028, "learning_rate": 6.982518450489284e-05, "loss": 0.0055, "step": 20413 }, { "epoch": 3.9512383900928794, "grad_norm": 0.03092189133167267, "learning_rate": 6.98225938518359e-05, "loss": 0.0059, "step": 20414 }, { "epoch": 3.951431888544892, "grad_norm": 0.05498272180557251, "learning_rate": 6.982000314367219e-05, "loss": 0.0061, "step": 20415 }, { "epoch": 3.951625386996904, "grad_norm": 0.04114125296473503, "learning_rate": 6.981741238041138e-05, "loss": 0.0048, "step": 20416 }, { "epoch": 3.9518188854489162, "grad_norm": 0.04617041349411011, "learning_rate": 6.981482156206307e-05, "loss": 0.0053, "step": 20417 }, { "epoch": 3.9520123839009287, "grad_norm": 
0.0397014357149601, "learning_rate": 6.981223068863691e-05, "loss": 0.0059, "step": 20418 }, { "epoch": 3.952205882352941, "grad_norm": 0.04392797499895096, "learning_rate": 6.980963976014252e-05, "loss": 0.007, "step": 20419 }, { "epoch": 3.9523993808049536, "grad_norm": 0.0414874404668808, "learning_rate": 6.980704877658956e-05, "loss": 0.0058, "step": 20420 }, { "epoch": 3.952592879256966, "grad_norm": 0.04156627133488655, "learning_rate": 6.980445773798762e-05, "loss": 0.007, "step": 20421 }, { "epoch": 3.9527863777089784, "grad_norm": 0.026664674282073975, "learning_rate": 6.980186664434635e-05, "loss": 0.006, "step": 20422 }, { "epoch": 3.952979876160991, "grad_norm": 0.049409497529268265, "learning_rate": 6.979927549567539e-05, "loss": 0.0057, "step": 20423 }, { "epoch": 3.9531733746130033, "grad_norm": 0.03553266450762749, "learning_rate": 6.979668429198438e-05, "loss": 0.0064, "step": 20424 }, { "epoch": 3.9533668730650153, "grad_norm": 0.04390713572502136, "learning_rate": 6.979409303328293e-05, "loss": 0.0056, "step": 20425 }, { "epoch": 3.9535603715170278, "grad_norm": 0.04156472906470299, "learning_rate": 6.979150171958071e-05, "loss": 0.0065, "step": 20426 }, { "epoch": 3.95375386996904, "grad_norm": 0.0413765013217926, "learning_rate": 6.978891035088731e-05, "loss": 0.0066, "step": 20427 }, { "epoch": 3.9539473684210527, "grad_norm": 0.04008936136960983, "learning_rate": 6.97863189272124e-05, "loss": 0.006, "step": 20428 }, { "epoch": 3.954140866873065, "grad_norm": 0.03631480410695076, "learning_rate": 6.978372744856561e-05, "loss": 0.0068, "step": 20429 }, { "epoch": 3.9543343653250775, "grad_norm": 0.04161260277032852, "learning_rate": 6.978113591495652e-05, "loss": 0.0053, "step": 20430 }, { "epoch": 3.9545278637770895, "grad_norm": 0.048719584941864014, "learning_rate": 6.977854432639485e-05, "loss": 0.0051, "step": 20431 }, { "epoch": 3.954721362229102, "grad_norm": 0.030325332656502724, "learning_rate": 6.977595268289018e-05, "loss": 0.0064, "step": 20432 }, { "epoch": 3.9549148606811144, "grad_norm": 0.06820137798786163, "learning_rate": 6.977336098445217e-05, "loss": 0.0068, "step": 20433 }, { "epoch": 3.955108359133127, "grad_norm": 0.024144578725099564, "learning_rate": 6.977076923109044e-05, "loss": 0.0064, "step": 20434 }, { "epoch": 3.9553018575851393, "grad_norm": 0.05607886239886284, "learning_rate": 6.976817742281461e-05, "loss": 0.0062, "step": 20435 }, { "epoch": 3.9554953560371517, "grad_norm": 0.0572853684425354, "learning_rate": 6.976558555963434e-05, "loss": 0.0068, "step": 20436 }, { "epoch": 3.955688854489164, "grad_norm": 0.044796112924814224, "learning_rate": 6.976299364155928e-05, "loss": 0.0062, "step": 20437 }, { "epoch": 3.9558823529411766, "grad_norm": 0.04490407183766365, "learning_rate": 6.976040166859904e-05, "loss": 0.0057, "step": 20438 }, { "epoch": 3.956075851393189, "grad_norm": 0.05533573031425476, "learning_rate": 6.975780964076326e-05, "loss": 0.0054, "step": 20439 }, { "epoch": 3.9562693498452015, "grad_norm": 0.03618213161826134, "learning_rate": 6.975521755806158e-05, "loss": 0.0067, "step": 20440 }, { "epoch": 3.9564628482972135, "grad_norm": 0.07965435087680817, "learning_rate": 6.975262542050364e-05, "loss": 0.0046, "step": 20441 }, { "epoch": 3.956656346749226, "grad_norm": 0.04460339993238449, "learning_rate": 6.975003322809907e-05, "loss": 0.0063, "step": 20442 }, { "epoch": 3.9568498452012384, "grad_norm": 0.05850673094391823, "learning_rate": 6.974744098085751e-05, "loss": 0.0058, "step": 20443 }, { "epoch": 
3.957043343653251, "grad_norm": 0.03931904211640358, "learning_rate": 6.97448486787886e-05, "loss": 0.006, "step": 20444 }, { "epoch": 3.9572368421052633, "grad_norm": 0.030109554529190063, "learning_rate": 6.974225632190197e-05, "loss": 0.0043, "step": 20445 }, { "epoch": 3.9574303405572753, "grad_norm": 0.021149208769202232, "learning_rate": 6.973966391020728e-05, "loss": 0.0056, "step": 20446 }, { "epoch": 3.9576238390092877, "grad_norm": 0.01992829330265522, "learning_rate": 6.973707144371414e-05, "loss": 0.0064, "step": 20447 }, { "epoch": 3.9578173374613, "grad_norm": 0.029063226655125618, "learning_rate": 6.973447892243221e-05, "loss": 0.0058, "step": 20448 }, { "epoch": 3.9580108359133126, "grad_norm": 0.04255643114447594, "learning_rate": 6.97318863463711e-05, "loss": 0.0051, "step": 20449 }, { "epoch": 3.958204334365325, "grad_norm": 0.026934389024972916, "learning_rate": 6.972929371554046e-05, "loss": 0.0053, "step": 20450 }, { "epoch": 3.9583978328173375, "grad_norm": 0.027729175984859467, "learning_rate": 6.972670102994995e-05, "loss": 0.0061, "step": 20451 }, { "epoch": 3.95859133126935, "grad_norm": 0.04106764867901802, "learning_rate": 6.972410828960921e-05, "loss": 0.0067, "step": 20452 }, { "epoch": 3.9587848297213624, "grad_norm": 0.021337108686566353, "learning_rate": 6.972151549452786e-05, "loss": 0.0072, "step": 20453 }, { "epoch": 3.958978328173375, "grad_norm": 0.051290903240442276, "learning_rate": 6.971892264471554e-05, "loss": 0.0068, "step": 20454 }, { "epoch": 3.9591718266253872, "grad_norm": 0.027717314660549164, "learning_rate": 6.971632974018186e-05, "loss": 0.0054, "step": 20455 }, { "epoch": 3.9593653250773992, "grad_norm": 0.034001950174570084, "learning_rate": 6.971373678093653e-05, "loss": 0.0067, "step": 20456 }, { "epoch": 3.9595588235294117, "grad_norm": 0.04700268432497978, "learning_rate": 6.971114376698913e-05, "loss": 0.0071, "step": 20457 }, { "epoch": 3.959752321981424, "grad_norm": 0.034368846565485, "learning_rate": 6.970855069834934e-05, "loss": 0.006, "step": 20458 }, { "epoch": 3.9599458204334366, "grad_norm": 0.054943252354860306, "learning_rate": 6.970595757502678e-05, "loss": 0.0076, "step": 20459 }, { "epoch": 3.960139318885449, "grad_norm": 0.05280488729476929, "learning_rate": 6.970336439703108e-05, "loss": 0.0066, "step": 20460 }, { "epoch": 3.9603328173374615, "grad_norm": 0.040786489844322205, "learning_rate": 6.97007711643719e-05, "loss": 0.0069, "step": 20461 }, { "epoch": 3.9605263157894735, "grad_norm": 0.07559847831726074, "learning_rate": 6.969817787705888e-05, "loss": 0.0064, "step": 20462 }, { "epoch": 3.960719814241486, "grad_norm": 0.051123905926942825, "learning_rate": 6.969558453510165e-05, "loss": 0.0076, "step": 20463 }, { "epoch": 3.9609133126934983, "grad_norm": 0.08058971166610718, "learning_rate": 6.969299113850987e-05, "loss": 0.0066, "step": 20464 }, { "epoch": 3.9611068111455108, "grad_norm": 0.07923255115747452, "learning_rate": 6.969039768729315e-05, "loss": 0.0056, "step": 20465 }, { "epoch": 3.961300309597523, "grad_norm": 0.05078720301389694, "learning_rate": 6.968780418146117e-05, "loss": 0.0077, "step": 20466 }, { "epoch": 3.9614938080495357, "grad_norm": 0.0965874195098877, "learning_rate": 6.968521062102353e-05, "loss": 0.0081, "step": 20467 }, { "epoch": 3.961687306501548, "grad_norm": 0.03400181233882904, "learning_rate": 6.968261700598991e-05, "loss": 0.0065, "step": 20468 }, { "epoch": 3.9618808049535605, "grad_norm": 0.0923265740275383, "learning_rate": 6.968002333636994e-05, "loss": 0.0068, 
"step": 20469 }, { "epoch": 3.962074303405573, "grad_norm": 0.05521344393491745, "learning_rate": 6.967742961217327e-05, "loss": 0.0065, "step": 20470 }, { "epoch": 3.962267801857585, "grad_norm": 0.06355229020118713, "learning_rate": 6.967483583340951e-05, "loss": 0.0074, "step": 20471 }, { "epoch": 3.9624613003095974, "grad_norm": 0.08273100107908249, "learning_rate": 6.967224200008835e-05, "loss": 0.006, "step": 20472 }, { "epoch": 3.96265479876161, "grad_norm": 0.06335268169641495, "learning_rate": 6.966964811221938e-05, "loss": 0.007, "step": 20473 }, { "epoch": 3.9628482972136223, "grad_norm": 0.08086565136909485, "learning_rate": 6.966705416981231e-05, "loss": 0.0065, "step": 20474 }, { "epoch": 3.9630417956656347, "grad_norm": 0.03323036804795265, "learning_rate": 6.966446017287672e-05, "loss": 0.0065, "step": 20475 }, { "epoch": 3.963235294117647, "grad_norm": 0.07954800873994827, "learning_rate": 6.966186612142229e-05, "loss": 0.0063, "step": 20476 }, { "epoch": 3.963428792569659, "grad_norm": 0.021976517513394356, "learning_rate": 6.965927201545865e-05, "loss": 0.0044, "step": 20477 }, { "epoch": 3.9636222910216716, "grad_norm": 0.07478830963373184, "learning_rate": 6.965667785499544e-05, "loss": 0.0067, "step": 20478 }, { "epoch": 3.963815789473684, "grad_norm": 0.028687145560979843, "learning_rate": 6.965408364004234e-05, "loss": 0.0064, "step": 20479 }, { "epoch": 3.9640092879256965, "grad_norm": 0.057627107948064804, "learning_rate": 6.965148937060895e-05, "loss": 0.0055, "step": 20480 }, { "epoch": 3.964202786377709, "grad_norm": 0.0265506599098444, "learning_rate": 6.964889504670493e-05, "loss": 0.0054, "step": 20481 }, { "epoch": 3.9643962848297214, "grad_norm": 0.07147756218910217, "learning_rate": 6.964630066833995e-05, "loss": 0.0064, "step": 20482 }, { "epoch": 3.964589783281734, "grad_norm": 0.031067097559571266, "learning_rate": 6.96437062355236e-05, "loss": 0.0068, "step": 20483 }, { "epoch": 3.9647832817337463, "grad_norm": 0.059076108038425446, "learning_rate": 6.964111174826558e-05, "loss": 0.0059, "step": 20484 }, { "epoch": 3.9649767801857587, "grad_norm": 0.0359799787402153, "learning_rate": 6.963851720657551e-05, "loss": 0.0059, "step": 20485 }, { "epoch": 3.965170278637771, "grad_norm": 0.06318936496973038, "learning_rate": 6.963592261046304e-05, "loss": 0.0061, "step": 20486 }, { "epoch": 3.965363777089783, "grad_norm": 0.07230951637029648, "learning_rate": 6.963332795993781e-05, "loss": 0.0063, "step": 20487 }, { "epoch": 3.9655572755417956, "grad_norm": 0.0869998037815094, "learning_rate": 6.963073325500947e-05, "loss": 0.0061, "step": 20488 }, { "epoch": 3.965750773993808, "grad_norm": 0.08504199236631393, "learning_rate": 6.962813849568769e-05, "loss": 0.0063, "step": 20489 }, { "epoch": 3.9659442724458205, "grad_norm": 0.05268668010830879, "learning_rate": 6.962554368198208e-05, "loss": 0.0063, "step": 20490 }, { "epoch": 3.966137770897833, "grad_norm": 0.10165580362081528, "learning_rate": 6.962294881390232e-05, "loss": 0.0065, "step": 20491 }, { "epoch": 3.9663312693498454, "grad_norm": 0.030003096908330917, "learning_rate": 6.962035389145801e-05, "loss": 0.0056, "step": 20492 }, { "epoch": 3.9665247678018574, "grad_norm": 0.09422574937343597, "learning_rate": 6.961775891465884e-05, "loss": 0.0051, "step": 20493 }, { "epoch": 3.96671826625387, "grad_norm": 0.04764218628406525, "learning_rate": 6.961516388351443e-05, "loss": 0.0058, "step": 20494 }, { "epoch": 3.9669117647058822, "grad_norm": 0.07486507296562195, "learning_rate": 
6.961256879803447e-05, "loss": 0.0061, "step": 20495 }, { "epoch": 3.9671052631578947, "grad_norm": 0.034473974257707596, "learning_rate": 6.960997365822856e-05, "loss": 0.0081, "step": 20496 }, { "epoch": 3.967298761609907, "grad_norm": 0.033393748104572296, "learning_rate": 6.960737846410638e-05, "loss": 0.0057, "step": 20497 }, { "epoch": 3.9674922600619196, "grad_norm": 0.03263332322239876, "learning_rate": 6.960478321567755e-05, "loss": 0.0058, "step": 20498 }, { "epoch": 3.967685758513932, "grad_norm": 0.03640604764223099, "learning_rate": 6.960218791295175e-05, "loss": 0.0056, "step": 20499 }, { "epoch": 3.9678792569659445, "grad_norm": 0.027541248127818108, "learning_rate": 6.959959255593861e-05, "loss": 0.0054, "step": 20500 }, { "epoch": 3.968072755417957, "grad_norm": 0.03360923007130623, "learning_rate": 6.959699714464777e-05, "loss": 0.0056, "step": 20501 }, { "epoch": 3.968266253869969, "grad_norm": 0.023164719343185425, "learning_rate": 6.95944016790889e-05, "loss": 0.0062, "step": 20502 }, { "epoch": 3.9684597523219813, "grad_norm": 0.04546236991882324, "learning_rate": 6.959180615927165e-05, "loss": 0.0048, "step": 20503 }, { "epoch": 3.968653250773994, "grad_norm": 0.015664614737033844, "learning_rate": 6.958921058520566e-05, "loss": 0.0064, "step": 20504 }, { "epoch": 3.968846749226006, "grad_norm": 0.026836296543478966, "learning_rate": 6.958661495690058e-05, "loss": 0.0056, "step": 20505 }, { "epoch": 3.9690402476780187, "grad_norm": 0.013534926809370518, "learning_rate": 6.958401927436604e-05, "loss": 0.0051, "step": 20506 }, { "epoch": 3.969233746130031, "grad_norm": 0.04192619025707245, "learning_rate": 6.958142353761173e-05, "loss": 0.0059, "step": 20507 }, { "epoch": 3.969427244582043, "grad_norm": 0.09898557513952255, "learning_rate": 6.957882774664728e-05, "loss": 0.0065, "step": 20508 }, { "epoch": 3.9696207430340555, "grad_norm": 0.02085886150598526, "learning_rate": 6.957623190148232e-05, "loss": 0.0049, "step": 20509 }, { "epoch": 3.969814241486068, "grad_norm": 0.10365114361047745, "learning_rate": 6.957363600212655e-05, "loss": 0.006, "step": 20510 }, { "epoch": 3.9700077399380804, "grad_norm": 0.025946052744984627, "learning_rate": 6.957104004858957e-05, "loss": 0.0063, "step": 20511 }, { "epoch": 3.970201238390093, "grad_norm": 0.0765712559223175, "learning_rate": 6.956844404088106e-05, "loss": 0.0066, "step": 20512 }, { "epoch": 3.9703947368421053, "grad_norm": 0.05719469487667084, "learning_rate": 6.956584797901066e-05, "loss": 0.0063, "step": 20513 }, { "epoch": 3.9705882352941178, "grad_norm": 0.05503023788332939, "learning_rate": 6.956325186298804e-05, "loss": 0.006, "step": 20514 }, { "epoch": 3.97078173374613, "grad_norm": 0.07158347219228745, "learning_rate": 6.956065569282282e-05, "loss": 0.0055, "step": 20515 }, { "epoch": 3.9709752321981426, "grad_norm": 0.031289536505937576, "learning_rate": 6.95580594685247e-05, "loss": 0.0067, "step": 20516 }, { "epoch": 3.9711687306501546, "grad_norm": 0.06905421614646912, "learning_rate": 6.955546319010328e-05, "loss": 0.0049, "step": 20517 }, { "epoch": 3.971362229102167, "grad_norm": 0.026706650853157043, "learning_rate": 6.955286685756822e-05, "loss": 0.0055, "step": 20518 }, { "epoch": 3.9715557275541795, "grad_norm": 0.030224163085222244, "learning_rate": 6.955027047092922e-05, "loss": 0.0055, "step": 20519 }, { "epoch": 3.971749226006192, "grad_norm": 0.04318080469965935, "learning_rate": 6.954767403019588e-05, "loss": 0.006, "step": 20520 }, { "epoch": 3.9719427244582044, "grad_norm": 
0.026102280244231224, "learning_rate": 6.954507753537788e-05, "loss": 0.0061, "step": 20521 }, { "epoch": 3.972136222910217, "grad_norm": 0.037106867879629135, "learning_rate": 6.954248098648486e-05, "loss": 0.0054, "step": 20522 }, { "epoch": 3.972329721362229, "grad_norm": 0.025013392791152, "learning_rate": 6.953988438352648e-05, "loss": 0.0073, "step": 20523 }, { "epoch": 3.9725232198142413, "grad_norm": 0.043391402810811996, "learning_rate": 6.953728772651238e-05, "loss": 0.0062, "step": 20524 }, { "epoch": 3.9727167182662537, "grad_norm": 0.02697581984102726, "learning_rate": 6.953469101545224e-05, "loss": 0.0065, "step": 20525 }, { "epoch": 3.972910216718266, "grad_norm": 0.05843961238861084, "learning_rate": 6.95320942503557e-05, "loss": 0.0062, "step": 20526 }, { "epoch": 3.9731037151702786, "grad_norm": 0.02436124160885811, "learning_rate": 6.952949743123241e-05, "loss": 0.0058, "step": 20527 }, { "epoch": 3.973297213622291, "grad_norm": 0.02402406558394432, "learning_rate": 6.952690055809205e-05, "loss": 0.0064, "step": 20528 }, { "epoch": 3.9734907120743035, "grad_norm": 0.05584999918937683, "learning_rate": 6.952430363094423e-05, "loss": 0.0063, "step": 20529 }, { "epoch": 3.973684210526316, "grad_norm": 0.030951615422964096, "learning_rate": 6.952170664979864e-05, "loss": 0.0052, "step": 20530 }, { "epoch": 3.9738777089783284, "grad_norm": 0.04373888298869133, "learning_rate": 6.951910961466492e-05, "loss": 0.0069, "step": 20531 }, { "epoch": 3.974071207430341, "grad_norm": 0.040945492684841156, "learning_rate": 6.951651252555271e-05, "loss": 0.0064, "step": 20532 }, { "epoch": 3.974264705882353, "grad_norm": 0.03710993006825447, "learning_rate": 6.951391538247171e-05, "loss": 0.0067, "step": 20533 }, { "epoch": 3.9744582043343653, "grad_norm": 0.053005315363407135, "learning_rate": 6.951131818543154e-05, "loss": 0.0072, "step": 20534 }, { "epoch": 3.9746517027863777, "grad_norm": 0.0715206190943718, "learning_rate": 6.950872093444186e-05, "loss": 0.0055, "step": 20535 }, { "epoch": 3.97484520123839, "grad_norm": 0.04502911493182182, "learning_rate": 6.950612362951234e-05, "loss": 0.0065, "step": 20536 }, { "epoch": 3.9750386996904026, "grad_norm": 0.06215125322341919, "learning_rate": 6.950352627065261e-05, "loss": 0.0055, "step": 20537 }, { "epoch": 3.975232198142415, "grad_norm": 0.06665320694446564, "learning_rate": 6.950092885787235e-05, "loss": 0.007, "step": 20538 }, { "epoch": 3.975425696594427, "grad_norm": 0.04686710983514786, "learning_rate": 6.949833139118121e-05, "loss": 0.0059, "step": 20539 }, { "epoch": 3.9756191950464395, "grad_norm": 0.07010659575462341, "learning_rate": 6.949573387058885e-05, "loss": 0.0061, "step": 20540 }, { "epoch": 3.975812693498452, "grad_norm": 0.048527635633945465, "learning_rate": 6.949313629610493e-05, "loss": 0.0061, "step": 20541 }, { "epoch": 3.9760061919504643, "grad_norm": 0.06628778576850891, "learning_rate": 6.949053866773909e-05, "loss": 0.0054, "step": 20542 }, { "epoch": 3.976199690402477, "grad_norm": 0.053197238594293594, "learning_rate": 6.9487940985501e-05, "loss": 0.0059, "step": 20543 }, { "epoch": 3.9763931888544892, "grad_norm": 0.03820320591330528, "learning_rate": 6.94853432494003e-05, "loss": 0.006, "step": 20544 }, { "epoch": 3.9765866873065017, "grad_norm": 0.07002990692853928, "learning_rate": 6.948274545944669e-05, "loss": 0.0064, "step": 20545 }, { "epoch": 3.976780185758514, "grad_norm": 0.02103937603533268, "learning_rate": 6.948014761564978e-05, "loss": 0.0073, "step": 20546 }, { "epoch": 
3.9769736842105265, "grad_norm": 0.05939587578177452, "learning_rate": 6.947754971801926e-05, "loss": 0.0056, "step": 20547 }, { "epoch": 3.9771671826625385, "grad_norm": 0.04743390530347824, "learning_rate": 6.947495176656478e-05, "loss": 0.0067, "step": 20548 }, { "epoch": 3.977360681114551, "grad_norm": 0.05767623335123062, "learning_rate": 6.947235376129599e-05, "loss": 0.0057, "step": 20549 }, { "epoch": 3.9775541795665634, "grad_norm": 0.04831153526902199, "learning_rate": 6.946975570222256e-05, "loss": 0.0056, "step": 20550 }, { "epoch": 3.977747678018576, "grad_norm": 0.029029378667473793, "learning_rate": 6.946715758935414e-05, "loss": 0.0057, "step": 20551 }, { "epoch": 3.9779411764705883, "grad_norm": 0.05922270938754082, "learning_rate": 6.946455942270041e-05, "loss": 0.0061, "step": 20552 }, { "epoch": 3.9781346749226008, "grad_norm": 0.028517959639430046, "learning_rate": 6.9461961202271e-05, "loss": 0.0053, "step": 20553 }, { "epoch": 3.9783281733746128, "grad_norm": 0.040611766278743744, "learning_rate": 6.945936292807557e-05, "loss": 0.0066, "step": 20554 }, { "epoch": 3.978521671826625, "grad_norm": 0.049051880836486816, "learning_rate": 6.945676460012381e-05, "loss": 0.0052, "step": 20555 }, { "epoch": 3.9787151702786376, "grad_norm": 0.02941982075572014, "learning_rate": 6.945416621842534e-05, "loss": 0.0069, "step": 20556 }, { "epoch": 3.97890866873065, "grad_norm": 0.06982695311307907, "learning_rate": 6.945156778298985e-05, "loss": 0.0065, "step": 20557 }, { "epoch": 3.9791021671826625, "grad_norm": 0.035113006830215454, "learning_rate": 6.9448969293827e-05, "loss": 0.0065, "step": 20558 }, { "epoch": 3.979295665634675, "grad_norm": 0.04659267142415047, "learning_rate": 6.944637075094644e-05, "loss": 0.007, "step": 20559 }, { "epoch": 3.9794891640866874, "grad_norm": 0.03103828988969326, "learning_rate": 6.944377215435782e-05, "loss": 0.0065, "step": 20560 }, { "epoch": 3.9796826625387, "grad_norm": 0.03915850445628166, "learning_rate": 6.944117350407083e-05, "loss": 0.006, "step": 20561 }, { "epoch": 3.9798761609907123, "grad_norm": 0.04768414422869682, "learning_rate": 6.943857480009509e-05, "loss": 0.0048, "step": 20562 }, { "epoch": 3.9800696594427247, "grad_norm": 0.05051376298069954, "learning_rate": 6.943597604244033e-05, "loss": 0.0059, "step": 20563 }, { "epoch": 3.9802631578947367, "grad_norm": 0.03248940780758858, "learning_rate": 6.943337723111612e-05, "loss": 0.0078, "step": 20564 }, { "epoch": 3.980456656346749, "grad_norm": 0.045421309769153595, "learning_rate": 6.94307783661322e-05, "loss": 0.0059, "step": 20565 }, { "epoch": 3.9806501547987616, "grad_norm": 0.04735017567873001, "learning_rate": 6.94281794474982e-05, "loss": 0.0076, "step": 20566 }, { "epoch": 3.980843653250774, "grad_norm": 0.054890915751457214, "learning_rate": 6.942558047522378e-05, "loss": 0.0062, "step": 20567 }, { "epoch": 3.9810371517027865, "grad_norm": 0.04750906676054001, "learning_rate": 6.94229814493186e-05, "loss": 0.0067, "step": 20568 }, { "epoch": 3.9812306501547985, "grad_norm": 0.04058164730668068, "learning_rate": 6.942038236979233e-05, "loss": 0.0054, "step": 20569 }, { "epoch": 3.981424148606811, "grad_norm": 0.054055001586675644, "learning_rate": 6.941778323665464e-05, "loss": 0.0062, "step": 20570 }, { "epoch": 3.9816176470588234, "grad_norm": 0.04274095594882965, "learning_rate": 6.941518404991519e-05, "loss": 0.0064, "step": 20571 }, { "epoch": 3.981811145510836, "grad_norm": 0.04037213325500488, "learning_rate": 6.941258480958363e-05, "loss": 0.0062, "step": 
20572 }, { "epoch": 3.9820046439628483, "grad_norm": 0.04439651593565941, "learning_rate": 6.940998551566963e-05, "loss": 0.0063, "step": 20573 }, { "epoch": 3.9821981424148607, "grad_norm": 0.02788332663476467, "learning_rate": 6.940738616818285e-05, "loss": 0.0077, "step": 20574 }, { "epoch": 3.982391640866873, "grad_norm": 0.038474179804325104, "learning_rate": 6.940478676713297e-05, "loss": 0.0061, "step": 20575 }, { "epoch": 3.9825851393188856, "grad_norm": 0.018795626237988472, "learning_rate": 6.940218731252963e-05, "loss": 0.0053, "step": 20576 }, { "epoch": 3.982778637770898, "grad_norm": 0.025416020303964615, "learning_rate": 6.939958780438251e-05, "loss": 0.0048, "step": 20577 }, { "epoch": 3.9829721362229105, "grad_norm": 0.029098493978381157, "learning_rate": 6.939698824270128e-05, "loss": 0.0052, "step": 20578 }, { "epoch": 3.9831656346749225, "grad_norm": 0.023673532530665398, "learning_rate": 6.939438862749557e-05, "loss": 0.0052, "step": 20579 }, { "epoch": 3.983359133126935, "grad_norm": 0.01960013434290886, "learning_rate": 6.93917889587751e-05, "loss": 0.0053, "step": 20580 }, { "epoch": 3.9835526315789473, "grad_norm": 0.04097132757306099, "learning_rate": 6.938918923654948e-05, "loss": 0.0063, "step": 20581 }, { "epoch": 3.98374613003096, "grad_norm": 0.04967288672924042, "learning_rate": 6.938658946082842e-05, "loss": 0.0065, "step": 20582 }, { "epoch": 3.9839396284829722, "grad_norm": 0.03216269984841347, "learning_rate": 6.938398963162156e-05, "loss": 0.005, "step": 20583 }, { "epoch": 3.9841331269349847, "grad_norm": 0.04244985058903694, "learning_rate": 6.938138974893857e-05, "loss": 0.0055, "step": 20584 }, { "epoch": 3.9843266253869967, "grad_norm": 0.05150581896305084, "learning_rate": 6.937878981278912e-05, "loss": 0.0062, "step": 20585 }, { "epoch": 3.984520123839009, "grad_norm": 0.03665423393249512, "learning_rate": 6.937618982318286e-05, "loss": 0.0054, "step": 20586 }, { "epoch": 3.9847136222910216, "grad_norm": 0.050485752522945404, "learning_rate": 6.937358978012948e-05, "loss": 0.006, "step": 20587 }, { "epoch": 3.984907120743034, "grad_norm": 0.0314178504049778, "learning_rate": 6.937098968363863e-05, "loss": 0.0067, "step": 20588 }, { "epoch": 3.9851006191950464, "grad_norm": 0.04184412956237793, "learning_rate": 6.936838953371998e-05, "loss": 0.0048, "step": 20589 }, { "epoch": 3.985294117647059, "grad_norm": 0.05727008357644081, "learning_rate": 6.93657893303832e-05, "loss": 0.005, "step": 20590 }, { "epoch": 3.9854876160990713, "grad_norm": 0.028353648260235786, "learning_rate": 6.936318907363795e-05, "loss": 0.0066, "step": 20591 }, { "epoch": 3.9856811145510838, "grad_norm": 0.0471782311797142, "learning_rate": 6.936058876349392e-05, "loss": 0.0058, "step": 20592 }, { "epoch": 3.985874613003096, "grad_norm": 0.040786437690258026, "learning_rate": 6.935798839996073e-05, "loss": 0.006, "step": 20593 }, { "epoch": 3.986068111455108, "grad_norm": 0.04504360258579254, "learning_rate": 6.935538798304812e-05, "loss": 0.0054, "step": 20594 }, { "epoch": 3.9862616099071206, "grad_norm": 0.05411317199468613, "learning_rate": 6.935278751276567e-05, "loss": 0.0065, "step": 20595 }, { "epoch": 3.986455108359133, "grad_norm": 0.05603291094303131, "learning_rate": 6.935018698912311e-05, "loss": 0.0058, "step": 20596 }, { "epoch": 3.9866486068111455, "grad_norm": 0.05785684287548065, "learning_rate": 6.934758641213009e-05, "loss": 0.0071, "step": 20597 }, { "epoch": 3.986842105263158, "grad_norm": 0.039762139320373535, "learning_rate": 6.934498578179629e-05, 
"loss": 0.0065, "step": 20598 }, { "epoch": 3.9870356037151704, "grad_norm": 0.05996681749820709, "learning_rate": 6.934238509813136e-05, "loss": 0.0055, "step": 20599 }, { "epoch": 3.9872291021671824, "grad_norm": 0.03942783549427986, "learning_rate": 6.933978436114497e-05, "loss": 0.0054, "step": 20600 }, { "epoch": 3.987422600619195, "grad_norm": 0.038158442825078964, "learning_rate": 6.933718357084681e-05, "loss": 0.0046, "step": 20601 }, { "epoch": 3.9876160990712073, "grad_norm": 0.04423513263463974, "learning_rate": 6.933458272724653e-05, "loss": 0.0066, "step": 20602 }, { "epoch": 3.9878095975232197, "grad_norm": 0.02694033272564411, "learning_rate": 6.933198183035379e-05, "loss": 0.0059, "step": 20603 }, { "epoch": 3.988003095975232, "grad_norm": 0.047245364636182785, "learning_rate": 6.932938088017828e-05, "loss": 0.0062, "step": 20604 }, { "epoch": 3.9881965944272446, "grad_norm": 0.023554030805826187, "learning_rate": 6.932677987672966e-05, "loss": 0.0054, "step": 20605 }, { "epoch": 3.988390092879257, "grad_norm": 0.04538607597351074, "learning_rate": 6.932417882001762e-05, "loss": 0.0056, "step": 20606 }, { "epoch": 3.9885835913312695, "grad_norm": 0.025562329217791557, "learning_rate": 6.932157771005179e-05, "loss": 0.0051, "step": 20607 }, { "epoch": 3.988777089783282, "grad_norm": 0.043832045048475266, "learning_rate": 6.931897654684187e-05, "loss": 0.0061, "step": 20608 }, { "epoch": 3.9889705882352944, "grad_norm": 0.04284167289733887, "learning_rate": 6.931637533039754e-05, "loss": 0.0067, "step": 20609 }, { "epoch": 3.9891640866873064, "grad_norm": 0.036127593368291855, "learning_rate": 6.931377406072843e-05, "loss": 0.0062, "step": 20610 }, { "epoch": 3.989357585139319, "grad_norm": 0.0609891451895237, "learning_rate": 6.931117273784424e-05, "loss": 0.0058, "step": 20611 }, { "epoch": 3.9895510835913313, "grad_norm": 0.037143148481845856, "learning_rate": 6.930857136175464e-05, "loss": 0.0052, "step": 20612 }, { "epoch": 3.9897445820433437, "grad_norm": 0.05335143208503723, "learning_rate": 6.930596993246929e-05, "loss": 0.0078, "step": 20613 }, { "epoch": 3.989938080495356, "grad_norm": 0.0493248775601387, "learning_rate": 6.93033684499979e-05, "loss": 0.0059, "step": 20614 }, { "epoch": 3.9901315789473686, "grad_norm": 0.05904781073331833, "learning_rate": 6.930076691435008e-05, "loss": 0.0071, "step": 20615 }, { "epoch": 3.9903250773993806, "grad_norm": 0.06337538361549377, "learning_rate": 6.929816532553554e-05, "loss": 0.0048, "step": 20616 }, { "epoch": 3.990518575851393, "grad_norm": 0.05900401994585991, "learning_rate": 6.929556368356394e-05, "loss": 0.0086, "step": 20617 }, { "epoch": 3.9907120743034055, "grad_norm": 0.08192545175552368, "learning_rate": 6.929296198844497e-05, "loss": 0.0069, "step": 20618 }, { "epoch": 3.990905572755418, "grad_norm": 0.027995890006422997, "learning_rate": 6.929036024018827e-05, "loss": 0.0056, "step": 20619 }, { "epoch": 3.9910990712074303, "grad_norm": 0.0679156631231308, "learning_rate": 6.928775843880354e-05, "loss": 0.007, "step": 20620 }, { "epoch": 3.991292569659443, "grad_norm": 0.05180894955992699, "learning_rate": 6.928515658430044e-05, "loss": 0.0052, "step": 20621 }, { "epoch": 3.9914860681114552, "grad_norm": 0.06703686714172363, "learning_rate": 6.928255467668866e-05, "loss": 0.0066, "step": 20622 }, { "epoch": 3.9916795665634677, "grad_norm": 0.06667522341012955, "learning_rate": 6.927995271597787e-05, "loss": 0.005, "step": 20623 }, { "epoch": 3.99187306501548, "grad_norm": 0.0819736048579216, "learning_rate": 
6.927735070217773e-05, "loss": 0.0066, "step": 20624 }, { "epoch": 3.992066563467492, "grad_norm": 0.03790552541613579, "learning_rate": 6.927474863529789e-05, "loss": 0.0057, "step": 20625 }, { "epoch": 3.9922600619195046, "grad_norm": 0.06991017609834671, "learning_rate": 6.927214651534808e-05, "loss": 0.0046, "step": 20626 }, { "epoch": 3.992453560371517, "grad_norm": 0.02778918854892254, "learning_rate": 6.926954434233795e-05, "loss": 0.0053, "step": 20627 }, { "epoch": 3.9926470588235294, "grad_norm": 0.06233585998415947, "learning_rate": 6.926694211627715e-05, "loss": 0.0065, "step": 20628 }, { "epoch": 3.992840557275542, "grad_norm": 0.048768896609544754, "learning_rate": 6.926433983717537e-05, "loss": 0.0062, "step": 20629 }, { "epoch": 3.9930340557275543, "grad_norm": 0.05103723332285881, "learning_rate": 6.92617375050423e-05, "loss": 0.0068, "step": 20630 }, { "epoch": 3.9932275541795663, "grad_norm": 0.057818107306957245, "learning_rate": 6.92591351198876e-05, "loss": 0.0063, "step": 20631 }, { "epoch": 3.9934210526315788, "grad_norm": 0.05848318338394165, "learning_rate": 6.925653268172096e-05, "loss": 0.0069, "step": 20632 }, { "epoch": 3.993614551083591, "grad_norm": 0.0518345832824707, "learning_rate": 6.925393019055203e-05, "loss": 0.006, "step": 20633 }, { "epoch": 3.9938080495356036, "grad_norm": 0.04916287586092949, "learning_rate": 6.925132764639052e-05, "loss": 0.0059, "step": 20634 }, { "epoch": 3.994001547987616, "grad_norm": 0.06115676090121269, "learning_rate": 6.924872504924608e-05, "loss": 0.0049, "step": 20635 }, { "epoch": 3.9941950464396285, "grad_norm": 0.051668137311935425, "learning_rate": 6.924612239912838e-05, "loss": 0.0064, "step": 20636 }, { "epoch": 3.994388544891641, "grad_norm": 0.06626836955547333, "learning_rate": 6.924351969604712e-05, "loss": 0.007, "step": 20637 }, { "epoch": 3.9945820433436534, "grad_norm": 0.05273722857236862, "learning_rate": 6.924091694001195e-05, "loss": 0.0075, "step": 20638 }, { "epoch": 3.994775541795666, "grad_norm": 0.04585786163806915, "learning_rate": 6.923831413103255e-05, "loss": 0.006, "step": 20639 }, { "epoch": 3.9949690402476783, "grad_norm": 0.048430558294057846, "learning_rate": 6.923571126911865e-05, "loss": 0.0052, "step": 20640 }, { "epoch": 3.9951625386996903, "grad_norm": 0.033003296703100204, "learning_rate": 6.923310835427985e-05, "loss": 0.007, "step": 20641 }, { "epoch": 3.9953560371517027, "grad_norm": 0.043889790773391724, "learning_rate": 6.923050538652584e-05, "loss": 0.0063, "step": 20642 }, { "epoch": 3.995549535603715, "grad_norm": 0.03778770565986633, "learning_rate": 6.922790236586636e-05, "loss": 0.0072, "step": 20643 }, { "epoch": 3.9957430340557276, "grad_norm": 0.06065439060330391, "learning_rate": 6.922529929231102e-05, "loss": 0.0056, "step": 20644 }, { "epoch": 3.99593653250774, "grad_norm": 0.03376951441168785, "learning_rate": 6.922269616586952e-05, "loss": 0.0059, "step": 20645 }, { "epoch": 3.996130030959752, "grad_norm": 0.04655451327562332, "learning_rate": 6.922009298655157e-05, "loss": 0.0065, "step": 20646 }, { "epoch": 3.9963235294117645, "grad_norm": 0.04936818778514862, "learning_rate": 6.921748975436679e-05, "loss": 0.0061, "step": 20647 }, { "epoch": 3.996517027863777, "grad_norm": 0.03034786880016327, "learning_rate": 6.921488646932491e-05, "loss": 0.0061, "step": 20648 }, { "epoch": 3.9967105263157894, "grad_norm": 0.05032312124967575, "learning_rate": 6.921228313143557e-05, "loss": 0.0058, "step": 20649 }, { "epoch": 3.996904024767802, "grad_norm": 
0.06216782331466675, "learning_rate": 6.920967974070847e-05, "loss": 0.0065, "step": 20650 }, { "epoch": 3.9970975232198143, "grad_norm": 0.03301497548818588, "learning_rate": 6.920707629715326e-05, "loss": 0.0068, "step": 20651 }, { "epoch": 3.9972910216718267, "grad_norm": 0.07775714248418808, "learning_rate": 6.920447280077966e-05, "loss": 0.0078, "step": 20652 }, { "epoch": 3.997484520123839, "grad_norm": 0.06570632755756378, "learning_rate": 6.920186925159731e-05, "loss": 0.0049, "step": 20653 }, { "epoch": 3.9976780185758516, "grad_norm": 0.05473156273365021, "learning_rate": 6.919926564961594e-05, "loss": 0.0063, "step": 20654 }, { "epoch": 3.997871517027864, "grad_norm": 0.06496476382017136, "learning_rate": 6.919666199484519e-05, "loss": 0.0052, "step": 20655 }, { "epoch": 3.998065015479876, "grad_norm": 0.03860592469573021, "learning_rate": 6.919405828729475e-05, "loss": 0.005, "step": 20656 }, { "epoch": 3.9982585139318885, "grad_norm": 0.0631507933139801, "learning_rate": 6.919145452697428e-05, "loss": 0.0054, "step": 20657 }, { "epoch": 3.998452012383901, "grad_norm": 0.028606964275240898, "learning_rate": 6.91888507138935e-05, "loss": 0.0067, "step": 20658 }, { "epoch": 3.9986455108359134, "grad_norm": 0.053178731352090836, "learning_rate": 6.918624684806205e-05, "loss": 0.007, "step": 20659 }, { "epoch": 3.998839009287926, "grad_norm": 0.026756735518574715, "learning_rate": 6.918364292948964e-05, "loss": 0.0062, "step": 20660 }, { "epoch": 3.9990325077399382, "grad_norm": 0.049718137830495834, "learning_rate": 6.918103895818595e-05, "loss": 0.0061, "step": 20661 }, { "epoch": 3.9992260061919502, "grad_norm": 0.04892347753047943, "learning_rate": 6.917843493416065e-05, "loss": 0.007, "step": 20662 }, { "epoch": 3.9994195046439627, "grad_norm": 0.05164754390716553, "learning_rate": 6.91758308574234e-05, "loss": 0.0068, "step": 20663 }, { "epoch": 3.999613003095975, "grad_norm": 0.059348512440919876, "learning_rate": 6.917322672798392e-05, "loss": 0.0051, "step": 20664 } ], "logging_steps": 1.0, "max_steps": 51680, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.440822063752238e+21, "train_batch_size": 16, "trial_name": null, "trial_params": null }
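
Addendum (not part of the serialized state): the object above appears to be the tail of a Hugging Face Trainer checkpoint state, ending at global_step 20664 of max_steps 51680 with per-step entries in log_history carrying epoch, grad_norm, learning_rate, loss, and step. A minimal sketch of how one might load such a file and summarize the logged loss and learning-rate curves follows; the file name trainer_state.json and the smoothing window are assumptions for illustration, not anything specified by this file.

    # Sketch: read a Trainer state file and summarize its log_history.
    # Assumes the JSON above is saved as "trainer_state.json" (hypothetical path).
    import json

    with open("trainer_state.json") as f:
        state = json.load(f)

    # Each log_history entry here is a per-step dict with epoch, grad_norm,
    # learning_rate, loss, and step.
    history = [h for h in state["log_history"] if "loss" in h]
    steps = [h["step"] for h in history]
    losses = [h["loss"] for h in history]
    lrs = [h["learning_rate"] for h in history]

    print(f"logged steps: {len(steps)} (last step {steps[-1]} of {state['max_steps']})")
    print(f"final loss: {losses[-1]:.4f}, final learning rate: {lrs[-1]:.3e}")

    # Per-step losses are noisy; a simple trailing moving average (window is an
    # arbitrary choice) makes the trend easier to read.
    window = 100
    tail = losses[-window:]
    print(f"mean loss over last {len(tail)} logged steps: {sum(tail) / len(tail):.4f}")

The same lists could be fed to any plotting library to chart loss and learning rate against step; that part is left out to keep the sketch dependency-free.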