{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.733371596199222, "eval_steps": 500, "global_step": 11500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.25009894478923006, "learning_rate": 2.1231422505307853e-09, "loss": 0.1048, "step": 1 }, { "epoch": 0.0, "grad_norm": 0.19922636436321503, "learning_rate": 4.246284501061571e-09, "loss": 0.1915, "step": 2 }, { "epoch": 0.0, "grad_norm": 0.3269235595332411, "learning_rate": 6.369426751592357e-09, "loss": 0.2746, "step": 3 }, { "epoch": 0.0, "grad_norm": 0.358077214427226, "learning_rate": 8.492569002123141e-09, "loss": 0.3747, "step": 4 }, { "epoch": 0.0, "grad_norm": 0.2540606765888093, "learning_rate": 1.0615711252653927e-08, "loss": 0.2772, "step": 5 }, { "epoch": 0.0, "grad_norm": 0.28658288356106465, "learning_rate": 1.2738853503184714e-08, "loss": 0.349, "step": 6 }, { "epoch": 0.0, "grad_norm": 0.685969966490638, "learning_rate": 1.4861995753715499e-08, "loss": 0.294, "step": 7 }, { "epoch": 0.0, "grad_norm": 0.7418569063982221, "learning_rate": 1.6985138004246283e-08, "loss": 0.2128, "step": 8 }, { "epoch": 0.0, "grad_norm": 0.13356416195804294, "learning_rate": 1.910828025477707e-08, "loss": 0.1117, "step": 9 }, { "epoch": 0.0, "grad_norm": 0.10029129219851991, "learning_rate": 2.1231422505307853e-08, "loss": 0.0753, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.19137471392482466, "learning_rate": 2.3354564755838637e-08, "loss": 0.2082, "step": 11 }, { "epoch": 0.0, "grad_norm": 0.22620629379442936, "learning_rate": 2.5477707006369427e-08, "loss": 0.1041, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.24389827304886408, "learning_rate": 2.760084925690021e-08, "loss": 0.181, "step": 13 }, { "epoch": 0.0, "grad_norm": 0.3382479000349999, "learning_rate": 2.9723991507430998e-08, "loss": 0.33, "step": 14 }, { "epoch": 0.0, "grad_norm": 0.18771980115214004, "learning_rate": 3.184713375796178e-08, "loss": 0.0976, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.42516237338923024, "learning_rate": 3.3970276008492565e-08, "loss": 0.319, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.24522305917300966, "learning_rate": 3.609341825902335e-08, "loss": 0.4258, "step": 17 }, { "epoch": 0.0, "grad_norm": 0.24229655895652444, "learning_rate": 3.821656050955414e-08, "loss": 0.2826, "step": 18 }, { "epoch": 0.0, "grad_norm": 0.3229545378825092, "learning_rate": 4.033970276008492e-08, "loss": 0.286, "step": 19 }, { "epoch": 0.0, "grad_norm": 0.16404456968742298, "learning_rate": 4.2462845010615706e-08, "loss": 0.028, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.16530036487321828, "learning_rate": 4.458598726114649e-08, "loss": 0.1198, "step": 21 }, { "epoch": 0.0, "grad_norm": 0.2385576349885164, "learning_rate": 4.6709129511677274e-08, "loss": 0.1685, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.19659189540970257, "learning_rate": 4.883227176220807e-08, "loss": 0.1903, "step": 23 }, { "epoch": 0.0, "grad_norm": 0.2947538234856561, "learning_rate": 5.0955414012738854e-08, "loss": 0.1341, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.23116107483481232, "learning_rate": 5.307855626326964e-08, "loss": 0.209, "step": 25 }, { "epoch": 0.0, "grad_norm": 2.1033019559393957, "learning_rate": 5.520169851380042e-08, "loss": 0.1789, "step": 26 }, { "epoch": 0.0, "grad_norm": 0.08804539463365178, "learning_rate": 5.732484076433121e-08, "loss": 0.0189, "step": 27 }, { "epoch": 0.0, "grad_norm": 0.2914851513918007, "learning_rate": 5.9447983014861996e-08, "loss": 0.1725, "step": 28 }, { "epoch": 0.0, "grad_norm": 0.14836108588407937, "learning_rate": 6.157112526539278e-08, "loss": 0.2839, "step": 29 }, { "epoch": 0.0, "grad_norm": 0.1919117139250952, "learning_rate": 6.369426751592356e-08, "loss": 0.1446, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.2761565442014638, "learning_rate": 6.581740976645435e-08, "loss": 0.2991, "step": 31 }, { "epoch": 0.0, "grad_norm": 0.22305278681009405, "learning_rate": 6.794055201698513e-08, "loss": 0.0708, "step": 32 }, { "epoch": 0.0, "grad_norm": 0.1610106563978104, "learning_rate": 7.006369426751591e-08, "loss": 0.261, "step": 33 }, { "epoch": 0.0, "grad_norm": 0.14077057547983793, "learning_rate": 7.21868365180467e-08, "loss": 0.1801, "step": 34 }, { "epoch": 0.0, "grad_norm": 0.20065851726476622, "learning_rate": 7.43099787685775e-08, "loss": 0.2476, "step": 35 }, { "epoch": 0.0, "grad_norm": 0.3081035531257673, "learning_rate": 7.643312101910828e-08, "loss": 0.216, "step": 36 }, { "epoch": 0.0, "grad_norm": 0.09010003471735234, "learning_rate": 7.855626326963906e-08, "loss": 0.0802, "step": 37 }, { "epoch": 0.0, "grad_norm": 0.19947396878358303, "learning_rate": 8.067940552016985e-08, "loss": 0.305, "step": 38 }, { "epoch": 0.0, "grad_norm": 0.262252267634813, "learning_rate": 8.280254777070063e-08, "loss": 0.1195, "step": 39 }, { "epoch": 0.0, "grad_norm": 0.17317934071438829, "learning_rate": 8.492569002123141e-08, "loss": 0.0463, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.22859501169296872, "learning_rate": 8.70488322717622e-08, "loss": 0.1598, "step": 41 }, { "epoch": 0.0, "grad_norm": 0.26001327021273624, "learning_rate": 8.917197452229298e-08, "loss": 0.066, "step": 42 }, { "epoch": 0.0, "grad_norm": 0.1198363946982399, "learning_rate": 9.129511677282376e-08, "loss": 0.0997, "step": 43 }, { "epoch": 0.0, "grad_norm": 0.2593161588612782, "learning_rate": 9.341825902335455e-08, "loss": 0.0994, "step": 44 }, { "epoch": 0.0, "grad_norm": 0.2306799823690304, "learning_rate": 9.554140127388536e-08, "loss": 0.2179, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.17571689838456886, "learning_rate": 9.766454352441614e-08, "loss": 0.1061, "step": 46 }, { "epoch": 0.0, "grad_norm": 0.17914073942300426, "learning_rate": 9.978768577494693e-08, "loss": 0.223, "step": 47 }, { "epoch": 0.0, "grad_norm": 0.24889993931575963, "learning_rate": 1.0191082802547771e-07, "loss": 0.2474, "step": 48 }, { "epoch": 0.0, "grad_norm": 0.22781020002071803, "learning_rate": 1.0403397027600849e-07, "loss": 0.11, "step": 49 }, { "epoch": 0.0, "grad_norm": 0.2501869999590839, "learning_rate": 1.0615711252653928e-07, "loss": 0.176, "step": 50 }, { "epoch": 0.0, "grad_norm": 0.36315613525619006, "learning_rate": 1.0828025477707006e-07, "loss": 0.1577, "step": 51 }, { "epoch": 0.0, "grad_norm": 0.2658342666766177, "learning_rate": 1.1040339702760084e-07, "loss": 0.1983, "step": 52 }, { "epoch": 0.0, "grad_norm": 0.18959948719710448, "learning_rate": 1.1252653927813163e-07, "loss": 0.1979, "step": 53 }, { "epoch": 0.0, "grad_norm": 0.2601087356669149, "learning_rate": 1.1464968152866242e-07, "loss": 0.133, "step": 54 }, { "epoch": 0.0, "grad_norm": 0.31557862443430856, "learning_rate": 1.1677282377919321e-07, "loss": 0.3968, "step": 55 }, { "epoch": 0.0, "grad_norm": 0.19390524170160062, "learning_rate": 1.1889596602972399e-07, "loss": 0.2072, "step": 56 }, { "epoch": 0.0, "grad_norm": 0.1998692909914467, "learning_rate": 1.2101910828025477e-07, "loss": 0.1156, "step": 57 }, { "epoch": 0.0, "grad_norm": 0.1221613667000933, "learning_rate": 1.2314225053078556e-07, "loss": 0.2259, "step": 58 }, { "epoch": 0.0, "grad_norm": 0.23782027171460882, "learning_rate": 1.2526539278131634e-07, "loss": 0.1484, "step": 59 }, { "epoch": 0.0, "grad_norm": 0.2328260333787332, "learning_rate": 1.2738853503184713e-07, "loss": 0.288, "step": 60 }, { "epoch": 0.0, "grad_norm": 0.14448131959238655, "learning_rate": 1.295116772823779e-07, "loss": 0.0096, "step": 61 }, { "epoch": 0.0, "grad_norm": 0.332046819939343, "learning_rate": 1.316348195329087e-07, "loss": 0.0806, "step": 62 }, { "epoch": 0.0, "grad_norm": 0.2544066675974567, "learning_rate": 1.3375796178343948e-07, "loss": 0.2087, "step": 63 }, { "epoch": 0.0, "grad_norm": 0.10688903983575725, "learning_rate": 1.3588110403397026e-07, "loss": 0.0316, "step": 64 }, { "epoch": 0.0, "grad_norm": 0.15536549771069488, "learning_rate": 1.3800424628450104e-07, "loss": 0.1157, "step": 65 }, { "epoch": 0.0, "grad_norm": 0.18711607110158102, "learning_rate": 1.4012738853503183e-07, "loss": 0.3105, "step": 66 }, { "epoch": 0.0, "grad_norm": 0.2615454076682301, "learning_rate": 1.422505307855626e-07, "loss": 0.1635, "step": 67 }, { "epoch": 0.0, "grad_norm": 0.13564631530215016, "learning_rate": 1.443736730360934e-07, "loss": 0.1035, "step": 68 }, { "epoch": 0.0, "grad_norm": 0.21407966082271274, "learning_rate": 1.464968152866242e-07, "loss": 0.119, "step": 69 }, { "epoch": 0.0, "grad_norm": 0.30779767305709155, "learning_rate": 1.48619957537155e-07, "loss": 0.2524, "step": 70 }, { "epoch": 0.0, "grad_norm": 0.33050380029323323, "learning_rate": 1.5074309978768577e-07, "loss": 0.2835, "step": 71 }, { "epoch": 0.0, "grad_norm": 0.2502548847332565, "learning_rate": 1.5286624203821656e-07, "loss": 0.1124, "step": 72 }, { "epoch": 0.0, "grad_norm": 0.2553321164753365, "learning_rate": 1.5498938428874734e-07, "loss": 0.2557, "step": 73 }, { "epoch": 0.0, "grad_norm": 0.18679106407193102, "learning_rate": 1.5711252653927812e-07, "loss": 0.1674, "step": 74 }, { "epoch": 0.0, "grad_norm": 0.1283137929571681, "learning_rate": 1.592356687898089e-07, "loss": 0.3204, "step": 75 }, { "epoch": 0.0, "grad_norm": 0.24057748518138783, "learning_rate": 1.613588110403397e-07, "loss": 0.1441, "step": 76 }, { "epoch": 0.0, "grad_norm": 0.21625351951617686, "learning_rate": 1.6348195329087047e-07, "loss": 0.1564, "step": 77 }, { "epoch": 0.0, "grad_norm": 0.2807641314251535, "learning_rate": 1.6560509554140126e-07, "loss": 0.2258, "step": 78 }, { "epoch": 0.01, "grad_norm": 0.21426607779258156, "learning_rate": 1.6772823779193204e-07, "loss": 0.0787, "step": 79 }, { "epoch": 0.01, "grad_norm": 0.21735166406748668, "learning_rate": 1.6985138004246283e-07, "loss": 0.1448, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.09655181133610861, "learning_rate": 1.719745222929936e-07, "loss": 0.1662, "step": 81 }, { "epoch": 0.01, "grad_norm": 0.1774750298108412, "learning_rate": 1.740976645435244e-07, "loss": 0.1068, "step": 82 }, { "epoch": 0.01, "grad_norm": 0.9165863046549484, "learning_rate": 1.7622080679405518e-07, "loss": 0.2897, "step": 83 }, { "epoch": 0.01, "grad_norm": 0.09591992055241581, "learning_rate": 1.7834394904458596e-07, "loss": 0.1542, "step": 84 }, { "epoch": 0.01, "grad_norm": 0.1871035981427734, "learning_rate": 1.8046709129511674e-07, "loss": 0.0815, "step": 85 }, { "epoch": 0.01, "grad_norm": 0.3013920904314661, "learning_rate": 1.8259023354564753e-07, "loss": 0.3896, "step": 86 }, { "epoch": 0.01, "grad_norm": 0.08166990499696496, "learning_rate": 1.847133757961783e-07, "loss": 0.1132, "step": 87 }, { "epoch": 0.01, "grad_norm": 0.24959187128555374, "learning_rate": 1.868365180467091e-07, "loss": 0.1128, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.11530906694869626, "learning_rate": 1.8895966029723993e-07, "loss": 0.1156, "step": 89 }, { "epoch": 0.01, "grad_norm": 0.284104155324781, "learning_rate": 1.9108280254777072e-07, "loss": 0.2533, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.2161792934914752, "learning_rate": 1.932059447983015e-07, "loss": 0.0977, "step": 91 }, { "epoch": 0.01, "grad_norm": 0.16412721895417143, "learning_rate": 1.9532908704883228e-07, "loss": 0.2882, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.21110753539717453, "learning_rate": 1.9745222929936307e-07, "loss": 0.0277, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.26361369211501084, "learning_rate": 1.9957537154989385e-07, "loss": 0.2768, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.2529278381367476, "learning_rate": 2.0169851380042463e-07, "loss": 0.1897, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.16391164203616654, "learning_rate": 2.0382165605095542e-07, "loss": 0.1368, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.2820574987511089, "learning_rate": 2.059447983014862e-07, "loss": 0.1703, "step": 97 }, { "epoch": 0.01, "grad_norm": 0.11268193516563275, "learning_rate": 2.0806794055201698e-07, "loss": 0.2199, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.287806588908634, "learning_rate": 2.1019108280254777e-07, "loss": 0.0926, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.2822299996909002, "learning_rate": 2.1231422505307855e-07, "loss": 0.1967, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.2393906685301478, "learning_rate": 2.1443736730360934e-07, "loss": 0.398, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.21571853982926306, "learning_rate": 2.1656050955414012e-07, "loss": 0.2037, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.19160969775378794, "learning_rate": 2.186836518046709e-07, "loss": 0.3974, "step": 103 }, { "epoch": 0.01, "grad_norm": 0.2942322069415299, "learning_rate": 2.208067940552017e-07, "loss": 0.3434, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.25922201553758256, "learning_rate": 2.2292993630573247e-07, "loss": 0.1251, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.2071198246502389, "learning_rate": 2.2505307855626325e-07, "loss": 0.1807, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.1794504223080536, "learning_rate": 2.2717622080679404e-07, "loss": 0.1192, "step": 107 }, { "epoch": 0.01, "grad_norm": 0.3541472935508919, "learning_rate": 2.2929936305732485e-07, "loss": 0.1795, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.42212667279454036, "learning_rate": 2.3142250530785563e-07, "loss": 0.2508, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.15008163868349053, "learning_rate": 2.3354564755838642e-07, "loss": 0.1445, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.2883339348097026, "learning_rate": 2.356687898089172e-07, "loss": 0.1372, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.2689503988887403, "learning_rate": 2.3779193205944798e-07, "loss": 0.2331, "step": 112 }, { "epoch": 0.01, "grad_norm": 0.3632782308638637, "learning_rate": 2.3991507430997877e-07, "loss": 0.3856, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.386197873074015, "learning_rate": 2.4203821656050955e-07, "loss": 0.0899, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.09151484128845049, "learning_rate": 2.4416135881104033e-07, "loss": 0.0109, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.24683497049830289, "learning_rate": 2.462845010615711e-07, "loss": 0.1895, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.2510248585389427, "learning_rate": 2.484076433121019e-07, "loss": 0.1953, "step": 117 }, { "epoch": 0.01, "grad_norm": 0.31437307952859195, "learning_rate": 2.505307855626327e-07, "loss": 0.2637, "step": 118 }, { "epoch": 0.01, "grad_norm": 0.5792771847840741, "learning_rate": 2.5265392781316347e-07, "loss": 0.2178, "step": 119 }, { "epoch": 0.01, "grad_norm": 0.3569547700025303, "learning_rate": 2.5477707006369425e-07, "loss": 0.2827, "step": 120 }, { "epoch": 0.01, "grad_norm": 0.3541865319331499, "learning_rate": 2.5690021231422504e-07, "loss": 0.2158, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.2803812751973915, "learning_rate": 2.590233545647558e-07, "loss": 0.1308, "step": 122 }, { "epoch": 0.01, "grad_norm": 0.3733045506699708, "learning_rate": 2.611464968152866e-07, "loss": 0.1748, "step": 123 }, { "epoch": 0.01, "grad_norm": 0.2627044050122649, "learning_rate": 2.632696390658174e-07, "loss": 0.1862, "step": 124 }, { "epoch": 0.01, "grad_norm": 0.1801284370762871, "learning_rate": 2.6539278131634817e-07, "loss": 0.2093, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.14356965529788235, "learning_rate": 2.6751592356687895e-07, "loss": 0.1077, "step": 126 }, { "epoch": 0.01, "grad_norm": 0.1448176733758346, "learning_rate": 2.6963906581740974e-07, "loss": 0.2931, "step": 127 }, { "epoch": 0.01, "grad_norm": 0.4005460509049744, "learning_rate": 2.717622080679405e-07, "loss": 0.1749, "step": 128 }, { "epoch": 0.01, "grad_norm": 0.22251676974314188, "learning_rate": 2.738853503184713e-07, "loss": 0.1389, "step": 129 }, { "epoch": 0.01, "grad_norm": 0.3027281145545714, "learning_rate": 2.760084925690021e-07, "loss": 0.1792, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.23846282537302702, "learning_rate": 2.7813163481953287e-07, "loss": 0.3108, "step": 131 }, { "epoch": 0.01, "grad_norm": 0.2564118214473458, "learning_rate": 2.8025477707006366e-07, "loss": 0.2239, "step": 132 }, { "epoch": 0.01, "grad_norm": 0.17751117550418236, "learning_rate": 2.8237791932059444e-07, "loss": 0.1463, "step": 133 }, { "epoch": 0.01, "grad_norm": 0.25702667079807706, "learning_rate": 2.845010615711252e-07, "loss": 0.3392, "step": 134 }, { "epoch": 0.01, "grad_norm": 0.18390417299723882, "learning_rate": 2.86624203821656e-07, "loss": 0.101, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.2256740628934894, "learning_rate": 2.887473460721868e-07, "loss": 0.2245, "step": 136 }, { "epoch": 0.01, "grad_norm": 0.22750489451154088, "learning_rate": 2.908704883227176e-07, "loss": 0.3181, "step": 137 }, { "epoch": 0.01, "grad_norm": 0.2252501766471252, "learning_rate": 2.929936305732484e-07, "loss": 0.1086, "step": 138 }, { "epoch": 0.01, "grad_norm": 0.26754928057573385, "learning_rate": 2.951167728237792e-07, "loss": 0.3815, "step": 139 }, { "epoch": 0.01, "grad_norm": 0.3578695711287303, "learning_rate": 2.9723991507431e-07, "loss": 0.3195, "step": 140 }, { "epoch": 0.01, "grad_norm": 0.166913223467953, "learning_rate": 2.9936305732484076e-07, "loss": 0.1541, "step": 141 }, { "epoch": 0.01, "grad_norm": 0.22575856821984214, "learning_rate": 3.0148619957537155e-07, "loss": 0.1101, "step": 142 }, { "epoch": 0.01, "grad_norm": 0.1816892201464045, "learning_rate": 3.0360934182590233e-07, "loss": 0.243, "step": 143 }, { "epoch": 0.01, "grad_norm": 0.24199343241011556, "learning_rate": 3.057324840764331e-07, "loss": 0.0549, "step": 144 }, { "epoch": 0.01, "grad_norm": 0.3745084458743019, "learning_rate": 3.078556263269639e-07, "loss": 0.0462, "step": 145 }, { "epoch": 0.01, "grad_norm": 0.1060462353898553, "learning_rate": 3.099787685774947e-07, "loss": 0.1055, "step": 146 }, { "epoch": 0.01, "grad_norm": 0.33017772424792025, "learning_rate": 3.1210191082802546e-07, "loss": 0.3886, "step": 147 }, { "epoch": 0.01, "grad_norm": 0.13086145221413323, "learning_rate": 3.1422505307855625e-07, "loss": 0.1973, "step": 148 }, { "epoch": 0.01, "grad_norm": 0.26957199286967665, "learning_rate": 3.1634819532908703e-07, "loss": 0.3495, "step": 149 }, { "epoch": 0.01, "grad_norm": 0.25739770577352045, "learning_rate": 3.184713375796178e-07, "loss": 0.1319, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.26789222142663155, "learning_rate": 3.205944798301486e-07, "loss": 0.3428, "step": 151 }, { "epoch": 0.01, "grad_norm": 0.09704698154957962, "learning_rate": 3.227176220806794e-07, "loss": 0.1104, "step": 152 }, { "epoch": 0.01, "grad_norm": 0.29446869545909066, "learning_rate": 3.2484076433121017e-07, "loss": 0.2044, "step": 153 }, { "epoch": 0.01, "grad_norm": 0.24583974076593526, "learning_rate": 3.2696390658174095e-07, "loss": 0.143, "step": 154 }, { "epoch": 0.01, "grad_norm": 0.21672372184958014, "learning_rate": 3.2908704883227173e-07, "loss": 0.2422, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.2316459136996034, "learning_rate": 3.312101910828025e-07, "loss": 0.0438, "step": 156 }, { "epoch": 0.01, "grad_norm": 0.20623875642113082, "learning_rate": 3.333333333333333e-07, "loss": 0.1578, "step": 157 }, { "epoch": 0.01, "grad_norm": 0.4061097538574797, "learning_rate": 3.354564755838641e-07, "loss": 0.3662, "step": 158 }, { "epoch": 0.01, "grad_norm": 0.14448612182415435, "learning_rate": 3.3757961783439487e-07, "loss": 0.0628, "step": 159 }, { "epoch": 0.01, "grad_norm": 0.39492608570239246, "learning_rate": 3.3970276008492565e-07, "loss": 0.2601, "step": 160 }, { "epoch": 0.01, "grad_norm": 0.25801533070450783, "learning_rate": 3.4182590233545644e-07, "loss": 0.1741, "step": 161 }, { "epoch": 0.01, "grad_norm": 0.14966054415149585, "learning_rate": 3.439490445859872e-07, "loss": 0.1854, "step": 162 }, { "epoch": 0.01, "grad_norm": 0.253115839014238, "learning_rate": 3.46072186836518e-07, "loss": 0.2261, "step": 163 }, { "epoch": 0.01, "grad_norm": 0.34562150869603286, "learning_rate": 3.481953290870488e-07, "loss": 0.09, "step": 164 }, { "epoch": 0.01, "grad_norm": 0.23803559936593066, "learning_rate": 3.5031847133757957e-07, "loss": 0.1875, "step": 165 }, { "epoch": 0.01, "grad_norm": 0.3221505541878738, "learning_rate": 3.5244161358811035e-07, "loss": 0.1857, "step": 166 }, { "epoch": 0.01, "grad_norm": 0.12490966501456342, "learning_rate": 3.5456475583864114e-07, "loss": 0.0353, "step": 167 }, { "epoch": 0.01, "grad_norm": 0.18338891854904169, "learning_rate": 3.566878980891719e-07, "loss": 0.0618, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.17791003089439517, "learning_rate": 3.588110403397027e-07, "loss": 0.3212, "step": 169 }, { "epoch": 0.01, "grad_norm": 0.21835973917262472, "learning_rate": 3.609341825902335e-07, "loss": 0.2773, "step": 170 }, { "epoch": 0.01, "grad_norm": 0.21765605514455302, "learning_rate": 3.6305732484076427e-07, "loss": 0.1221, "step": 171 }, { "epoch": 0.01, "grad_norm": 0.21150022657865714, "learning_rate": 3.6518046709129506e-07, "loss": 0.3392, "step": 172 }, { "epoch": 0.01, "grad_norm": 0.17460086087779267, "learning_rate": 3.6730360934182584e-07, "loss": 0.1001, "step": 173 }, { "epoch": 0.01, "grad_norm": 0.2999670269163138, "learning_rate": 3.694267515923566e-07, "loss": 0.0441, "step": 174 }, { "epoch": 0.01, "grad_norm": 0.25135495215228293, "learning_rate": 3.715498938428874e-07, "loss": 0.2128, "step": 175 }, { "epoch": 0.01, "grad_norm": 0.6471046329195711, "learning_rate": 3.736730360934182e-07, "loss": 0.1948, "step": 176 }, { "epoch": 0.01, "grad_norm": 0.2278373196163679, "learning_rate": 3.757961783439491e-07, "loss": 0.2304, "step": 177 }, { "epoch": 0.01, "grad_norm": 0.2758925917903719, "learning_rate": 3.7791932059447986e-07, "loss": 0.4609, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.21079555432541505, "learning_rate": 3.8004246284501065e-07, "loss": 0.2711, "step": 179 }, { "epoch": 0.01, "grad_norm": 0.22056801853242466, "learning_rate": 3.8216560509554143e-07, "loss": 0.1321, "step": 180 }, { "epoch": 0.01, "grad_norm": 0.934771789969799, "learning_rate": 3.842887473460722e-07, "loss": 0.3233, "step": 181 }, { "epoch": 0.01, "grad_norm": 0.21372971984552552, "learning_rate": 3.86411889596603e-07, "loss": 0.3842, "step": 182 }, { "epoch": 0.01, "grad_norm": 0.18177493199129321, "learning_rate": 3.885350318471338e-07, "loss": 0.1263, "step": 183 }, { "epoch": 0.01, "grad_norm": 0.20539564694344178, "learning_rate": 3.9065817409766457e-07, "loss": 0.099, "step": 184 }, { "epoch": 0.01, "grad_norm": 0.24517169926202315, "learning_rate": 3.9278131634819535e-07, "loss": 0.1368, "step": 185 }, { "epoch": 0.01, "grad_norm": 0.1843016081672305, "learning_rate": 3.9490445859872613e-07, "loss": 0.0505, "step": 186 }, { "epoch": 0.01, "grad_norm": 0.30375760425814136, "learning_rate": 3.970276008492569e-07, "loss": 0.2151, "step": 187 }, { "epoch": 0.01, "grad_norm": 0.23789039380870222, "learning_rate": 3.991507430997877e-07, "loss": 0.0694, "step": 188 }, { "epoch": 0.01, "grad_norm": 0.24787341950436864, "learning_rate": 4.012738853503185e-07, "loss": 0.3139, "step": 189 }, { "epoch": 0.01, "grad_norm": 0.24142286451697284, "learning_rate": 4.0339702760084927e-07, "loss": 0.1738, "step": 190 }, { "epoch": 0.01, "grad_norm": 0.20276550233245788, "learning_rate": 4.0552016985138005e-07, "loss": 0.0852, "step": 191 }, { "epoch": 0.01, "grad_norm": 0.4582239097053706, "learning_rate": 4.0764331210191083e-07, "loss": 0.219, "step": 192 }, { "epoch": 0.01, "grad_norm": 0.3893721484305137, "learning_rate": 4.097664543524416e-07, "loss": 0.2916, "step": 193 }, { "epoch": 0.01, "grad_norm": 0.46880224835496076, "learning_rate": 4.118895966029724e-07, "loss": 0.2952, "step": 194 }, { "epoch": 0.01, "grad_norm": 0.13977758365510837, "learning_rate": 4.140127388535032e-07, "loss": 0.1819, "step": 195 }, { "epoch": 0.01, "grad_norm": 0.13470332419651734, "learning_rate": 4.1613588110403397e-07, "loss": 0.1215, "step": 196 }, { "epoch": 0.01, "grad_norm": 0.2307751266090855, "learning_rate": 4.1825902335456475e-07, "loss": 0.2767, "step": 197 }, { "epoch": 0.01, "grad_norm": 0.271595431624329, "learning_rate": 4.2038216560509554e-07, "loss": 0.1746, "step": 198 }, { "epoch": 0.01, "grad_norm": 0.23079560235515081, "learning_rate": 4.225053078556263e-07, "loss": 0.1271, "step": 199 }, { "epoch": 0.01, "grad_norm": 0.21300416406820002, "learning_rate": 4.246284501061571e-07, "loss": 0.2239, "step": 200 }, { "epoch": 0.01, "grad_norm": 0.11774526827543365, "learning_rate": 4.267515923566879e-07, "loss": 0.1041, "step": 201 }, { "epoch": 0.01, "grad_norm": 0.14021883811615443, "learning_rate": 4.2887473460721867e-07, "loss": 0.3552, "step": 202 }, { "epoch": 0.01, "grad_norm": 0.15598643532331874, "learning_rate": 4.3099787685774946e-07, "loss": 0.0227, "step": 203 }, { "epoch": 0.01, "grad_norm": 0.19572744171804726, "learning_rate": 4.3312101910828024e-07, "loss": 0.1274, "step": 204 }, { "epoch": 0.01, "grad_norm": 0.27843730774946024, "learning_rate": 4.35244161358811e-07, "loss": 0.3357, "step": 205 }, { "epoch": 0.01, "grad_norm": 0.2768138090599354, "learning_rate": 4.373673036093418e-07, "loss": 0.2559, "step": 206 }, { "epoch": 0.01, "grad_norm": 0.30303560402599317, "learning_rate": 4.394904458598726e-07, "loss": 0.2036, "step": 207 }, { "epoch": 0.01, "grad_norm": 0.3574035759761096, "learning_rate": 4.416135881104034e-07, "loss": 0.0257, "step": 208 }, { "epoch": 0.01, "grad_norm": 0.513602085503996, "learning_rate": 4.4373673036093416e-07, "loss": 0.1839, "step": 209 }, { "epoch": 0.01, "grad_norm": 0.23574962632529362, "learning_rate": 4.4585987261146494e-07, "loss": 0.131, "step": 210 }, { "epoch": 0.01, "grad_norm": 0.301502611286239, "learning_rate": 4.479830148619957e-07, "loss": 0.1754, "step": 211 }, { "epoch": 0.01, "grad_norm": 0.12831294475009764, "learning_rate": 4.501061571125265e-07, "loss": 0.0413, "step": 212 }, { "epoch": 0.01, "grad_norm": 0.44448268764246396, "learning_rate": 4.522292993630573e-07, "loss": 0.0187, "step": 213 }, { "epoch": 0.01, "grad_norm": 0.16541563293031547, "learning_rate": 4.543524416135881e-07, "loss": 0.0696, "step": 214 }, { "epoch": 0.01, "grad_norm": 0.4156837966875057, "learning_rate": 4.5647558386411886e-07, "loss": 0.1157, "step": 215 }, { "epoch": 0.01, "grad_norm": 0.27956594396333684, "learning_rate": 4.585987261146497e-07, "loss": 0.0714, "step": 216 }, { "epoch": 0.01, "grad_norm": 0.1141071927218783, "learning_rate": 4.607218683651805e-07, "loss": 0.0591, "step": 217 }, { "epoch": 0.01, "grad_norm": 0.168783943119781, "learning_rate": 4.6284501061571126e-07, "loss": 0.2204, "step": 218 }, { "epoch": 0.01, "grad_norm": 0.1561287939942871, "learning_rate": 4.6496815286624205e-07, "loss": 0.0735, "step": 219 }, { "epoch": 0.01, "grad_norm": 0.30322314822119223, "learning_rate": 4.6709129511677283e-07, "loss": 0.1685, "step": 220 }, { "epoch": 0.01, "grad_norm": 0.3697363694305286, "learning_rate": 4.692144373673036e-07, "loss": 0.3518, "step": 221 }, { "epoch": 0.01, "grad_norm": 0.2906261422640796, "learning_rate": 4.713375796178344e-07, "loss": 0.1692, "step": 222 }, { "epoch": 0.01, "grad_norm": 0.26954453234957454, "learning_rate": 4.734607218683652e-07, "loss": 0.1832, "step": 223 }, { "epoch": 0.01, "grad_norm": 0.27235398428229896, "learning_rate": 4.7558386411889597e-07, "loss": 0.3283, "step": 224 }, { "epoch": 0.01, "grad_norm": 0.280661228991786, "learning_rate": 4.777070063694267e-07, "loss": 0.314, "step": 225 }, { "epoch": 0.01, "grad_norm": 0.2218287050986227, "learning_rate": 4.798301486199575e-07, "loss": 0.1771, "step": 226 }, { "epoch": 0.01, "grad_norm": 0.12591559039362718, "learning_rate": 4.819532908704883e-07, "loss": 0.0138, "step": 227 }, { "epoch": 0.01, "grad_norm": 0.13838304163340706, "learning_rate": 4.840764331210191e-07, "loss": 0.0261, "step": 228 }, { "epoch": 0.01, "grad_norm": 0.11523289156706043, "learning_rate": 4.861995753715499e-07, "loss": 0.1658, "step": 229 }, { "epoch": 0.01, "grad_norm": 0.24297355446371952, "learning_rate": 4.883227176220807e-07, "loss": 0.3567, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.19210771351944797, "learning_rate": 4.904458598726115e-07, "loss": 0.1158, "step": 231 }, { "epoch": 0.01, "grad_norm": 0.17117268316319734, "learning_rate": 4.925690021231422e-07, "loss": 0.0205, "step": 232 }, { "epoch": 0.01, "grad_norm": 0.5389672372369377, "learning_rate": 4.94692144373673e-07, "loss": 0.1087, "step": 233 }, { "epoch": 0.01, "grad_norm": 0.10165201346621026, "learning_rate": 4.968152866242038e-07, "loss": 0.021, "step": 234 }, { "epoch": 0.01, "grad_norm": 0.15794862575618773, "learning_rate": 4.989384288747346e-07, "loss": 0.1067, "step": 235 }, { "epoch": 0.02, "grad_norm": 0.0857455512752192, "learning_rate": 5.010615711252654e-07, "loss": 0.0126, "step": 236 }, { "epoch": 0.02, "grad_norm": 0.15674156646349519, "learning_rate": 5.031847133757962e-07, "loss": 0.17, "step": 237 }, { "epoch": 0.02, "grad_norm": 0.23624012189202026, "learning_rate": 5.053078556263269e-07, "loss": 0.0348, "step": 238 }, { "epoch": 0.02, "grad_norm": 0.20950870199232627, "learning_rate": 5.074309978768577e-07, "loss": 0.1308, "step": 239 }, { "epoch": 0.02, "grad_norm": 0.182355080636987, "learning_rate": 5.095541401273885e-07, "loss": 0.2538, "step": 240 }, { "epoch": 0.02, "grad_norm": 0.19906201837253645, "learning_rate": 5.116772823779193e-07, "loss": 0.1013, "step": 241 }, { "epoch": 0.02, "grad_norm": 0.31748922822807213, "learning_rate": 5.138004246284501e-07, "loss": 0.1983, "step": 242 }, { "epoch": 0.02, "grad_norm": 0.2879583102322477, "learning_rate": 5.159235668789809e-07, "loss": 0.3305, "step": 243 }, { "epoch": 0.02, "grad_norm": 0.2848482889330333, "learning_rate": 5.180467091295116e-07, "loss": 0.1683, "step": 244 }, { "epoch": 0.02, "grad_norm": 0.38641844976427114, "learning_rate": 5.201698513800424e-07, "loss": 0.1918, "step": 245 }, { "epoch": 0.02, "grad_norm": 0.29791922743032145, "learning_rate": 5.222929936305732e-07, "loss": 0.2141, "step": 246 }, { "epoch": 0.02, "grad_norm": 0.37328860142096176, "learning_rate": 5.24416135881104e-07, "loss": 0.324, "step": 247 }, { "epoch": 0.02, "grad_norm": 0.11778988848411559, "learning_rate": 5.265392781316348e-07, "loss": 0.197, "step": 248 }, { "epoch": 0.02, "grad_norm": 0.33803319607529914, "learning_rate": 5.286624203821656e-07, "loss": 0.1999, "step": 249 }, { "epoch": 0.02, "grad_norm": 0.09994307309166663, "learning_rate": 5.307855626326963e-07, "loss": 0.0133, "step": 250 }, { "epoch": 0.02, "grad_norm": 0.18546932643609904, "learning_rate": 5.329087048832271e-07, "loss": 0.1493, "step": 251 }, { "epoch": 0.02, "grad_norm": 0.24972406037114434, "learning_rate": 5.350318471337579e-07, "loss": 0.2281, "step": 252 }, { "epoch": 0.02, "grad_norm": 0.22815217212037803, "learning_rate": 5.371549893842887e-07, "loss": 0.3029, "step": 253 }, { "epoch": 0.02, "grad_norm": 0.35664572216679813, "learning_rate": 5.392781316348195e-07, "loss": 0.2978, "step": 254 }, { "epoch": 0.02, "grad_norm": 0.36878615874476434, "learning_rate": 5.414012738853503e-07, "loss": 0.1935, "step": 255 }, { "epoch": 0.02, "grad_norm": 0.12433161555769073, "learning_rate": 5.43524416135881e-07, "loss": 0.0079, "step": 256 }, { "epoch": 0.02, "grad_norm": 0.7621643835905139, "learning_rate": 5.456475583864118e-07, "loss": 0.2617, "step": 257 }, { "epoch": 0.02, "grad_norm": 0.19432234383374675, "learning_rate": 5.477707006369426e-07, "loss": 0.1661, "step": 258 }, { "epoch": 0.02, "grad_norm": 0.15478225006265445, "learning_rate": 5.498938428874734e-07, "loss": 0.1457, "step": 259 }, { "epoch": 0.02, "grad_norm": 0.4026440623845048, "learning_rate": 5.520169851380042e-07, "loss": 0.2138, "step": 260 }, { "epoch": 0.02, "grad_norm": 0.14132590741784368, "learning_rate": 5.54140127388535e-07, "loss": 0.3274, "step": 261 }, { "epoch": 0.02, "grad_norm": 0.2744286830896772, "learning_rate": 5.562632696390657e-07, "loss": 0.319, "step": 262 }, { "epoch": 0.02, "grad_norm": 0.32236468776382265, "learning_rate": 5.583864118895965e-07, "loss": 0.256, "step": 263 }, { "epoch": 0.02, "grad_norm": 0.17485595559343078, "learning_rate": 5.605095541401273e-07, "loss": 0.2133, "step": 264 }, { "epoch": 0.02, "grad_norm": 0.29452707985083865, "learning_rate": 5.626326963906581e-07, "loss": 0.0416, "step": 265 }, { "epoch": 0.02, "grad_norm": 0.3424552020931376, "learning_rate": 5.647558386411889e-07, "loss": 0.1472, "step": 266 }, { "epoch": 0.02, "grad_norm": 0.26338706700603143, "learning_rate": 5.668789808917197e-07, "loss": 0.296, "step": 267 }, { "epoch": 0.02, "grad_norm": 0.14705260009709054, "learning_rate": 5.690021231422504e-07, "loss": 0.1678, "step": 268 }, { "epoch": 0.02, "grad_norm": 0.7860159787237884, "learning_rate": 5.711252653927812e-07, "loss": 0.1939, "step": 269 }, { "epoch": 0.02, "grad_norm": 0.13624004223090053, "learning_rate": 5.73248407643312e-07, "loss": 0.1554, "step": 270 }, { "epoch": 0.02, "grad_norm": 0.09898779142099313, "learning_rate": 5.753715498938428e-07, "loss": 0.192, "step": 271 }, { "epoch": 0.02, "grad_norm": 0.18706483719575634, "learning_rate": 5.774946921443736e-07, "loss": 0.2032, "step": 272 }, { "epoch": 0.02, "grad_norm": 0.29195550701482514, "learning_rate": 5.796178343949044e-07, "loss": 0.2077, "step": 273 }, { "epoch": 0.02, "grad_norm": 0.3392739493646915, "learning_rate": 5.817409766454351e-07, "loss": 0.2532, "step": 274 }, { "epoch": 0.02, "grad_norm": 0.3041085515552397, "learning_rate": 5.838641188959659e-07, "loss": 0.0918, "step": 275 }, { "epoch": 0.02, "grad_norm": 0.4770793774693724, "learning_rate": 5.859872611464968e-07, "loss": 0.1106, "step": 276 }, { "epoch": 0.02, "grad_norm": 0.19562567545163273, "learning_rate": 5.881104033970276e-07, "loss": 0.1333, "step": 277 }, { "epoch": 0.02, "grad_norm": 0.2707278306013383, "learning_rate": 5.902335456475584e-07, "loss": 0.1306, "step": 278 }, { "epoch": 0.02, "grad_norm": 0.16296943278549977, "learning_rate": 5.923566878980892e-07, "loss": 0.0364, "step": 279 }, { "epoch": 0.02, "grad_norm": 0.5386648312096373, "learning_rate": 5.9447983014862e-07, "loss": 0.0808, "step": 280 }, { "epoch": 0.02, "grad_norm": 0.2942127275073548, "learning_rate": 5.966029723991507e-07, "loss": 0.2626, "step": 281 }, { "epoch": 0.02, "grad_norm": 0.3119903160409143, "learning_rate": 5.987261146496815e-07, "loss": 0.1895, "step": 282 }, { "epoch": 0.02, "grad_norm": 0.2737013844339624, "learning_rate": 6.008492569002123e-07, "loss": 0.1385, "step": 283 }, { "epoch": 0.02, "grad_norm": 0.16855653755832362, "learning_rate": 6.029723991507431e-07, "loss": 0.1529, "step": 284 }, { "epoch": 0.02, "grad_norm": 0.49718223528016353, "learning_rate": 6.050955414012739e-07, "loss": 0.4262, "step": 285 }, { "epoch": 0.02, "grad_norm": 0.2627780618572779, "learning_rate": 6.072186836518047e-07, "loss": 0.3654, "step": 286 }, { "epoch": 0.02, "grad_norm": 0.2270233033606091, "learning_rate": 6.093418259023354e-07, "loss": 0.1655, "step": 287 }, { "epoch": 0.02, "grad_norm": 0.32556073415138226, "learning_rate": 6.114649681528662e-07, "loss": 0.1, "step": 288 }, { "epoch": 0.02, "grad_norm": 0.0688499303724151, "learning_rate": 6.13588110403397e-07, "loss": 0.0131, "step": 289 }, { "epoch": 0.02, "grad_norm": 0.19737684668244823, "learning_rate": 6.157112526539278e-07, "loss": 0.2689, "step": 290 }, { "epoch": 0.02, "grad_norm": 0.22678346062623986, "learning_rate": 6.178343949044586e-07, "loss": 0.2251, "step": 291 }, { "epoch": 0.02, "grad_norm": 0.21751630427029792, "learning_rate": 6.199575371549894e-07, "loss": 0.2584, "step": 292 }, { "epoch": 0.02, "grad_norm": 0.2141418417559859, "learning_rate": 6.220806794055201e-07, "loss": 0.0784, "step": 293 }, { "epoch": 0.02, "grad_norm": 0.1430074561073039, "learning_rate": 6.242038216560509e-07, "loss": 0.1433, "step": 294 }, { "epoch": 0.02, "grad_norm": 0.2786748881959617, "learning_rate": 6.263269639065817e-07, "loss": 0.2284, "step": 295 }, { "epoch": 0.02, "grad_norm": 0.2433691630645959, "learning_rate": 6.284501061571125e-07, "loss": 0.1846, "step": 296 }, { "epoch": 0.02, "grad_norm": 0.7923395523484135, "learning_rate": 6.305732484076433e-07, "loss": 0.3029, "step": 297 }, { "epoch": 0.02, "grad_norm": 0.1963409180306421, "learning_rate": 6.326963906581741e-07, "loss": 0.1897, "step": 298 }, { "epoch": 0.02, "grad_norm": 0.28965013571514175, "learning_rate": 6.348195329087048e-07, "loss": 0.0649, "step": 299 }, { "epoch": 0.02, "grad_norm": 0.10288304663737333, "learning_rate": 6.369426751592356e-07, "loss": 0.0935, "step": 300 }, { "epoch": 0.02, "grad_norm": 0.2386140993298097, "learning_rate": 6.390658174097664e-07, "loss": 0.1989, "step": 301 }, { "epoch": 0.02, "grad_norm": 0.2001752439033513, "learning_rate": 6.411889596602972e-07, "loss": 0.2559, "step": 302 }, { "epoch": 0.02, "grad_norm": 0.14419532926564962, "learning_rate": 6.43312101910828e-07, "loss": 0.1163, "step": 303 }, { "epoch": 0.02, "grad_norm": 0.20905738114675854, "learning_rate": 6.454352441613588e-07, "loss": 0.015, "step": 304 }, { "epoch": 0.02, "grad_norm": 0.24380431007046924, "learning_rate": 6.475583864118895e-07, "loss": 0.112, "step": 305 }, { "epoch": 0.02, "grad_norm": 0.3023639200080016, "learning_rate": 6.496815286624203e-07, "loss": 0.2968, "step": 306 }, { "epoch": 0.02, "grad_norm": 0.2606866540139771, "learning_rate": 6.518046709129511e-07, "loss": 0.1304, "step": 307 }, { "epoch": 0.02, "grad_norm": 0.2656805524127184, "learning_rate": 6.539278131634819e-07, "loss": 0.2697, "step": 308 }, { "epoch": 0.02, "grad_norm": 0.3193198432455117, "learning_rate": 6.560509554140127e-07, "loss": 0.1832, "step": 309 }, { "epoch": 0.02, "grad_norm": 0.2874561835870219, "learning_rate": 6.581740976645435e-07, "loss": 0.0569, "step": 310 }, { "epoch": 0.02, "grad_norm": 0.3174031017726217, "learning_rate": 6.602972399150743e-07, "loss": 0.1652, "step": 311 }, { "epoch": 0.02, "grad_norm": 0.20165045009761387, "learning_rate": 6.62420382165605e-07, "loss": 0.0533, "step": 312 }, { "epoch": 0.02, "grad_norm": 0.26014267892389276, "learning_rate": 6.645435244161358e-07, "loss": 0.3167, "step": 313 }, { "epoch": 0.02, "grad_norm": 0.1251119388682503, "learning_rate": 6.666666666666666e-07, "loss": 0.1834, "step": 314 }, { "epoch": 0.02, "grad_norm": 0.3945081001141345, "learning_rate": 6.687898089171974e-07, "loss": 0.2264, "step": 315 }, { "epoch": 0.02, "grad_norm": 0.3474798985267554, "learning_rate": 6.709129511677282e-07, "loss": 0.2383, "step": 316 }, { "epoch": 0.02, "grad_norm": 0.1947402716044741, "learning_rate": 6.73036093418259e-07, "loss": 0.2622, "step": 317 }, { "epoch": 0.02, "grad_norm": 0.22362536364169935, "learning_rate": 6.751592356687897e-07, "loss": 0.0834, "step": 318 }, { "epoch": 0.02, "grad_norm": 0.11661468027254816, "learning_rate": 6.772823779193205e-07, "loss": 0.0487, "step": 319 }, { "epoch": 0.02, "grad_norm": 0.17541144954253332, "learning_rate": 6.794055201698513e-07, "loss": 0.1103, "step": 320 }, { "epoch": 0.02, "grad_norm": 0.13308947738335472, "learning_rate": 6.815286624203821e-07, "loss": 0.0908, "step": 321 }, { "epoch": 0.02, "grad_norm": 0.28308382340263966, "learning_rate": 6.836518046709129e-07, "loss": 0.2402, "step": 322 }, { "epoch": 0.02, "grad_norm": 0.16304676460641138, "learning_rate": 6.857749469214437e-07, "loss": 0.1215, "step": 323 }, { "epoch": 0.02, "grad_norm": 0.2690315548677506, "learning_rate": 6.878980891719744e-07, "loss": 0.1385, "step": 324 }, { "epoch": 0.02, "grad_norm": 0.28782689849209325, "learning_rate": 6.900212314225052e-07, "loss": 0.1083, "step": 325 }, { "epoch": 0.02, "grad_norm": 0.32365798582335054, "learning_rate": 6.92144373673036e-07, "loss": 0.1046, "step": 326 }, { "epoch": 0.02, "grad_norm": 0.3017609449576566, "learning_rate": 6.942675159235668e-07, "loss": 0.2847, "step": 327 }, { "epoch": 0.02, "grad_norm": 0.26305840526601176, "learning_rate": 6.963906581740976e-07, "loss": 0.132, "step": 328 }, { "epoch": 0.02, "grad_norm": 0.527745134158186, "learning_rate": 6.985138004246284e-07, "loss": 0.3415, "step": 329 }, { "epoch": 0.02, "grad_norm": 0.2400082999037914, "learning_rate": 7.006369426751591e-07, "loss": 0.224, "step": 330 }, { "epoch": 0.02, "grad_norm": 0.31498865831609935, "learning_rate": 7.027600849256899e-07, "loss": 0.2224, "step": 331 }, { "epoch": 0.02, "grad_norm": 0.23024024876382654, "learning_rate": 7.048832271762207e-07, "loss": 0.1899, "step": 332 }, { "epoch": 0.02, "grad_norm": 0.4186895240598085, "learning_rate": 7.070063694267515e-07, "loss": 0.2308, "step": 333 }, { "epoch": 0.02, "grad_norm": 0.13259159510712912, "learning_rate": 7.091295116772823e-07, "loss": 0.1469, "step": 334 }, { "epoch": 0.02, "grad_norm": 0.2373762968766546, "learning_rate": 7.112526539278131e-07, "loss": 0.1811, "step": 335 }, { "epoch": 0.02, "grad_norm": 0.2953297267584623, "learning_rate": 7.133757961783438e-07, "loss": 0.2956, "step": 336 }, { "epoch": 0.02, "grad_norm": 0.2963767478220723, "learning_rate": 7.154989384288746e-07, "loss": 0.27, "step": 337 }, { "epoch": 0.02, "grad_norm": 0.22335625807264956, "learning_rate": 7.176220806794054e-07, "loss": 0.1269, "step": 338 }, { "epoch": 0.02, "grad_norm": 0.32139447372756724, "learning_rate": 7.197452229299362e-07, "loss": 0.2346, "step": 339 }, { "epoch": 0.02, "grad_norm": 0.23705093106999992, "learning_rate": 7.21868365180467e-07, "loss": 0.2248, "step": 340 }, { "epoch": 0.02, "grad_norm": 0.22767202512578014, "learning_rate": 7.239915074309978e-07, "loss": 0.2066, "step": 341 }, { "epoch": 0.02, "grad_norm": 0.18397356381653115, "learning_rate": 7.261146496815285e-07, "loss": 0.1799, "step": 342 }, { "epoch": 0.02, "grad_norm": 0.20673006822064957, "learning_rate": 7.282377919320593e-07, "loss": 0.4484, "step": 343 }, { "epoch": 0.02, "grad_norm": 0.0557207926549818, "learning_rate": 7.303609341825901e-07, "loss": 0.0687, "step": 344 }, { "epoch": 0.02, "grad_norm": 0.3081173807962164, "learning_rate": 7.324840764331209e-07, "loss": 0.2799, "step": 345 }, { "epoch": 0.02, "grad_norm": 0.18776167618511286, "learning_rate": 7.346072186836517e-07, "loss": 0.2008, "step": 346 }, { "epoch": 0.02, "grad_norm": 0.3296962816092498, "learning_rate": 7.367303609341825e-07, "loss": 0.4462, "step": 347 }, { "epoch": 0.02, "grad_norm": 0.1485189730693178, "learning_rate": 7.388535031847132e-07, "loss": 0.0807, "step": 348 }, { "epoch": 0.02, "grad_norm": 0.6852474737176507, "learning_rate": 7.40976645435244e-07, "loss": 0.2873, "step": 349 }, { "epoch": 0.02, "grad_norm": 0.3262034889242401, "learning_rate": 7.430997876857748e-07, "loss": 0.1459, "step": 350 }, { "epoch": 0.02, "grad_norm": 0.15031259529226046, "learning_rate": 7.452229299363056e-07, "loss": 0.2377, "step": 351 }, { "epoch": 0.02, "grad_norm": 0.16892482959317795, "learning_rate": 7.473460721868364e-07, "loss": 0.2246, "step": 352 }, { "epoch": 0.02, "grad_norm": 0.25425496343673953, "learning_rate": 7.494692144373672e-07, "loss": 0.0964, "step": 353 }, { "epoch": 0.02, "grad_norm": 0.13793263456492666, "learning_rate": 7.515923566878982e-07, "loss": 0.1007, "step": 354 }, { "epoch": 0.02, "grad_norm": 0.2097891423154737, "learning_rate": 7.537154989384289e-07, "loss": 0.2488, "step": 355 }, { "epoch": 0.02, "grad_norm": 0.08684897361559107, "learning_rate": 7.558386411889597e-07, "loss": 0.1011, "step": 356 }, { "epoch": 0.02, "grad_norm": 0.14510536271973992, "learning_rate": 7.579617834394905e-07, "loss": 0.0371, "step": 357 }, { "epoch": 0.02, "grad_norm": 0.2233939359582414, "learning_rate": 7.600849256900213e-07, "loss": 0.2513, "step": 358 }, { "epoch": 0.02, "grad_norm": 0.1778509190137439, "learning_rate": 7.622080679405521e-07, "loss": 0.1493, "step": 359 }, { "epoch": 0.02, "grad_norm": 0.29882084943077136, "learning_rate": 7.643312101910829e-07, "loss": 0.0833, "step": 360 }, { "epoch": 0.02, "grad_norm": 0.27539509978181104, "learning_rate": 7.664543524416136e-07, "loss": 0.4315, "step": 361 }, { "epoch": 0.02, "grad_norm": 0.43024944450808494, "learning_rate": 7.685774946921444e-07, "loss": 0.2586, "step": 362 }, { "epoch": 0.02, "grad_norm": 0.29794875496527123, "learning_rate": 7.707006369426752e-07, "loss": 0.1013, "step": 363 }, { "epoch": 0.02, "grad_norm": 0.11911472604816405, "learning_rate": 7.72823779193206e-07, "loss": 0.1041, "step": 364 }, { "epoch": 0.02, "grad_norm": 0.16868684281299706, "learning_rate": 7.749469214437368e-07, "loss": 0.2312, "step": 365 }, { "epoch": 0.02, "grad_norm": 0.1845638602526959, "learning_rate": 7.770700636942676e-07, "loss": 0.322, "step": 366 }, { "epoch": 0.02, "grad_norm": 0.26984042118586926, "learning_rate": 7.791932059447983e-07, "loss": 0.2608, "step": 367 }, { "epoch": 0.02, "grad_norm": 0.16810527194085234, "learning_rate": 7.813163481953291e-07, "loss": 0.2321, "step": 368 }, { "epoch": 0.02, "grad_norm": 0.06283977236084466, "learning_rate": 7.834394904458599e-07, "loss": 0.0854, "step": 369 }, { "epoch": 0.02, "grad_norm": 0.3218832078829887, "learning_rate": 7.855626326963907e-07, "loss": 0.2296, "step": 370 }, { "epoch": 0.02, "grad_norm": 0.1727750015726721, "learning_rate": 7.876857749469215e-07, "loss": 0.0982, "step": 371 }, { "epoch": 0.02, "grad_norm": 0.2994955471352695, "learning_rate": 7.898089171974523e-07, "loss": 0.0651, "step": 372 }, { "epoch": 0.02, "grad_norm": 0.3306255736205435, "learning_rate": 7.91932059447983e-07, "loss": 0.2255, "step": 373 }, { "epoch": 0.02, "grad_norm": 0.3446522069251835, "learning_rate": 7.940552016985138e-07, "loss": 0.2778, "step": 374 }, { "epoch": 0.02, "grad_norm": 0.15815947749538553, "learning_rate": 7.961783439490446e-07, "loss": 0.0535, "step": 375 }, { "epoch": 0.02, "grad_norm": 0.10367095348687529, "learning_rate": 7.983014861995754e-07, "loss": 0.049, "step": 376 }, { "epoch": 0.02, "grad_norm": 0.249563603920521, "learning_rate": 8.004246284501062e-07, "loss": 0.2211, "step": 377 }, { "epoch": 0.02, "grad_norm": 0.3102621403983259, "learning_rate": 8.02547770700637e-07, "loss": 0.158, "step": 378 }, { "epoch": 0.02, "grad_norm": 0.45287223375211105, "learning_rate": 8.046709129511678e-07, "loss": 0.1806, "step": 379 }, { "epoch": 0.02, "grad_norm": 0.24097223015886662, "learning_rate": 8.067940552016985e-07, "loss": 0.171, "step": 380 }, { "epoch": 0.02, "grad_norm": 0.2050006159478445, "learning_rate": 8.089171974522293e-07, "loss": 0.0992, "step": 381 }, { "epoch": 0.02, "grad_norm": 0.20793762739204616, "learning_rate": 8.110403397027601e-07, "loss": 0.1235, "step": 382 }, { "epoch": 0.02, "grad_norm": 0.19965927601019257, "learning_rate": 8.131634819532909e-07, "loss": 0.5675, "step": 383 }, { "epoch": 0.02, "grad_norm": 0.4643045643537523, "learning_rate": 8.152866242038217e-07, "loss": 0.1717, "step": 384 }, { "epoch": 0.02, "grad_norm": 0.12032396043051378, "learning_rate": 8.174097664543525e-07, "loss": 0.1565, "step": 385 }, { "epoch": 0.02, "grad_norm": 0.3713133529651683, "learning_rate": 8.195329087048832e-07, "loss": 0.0515, "step": 386 }, { "epoch": 0.02, "grad_norm": 0.3202619840308792, "learning_rate": 8.21656050955414e-07, "loss": 0.291, "step": 387 }, { "epoch": 0.02, "grad_norm": 0.19443370285639025, "learning_rate": 8.237791932059448e-07, "loss": 0.1902, "step": 388 }, { "epoch": 0.02, "grad_norm": 0.19849023334196433, "learning_rate": 8.259023354564756e-07, "loss": 0.1461, "step": 389 }, { "epoch": 0.02, "grad_norm": 0.214838462555769, "learning_rate": 8.280254777070064e-07, "loss": 0.1556, "step": 390 }, { "epoch": 0.02, "grad_norm": 0.13309551953092594, "learning_rate": 8.301486199575372e-07, "loss": 0.0695, "step": 391 }, { "epoch": 0.02, "grad_norm": 0.47221558686874704, "learning_rate": 8.322717622080679e-07, "loss": 0.3792, "step": 392 }, { "epoch": 0.03, "grad_norm": 0.3251883958033443, "learning_rate": 8.343949044585987e-07, "loss": 0.1702, "step": 393 }, { "epoch": 0.03, "grad_norm": 0.13748152191871976, "learning_rate": 8.365180467091295e-07, "loss": 0.0414, "step": 394 }, { "epoch": 0.03, "grad_norm": 0.2577512517607871, "learning_rate": 8.386411889596603e-07, "loss": 0.2511, "step": 395 }, { "epoch": 0.03, "grad_norm": 0.18287998914848866, "learning_rate": 8.407643312101911e-07, "loss": 0.1164, "step": 396 }, { "epoch": 0.03, "grad_norm": 0.17709339849932243, "learning_rate": 8.428874734607219e-07, "loss": 0.0207, "step": 397 }, { "epoch": 0.03, "grad_norm": 0.2298605814218488, "learning_rate": 8.450106157112526e-07, "loss": 0.2123, "step": 398 }, { "epoch": 0.03, "grad_norm": 0.3962396303985176, "learning_rate": 8.471337579617834e-07, "loss": 0.4138, "step": 399 }, { "epoch": 0.03, "grad_norm": 0.2808567887236285, "learning_rate": 8.492569002123142e-07, "loss": 0.4247, "step": 400 }, { "epoch": 0.03, "grad_norm": 0.23215587936104334, "learning_rate": 8.51380042462845e-07, "loss": 0.1985, "step": 401 }, { "epoch": 0.03, "grad_norm": 0.2939695074098435, "learning_rate": 8.535031847133758e-07, "loss": 0.1631, "step": 402 }, { "epoch": 0.03, "grad_norm": 0.23766967230175637, "learning_rate": 8.556263269639066e-07, "loss": 0.1726, "step": 403 }, { "epoch": 0.03, "grad_norm": 0.30850612413413675, "learning_rate": 8.577494692144373e-07, "loss": 0.2422, "step": 404 }, { "epoch": 0.03, "grad_norm": 0.2413195428537783, "learning_rate": 8.598726114649681e-07, "loss": 0.5659, "step": 405 }, { "epoch": 0.03, "grad_norm": 0.13921330227742174, "learning_rate": 8.619957537154989e-07, "loss": 0.0168, "step": 406 }, { "epoch": 0.03, "grad_norm": 0.2163843683745216, "learning_rate": 8.641188959660297e-07, "loss": 0.3101, "step": 407 }, { "epoch": 0.03, "grad_norm": 0.2196577154163935, "learning_rate": 8.662420382165605e-07, "loss": 0.3257, "step": 408 }, { "epoch": 0.03, "grad_norm": 0.6236373549005418, "learning_rate": 8.683651804670913e-07, "loss": 0.3986, "step": 409 }, { "epoch": 0.03, "grad_norm": 0.12046982288675889, "learning_rate": 8.70488322717622e-07, "loss": 0.125, "step": 410 }, { "epoch": 0.03, "grad_norm": 0.2581329329149649, "learning_rate": 8.726114649681528e-07, "loss": 0.4828, "step": 411 }, { "epoch": 0.03, "grad_norm": 0.2851014489197922, "learning_rate": 8.747346072186836e-07, "loss": 0.2913, "step": 412 }, { "epoch": 0.03, "grad_norm": 0.2410155492065017, "learning_rate": 8.768577494692144e-07, "loss": 0.1909, "step": 413 }, { "epoch": 0.03, "grad_norm": 0.17536848451664563, "learning_rate": 8.789808917197452e-07, "loss": 0.1295, "step": 414 }, { "epoch": 0.03, "grad_norm": 0.38772909706184794, "learning_rate": 8.81104033970276e-07, "loss": 0.188, "step": 415 }, { "epoch": 0.03, "grad_norm": 0.28915557836813943, "learning_rate": 8.832271762208067e-07, "loss": 0.1589, "step": 416 }, { "epoch": 0.03, "grad_norm": 0.28975293174705274, "learning_rate": 8.853503184713375e-07, "loss": 0.1225, "step": 417 }, { "epoch": 0.03, "grad_norm": 0.22249415674474898, "learning_rate": 8.874734607218683e-07, "loss": 0.2004, "step": 418 }, { "epoch": 0.03, "grad_norm": 0.2940454009135285, "learning_rate": 8.895966029723991e-07, "loss": 0.1272, "step": 419 }, { "epoch": 0.03, "grad_norm": 0.42358785034734187, "learning_rate": 8.917197452229299e-07, "loss": 0.0576, "step": 420 }, { "epoch": 0.03, "grad_norm": 0.21235244241456386, "learning_rate": 8.938428874734607e-07, "loss": 0.2632, "step": 421 }, { "epoch": 0.03, "grad_norm": 0.37224640019004696, "learning_rate": 8.959660297239914e-07, "loss": 0.2549, "step": 422 }, { "epoch": 0.03, "grad_norm": 0.36378654543030986, "learning_rate": 8.980891719745222e-07, "loss": 0.3476, "step": 423 }, { "epoch": 0.03, "grad_norm": 0.2566243356722543, "learning_rate": 9.00212314225053e-07, "loss": 0.2631, "step": 424 }, { "epoch": 0.03, "grad_norm": 0.4315065982171834, "learning_rate": 9.023354564755838e-07, "loss": 0.2681, "step": 425 }, { "epoch": 0.03, "grad_norm": 0.13878811119352702, "learning_rate": 9.044585987261146e-07, "loss": 0.0866, "step": 426 }, { "epoch": 0.03, "grad_norm": 0.098010200776135, "learning_rate": 9.065817409766454e-07, "loss": 0.0576, "step": 427 }, { "epoch": 0.03, "grad_norm": 0.18421455741569376, "learning_rate": 9.087048832271762e-07, "loss": 0.1636, "step": 428 }, { "epoch": 0.03, "grad_norm": 0.22135093664942307, "learning_rate": 9.108280254777069e-07, "loss": 0.1907, "step": 429 }, { "epoch": 0.03, "grad_norm": 0.2613270863068073, "learning_rate": 9.129511677282377e-07, "loss": 0.0568, "step": 430 }, { "epoch": 0.03, "grad_norm": 0.4885576563006937, "learning_rate": 9.150743099787685e-07, "loss": 0.1314, "step": 431 }, { "epoch": 0.03, "grad_norm": 0.3843438674385242, "learning_rate": 9.171974522292994e-07, "loss": 0.2748, "step": 432 }, { "epoch": 0.03, "grad_norm": 0.3162268006181552, "learning_rate": 9.193205944798302e-07, "loss": 0.2064, "step": 433 }, { "epoch": 0.03, "grad_norm": 0.18415232052742567, "learning_rate": 9.21443736730361e-07, "loss": 0.1955, "step": 434 }, { "epoch": 0.03, "grad_norm": 0.1977546707271865, "learning_rate": 9.235668789808917e-07, "loss": 0.1213, "step": 435 }, { "epoch": 0.03, "grad_norm": 0.17277134525309018, "learning_rate": 9.256900212314225e-07, "loss": 0.288, "step": 436 }, { "epoch": 0.03, "grad_norm": 0.25520185550781, "learning_rate": 9.278131634819533e-07, "loss": 0.1035, "step": 437 }, { "epoch": 0.03, "grad_norm": 0.17704131808478485, "learning_rate": 9.299363057324841e-07, "loss": 0.0181, "step": 438 }, { "epoch": 0.03, "grad_norm": 0.3245504988404467, "learning_rate": 9.320594479830149e-07, "loss": 0.2044, "step": 439 }, { "epoch": 0.03, "grad_norm": 0.18244120441800724, "learning_rate": 9.341825902335457e-07, "loss": 0.2578, "step": 440 }, { "epoch": 0.03, "grad_norm": 0.21902255767587192, "learning_rate": 9.363057324840764e-07, "loss": 0.1456, "step": 441 }, { "epoch": 0.03, "grad_norm": 0.1695979665834792, "learning_rate": 9.384288747346072e-07, "loss": 0.1774, "step": 442 }, { "epoch": 0.03, "grad_norm": 0.2676615780508529, "learning_rate": 9.40552016985138e-07, "loss": 0.2396, "step": 443 }, { "epoch": 0.03, "grad_norm": 0.34088965282414585, "learning_rate": 9.426751592356688e-07, "loss": 0.1528, "step": 444 }, { "epoch": 0.03, "grad_norm": 0.07925269437411404, "learning_rate": 9.447983014861996e-07, "loss": 0.0918, "step": 445 }, { "epoch": 0.03, "grad_norm": 0.13870357480386233, "learning_rate": 9.469214437367304e-07, "loss": 0.0852, "step": 446 }, { "epoch": 0.03, "grad_norm": 0.32870650312044764, "learning_rate": 9.490445859872611e-07, "loss": 0.1525, "step": 447 }, { "epoch": 0.03, "grad_norm": 0.24305219592733074, "learning_rate": 9.511677282377919e-07, "loss": 0.2167, "step": 448 }, { "epoch": 0.03, "grad_norm": 0.31525378059408554, "learning_rate": 9.532908704883227e-07, "loss": 0.364, "step": 449 }, { "epoch": 0.03, "grad_norm": 0.4545081071608993, "learning_rate": 9.554140127388535e-07, "loss": 0.0589, "step": 450 }, { "epoch": 0.03, "grad_norm": 0.1864295918607807, "learning_rate": 9.575371549893843e-07, "loss": 0.1124, "step": 451 }, { "epoch": 0.03, "grad_norm": 0.1497477565593572, "learning_rate": 9.59660297239915e-07, "loss": 0.2063, "step": 452 }, { "epoch": 0.03, "grad_norm": 0.15682831310264794, "learning_rate": 9.617834394904458e-07, "loss": 0.1217, "step": 453 }, { "epoch": 0.03, "grad_norm": 0.23565246524026945, "learning_rate": 9.639065817409766e-07, "loss": 0.2472, "step": 454 }, { "epoch": 0.03, "grad_norm": 0.21338418196035058, "learning_rate": 9.660297239915074e-07, "loss": 0.1409, "step": 455 }, { "epoch": 0.03, "grad_norm": 0.32363018578448816, "learning_rate": 9.681528662420382e-07, "loss": 0.0964, "step": 456 }, { "epoch": 0.03, "grad_norm": 0.36305631286103945, "learning_rate": 9.70276008492569e-07, "loss": 0.2474, "step": 457 }, { "epoch": 0.03, "grad_norm": 0.1315636111272709, "learning_rate": 9.723991507430998e-07, "loss": 0.0133, "step": 458 }, { "epoch": 0.03, "grad_norm": 0.36665855866774594, "learning_rate": 9.745222929936306e-07, "loss": 0.0556, "step": 459 }, { "epoch": 0.03, "grad_norm": 0.2932467384725678, "learning_rate": 9.766454352441613e-07, "loss": 0.4045, "step": 460 }, { "epoch": 0.03, "grad_norm": 0.19772647917989614, "learning_rate": 9.787685774946921e-07, "loss": 0.1258, "step": 461 }, { "epoch": 0.03, "grad_norm": 0.1944562471708232, "learning_rate": 9.80891719745223e-07, "loss": 0.1912, "step": 462 }, { "epoch": 0.03, "grad_norm": 0.5129004712182335, "learning_rate": 9.830148619957537e-07, "loss": 0.0369, "step": 463 }, { "epoch": 0.03, "grad_norm": 0.27134534580369873, "learning_rate": 9.851380042462845e-07, "loss": 0.2076, "step": 464 }, { "epoch": 0.03, "grad_norm": 0.6036942576112112, "learning_rate": 9.872611464968153e-07, "loss": 0.1239, "step": 465 }, { "epoch": 0.03, "grad_norm": 0.4238546988928349, "learning_rate": 9.89384288747346e-07, "loss": 0.1025, "step": 466 }, { "epoch": 0.03, "grad_norm": 0.27541270253213207, "learning_rate": 9.915074309978768e-07, "loss": 0.194, "step": 467 }, { "epoch": 0.03, "grad_norm": 0.2546318338738281, "learning_rate": 9.936305732484076e-07, "loss": 0.2443, "step": 468 }, { "epoch": 0.03, "grad_norm": 0.18805689752215418, "learning_rate": 9.957537154989384e-07, "loss": 0.1765, "step": 469 }, { "epoch": 0.03, "grad_norm": 0.29960419963214957, "learning_rate": 9.978768577494692e-07, "loss": 0.2171, "step": 470 }, { "epoch": 0.03, "grad_norm": 0.34422612896806737, "learning_rate": 1e-06, "loss": 0.4547, "step": 471 }, { "epoch": 0.03, "grad_norm": 0.21025726996204436, "learning_rate": 9.999999893344976e-07, "loss": 0.2975, "step": 472 }, { "epoch": 0.03, "grad_norm": 0.3184464372094676, "learning_rate": 9.999999573379902e-07, "loss": 0.3635, "step": 473 }, { "epoch": 0.03, "grad_norm": 0.22529194348745743, "learning_rate": 9.999999040104795e-07, "loss": 0.1233, "step": 474 }, { "epoch": 0.03, "grad_norm": 0.17920512378839853, "learning_rate": 9.99999829351968e-07, "loss": 0.1818, "step": 475 }, { "epoch": 0.03, "grad_norm": 0.21146143920686314, "learning_rate": 9.999997333624587e-07, "loss": 0.1197, "step": 476 }, { "epoch": 0.03, "grad_norm": 0.21073388057143283, "learning_rate": 9.999996160419555e-07, "loss": 0.1825, "step": 477 }, { "epoch": 0.03, "grad_norm": 0.16078577139204342, "learning_rate": 9.999994773904636e-07, "loss": 0.2614, "step": 478 }, { "epoch": 0.03, "grad_norm": 0.2761128508668464, "learning_rate": 9.999993174079888e-07, "loss": 0.1577, "step": 479 }, { "epoch": 0.03, "grad_norm": 0.2885503791210814, "learning_rate": 9.999991360945382e-07, "loss": 0.1658, "step": 480 }, { "epoch": 0.03, "grad_norm": 0.5056550557135914, "learning_rate": 9.99998933450119e-07, "loss": 0.1356, "step": 481 }, { "epoch": 0.03, "grad_norm": 0.2587423302824715, "learning_rate": 9.999987094747404e-07, "loss": 0.1906, "step": 482 }, { "epoch": 0.03, "grad_norm": 0.27175524731807416, "learning_rate": 9.999984641684116e-07, "loss": 0.1844, "step": 483 }, { "epoch": 0.03, "grad_norm": 0.16102534545447217, "learning_rate": 9.999981975311433e-07, "loss": 0.1798, "step": 484 }, { "epoch": 0.03, "grad_norm": 0.13692929348976293, "learning_rate": 9.999979095629469e-07, "loss": 0.1092, "step": 485 }, { "epoch": 0.03, "grad_norm": 0.19888672041303426, "learning_rate": 9.999976002638344e-07, "loss": 0.3372, "step": 486 }, { "epoch": 0.03, "grad_norm": 0.26586280153311215, "learning_rate": 9.99997269633819e-07, "loss": 0.1877, "step": 487 }, { "epoch": 0.03, "grad_norm": 0.10917284468967871, "learning_rate": 9.999969176729153e-07, "loss": 0.1221, "step": 488 }, { "epoch": 0.03, "grad_norm": 0.650651973475683, "learning_rate": 9.999965443811376e-07, "loss": 0.1607, "step": 489 }, { "epoch": 0.03, "grad_norm": 0.2145075448697716, "learning_rate": 9.999961497585024e-07, "loss": 0.0983, "step": 490 }, { "epoch": 0.03, "grad_norm": 0.6452394084802534, "learning_rate": 9.999957338050265e-07, "loss": 0.3028, "step": 491 }, { "epoch": 0.03, "grad_norm": 0.29862832309920007, "learning_rate": 9.999952965207273e-07, "loss": 0.1307, "step": 492 }, { "epoch": 0.03, "grad_norm": 0.32137994202195397, "learning_rate": 9.999948379056235e-07, "loss": 0.1424, "step": 493 }, { "epoch": 0.03, "grad_norm": 0.38034356609412096, "learning_rate": 9.99994357959735e-07, "loss": 0.3911, "step": 494 }, { "epoch": 0.03, "grad_norm": 0.15666426264417455, "learning_rate": 9.99993856683082e-07, "loss": 0.2112, "step": 495 }, { "epoch": 0.03, "grad_norm": 0.12027522270375962, "learning_rate": 9.99993334075686e-07, "loss": 0.0216, "step": 496 }, { "epoch": 0.03, "grad_norm": 0.10923574165943764, "learning_rate": 9.99992790137569e-07, "loss": 0.1166, "step": 497 }, { "epoch": 0.03, "grad_norm": 0.14919427521926085, "learning_rate": 9.999922248687548e-07, "loss": 0.1056, "step": 498 }, { "epoch": 0.03, "grad_norm": 0.3044315277531875, "learning_rate": 9.99991638269267e-07, "loss": 0.1135, "step": 499 }, { "epoch": 0.03, "grad_norm": 0.6105525827468412, "learning_rate": 9.999910303391308e-07, "loss": 0.1656, "step": 500 }, { "epoch": 0.03, "grad_norm": 0.23900556126543304, "learning_rate": 9.999904010783723e-07, "loss": 0.1068, "step": 501 }, { "epoch": 0.03, "grad_norm": 0.394750778990148, "learning_rate": 9.999897504870182e-07, "loss": 0.2906, "step": 502 }, { "epoch": 0.03, "grad_norm": 0.12818377999218414, "learning_rate": 9.99989078565096e-07, "loss": 0.1879, "step": 503 }, { "epoch": 0.03, "grad_norm": 0.3565725372269199, "learning_rate": 9.999883853126348e-07, "loss": 0.134, "step": 504 }, { "epoch": 0.03, "grad_norm": 0.26619545869125333, "learning_rate": 9.999876707296637e-07, "loss": 0.2273, "step": 505 }, { "epoch": 0.03, "grad_norm": 0.2532572699129161, "learning_rate": 9.999869348162139e-07, "loss": 0.021, "step": 506 }, { "epoch": 0.03, "grad_norm": 0.16057755469215762, "learning_rate": 9.999861775723161e-07, "loss": 0.038, "step": 507 }, { "epoch": 0.03, "grad_norm": 0.14580176791619404, "learning_rate": 9.999853989980027e-07, "loss": 0.1291, "step": 508 }, { "epoch": 0.03, "grad_norm": 0.23289785982715136, "learning_rate": 9.999845990933073e-07, "loss": 0.1479, "step": 509 }, { "epoch": 0.03, "grad_norm": 0.33869594797132546, "learning_rate": 9.999837778582638e-07, "loss": 0.1374, "step": 510 }, { "epoch": 0.03, "grad_norm": 0.1817354272596778, "learning_rate": 9.999829352929074e-07, "loss": 0.0891, "step": 511 }, { "epoch": 0.03, "grad_norm": 0.3110651313704324, "learning_rate": 9.999820713972737e-07, "loss": 0.0974, "step": 512 }, { "epoch": 0.03, "grad_norm": 0.23108817473383325, "learning_rate": 9.999811861713998e-07, "loss": 0.2463, "step": 513 }, { "epoch": 0.03, "grad_norm": 0.15727001210456104, "learning_rate": 9.999802796153234e-07, "loss": 0.015, "step": 514 }, { "epoch": 0.03, "grad_norm": 0.26637691734140634, "learning_rate": 9.99979351729083e-07, "loss": 0.3334, "step": 515 }, { "epoch": 0.03, "grad_norm": 0.2124085837214984, "learning_rate": 9.999784025127185e-07, "loss": 0.0869, "step": 516 }, { "epoch": 0.03, "grad_norm": 0.199038642426334, "learning_rate": 9.999774319662703e-07, "loss": 0.1575, "step": 517 }, { "epoch": 0.03, "grad_norm": 0.27989246765365144, "learning_rate": 9.999764400897798e-07, "loss": 0.0992, "step": 518 }, { "epoch": 0.03, "grad_norm": 0.24058349608902685, "learning_rate": 9.999754268832893e-07, "loss": 0.4741, "step": 519 }, { "epoch": 0.03, "grad_norm": 0.21542311327582966, "learning_rate": 9.99974392346842e-07, "loss": 0.2214, "step": 520 }, { "epoch": 0.03, "grad_norm": 0.3549191787539132, "learning_rate": 9.999733364804818e-07, "loss": 0.0226, "step": 521 }, { "epoch": 0.03, "grad_norm": 0.2232365983566046, "learning_rate": 9.999722592842543e-07, "loss": 0.1226, "step": 522 }, { "epoch": 0.03, "grad_norm": 0.2583979267337865, "learning_rate": 9.999711607582052e-07, "loss": 0.2159, "step": 523 }, { "epoch": 0.03, "grad_norm": 0.2723403267606881, "learning_rate": 9.99970040902381e-07, "loss": 0.3245, "step": 524 }, { "epoch": 0.03, "grad_norm": 0.2542485452458295, "learning_rate": 9.999688997168301e-07, "loss": 0.2327, "step": 525 }, { "epoch": 0.03, "grad_norm": 0.41809755135138027, "learning_rate": 9.999677372016007e-07, "loss": 0.1287, "step": 526 }, { "epoch": 0.03, "grad_norm": 0.27112163543158724, "learning_rate": 9.999665533567426e-07, "loss": 0.3427, "step": 527 }, { "epoch": 0.03, "grad_norm": 0.26191221466291176, "learning_rate": 9.999653481823063e-07, "loss": 0.2782, "step": 528 }, { "epoch": 0.03, "grad_norm": 0.2102654999678496, "learning_rate": 9.99964121678343e-07, "loss": 0.3097, "step": 529 }, { "epoch": 0.03, "grad_norm": 0.11716270764715653, "learning_rate": 9.999628738449055e-07, "loss": 0.0224, "step": 530 }, { "epoch": 0.03, "grad_norm": 0.2342432126524212, "learning_rate": 9.999616046820466e-07, "loss": 0.1761, "step": 531 }, { "epoch": 0.03, "grad_norm": 0.528700125502073, "learning_rate": 9.999603141898207e-07, "loss": 0.3711, "step": 532 }, { "epoch": 0.03, "grad_norm": 0.22774259370215127, "learning_rate": 9.999590023682826e-07, "loss": 0.1242, "step": 533 }, { "epoch": 0.03, "grad_norm": 0.25913121938300426, "learning_rate": 9.999576692174884e-07, "loss": 0.3121, "step": 534 }, { "epoch": 0.03, "grad_norm": 0.2695919605172889, "learning_rate": 9.999563147374952e-07, "loss": 0.2923, "step": 535 }, { "epoch": 0.03, "grad_norm": 0.17673669493872907, "learning_rate": 9.999549389283606e-07, "loss": 0.135, "step": 536 }, { "epoch": 0.03, "grad_norm": 0.23203256134608177, "learning_rate": 9.99953541790143e-07, "loss": 0.0209, "step": 537 }, { "epoch": 0.03, "grad_norm": 0.39836612884069494, "learning_rate": 9.999521233229025e-07, "loss": 0.351, "step": 538 }, { "epoch": 0.03, "grad_norm": 0.34472204539733897, "learning_rate": 9.999506835266992e-07, "loss": 0.2228, "step": 539 }, { "epoch": 0.03, "grad_norm": 0.264377801633734, "learning_rate": 9.999492224015948e-07, "loss": 0.0628, "step": 540 }, { "epoch": 0.03, "grad_norm": 0.20910132050425237, "learning_rate": 9.999477399476516e-07, "loss": 0.1251, "step": 541 }, { "epoch": 0.03, "grad_norm": 0.23636435994906232, "learning_rate": 9.999462361649327e-07, "loss": 0.2616, "step": 542 }, { "epoch": 0.03, "grad_norm": 0.40297277864631065, "learning_rate": 9.999447110535024e-07, "loss": 0.3826, "step": 543 }, { "epoch": 0.03, "grad_norm": 0.28148856030908415, "learning_rate": 9.999431646134257e-07, "loss": 0.1426, "step": 544 }, { "epoch": 0.03, "grad_norm": 0.26535308731379725, "learning_rate": 9.999415968447687e-07, "loss": 0.4973, "step": 545 }, { "epoch": 0.03, "grad_norm": 0.14706290474883787, "learning_rate": 9.99940007747598e-07, "loss": 0.1553, "step": 546 }, { "epoch": 0.03, "grad_norm": 0.17983524372845375, "learning_rate": 9.999383973219816e-07, "loss": 0.2053, "step": 547 }, { "epoch": 0.03, "grad_norm": 0.1621418678808146, "learning_rate": 9.999367655679881e-07, "loss": 0.0513, "step": 548 }, { "epoch": 0.04, "grad_norm": 0.06329448454305941, "learning_rate": 9.999351124856873e-07, "loss": 0.0875, "step": 549 }, { "epoch": 0.04, "grad_norm": 0.20894730328341823, "learning_rate": 9.999334380751497e-07, "loss": 0.0524, "step": 550 }, { "epoch": 0.04, "grad_norm": 0.3801867943102373, "learning_rate": 9.999317423364465e-07, "loss": 0.2819, "step": 551 }, { "epoch": 0.04, "grad_norm": 0.12575459885606005, "learning_rate": 9.9993002526965e-07, "loss": 0.174, "step": 552 }, { "epoch": 0.04, "grad_norm": 0.19373342460912157, "learning_rate": 9.99928286874834e-07, "loss": 0.2381, "step": 553 }, { "epoch": 0.04, "grad_norm": 0.290343647970398, "learning_rate": 9.99926527152072e-07, "loss": 0.2266, "step": 554 }, { "epoch": 0.04, "grad_norm": 0.21789424364573587, "learning_rate": 9.999247461014395e-07, "loss": 0.1309, "step": 555 }, { "epoch": 0.04, "grad_norm": 0.3589600866174122, "learning_rate": 9.999229437230123e-07, "loss": 0.1268, "step": 556 }, { "epoch": 0.04, "grad_norm": 0.32039536701754084, "learning_rate": 9.999211200168675e-07, "loss": 0.0894, "step": 557 }, { "epoch": 0.04, "grad_norm": 0.602307055588835, "learning_rate": 9.999192749830827e-07, "loss": 0.2676, "step": 558 }, { "epoch": 0.04, "grad_norm": 0.1731340196071816, "learning_rate": 9.999174086217368e-07, "loss": 0.2218, "step": 559 }, { "epoch": 0.04, "grad_norm": 0.21485030799997704, "learning_rate": 9.99915520932909e-07, "loss": 0.197, "step": 560 }, { "epoch": 0.04, "grad_norm": 0.09116657996386676, "learning_rate": 9.999136119166803e-07, "loss": 0.0076, "step": 561 }, { "epoch": 0.04, "grad_norm": 0.2595362787199046, "learning_rate": 9.999116815731318e-07, "loss": 0.1199, "step": 562 }, { "epoch": 0.04, "grad_norm": 0.2943434903221813, "learning_rate": 9.999097299023463e-07, "loss": 0.2567, "step": 563 }, { "epoch": 0.04, "grad_norm": 0.2549772125656584, "learning_rate": 9.999077569044066e-07, "loss": 0.2859, "step": 564 }, { "epoch": 0.04, "grad_norm": 0.11980427571428975, "learning_rate": 9.999057625793969e-07, "loss": 0.0486, "step": 565 }, { "epoch": 0.04, "grad_norm": 0.23365723416624679, "learning_rate": 9.999037469274026e-07, "loss": 0.1725, "step": 566 }, { "epoch": 0.04, "grad_norm": 0.07751232484547327, "learning_rate": 9.999017099485095e-07, "loss": 0.0058, "step": 567 }, { "epoch": 0.04, "grad_norm": 0.47467674056310083, "learning_rate": 9.998996516428045e-07, "loss": 0.2391, "step": 568 }, { "epoch": 0.04, "grad_norm": 0.30761449261074536, "learning_rate": 9.998975720103756e-07, "loss": 0.2047, "step": 569 }, { "epoch": 0.04, "grad_norm": 0.18044750339675913, "learning_rate": 9.99895471051311e-07, "loss": 0.2432, "step": 570 }, { "epoch": 0.04, "grad_norm": 0.28473339587930035, "learning_rate": 9.99893348765701e-07, "loss": 0.1458, "step": 571 }, { "epoch": 0.04, "grad_norm": 0.20667850273765226, "learning_rate": 9.998912051536358e-07, "loss": 0.1582, "step": 572 }, { "epoch": 0.04, "grad_norm": 0.3232007620220984, "learning_rate": 9.998890402152067e-07, "loss": 0.2699, "step": 573 }, { "epoch": 0.04, "grad_norm": 0.2747885272567359, "learning_rate": 9.998868539505065e-07, "loss": 0.061, "step": 574 }, { "epoch": 0.04, "grad_norm": 0.3028579312727913, "learning_rate": 9.99884646359628e-07, "loss": 0.4315, "step": 575 }, { "epoch": 0.04, "grad_norm": 0.26332033672438776, "learning_rate": 9.998824174426656e-07, "loss": 0.0787, "step": 576 }, { "epoch": 0.04, "grad_norm": 0.35321268776480974, "learning_rate": 9.998801671997146e-07, "loss": 0.3332, "step": 577 }, { "epoch": 0.04, "grad_norm": 0.24897210129496727, "learning_rate": 9.998778956308707e-07, "loss": 0.0626, "step": 578 }, { "epoch": 0.04, "grad_norm": 0.47698351985136805, "learning_rate": 9.998756027362308e-07, "loss": 0.3223, "step": 579 }, { "epoch": 0.04, "grad_norm": 0.19134790383960557, "learning_rate": 9.998732885158927e-07, "loss": 0.2695, "step": 580 }, { "epoch": 0.04, "grad_norm": 0.4879603420778916, "learning_rate": 9.998709529699555e-07, "loss": 0.1466, "step": 581 }, { "epoch": 0.04, "grad_norm": 0.3128280251335007, "learning_rate": 9.998685960985186e-07, "loss": 0.4176, "step": 582 }, { "epoch": 0.04, "grad_norm": 0.1838525897173844, "learning_rate": 9.998662179016821e-07, "loss": 0.1679, "step": 583 }, { "epoch": 0.04, "grad_norm": 0.19676301033745802, "learning_rate": 9.998638183795483e-07, "loss": 0.2997, "step": 584 }, { "epoch": 0.04, "grad_norm": 0.1403235253701055, "learning_rate": 9.99861397532219e-07, "loss": 0.1538, "step": 585 }, { "epoch": 0.04, "grad_norm": 0.2713200681540278, "learning_rate": 9.998589553597976e-07, "loss": 0.2481, "step": 586 }, { "epoch": 0.04, "grad_norm": 0.31210448436474636, "learning_rate": 9.998564918623884e-07, "loss": 0.3303, "step": 587 }, { "epoch": 0.04, "grad_norm": 0.19441767560085377, "learning_rate": 9.998540070400965e-07, "loss": 0.1644, "step": 588 }, { "epoch": 0.04, "grad_norm": 0.1307191376988514, "learning_rate": 9.998515008930277e-07, "loss": 0.202, "step": 589 }, { "epoch": 0.04, "grad_norm": 0.7254158512474338, "learning_rate": 9.99848973421289e-07, "loss": 0.3441, "step": 590 }, { "epoch": 0.04, "grad_norm": 0.27264410203686634, "learning_rate": 9.998464246249883e-07, "loss": 0.2037, "step": 591 }, { "epoch": 0.04, "grad_norm": 0.2911967943345156, "learning_rate": 9.998438545042345e-07, "loss": 0.0963, "step": 592 }, { "epoch": 0.04, "grad_norm": 0.327683277210842, "learning_rate": 9.998412630591369e-07, "loss": 0.1515, "step": 593 }, { "epoch": 0.04, "grad_norm": 0.2219327938303094, "learning_rate": 9.998386502898061e-07, "loss": 0.1894, "step": 594 }, { "epoch": 0.04, "grad_norm": 0.18622294921364274, "learning_rate": 9.99836016196354e-07, "loss": 0.1695, "step": 595 }, { "epoch": 0.04, "grad_norm": 0.2925134602430663, "learning_rate": 9.998333607788922e-07, "loss": 0.1408, "step": 596 }, { "epoch": 0.04, "grad_norm": 0.40499870275179556, "learning_rate": 9.998306840375349e-07, "loss": 0.1963, "step": 597 }, { "epoch": 0.04, "grad_norm": 0.40439244999887497, "learning_rate": 9.998279859723954e-07, "loss": 0.2666, "step": 598 }, { "epoch": 0.04, "grad_norm": 0.30494083909444997, "learning_rate": 9.998252665835895e-07, "loss": 0.1802, "step": 599 }, { "epoch": 0.04, "grad_norm": 0.29650189969662283, "learning_rate": 9.99822525871233e-07, "loss": 0.1154, "step": 600 }, { "epoch": 0.04, "grad_norm": 0.3300319194535553, "learning_rate": 9.998197638354427e-07, "loss": 0.0357, "step": 601 }, { "epoch": 0.04, "grad_norm": 0.29748899925065037, "learning_rate": 9.998169804763365e-07, "loss": 0.3921, "step": 602 }, { "epoch": 0.04, "grad_norm": 0.2842270442567387, "learning_rate": 9.99814175794033e-07, "loss": 0.1997, "step": 603 }, { "epoch": 0.04, "grad_norm": 0.17694701192442455, "learning_rate": 9.998113497886522e-07, "loss": 0.0825, "step": 604 }, { "epoch": 0.04, "grad_norm": 0.19775297135044023, "learning_rate": 9.998085024603144e-07, "loss": 0.2962, "step": 605 }, { "epoch": 0.04, "grad_norm": 0.4098023299487314, "learning_rate": 9.998056338091413e-07, "loss": 0.1432, "step": 606 }, { "epoch": 0.04, "grad_norm": 0.2230885301929253, "learning_rate": 9.99802743835255e-07, "loss": 0.3254, "step": 607 }, { "epoch": 0.04, "grad_norm": 0.12637777237273629, "learning_rate": 9.997998325387788e-07, "loss": 0.0687, "step": 608 }, { "epoch": 0.04, "grad_norm": 0.3196469626868912, "learning_rate": 9.997968999198371e-07, "loss": 0.0266, "step": 609 }, { "epoch": 0.04, "grad_norm": 0.2310289775253604, "learning_rate": 9.997939459785552e-07, "loss": 0.2164, "step": 610 }, { "epoch": 0.04, "grad_norm": 0.26611036284271894, "learning_rate": 9.997909707150584e-07, "loss": 0.3605, "step": 611 }, { "epoch": 0.04, "grad_norm": 0.2614133341008609, "learning_rate": 9.997879741294744e-07, "loss": 0.0632, "step": 612 }, { "epoch": 0.04, "grad_norm": 0.26078759864867007, "learning_rate": 9.997849562219307e-07, "loss": 0.1604, "step": 613 }, { "epoch": 0.04, "grad_norm": 0.44154590338271715, "learning_rate": 9.99781916992556e-07, "loss": 0.2396, "step": 614 }, { "epoch": 0.04, "grad_norm": 0.14067566349075272, "learning_rate": 9.9977885644148e-07, "loss": 0.2071, "step": 615 }, { "epoch": 0.04, "grad_norm": 0.3575616851456624, "learning_rate": 9.997757745688334e-07, "loss": 0.3583, "step": 616 }, { "epoch": 0.04, "grad_norm": 0.33687583636678, "learning_rate": 9.997726713747475e-07, "loss": 0.1614, "step": 617 }, { "epoch": 0.04, "grad_norm": 0.1287137337645716, "learning_rate": 9.997695468593547e-07, "loss": 0.0285, "step": 618 }, { "epoch": 0.04, "grad_norm": 0.22629252964500782, "learning_rate": 9.997664010227885e-07, "loss": 0.1418, "step": 619 }, { "epoch": 0.04, "grad_norm": 0.2244281138166755, "learning_rate": 9.997632338651828e-07, "loss": 0.2675, "step": 620 }, { "epoch": 0.04, "grad_norm": 0.4123947347059294, "learning_rate": 9.997600453866732e-07, "loss": 0.3591, "step": 621 }, { "epoch": 0.04, "grad_norm": 0.11262626115015796, "learning_rate": 9.997568355873953e-07, "loss": 0.1854, "step": 622 }, { "epoch": 0.04, "grad_norm": 0.19934916889725812, "learning_rate": 9.997536044674862e-07, "loss": 0.0942, "step": 623 }, { "epoch": 0.04, "grad_norm": 0.4240857699247332, "learning_rate": 9.997503520270835e-07, "loss": 0.1083, "step": 624 }, { "epoch": 0.04, "grad_norm": 0.2192476132653266, "learning_rate": 9.997470782663262e-07, "loss": 0.2008, "step": 625 }, { "epoch": 0.04, "grad_norm": 0.47564784426711987, "learning_rate": 9.99743783185354e-07, "loss": 0.1532, "step": 626 }, { "epoch": 0.04, "grad_norm": 0.33507348750716026, "learning_rate": 9.997404667843074e-07, "loss": 0.0814, "step": 627 }, { "epoch": 0.04, "grad_norm": 0.20428464002313707, "learning_rate": 9.997371290633278e-07, "loss": 0.3139, "step": 628 }, { "epoch": 0.04, "grad_norm": 0.3458666228430019, "learning_rate": 9.997337700225578e-07, "loss": 0.1347, "step": 629 }, { "epoch": 0.04, "grad_norm": 0.1155058198126477, "learning_rate": 9.997303896621404e-07, "loss": 0.1214, "step": 630 }, { "epoch": 0.04, "grad_norm": 0.2868224800278387, "learning_rate": 9.997269879822204e-07, "loss": 0.1442, "step": 631 }, { "epoch": 0.04, "grad_norm": 0.21098114298120535, "learning_rate": 9.99723564982942e-07, "loss": 0.4881, "step": 632 }, { "epoch": 0.04, "grad_norm": 0.28539541990386547, "learning_rate": 9.99720120664452e-07, "loss": 0.212, "step": 633 }, { "epoch": 0.04, "grad_norm": 0.15762416827649048, "learning_rate": 9.997166550268972e-07, "loss": 0.0298, "step": 634 }, { "epoch": 0.04, "grad_norm": 0.17249326656550196, "learning_rate": 9.997131680704251e-07, "loss": 0.1836, "step": 635 }, { "epoch": 0.04, "grad_norm": 0.26370313587358046, "learning_rate": 9.99709659795185e-07, "loss": 0.3679, "step": 636 }, { "epoch": 0.04, "grad_norm": 0.23461575570233473, "learning_rate": 9.99706130201326e-07, "loss": 0.2692, "step": 637 }, { "epoch": 0.04, "grad_norm": 0.2979758421620129, "learning_rate": 9.99702579288999e-07, "loss": 0.2391, "step": 638 }, { "epoch": 0.04, "grad_norm": 0.1770101807416079, "learning_rate": 9.996990070583555e-07, "loss": 0.2021, "step": 639 }, { "epoch": 0.04, "grad_norm": 0.3250207900693622, "learning_rate": 9.996954135095478e-07, "loss": 0.1972, "step": 640 }, { "epoch": 0.04, "grad_norm": 0.29805102006964246, "learning_rate": 9.996917986427293e-07, "loss": 0.1832, "step": 641 }, { "epoch": 0.04, "grad_norm": 0.34048618804477215, "learning_rate": 9.99688162458054e-07, "loss": 0.0834, "step": 642 }, { "epoch": 0.04, "grad_norm": 0.276125773326266, "learning_rate": 9.996845049556775e-07, "loss": 0.0967, "step": 643 }, { "epoch": 0.04, "grad_norm": 0.3643296711012014, "learning_rate": 9.996808261357553e-07, "loss": 0.4839, "step": 644 }, { "epoch": 0.04, "grad_norm": 0.3594869795982038, "learning_rate": 9.996771259984446e-07, "loss": 0.2085, "step": 645 }, { "epoch": 0.04, "grad_norm": 0.25612813685311064, "learning_rate": 9.996734045439032e-07, "loss": 0.2249, "step": 646 }, { "epoch": 0.04, "grad_norm": 0.5294491242173487, "learning_rate": 9.9966966177229e-07, "loss": 0.2232, "step": 647 }, { "epoch": 0.04, "grad_norm": 0.223394109480478, "learning_rate": 9.996658976837644e-07, "loss": 0.2066, "step": 648 }, { "epoch": 0.04, "grad_norm": 0.0964490455397499, "learning_rate": 9.996621122784872e-07, "loss": 0.0176, "step": 649 }, { "epoch": 0.04, "grad_norm": 0.9916404275370678, "learning_rate": 9.996583055566199e-07, "loss": 0.2847, "step": 650 }, { "epoch": 0.04, "grad_norm": 0.4085104775813451, "learning_rate": 9.99654477518325e-07, "loss": 0.1965, "step": 651 }, { "epoch": 0.04, "grad_norm": 0.26930592600432185, "learning_rate": 9.996506281637653e-07, "loss": 0.1166, "step": 652 }, { "epoch": 0.04, "grad_norm": 0.20570651628474856, "learning_rate": 9.996467574931058e-07, "loss": 0.2854, "step": 653 }, { "epoch": 0.04, "grad_norm": 0.23019324170520708, "learning_rate": 9.996428655065108e-07, "loss": 0.238, "step": 654 }, { "epoch": 0.04, "grad_norm": 0.28456364609373247, "learning_rate": 9.99638952204147e-07, "loss": 0.3202, "step": 655 }, { "epoch": 0.04, "grad_norm": 0.33711673215738003, "learning_rate": 9.996350175861809e-07, "loss": 0.2112, "step": 656 }, { "epoch": 0.04, "grad_norm": 0.2628481495394333, "learning_rate": 9.996310616527808e-07, "loss": 0.097, "step": 657 }, { "epoch": 0.04, "grad_norm": 0.14330502365215603, "learning_rate": 9.99627084404115e-07, "loss": 0.1624, "step": 658 }, { "epoch": 0.04, "grad_norm": 0.14064107397795123, "learning_rate": 9.996230858403536e-07, "loss": 0.0913, "step": 659 }, { "epoch": 0.04, "grad_norm": 0.3607971179922472, "learning_rate": 9.99619065961667e-07, "loss": 0.0854, "step": 660 }, { "epoch": 0.04, "grad_norm": 0.23005910265204066, "learning_rate": 9.996150247682265e-07, "loss": 0.2756, "step": 661 }, { "epoch": 0.04, "grad_norm": 0.24785734359247824, "learning_rate": 9.996109622602048e-07, "loss": 0.1309, "step": 662 }, { "epoch": 0.04, "grad_norm": 0.4946815559380326, "learning_rate": 9.99606878437775e-07, "loss": 0.3128, "step": 663 }, { "epoch": 0.04, "grad_norm": 0.18161354687738152, "learning_rate": 9.996027733011117e-07, "loss": 0.1091, "step": 664 }, { "epoch": 0.04, "grad_norm": 0.40597640467228835, "learning_rate": 9.995986468503894e-07, "loss": 0.3422, "step": 665 }, { "epoch": 0.04, "grad_norm": 0.10124882585850081, "learning_rate": 9.995944990857847e-07, "loss": 0.1157, "step": 666 }, { "epoch": 0.04, "grad_norm": 0.31196022827667125, "learning_rate": 9.995903300074744e-07, "loss": 0.3875, "step": 667 }, { "epoch": 0.04, "grad_norm": 0.31771538807403404, "learning_rate": 9.995861396156362e-07, "loss": 0.21, "step": 668 }, { "epoch": 0.04, "grad_norm": 0.3493822437991453, "learning_rate": 9.995819279104491e-07, "loss": 0.0353, "step": 669 }, { "epoch": 0.04, "grad_norm": 0.4421172995145604, "learning_rate": 9.995776948920927e-07, "loss": 0.1876, "step": 670 }, { "epoch": 0.04, "grad_norm": 0.19443582410038723, "learning_rate": 9.995734405607474e-07, "loss": 0.0966, "step": 671 }, { "epoch": 0.04, "grad_norm": 0.3275481656952158, "learning_rate": 9.99569164916595e-07, "loss": 0.4793, "step": 672 }, { "epoch": 0.04, "grad_norm": 0.22924677843911015, "learning_rate": 9.995648679598176e-07, "loss": 0.2181, "step": 673 }, { "epoch": 0.04, "grad_norm": 0.3008393077777119, "learning_rate": 9.99560549690599e-07, "loss": 0.4154, "step": 674 }, { "epoch": 0.04, "grad_norm": 0.5001730906408609, "learning_rate": 9.99556210109123e-07, "loss": 0.0971, "step": 675 }, { "epoch": 0.04, "grad_norm": 0.25220347513813307, "learning_rate": 9.995518492155746e-07, "loss": 0.1822, "step": 676 }, { "epoch": 0.04, "grad_norm": 0.21326006771705913, "learning_rate": 9.9954746701014e-07, "loss": 0.0362, "step": 677 }, { "epoch": 0.04, "grad_norm": 0.12982483839799883, "learning_rate": 9.995430634930066e-07, "loss": 0.1691, "step": 678 }, { "epoch": 0.04, "grad_norm": 0.33526482948055236, "learning_rate": 9.99538638664362e-07, "loss": 0.5494, "step": 679 }, { "epoch": 0.04, "grad_norm": 0.20965784744111918, "learning_rate": 9.995341925243944e-07, "loss": 0.1009, "step": 680 }, { "epoch": 0.04, "grad_norm": 0.4229254242252686, "learning_rate": 9.99529725073294e-07, "loss": 0.1194, "step": 681 }, { "epoch": 0.04, "grad_norm": 0.08776084595143485, "learning_rate": 9.995252363112518e-07, "loss": 0.065, "step": 682 }, { "epoch": 0.04, "grad_norm": 0.16284348597463982, "learning_rate": 9.995207262384585e-07, "loss": 0.0518, "step": 683 }, { "epoch": 0.04, "grad_norm": 0.20175325495331153, "learning_rate": 9.995161948551069e-07, "loss": 0.0596, "step": 684 }, { "epoch": 0.04, "grad_norm": 0.41025397147531095, "learning_rate": 9.995116421613905e-07, "loss": 0.2243, "step": 685 }, { "epoch": 0.04, "grad_norm": 0.27349920467101396, "learning_rate": 9.995070681575032e-07, "loss": 0.3185, "step": 686 }, { "epoch": 0.04, "grad_norm": 0.39644977768940787, "learning_rate": 9.995024728436401e-07, "loss": 0.2763, "step": 687 }, { "epoch": 0.04, "grad_norm": 0.1399900794362018, "learning_rate": 9.994978562199973e-07, "loss": 0.0156, "step": 688 }, { "epoch": 0.04, "grad_norm": 0.3472303888369003, "learning_rate": 9.99493218286772e-07, "loss": 0.182, "step": 689 }, { "epoch": 0.04, "grad_norm": 0.2636485704732172, "learning_rate": 9.99488559044162e-07, "loss": 0.159, "step": 690 }, { "epoch": 0.04, "grad_norm": 0.44025629657577875, "learning_rate": 9.994838784923657e-07, "loss": 0.13, "step": 691 }, { "epoch": 0.04, "grad_norm": 0.5309811169145624, "learning_rate": 9.994791766315833e-07, "loss": 0.2435, "step": 692 }, { "epoch": 0.04, "grad_norm": 0.3815430042476451, "learning_rate": 9.99474453462015e-07, "loss": 0.0291, "step": 693 }, { "epoch": 0.04, "grad_norm": 0.6649567885730471, "learning_rate": 9.994697089838626e-07, "loss": 0.068, "step": 694 }, { "epoch": 0.04, "grad_norm": 0.3355111021093103, "learning_rate": 9.994649431973283e-07, "loss": 0.0949, "step": 695 }, { "epoch": 0.04, "grad_norm": 0.22658701080249202, "learning_rate": 9.994601561026153e-07, "loss": 0.0596, "step": 696 }, { "epoch": 0.04, "grad_norm": 0.23806279247513487, "learning_rate": 9.994553476999281e-07, "loss": 0.2143, "step": 697 }, { "epoch": 0.04, "grad_norm": 0.3346821326673875, "learning_rate": 9.994505179894718e-07, "loss": 0.2204, "step": 698 }, { "epoch": 0.04, "grad_norm": 0.3255836963757066, "learning_rate": 9.994456669714523e-07, "loss": 0.117, "step": 699 }, { "epoch": 0.04, "grad_norm": 0.17276392954244343, "learning_rate": 9.994407946460768e-07, "loss": 0.0316, "step": 700 }, { "epoch": 0.04, "grad_norm": 0.40489743890177804, "learning_rate": 9.99435901013553e-07, "loss": 0.1418, "step": 701 }, { "epoch": 0.04, "grad_norm": 0.5784755364483929, "learning_rate": 9.994309860740894e-07, "loss": 0.2431, "step": 702 }, { "epoch": 0.04, "grad_norm": 0.41512771975633733, "learning_rate": 9.994260498278962e-07, "loss": 0.2355, "step": 703 }, { "epoch": 0.04, "grad_norm": 0.3187904283921282, "learning_rate": 9.994210922751836e-07, "loss": 0.3454, "step": 704 }, { "epoch": 0.04, "grad_norm": 0.10930687596375602, "learning_rate": 9.994161134161632e-07, "loss": 0.0869, "step": 705 }, { "epoch": 0.05, "grad_norm": 0.21349147939129332, "learning_rate": 9.994111132510477e-07, "loss": 0.0998, "step": 706 }, { "epoch": 0.05, "grad_norm": 0.29174491502664607, "learning_rate": 9.994060917800499e-07, "loss": 0.1836, "step": 707 }, { "epoch": 0.05, "grad_norm": 0.267973293440664, "learning_rate": 9.994010490033843e-07, "loss": 0.0963, "step": 708 }, { "epoch": 0.05, "grad_norm": 0.2959997574431314, "learning_rate": 9.993959849212662e-07, "loss": 0.1727, "step": 709 }, { "epoch": 0.05, "grad_norm": 0.37866755142864683, "learning_rate": 9.993908995339114e-07, "loss": 0.3663, "step": 710 }, { "epoch": 0.05, "grad_norm": 0.2138710941633362, "learning_rate": 9.993857928415368e-07, "loss": 0.2025, "step": 711 }, { "epoch": 0.05, "grad_norm": 0.3630054849873252, "learning_rate": 9.993806648443606e-07, "loss": 0.3173, "step": 712 }, { "epoch": 0.05, "grad_norm": 0.21645649187262386, "learning_rate": 9.993755155426014e-07, "loss": 0.1201, "step": 713 }, { "epoch": 0.05, "grad_norm": 0.4298086689210384, "learning_rate": 9.993703449364785e-07, "loss": 0.2854, "step": 714 }, { "epoch": 0.05, "grad_norm": 0.6737470060141839, "learning_rate": 9.99365153026213e-07, "loss": 0.2807, "step": 715 }, { "epoch": 0.05, "grad_norm": 0.46771804450657134, "learning_rate": 9.993599398120263e-07, "loss": 0.3458, "step": 716 }, { "epoch": 0.05, "grad_norm": 0.13088635700932683, "learning_rate": 9.993547052941407e-07, "loss": 0.1335, "step": 717 }, { "epoch": 0.05, "grad_norm": 0.38230493975198065, "learning_rate": 9.993494494727795e-07, "loss": 0.2078, "step": 718 }, { "epoch": 0.05, "grad_norm": 0.5303441806830073, "learning_rate": 9.99344172348167e-07, "loss": 0.3254, "step": 719 }, { "epoch": 0.05, "grad_norm": 0.41501870663594365, "learning_rate": 9.993388739205283e-07, "loss": 0.2984, "step": 720 }, { "epoch": 0.05, "grad_norm": 0.4466052256901839, "learning_rate": 9.993335541900893e-07, "loss": 0.326, "step": 721 }, { "epoch": 0.05, "grad_norm": 0.3664984275313783, "learning_rate": 9.993282131570772e-07, "loss": 0.408, "step": 722 }, { "epoch": 0.05, "grad_norm": 0.42448567094972184, "learning_rate": 9.9932285082172e-07, "loss": 0.2234, "step": 723 }, { "epoch": 0.05, "grad_norm": 0.17745007270794735, "learning_rate": 9.99317467184246e-07, "loss": 0.1807, "step": 724 }, { "epoch": 0.05, "grad_norm": 0.44935936650217406, "learning_rate": 9.993120622448849e-07, "loss": 0.2376, "step": 725 }, { "epoch": 0.05, "grad_norm": 0.6126905176000934, "learning_rate": 9.993066360038678e-07, "loss": 0.2615, "step": 726 }, { "epoch": 0.05, "grad_norm": 0.7901474797504096, "learning_rate": 9.993011884614256e-07, "loss": 0.0211, "step": 727 }, { "epoch": 0.05, "grad_norm": 0.26384046180651166, "learning_rate": 9.99295719617791e-07, "loss": 0.1528, "step": 728 }, { "epoch": 0.05, "grad_norm": 0.1624837760476425, "learning_rate": 9.992902294731974e-07, "loss": 0.0162, "step": 729 }, { "epoch": 0.05, "grad_norm": 0.3153028508242539, "learning_rate": 9.992847180278792e-07, "loss": 0.3626, "step": 730 }, { "epoch": 0.05, "grad_norm": 0.3489224986364525, "learning_rate": 9.992791852820708e-07, "loss": 0.1076, "step": 731 }, { "epoch": 0.05, "grad_norm": 0.38130284266583797, "learning_rate": 9.992736312360089e-07, "loss": 0.3346, "step": 732 }, { "epoch": 0.05, "grad_norm": 0.8070432281761301, "learning_rate": 9.992680558899303e-07, "loss": 0.3271, "step": 733 }, { "epoch": 0.05, "grad_norm": 0.43492179476209475, "learning_rate": 9.992624592440725e-07, "loss": 0.2262, "step": 734 }, { "epoch": 0.05, "grad_norm": 0.15688682197524684, "learning_rate": 9.992568412986748e-07, "loss": 0.1294, "step": 735 }, { "epoch": 0.05, "grad_norm": 0.32013111372934266, "learning_rate": 9.992512020539765e-07, "loss": 0.2633, "step": 736 }, { "epoch": 0.05, "grad_norm": 0.3857147256820132, "learning_rate": 9.992455415102182e-07, "loss": 0.2661, "step": 737 }, { "epoch": 0.05, "grad_norm": 0.4282079559143247, "learning_rate": 9.992398596676417e-07, "loss": 0.1998, "step": 738 }, { "epoch": 0.05, "grad_norm": 0.08828878270340566, "learning_rate": 9.99234156526489e-07, "loss": 0.0104, "step": 739 }, { "epoch": 0.05, "grad_norm": 0.3516698478793413, "learning_rate": 9.992284320870037e-07, "loss": 0.045, "step": 740 }, { "epoch": 0.05, "grad_norm": 0.2377456378438165, "learning_rate": 9.9922268634943e-07, "loss": 0.141, "step": 741 }, { "epoch": 0.05, "grad_norm": 0.3916903296743999, "learning_rate": 9.992169193140127e-07, "loss": 0.2449, "step": 742 }, { "epoch": 0.05, "grad_norm": 0.2735201883748301, "learning_rate": 9.992111309809982e-07, "loss": 0.35, "step": 743 }, { "epoch": 0.05, "grad_norm": 0.3854829590577924, "learning_rate": 9.992053213506333e-07, "loss": 0.1347, "step": 744 }, { "epoch": 0.05, "grad_norm": 0.2756410028217851, "learning_rate": 9.99199490423166e-07, "loss": 0.0884, "step": 745 }, { "epoch": 0.05, "grad_norm": 0.27719372195536185, "learning_rate": 9.991936381988447e-07, "loss": 0.1471, "step": 746 }, { "epoch": 0.05, "grad_norm": 0.21292987230744306, "learning_rate": 9.991877646779194e-07, "loss": 0.2303, "step": 747 }, { "epoch": 0.05, "grad_norm": 0.561405462595992, "learning_rate": 9.991818698606404e-07, "loss": 0.1685, "step": 748 }, { "epoch": 0.05, "grad_norm": 0.26414580347221267, "learning_rate": 9.991759537472597e-07, "loss": 0.3096, "step": 749 }, { "epoch": 0.05, "grad_norm": 0.25524290308172876, "learning_rate": 9.99170016338029e-07, "loss": 0.1581, "step": 750 }, { "epoch": 0.05, "grad_norm": 1.148642376017468, "learning_rate": 9.99164057633202e-07, "loss": 0.4003, "step": 751 }, { "epoch": 0.05, "grad_norm": 0.3809899101312142, "learning_rate": 9.99158077633033e-07, "loss": 0.3427, "step": 752 }, { "epoch": 0.05, "grad_norm": 0.335677292084092, "learning_rate": 9.99152076337777e-07, "loss": 0.2826, "step": 753 }, { "epoch": 0.05, "grad_norm": 0.3201739427658402, "learning_rate": 9.9914605374769e-07, "loss": 0.367, "step": 754 }, { "epoch": 0.05, "grad_norm": 0.4314453049894622, "learning_rate": 9.991400098630288e-07, "loss": 0.3241, "step": 755 }, { "epoch": 0.05, "grad_norm": 0.39924962518783, "learning_rate": 9.991339446840515e-07, "loss": 0.1677, "step": 756 }, { "epoch": 0.05, "grad_norm": 0.23400593462214092, "learning_rate": 9.991278582110166e-07, "loss": 0.2681, "step": 757 }, { "epoch": 0.05, "grad_norm": 0.8307402139466588, "learning_rate": 9.99121750444184e-07, "loss": 0.1325, "step": 758 }, { "epoch": 0.05, "grad_norm": 0.43037593280946795, "learning_rate": 9.99115621383814e-07, "loss": 0.3028, "step": 759 }, { "epoch": 0.05, "grad_norm": 0.2593830448653943, "learning_rate": 9.991094710301686e-07, "loss": 0.0571, "step": 760 }, { "epoch": 0.05, "grad_norm": 0.32735630380268255, "learning_rate": 9.991032993835095e-07, "loss": 0.0852, "step": 761 }, { "epoch": 0.05, "grad_norm": 0.2900643412585144, "learning_rate": 9.990971064441004e-07, "loss": 0.2157, "step": 762 }, { "epoch": 0.05, "grad_norm": 0.29488411113941265, "learning_rate": 9.990908922122056e-07, "loss": 0.1244, "step": 763 }, { "epoch": 0.05, "grad_norm": 1.0569396764700685, "learning_rate": 9.990846566880899e-07, "loss": 0.1896, "step": 764 }, { "epoch": 0.05, "grad_norm": 0.17865347993534295, "learning_rate": 9.990783998720193e-07, "loss": 0.0746, "step": 765 }, { "epoch": 0.05, "grad_norm": 0.5269334730810578, "learning_rate": 9.990721217642612e-07, "loss": 0.0305, "step": 766 }, { "epoch": 0.05, "grad_norm": 0.3051566717293546, "learning_rate": 9.99065822365083e-07, "loss": 0.314, "step": 767 }, { "epoch": 0.05, "grad_norm": 0.9345007572272831, "learning_rate": 9.990595016747535e-07, "loss": 0.5108, "step": 768 }, { "epoch": 0.05, "grad_norm": 0.5998423994556221, "learning_rate": 9.990531596935424e-07, "loss": 0.3033, "step": 769 }, { "epoch": 0.05, "grad_norm": 0.45181850336453955, "learning_rate": 9.990467964217204e-07, "loss": 0.2796, "step": 770 }, { "epoch": 0.05, "grad_norm": 0.4850613814989678, "learning_rate": 9.990404118595587e-07, "loss": 0.1074, "step": 771 }, { "epoch": 0.05, "grad_norm": 0.18496526534645386, "learning_rate": 9.990340060073302e-07, "loss": 0.1308, "step": 772 }, { "epoch": 0.05, "grad_norm": 0.19599395432566782, "learning_rate": 9.990275788653074e-07, "loss": 0.1107, "step": 773 }, { "epoch": 0.05, "grad_norm": 0.38743758561766645, "learning_rate": 9.99021130433765e-07, "loss": 0.281, "step": 774 }, { "epoch": 0.05, "grad_norm": 0.29353828597777165, "learning_rate": 9.99014660712978e-07, "loss": 0.1374, "step": 775 }, { "epoch": 0.05, "grad_norm": 0.38702552932139134, "learning_rate": 9.990081697032226e-07, "loss": 0.3723, "step": 776 }, { "epoch": 0.05, "grad_norm": 0.5511249787455814, "learning_rate": 9.990016574047755e-07, "loss": 0.3179, "step": 777 }, { "epoch": 0.05, "grad_norm": 0.19069921374797308, "learning_rate": 9.989951238179146e-07, "loss": 0.1863, "step": 778 }, { "epoch": 0.05, "grad_norm": 0.5477637035431495, "learning_rate": 9.989885689429187e-07, "loss": 0.2018, "step": 779 }, { "epoch": 0.05, "grad_norm": 0.23073750329632237, "learning_rate": 9.989819927800671e-07, "loss": 0.1418, "step": 780 }, { "epoch": 0.05, "grad_norm": 0.030750283996359234, "learning_rate": 9.989753953296408e-07, "loss": 0.0027, "step": 781 }, { "epoch": 0.05, "grad_norm": 0.1145215007398251, "learning_rate": 9.98968776591921e-07, "loss": 0.147, "step": 782 }, { "epoch": 0.05, "grad_norm": 0.25940045290858127, "learning_rate": 9.989621365671902e-07, "loss": 0.0713, "step": 783 }, { "epoch": 0.05, "grad_norm": 0.43694900746613674, "learning_rate": 9.989554752557314e-07, "loss": 0.1064, "step": 784 }, { "epoch": 0.05, "grad_norm": 0.3971010506393241, "learning_rate": 9.989487926578291e-07, "loss": 0.1655, "step": 785 }, { "epoch": 0.05, "grad_norm": 0.1909020753793466, "learning_rate": 9.989420887737683e-07, "loss": 0.2461, "step": 786 }, { "epoch": 0.05, "grad_norm": 0.3099298618303888, "learning_rate": 9.989353636038351e-07, "loss": 0.2658, "step": 787 }, { "epoch": 0.05, "grad_norm": 0.9535739179506793, "learning_rate": 9.989286171483161e-07, "loss": 0.271, "step": 788 }, { "epoch": 0.05, "grad_norm": 0.36664103535127324, "learning_rate": 9.989218494074995e-07, "loss": 0.2692, "step": 789 }, { "epoch": 0.05, "grad_norm": 0.36033331083904596, "learning_rate": 9.989150603816738e-07, "loss": 0.1979, "step": 790 }, { "epoch": 0.05, "grad_norm": 1.0563505863364127, "learning_rate": 9.989082500711287e-07, "loss": 0.2973, "step": 791 }, { "epoch": 0.05, "grad_norm": 0.35337153350301953, "learning_rate": 9.989014184761546e-07, "loss": 0.0985, "step": 792 }, { "epoch": 0.05, "grad_norm": 0.41319436850610153, "learning_rate": 9.988945655970434e-07, "loss": 0.1578, "step": 793 }, { "epoch": 0.05, "grad_norm": 0.428415309646363, "learning_rate": 9.988876914340868e-07, "loss": 0.3707, "step": 794 }, { "epoch": 0.05, "grad_norm": 0.216637732229521, "learning_rate": 9.988807959875786e-07, "loss": 0.0188, "step": 795 }, { "epoch": 0.05, "grad_norm": 0.4181888867376164, "learning_rate": 9.988738792578126e-07, "loss": 0.2444, "step": 796 }, { "epoch": 0.05, "grad_norm": 0.2000092320601304, "learning_rate": 9.98866941245084e-07, "loss": 0.1637, "step": 797 }, { "epoch": 0.05, "grad_norm": 0.41985990604917495, "learning_rate": 9.98859981949689e-07, "loss": 0.2563, "step": 798 }, { "epoch": 0.05, "grad_norm": 0.3699535318356039, "learning_rate": 9.988530013719243e-07, "loss": 0.1085, "step": 799 }, { "epoch": 0.05, "grad_norm": 0.26470185162460874, "learning_rate": 9.988459995120877e-07, "loss": 0.1144, "step": 800 }, { "epoch": 0.05, "grad_norm": 0.40079513827952984, "learning_rate": 9.988389763704778e-07, "loss": 0.3005, "step": 801 }, { "epoch": 0.05, "grad_norm": 0.7079814755560667, "learning_rate": 9.988319319473947e-07, "loss": 0.2295, "step": 802 }, { "epoch": 0.05, "grad_norm": 0.47031904416782294, "learning_rate": 9.988248662431385e-07, "loss": 0.4155, "step": 803 }, { "epoch": 0.05, "grad_norm": 0.35989836180944096, "learning_rate": 9.988177792580106e-07, "loss": 0.0439, "step": 804 }, { "epoch": 0.05, "grad_norm": 0.29549006075515255, "learning_rate": 9.988106709923137e-07, "loss": 0.09, "step": 805 }, { "epoch": 0.05, "grad_norm": 0.17598994399439483, "learning_rate": 9.988035414463507e-07, "loss": 0.0525, "step": 806 }, { "epoch": 0.05, "grad_norm": 0.5363124292698066, "learning_rate": 9.98796390620426e-07, "loss": 0.3053, "step": 807 }, { "epoch": 0.05, "grad_norm": 0.6041872608092101, "learning_rate": 9.987892185148443e-07, "loss": 0.3073, "step": 808 }, { "epoch": 0.05, "grad_norm": 0.4189089818332621, "learning_rate": 9.98782025129912e-07, "loss": 0.3295, "step": 809 }, { "epoch": 0.05, "grad_norm": 0.4753315947071798, "learning_rate": 9.987748104659359e-07, "loss": 0.2363, "step": 810 }, { "epoch": 0.05, "grad_norm": 0.6384770671401014, "learning_rate": 9.987675745232239e-07, "loss": 0.0567, "step": 811 }, { "epoch": 0.05, "grad_norm": 0.39078600515760487, "learning_rate": 9.987603173020842e-07, "loss": 0.1865, "step": 812 }, { "epoch": 0.05, "grad_norm": 0.41160175106312996, "learning_rate": 9.987530388028267e-07, "loss": 0.2448, "step": 813 }, { "epoch": 0.05, "grad_norm": 0.2468671563228391, "learning_rate": 9.987457390257622e-07, "loss": 0.1582, "step": 814 }, { "epoch": 0.05, "grad_norm": 0.2780845450730514, "learning_rate": 9.987384179712018e-07, "loss": 0.1083, "step": 815 }, { "epoch": 0.05, "grad_norm": 0.5634701131210977, "learning_rate": 9.987310756394578e-07, "loss": 0.2131, "step": 816 }, { "epoch": 0.05, "grad_norm": 0.378584012441739, "learning_rate": 9.987237120308435e-07, "loss": 0.2024, "step": 817 }, { "epoch": 0.05, "grad_norm": 0.24450289137278988, "learning_rate": 9.987163271456733e-07, "loss": 0.2023, "step": 818 }, { "epoch": 0.05, "grad_norm": 0.5248509815392669, "learning_rate": 9.987089209842618e-07, "loss": 0.2578, "step": 819 }, { "epoch": 0.05, "grad_norm": 0.3488439970835759, "learning_rate": 9.987014935469254e-07, "loss": 0.3219, "step": 820 }, { "epoch": 0.05, "grad_norm": 0.4434052624292268, "learning_rate": 9.986940448339807e-07, "loss": 0.1275, "step": 821 }, { "epoch": 0.05, "grad_norm": 0.1639172911274137, "learning_rate": 9.986865748457455e-07, "loss": 0.0444, "step": 822 }, { "epoch": 0.05, "grad_norm": 0.3198579670432246, "learning_rate": 9.986790835825385e-07, "loss": 0.2233, "step": 823 }, { "epoch": 0.05, "grad_norm": 0.6046038247945492, "learning_rate": 9.986715710446795e-07, "loss": 0.2182, "step": 824 }, { "epoch": 0.05, "grad_norm": 0.5503062834527775, "learning_rate": 9.986640372324887e-07, "loss": 0.1813, "step": 825 }, { "epoch": 0.05, "grad_norm": 0.3311415631719066, "learning_rate": 9.986564821462876e-07, "loss": 0.1052, "step": 826 }, { "epoch": 0.05, "grad_norm": 0.3745468369706555, "learning_rate": 9.986489057863986e-07, "loss": 0.2697, "step": 827 }, { "epoch": 0.05, "grad_norm": 0.4019031440647441, "learning_rate": 9.986413081531448e-07, "loss": 0.2565, "step": 828 }, { "epoch": 0.05, "grad_norm": 0.48819622858764106, "learning_rate": 9.986336892468506e-07, "loss": 0.158, "step": 829 }, { "epoch": 0.05, "grad_norm": 0.49255007594275985, "learning_rate": 9.986260490678406e-07, "loss": 0.2423, "step": 830 }, { "epoch": 0.05, "grad_norm": 0.46966914868829196, "learning_rate": 9.98618387616441e-07, "loss": 0.3574, "step": 831 }, { "epoch": 0.05, "grad_norm": 0.5241945042886459, "learning_rate": 9.98610704892979e-07, "loss": 0.1017, "step": 832 }, { "epoch": 0.05, "grad_norm": 0.4420068530363283, "learning_rate": 9.986030008977816e-07, "loss": 0.1116, "step": 833 }, { "epoch": 0.05, "grad_norm": 0.42633745870809486, "learning_rate": 9.98595275631178e-07, "loss": 0.1198, "step": 834 }, { "epoch": 0.05, "grad_norm": 0.42117115114059506, "learning_rate": 9.985875290934974e-07, "loss": 0.2213, "step": 835 }, { "epoch": 0.05, "grad_norm": 0.5886583825546735, "learning_rate": 9.985797612850709e-07, "loss": 0.2779, "step": 836 }, { "epoch": 0.05, "grad_norm": 0.17939168561944074, "learning_rate": 9.985719722062293e-07, "loss": 0.0299, "step": 837 }, { "epoch": 0.05, "grad_norm": 0.55192345774415, "learning_rate": 9.98564161857305e-07, "loss": 0.1841, "step": 838 }, { "epoch": 0.05, "grad_norm": 0.9291312498876918, "learning_rate": 9.985563302386317e-07, "loss": 0.4546, "step": 839 }, { "epoch": 0.05, "grad_norm": 0.6257171085637463, "learning_rate": 9.985484773505427e-07, "loss": 0.3799, "step": 840 }, { "epoch": 0.05, "grad_norm": 0.6860559930564434, "learning_rate": 9.985406031933737e-07, "loss": 0.2958, "step": 841 }, { "epoch": 0.05, "grad_norm": 0.6107884200040208, "learning_rate": 9.985327077674604e-07, "loss": 0.2403, "step": 842 }, { "epoch": 0.05, "grad_norm": 0.5212449988275933, "learning_rate": 9.985247910731395e-07, "loss": 0.1581, "step": 843 }, { "epoch": 0.05, "grad_norm": 0.5544588418030959, "learning_rate": 9.985168531107488e-07, "loss": 0.2562, "step": 844 }, { "epoch": 0.05, "grad_norm": 0.9556498835020415, "learning_rate": 9.985088938806271e-07, "loss": 0.0674, "step": 845 }, { "epoch": 0.05, "grad_norm": 0.06436617935042317, "learning_rate": 9.98500913383114e-07, "loss": 0.0047, "step": 846 }, { "epoch": 0.05, "grad_norm": 0.6433903061356261, "learning_rate": 9.984929116185497e-07, "loss": 0.4003, "step": 847 }, { "epoch": 0.05, "grad_norm": 0.18427332635185062, "learning_rate": 9.984848885872756e-07, "loss": 0.0985, "step": 848 }, { "epoch": 0.05, "grad_norm": 0.326773548187247, "learning_rate": 9.984768442896341e-07, "loss": 0.1337, "step": 849 }, { "epoch": 0.05, "grad_norm": 0.7734791586844639, "learning_rate": 9.984687787259683e-07, "loss": 0.2146, "step": 850 }, { "epoch": 0.05, "grad_norm": 0.4573907101064241, "learning_rate": 9.984606918966226e-07, "loss": 0.2659, "step": 851 }, { "epoch": 0.05, "grad_norm": 0.7185429956776199, "learning_rate": 9.984525838019415e-07, "loss": 0.3559, "step": 852 }, { "epoch": 0.05, "grad_norm": 0.5786571028021502, "learning_rate": 9.984444544422712e-07, "loss": 0.2182, "step": 853 }, { "epoch": 0.05, "grad_norm": 0.261193248528463, "learning_rate": 9.984363038179587e-07, "loss": 0.0158, "step": 854 }, { "epoch": 0.05, "grad_norm": 0.1607690185548112, "learning_rate": 9.984281319293513e-07, "loss": 0.0183, "step": 855 }, { "epoch": 0.05, "grad_norm": 0.43391240870556813, "learning_rate": 9.984199387767978e-07, "loss": 0.3064, "step": 856 }, { "epoch": 0.05, "grad_norm": 0.3421269906808885, "learning_rate": 9.984117243606478e-07, "loss": 0.2376, "step": 857 }, { "epoch": 0.05, "grad_norm": 0.3914337813170445, "learning_rate": 9.984034886812519e-07, "loss": 0.1398, "step": 858 }, { "epoch": 0.05, "grad_norm": 0.22670431645526753, "learning_rate": 9.983952317389609e-07, "loss": 0.1208, "step": 859 }, { "epoch": 0.05, "grad_norm": 0.551276633488919, "learning_rate": 9.983869535341276e-07, "loss": 0.2405, "step": 860 }, { "epoch": 0.05, "grad_norm": 0.5254185456481869, "learning_rate": 9.98378654067105e-07, "loss": 0.1736, "step": 861 }, { "epoch": 0.05, "grad_norm": 0.1252888813245433, "learning_rate": 9.98370333338247e-07, "loss": 0.085, "step": 862 }, { "epoch": 0.06, "grad_norm": 0.24475631176376744, "learning_rate": 9.98361991347909e-07, "loss": 0.1545, "step": 863 }, { "epoch": 0.06, "grad_norm": 0.8985437187589743, "learning_rate": 9.983536280964463e-07, "loss": 0.1804, "step": 864 }, { "epoch": 0.06, "grad_norm": 0.5835744721955892, "learning_rate": 9.983452435842161e-07, "loss": 0.2285, "step": 865 }, { "epoch": 0.06, "grad_norm": 0.4454473512156972, "learning_rate": 9.98336837811576e-07, "loss": 0.1476, "step": 866 }, { "epoch": 0.06, "grad_norm": 0.5508829195580042, "learning_rate": 9.983284107788849e-07, "loss": 0.2139, "step": 867 }, { "epoch": 0.06, "grad_norm": 0.46970387871543356, "learning_rate": 9.983199624865019e-07, "loss": 0.2151, "step": 868 }, { "epoch": 0.06, "grad_norm": 0.3177141191160735, "learning_rate": 9.983114929347875e-07, "loss": 0.0975, "step": 869 }, { "epoch": 0.06, "grad_norm": 0.7368498456487158, "learning_rate": 9.98303002124103e-07, "loss": 0.2882, "step": 870 }, { "epoch": 0.06, "grad_norm": 0.4349333901871027, "learning_rate": 9.982944900548106e-07, "loss": 0.2377, "step": 871 }, { "epoch": 0.06, "grad_norm": 0.14115678908192322, "learning_rate": 9.982859567272738e-07, "loss": 0.0091, "step": 872 }, { "epoch": 0.06, "grad_norm": 0.37967986469455783, "learning_rate": 9.982774021418564e-07, "loss": 0.2847, "step": 873 }, { "epoch": 0.06, "grad_norm": 0.10136774015629568, "learning_rate": 9.982688262989235e-07, "loss": 0.0098, "step": 874 }, { "epoch": 0.06, "grad_norm": 0.3824427964589409, "learning_rate": 9.982602291988404e-07, "loss": 0.013, "step": 875 }, { "epoch": 0.06, "grad_norm": 0.749030640530025, "learning_rate": 9.982516108419745e-07, "loss": 0.2335, "step": 876 }, { "epoch": 0.06, "grad_norm": 0.5183944961616913, "learning_rate": 9.982429712286933e-07, "loss": 0.2373, "step": 877 }, { "epoch": 0.06, "grad_norm": 0.24214020912983317, "learning_rate": 9.982343103593654e-07, "loss": 0.0894, "step": 878 }, { "epoch": 0.06, "grad_norm": 0.4661896309772867, "learning_rate": 9.982256282343603e-07, "loss": 0.1716, "step": 879 }, { "epoch": 0.06, "grad_norm": 0.416058283139024, "learning_rate": 9.98216924854048e-07, "loss": 0.2509, "step": 880 }, { "epoch": 0.06, "grad_norm": 1.0653442194630949, "learning_rate": 9.982082002188004e-07, "loss": 0.1884, "step": 881 }, { "epoch": 0.06, "grad_norm": 0.45427821871010937, "learning_rate": 9.981994543289895e-07, "loss": 0.0431, "step": 882 }, { "epoch": 0.06, "grad_norm": 0.4367716379240054, "learning_rate": 9.981906871849884e-07, "loss": 0.1209, "step": 883 }, { "epoch": 0.06, "grad_norm": 1.1250590942922611, "learning_rate": 9.981818987871708e-07, "loss": 0.2175, "step": 884 }, { "epoch": 0.06, "grad_norm": 0.31067847406138993, "learning_rate": 9.98173089135912e-07, "loss": 0.019, "step": 885 }, { "epoch": 0.06, "grad_norm": 0.7127744823338241, "learning_rate": 9.98164258231588e-07, "loss": 0.4501, "step": 886 }, { "epoch": 0.06, "grad_norm": 0.5454463047150027, "learning_rate": 9.981554060745754e-07, "loss": 0.2822, "step": 887 }, { "epoch": 0.06, "grad_norm": 0.7384467024144157, "learning_rate": 9.981465326652515e-07, "loss": 0.1541, "step": 888 }, { "epoch": 0.06, "grad_norm": 0.2491223286847421, "learning_rate": 9.98137638003995e-07, "loss": 0.1961, "step": 889 }, { "epoch": 0.06, "grad_norm": 0.3060126226204753, "learning_rate": 9.981287220911857e-07, "loss": 0.1382, "step": 890 }, { "epoch": 0.06, "grad_norm": 0.3969689694949935, "learning_rate": 9.981197849272038e-07, "loss": 0.2625, "step": 891 }, { "epoch": 0.06, "grad_norm": 0.27307865038414464, "learning_rate": 9.981108265124303e-07, "loss": 0.2458, "step": 892 }, { "epoch": 0.06, "grad_norm": 0.6276549621281022, "learning_rate": 9.981018468472478e-07, "loss": 0.2624, "step": 893 }, { "epoch": 0.06, "grad_norm": 0.3623102171189811, "learning_rate": 9.98092845932039e-07, "loss": 0.1402, "step": 894 }, { "epoch": 0.06, "grad_norm": 0.6124999407880806, "learning_rate": 9.980838237671883e-07, "loss": 0.1638, "step": 895 }, { "epoch": 0.06, "grad_norm": 0.7329529551589599, "learning_rate": 9.980747803530804e-07, "loss": 0.1128, "step": 896 }, { "epoch": 0.06, "grad_norm": 0.22111671966192745, "learning_rate": 9.98065715690101e-07, "loss": 0.1134, "step": 897 }, { "epoch": 0.06, "grad_norm": 0.5603531292210565, "learning_rate": 9.98056629778637e-07, "loss": 0.117, "step": 898 }, { "epoch": 0.06, "grad_norm": 1.1477932072556478, "learning_rate": 9.98047522619076e-07, "loss": 0.2562, "step": 899 }, { "epoch": 0.06, "grad_norm": 0.5238308145115796, "learning_rate": 9.980383942118065e-07, "loss": 0.1441, "step": 900 }, { "epoch": 0.06, "grad_norm": 0.5232892018074623, "learning_rate": 9.980292445572179e-07, "loss": 0.2693, "step": 901 }, { "epoch": 0.06, "grad_norm": 1.5262895713718663, "learning_rate": 9.980200736557004e-07, "loss": 0.3365, "step": 902 }, { "epoch": 0.06, "grad_norm": 0.25404175535715173, "learning_rate": 9.980108815076455e-07, "loss": 0.0636, "step": 903 }, { "epoch": 0.06, "grad_norm": 0.14217573604570202, "learning_rate": 9.980016681134454e-07, "loss": 0.0077, "step": 904 }, { "epoch": 0.06, "grad_norm": 0.27577195936313037, "learning_rate": 9.979924334734929e-07, "loss": 0.1928, "step": 905 }, { "epoch": 0.06, "grad_norm": 0.44262819970244743, "learning_rate": 9.979831775881819e-07, "loss": 0.1663, "step": 906 }, { "epoch": 0.06, "grad_norm": 0.44267162341035937, "learning_rate": 9.979739004579077e-07, "loss": 0.1427, "step": 907 }, { "epoch": 0.06, "grad_norm": 0.6146554850708117, "learning_rate": 9.97964602083066e-07, "loss": 0.1161, "step": 908 }, { "epoch": 0.06, "grad_norm": 0.9272958776949122, "learning_rate": 9.979552824640531e-07, "loss": 0.1589, "step": 909 }, { "epoch": 0.06, "grad_norm": 0.21303002979582747, "learning_rate": 9.97945941601267e-07, "loss": 0.0127, "step": 910 }, { "epoch": 0.06, "grad_norm": 0.41194094422912997, "learning_rate": 9.979365794951058e-07, "loss": 0.1445, "step": 911 }, { "epoch": 0.06, "grad_norm": 0.46353461993783285, "learning_rate": 9.979271961459694e-07, "loss": 0.1449, "step": 912 }, { "epoch": 0.06, "grad_norm": 0.20054834219323922, "learning_rate": 9.979177915542578e-07, "loss": 0.0069, "step": 913 }, { "epoch": 0.06, "grad_norm": 0.2708297641113638, "learning_rate": 9.979083657203723e-07, "loss": 0.1858, "step": 914 }, { "epoch": 0.06, "grad_norm": 0.6109363257222682, "learning_rate": 9.978989186447146e-07, "loss": 0.1778, "step": 915 }, { "epoch": 0.06, "grad_norm": 0.5028189692038627, "learning_rate": 9.978894503276885e-07, "loss": 0.1534, "step": 916 }, { "epoch": 0.06, "grad_norm": 0.465761413142227, "learning_rate": 9.978799607696975e-07, "loss": 0.0239, "step": 917 }, { "epoch": 0.06, "grad_norm": 0.379612151226625, "learning_rate": 9.978704499711466e-07, "loss": 0.296, "step": 918 }, { "epoch": 0.06, "grad_norm": 0.36307380563278896, "learning_rate": 9.978609179324414e-07, "loss": 0.1662, "step": 919 }, { "epoch": 0.06, "grad_norm": 0.5121961709273829, "learning_rate": 9.978513646539886e-07, "loss": 0.1688, "step": 920 }, { "epoch": 0.06, "grad_norm": 0.5762208409026472, "learning_rate": 9.978417901361957e-07, "loss": 0.4547, "step": 921 }, { "epoch": 0.06, "grad_norm": 0.7750293229894867, "learning_rate": 9.978321943794715e-07, "loss": 0.2777, "step": 922 }, { "epoch": 0.06, "grad_norm": 0.1923763827464195, "learning_rate": 9.978225773842248e-07, "loss": 0.065, "step": 923 }, { "epoch": 0.06, "grad_norm": 0.21859226925702638, "learning_rate": 9.978129391508663e-07, "loss": 0.1765, "step": 924 }, { "epoch": 0.06, "grad_norm": 0.4779201707557506, "learning_rate": 9.97803279679807e-07, "loss": 0.4008, "step": 925 }, { "epoch": 0.06, "grad_norm": 0.4703071639715682, "learning_rate": 9.977935989714594e-07, "loss": 0.1467, "step": 926 }, { "epoch": 0.06, "grad_norm": 0.9150128592447756, "learning_rate": 9.97783897026236e-07, "loss": 0.4139, "step": 927 }, { "epoch": 0.06, "grad_norm": 0.6190921533181195, "learning_rate": 9.977741738445507e-07, "loss": 0.3267, "step": 928 }, { "epoch": 0.06, "grad_norm": 1.2143610086216308, "learning_rate": 9.977644294268187e-07, "loss": 0.1645, "step": 929 }, { "epoch": 0.06, "grad_norm": 0.6172031300911652, "learning_rate": 9.977546637734556e-07, "loss": 0.1381, "step": 930 }, { "epoch": 0.06, "grad_norm": 0.6070008665072504, "learning_rate": 9.977448768848777e-07, "loss": 0.426, "step": 931 }, { "epoch": 0.06, "grad_norm": 0.5518807559900349, "learning_rate": 9.977350687615027e-07, "loss": 0.1151, "step": 932 }, { "epoch": 0.06, "grad_norm": 0.5046020486044156, "learning_rate": 9.977252394037492e-07, "loss": 0.1508, "step": 933 }, { "epoch": 0.06, "grad_norm": 0.21914492471035532, "learning_rate": 9.977153888120366e-07, "loss": 0.1708, "step": 934 }, { "epoch": 0.06, "grad_norm": 0.6464788062297534, "learning_rate": 9.977055169867848e-07, "loss": 0.3474, "step": 935 }, { "epoch": 0.06, "grad_norm": 0.33618861236827735, "learning_rate": 9.976956239284151e-07, "loss": 0.1884, "step": 936 }, { "epoch": 0.06, "grad_norm": 0.6457467960715154, "learning_rate": 9.976857096373499e-07, "loss": 0.1886, "step": 937 }, { "epoch": 0.06, "grad_norm": 0.6412063637630472, "learning_rate": 9.976757741140115e-07, "loss": 0.1398, "step": 938 }, { "epoch": 0.06, "grad_norm": 0.4207704803199653, "learning_rate": 9.976658173588243e-07, "loss": 0.2348, "step": 939 }, { "epoch": 0.06, "grad_norm": 0.46616906548217213, "learning_rate": 9.976558393722127e-07, "loss": 0.338, "step": 940 }, { "epoch": 0.06, "grad_norm": 0.4083200801513483, "learning_rate": 9.976458401546028e-07, "loss": 0.1094, "step": 941 }, { "epoch": 0.06, "grad_norm": 0.5406999137425461, "learning_rate": 9.97635819706421e-07, "loss": 0.1616, "step": 942 }, { "epoch": 0.06, "grad_norm": 0.3111717063994098, "learning_rate": 9.976257780280945e-07, "loss": 0.2098, "step": 943 }, { "epoch": 0.06, "grad_norm": 0.587578293857706, "learning_rate": 9.976157151200522e-07, "loss": 0.1851, "step": 944 }, { "epoch": 0.06, "grad_norm": 0.3849442286716842, "learning_rate": 9.97605630982723e-07, "loss": 0.3162, "step": 945 }, { "epoch": 0.06, "grad_norm": 0.40198988524598256, "learning_rate": 9.975955256165374e-07, "loss": 0.3014, "step": 946 }, { "epoch": 0.06, "grad_norm": 0.28406903973403613, "learning_rate": 9.975853990219264e-07, "loss": 0.053, "step": 947 }, { "epoch": 0.06, "grad_norm": 0.2876604500665912, "learning_rate": 9.975752511993218e-07, "loss": 0.1911, "step": 948 }, { "epoch": 0.06, "grad_norm": 0.46743378594402885, "learning_rate": 9.97565082149157e-07, "loss": 0.2186, "step": 949 }, { "epoch": 0.06, "grad_norm": 0.6255651517071519, "learning_rate": 9.975548918718653e-07, "loss": 0.2079, "step": 950 }, { "epoch": 0.06, "grad_norm": 0.5948784357183413, "learning_rate": 9.975446803678817e-07, "loss": 0.23, "step": 951 }, { "epoch": 0.06, "grad_norm": 0.7860708313336866, "learning_rate": 9.97534447637642e-07, "loss": 0.1739, "step": 952 }, { "epoch": 0.06, "grad_norm": 0.6335474927518889, "learning_rate": 9.975241936815826e-07, "loss": 0.3164, "step": 953 }, { "epoch": 0.06, "grad_norm": 0.34379862224453245, "learning_rate": 9.97513918500141e-07, "loss": 0.0258, "step": 954 }, { "epoch": 0.06, "grad_norm": 0.7167515453188937, "learning_rate": 9.975036220937553e-07, "loss": 0.2723, "step": 955 }, { "epoch": 0.06, "grad_norm": 0.6856200757666084, "learning_rate": 9.974933044628652e-07, "loss": 0.1118, "step": 956 }, { "epoch": 0.06, "grad_norm": 0.21543958132484572, "learning_rate": 9.974829656079104e-07, "loss": 0.1057, "step": 957 }, { "epoch": 0.06, "grad_norm": 1.1921174412206408, "learning_rate": 9.974726055293325e-07, "loss": 0.1333, "step": 958 }, { "epoch": 0.06, "grad_norm": 0.33487542565410683, "learning_rate": 9.97462224227573e-07, "loss": 0.0938, "step": 959 }, { "epoch": 0.06, "grad_norm": 0.6336804368411803, "learning_rate": 9.97451821703075e-07, "loss": 0.1854, "step": 960 }, { "epoch": 0.06, "grad_norm": 0.5751970727441501, "learning_rate": 9.974413979562823e-07, "loss": 0.2631, "step": 961 }, { "epoch": 0.06, "grad_norm": 0.6658528447001772, "learning_rate": 9.974309529876396e-07, "loss": 0.1177, "step": 962 }, { "epoch": 0.06, "grad_norm": 0.7153463446728332, "learning_rate": 9.974204867975926e-07, "loss": 0.2134, "step": 963 }, { "epoch": 0.06, "grad_norm": 0.556809473274764, "learning_rate": 9.974099993865877e-07, "loss": 0.0977, "step": 964 }, { "epoch": 0.06, "grad_norm": 0.4850417655109695, "learning_rate": 9.97399490755072e-07, "loss": 0.3, "step": 965 }, { "epoch": 0.06, "grad_norm": 0.393049910278882, "learning_rate": 9.973889609034944e-07, "loss": 0.1262, "step": 966 }, { "epoch": 0.06, "grad_norm": 0.21258533130898213, "learning_rate": 9.973784098323038e-07, "loss": 0.0703, "step": 967 }, { "epoch": 0.06, "grad_norm": 0.7971081695534867, "learning_rate": 9.973678375419504e-07, "loss": 0.1148, "step": 968 }, { "epoch": 0.06, "grad_norm": 0.6055105582764296, "learning_rate": 9.97357244032885e-07, "loss": 0.1464, "step": 969 }, { "epoch": 0.06, "grad_norm": 0.98527191471172, "learning_rate": 9.973466293055602e-07, "loss": 0.1548, "step": 970 }, { "epoch": 0.06, "grad_norm": 0.8005624121428562, "learning_rate": 9.97335993360428e-07, "loss": 0.2071, "step": 971 }, { "epoch": 0.06, "grad_norm": 0.7641223533278209, "learning_rate": 9.973253361979427e-07, "loss": 0.2064, "step": 972 }, { "epoch": 0.06, "grad_norm": 0.5262702698149297, "learning_rate": 9.973146578185588e-07, "loss": 0.3942, "step": 973 }, { "epoch": 0.06, "grad_norm": 0.44707227252640414, "learning_rate": 9.973039582227319e-07, "loss": 0.5105, "step": 974 }, { "epoch": 0.06, "grad_norm": 1.459651156249005, "learning_rate": 9.972932374109182e-07, "loss": 0.3136, "step": 975 }, { "epoch": 0.06, "grad_norm": 0.8895166874354026, "learning_rate": 9.972824953835756e-07, "loss": 0.2322, "step": 976 }, { "epoch": 0.06, "grad_norm": 0.29423753592284146, "learning_rate": 9.97271732141162e-07, "loss": 0.1969, "step": 977 }, { "epoch": 0.06, "grad_norm": 0.8961796479193413, "learning_rate": 9.972609476841365e-07, "loss": 0.0635, "step": 978 }, { "epoch": 0.06, "grad_norm": 0.27029945533215904, "learning_rate": 9.972501420129595e-07, "loss": 0.0962, "step": 979 }, { "epoch": 0.06, "grad_norm": 0.4174636997470639, "learning_rate": 9.97239315128092e-07, "loss": 0.1965, "step": 980 }, { "epoch": 0.06, "grad_norm": 1.5576671873511183, "learning_rate": 9.972284670299955e-07, "loss": 0.1702, "step": 981 }, { "epoch": 0.06, "grad_norm": 0.8479045439406689, "learning_rate": 9.972175977191332e-07, "loss": 0.4184, "step": 982 }, { "epoch": 0.06, "grad_norm": 1.4090059840814684, "learning_rate": 9.972067071959685e-07, "loss": 0.5159, "step": 983 }, { "epoch": 0.06, "grad_norm": 0.7488220660318784, "learning_rate": 9.971957954609662e-07, "loss": 0.2249, "step": 984 }, { "epoch": 0.06, "grad_norm": 1.0557110648302839, "learning_rate": 9.971848625145919e-07, "loss": 0.1822, "step": 985 }, { "epoch": 0.06, "grad_norm": 0.6860523194772445, "learning_rate": 9.97173908357312e-07, "loss": 0.2638, "step": 986 }, { "epoch": 0.06, "grad_norm": 0.5146231791989171, "learning_rate": 9.971629329895934e-07, "loss": 0.2392, "step": 987 }, { "epoch": 0.06, "grad_norm": 0.6204843920733871, "learning_rate": 9.971519364119048e-07, "loss": 0.1253, "step": 988 }, { "epoch": 0.06, "grad_norm": 0.6831185038719864, "learning_rate": 9.971409186247151e-07, "loss": 0.1564, "step": 989 }, { "epoch": 0.06, "grad_norm": 0.5145580830055223, "learning_rate": 9.971298796284947e-07, "loss": 0.1791, "step": 990 }, { "epoch": 0.06, "grad_norm": 0.3080111600263855, "learning_rate": 9.97118819423714e-07, "loss": 0.1089, "step": 991 }, { "epoch": 0.06, "grad_norm": 1.026427565716047, "learning_rate": 9.97107738010845e-07, "loss": 0.1174, "step": 992 }, { "epoch": 0.06, "grad_norm": 0.556784534737722, "learning_rate": 9.97096635390361e-07, "loss": 0.0677, "step": 993 }, { "epoch": 0.06, "grad_norm": 0.3100620984644595, "learning_rate": 9.97085511562735e-07, "loss": 0.1863, "step": 994 }, { "epoch": 0.06, "grad_norm": 0.6374941098365919, "learning_rate": 9.970743665284418e-07, "loss": 0.0677, "step": 995 }, { "epoch": 0.06, "grad_norm": 0.4408399328420816, "learning_rate": 9.97063200287957e-07, "loss": 0.0862, "step": 996 }, { "epoch": 0.06, "grad_norm": 0.4235597994350044, "learning_rate": 9.970520128417567e-07, "loss": 0.3549, "step": 997 }, { "epoch": 0.06, "grad_norm": 0.5956360358683012, "learning_rate": 9.970408041903185e-07, "loss": 0.165, "step": 998 }, { "epoch": 0.06, "grad_norm": 0.5629107483257062, "learning_rate": 9.970295743341205e-07, "loss": 0.1385, "step": 999 }, { "epoch": 0.06, "grad_norm": 0.2858967054426137, "learning_rate": 9.970183232736414e-07, "loss": 0.0118, "step": 1000 }, { "epoch": 0.06, "grad_norm": 0.5001823321468504, "learning_rate": 9.970070510093616e-07, "loss": 0.3272, "step": 1001 }, { "epoch": 0.06, "grad_norm": 0.8804405518087354, "learning_rate": 9.96995757541762e-07, "loss": 0.3571, "step": 1002 }, { "epoch": 0.06, "grad_norm": 0.5344573086959735, "learning_rate": 9.969844428713242e-07, "loss": 0.2215, "step": 1003 }, { "epoch": 0.06, "grad_norm": 1.7997486073780382, "learning_rate": 9.96973106998531e-07, "loss": 0.2332, "step": 1004 }, { "epoch": 0.06, "grad_norm": 0.6109746072637622, "learning_rate": 9.96961749923866e-07, "loss": 0.4813, "step": 1005 }, { "epoch": 0.06, "grad_norm": 0.31869720912455685, "learning_rate": 9.969503716478138e-07, "loss": 0.2018, "step": 1006 }, { "epoch": 0.06, "grad_norm": 0.5995328400952173, "learning_rate": 9.9693897217086e-07, "loss": 0.1437, "step": 1007 }, { "epoch": 0.06, "grad_norm": 1.1796887188031795, "learning_rate": 9.969275514934903e-07, "loss": 0.1808, "step": 1008 }, { "epoch": 0.06, "grad_norm": 0.5658398713474428, "learning_rate": 9.969161096161924e-07, "loss": 0.1765, "step": 1009 }, { "epoch": 0.06, "grad_norm": 0.4490361150588233, "learning_rate": 9.969046465394544e-07, "loss": 0.1756, "step": 1010 }, { "epoch": 0.06, "grad_norm": 0.5629561790593844, "learning_rate": 9.968931622637651e-07, "loss": 0.0333, "step": 1011 }, { "epoch": 0.06, "grad_norm": 0.5323933653130326, "learning_rate": 9.968816567896148e-07, "loss": 0.2166, "step": 1012 }, { "epoch": 0.06, "grad_norm": 0.3418637697195922, "learning_rate": 9.96870130117494e-07, "loss": 0.2775, "step": 1013 }, { "epoch": 0.06, "grad_norm": 0.5571011756013858, "learning_rate": 9.968585822478948e-07, "loss": 0.3092, "step": 1014 }, { "epoch": 0.06, "grad_norm": 1.470738521801038, "learning_rate": 9.968470131813096e-07, "loss": 0.21, "step": 1015 }, { "epoch": 0.06, "grad_norm": 0.2805505635889866, "learning_rate": 9.968354229182319e-07, "loss": 0.0548, "step": 1016 }, { "epoch": 0.06, "grad_norm": 0.6034060234356918, "learning_rate": 9.968238114591565e-07, "loss": 0.3312, "step": 1017 }, { "epoch": 0.06, "grad_norm": 0.5386064080951632, "learning_rate": 9.968121788045784e-07, "loss": 0.2165, "step": 1018 }, { "epoch": 0.06, "grad_norm": 0.31641615699874504, "learning_rate": 9.968005249549942e-07, "loss": 0.0959, "step": 1019 }, { "epoch": 0.07, "grad_norm": 1.3656653627529067, "learning_rate": 9.967888499109007e-07, "loss": 0.2005, "step": 1020 }, { "epoch": 0.07, "grad_norm": 0.34502538080044065, "learning_rate": 9.967771536727963e-07, "loss": 0.2918, "step": 1021 }, { "epoch": 0.07, "grad_norm": 0.6263498899361163, "learning_rate": 9.967654362411798e-07, "loss": 0.3505, "step": 1022 }, { "epoch": 0.07, "grad_norm": 0.22916347440126475, "learning_rate": 9.967536976165515e-07, "loss": 0.0495, "step": 1023 }, { "epoch": 0.07, "grad_norm": 0.36512885889328833, "learning_rate": 9.967419377994116e-07, "loss": 0.2453, "step": 1024 }, { "epoch": 0.07, "grad_norm": 0.6159879197644346, "learning_rate": 9.967301567902619e-07, "loss": 0.0777, "step": 1025 }, { "epoch": 0.07, "grad_norm": 0.5848397426298377, "learning_rate": 9.967183545896055e-07, "loss": 0.1947, "step": 1026 }, { "epoch": 0.07, "grad_norm": 0.3063476765681408, "learning_rate": 9.967065311979452e-07, "loss": 0.0198, "step": 1027 }, { "epoch": 0.07, "grad_norm": 0.12462664133673665, "learning_rate": 9.96694686615786e-07, "loss": 0.0142, "step": 1028 }, { "epoch": 0.07, "grad_norm": 0.33720844756144086, "learning_rate": 9.96682820843633e-07, "loss": 0.1659, "step": 1029 }, { "epoch": 0.07, "grad_norm": 0.5796779985194487, "learning_rate": 9.966709338819924e-07, "loss": 0.1904, "step": 1030 }, { "epoch": 0.07, "grad_norm": 0.4410799052200912, "learning_rate": 9.966590257313713e-07, "loss": 0.2489, "step": 1031 }, { "epoch": 0.07, "grad_norm": 0.5506144502649098, "learning_rate": 9.966470963922778e-07, "loss": 0.1857, "step": 1032 }, { "epoch": 0.07, "grad_norm": 0.8245813418322644, "learning_rate": 9.966351458652207e-07, "loss": 0.0588, "step": 1033 }, { "epoch": 0.07, "grad_norm": 0.7573390896270649, "learning_rate": 9.9662317415071e-07, "loss": 0.2642, "step": 1034 }, { "epoch": 0.07, "grad_norm": 0.6741845741174953, "learning_rate": 9.96611181249256e-07, "loss": 0.1297, "step": 1035 }, { "epoch": 0.07, "grad_norm": 0.5176367178481315, "learning_rate": 9.965991671613712e-07, "loss": 0.1497, "step": 1036 }, { "epoch": 0.07, "grad_norm": 0.24016814478297469, "learning_rate": 9.965871318875674e-07, "loss": 0.1805, "step": 1037 }, { "epoch": 0.07, "grad_norm": 0.5975343726265553, "learning_rate": 9.965750754283581e-07, "loss": 0.3547, "step": 1038 }, { "epoch": 0.07, "grad_norm": 0.48165162801011546, "learning_rate": 9.965629977842583e-07, "loss": 0.0807, "step": 1039 }, { "epoch": 0.07, "grad_norm": 0.3968531251659212, "learning_rate": 9.965508989557825e-07, "loss": 0.3349, "step": 1040 }, { "epoch": 0.07, "grad_norm": 0.27580191225200595, "learning_rate": 9.965387789434473e-07, "loss": 0.011, "step": 1041 }, { "epoch": 0.07, "grad_norm": 0.44955503042974304, "learning_rate": 9.965266377477694e-07, "loss": 0.2546, "step": 1042 }, { "epoch": 0.07, "grad_norm": 0.3241293984826068, "learning_rate": 9.965144753692672e-07, "loss": 0.0935, "step": 1043 }, { "epoch": 0.07, "grad_norm": 0.3984129298375122, "learning_rate": 9.965022918084591e-07, "loss": 0.0321, "step": 1044 }, { "epoch": 0.07, "grad_norm": 0.22176833947800315, "learning_rate": 9.964900870658653e-07, "loss": 0.1971, "step": 1045 }, { "epoch": 0.07, "grad_norm": 0.4374868508187839, "learning_rate": 9.964778611420063e-07, "loss": 0.1572, "step": 1046 }, { "epoch": 0.07, "grad_norm": 0.6606499413383291, "learning_rate": 9.964656140374038e-07, "loss": 0.1915, "step": 1047 }, { "epoch": 0.07, "grad_norm": 0.6208050665040733, "learning_rate": 9.9645334575258e-07, "loss": 0.0846, "step": 1048 }, { "epoch": 0.07, "grad_norm": 0.3445588204772651, "learning_rate": 9.964410562880587e-07, "loss": 0.0497, "step": 1049 }, { "epoch": 0.07, "grad_norm": 0.4460992279579737, "learning_rate": 9.964287456443639e-07, "loss": 0.3039, "step": 1050 }, { "epoch": 0.07, "grad_norm": 0.3220445790766819, "learning_rate": 9.964164138220207e-07, "loss": 0.0138, "step": 1051 }, { "epoch": 0.07, "grad_norm": 0.6758744160821144, "learning_rate": 9.964040608215557e-07, "loss": 0.2955, "step": 1052 }, { "epoch": 0.07, "grad_norm": 0.3339475440618883, "learning_rate": 9.963916866434952e-07, "loss": 0.0671, "step": 1053 }, { "epoch": 0.07, "grad_norm": 1.1073303272937873, "learning_rate": 9.963792912883676e-07, "loss": 0.2129, "step": 1054 }, { "epoch": 0.07, "grad_norm": 0.6059768593260848, "learning_rate": 9.963668747567018e-07, "loss": 0.2493, "step": 1055 }, { "epoch": 0.07, "grad_norm": 0.4349080240496784, "learning_rate": 9.963544370490268e-07, "loss": 0.408, "step": 1056 }, { "epoch": 0.07, "grad_norm": 0.6005730259160773, "learning_rate": 9.963419781658742e-07, "loss": 0.095, "step": 1057 }, { "epoch": 0.07, "grad_norm": 0.6750176494329789, "learning_rate": 9.963294981077747e-07, "loss": 0.3842, "step": 1058 }, { "epoch": 0.07, "grad_norm": 0.3515007208986595, "learning_rate": 9.963169968752613e-07, "loss": 0.128, "step": 1059 }, { "epoch": 0.07, "grad_norm": 0.8845299459356517, "learning_rate": 9.96304474468867e-07, "loss": 0.3846, "step": 1060 }, { "epoch": 0.07, "grad_norm": 0.7393735816661642, "learning_rate": 9.962919308891263e-07, "loss": 0.3981, "step": 1061 }, { "epoch": 0.07, "grad_norm": 0.44839149358820335, "learning_rate": 9.96279366136574e-07, "loss": 0.2503, "step": 1062 }, { "epoch": 0.07, "grad_norm": 0.5404046180966962, "learning_rate": 9.962667802117463e-07, "loss": 0.1419, "step": 1063 }, { "epoch": 0.07, "grad_norm": 0.16238469118518242, "learning_rate": 9.962541731151802e-07, "loss": 0.0093, "step": 1064 }, { "epoch": 0.07, "grad_norm": 0.5738195741439209, "learning_rate": 9.962415448474133e-07, "loss": 0.2628, "step": 1065 }, { "epoch": 0.07, "grad_norm": 0.25126688476863107, "learning_rate": 9.962288954089845e-07, "loss": 0.0424, "step": 1066 }, { "epoch": 0.07, "grad_norm": 0.35598606064613925, "learning_rate": 9.962162248004337e-07, "loss": 0.1254, "step": 1067 }, { "epoch": 0.07, "grad_norm": 0.46534900163923154, "learning_rate": 9.96203533022301e-07, "loss": 0.0417, "step": 1068 }, { "epoch": 0.07, "grad_norm": 0.469743894149503, "learning_rate": 9.961908200751283e-07, "loss": 0.1853, "step": 1069 }, { "epoch": 0.07, "grad_norm": 0.5671191935457041, "learning_rate": 9.961780859594578e-07, "loss": 0.1609, "step": 1070 }, { "epoch": 0.07, "grad_norm": 0.5871653801245706, "learning_rate": 9.961653306758325e-07, "loss": 0.2998, "step": 1071 }, { "epoch": 0.07, "grad_norm": 0.5748645345770612, "learning_rate": 9.961525542247968e-07, "loss": 0.1314, "step": 1072 }, { "epoch": 0.07, "grad_norm": 0.8097576392881596, "learning_rate": 9.961397566068958e-07, "loss": 0.2395, "step": 1073 }, { "epoch": 0.07, "grad_norm": 0.3934771452321142, "learning_rate": 9.961269378226755e-07, "loss": 0.0575, "step": 1074 }, { "epoch": 0.07, "grad_norm": 0.7713689273114455, "learning_rate": 9.961140978726827e-07, "loss": 0.3685, "step": 1075 }, { "epoch": 0.07, "grad_norm": 0.6497760757747596, "learning_rate": 9.96101236757465e-07, "loss": 0.2248, "step": 1076 }, { "epoch": 0.07, "grad_norm": 1.141576142325507, "learning_rate": 9.960883544775713e-07, "loss": 0.1828, "step": 1077 }, { "epoch": 0.07, "grad_norm": 1.348158927625783, "learning_rate": 9.960754510335513e-07, "loss": 0.2588, "step": 1078 }, { "epoch": 0.07, "grad_norm": 0.7260341348660807, "learning_rate": 9.960625264259552e-07, "loss": 0.3234, "step": 1079 }, { "epoch": 0.07, "grad_norm": 0.23468077749983832, "learning_rate": 9.960495806553345e-07, "loss": 0.1675, "step": 1080 }, { "epoch": 0.07, "grad_norm": 1.140786165743733, "learning_rate": 9.960366137222413e-07, "loss": 0.0529, "step": 1081 }, { "epoch": 0.07, "grad_norm": 0.5568296291928382, "learning_rate": 9.960236256272293e-07, "loss": 0.1255, "step": 1082 }, { "epoch": 0.07, "grad_norm": 0.5604993219180562, "learning_rate": 9.960106163708522e-07, "loss": 0.3344, "step": 1083 }, { "epoch": 0.07, "grad_norm": 0.4688437202999613, "learning_rate": 9.959975859536652e-07, "loss": 0.0343, "step": 1084 }, { "epoch": 0.07, "grad_norm": 0.42635694146014674, "learning_rate": 9.95984534376224e-07, "loss": 0.1534, "step": 1085 }, { "epoch": 0.07, "grad_norm": 0.17090463730875813, "learning_rate": 9.959714616390855e-07, "loss": 0.0088, "step": 1086 }, { "epoch": 0.07, "grad_norm": 0.5101292094283796, "learning_rate": 9.959583677428074e-07, "loss": 0.2843, "step": 1087 }, { "epoch": 0.07, "grad_norm": 0.6069779617803412, "learning_rate": 9.959452526879484e-07, "loss": 0.1746, "step": 1088 }, { "epoch": 0.07, "grad_norm": 1.1833347923100763, "learning_rate": 9.959321164750678e-07, "loss": 0.2469, "step": 1089 }, { "epoch": 0.07, "grad_norm": 0.5081377136538748, "learning_rate": 9.959189591047264e-07, "loss": 0.3232, "step": 1090 }, { "epoch": 0.07, "grad_norm": 1.4076384920062124, "learning_rate": 9.959057805774851e-07, "loss": 0.2807, "step": 1091 }, { "epoch": 0.07, "grad_norm": 0.3374042679952573, "learning_rate": 9.958925808939063e-07, "loss": 0.1914, "step": 1092 }, { "epoch": 0.07, "grad_norm": 0.6500977046862573, "learning_rate": 9.958793600545531e-07, "loss": 0.4011, "step": 1093 }, { "epoch": 0.07, "grad_norm": 0.17172487725261068, "learning_rate": 9.958661180599898e-07, "loss": 0.0724, "step": 1094 }, { "epoch": 0.07, "grad_norm": 0.5200887718325509, "learning_rate": 9.95852854910781e-07, "loss": 0.109, "step": 1095 }, { "epoch": 0.07, "grad_norm": 1.0243269067900986, "learning_rate": 9.958395706074925e-07, "loss": 0.3678, "step": 1096 }, { "epoch": 0.07, "grad_norm": 0.21424244960150624, "learning_rate": 9.958262651506913e-07, "loss": 0.0498, "step": 1097 }, { "epoch": 0.07, "grad_norm": 0.43342568758247785, "learning_rate": 9.958129385409447e-07, "loss": 0.2587, "step": 1098 }, { "epoch": 0.07, "grad_norm": 0.44996930849030936, "learning_rate": 9.957995907788217e-07, "loss": 0.0959, "step": 1099 }, { "epoch": 0.07, "grad_norm": 0.3958412973964965, "learning_rate": 9.95786221864891e-07, "loss": 0.2296, "step": 1100 }, { "epoch": 0.07, "grad_norm": 0.6945523383410414, "learning_rate": 9.95772831799724e-07, "loss": 0.2633, "step": 1101 }, { "epoch": 0.07, "grad_norm": 0.5773111265925281, "learning_rate": 9.95759420583891e-07, "loss": 0.0299, "step": 1102 }, { "epoch": 0.07, "grad_norm": 0.49091192136696843, "learning_rate": 9.957459882179647e-07, "loss": 0.2164, "step": 1103 }, { "epoch": 0.07, "grad_norm": 0.6774248050977482, "learning_rate": 9.957325347025178e-07, "loss": 0.3325, "step": 1104 }, { "epoch": 0.07, "grad_norm": 0.5824953378957358, "learning_rate": 9.957190600381245e-07, "loss": 0.2733, "step": 1105 }, { "epoch": 0.07, "grad_norm": 0.565278403467531, "learning_rate": 9.957055642253596e-07, "loss": 0.376, "step": 1106 }, { "epoch": 0.07, "grad_norm": 0.27982614461170324, "learning_rate": 9.95692047264799e-07, "loss": 0.0116, "step": 1107 }, { "epoch": 0.07, "grad_norm": 0.6146706873778525, "learning_rate": 9.956785091570189e-07, "loss": 0.2508, "step": 1108 }, { "epoch": 0.07, "grad_norm": 0.6467482493773037, "learning_rate": 9.956649499025973e-07, "loss": 0.2231, "step": 1109 }, { "epoch": 0.07, "grad_norm": 0.41524716018566904, "learning_rate": 9.956513695021124e-07, "loss": 0.3621, "step": 1110 }, { "epoch": 0.07, "grad_norm": 0.39463424844312994, "learning_rate": 9.956377679561439e-07, "loss": 0.3311, "step": 1111 }, { "epoch": 0.07, "grad_norm": 0.20254282800084308, "learning_rate": 9.956241452652717e-07, "loss": 0.1893, "step": 1112 }, { "epoch": 0.07, "grad_norm": 0.6996924986838188, "learning_rate": 9.95610501430077e-07, "loss": 0.2643, "step": 1113 }, { "epoch": 0.07, "grad_norm": 0.45664575882710157, "learning_rate": 9.955968364511425e-07, "loss": 0.2615, "step": 1114 }, { "epoch": 0.07, "grad_norm": 0.4008275526455585, "learning_rate": 9.955831503290502e-07, "loss": 0.2214, "step": 1115 }, { "epoch": 0.07, "grad_norm": 0.6087192129796571, "learning_rate": 9.955694430643847e-07, "loss": 0.2212, "step": 1116 }, { "epoch": 0.07, "grad_norm": 0.30902609825608435, "learning_rate": 9.955557146577305e-07, "loss": 0.2269, "step": 1117 }, { "epoch": 0.07, "grad_norm": 0.6729231869763611, "learning_rate": 9.955419651096733e-07, "loss": 0.155, "step": 1118 }, { "epoch": 0.07, "grad_norm": 0.6423072684982292, "learning_rate": 9.955281944207996e-07, "loss": 0.2317, "step": 1119 }, { "epoch": 0.07, "grad_norm": 0.3614719727445751, "learning_rate": 9.955144025916972e-07, "loss": 0.0284, "step": 1120 }, { "epoch": 0.07, "grad_norm": 0.31847134873224486, "learning_rate": 9.955005896229541e-07, "loss": 0.059, "step": 1121 }, { "epoch": 0.07, "grad_norm": 0.6640613090116112, "learning_rate": 9.954867555151599e-07, "loss": 0.1176, "step": 1122 }, { "epoch": 0.07, "grad_norm": 0.2870218793153667, "learning_rate": 9.954729002689046e-07, "loss": 0.1116, "step": 1123 }, { "epoch": 0.07, "grad_norm": 2.4299444902143463, "learning_rate": 9.954590238847792e-07, "loss": 0.177, "step": 1124 }, { "epoch": 0.07, "grad_norm": 0.5042856873929278, "learning_rate": 9.954451263633761e-07, "loss": 0.2856, "step": 1125 }, { "epoch": 0.07, "grad_norm": 0.5624574567029191, "learning_rate": 9.95431207705288e-07, "loss": 0.2922, "step": 1126 }, { "epoch": 0.07, "grad_norm": 0.7033341472119826, "learning_rate": 9.954172679111083e-07, "loss": 0.19, "step": 1127 }, { "epoch": 0.07, "grad_norm": 0.5166063605840293, "learning_rate": 9.954033069814323e-07, "loss": 0.1983, "step": 1128 }, { "epoch": 0.07, "grad_norm": 0.3111036588703034, "learning_rate": 9.953893249168552e-07, "loss": 0.2572, "step": 1129 }, { "epoch": 0.07, "grad_norm": 0.5850201954455693, "learning_rate": 9.953753217179737e-07, "loss": 0.3981, "step": 1130 }, { "epoch": 0.07, "grad_norm": 0.31148056278206443, "learning_rate": 9.95361297385385e-07, "loss": 0.1387, "step": 1131 }, { "epoch": 0.07, "grad_norm": 0.5011527650666843, "learning_rate": 9.953472519196876e-07, "loss": 0.1981, "step": 1132 }, { "epoch": 0.07, "grad_norm": 0.2952964681892801, "learning_rate": 9.95333185321481e-07, "loss": 0.0314, "step": 1133 }, { "epoch": 0.07, "grad_norm": 0.2969760901552938, "learning_rate": 9.953190975913645e-07, "loss": 0.0142, "step": 1134 }, { "epoch": 0.07, "grad_norm": 0.303357352799507, "learning_rate": 9.953049887299399e-07, "loss": 0.0174, "step": 1135 }, { "epoch": 0.07, "grad_norm": 0.5223668998963273, "learning_rate": 9.952908587378088e-07, "loss": 0.0619, "step": 1136 }, { "epoch": 0.07, "grad_norm": 0.4011876020727392, "learning_rate": 9.95276707615574e-07, "loss": 0.1629, "step": 1137 }, { "epoch": 0.07, "grad_norm": 0.6661033093782415, "learning_rate": 9.95262535363839e-07, "loss": 0.3111, "step": 1138 }, { "epoch": 0.07, "grad_norm": 0.20087502918394676, "learning_rate": 9.952483419832087e-07, "loss": 0.0984, "step": 1139 }, { "epoch": 0.07, "grad_norm": 0.5523294888749696, "learning_rate": 9.952341274742885e-07, "loss": 0.3158, "step": 1140 }, { "epoch": 0.07, "grad_norm": 0.4577651432739096, "learning_rate": 9.952198918376852e-07, "loss": 0.233, "step": 1141 }, { "epoch": 0.07, "grad_norm": 0.6495563574623383, "learning_rate": 9.952056350740055e-07, "loss": 0.231, "step": 1142 }, { "epoch": 0.07, "grad_norm": 0.1953624200184034, "learning_rate": 9.95191357183858e-07, "loss": 0.1671, "step": 1143 }, { "epoch": 0.07, "grad_norm": 0.656737350758839, "learning_rate": 9.951770581678517e-07, "loss": 0.2997, "step": 1144 }, { "epoch": 0.07, "grad_norm": 0.4068216497777626, "learning_rate": 9.951627380265966e-07, "loss": 0.4044, "step": 1145 }, { "epoch": 0.07, "grad_norm": 0.7505436657249192, "learning_rate": 9.95148396760704e-07, "loss": 0.1125, "step": 1146 }, { "epoch": 0.07, "grad_norm": 2.649769295135062, "learning_rate": 9.95134034370785e-07, "loss": 0.0573, "step": 1147 }, { "epoch": 0.07, "grad_norm": 0.3886561767416595, "learning_rate": 9.95119650857453e-07, "loss": 0.0267, "step": 1148 }, { "epoch": 0.07, "grad_norm": 0.3897086233521129, "learning_rate": 9.951052462213214e-07, "loss": 0.133, "step": 1149 }, { "epoch": 0.07, "grad_norm": 0.29633369766252005, "learning_rate": 9.950908204630047e-07, "loss": 0.1935, "step": 1150 }, { "epoch": 0.07, "grad_norm": 0.4656970089994235, "learning_rate": 9.950763735831182e-07, "loss": 0.3169, "step": 1151 }, { "epoch": 0.07, "grad_norm": 0.3022354535153058, "learning_rate": 9.950619055822786e-07, "loss": 0.0651, "step": 1152 }, { "epoch": 0.07, "grad_norm": 0.7774838289878337, "learning_rate": 9.950474164611028e-07, "loss": 0.1482, "step": 1153 }, { "epoch": 0.07, "grad_norm": 0.5852672827206604, "learning_rate": 9.95032906220209e-07, "loss": 0.1979, "step": 1154 }, { "epoch": 0.07, "grad_norm": 0.4731884099599208, "learning_rate": 9.950183748602163e-07, "loss": 0.2213, "step": 1155 }, { "epoch": 0.07, "grad_norm": 0.6184415335068383, "learning_rate": 9.950038223817447e-07, "loss": 0.073, "step": 1156 }, { "epoch": 0.07, "grad_norm": 0.47959352443439546, "learning_rate": 9.949892487854149e-07, "loss": 0.252, "step": 1157 }, { "epoch": 0.07, "grad_norm": 0.7619017568194408, "learning_rate": 9.949746540718487e-07, "loss": 0.2122, "step": 1158 }, { "epoch": 0.07, "grad_norm": 0.4304366317713998, "learning_rate": 9.949600382416685e-07, "loss": 0.0904, "step": 1159 }, { "epoch": 0.07, "grad_norm": 0.6021043497073343, "learning_rate": 9.949454012954985e-07, "loss": 0.1185, "step": 1160 }, { "epoch": 0.07, "grad_norm": 0.6665744147667394, "learning_rate": 9.949307432339624e-07, "loss": 0.1825, "step": 1161 }, { "epoch": 0.07, "grad_norm": 0.47650417959695823, "learning_rate": 9.94916064057686e-07, "loss": 0.3691, "step": 1162 }, { "epoch": 0.07, "grad_norm": 0.15356554982909978, "learning_rate": 9.949013637672953e-07, "loss": 0.1171, "step": 1163 }, { "epoch": 0.07, "grad_norm": 0.5157615623872764, "learning_rate": 9.948866423634176e-07, "loss": 0.1202, "step": 1164 }, { "epoch": 0.07, "grad_norm": 0.4315926796169466, "learning_rate": 9.94871899846681e-07, "loss": 0.152, "step": 1165 }, { "epoch": 0.07, "grad_norm": 0.40499839612104527, "learning_rate": 9.94857136217714e-07, "loss": 0.3212, "step": 1166 }, { "epoch": 0.07, "grad_norm": 0.2255758734858464, "learning_rate": 9.948423514771472e-07, "loss": 0.1532, "step": 1167 }, { "epoch": 0.07, "grad_norm": 0.6682922622170951, "learning_rate": 9.948275456256108e-07, "loss": 0.2367, "step": 1168 }, { "epoch": 0.07, "grad_norm": 0.7060494908892788, "learning_rate": 9.948127186637365e-07, "loss": 0.0218, "step": 1169 }, { "epoch": 0.07, "grad_norm": 0.3161321311597232, "learning_rate": 9.94797870592157e-07, "loss": 0.2219, "step": 1170 }, { "epoch": 0.07, "grad_norm": 0.3369016471937861, "learning_rate": 9.947830014115056e-07, "loss": 0.2064, "step": 1171 }, { "epoch": 0.07, "grad_norm": 1.1991222681659008, "learning_rate": 9.947681111224166e-07, "loss": 0.4302, "step": 1172 }, { "epoch": 0.07, "grad_norm": 1.4410691788944219, "learning_rate": 9.947531997255256e-07, "loss": 0.2865, "step": 1173 }, { "epoch": 0.07, "grad_norm": 0.3624040415600144, "learning_rate": 9.947382672214684e-07, "loss": 0.2802, "step": 1174 }, { "epoch": 0.07, "grad_norm": 0.5548278217941871, "learning_rate": 9.947233136108822e-07, "loss": 0.1076, "step": 1175 }, { "epoch": 0.07, "grad_norm": 0.5653568715153751, "learning_rate": 9.94708338894405e-07, "loss": 0.3068, "step": 1176 }, { "epoch": 0.08, "grad_norm": 0.18076817660824668, "learning_rate": 9.946933430726753e-07, "loss": 0.0119, "step": 1177 }, { "epoch": 0.08, "grad_norm": 0.4426239335022008, "learning_rate": 9.946783261463333e-07, "loss": 0.1841, "step": 1178 }, { "epoch": 0.08, "grad_norm": 0.582521438640547, "learning_rate": 9.946632881160196e-07, "loss": 0.2756, "step": 1179 }, { "epoch": 0.08, "grad_norm": 0.7782034172939633, "learning_rate": 9.946482289823755e-07, "loss": 0.1649, "step": 1180 }, { "epoch": 0.08, "grad_norm": 0.3751614071969384, "learning_rate": 9.946331487460435e-07, "loss": 0.048, "step": 1181 }, { "epoch": 0.08, "grad_norm": 0.6767317790794916, "learning_rate": 9.946180474076673e-07, "loss": 0.1142, "step": 1182 }, { "epoch": 0.08, "grad_norm": 0.40650271265656646, "learning_rate": 9.946029249678906e-07, "loss": 0.0159, "step": 1183 }, { "epoch": 0.08, "grad_norm": 0.4988330139152289, "learning_rate": 9.94587781427359e-07, "loss": 0.1236, "step": 1184 }, { "epoch": 0.08, "grad_norm": 0.741740164161934, "learning_rate": 9.945726167867184e-07, "loss": 0.3357, "step": 1185 }, { "epoch": 0.08, "grad_norm": 0.6184543088109902, "learning_rate": 9.945574310466159e-07, "loss": 0.2253, "step": 1186 }, { "epoch": 0.08, "grad_norm": 0.5727246845437999, "learning_rate": 9.945422242076989e-07, "loss": 0.3845, "step": 1187 }, { "epoch": 0.08, "grad_norm": 0.5432437340533075, "learning_rate": 9.945269962706167e-07, "loss": 0.3971, "step": 1188 }, { "epoch": 0.08, "grad_norm": 0.29682050896542284, "learning_rate": 9.945117472360184e-07, "loss": 0.1106, "step": 1189 }, { "epoch": 0.08, "grad_norm": 0.8166999080191234, "learning_rate": 9.944964771045552e-07, "loss": 0.3071, "step": 1190 }, { "epoch": 0.08, "grad_norm": 0.7471517122493586, "learning_rate": 9.944811858768782e-07, "loss": 0.1709, "step": 1191 }, { "epoch": 0.08, "grad_norm": 0.2994779575470051, "learning_rate": 9.944658735536395e-07, "loss": 0.2743, "step": 1192 }, { "epoch": 0.08, "grad_norm": 0.23381810826721885, "learning_rate": 9.94450540135493e-07, "loss": 0.0987, "step": 1193 }, { "epoch": 0.08, "grad_norm": 0.39938123805950754, "learning_rate": 9.94435185623092e-07, "loss": 0.2987, "step": 1194 }, { "epoch": 0.08, "grad_norm": 0.9688135724386572, "learning_rate": 9.944198100170927e-07, "loss": 0.152, "step": 1195 }, { "epoch": 0.08, "grad_norm": 0.6587060853941012, "learning_rate": 9.9440441331815e-07, "loss": 0.1525, "step": 1196 }, { "epoch": 0.08, "grad_norm": 0.16281682683361928, "learning_rate": 9.943889955269212e-07, "loss": 0.0904, "step": 1197 }, { "epoch": 0.08, "grad_norm": 0.33697811875794936, "learning_rate": 9.94373556644064e-07, "loss": 0.059, "step": 1198 }, { "epoch": 0.08, "grad_norm": 0.3749015720899504, "learning_rate": 9.94358096670237e-07, "loss": 0.2188, "step": 1199 }, { "epoch": 0.08, "grad_norm": 1.010358437978261, "learning_rate": 9.943426156061e-07, "loss": 0.2657, "step": 1200 }, { "epoch": 0.08, "grad_norm": 0.5161456376184853, "learning_rate": 9.94327113452313e-07, "loss": 0.0139, "step": 1201 }, { "epoch": 0.08, "grad_norm": 0.2871394945981592, "learning_rate": 9.943115902095378e-07, "loss": 0.008, "step": 1202 }, { "epoch": 0.08, "grad_norm": 0.6177966823720042, "learning_rate": 9.942960458784364e-07, "loss": 0.3859, "step": 1203 }, { "epoch": 0.08, "grad_norm": 0.3777364649984627, "learning_rate": 9.942804804596722e-07, "loss": 0.1185, "step": 1204 }, { "epoch": 0.08, "grad_norm": 0.969121238221322, "learning_rate": 9.942648939539086e-07, "loss": 0.306, "step": 1205 }, { "epoch": 0.08, "grad_norm": 0.146732018703517, "learning_rate": 9.942492863618114e-07, "loss": 0.156, "step": 1206 }, { "epoch": 0.08, "grad_norm": 0.41643345430756434, "learning_rate": 9.942336576840462e-07, "loss": 0.1954, "step": 1207 }, { "epoch": 0.08, "grad_norm": 0.19056838451877037, "learning_rate": 9.942180079212793e-07, "loss": 0.093, "step": 1208 }, { "epoch": 0.08, "grad_norm": 0.39719466309769197, "learning_rate": 9.94202337074179e-07, "loss": 0.1966, "step": 1209 }, { "epoch": 0.08, "grad_norm": 0.7120739258858446, "learning_rate": 9.941866451434131e-07, "loss": 0.2299, "step": 1210 }, { "epoch": 0.08, "grad_norm": 2.394798508834025, "learning_rate": 9.94170932129652e-07, "loss": 0.2869, "step": 1211 }, { "epoch": 0.08, "grad_norm": 0.5192994332437146, "learning_rate": 9.941551980335652e-07, "loss": 0.2403, "step": 1212 }, { "epoch": 0.08, "grad_norm": 0.4367852588222532, "learning_rate": 9.941394428558244e-07, "loss": 0.1468, "step": 1213 }, { "epoch": 0.08, "grad_norm": 0.74335549781076, "learning_rate": 9.941236665971015e-07, "loss": 0.2553, "step": 1214 }, { "epoch": 0.08, "grad_norm": 0.3262131805085108, "learning_rate": 9.941078692580698e-07, "loss": 0.1993, "step": 1215 }, { "epoch": 0.08, "grad_norm": 0.3145273998877605, "learning_rate": 9.94092050839403e-07, "loss": 0.1359, "step": 1216 }, { "epoch": 0.08, "grad_norm": 0.43561714541066404, "learning_rate": 9.94076211341776e-07, "loss": 0.0249, "step": 1217 }, { "epoch": 0.08, "grad_norm": 0.2378825638605044, "learning_rate": 9.940603507658648e-07, "loss": 0.0156, "step": 1218 }, { "epoch": 0.08, "grad_norm": 0.6998547645217111, "learning_rate": 9.940444691123458e-07, "loss": 0.3879, "step": 1219 }, { "epoch": 0.08, "grad_norm": 0.9523241648092471, "learning_rate": 9.940285663818967e-07, "loss": 0.3711, "step": 1220 }, { "epoch": 0.08, "grad_norm": 0.8063499293177215, "learning_rate": 9.940126425751956e-07, "loss": 0.0743, "step": 1221 }, { "epoch": 0.08, "grad_norm": 1.6669138093510243, "learning_rate": 9.939966976929222e-07, "loss": 0.4219, "step": 1222 }, { "epoch": 0.08, "grad_norm": 0.5257776310542316, "learning_rate": 9.939807317357566e-07, "loss": 0.3033, "step": 1223 }, { "epoch": 0.08, "grad_norm": 0.23137183935848096, "learning_rate": 9.939647447043798e-07, "loss": 0.0905, "step": 1224 }, { "epoch": 0.08, "grad_norm": 0.1987866191319867, "learning_rate": 9.939487365994741e-07, "loss": 0.0111, "step": 1225 }, { "epoch": 0.08, "grad_norm": 0.4266213404580218, "learning_rate": 9.939327074217225e-07, "loss": 0.1496, "step": 1226 }, { "epoch": 0.08, "grad_norm": 0.8760138859606063, "learning_rate": 9.939166571718084e-07, "loss": 0.5151, "step": 1227 }, { "epoch": 0.08, "grad_norm": 0.46525354145023406, "learning_rate": 9.93900585850417e-07, "loss": 0.15, "step": 1228 }, { "epoch": 0.08, "grad_norm": 0.48283921693480103, "learning_rate": 9.938844934582337e-07, "loss": 0.2343, "step": 1229 }, { "epoch": 0.08, "grad_norm": 0.6795500594554379, "learning_rate": 9.938683799959452e-07, "loss": 0.2176, "step": 1230 }, { "epoch": 0.08, "grad_norm": 0.4507303170281007, "learning_rate": 9.938522454642387e-07, "loss": 0.1081, "step": 1231 }, { "epoch": 0.08, "grad_norm": 0.4182381054561944, "learning_rate": 9.938360898638026e-07, "loss": 0.1998, "step": 1232 }, { "epoch": 0.08, "grad_norm": 0.1807866975834911, "learning_rate": 9.938199131953263e-07, "loss": 0.0178, "step": 1233 }, { "epoch": 0.08, "grad_norm": 0.6328066547132252, "learning_rate": 9.938037154594996e-07, "loss": 0.1559, "step": 1234 }, { "epoch": 0.08, "grad_norm": 0.4285963389738819, "learning_rate": 9.937874966570139e-07, "loss": 0.1104, "step": 1235 }, { "epoch": 0.08, "grad_norm": 0.689788062574801, "learning_rate": 9.937712567885608e-07, "loss": 0.1247, "step": 1236 }, { "epoch": 0.08, "grad_norm": 0.4376887501103226, "learning_rate": 9.937549958548335e-07, "loss": 0.3802, "step": 1237 }, { "epoch": 0.08, "grad_norm": 0.42673112844316574, "learning_rate": 9.937387138565255e-07, "loss": 0.2974, "step": 1238 }, { "epoch": 0.08, "grad_norm": 0.48626121488061697, "learning_rate": 9.93722410794331e-07, "loss": 0.3471, "step": 1239 }, { "epoch": 0.08, "grad_norm": 0.6003795063350325, "learning_rate": 9.937060866689463e-07, "loss": 0.2365, "step": 1240 }, { "epoch": 0.08, "grad_norm": 0.5696774630629393, "learning_rate": 9.936897414810676e-07, "loss": 0.2721, "step": 1241 }, { "epoch": 0.08, "grad_norm": 0.20690602525821974, "learning_rate": 9.936733752313918e-07, "loss": 0.1156, "step": 1242 }, { "epoch": 0.08, "grad_norm": 0.7178905498679734, "learning_rate": 9.936569879206175e-07, "loss": 0.2117, "step": 1243 }, { "epoch": 0.08, "grad_norm": 0.5812984740921069, "learning_rate": 9.936405795494438e-07, "loss": 0.0232, "step": 1244 }, { "epoch": 0.08, "grad_norm": 0.12484029407011618, "learning_rate": 9.936241501185705e-07, "loss": 0.0247, "step": 1245 }, { "epoch": 0.08, "grad_norm": 0.5954917585734902, "learning_rate": 9.936076996286987e-07, "loss": 0.0639, "step": 1246 }, { "epoch": 0.08, "grad_norm": 0.4379080912973191, "learning_rate": 9.935912280805302e-07, "loss": 0.1497, "step": 1247 }, { "epoch": 0.08, "grad_norm": 0.8882229683532059, "learning_rate": 9.935747354747677e-07, "loss": 0.0951, "step": 1248 }, { "epoch": 0.08, "grad_norm": 0.29518732119725033, "learning_rate": 9.935582218121147e-07, "loss": 0.0143, "step": 1249 }, { "epoch": 0.08, "grad_norm": 0.4637263032560623, "learning_rate": 9.935416870932757e-07, "loss": 0.1727, "step": 1250 }, { "epoch": 0.08, "grad_norm": 0.28543800842752126, "learning_rate": 9.935251313189563e-07, "loss": 0.0648, "step": 1251 }, { "epoch": 0.08, "grad_norm": 0.3134866725587153, "learning_rate": 9.935085544898627e-07, "loss": 0.0368, "step": 1252 }, { "epoch": 0.08, "grad_norm": 1.0373680656726496, "learning_rate": 9.93491956606702e-07, "loss": 0.1571, "step": 1253 }, { "epoch": 0.08, "grad_norm": 0.484000544280834, "learning_rate": 9.934753376701825e-07, "loss": 0.2467, "step": 1254 }, { "epoch": 0.08, "grad_norm": 0.5171668737774494, "learning_rate": 9.93458697681013e-07, "loss": 0.2099, "step": 1255 }, { "epoch": 0.08, "grad_norm": 1.02310506054813, "learning_rate": 9.934420366399036e-07, "loss": 0.2822, "step": 1256 }, { "epoch": 0.08, "grad_norm": 0.42189408668989653, "learning_rate": 9.934253545475648e-07, "loss": 0.1756, "step": 1257 }, { "epoch": 0.08, "grad_norm": 0.15533316374032066, "learning_rate": 9.934086514047086e-07, "loss": 0.0806, "step": 1258 }, { "epoch": 0.08, "grad_norm": 0.6559099183399568, "learning_rate": 9.933919272120474e-07, "loss": 0.2921, "step": 1259 }, { "epoch": 0.08, "grad_norm": 0.17634704006158938, "learning_rate": 9.933751819702948e-07, "loss": 0.0823, "step": 1260 }, { "epoch": 0.08, "grad_norm": 0.5371146377850167, "learning_rate": 9.93358415680165e-07, "loss": 0.194, "step": 1261 }, { "epoch": 0.08, "grad_norm": 0.6832922633310711, "learning_rate": 9.933416283423736e-07, "loss": 0.154, "step": 1262 }, { "epoch": 0.08, "grad_norm": 0.5445353980456783, "learning_rate": 9.933248199576364e-07, "loss": 0.1312, "step": 1263 }, { "epoch": 0.08, "grad_norm": 0.7814843978025754, "learning_rate": 9.933079905266707e-07, "loss": 0.1132, "step": 1264 }, { "epoch": 0.08, "grad_norm": 0.952130812885761, "learning_rate": 9.932911400501947e-07, "loss": 0.1473, "step": 1265 }, { "epoch": 0.08, "grad_norm": 0.56061281154319, "learning_rate": 9.932742685289269e-07, "loss": 0.2857, "step": 1266 }, { "epoch": 0.08, "grad_norm": 0.8866076182489778, "learning_rate": 9.93257375963587e-07, "loss": 0.1433, "step": 1267 }, { "epoch": 0.08, "grad_norm": 0.21904820241433778, "learning_rate": 9.93240462354896e-07, "loss": 0.0023, "step": 1268 }, { "epoch": 0.08, "grad_norm": 0.7858922196798822, "learning_rate": 9.932235277035754e-07, "loss": 0.291, "step": 1269 }, { "epoch": 0.08, "grad_norm": 0.7868040611739603, "learning_rate": 9.932065720103476e-07, "loss": 0.4161, "step": 1270 }, { "epoch": 0.08, "grad_norm": 0.657228617660802, "learning_rate": 9.931895952759357e-07, "loss": 0.1842, "step": 1271 }, { "epoch": 0.08, "grad_norm": 0.5977616164189764, "learning_rate": 9.931725975010646e-07, "loss": 0.1011, "step": 1272 }, { "epoch": 0.08, "grad_norm": 0.6084631591727283, "learning_rate": 9.931555786864589e-07, "loss": 0.2752, "step": 1273 }, { "epoch": 0.08, "grad_norm": 0.5876481628805643, "learning_rate": 9.931385388328448e-07, "loss": 0.5346, "step": 1274 }, { "epoch": 0.08, "grad_norm": 0.35573169640134594, "learning_rate": 9.931214779409494e-07, "loss": 0.2471, "step": 1275 }, { "epoch": 0.08, "grad_norm": 0.5684956930967539, "learning_rate": 9.931043960115005e-07, "loss": 0.2068, "step": 1276 }, { "epoch": 0.08, "grad_norm": 0.5446491705101372, "learning_rate": 9.930872930452267e-07, "loss": 0.0741, "step": 1277 }, { "epoch": 0.08, "grad_norm": 0.4690054081002771, "learning_rate": 9.930701690428577e-07, "loss": 0.2964, "step": 1278 }, { "epoch": 0.08, "grad_norm": 0.21813429221611408, "learning_rate": 9.930530240051243e-07, "loss": 0.0949, "step": 1279 }, { "epoch": 0.08, "grad_norm": 0.8295842997198223, "learning_rate": 9.930358579327576e-07, "loss": 0.4008, "step": 1280 }, { "epoch": 0.08, "grad_norm": 1.498212880447362, "learning_rate": 9.9301867082649e-07, "loss": 0.2155, "step": 1281 }, { "epoch": 0.08, "grad_norm": 0.9418525856185737, "learning_rate": 9.93001462687055e-07, "loss": 0.1317, "step": 1282 }, { "epoch": 0.08, "grad_norm": 0.16956843695330423, "learning_rate": 9.929842335151863e-07, "loss": 0.0192, "step": 1283 }, { "epoch": 0.08, "grad_norm": 0.45637433018782536, "learning_rate": 9.929669833116194e-07, "loss": 0.2064, "step": 1284 }, { "epoch": 0.08, "grad_norm": 0.25788325094536546, "learning_rate": 9.9294971207709e-07, "loss": 0.0865, "step": 1285 }, { "epoch": 0.08, "grad_norm": 0.8106476754548811, "learning_rate": 9.929324198123347e-07, "loss": 0.1804, "step": 1286 }, { "epoch": 0.08, "grad_norm": 0.48021474596666486, "learning_rate": 9.929151065180915e-07, "loss": 0.3005, "step": 1287 }, { "epoch": 0.08, "grad_norm": 0.29667317885857586, "learning_rate": 9.928977721950992e-07, "loss": 0.2073, "step": 1288 }, { "epoch": 0.08, "grad_norm": 0.43856265730787586, "learning_rate": 9.928804168440969e-07, "loss": 0.1377, "step": 1289 }, { "epoch": 0.08, "grad_norm": 1.0318322581898132, "learning_rate": 9.928630404658254e-07, "loss": 0.1567, "step": 1290 }, { "epoch": 0.08, "grad_norm": 0.49896780895805287, "learning_rate": 9.928456430610257e-07, "loss": 0.1008, "step": 1291 }, { "epoch": 0.08, "grad_norm": 0.4054032864642662, "learning_rate": 9.9282822463044e-07, "loss": 0.2169, "step": 1292 }, { "epoch": 0.08, "grad_norm": 0.5500438098269558, "learning_rate": 9.928107851748118e-07, "loss": 0.2038, "step": 1293 }, { "epoch": 0.08, "grad_norm": 0.5264338135127226, "learning_rate": 9.927933246948846e-07, "loss": 0.1568, "step": 1294 }, { "epoch": 0.08, "grad_norm": 0.5709867696264319, "learning_rate": 9.927758431914036e-07, "loss": 0.2531, "step": 1295 }, { "epoch": 0.08, "grad_norm": 0.3732442385885163, "learning_rate": 9.927583406651145e-07, "loss": 0.1685, "step": 1296 }, { "epoch": 0.08, "grad_norm": 0.13061876615576834, "learning_rate": 9.927408171167641e-07, "loss": 0.0074, "step": 1297 }, { "epoch": 0.08, "grad_norm": 0.5839412601840739, "learning_rate": 9.927232725470998e-07, "loss": 0.4051, "step": 1298 }, { "epoch": 0.08, "grad_norm": 0.41830401854019367, "learning_rate": 9.927057069568702e-07, "loss": 0.134, "step": 1299 }, { "epoch": 0.08, "grad_norm": 0.9675478024352526, "learning_rate": 9.926881203468248e-07, "loss": 0.1277, "step": 1300 }, { "epoch": 0.08, "grad_norm": 0.6413055047498372, "learning_rate": 9.926705127177137e-07, "loss": 0.1473, "step": 1301 }, { "epoch": 0.08, "grad_norm": 0.5766913693417655, "learning_rate": 9.92652884070288e-07, "loss": 0.0879, "step": 1302 }, { "epoch": 0.08, "grad_norm": 0.6926279196508893, "learning_rate": 9.926352344053001e-07, "loss": 0.0861, "step": 1303 }, { "epoch": 0.08, "grad_norm": 0.8660050060857681, "learning_rate": 9.926175637235026e-07, "loss": 0.1214, "step": 1304 }, { "epoch": 0.08, "grad_norm": 0.6410176098271414, "learning_rate": 9.925998720256496e-07, "loss": 0.0194, "step": 1305 }, { "epoch": 0.08, "grad_norm": 0.36678590115652, "learning_rate": 9.925821593124959e-07, "loss": 0.0723, "step": 1306 }, { "epoch": 0.08, "grad_norm": 0.64978428647516, "learning_rate": 9.92564425584797e-07, "loss": 0.2001, "step": 1307 }, { "epoch": 0.08, "grad_norm": 0.5857836993663871, "learning_rate": 9.925466708433097e-07, "loss": 0.1756, "step": 1308 }, { "epoch": 0.08, "grad_norm": 0.6154155311141734, "learning_rate": 9.925288950887912e-07, "loss": 0.147, "step": 1309 }, { "epoch": 0.08, "grad_norm": 0.2220469057151436, "learning_rate": 9.925110983219998e-07, "loss": 0.0138, "step": 1310 }, { "epoch": 0.08, "grad_norm": 0.44819777842306247, "learning_rate": 9.924932805436948e-07, "loss": 0.2058, "step": 1311 }, { "epoch": 0.08, "grad_norm": 0.9549652290680253, "learning_rate": 9.924754417546367e-07, "loss": 0.123, "step": 1312 }, { "epoch": 0.08, "grad_norm": 0.6976385869512235, "learning_rate": 9.924575819555861e-07, "loss": 0.2911, "step": 1313 }, { "epoch": 0.08, "grad_norm": 0.5428886474113841, "learning_rate": 9.92439701147305e-07, "loss": 0.4047, "step": 1314 }, { "epoch": 0.08, "grad_norm": 0.522051202353723, "learning_rate": 9.924217993305563e-07, "loss": 0.3151, "step": 1315 }, { "epoch": 0.08, "grad_norm": 0.4459646433200106, "learning_rate": 9.92403876506104e-07, "loss": 0.1648, "step": 1316 }, { "epoch": 0.08, "grad_norm": 0.5060715812929723, "learning_rate": 9.923859326747124e-07, "loss": 0.2347, "step": 1317 }, { "epoch": 0.08, "grad_norm": 0.31687712905787674, "learning_rate": 9.92367967837147e-07, "loss": 0.0478, "step": 1318 }, { "epoch": 0.08, "grad_norm": 0.6603126960062933, "learning_rate": 9.923499819941744e-07, "loss": 0.2607, "step": 1319 }, { "epoch": 0.08, "grad_norm": 0.6912679532061217, "learning_rate": 9.923319751465615e-07, "loss": 0.2336, "step": 1320 }, { "epoch": 0.08, "grad_norm": 0.4936629031027776, "learning_rate": 9.923139472950772e-07, "loss": 0.0684, "step": 1321 }, { "epoch": 0.08, "grad_norm": 0.4074506017586614, "learning_rate": 9.922958984404901e-07, "loss": 0.1365, "step": 1322 }, { "epoch": 0.08, "grad_norm": 0.06931220971631999, "learning_rate": 9.922778285835704e-07, "loss": 0.0039, "step": 1323 }, { "epoch": 0.08, "grad_norm": 1.0584991047923242, "learning_rate": 9.92259737725089e-07, "loss": 0.1751, "step": 1324 }, { "epoch": 0.08, "grad_norm": 0.42512537728409516, "learning_rate": 9.922416258658173e-07, "loss": 0.0221, "step": 1325 }, { "epoch": 0.08, "grad_norm": 0.7174700906380793, "learning_rate": 9.922234930065285e-07, "loss": 0.1311, "step": 1326 }, { "epoch": 0.08, "grad_norm": 0.5040507640209686, "learning_rate": 9.922053391479961e-07, "loss": 0.1044, "step": 1327 }, { "epoch": 0.08, "grad_norm": 0.47708684920990274, "learning_rate": 9.921871642909944e-07, "loss": 0.097, "step": 1328 }, { "epoch": 0.08, "grad_norm": 0.505050356460811, "learning_rate": 9.92168968436299e-07, "loss": 0.1123, "step": 1329 }, { "epoch": 0.08, "grad_norm": 0.5969726272276742, "learning_rate": 9.921507515846856e-07, "loss": 0.1421, "step": 1330 }, { "epoch": 0.08, "grad_norm": 0.606016186383965, "learning_rate": 9.92132513736932e-07, "loss": 0.2951, "step": 1331 }, { "epoch": 0.08, "grad_norm": 0.5882241937129672, "learning_rate": 9.92114254893816e-07, "loss": 0.2291, "step": 1332 }, { "epoch": 0.09, "grad_norm": 0.26234108070725803, "learning_rate": 9.920959750561169e-07, "loss": 0.069, "step": 1333 }, { "epoch": 0.09, "grad_norm": 0.26844022808681833, "learning_rate": 9.920776742246142e-07, "loss": 0.0168, "step": 1334 }, { "epoch": 0.09, "grad_norm": 0.8127077765743058, "learning_rate": 9.920593524000885e-07, "loss": 0.4992, "step": 1335 }, { "epoch": 0.09, "grad_norm": 0.4332594733357405, "learning_rate": 9.920410095833217e-07, "loss": 0.3344, "step": 1336 }, { "epoch": 0.09, "grad_norm": 0.15581514468417898, "learning_rate": 9.920226457750964e-07, "loss": 0.0103, "step": 1337 }, { "epoch": 0.09, "grad_norm": 0.5601738146747562, "learning_rate": 9.920042609761961e-07, "loss": 0.3145, "step": 1338 }, { "epoch": 0.09, "grad_norm": 0.7410327563077606, "learning_rate": 9.919858551874048e-07, "loss": 0.4673, "step": 1339 }, { "epoch": 0.09, "grad_norm": 0.05010858722366252, "learning_rate": 9.919674284095078e-07, "loss": 0.0021, "step": 1340 }, { "epoch": 0.09, "grad_norm": 0.28155993225973236, "learning_rate": 9.919489806432914e-07, "loss": 0.1986, "step": 1341 }, { "epoch": 0.09, "grad_norm": 0.9192755585612271, "learning_rate": 9.919305118895424e-07, "loss": 0.4798, "step": 1342 }, { "epoch": 0.09, "grad_norm": 1.0614597046619378, "learning_rate": 9.919120221490492e-07, "loss": 0.2899, "step": 1343 }, { "epoch": 0.09, "grad_norm": 0.16533559228097142, "learning_rate": 9.918935114226e-07, "loss": 0.0089, "step": 1344 }, { "epoch": 0.09, "grad_norm": 0.8236466378681705, "learning_rate": 9.918749797109848e-07, "loss": 0.2607, "step": 1345 }, { "epoch": 0.09, "grad_norm": 0.26840855921958284, "learning_rate": 9.918564270149942e-07, "loss": 0.1592, "step": 1346 }, { "epoch": 0.09, "grad_norm": 0.3758093577499254, "learning_rate": 9.918378533354197e-07, "loss": 0.115, "step": 1347 }, { "epoch": 0.09, "grad_norm": 0.2804836172746173, "learning_rate": 9.918192586730538e-07, "loss": 0.2261, "step": 1348 }, { "epoch": 0.09, "grad_norm": 0.8855784879112487, "learning_rate": 9.918006430286893e-07, "loss": 0.1226, "step": 1349 }, { "epoch": 0.09, "grad_norm": 1.2574103117154745, "learning_rate": 9.917820064031211e-07, "loss": 0.1307, "step": 1350 }, { "epoch": 0.09, "grad_norm": 0.6820728818513073, "learning_rate": 9.917633487971438e-07, "loss": 0.255, "step": 1351 }, { "epoch": 0.09, "grad_norm": 0.4839934500545702, "learning_rate": 9.917446702115533e-07, "loss": 0.218, "step": 1352 }, { "epoch": 0.09, "grad_norm": 0.9974031137381807, "learning_rate": 9.917259706471467e-07, "loss": 0.1613, "step": 1353 }, { "epoch": 0.09, "grad_norm": 0.6288966585844071, "learning_rate": 9.917072501047217e-07, "loss": 0.3266, "step": 1354 }, { "epoch": 0.09, "grad_norm": 0.5899033809330453, "learning_rate": 9.91688508585077e-07, "loss": 0.0135, "step": 1355 }, { "epoch": 0.09, "grad_norm": 0.48974260277611126, "learning_rate": 9.91669746089012e-07, "loss": 0.2727, "step": 1356 }, { "epoch": 0.09, "grad_norm": 0.892130586122228, "learning_rate": 9.916509626173275e-07, "loss": 0.2681, "step": 1357 }, { "epoch": 0.09, "grad_norm": 0.5759666173641834, "learning_rate": 9.916321581708245e-07, "loss": 0.2102, "step": 1358 }, { "epoch": 0.09, "grad_norm": 0.1415334650198193, "learning_rate": 9.916133327503052e-07, "loss": 0.0707, "step": 1359 }, { "epoch": 0.09, "grad_norm": 0.2950143180306834, "learning_rate": 9.915944863565728e-07, "loss": 0.0184, "step": 1360 }, { "epoch": 0.09, "grad_norm": 0.622270261736345, "learning_rate": 9.915756189904316e-07, "loss": 0.2563, "step": 1361 }, { "epoch": 0.09, "grad_norm": 0.6687547593629255, "learning_rate": 9.915567306526862e-07, "loss": 0.2629, "step": 1362 }, { "epoch": 0.09, "grad_norm": 1.813456436767718, "learning_rate": 9.915378213441425e-07, "loss": 0.059, "step": 1363 }, { "epoch": 0.09, "grad_norm": 0.8296119799260199, "learning_rate": 9.915188910656073e-07, "loss": 0.1444, "step": 1364 }, { "epoch": 0.09, "grad_norm": 0.7155707282550502, "learning_rate": 9.91499939817888e-07, "loss": 0.0603, "step": 1365 }, { "epoch": 0.09, "grad_norm": 0.4621222371647438, "learning_rate": 9.914809676017935e-07, "loss": 0.1742, "step": 1366 }, { "epoch": 0.09, "grad_norm": 1.1300100528341723, "learning_rate": 9.914619744181326e-07, "loss": 0.5811, "step": 1367 }, { "epoch": 0.09, "grad_norm": 0.9614231224301606, "learning_rate": 9.914429602677161e-07, "loss": 0.3166, "step": 1368 }, { "epoch": 0.09, "grad_norm": 1.0794458569950924, "learning_rate": 9.914239251513549e-07, "loss": 0.18, "step": 1369 }, { "epoch": 0.09, "grad_norm": 0.6467544649663229, "learning_rate": 9.91404869069861e-07, "loss": 0.2362, "step": 1370 }, { "epoch": 0.09, "grad_norm": 0.6154057398537042, "learning_rate": 9.91385792024048e-07, "loss": 0.084, "step": 1371 }, { "epoch": 0.09, "grad_norm": 0.5645728368622048, "learning_rate": 9.913666940147289e-07, "loss": 0.3736, "step": 1372 }, { "epoch": 0.09, "grad_norm": 0.6206794190429861, "learning_rate": 9.91347575042719e-07, "loss": 0.1729, "step": 1373 }, { "epoch": 0.09, "grad_norm": 0.46476261881865205, "learning_rate": 9.913284351088338e-07, "loss": 0.0415, "step": 1374 }, { "epoch": 0.09, "grad_norm": 0.9571329913919955, "learning_rate": 9.9130927421389e-07, "loss": 0.4317, "step": 1375 }, { "epoch": 0.09, "grad_norm": 0.5332665697299472, "learning_rate": 9.912900923587047e-07, "loss": 0.2812, "step": 1376 }, { "epoch": 0.09, "grad_norm": 0.3770817447669728, "learning_rate": 9.912708895440966e-07, "loss": 0.0834, "step": 1377 }, { "epoch": 0.09, "grad_norm": 0.6260119806099472, "learning_rate": 9.912516657708847e-07, "loss": 0.0474, "step": 1378 }, { "epoch": 0.09, "grad_norm": 0.5076190706164107, "learning_rate": 9.912324210398892e-07, "loss": 0.2409, "step": 1379 }, { "epoch": 0.09, "grad_norm": 0.5911689240300607, "learning_rate": 9.91213155351931e-07, "loss": 0.3276, "step": 1380 }, { "epoch": 0.09, "grad_norm": 0.527706445220485, "learning_rate": 9.911938687078323e-07, "loss": 0.1643, "step": 1381 }, { "epoch": 0.09, "grad_norm": 0.6533182877886919, "learning_rate": 9.911745611084156e-07, "loss": 0.1749, "step": 1382 }, { "epoch": 0.09, "grad_norm": 1.2121348501912437, "learning_rate": 9.91155232554505e-07, "loss": 0.2173, "step": 1383 }, { "epoch": 0.09, "grad_norm": 0.5489449013024819, "learning_rate": 9.911358830469247e-07, "loss": 0.1405, "step": 1384 }, { "epoch": 0.09, "grad_norm": 0.4199252225993026, "learning_rate": 9.911165125865001e-07, "loss": 0.2352, "step": 1385 }, { "epoch": 0.09, "grad_norm": 0.5663838266086673, "learning_rate": 9.91097121174058e-07, "loss": 0.208, "step": 1386 }, { "epoch": 0.09, "grad_norm": 0.41766510964406783, "learning_rate": 9.910777088104256e-07, "loss": 0.0624, "step": 1387 }, { "epoch": 0.09, "grad_norm": 0.7570359673996523, "learning_rate": 9.910582754964306e-07, "loss": 0.3536, "step": 1388 }, { "epoch": 0.09, "grad_norm": 0.8684024534790326, "learning_rate": 9.910388212329027e-07, "loss": 0.2251, "step": 1389 }, { "epoch": 0.09, "grad_norm": 1.0843837871077118, "learning_rate": 9.910193460206716e-07, "loss": 0.163, "step": 1390 }, { "epoch": 0.09, "grad_norm": 0.6591320256330364, "learning_rate": 9.90999849860568e-07, "loss": 0.4135, "step": 1391 }, { "epoch": 0.09, "grad_norm": 0.38250931088260687, "learning_rate": 9.909803327534239e-07, "loss": 0.0429, "step": 1392 }, { "epoch": 0.09, "grad_norm": 0.5092062891712076, "learning_rate": 9.909607947000717e-07, "loss": 0.2099, "step": 1393 }, { "epoch": 0.09, "grad_norm": 0.5321071463340533, "learning_rate": 9.90941235701345e-07, "loss": 0.089, "step": 1394 }, { "epoch": 0.09, "grad_norm": 0.5105319453775725, "learning_rate": 9.909216557580784e-07, "loss": 0.1912, "step": 1395 }, { "epoch": 0.09, "grad_norm": 1.0240034951819497, "learning_rate": 9.90902054871107e-07, "loss": 0.2655, "step": 1396 }, { "epoch": 0.09, "grad_norm": 0.35749500684136865, "learning_rate": 9.90882433041267e-07, "loss": 0.0129, "step": 1397 }, { "epoch": 0.09, "grad_norm": 0.7430532137923848, "learning_rate": 9.908627902693957e-07, "loss": 0.1514, "step": 1398 }, { "epoch": 0.09, "grad_norm": 0.4790887423592187, "learning_rate": 9.908431265563313e-07, "loss": 0.1774, "step": 1399 }, { "epoch": 0.09, "grad_norm": 0.8233676472290283, "learning_rate": 9.90823441902912e-07, "loss": 0.1219, "step": 1400 }, { "epoch": 0.09, "grad_norm": 0.5939786949019252, "learning_rate": 9.908037363099782e-07, "loss": 0.0689, "step": 1401 }, { "epoch": 0.09, "grad_norm": 0.5939627617523665, "learning_rate": 9.907840097783704e-07, "loss": 0.1783, "step": 1402 }, { "epoch": 0.09, "grad_norm": 0.49141165452616525, "learning_rate": 9.9076426230893e-07, "loss": 0.1545, "step": 1403 }, { "epoch": 0.09, "grad_norm": 0.7278504336264148, "learning_rate": 9.907444939024997e-07, "loss": 0.2825, "step": 1404 }, { "epoch": 0.09, "grad_norm": 0.28442755361920813, "learning_rate": 9.907247045599226e-07, "loss": 0.1271, "step": 1405 }, { "epoch": 0.09, "grad_norm": 0.45846238394230227, "learning_rate": 9.907048942820432e-07, "loss": 0.0251, "step": 1406 }, { "epoch": 0.09, "grad_norm": 0.5143526782461665, "learning_rate": 9.906850630697066e-07, "loss": 0.2793, "step": 1407 }, { "epoch": 0.09, "grad_norm": 0.6207469331297467, "learning_rate": 9.906652109237588e-07, "loss": 0.0464, "step": 1408 }, { "epoch": 0.09, "grad_norm": 0.874109918494243, "learning_rate": 9.90645337845047e-07, "loss": 0.2319, "step": 1409 }, { "epoch": 0.09, "grad_norm": 0.4302973166798125, "learning_rate": 9.906254438344185e-07, "loss": 0.2829, "step": 1410 }, { "epoch": 0.09, "grad_norm": 0.37521259962395315, "learning_rate": 9.906055288927221e-07, "loss": 0.1903, "step": 1411 }, { "epoch": 0.09, "grad_norm": 0.9284162240136209, "learning_rate": 9.90585593020808e-07, "loss": 0.359, "step": 1412 }, { "epoch": 0.09, "grad_norm": 0.7100888419280851, "learning_rate": 9.905656362195261e-07, "loss": 0.2603, "step": 1413 }, { "epoch": 0.09, "grad_norm": 0.34959608011317916, "learning_rate": 9.90545658489728e-07, "loss": 0.2394, "step": 1414 }, { "epoch": 0.09, "grad_norm": 0.4230948498712605, "learning_rate": 9.90525659832266e-07, "loss": 0.2377, "step": 1415 }, { "epoch": 0.09, "grad_norm": 0.48756417643515504, "learning_rate": 9.905056402479933e-07, "loss": 0.142, "step": 1416 }, { "epoch": 0.09, "grad_norm": 0.27732749330665324, "learning_rate": 9.904855997377638e-07, "loss": 0.1023, "step": 1417 }, { "epoch": 0.09, "grad_norm": 0.21866130982516352, "learning_rate": 9.904655383024327e-07, "loss": 0.0283, "step": 1418 }, { "epoch": 0.09, "grad_norm": 0.4805676435876558, "learning_rate": 9.90445455942856e-07, "loss": 0.3483, "step": 1419 }, { "epoch": 0.09, "grad_norm": 0.6717010660990245, "learning_rate": 9.9042535265989e-07, "loss": 0.0295, "step": 1420 }, { "epoch": 0.09, "grad_norm": 0.42242652013031196, "learning_rate": 9.904052284543925e-07, "loss": 0.3359, "step": 1421 }, { "epoch": 0.09, "grad_norm": 0.9660930421018699, "learning_rate": 9.903850833272222e-07, "loss": 0.4354, "step": 1422 }, { "epoch": 0.09, "grad_norm": 0.4133245581699803, "learning_rate": 9.903649172792386e-07, "loss": 0.1681, "step": 1423 }, { "epoch": 0.09, "grad_norm": 0.8389060031095329, "learning_rate": 9.903447303113017e-07, "loss": 0.3308, "step": 1424 }, { "epoch": 0.09, "grad_norm": 0.2505562843920614, "learning_rate": 9.90324522424273e-07, "loss": 0.1088, "step": 1425 }, { "epoch": 0.09, "grad_norm": 0.46521286222480235, "learning_rate": 9.903042936190145e-07, "loss": 0.1423, "step": 1426 }, { "epoch": 0.09, "grad_norm": 0.5018964774218596, "learning_rate": 9.90284043896389e-07, "loss": 0.2328, "step": 1427 }, { "epoch": 0.09, "grad_norm": 0.5915417271474269, "learning_rate": 9.90263773257261e-07, "loss": 0.4089, "step": 1428 }, { "epoch": 0.09, "grad_norm": 0.524245641467094, "learning_rate": 9.902434817024945e-07, "loss": 0.3119, "step": 1429 }, { "epoch": 0.09, "grad_norm": 1.4189219922182432, "learning_rate": 9.902231692329556e-07, "loss": 0.1085, "step": 1430 }, { "epoch": 0.09, "grad_norm": 0.276746429325565, "learning_rate": 9.90202835849511e-07, "loss": 0.3031, "step": 1431 }, { "epoch": 0.09, "grad_norm": 0.8251646253861777, "learning_rate": 9.901824815530277e-07, "loss": 0.1873, "step": 1432 }, { "epoch": 0.09, "grad_norm": 0.5937526854920018, "learning_rate": 9.901621063443746e-07, "loss": 0.1396, "step": 1433 }, { "epoch": 0.09, "grad_norm": 0.4455253693138965, "learning_rate": 9.901417102244207e-07, "loss": 0.2131, "step": 1434 }, { "epoch": 0.09, "grad_norm": 0.582796698479084, "learning_rate": 9.90121293194036e-07, "loss": 0.5477, "step": 1435 }, { "epoch": 0.09, "grad_norm": 0.4848414087429192, "learning_rate": 9.901008552540918e-07, "loss": 0.1672, "step": 1436 }, { "epoch": 0.09, "grad_norm": 0.28593497405984064, "learning_rate": 9.900803964054597e-07, "loss": 0.0338, "step": 1437 }, { "epoch": 0.09, "grad_norm": 0.6782419152092031, "learning_rate": 9.900599166490129e-07, "loss": 0.2368, "step": 1438 }, { "epoch": 0.09, "grad_norm": 0.5081115265738643, "learning_rate": 9.900394159856249e-07, "loss": 0.3658, "step": 1439 }, { "epoch": 0.09, "grad_norm": 0.9275593554404851, "learning_rate": 9.900188944161701e-07, "loss": 0.2623, "step": 1440 }, { "epoch": 0.09, "grad_norm": 0.400317797796568, "learning_rate": 9.899983519415244e-07, "loss": 0.1618, "step": 1441 }, { "epoch": 0.09, "grad_norm": 0.8491504770105957, "learning_rate": 9.89977788562564e-07, "loss": 0.2061, "step": 1442 }, { "epoch": 0.09, "grad_norm": 0.6179973314813763, "learning_rate": 9.899572042801661e-07, "loss": 0.1308, "step": 1443 }, { "epoch": 0.09, "grad_norm": 0.6112656200925553, "learning_rate": 9.89936599095209e-07, "loss": 0.0589, "step": 1444 }, { "epoch": 0.09, "grad_norm": 0.48155520352962544, "learning_rate": 9.899159730085714e-07, "loss": 0.197, "step": 1445 }, { "epoch": 0.09, "grad_norm": 1.2113338467664378, "learning_rate": 9.898953260211337e-07, "loss": 0.1121, "step": 1446 }, { "epoch": 0.09, "grad_norm": 0.5039106787821244, "learning_rate": 9.898746581337766e-07, "loss": 0.2074, "step": 1447 }, { "epoch": 0.09, "grad_norm": 0.4526936579946449, "learning_rate": 9.89853969347382e-07, "loss": 0.1398, "step": 1448 }, { "epoch": 0.09, "grad_norm": 0.5788236603227206, "learning_rate": 9.898332596628322e-07, "loss": 0.1798, "step": 1449 }, { "epoch": 0.09, "grad_norm": 0.20559894082176153, "learning_rate": 9.898125290810107e-07, "loss": 0.1567, "step": 1450 }, { "epoch": 0.09, "grad_norm": 0.6075111709699883, "learning_rate": 9.897917776028022e-07, "loss": 0.2502, "step": 1451 }, { "epoch": 0.09, "grad_norm": 1.31621268799203, "learning_rate": 9.89771005229092e-07, "loss": 0.0991, "step": 1452 }, { "epoch": 0.09, "grad_norm": 0.27102036808554036, "learning_rate": 9.89750211960766e-07, "loss": 0.1116, "step": 1453 }, { "epoch": 0.09, "grad_norm": 0.19562214729796376, "learning_rate": 9.897293977987112e-07, "loss": 0.1442, "step": 1454 }, { "epoch": 0.09, "grad_norm": 0.2897769074366858, "learning_rate": 9.897085627438162e-07, "loss": 0.1174, "step": 1455 }, { "epoch": 0.09, "grad_norm": 0.9699879807136473, "learning_rate": 9.896877067969694e-07, "loss": 0.3481, "step": 1456 }, { "epoch": 0.09, "grad_norm": 0.4334282701949201, "learning_rate": 9.896668299590606e-07, "loss": 0.1089, "step": 1457 }, { "epoch": 0.09, "grad_norm": 0.263748101801689, "learning_rate": 9.896459322309802e-07, "loss": 0.0101, "step": 1458 }, { "epoch": 0.09, "grad_norm": 0.5811943469647456, "learning_rate": 9.896250136136203e-07, "loss": 0.0801, "step": 1459 }, { "epoch": 0.09, "grad_norm": 0.8318265996009885, "learning_rate": 9.89604074107873e-07, "loss": 0.1777, "step": 1460 }, { "epoch": 0.09, "grad_norm": 0.2212493744552875, "learning_rate": 9.895831137146318e-07, "loss": 0.0888, "step": 1461 }, { "epoch": 0.09, "grad_norm": 0.26580603276275505, "learning_rate": 9.895621324347908e-07, "loss": 0.178, "step": 1462 }, { "epoch": 0.09, "grad_norm": 0.277147620795641, "learning_rate": 9.895411302692448e-07, "loss": 0.051, "step": 1463 }, { "epoch": 0.09, "grad_norm": 0.5628289413299168, "learning_rate": 9.895201072188903e-07, "loss": 0.3389, "step": 1464 }, { "epoch": 0.09, "grad_norm": 0.6444211883774726, "learning_rate": 9.89499063284624e-07, "loss": 0.1766, "step": 1465 }, { "epoch": 0.09, "grad_norm": 0.4995024859452174, "learning_rate": 9.894779984673433e-07, "loss": 0.266, "step": 1466 }, { "epoch": 0.09, "grad_norm": 0.5980645482596011, "learning_rate": 9.894569127679476e-07, "loss": 0.3054, "step": 1467 }, { "epoch": 0.09, "grad_norm": 0.4433154938139935, "learning_rate": 9.894358061873358e-07, "loss": 0.0984, "step": 1468 }, { "epoch": 0.09, "grad_norm": 0.2491109776587946, "learning_rate": 9.894146787264088e-07, "loss": 0.0295, "step": 1469 }, { "epoch": 0.09, "grad_norm": 0.36524548467967133, "learning_rate": 9.893935303860677e-07, "loss": 0.0157, "step": 1470 }, { "epoch": 0.09, "grad_norm": 0.3223149265797452, "learning_rate": 9.893723611672147e-07, "loss": 0.4159, "step": 1471 }, { "epoch": 0.09, "grad_norm": 0.5802469108703582, "learning_rate": 9.89351171070753e-07, "loss": 0.1877, "step": 1472 }, { "epoch": 0.09, "grad_norm": 0.3402492212622589, "learning_rate": 9.89329960097587e-07, "loss": 0.1217, "step": 1473 }, { "epoch": 0.09, "grad_norm": 0.7272394862500775, "learning_rate": 9.893087282486208e-07, "loss": 0.2007, "step": 1474 }, { "epoch": 0.09, "grad_norm": 0.1860676868954452, "learning_rate": 9.892874755247608e-07, "loss": 0.1, "step": 1475 }, { "epoch": 0.09, "grad_norm": 0.533114804369231, "learning_rate": 9.892662019269136e-07, "loss": 0.2238, "step": 1476 }, { "epoch": 0.09, "grad_norm": 0.6122280427665033, "learning_rate": 9.892449074559864e-07, "loss": 0.039, "step": 1477 }, { "epoch": 0.09, "grad_norm": 0.6477653508701525, "learning_rate": 9.892235921128881e-07, "loss": 0.2751, "step": 1478 }, { "epoch": 0.09, "grad_norm": 0.6317612010148661, "learning_rate": 9.89202255898528e-07, "loss": 0.2251, "step": 1479 }, { "epoch": 0.09, "grad_norm": 0.47801365439872195, "learning_rate": 9.89180898813816e-07, "loss": 0.1295, "step": 1480 }, { "epoch": 0.09, "grad_norm": 0.3439080532007902, "learning_rate": 9.891595208596638e-07, "loss": 0.161, "step": 1481 }, { "epoch": 0.09, "grad_norm": 0.2094726595885454, "learning_rate": 9.891381220369827e-07, "loss": 0.0166, "step": 1482 }, { "epoch": 0.09, "grad_norm": 0.4794332543917334, "learning_rate": 9.891167023466864e-07, "loss": 0.1335, "step": 1483 }, { "epoch": 0.09, "grad_norm": 0.6044568815496913, "learning_rate": 9.890952617896882e-07, "loss": 0.3535, "step": 1484 }, { "epoch": 0.09, "grad_norm": 0.36745720152941697, "learning_rate": 9.890738003669027e-07, "loss": 0.0173, "step": 1485 }, { "epoch": 0.09, "grad_norm": 0.7129460235783216, "learning_rate": 9.89052318079246e-07, "loss": 0.0579, "step": 1486 }, { "epoch": 0.09, "grad_norm": 1.0361603130817678, "learning_rate": 9.890308149276342e-07, "loss": 0.1412, "step": 1487 }, { "epoch": 0.09, "grad_norm": 0.18823015993972705, "learning_rate": 9.890092909129848e-07, "loss": 0.0465, "step": 1488 }, { "epoch": 0.09, "grad_norm": 1.3016928228509086, "learning_rate": 9.88987746036216e-07, "loss": 0.1725, "step": 1489 }, { "epoch": 0.1, "grad_norm": 0.3818409880923631, "learning_rate": 9.889661802982468e-07, "loss": 0.251, "step": 1490 }, { "epoch": 0.1, "grad_norm": 0.3649233229388234, "learning_rate": 9.889445936999976e-07, "loss": 0.2045, "step": 1491 }, { "epoch": 0.1, "grad_norm": 0.9295062256605674, "learning_rate": 9.889229862423892e-07, "loss": 0.1487, "step": 1492 }, { "epoch": 0.1, "grad_norm": 0.6602399601320765, "learning_rate": 9.889013579263433e-07, "loss": 0.2343, "step": 1493 }, { "epoch": 0.1, "grad_norm": 0.6796608325296203, "learning_rate": 9.888797087527826e-07, "loss": 0.2295, "step": 1494 }, { "epoch": 0.1, "grad_norm": 0.3587957490130416, "learning_rate": 9.888580387226307e-07, "loss": 0.0863, "step": 1495 }, { "epoch": 0.1, "grad_norm": 0.5379181859428669, "learning_rate": 9.88836347836812e-07, "loss": 0.2108, "step": 1496 }, { "epoch": 0.1, "grad_norm": 1.2348044214295755, "learning_rate": 9.88814636096252e-07, "loss": 0.3544, "step": 1497 }, { "epoch": 0.1, "grad_norm": 0.294621699605234, "learning_rate": 9.887929035018773e-07, "loss": 0.1577, "step": 1498 }, { "epoch": 0.1, "grad_norm": 1.1004985743456288, "learning_rate": 9.887711500546147e-07, "loss": 0.1324, "step": 1499 }, { "epoch": 0.1, "grad_norm": 0.44690192051575695, "learning_rate": 9.887493757553923e-07, "loss": 0.3278, "step": 1500 }, { "epoch": 0.1, "grad_norm": 0.7855658632665525, "learning_rate": 9.887275806051388e-07, "loss": 0.3881, "step": 1501 }, { "epoch": 0.1, "grad_norm": 0.9292787724738459, "learning_rate": 9.887057646047846e-07, "loss": 0.2462, "step": 1502 }, { "epoch": 0.1, "grad_norm": 0.712761817448678, "learning_rate": 9.886839277552597e-07, "loss": 0.0251, "step": 1503 }, { "epoch": 0.1, "grad_norm": 0.5377398737419191, "learning_rate": 9.886620700574962e-07, "loss": 0.2777, "step": 1504 }, { "epoch": 0.1, "grad_norm": 0.3413670328124251, "learning_rate": 9.886401915124263e-07, "loss": 0.1447, "step": 1505 }, { "epoch": 0.1, "grad_norm": 0.5039823574572356, "learning_rate": 9.886182921209839e-07, "loss": 0.2739, "step": 1506 }, { "epoch": 0.1, "grad_norm": 0.4076212630655461, "learning_rate": 9.885963718841025e-07, "loss": 0.2969, "step": 1507 }, { "epoch": 0.1, "grad_norm": 0.44300072813030744, "learning_rate": 9.88574430802718e-07, "loss": 0.2196, "step": 1508 }, { "epoch": 0.1, "grad_norm": 0.5676336561032818, "learning_rate": 9.88552468877766e-07, "loss": 0.2615, "step": 1509 }, { "epoch": 0.1, "grad_norm": 0.93740836315491, "learning_rate": 9.885304861101835e-07, "loss": 0.2727, "step": 1510 }, { "epoch": 0.1, "grad_norm": 0.8297037804825235, "learning_rate": 9.885084825009084e-07, "loss": 0.3854, "step": 1511 }, { "epoch": 0.1, "grad_norm": 0.640175390536145, "learning_rate": 9.884864580508795e-07, "loss": 0.4684, "step": 1512 }, { "epoch": 0.1, "grad_norm": 0.861467133913177, "learning_rate": 9.884644127610365e-07, "loss": 0.2164, "step": 1513 }, { "epoch": 0.1, "grad_norm": 0.2962211416169949, "learning_rate": 9.884423466323192e-07, "loss": 0.1421, "step": 1514 }, { "epoch": 0.1, "grad_norm": 0.22972637966336357, "learning_rate": 9.8842025966567e-07, "loss": 0.1644, "step": 1515 }, { "epoch": 0.1, "grad_norm": 0.26510495004736484, "learning_rate": 9.883981518620304e-07, "loss": 0.104, "step": 1516 }, { "epoch": 0.1, "grad_norm": 0.4411577210188472, "learning_rate": 9.88376023222344e-07, "loss": 0.0874, "step": 1517 }, { "epoch": 0.1, "grad_norm": 0.4871797030131832, "learning_rate": 9.883538737475544e-07, "loss": 0.2601, "step": 1518 }, { "epoch": 0.1, "grad_norm": 0.7531400614799489, "learning_rate": 9.883317034386073e-07, "loss": 0.3019, "step": 1519 }, { "epoch": 0.1, "grad_norm": 0.7616916491544177, "learning_rate": 9.883095122964477e-07, "loss": 0.4258, "step": 1520 }, { "epoch": 0.1, "grad_norm": 0.4315192948478817, "learning_rate": 9.882873003220227e-07, "loss": 0.2612, "step": 1521 }, { "epoch": 0.1, "grad_norm": 0.33962815028522214, "learning_rate": 9.8826506751628e-07, "loss": 0.0532, "step": 1522 }, { "epoch": 0.1, "grad_norm": 0.31640458172068525, "learning_rate": 9.88242813880168e-07, "loss": 0.2545, "step": 1523 }, { "epoch": 0.1, "grad_norm": 0.24711460479640185, "learning_rate": 9.88220539414636e-07, "loss": 0.1543, "step": 1524 }, { "epoch": 0.1, "grad_norm": 0.5478387891792209, "learning_rate": 9.881982441206343e-07, "loss": 0.4665, "step": 1525 }, { "epoch": 0.1, "grad_norm": 1.097502839498695, "learning_rate": 9.881759279991143e-07, "loss": 0.0322, "step": 1526 }, { "epoch": 0.1, "grad_norm": 0.3476195653312959, "learning_rate": 9.881535910510276e-07, "loss": 0.0366, "step": 1527 }, { "epoch": 0.1, "grad_norm": 0.5333026791706978, "learning_rate": 9.881312332773275e-07, "loss": 0.4375, "step": 1528 }, { "epoch": 0.1, "grad_norm": 0.3112252104360692, "learning_rate": 9.881088546789677e-07, "loss": 0.1351, "step": 1529 }, { "epoch": 0.1, "grad_norm": 0.4399471696451268, "learning_rate": 9.88086455256903e-07, "loss": 0.1491, "step": 1530 }, { "epoch": 0.1, "grad_norm": 0.6295850881492374, "learning_rate": 9.88064035012089e-07, "loss": 0.0689, "step": 1531 }, { "epoch": 0.1, "grad_norm": 0.38561365987020313, "learning_rate": 9.88041593945482e-07, "loss": 0.2467, "step": 1532 }, { "epoch": 0.1, "grad_norm": 0.5938699720510816, "learning_rate": 9.880191320580396e-07, "loss": 0.3254, "step": 1533 }, { "epoch": 0.1, "grad_norm": 0.1697906908430658, "learning_rate": 9.879966493507198e-07, "loss": 0.0077, "step": 1534 }, { "epoch": 0.1, "grad_norm": 0.3496812653264161, "learning_rate": 9.879741458244822e-07, "loss": 0.0746, "step": 1535 }, { "epoch": 0.1, "grad_norm": 0.31029222651159144, "learning_rate": 9.879516214802866e-07, "loss": 0.216, "step": 1536 }, { "epoch": 0.1, "grad_norm": 0.4859938049276853, "learning_rate": 9.879290763190937e-07, "loss": 0.15, "step": 1537 }, { "epoch": 0.1, "grad_norm": 0.37417699629811346, "learning_rate": 9.879065103418657e-07, "loss": 0.0962, "step": 1538 }, { "epoch": 0.1, "grad_norm": 0.5437353087596696, "learning_rate": 9.87883923549565e-07, "loss": 0.145, "step": 1539 }, { "epoch": 0.1, "grad_norm": 0.8925268366823884, "learning_rate": 9.878613159431554e-07, "loss": 0.2945, "step": 1540 }, { "epoch": 0.1, "grad_norm": 0.5877766008896336, "learning_rate": 9.878386875236016e-07, "loss": 0.17, "step": 1541 }, { "epoch": 0.1, "grad_norm": 0.13553921863758533, "learning_rate": 9.878160382918683e-07, "loss": 0.0674, "step": 1542 }, { "epoch": 0.1, "grad_norm": 0.614686376491471, "learning_rate": 9.877933682489224e-07, "loss": 0.154, "step": 1543 }, { "epoch": 0.1, "grad_norm": 0.4224147413842552, "learning_rate": 9.877706773957308e-07, "loss": 0.2191, "step": 1544 }, { "epoch": 0.1, "grad_norm": 0.6719867186441478, "learning_rate": 9.877479657332617e-07, "loss": 0.2382, "step": 1545 }, { "epoch": 0.1, "grad_norm": 0.20322204392227114, "learning_rate": 9.877252332624837e-07, "loss": 0.0097, "step": 1546 }, { "epoch": 0.1, "grad_norm": 0.8891487395054584, "learning_rate": 9.877024799843667e-07, "loss": 0.0891, "step": 1547 }, { "epoch": 0.1, "grad_norm": 1.1298489880351104, "learning_rate": 9.876797058998817e-07, "loss": 0.165, "step": 1548 }, { "epoch": 0.1, "grad_norm": 0.514652923651593, "learning_rate": 9.8765691101e-07, "loss": 0.1222, "step": 1549 }, { "epoch": 0.1, "grad_norm": 0.6397997624940633, "learning_rate": 9.876340953156943e-07, "loss": 0.2687, "step": 1550 }, { "epoch": 0.1, "grad_norm": 0.7785255189564557, "learning_rate": 9.876112588179376e-07, "loss": 0.1656, "step": 1551 }, { "epoch": 0.1, "grad_norm": 0.7121913562734088, "learning_rate": 9.875884015177045e-07, "loss": 0.2444, "step": 1552 }, { "epoch": 0.1, "grad_norm": 0.6605143159514306, "learning_rate": 9.8756552341597e-07, "loss": 0.2287, "step": 1553 }, { "epoch": 0.1, "grad_norm": 0.36886705029343614, "learning_rate": 9.875426245137101e-07, "loss": 0.1895, "step": 1554 }, { "epoch": 0.1, "grad_norm": 0.7679070950832787, "learning_rate": 9.87519704811902e-07, "loss": 0.2849, "step": 1555 }, { "epoch": 0.1, "grad_norm": 0.6701865236270271, "learning_rate": 9.87496764311523e-07, "loss": 0.2261, "step": 1556 }, { "epoch": 0.1, "grad_norm": 0.42004633192356616, "learning_rate": 9.874738030135521e-07, "loss": 0.2492, "step": 1557 }, { "epoch": 0.1, "grad_norm": 0.339808480879583, "learning_rate": 9.874508209189689e-07, "loss": 0.2003, "step": 1558 }, { "epoch": 0.1, "grad_norm": 0.7761775197322209, "learning_rate": 9.874278180287536e-07, "loss": 0.2574, "step": 1559 }, { "epoch": 0.1, "grad_norm": 0.9695043231557823, "learning_rate": 9.874047943438878e-07, "loss": 0.2385, "step": 1560 }, { "epoch": 0.1, "grad_norm": 1.004971613166862, "learning_rate": 9.873817498653537e-07, "loss": 0.2249, "step": 1561 }, { "epoch": 0.1, "grad_norm": 0.5756796278033042, "learning_rate": 9.873586845941344e-07, "loss": 0.1366, "step": 1562 }, { "epoch": 0.1, "grad_norm": 1.2083389990401696, "learning_rate": 9.87335598531214e-07, "loss": 0.1626, "step": 1563 }, { "epoch": 0.1, "grad_norm": 0.9287886982072556, "learning_rate": 9.87312491677577e-07, "loss": 0.0379, "step": 1564 }, { "epoch": 0.1, "grad_norm": 0.4611163777873519, "learning_rate": 9.872893640342098e-07, "loss": 0.1681, "step": 1565 }, { "epoch": 0.1, "grad_norm": 0.6474301274319696, "learning_rate": 9.872662156020986e-07, "loss": 0.2599, "step": 1566 }, { "epoch": 0.1, "grad_norm": 0.41372820467170063, "learning_rate": 9.872430463822312e-07, "loss": 0.2448, "step": 1567 }, { "epoch": 0.1, "grad_norm": 0.5639025728086583, "learning_rate": 9.872198563755959e-07, "loss": 0.2386, "step": 1568 }, { "epoch": 0.1, "grad_norm": 0.3475531518259124, "learning_rate": 9.87196645583182e-07, "loss": 0.1789, "step": 1569 }, { "epoch": 0.1, "grad_norm": 0.6908444271896197, "learning_rate": 9.8717341400598e-07, "loss": 0.1217, "step": 1570 }, { "epoch": 0.1, "grad_norm": 0.5832040661187229, "learning_rate": 9.871501616449806e-07, "loss": 0.1626, "step": 1571 }, { "epoch": 0.1, "grad_norm": 0.3531856618380542, "learning_rate": 9.87126888501176e-07, "loss": 0.1372, "step": 1572 }, { "epoch": 0.1, "grad_norm": 0.48973224252772235, "learning_rate": 9.871035945755593e-07, "loss": 0.098, "step": 1573 }, { "epoch": 0.1, "grad_norm": 0.40065693580039274, "learning_rate": 9.87080279869124e-07, "loss": 0.0152, "step": 1574 }, { "epoch": 0.1, "grad_norm": 0.6889147943802033, "learning_rate": 9.87056944382865e-07, "loss": 0.3114, "step": 1575 }, { "epoch": 0.1, "grad_norm": 0.3608933592686679, "learning_rate": 9.870335881177773e-07, "loss": 0.2242, "step": 1576 }, { "epoch": 0.1, "grad_norm": 0.7705750279452553, "learning_rate": 9.870102110748577e-07, "loss": 0.4654, "step": 1577 }, { "epoch": 0.1, "grad_norm": 0.5027787671169268, "learning_rate": 9.869868132551036e-07, "loss": 0.0991, "step": 1578 }, { "epoch": 0.1, "grad_norm": 0.45869951418894883, "learning_rate": 9.86963394659513e-07, "loss": 0.1745, "step": 1579 }, { "epoch": 0.1, "grad_norm": 0.5005370133604695, "learning_rate": 9.869399552890852e-07, "loss": 0.1244, "step": 1580 }, { "epoch": 0.1, "grad_norm": 0.6014441781373475, "learning_rate": 9.8691649514482e-07, "loss": 0.522, "step": 1581 }, { "epoch": 0.1, "grad_norm": 0.4004666041309846, "learning_rate": 9.868930142277183e-07, "loss": 0.0131, "step": 1582 }, { "epoch": 0.1, "grad_norm": 0.503409687553413, "learning_rate": 9.868695125387817e-07, "loss": 0.2341, "step": 1583 }, { "epoch": 0.1, "grad_norm": 2.226301001193882, "learning_rate": 9.868459900790131e-07, "loss": 0.2083, "step": 1584 }, { "epoch": 0.1, "grad_norm": 0.36025836315339727, "learning_rate": 9.868224468494158e-07, "loss": 0.2862, "step": 1585 }, { "epoch": 0.1, "grad_norm": 0.6341257291602369, "learning_rate": 9.867988828509943e-07, "loss": 0.0529, "step": 1586 }, { "epoch": 0.1, "grad_norm": 0.6686417639942993, "learning_rate": 9.86775298084754e-07, "loss": 0.2086, "step": 1587 }, { "epoch": 0.1, "grad_norm": 0.15439815770014853, "learning_rate": 9.867516925517008e-07, "loss": 0.0469, "step": 1588 }, { "epoch": 0.1, "grad_norm": 0.6521696811939802, "learning_rate": 9.867280662528418e-07, "loss": 0.2327, "step": 1589 }, { "epoch": 0.1, "grad_norm": 0.4687022140046831, "learning_rate": 9.867044191891853e-07, "loss": 0.016, "step": 1590 }, { "epoch": 0.1, "grad_norm": 0.42445870053570794, "learning_rate": 9.866807513617396e-07, "loss": 0.2598, "step": 1591 }, { "epoch": 0.1, "grad_norm": 0.4178296998710727, "learning_rate": 9.86657062771515e-07, "loss": 0.3339, "step": 1592 }, { "epoch": 0.1, "grad_norm": 1.1190587607436973, "learning_rate": 9.866333534195214e-07, "loss": 0.0508, "step": 1593 }, { "epoch": 0.1, "grad_norm": 0.7492173303820396, "learning_rate": 9.86609623306771e-07, "loss": 0.0173, "step": 1594 }, { "epoch": 0.1, "grad_norm": 0.17268774248057586, "learning_rate": 9.86585872434276e-07, "loss": 0.0098, "step": 1595 }, { "epoch": 0.1, "grad_norm": 0.6882161608394176, "learning_rate": 9.865621008030492e-07, "loss": 0.4026, "step": 1596 }, { "epoch": 0.1, "grad_norm": 0.18914030733008877, "learning_rate": 9.865383084141051e-07, "loss": 0.0209, "step": 1597 }, { "epoch": 0.1, "grad_norm": 0.8754944883215988, "learning_rate": 9.865144952684588e-07, "loss": 0.064, "step": 1598 }, { "epoch": 0.1, "grad_norm": 0.2758435168021218, "learning_rate": 9.864906613671262e-07, "loss": 0.1013, "step": 1599 }, { "epoch": 0.1, "grad_norm": 0.6569159055960563, "learning_rate": 9.864668067111238e-07, "loss": 0.2068, "step": 1600 }, { "epoch": 0.1, "grad_norm": 0.4573223639888256, "learning_rate": 9.864429313014698e-07, "loss": 0.245, "step": 1601 }, { "epoch": 0.1, "grad_norm": 0.6549498989924214, "learning_rate": 9.86419035139182e-07, "loss": 0.0784, "step": 1602 }, { "epoch": 0.1, "grad_norm": 0.4307212536134745, "learning_rate": 9.863951182252808e-07, "loss": 0.0911, "step": 1603 }, { "epoch": 0.1, "grad_norm": 0.9246695738009061, "learning_rate": 9.863711805607858e-07, "loss": 0.1848, "step": 1604 }, { "epoch": 0.1, "grad_norm": 0.8584093011942867, "learning_rate": 9.863472221467188e-07, "loss": 0.2089, "step": 1605 }, { "epoch": 0.1, "grad_norm": 0.1959506485643055, "learning_rate": 9.863232429841013e-07, "loss": 0.0927, "step": 1606 }, { "epoch": 0.1, "grad_norm": 1.1312076550436183, "learning_rate": 9.862992430739569e-07, "loss": 0.2254, "step": 1607 }, { "epoch": 0.1, "grad_norm": 0.7194644325458003, "learning_rate": 9.862752224173089e-07, "loss": 0.1336, "step": 1608 }, { "epoch": 0.1, "grad_norm": 0.35806296749475824, "learning_rate": 9.862511810151827e-07, "loss": 0.091, "step": 1609 }, { "epoch": 0.1, "grad_norm": 0.3224249152615377, "learning_rate": 9.862271188686036e-07, "loss": 0.1694, "step": 1610 }, { "epoch": 0.1, "grad_norm": 0.35529476439744356, "learning_rate": 9.86203035978598e-07, "loss": 0.084, "step": 1611 }, { "epoch": 0.1, "grad_norm": 0.4637912150396062, "learning_rate": 9.861789323461936e-07, "loss": 0.1505, "step": 1612 }, { "epoch": 0.1, "grad_norm": 0.695547042266105, "learning_rate": 9.861548079724184e-07, "loss": 0.1855, "step": 1613 }, { "epoch": 0.1, "grad_norm": 0.9573012386923306, "learning_rate": 9.86130662858302e-07, "loss": 0.4129, "step": 1614 }, { "epoch": 0.1, "grad_norm": 0.8909234017172247, "learning_rate": 9.861064970048742e-07, "loss": 0.3612, "step": 1615 }, { "epoch": 0.1, "grad_norm": 0.5461979307132618, "learning_rate": 9.860823104131661e-07, "loss": 0.1456, "step": 1616 }, { "epoch": 0.1, "grad_norm": 1.450693533453506, "learning_rate": 9.860581030842094e-07, "loss": 0.3491, "step": 1617 }, { "epoch": 0.1, "grad_norm": 0.5700827078183561, "learning_rate": 9.86033875019037e-07, "loss": 0.236, "step": 1618 }, { "epoch": 0.1, "grad_norm": 0.6427023992944825, "learning_rate": 9.860096262186822e-07, "loss": 0.1428, "step": 1619 }, { "epoch": 0.1, "grad_norm": 0.5089999767887992, "learning_rate": 9.8598535668418e-07, "loss": 0.1872, "step": 1620 }, { "epoch": 0.1, "grad_norm": 0.22674102403414914, "learning_rate": 9.859610664165657e-07, "loss": 0.0153, "step": 1621 }, { "epoch": 0.1, "grad_norm": 0.6010049723526243, "learning_rate": 9.85936755416875e-07, "loss": 0.2255, "step": 1622 }, { "epoch": 0.1, "grad_norm": 0.46927568870917785, "learning_rate": 9.859124236861458e-07, "loss": 0.2207, "step": 1623 }, { "epoch": 0.1, "grad_norm": 0.6100554901334297, "learning_rate": 9.858880712254156e-07, "loss": 0.3904, "step": 1624 }, { "epoch": 0.1, "grad_norm": 0.5253195391980957, "learning_rate": 9.858636980357236e-07, "loss": 0.1064, "step": 1625 }, { "epoch": 0.1, "grad_norm": 0.3186766994435842, "learning_rate": 9.858393041181094e-07, "loss": 0.0091, "step": 1626 }, { "epoch": 0.1, "grad_norm": 0.49194077776033, "learning_rate": 9.85814889473614e-07, "loss": 0.1151, "step": 1627 }, { "epoch": 0.1, "grad_norm": 0.3014511666074654, "learning_rate": 9.857904541032788e-07, "loss": 0.0477, "step": 1628 }, { "epoch": 0.1, "grad_norm": 0.6800381451103563, "learning_rate": 9.857659980081462e-07, "loss": 0.2274, "step": 1629 }, { "epoch": 0.1, "grad_norm": 0.5048897009278337, "learning_rate": 9.857415211892597e-07, "loss": 0.2173, "step": 1630 }, { "epoch": 0.1, "grad_norm": 0.38878687723503236, "learning_rate": 9.857170236476634e-07, "loss": 0.1196, "step": 1631 }, { "epoch": 0.1, "grad_norm": 0.4921853470158305, "learning_rate": 9.856925053844024e-07, "loss": 0.2371, "step": 1632 }, { "epoch": 0.1, "grad_norm": 0.6741135439542971, "learning_rate": 9.856679664005227e-07, "loss": 0.2392, "step": 1633 }, { "epoch": 0.1, "grad_norm": 0.7293732326588783, "learning_rate": 9.856434066970713e-07, "loss": 0.4094, "step": 1634 }, { "epoch": 0.1, "grad_norm": 1.2996126678591897, "learning_rate": 9.85618826275096e-07, "loss": 0.3037, "step": 1635 }, { "epoch": 0.1, "grad_norm": 0.10446453277853311, "learning_rate": 9.855942251356452e-07, "loss": 0.0599, "step": 1636 }, { "epoch": 0.1, "grad_norm": 0.7036396263243663, "learning_rate": 9.855696032797687e-07, "loss": 0.1621, "step": 1637 }, { "epoch": 0.1, "grad_norm": 0.6439254075434799, "learning_rate": 9.855449607085168e-07, "loss": 0.146, "step": 1638 }, { "epoch": 0.1, "grad_norm": 1.0861699865315655, "learning_rate": 9.855202974229407e-07, "loss": 0.1874, "step": 1639 }, { "epoch": 0.1, "grad_norm": 0.7652036360756149, "learning_rate": 9.854956134240929e-07, "loss": 0.2367, "step": 1640 }, { "epoch": 0.1, "grad_norm": 0.491924900336481, "learning_rate": 9.85470908713026e-07, "loss": 0.2121, "step": 1641 }, { "epoch": 0.1, "grad_norm": 1.09698943161845, "learning_rate": 9.854461832907943e-07, "loss": 0.2639, "step": 1642 }, { "epoch": 0.1, "grad_norm": 0.3646403434772786, "learning_rate": 9.854214371584526e-07, "loss": 0.1764, "step": 1643 }, { "epoch": 0.1, "grad_norm": 0.5071519005871684, "learning_rate": 9.853966703170566e-07, "loss": 0.1177, "step": 1644 }, { "epoch": 0.1, "grad_norm": 0.6921879817661868, "learning_rate": 9.853718827676628e-07, "loss": 0.3415, "step": 1645 }, { "epoch": 0.1, "grad_norm": 1.1163835110493454, "learning_rate": 9.853470745113288e-07, "loss": 0.1166, "step": 1646 }, { "epoch": 0.11, "grad_norm": 0.25022652331553863, "learning_rate": 9.85322245549113e-07, "loss": 0.0072, "step": 1647 }, { "epoch": 0.11, "grad_norm": 0.6749871432923831, "learning_rate": 9.852973958820746e-07, "loss": 0.176, "step": 1648 }, { "epoch": 0.11, "grad_norm": 0.4975505186456713, "learning_rate": 9.852725255112734e-07, "loss": 0.1886, "step": 1649 }, { "epoch": 0.11, "grad_norm": 0.6232905693402038, "learning_rate": 9.85247634437771e-07, "loss": 0.3126, "step": 1650 }, { "epoch": 0.11, "grad_norm": 0.8002825993038455, "learning_rate": 9.852227226626292e-07, "loss": 0.1474, "step": 1651 }, { "epoch": 0.11, "grad_norm": 0.10381943547798866, "learning_rate": 9.851977901869105e-07, "loss": 0.0839, "step": 1652 }, { "epoch": 0.11, "grad_norm": 0.7543358346895345, "learning_rate": 9.851728370116786e-07, "loss": 0.3721, "step": 1653 }, { "epoch": 0.11, "grad_norm": 0.8141386475697114, "learning_rate": 9.851478631379982e-07, "loss": 0.1532, "step": 1654 }, { "epoch": 0.11, "grad_norm": 0.6566212458996052, "learning_rate": 9.851228685669347e-07, "loss": 0.2333, "step": 1655 }, { "epoch": 0.11, "grad_norm": 0.3501251624972066, "learning_rate": 9.850978532995545e-07, "loss": 0.1383, "step": 1656 }, { "epoch": 0.11, "grad_norm": 0.7267284790649643, "learning_rate": 9.850728173369246e-07, "loss": 0.228, "step": 1657 }, { "epoch": 0.11, "grad_norm": 0.581595390627329, "learning_rate": 9.850477606801132e-07, "loss": 0.2769, "step": 1658 }, { "epoch": 0.11, "grad_norm": 0.4579902043564122, "learning_rate": 9.850226833301892e-07, "loss": 0.2163, "step": 1659 }, { "epoch": 0.11, "grad_norm": 0.9122505674135036, "learning_rate": 9.849975852882226e-07, "loss": 0.057, "step": 1660 }, { "epoch": 0.11, "grad_norm": 0.3111394917274574, "learning_rate": 9.84972466555284e-07, "loss": 0.0714, "step": 1661 }, { "epoch": 0.11, "grad_norm": 0.8783409127064085, "learning_rate": 9.849473271324452e-07, "loss": 0.3154, "step": 1662 }, { "epoch": 0.11, "grad_norm": 0.8169232392745213, "learning_rate": 9.849221670207783e-07, "loss": 0.1238, "step": 1663 }, { "epoch": 0.11, "grad_norm": 0.34584369689229244, "learning_rate": 9.848969862213572e-07, "loss": 0.0439, "step": 1664 }, { "epoch": 0.11, "grad_norm": 0.20439358764503512, "learning_rate": 9.848717847352557e-07, "loss": 0.0118, "step": 1665 }, { "epoch": 0.11, "grad_norm": 0.8297774778868178, "learning_rate": 9.848465625635494e-07, "loss": 0.0591, "step": 1666 }, { "epoch": 0.11, "grad_norm": 0.21684237184432614, "learning_rate": 9.848213197073138e-07, "loss": 0.0772, "step": 1667 }, { "epoch": 0.11, "grad_norm": 0.5148735373998339, "learning_rate": 9.847960561676263e-07, "loss": 0.3261, "step": 1668 }, { "epoch": 0.11, "grad_norm": 0.6875334741208333, "learning_rate": 9.847707719455643e-07, "loss": 0.2342, "step": 1669 }, { "epoch": 0.11, "grad_norm": 0.21305224593370214, "learning_rate": 9.847454670422067e-07, "loss": 0.0965, "step": 1670 }, { "epoch": 0.11, "grad_norm": 0.30857334239297074, "learning_rate": 9.84720141458633e-07, "loss": 0.1509, "step": 1671 }, { "epoch": 0.11, "grad_norm": 0.5982979108595999, "learning_rate": 9.846947951959237e-07, "loss": 0.1248, "step": 1672 }, { "epoch": 0.11, "grad_norm": 0.3881707818725426, "learning_rate": 9.846694282551601e-07, "loss": 0.0306, "step": 1673 }, { "epoch": 0.11, "grad_norm": 0.7056799214476155, "learning_rate": 9.846440406374244e-07, "loss": 0.1658, "step": 1674 }, { "epoch": 0.11, "grad_norm": 0.8837410663483006, "learning_rate": 9.846186323437998e-07, "loss": 0.2022, "step": 1675 }, { "epoch": 0.11, "grad_norm": 0.8431493174441403, "learning_rate": 9.8459320337537e-07, "loss": 0.3112, "step": 1676 }, { "epoch": 0.11, "grad_norm": 0.24393374157755976, "learning_rate": 9.8456775373322e-07, "loss": 0.0037, "step": 1677 }, { "epoch": 0.11, "grad_norm": 0.5249565529368295, "learning_rate": 9.845422834184354e-07, "loss": 0.2402, "step": 1678 }, { "epoch": 0.11, "grad_norm": 0.5373256341212908, "learning_rate": 9.84516792432103e-07, "loss": 0.2174, "step": 1679 }, { "epoch": 0.11, "grad_norm": 0.5148799176302914, "learning_rate": 9.844912807753102e-07, "loss": 0.0178, "step": 1680 }, { "epoch": 0.11, "grad_norm": 1.2654821914220962, "learning_rate": 9.844657484491455e-07, "loss": 0.5374, "step": 1681 }, { "epoch": 0.11, "grad_norm": 0.34012022508386563, "learning_rate": 9.844401954546982e-07, "loss": 0.2204, "step": 1682 }, { "epoch": 0.11, "grad_norm": 0.6504776717129892, "learning_rate": 9.84414621793058e-07, "loss": 0.4022, "step": 1683 }, { "epoch": 0.11, "grad_norm": 0.5094296043064444, "learning_rate": 9.843890274653164e-07, "loss": 0.2673, "step": 1684 }, { "epoch": 0.11, "grad_norm": 0.9684770030115625, "learning_rate": 9.843634124725653e-07, "loss": 0.1952, "step": 1685 }, { "epoch": 0.11, "grad_norm": 0.6669948245222741, "learning_rate": 9.843377768158971e-07, "loss": 0.2342, "step": 1686 }, { "epoch": 0.11, "grad_norm": 0.4466624802946475, "learning_rate": 9.843121204964057e-07, "loss": 0.1335, "step": 1687 }, { "epoch": 0.11, "grad_norm": 0.60892533892139, "learning_rate": 9.842864435151859e-07, "loss": 0.3891, "step": 1688 }, { "epoch": 0.11, "grad_norm": 0.4968500382457546, "learning_rate": 9.842607458733325e-07, "loss": 0.2572, "step": 1689 }, { "epoch": 0.11, "grad_norm": 1.670389115941225, "learning_rate": 9.842350275719426e-07, "loss": 0.0334, "step": 1690 }, { "epoch": 0.11, "grad_norm": 0.6025367438324835, "learning_rate": 9.842092886121127e-07, "loss": 0.1054, "step": 1691 }, { "epoch": 0.11, "grad_norm": 0.4752594685453358, "learning_rate": 9.841835289949412e-07, "loss": 0.2078, "step": 1692 }, { "epoch": 0.11, "grad_norm": 0.7298000651694033, "learning_rate": 9.84157748721527e-07, "loss": 0.0499, "step": 1693 }, { "epoch": 0.11, "grad_norm": 0.44105453765342617, "learning_rate": 9.841319477929702e-07, "loss": 0.2132, "step": 1694 }, { "epoch": 0.11, "grad_norm": 0.5848802588162659, "learning_rate": 9.841061262103712e-07, "loss": 0.3941, "step": 1695 }, { "epoch": 0.11, "grad_norm": 0.5989033355167053, "learning_rate": 9.840802839748313e-07, "loss": 0.2946, "step": 1696 }, { "epoch": 0.11, "grad_norm": 0.4800551168081081, "learning_rate": 9.84054421087454e-07, "loss": 0.1525, "step": 1697 }, { "epoch": 0.11, "grad_norm": 0.17172942266235122, "learning_rate": 9.840285375493416e-07, "loss": 0.0979, "step": 1698 }, { "epoch": 0.11, "grad_norm": 0.6392176185366084, "learning_rate": 9.840026333615987e-07, "loss": 0.0169, "step": 1699 }, { "epoch": 0.11, "grad_norm": 1.846588000790528, "learning_rate": 9.839767085253307e-07, "loss": 0.2467, "step": 1700 }, { "epoch": 0.11, "grad_norm": 0.9009554247070751, "learning_rate": 9.839507630416436e-07, "loss": 0.1856, "step": 1701 }, { "epoch": 0.11, "grad_norm": 0.30164687180032423, "learning_rate": 9.839247969116437e-07, "loss": 0.2101, "step": 1702 }, { "epoch": 0.11, "grad_norm": 1.0101689422412143, "learning_rate": 9.838988101364394e-07, "loss": 0.0621, "step": 1703 }, { "epoch": 0.11, "grad_norm": 0.6279014376251323, "learning_rate": 9.838728027171388e-07, "loss": 0.3739, "step": 1704 }, { "epoch": 0.11, "grad_norm": 1.4724623998971567, "learning_rate": 9.83846774654852e-07, "loss": 0.3858, "step": 1705 }, { "epoch": 0.11, "grad_norm": 0.3424337059410447, "learning_rate": 9.83820725950689e-07, "loss": 0.343, "step": 1706 }, { "epoch": 0.11, "grad_norm": 0.2760635915213745, "learning_rate": 9.837946566057614e-07, "loss": 0.1629, "step": 1707 }, { "epoch": 0.11, "grad_norm": 0.7490635728472637, "learning_rate": 9.83768566621181e-07, "loss": 0.1155, "step": 1708 }, { "epoch": 0.11, "grad_norm": 0.8384511238522884, "learning_rate": 9.837424559980612e-07, "loss": 0.1987, "step": 1709 }, { "epoch": 0.11, "grad_norm": 0.8215913083693003, "learning_rate": 9.837163247375157e-07, "loss": 0.2006, "step": 1710 }, { "epoch": 0.11, "grad_norm": 0.5060243417944514, "learning_rate": 9.836901728406594e-07, "loss": 0.3762, "step": 1711 }, { "epoch": 0.11, "grad_norm": 0.5476501914212019, "learning_rate": 9.83664000308608e-07, "loss": 0.2034, "step": 1712 }, { "epoch": 0.11, "grad_norm": 0.44823886681977815, "learning_rate": 9.83637807142478e-07, "loss": 0.2989, "step": 1713 }, { "epoch": 0.11, "grad_norm": 0.5106181915582283, "learning_rate": 9.83611593343387e-07, "loss": 0.2934, "step": 1714 }, { "epoch": 0.11, "grad_norm": 0.282226581431579, "learning_rate": 9.835853589124531e-07, "loss": 0.2696, "step": 1715 }, { "epoch": 0.11, "grad_norm": 0.6971792264132003, "learning_rate": 9.83559103850796e-07, "loss": 0.2182, "step": 1716 }, { "epoch": 0.11, "grad_norm": 0.18208110199973887, "learning_rate": 9.835328281595351e-07, "loss": 0.1607, "step": 1717 }, { "epoch": 0.11, "grad_norm": 0.7076367669794615, "learning_rate": 9.83506531839792e-07, "loss": 0.3098, "step": 1718 }, { "epoch": 0.11, "grad_norm": 0.7060300322733915, "learning_rate": 9.834802148926882e-07, "loss": 0.1326, "step": 1719 }, { "epoch": 0.11, "grad_norm": 0.5726378716837955, "learning_rate": 9.834538773193463e-07, "loss": 0.2372, "step": 1720 }, { "epoch": 0.11, "grad_norm": 0.09794177964706838, "learning_rate": 9.834275191208902e-07, "loss": 0.004, "step": 1721 }, { "epoch": 0.11, "grad_norm": 0.33448780003817236, "learning_rate": 9.834011402984445e-07, "loss": 0.119, "step": 1722 }, { "epoch": 0.11, "grad_norm": 0.570240963744505, "learning_rate": 9.833747408531344e-07, "loss": 0.4368, "step": 1723 }, { "epoch": 0.11, "grad_norm": 0.553949900799787, "learning_rate": 9.833483207860859e-07, "loss": 0.0991, "step": 1724 }, { "epoch": 0.11, "grad_norm": 0.29333690283732444, "learning_rate": 9.833218800984266e-07, "loss": 0.0875, "step": 1725 }, { "epoch": 0.11, "grad_norm": 0.3069001815477144, "learning_rate": 9.832954187912843e-07, "loss": 0.0407, "step": 1726 }, { "epoch": 0.11, "grad_norm": 0.5057713466762443, "learning_rate": 9.832689368657879e-07, "loss": 0.2316, "step": 1727 }, { "epoch": 0.11, "grad_norm": 0.4303615533563246, "learning_rate": 9.83242434323067e-07, "loss": 0.0915, "step": 1728 }, { "epoch": 0.11, "grad_norm": 0.6236747153961416, "learning_rate": 9.832159111642526e-07, "loss": 0.0566, "step": 1729 }, { "epoch": 0.11, "grad_norm": 0.3078982598280512, "learning_rate": 9.831893673904759e-07, "loss": 0.1209, "step": 1730 }, { "epoch": 0.11, "grad_norm": 1.2862679414751008, "learning_rate": 9.831628030028696e-07, "loss": 0.2588, "step": 1731 }, { "epoch": 0.11, "grad_norm": 0.3677271815970271, "learning_rate": 9.831362180025666e-07, "loss": 0.3474, "step": 1732 }, { "epoch": 0.11, "grad_norm": 0.6741049769726314, "learning_rate": 9.831096123907015e-07, "loss": 0.2151, "step": 1733 }, { "epoch": 0.11, "grad_norm": 0.6127332879081245, "learning_rate": 9.83082986168409e-07, "loss": 0.2339, "step": 1734 }, { "epoch": 0.11, "grad_norm": 0.31925479786507616, "learning_rate": 9.830563393368255e-07, "loss": 0.0889, "step": 1735 }, { "epoch": 0.11, "grad_norm": 0.48704935384966197, "learning_rate": 9.830296718970872e-07, "loss": 0.1856, "step": 1736 }, { "epoch": 0.11, "grad_norm": 0.7564513724340943, "learning_rate": 9.830029838503322e-07, "loss": 0.1297, "step": 1737 }, { "epoch": 0.11, "grad_norm": 0.8743928928257639, "learning_rate": 9.829762751976991e-07, "loss": 0.3113, "step": 1738 }, { "epoch": 0.11, "grad_norm": 0.4996887780505269, "learning_rate": 9.82949545940327e-07, "loss": 0.2354, "step": 1739 }, { "epoch": 0.11, "grad_norm": 0.7579165889952131, "learning_rate": 9.829227960793564e-07, "loss": 0.196, "step": 1740 }, { "epoch": 0.11, "grad_norm": 0.2615387161682434, "learning_rate": 9.828960256159287e-07, "loss": 0.0289, "step": 1741 }, { "epoch": 0.11, "grad_norm": 0.3813425408339454, "learning_rate": 9.828692345511857e-07, "loss": 0.1092, "step": 1742 }, { "epoch": 0.11, "grad_norm": 0.27618223710505885, "learning_rate": 9.828424228862703e-07, "loss": 0.0345, "step": 1743 }, { "epoch": 0.11, "grad_norm": 0.5707560816531704, "learning_rate": 9.828155906223267e-07, "loss": 0.0679, "step": 1744 }, { "epoch": 0.11, "grad_norm": 0.8166632332306311, "learning_rate": 9.827887377604995e-07, "loss": 0.2064, "step": 1745 }, { "epoch": 0.11, "grad_norm": 0.13638838624686567, "learning_rate": 9.827618643019339e-07, "loss": 0.081, "step": 1746 }, { "epoch": 0.11, "grad_norm": 0.10007512364566283, "learning_rate": 9.82734970247777e-07, "loss": 0.0394, "step": 1747 }, { "epoch": 0.11, "grad_norm": 0.6577255730064722, "learning_rate": 9.827080555991759e-07, "loss": 0.4635, "step": 1748 }, { "epoch": 0.11, "grad_norm": 0.30082004733304346, "learning_rate": 9.826811203572785e-07, "loss": 0.051, "step": 1749 }, { "epoch": 0.11, "grad_norm": 1.970628457968785, "learning_rate": 9.826541645232344e-07, "loss": 0.1083, "step": 1750 }, { "epoch": 0.11, "grad_norm": 0.45907943999897577, "learning_rate": 9.826271880981934e-07, "loss": 0.2844, "step": 1751 }, { "epoch": 0.11, "grad_norm": 0.34774585567832594, "learning_rate": 9.826001910833062e-07, "loss": 0.4047, "step": 1752 }, { "epoch": 0.11, "grad_norm": 0.5476944428879885, "learning_rate": 9.825731734797246e-07, "loss": 0.2125, "step": 1753 }, { "epoch": 0.11, "grad_norm": 0.5065585280997207, "learning_rate": 9.825461352886016e-07, "loss": 0.3552, "step": 1754 }, { "epoch": 0.11, "grad_norm": 0.5620406168997668, "learning_rate": 9.825190765110904e-07, "loss": 0.1863, "step": 1755 }, { "epoch": 0.11, "grad_norm": 0.8806592318224842, "learning_rate": 9.824919971483451e-07, "loss": 0.3026, "step": 1756 }, { "epoch": 0.11, "grad_norm": 0.647794205861562, "learning_rate": 9.824648972015218e-07, "loss": 0.1532, "step": 1757 }, { "epoch": 0.11, "grad_norm": 0.3841639771848569, "learning_rate": 9.824377766717758e-07, "loss": 0.1178, "step": 1758 }, { "epoch": 0.11, "grad_norm": 0.47163025371174877, "learning_rate": 9.824106355602643e-07, "loss": 0.0146, "step": 1759 }, { "epoch": 0.11, "grad_norm": 0.917001845279771, "learning_rate": 9.823834738681454e-07, "loss": 0.1631, "step": 1760 }, { "epoch": 0.11, "grad_norm": 0.3793247641315772, "learning_rate": 9.823562915965779e-07, "loss": 0.265, "step": 1761 }, { "epoch": 0.11, "grad_norm": 0.9563626167521188, "learning_rate": 9.823290887467213e-07, "loss": 0.3483, "step": 1762 }, { "epoch": 0.11, "grad_norm": 0.42631993786465006, "learning_rate": 9.82301865319736e-07, "loss": 0.1072, "step": 1763 }, { "epoch": 0.11, "grad_norm": 0.4259209625898786, "learning_rate": 9.82274621316784e-07, "loss": 0.1468, "step": 1764 }, { "epoch": 0.11, "grad_norm": 0.4871455260003286, "learning_rate": 9.822473567390269e-07, "loss": 0.4063, "step": 1765 }, { "epoch": 0.11, "grad_norm": 0.5626342707483665, "learning_rate": 9.82220071587628e-07, "loss": 0.2594, "step": 1766 }, { "epoch": 0.11, "grad_norm": 0.3353387516102091, "learning_rate": 9.821927658637517e-07, "loss": 0.0104, "step": 1767 }, { "epoch": 0.11, "grad_norm": 0.7804416552276265, "learning_rate": 9.821654395685626e-07, "loss": 0.1929, "step": 1768 }, { "epoch": 0.11, "grad_norm": 0.23512946681285746, "learning_rate": 9.821380927032264e-07, "loss": 0.1229, "step": 1769 }, { "epoch": 0.11, "grad_norm": 0.5555530016103668, "learning_rate": 9.821107252689102e-07, "loss": 0.2054, "step": 1770 }, { "epoch": 0.11, "grad_norm": 0.5443962688924795, "learning_rate": 9.820833372667812e-07, "loss": 0.1442, "step": 1771 }, { "epoch": 0.11, "grad_norm": 0.39619915615185125, "learning_rate": 9.82055928698008e-07, "loss": 0.2412, "step": 1772 }, { "epoch": 0.11, "grad_norm": 0.3305805628162596, "learning_rate": 9.820284995637595e-07, "loss": 0.2055, "step": 1773 }, { "epoch": 0.11, "grad_norm": 0.6046842976387699, "learning_rate": 9.820010498652064e-07, "loss": 0.4141, "step": 1774 }, { "epoch": 0.11, "grad_norm": 0.4031768210777634, "learning_rate": 9.819735796035197e-07, "loss": 0.1269, "step": 1775 }, { "epoch": 0.11, "grad_norm": 0.6229167909879683, "learning_rate": 9.819460887798713e-07, "loss": 0.1951, "step": 1776 }, { "epoch": 0.11, "grad_norm": 0.40434643232921913, "learning_rate": 9.819185773954335e-07, "loss": 0.1349, "step": 1777 }, { "epoch": 0.11, "grad_norm": 0.433914946149375, "learning_rate": 9.818910454513808e-07, "loss": 0.2466, "step": 1778 }, { "epoch": 0.11, "grad_norm": 0.5308097070441203, "learning_rate": 9.818634929488872e-07, "loss": 0.1349, "step": 1779 }, { "epoch": 0.11, "grad_norm": 0.3651917806208901, "learning_rate": 9.818359198891284e-07, "loss": 0.083, "step": 1780 }, { "epoch": 0.11, "grad_norm": 0.4163734535240564, "learning_rate": 9.818083262732806e-07, "loss": 0.2195, "step": 1781 }, { "epoch": 0.11, "grad_norm": 0.16223345234529754, "learning_rate": 9.81780712102521e-07, "loss": 0.2932, "step": 1782 }, { "epoch": 0.11, "grad_norm": 0.6132952793487771, "learning_rate": 9.817530773780276e-07, "loss": 0.1361, "step": 1783 }, { "epoch": 0.11, "grad_norm": 0.45207397666049487, "learning_rate": 9.817254221009798e-07, "loss": 0.448, "step": 1784 }, { "epoch": 0.11, "grad_norm": 0.34689781529461494, "learning_rate": 9.81697746272557e-07, "loss": 0.1557, "step": 1785 }, { "epoch": 0.11, "grad_norm": 0.5331420713596847, "learning_rate": 9.816700498939399e-07, "loss": 0.0829, "step": 1786 }, { "epoch": 0.11, "grad_norm": 0.49071365729277694, "learning_rate": 9.8164233296631e-07, "loss": 0.124, "step": 1787 }, { "epoch": 0.11, "grad_norm": 0.596526687363129, "learning_rate": 9.816145954908503e-07, "loss": 0.154, "step": 1788 }, { "epoch": 0.11, "grad_norm": 0.39534540517531486, "learning_rate": 9.815868374687436e-07, "loss": 0.0784, "step": 1789 }, { "epoch": 0.11, "grad_norm": 0.42546356363112214, "learning_rate": 9.815590589011746e-07, "loss": 0.1693, "step": 1790 }, { "epoch": 0.11, "grad_norm": 0.31141271989092173, "learning_rate": 9.815312597893278e-07, "loss": 0.0798, "step": 1791 }, { "epoch": 0.11, "grad_norm": 0.4670777628172246, "learning_rate": 9.815034401343896e-07, "loss": 0.1561, "step": 1792 }, { "epoch": 0.11, "grad_norm": 0.6577163324054129, "learning_rate": 9.814755999375466e-07, "loss": 0.2182, "step": 1793 }, { "epoch": 0.11, "grad_norm": 0.624311653075215, "learning_rate": 9.814477391999867e-07, "loss": 0.3685, "step": 1794 }, { "epoch": 0.11, "grad_norm": 0.4001669059106104, "learning_rate": 9.814198579228985e-07, "loss": 0.2468, "step": 1795 }, { "epoch": 0.11, "grad_norm": 0.6506927806465743, "learning_rate": 9.81391956107471e-07, "loss": 0.2173, "step": 1796 }, { "epoch": 0.11, "grad_norm": 0.6632104392930209, "learning_rate": 9.813640337548954e-07, "loss": 0.213, "step": 1797 }, { "epoch": 0.11, "grad_norm": 0.5259042393102924, "learning_rate": 9.813360908663621e-07, "loss": 0.1574, "step": 1798 }, { "epoch": 0.11, "grad_norm": 0.6201969188977602, "learning_rate": 9.813081274430636e-07, "loss": 0.1324, "step": 1799 }, { "epoch": 0.11, "grad_norm": 1.1144665506804912, "learning_rate": 9.81280143486193e-07, "loss": 0.1556, "step": 1800 }, { "epoch": 0.11, "grad_norm": 0.4889335289184677, "learning_rate": 9.81252138996944e-07, "loss": 0.1724, "step": 1801 }, { "epoch": 0.11, "grad_norm": 0.18328905865515413, "learning_rate": 9.812241139765112e-07, "loss": 0.0105, "step": 1802 }, { "epoch": 0.11, "grad_norm": 1.1410813212247752, "learning_rate": 9.811960684260906e-07, "loss": 0.1477, "step": 1803 }, { "epoch": 0.12, "grad_norm": 0.24938386629140047, "learning_rate": 9.81168002346878e-07, "loss": 0.0134, "step": 1804 }, { "epoch": 0.12, "grad_norm": 0.5242348551646572, "learning_rate": 9.811399157400712e-07, "loss": 0.315, "step": 1805 }, { "epoch": 0.12, "grad_norm": 0.19186140756534406, "learning_rate": 9.811118086068687e-07, "loss": 0.0754, "step": 1806 }, { "epoch": 0.12, "grad_norm": 0.3366882759389052, "learning_rate": 9.810836809484689e-07, "loss": 0.1253, "step": 1807 }, { "epoch": 0.12, "grad_norm": 0.31379195803906235, "learning_rate": 9.810555327660723e-07, "loss": 0.3143, "step": 1808 }, { "epoch": 0.12, "grad_norm": 0.798839770675499, "learning_rate": 9.810273640608798e-07, "loss": 0.151, "step": 1809 }, { "epoch": 0.12, "grad_norm": 0.5005194608080867, "learning_rate": 9.809991748340926e-07, "loss": 0.1507, "step": 1810 }, { "epoch": 0.12, "grad_norm": 0.32667542686374074, "learning_rate": 9.80970965086914e-07, "loss": 0.1517, "step": 1811 }, { "epoch": 0.12, "grad_norm": 1.2176304917892244, "learning_rate": 9.80942734820547e-07, "loss": 0.0326, "step": 1812 }, { "epoch": 0.12, "grad_norm": 0.7991156169284748, "learning_rate": 9.809144840361963e-07, "loss": 0.3402, "step": 1813 }, { "epoch": 0.12, "grad_norm": 0.7622023192574338, "learning_rate": 9.808862127350668e-07, "loss": 0.4426, "step": 1814 }, { "epoch": 0.12, "grad_norm": 0.4018101359851353, "learning_rate": 9.808579209183648e-07, "loss": 0.088, "step": 1815 }, { "epoch": 0.12, "grad_norm": 0.42093073883317284, "learning_rate": 9.808296085872971e-07, "loss": 0.158, "step": 1816 }, { "epoch": 0.12, "grad_norm": 0.31664305537217885, "learning_rate": 9.80801275743072e-07, "loss": 0.1224, "step": 1817 }, { "epoch": 0.12, "grad_norm": 0.34672074617306375, "learning_rate": 9.807729223868978e-07, "loss": 0.1798, "step": 1818 }, { "epoch": 0.12, "grad_norm": 0.3266763970395985, "learning_rate": 9.807445485199842e-07, "loss": 0.0133, "step": 1819 }, { "epoch": 0.12, "grad_norm": 0.7941496523634355, "learning_rate": 9.807161541435417e-07, "loss": 0.4453, "step": 1820 }, { "epoch": 0.12, "grad_norm": 0.5296019648078376, "learning_rate": 9.80687739258782e-07, "loss": 0.205, "step": 1821 }, { "epoch": 0.12, "grad_norm": 0.6639361373691804, "learning_rate": 9.806593038669167e-07, "loss": 0.3553, "step": 1822 }, { "epoch": 0.12, "grad_norm": 0.20200816961047624, "learning_rate": 9.806308479691594e-07, "loss": 0.1893, "step": 1823 }, { "epoch": 0.12, "grad_norm": 0.4058296945525068, "learning_rate": 9.80602371566724e-07, "loss": 0.3192, "step": 1824 }, { "epoch": 0.12, "grad_norm": 0.31605906656818067, "learning_rate": 9.805738746608251e-07, "loss": 0.1119, "step": 1825 }, { "epoch": 0.12, "grad_norm": 0.22778975778087518, "learning_rate": 9.805453572526787e-07, "loss": 0.0946, "step": 1826 }, { "epoch": 0.12, "grad_norm": 0.4639766727675487, "learning_rate": 9.805168193435014e-07, "loss": 0.1883, "step": 1827 }, { "epoch": 0.12, "grad_norm": 0.6688244519223763, "learning_rate": 9.804882609345106e-07, "loss": 0.0828, "step": 1828 }, { "epoch": 0.12, "grad_norm": 0.4956690050397604, "learning_rate": 9.804596820269249e-07, "loss": 0.1604, "step": 1829 }, { "epoch": 0.12, "grad_norm": 0.441950423118919, "learning_rate": 9.80431082621963e-07, "loss": 0.2277, "step": 1830 }, { "epoch": 0.12, "grad_norm": 0.4657042253417374, "learning_rate": 9.804024627208456e-07, "loss": 0.1976, "step": 1831 }, { "epoch": 0.12, "grad_norm": 0.4808679457578324, "learning_rate": 9.80373822324793e-07, "loss": 0.0222, "step": 1832 }, { "epoch": 0.12, "grad_norm": 0.6024860333388573, "learning_rate": 9.803451614350278e-07, "loss": 0.1906, "step": 1833 }, { "epoch": 0.12, "grad_norm": 0.37904837274505715, "learning_rate": 9.803164800527723e-07, "loss": 0.2039, "step": 1834 }, { "epoch": 0.12, "grad_norm": 0.3253697459300525, "learning_rate": 9.802877781792503e-07, "loss": 0.1406, "step": 1835 }, { "epoch": 0.12, "grad_norm": 0.3089051198714158, "learning_rate": 9.802590558156861e-07, "loss": 0.1815, "step": 1836 }, { "epoch": 0.12, "grad_norm": 1.7715997059014499, "learning_rate": 9.802303129633052e-07, "loss": 0.3338, "step": 1837 }, { "epoch": 0.12, "grad_norm": 0.9373057696770362, "learning_rate": 9.802015496233336e-07, "loss": 0.3327, "step": 1838 }, { "epoch": 0.12, "grad_norm": 0.39383632011750946, "learning_rate": 9.801727657969987e-07, "loss": 0.1205, "step": 1839 }, { "epoch": 0.12, "grad_norm": 0.4442621155258366, "learning_rate": 9.801439614855285e-07, "loss": 0.086, "step": 1840 }, { "epoch": 0.12, "grad_norm": 0.36569334529732345, "learning_rate": 9.801151366901514e-07, "loss": 0.1447, "step": 1841 }, { "epoch": 0.12, "grad_norm": 0.6557356390290756, "learning_rate": 9.800862914120975e-07, "loss": 0.3614, "step": 1842 }, { "epoch": 0.12, "grad_norm": 0.2791723331236794, "learning_rate": 9.800574256525974e-07, "loss": 0.0677, "step": 1843 }, { "epoch": 0.12, "grad_norm": 0.4177283348243584, "learning_rate": 9.800285394128824e-07, "loss": 0.1132, "step": 1844 }, { "epoch": 0.12, "grad_norm": 0.5267707960046631, "learning_rate": 9.799996326941849e-07, "loss": 0.1599, "step": 1845 }, { "epoch": 0.12, "grad_norm": 1.349914681365973, "learning_rate": 9.799707054977382e-07, "loss": 0.3591, "step": 1846 }, { "epoch": 0.12, "grad_norm": 0.22452490998338034, "learning_rate": 9.799417578247764e-07, "loss": 0.1453, "step": 1847 }, { "epoch": 0.12, "grad_norm": 0.6393751850329222, "learning_rate": 9.799127896765344e-07, "loss": 0.2642, "step": 1848 }, { "epoch": 0.12, "grad_norm": 0.27700682909654584, "learning_rate": 9.798838010542482e-07, "loss": 0.1942, "step": 1849 }, { "epoch": 0.12, "grad_norm": 0.8556120715818918, "learning_rate": 9.79854791959154e-07, "loss": 0.1704, "step": 1850 }, { "epoch": 0.12, "grad_norm": 0.285872714361692, "learning_rate": 9.798257623924899e-07, "loss": 0.0514, "step": 1851 }, { "epoch": 0.12, "grad_norm": 0.4804723649355166, "learning_rate": 9.797967123554943e-07, "loss": 0.2293, "step": 1852 }, { "epoch": 0.12, "grad_norm": 0.8830097789972284, "learning_rate": 9.797676418494063e-07, "loss": 0.1981, "step": 1853 }, { "epoch": 0.12, "grad_norm": 0.39143972961572615, "learning_rate": 9.797385508754664e-07, "loss": 0.1841, "step": 1854 }, { "epoch": 0.12, "grad_norm": 0.5713067759027882, "learning_rate": 9.797094394349152e-07, "loss": 0.4405, "step": 1855 }, { "epoch": 0.12, "grad_norm": 0.1961522534586305, "learning_rate": 9.796803075289953e-07, "loss": 0.1621, "step": 1856 }, { "epoch": 0.12, "grad_norm": 0.39600435921384797, "learning_rate": 9.79651155158949e-07, "loss": 0.1969, "step": 1857 }, { "epoch": 0.12, "grad_norm": 0.28276205103716984, "learning_rate": 9.796219823260203e-07, "loss": 0.1092, "step": 1858 }, { "epoch": 0.12, "grad_norm": 0.18106232221884677, "learning_rate": 9.795927890314536e-07, "loss": 0.1051, "step": 1859 }, { "epoch": 0.12, "grad_norm": 0.49736151366559794, "learning_rate": 9.795635752764947e-07, "loss": 0.2484, "step": 1860 }, { "epoch": 0.12, "grad_norm": 0.6512118286875864, "learning_rate": 9.795343410623893e-07, "loss": 0.3593, "step": 1861 }, { "epoch": 0.12, "grad_norm": 0.6993336560272034, "learning_rate": 9.795050863903851e-07, "loss": 0.1588, "step": 1862 }, { "epoch": 0.12, "grad_norm": 0.7299234256592234, "learning_rate": 9.7947581126173e-07, "loss": 0.43, "step": 1863 }, { "epoch": 0.12, "grad_norm": 0.8753700912961243, "learning_rate": 9.794465156776727e-07, "loss": 0.2569, "step": 1864 }, { "epoch": 0.12, "grad_norm": 0.7229558812877958, "learning_rate": 9.794171996394636e-07, "loss": 0.4534, "step": 1865 }, { "epoch": 0.12, "grad_norm": 0.3767583159896383, "learning_rate": 9.793878631483528e-07, "loss": 0.2364, "step": 1866 }, { "epoch": 0.12, "grad_norm": 1.0080189319702062, "learning_rate": 9.79358506205592e-07, "loss": 0.2474, "step": 1867 }, { "epoch": 0.12, "grad_norm": 0.32366932783785746, "learning_rate": 9.793291288124339e-07, "loss": 0.2354, "step": 1868 }, { "epoch": 0.12, "grad_norm": 0.41794399068829174, "learning_rate": 9.792997309701314e-07, "loss": 0.2055, "step": 1869 }, { "epoch": 0.12, "grad_norm": 0.8669801849769293, "learning_rate": 9.79270312679939e-07, "loss": 0.3206, "step": 1870 }, { "epoch": 0.12, "grad_norm": 1.1628062491234834, "learning_rate": 9.792408739431115e-07, "loss": 0.1916, "step": 1871 }, { "epoch": 0.12, "grad_norm": 0.22763899811533927, "learning_rate": 9.79211414760905e-07, "loss": 0.0115, "step": 1872 }, { "epoch": 0.12, "grad_norm": 0.8178078855246522, "learning_rate": 9.79181935134576e-07, "loss": 0.2622, "step": 1873 }, { "epoch": 0.12, "grad_norm": 0.5902380529764373, "learning_rate": 9.791524350653825e-07, "loss": 0.3348, "step": 1874 }, { "epoch": 0.12, "grad_norm": 0.3034829566721241, "learning_rate": 9.791229145545832e-07, "loss": 0.163, "step": 1875 }, { "epoch": 0.12, "grad_norm": 0.6596373558294532, "learning_rate": 9.790933736034367e-07, "loss": 0.2436, "step": 1876 }, { "epoch": 0.12, "grad_norm": 1.2110218039827778, "learning_rate": 9.790638122132042e-07, "loss": 0.3134, "step": 1877 }, { "epoch": 0.12, "grad_norm": 0.6319633247353574, "learning_rate": 9.790342303851462e-07, "loss": 0.6044, "step": 1878 }, { "epoch": 0.12, "grad_norm": 0.678909030370214, "learning_rate": 9.79004628120525e-07, "loss": 0.3747, "step": 1879 }, { "epoch": 0.12, "grad_norm": 0.31815956324995, "learning_rate": 9.789750054206035e-07, "loss": 0.1646, "step": 1880 }, { "epoch": 0.12, "grad_norm": 0.8783055343424494, "learning_rate": 9.789453622866453e-07, "loss": 0.1787, "step": 1881 }, { "epoch": 0.12, "grad_norm": 0.20925826497237446, "learning_rate": 9.789156987199154e-07, "loss": 0.0833, "step": 1882 }, { "epoch": 0.12, "grad_norm": 0.5741813855041612, "learning_rate": 9.788860147216788e-07, "loss": 0.3773, "step": 1883 }, { "epoch": 0.12, "grad_norm": 0.9213884749639869, "learning_rate": 9.788563102932021e-07, "loss": 0.2624, "step": 1884 }, { "epoch": 0.12, "grad_norm": 0.4642293176264571, "learning_rate": 9.788265854357527e-07, "loss": 0.2079, "step": 1885 }, { "epoch": 0.12, "grad_norm": 0.7433205464784334, "learning_rate": 9.787968401505987e-07, "loss": 0.2458, "step": 1886 }, { "epoch": 0.12, "grad_norm": 1.3769902608715658, "learning_rate": 9.787670744390088e-07, "loss": 0.4972, "step": 1887 }, { "epoch": 0.12, "grad_norm": 0.2847382639762413, "learning_rate": 9.787372883022531e-07, "loss": 0.2299, "step": 1888 }, { "epoch": 0.12, "grad_norm": 0.4660825408621928, "learning_rate": 9.787074817416023e-07, "loss": 0.233, "step": 1889 }, { "epoch": 0.12, "grad_norm": 0.8257495471447167, "learning_rate": 9.786776547583281e-07, "loss": 0.2907, "step": 1890 }, { "epoch": 0.12, "grad_norm": 0.4428450110946486, "learning_rate": 9.786478073537029e-07, "loss": 0.1748, "step": 1891 }, { "epoch": 0.12, "grad_norm": 0.9232552991844433, "learning_rate": 9.78617939529e-07, "loss": 0.0626, "step": 1892 }, { "epoch": 0.12, "grad_norm": 1.3262947492739359, "learning_rate": 9.785880512854935e-07, "loss": 0.1915, "step": 1893 }, { "epoch": 0.12, "grad_norm": 0.42104398880951505, "learning_rate": 9.785581426244588e-07, "loss": 0.0546, "step": 1894 }, { "epoch": 0.12, "grad_norm": 0.4887469321417179, "learning_rate": 9.785282135471718e-07, "loss": 0.0486, "step": 1895 }, { "epoch": 0.12, "grad_norm": 0.7994460322423196, "learning_rate": 9.78498264054909e-07, "loss": 0.1747, "step": 1896 }, { "epoch": 0.12, "grad_norm": 0.43210748121813813, "learning_rate": 9.784682941489484e-07, "loss": 0.4854, "step": 1897 }, { "epoch": 0.12, "grad_norm": 0.4616134580844671, "learning_rate": 9.784383038305687e-07, "loss": 0.2818, "step": 1898 }, { "epoch": 0.12, "grad_norm": 0.4322884457982743, "learning_rate": 9.78408293101049e-07, "loss": 0.2974, "step": 1899 }, { "epoch": 0.12, "grad_norm": 0.569075289211957, "learning_rate": 9.7837826196167e-07, "loss": 0.0971, "step": 1900 }, { "epoch": 0.12, "grad_norm": 0.5319529424582372, "learning_rate": 9.783482104137127e-07, "loss": 0.1652, "step": 1901 }, { "epoch": 0.12, "grad_norm": 0.41210358606979236, "learning_rate": 9.783181384584589e-07, "loss": 0.0598, "step": 1902 }, { "epoch": 0.12, "grad_norm": 0.48852613679855594, "learning_rate": 9.782880460971918e-07, "loss": 0.2678, "step": 1903 }, { "epoch": 0.12, "grad_norm": 0.6702462521981983, "learning_rate": 9.782579333311954e-07, "loss": 0.1188, "step": 1904 }, { "epoch": 0.12, "grad_norm": 0.5641643685730272, "learning_rate": 9.78227800161754e-07, "loss": 0.039, "step": 1905 }, { "epoch": 0.12, "grad_norm": 0.6205175895376341, "learning_rate": 9.781976465901532e-07, "loss": 0.288, "step": 1906 }, { "epoch": 0.12, "grad_norm": 0.7442542022079933, "learning_rate": 9.781674726176797e-07, "loss": 0.1192, "step": 1907 }, { "epoch": 0.12, "grad_norm": 0.4395414020640999, "learning_rate": 9.781372782456204e-07, "loss": 0.2025, "step": 1908 }, { "epoch": 0.12, "grad_norm": 0.2712850025784868, "learning_rate": 9.781070634752637e-07, "loss": 0.2414, "step": 1909 }, { "epoch": 0.12, "grad_norm": 0.459579176644862, "learning_rate": 9.780768283078986e-07, "loss": 0.1618, "step": 1910 }, { "epoch": 0.12, "grad_norm": 0.37994028366968474, "learning_rate": 9.780465727448149e-07, "loss": 0.1511, "step": 1911 }, { "epoch": 0.12, "grad_norm": 1.332826870108756, "learning_rate": 9.780162967873034e-07, "loss": 0.1548, "step": 1912 }, { "epoch": 0.12, "grad_norm": 0.7704599714078209, "learning_rate": 9.779860004366559e-07, "loss": 0.4119, "step": 1913 }, { "epoch": 0.12, "grad_norm": 0.6648900381526484, "learning_rate": 9.779556836941645e-07, "loss": 0.2656, "step": 1914 }, { "epoch": 0.12, "grad_norm": 0.4220809764577071, "learning_rate": 9.77925346561123e-07, "loss": 0.2177, "step": 1915 }, { "epoch": 0.12, "grad_norm": 0.6930813390204651, "learning_rate": 9.778949890388254e-07, "loss": 0.1372, "step": 1916 }, { "epoch": 0.12, "grad_norm": 0.5281738830083382, "learning_rate": 9.778646111285667e-07, "loss": 0.1023, "step": 1917 }, { "epoch": 0.12, "grad_norm": 0.14482613321771215, "learning_rate": 9.778342128316432e-07, "loss": 0.0347, "step": 1918 }, { "epoch": 0.12, "grad_norm": 0.3616251398336747, "learning_rate": 9.778037941493518e-07, "loss": 0.1497, "step": 1919 }, { "epoch": 0.12, "grad_norm": 2.3712443218442867, "learning_rate": 9.7777335508299e-07, "loss": 0.4352, "step": 1920 }, { "epoch": 0.12, "grad_norm": 0.6481102215820237, "learning_rate": 9.777428956338562e-07, "loss": 0.2932, "step": 1921 }, { "epoch": 0.12, "grad_norm": 0.6974521070601287, "learning_rate": 9.777124158032502e-07, "loss": 0.244, "step": 1922 }, { "epoch": 0.12, "grad_norm": 0.8303239942182551, "learning_rate": 9.776819155924724e-07, "loss": 0.1638, "step": 1923 }, { "epoch": 0.12, "grad_norm": 0.3098626885906418, "learning_rate": 9.776513950028235e-07, "loss": 0.2514, "step": 1924 }, { "epoch": 0.12, "grad_norm": 0.6372159472920058, "learning_rate": 9.77620854035606e-07, "loss": 0.3313, "step": 1925 }, { "epoch": 0.12, "grad_norm": 0.41749912640256737, "learning_rate": 9.775902926921228e-07, "loss": 0.1376, "step": 1926 }, { "epoch": 0.12, "grad_norm": 0.7259535137665333, "learning_rate": 9.775597109736774e-07, "loss": 0.1202, "step": 1927 }, { "epoch": 0.12, "grad_norm": 0.7724715954337328, "learning_rate": 9.77529108881575e-07, "loss": 0.3796, "step": 1928 }, { "epoch": 0.12, "grad_norm": 0.36036089208488853, "learning_rate": 9.77498486417121e-07, "loss": 0.3772, "step": 1929 }, { "epoch": 0.12, "grad_norm": 0.3278680317156285, "learning_rate": 9.774678435816212e-07, "loss": 0.3091, "step": 1930 }, { "epoch": 0.12, "grad_norm": 0.8595227782650138, "learning_rate": 9.774371803763837e-07, "loss": 0.2001, "step": 1931 }, { "epoch": 0.12, "grad_norm": 0.4584120445869711, "learning_rate": 9.774064968027162e-07, "loss": 0.0665, "step": 1932 }, { "epoch": 0.12, "grad_norm": 0.3755435083092351, "learning_rate": 9.77375792861928e-07, "loss": 0.0687, "step": 1933 }, { "epoch": 0.12, "grad_norm": 1.2137723759695154, "learning_rate": 9.773450685553285e-07, "loss": 0.1917, "step": 1934 }, { "epoch": 0.12, "grad_norm": 0.9278592061036838, "learning_rate": 9.773143238842293e-07, "loss": 0.1079, "step": 1935 }, { "epoch": 0.12, "grad_norm": 0.8340899317370496, "learning_rate": 9.77283558849941e-07, "loss": 0.1827, "step": 1936 }, { "epoch": 0.12, "grad_norm": 0.3445475578282629, "learning_rate": 9.77252773453777e-07, "loss": 0.1261, "step": 1937 }, { "epoch": 0.12, "grad_norm": 0.4587220861021094, "learning_rate": 9.7722196769705e-07, "loss": 0.2183, "step": 1938 }, { "epoch": 0.12, "grad_norm": 0.5146792374457008, "learning_rate": 9.771911415810746e-07, "loss": 0.4111, "step": 1939 }, { "epoch": 0.12, "grad_norm": 0.5514524419459433, "learning_rate": 9.77160295107166e-07, "loss": 0.1075, "step": 1940 }, { "epoch": 0.12, "grad_norm": 1.2040956696373666, "learning_rate": 9.771294282766399e-07, "loss": 0.1792, "step": 1941 }, { "epoch": 0.12, "grad_norm": 1.3809124391047485, "learning_rate": 9.77098541090813e-07, "loss": 0.155, "step": 1942 }, { "epoch": 0.12, "grad_norm": 0.6677320796447382, "learning_rate": 9.770676335510036e-07, "loss": 0.0639, "step": 1943 }, { "epoch": 0.12, "grad_norm": 0.6887584945356219, "learning_rate": 9.770367056585298e-07, "loss": 0.0538, "step": 1944 }, { "epoch": 0.12, "grad_norm": 1.1499177396189217, "learning_rate": 9.77005757414711e-07, "loss": 0.042, "step": 1945 }, { "epoch": 0.12, "grad_norm": 0.508774266944597, "learning_rate": 9.769747888208678e-07, "loss": 0.1313, "step": 1946 }, { "epoch": 0.12, "grad_norm": 0.3653623836731963, "learning_rate": 9.769437998783214e-07, "loss": 0.2212, "step": 1947 }, { "epoch": 0.12, "grad_norm": 0.2563068904160823, "learning_rate": 9.769127905883937e-07, "loss": 0.0084, "step": 1948 }, { "epoch": 0.12, "grad_norm": 0.8803497788997626, "learning_rate": 9.768817609524075e-07, "loss": 0.2744, "step": 1949 }, { "epoch": 0.12, "grad_norm": 0.4595795259897732, "learning_rate": 9.768507109716868e-07, "loss": 0.1341, "step": 1950 }, { "epoch": 0.12, "grad_norm": 0.6557764103477656, "learning_rate": 9.768196406475563e-07, "loss": 0.3394, "step": 1951 }, { "epoch": 0.12, "grad_norm": 0.6006556222801455, "learning_rate": 9.76788549981341e-07, "loss": 0.0615, "step": 1952 }, { "epoch": 0.12, "grad_norm": 0.2228324990992887, "learning_rate": 9.767574389743681e-07, "loss": 0.1342, "step": 1953 }, { "epoch": 0.12, "grad_norm": 0.7179146374553077, "learning_rate": 9.767263076279643e-07, "loss": 0.1997, "step": 1954 }, { "epoch": 0.12, "grad_norm": 0.9693337983360613, "learning_rate": 9.76695155943458e-07, "loss": 0.112, "step": 1955 }, { "epoch": 0.12, "grad_norm": 0.667522800453616, "learning_rate": 9.76663983922178e-07, "loss": 0.1727, "step": 1956 }, { "epoch": 0.12, "grad_norm": 0.5507093670357065, "learning_rate": 9.766327915654541e-07, "loss": 0.1278, "step": 1957 }, { "epoch": 0.12, "grad_norm": 0.6860862337370913, "learning_rate": 9.766015788746173e-07, "loss": 0.3761, "step": 1958 }, { "epoch": 0.12, "grad_norm": 0.40121908942416445, "learning_rate": 9.76570345850999e-07, "loss": 0.1011, "step": 1959 }, { "epoch": 0.12, "grad_norm": 0.411413968367731, "learning_rate": 9.765390924959317e-07, "loss": 0.368, "step": 1960 }, { "epoch": 0.13, "grad_norm": 0.2786316838571697, "learning_rate": 9.765078188107487e-07, "loss": 0.1497, "step": 1961 }, { "epoch": 0.13, "grad_norm": 1.059609911889508, "learning_rate": 9.764765247967843e-07, "loss": 0.4543, "step": 1962 }, { "epoch": 0.13, "grad_norm": 1.1661323197525828, "learning_rate": 9.764452104553736e-07, "loss": 0.2113, "step": 1963 }, { "epoch": 0.13, "grad_norm": 0.534199010122491, "learning_rate": 9.764138757878524e-07, "loss": 0.1222, "step": 1964 }, { "epoch": 0.13, "grad_norm": 0.24197334755360245, "learning_rate": 9.763825207955577e-07, "loss": 0.027, "step": 1965 }, { "epoch": 0.13, "grad_norm": 0.46587338170337794, "learning_rate": 9.763511454798266e-07, "loss": 0.478, "step": 1966 }, { "epoch": 0.13, "grad_norm": 0.32889061524493196, "learning_rate": 9.763197498419984e-07, "loss": 0.1475, "step": 1967 }, { "epoch": 0.13, "grad_norm": 0.42973472107561617, "learning_rate": 9.76288333883412e-07, "loss": 0.1379, "step": 1968 }, { "epoch": 0.13, "grad_norm": 0.34327729390832834, "learning_rate": 9.76256897605408e-07, "loss": 0.1892, "step": 1969 }, { "epoch": 0.13, "grad_norm": 0.8733412381690852, "learning_rate": 9.76225441009327e-07, "loss": 0.1151, "step": 1970 }, { "epoch": 0.13, "grad_norm": 0.6990838412039561, "learning_rate": 9.761939640965118e-07, "loss": 0.2961, "step": 1971 }, { "epoch": 0.13, "grad_norm": 1.103030258430781, "learning_rate": 9.761624668683044e-07, "loss": 0.184, "step": 1972 }, { "epoch": 0.13, "grad_norm": 0.47537482875737563, "learning_rate": 9.761309493260492e-07, "loss": 0.2184, "step": 1973 }, { "epoch": 0.13, "grad_norm": 0.8109631954245425, "learning_rate": 9.760994114710904e-07, "loss": 0.1935, "step": 1974 }, { "epoch": 0.13, "grad_norm": 0.7905566985070605, "learning_rate": 9.76067853304774e-07, "loss": 0.1877, "step": 1975 }, { "epoch": 0.13, "grad_norm": 0.31555085957082785, "learning_rate": 9.760362748284456e-07, "loss": 0.1956, "step": 1976 }, { "epoch": 0.13, "grad_norm": 2.145092587492478, "learning_rate": 9.760046760434529e-07, "loss": 0.485, "step": 1977 }, { "epoch": 0.13, "grad_norm": 0.7701666012878458, "learning_rate": 9.759730569511438e-07, "loss": 0.3231, "step": 1978 }, { "epoch": 0.13, "grad_norm": 0.7721610642616265, "learning_rate": 9.759414175528671e-07, "loss": 0.189, "step": 1979 }, { "epoch": 0.13, "grad_norm": 1.0110930986742899, "learning_rate": 9.75909757849973e-07, "loss": 0.1744, "step": 1980 }, { "epoch": 0.13, "grad_norm": 0.6197373928767853, "learning_rate": 9.758780778438117e-07, "loss": 0.0371, "step": 1981 }, { "epoch": 0.13, "grad_norm": 0.9999683879013466, "learning_rate": 9.758463775357352e-07, "loss": 0.133, "step": 1982 }, { "epoch": 0.13, "grad_norm": 0.5769867949119858, "learning_rate": 9.758146569270956e-07, "loss": 0.0779, "step": 1983 }, { "epoch": 0.13, "grad_norm": 0.5880562635464869, "learning_rate": 9.757829160192462e-07, "loss": 0.2503, "step": 1984 }, { "epoch": 0.13, "grad_norm": 0.3987440069337375, "learning_rate": 9.757511548135411e-07, "loss": 0.1259, "step": 1985 }, { "epoch": 0.13, "grad_norm": 0.4624906612642204, "learning_rate": 9.757193733113355e-07, "loss": 0.141, "step": 1986 }, { "epoch": 0.13, "grad_norm": 0.45979987576149456, "learning_rate": 9.75687571513985e-07, "loss": 0.0776, "step": 1987 }, { "epoch": 0.13, "grad_norm": 0.5147345228133103, "learning_rate": 9.756557494228464e-07, "loss": 0.1651, "step": 1988 }, { "epoch": 0.13, "grad_norm": 0.6293209370364992, "learning_rate": 9.756239070392776e-07, "loss": 0.1009, "step": 1989 }, { "epoch": 0.13, "grad_norm": 0.5651852931621907, "learning_rate": 9.755920443646364e-07, "loss": 0.3339, "step": 1990 }, { "epoch": 0.13, "grad_norm": 0.9320924431987775, "learning_rate": 9.755601614002828e-07, "loss": 0.0483, "step": 1991 }, { "epoch": 0.13, "grad_norm": 0.8842636608675487, "learning_rate": 9.755282581475767e-07, "loss": 0.2291, "step": 1992 }, { "epoch": 0.13, "grad_norm": 0.43139096736901833, "learning_rate": 9.754963346078792e-07, "loss": 0.1092, "step": 1993 }, { "epoch": 0.13, "grad_norm": 0.6004594186436094, "learning_rate": 9.754643907825522e-07, "loss": 0.2012, "step": 1994 }, { "epoch": 0.13, "grad_norm": 0.7253309011594199, "learning_rate": 9.754324266729583e-07, "loss": 0.5189, "step": 1995 }, { "epoch": 0.13, "grad_norm": 0.26812043890197435, "learning_rate": 9.754004422804616e-07, "loss": 0.1146, "step": 1996 }, { "epoch": 0.13, "grad_norm": 0.6805844637181703, "learning_rate": 9.753684376064262e-07, "loss": 0.317, "step": 1997 }, { "epoch": 0.13, "grad_norm": 0.9486966659565942, "learning_rate": 9.753364126522177e-07, "loss": 0.4689, "step": 1998 }, { "epoch": 0.13, "grad_norm": 0.5718265247519756, "learning_rate": 9.753043674192022e-07, "loss": 0.2353, "step": 1999 }, { "epoch": 0.13, "grad_norm": 0.8737459338518712, "learning_rate": 9.75272301908747e-07, "loss": 0.2111, "step": 2000 }, { "epoch": 0.13, "grad_norm": 0.39156741362398484, "learning_rate": 9.7524021612222e-07, "loss": 0.201, "step": 2001 }, { "epoch": 0.13, "grad_norm": 0.7781864891728763, "learning_rate": 9.7520811006099e-07, "loss": 0.0336, "step": 2002 }, { "epoch": 0.13, "grad_norm": 0.5638928565719611, "learning_rate": 9.751759837264267e-07, "loss": 0.2304, "step": 2003 }, { "epoch": 0.13, "grad_norm": 0.7209768030488014, "learning_rate": 9.751438371199006e-07, "loss": 0.3343, "step": 2004 }, { "epoch": 0.13, "grad_norm": 0.519758722621288, "learning_rate": 9.751116702427833e-07, "loss": 0.332, "step": 2005 }, { "epoch": 0.13, "grad_norm": 0.7667403647507963, "learning_rate": 9.750794830964472e-07, "loss": 0.205, "step": 2006 }, { "epoch": 0.13, "grad_norm": 0.4957310847385013, "learning_rate": 9.750472756822652e-07, "loss": 0.2274, "step": 2007 }, { "epoch": 0.13, "grad_norm": 0.29985426137213894, "learning_rate": 9.750150480016114e-07, "loss": 0.1888, "step": 2008 }, { "epoch": 0.13, "grad_norm": 0.4795539366511488, "learning_rate": 9.74982800055861e-07, "loss": 0.2566, "step": 2009 }, { "epoch": 0.13, "grad_norm": 0.6106050578516774, "learning_rate": 9.749505318463894e-07, "loss": 0.2971, "step": 2010 }, { "epoch": 0.13, "grad_norm": 0.3604109131880696, "learning_rate": 9.749182433745732e-07, "loss": 0.1832, "step": 2011 }, { "epoch": 0.13, "grad_norm": 0.7558736410748353, "learning_rate": 9.7488593464179e-07, "loss": 0.3393, "step": 2012 }, { "epoch": 0.13, "grad_norm": 0.515464229619306, "learning_rate": 9.748536056494186e-07, "loss": 0.238, "step": 2013 }, { "epoch": 0.13, "grad_norm": 0.7572105402428722, "learning_rate": 9.748212563988375e-07, "loss": 0.1486, "step": 2014 }, { "epoch": 0.13, "grad_norm": 0.6228214472009372, "learning_rate": 9.74788886891427e-07, "loss": 0.1377, "step": 2015 }, { "epoch": 0.13, "grad_norm": 1.5848550671694532, "learning_rate": 9.747564971285684e-07, "loss": 0.2804, "step": 2016 }, { "epoch": 0.13, "grad_norm": 0.6466063998668526, "learning_rate": 9.747240871116432e-07, "loss": 0.2071, "step": 2017 }, { "epoch": 0.13, "grad_norm": 0.6349211533366779, "learning_rate": 9.74691656842034e-07, "loss": 0.1981, "step": 2018 }, { "epoch": 0.13, "grad_norm": 0.3721453325045769, "learning_rate": 9.746592063211246e-07, "loss": 0.2082, "step": 2019 }, { "epoch": 0.13, "grad_norm": 0.15608617273693193, "learning_rate": 9.746267355502991e-07, "loss": 0.0133, "step": 2020 }, { "epoch": 0.13, "grad_norm": 0.6938914285732772, "learning_rate": 9.74594244530943e-07, "loss": 0.1737, "step": 2021 }, { "epoch": 0.13, "grad_norm": 0.3990814093962295, "learning_rate": 9.745617332644424e-07, "loss": 0.2083, "step": 2022 }, { "epoch": 0.13, "grad_norm": 0.3651754782509643, "learning_rate": 9.745292017521842e-07, "loss": 0.2143, "step": 2023 }, { "epoch": 0.13, "grad_norm": 0.4697368479888514, "learning_rate": 9.744966499955565e-07, "loss": 0.2256, "step": 2024 }, { "epoch": 0.13, "grad_norm": 0.43175384708877673, "learning_rate": 9.744640779959477e-07, "loss": 0.3387, "step": 2025 }, { "epoch": 0.13, "grad_norm": 0.47758884710700433, "learning_rate": 9.744314857547476e-07, "loss": 0.1533, "step": 2026 }, { "epoch": 0.13, "grad_norm": 0.9890250844300368, "learning_rate": 9.743988732733466e-07, "loss": 0.2262, "step": 2027 }, { "epoch": 0.13, "grad_norm": 0.2470302772005145, "learning_rate": 9.743662405531359e-07, "loss": 0.0653, "step": 2028 }, { "epoch": 0.13, "grad_norm": 1.0875355284708483, "learning_rate": 9.74333587595508e-07, "loss": 0.2291, "step": 2029 }, { "epoch": 0.13, "grad_norm": 0.753026424832712, "learning_rate": 9.743009144018556e-07, "loss": 0.3202, "step": 2030 }, { "epoch": 0.13, "grad_norm": 0.7075134086327713, "learning_rate": 9.742682209735727e-07, "loss": 0.1801, "step": 2031 }, { "epoch": 0.13, "grad_norm": 0.4536577664712018, "learning_rate": 9.742355073120542e-07, "loss": 0.2078, "step": 2032 }, { "epoch": 0.13, "grad_norm": 0.8891863029578857, "learning_rate": 9.742027734186955e-07, "loss": 0.3736, "step": 2033 }, { "epoch": 0.13, "grad_norm": 0.38843643792649585, "learning_rate": 9.741700192948934e-07, "loss": 0.1576, "step": 2034 }, { "epoch": 0.13, "grad_norm": 0.4305876125032848, "learning_rate": 9.741372449420448e-07, "loss": 0.3007, "step": 2035 }, { "epoch": 0.13, "grad_norm": 0.8447349856615494, "learning_rate": 9.741044503615484e-07, "loss": 0.2363, "step": 2036 }, { "epoch": 0.13, "grad_norm": 1.066234023029225, "learning_rate": 9.740716355548028e-07, "loss": 0.3245, "step": 2037 }, { "epoch": 0.13, "grad_norm": 0.49150715679904, "learning_rate": 9.740388005232085e-07, "loss": 0.0474, "step": 2038 }, { "epoch": 0.13, "grad_norm": 0.41928467630095934, "learning_rate": 9.74005945268166e-07, "loss": 0.1455, "step": 2039 }, { "epoch": 0.13, "grad_norm": 0.42009305044203954, "learning_rate": 9.73973069791077e-07, "loss": 0.0243, "step": 2040 }, { "epoch": 0.13, "grad_norm": 0.39566816871536853, "learning_rate": 9.73940174093344e-07, "loss": 0.1708, "step": 2041 }, { "epoch": 0.13, "grad_norm": 0.5963582248153219, "learning_rate": 9.739072581763704e-07, "loss": 0.04, "step": 2042 }, { "epoch": 0.13, "grad_norm": 1.0059675591387007, "learning_rate": 9.738743220415607e-07, "loss": 0.2017, "step": 2043 }, { "epoch": 0.13, "grad_norm": 0.46461982600549445, "learning_rate": 9.738413656903197e-07, "loss": 0.2319, "step": 2044 }, { "epoch": 0.13, "grad_norm": 1.8383189109340576, "learning_rate": 9.738083891240534e-07, "loss": 0.2545, "step": 2045 }, { "epoch": 0.13, "grad_norm": 0.5723536110817462, "learning_rate": 9.737753923441687e-07, "loss": 0.2316, "step": 2046 }, { "epoch": 0.13, "grad_norm": 0.5474228215810266, "learning_rate": 9.737423753520734e-07, "loss": 0.1426, "step": 2047 }, { "epoch": 0.13, "grad_norm": 0.48171784417932656, "learning_rate": 9.737093381491761e-07, "loss": 0.2324, "step": 2048 }, { "epoch": 0.13, "grad_norm": 0.3155652055117869, "learning_rate": 9.73676280736886e-07, "loss": 0.2159, "step": 2049 }, { "epoch": 0.13, "grad_norm": 0.8692302547729482, "learning_rate": 9.736432031166138e-07, "loss": 0.2041, "step": 2050 }, { "epoch": 0.13, "grad_norm": 0.4674316385906084, "learning_rate": 9.736101052897704e-07, "loss": 0.3144, "step": 2051 }, { "epoch": 0.13, "grad_norm": 0.7351636609704858, "learning_rate": 9.735769872577677e-07, "loss": 0.0644, "step": 2052 }, { "epoch": 0.13, "grad_norm": 0.4570046401607926, "learning_rate": 9.735438490220186e-07, "loss": 0.022, "step": 2053 }, { "epoch": 0.13, "grad_norm": 0.8104489250680039, "learning_rate": 9.735106905839372e-07, "loss": 0.2011, "step": 2054 }, { "epoch": 0.13, "grad_norm": 0.4759229744029291, "learning_rate": 9.734775119449378e-07, "loss": 0.2255, "step": 2055 }, { "epoch": 0.13, "grad_norm": 0.594143220537935, "learning_rate": 9.73444313106436e-07, "loss": 0.1352, "step": 2056 }, { "epoch": 0.13, "grad_norm": 0.6073301301142678, "learning_rate": 9.73411094069848e-07, "loss": 0.1248, "step": 2057 }, { "epoch": 0.13, "grad_norm": 0.5823100815634501, "learning_rate": 9.73377854836591e-07, "loss": 0.2456, "step": 2058 }, { "epoch": 0.13, "grad_norm": 0.7276842922878295, "learning_rate": 9.73344595408083e-07, "loss": 0.2906, "step": 2059 }, { "epoch": 0.13, "grad_norm": 0.4718660706642817, "learning_rate": 9.733113157857433e-07, "loss": 0.1925, "step": 2060 }, { "epoch": 0.13, "grad_norm": 0.3890053909921082, "learning_rate": 9.732780159709912e-07, "loss": 0.0667, "step": 2061 }, { "epoch": 0.13, "grad_norm": 0.5305031062772791, "learning_rate": 9.732446959652475e-07, "loss": 0.1155, "step": 2062 }, { "epoch": 0.13, "grad_norm": 0.7979789520933933, "learning_rate": 9.732113557699337e-07, "loss": 0.0327, "step": 2063 }, { "epoch": 0.13, "grad_norm": 0.34268174794172396, "learning_rate": 9.731779953864723e-07, "loss": 0.0952, "step": 2064 }, { "epoch": 0.13, "grad_norm": 0.6587714619738644, "learning_rate": 9.731446148162866e-07, "loss": 0.1926, "step": 2065 }, { "epoch": 0.13, "grad_norm": 0.5532658298962123, "learning_rate": 9.731112140608003e-07, "loss": 0.19, "step": 2066 }, { "epoch": 0.13, "grad_norm": 1.2003756135881918, "learning_rate": 9.730777931214383e-07, "loss": 0.4008, "step": 2067 }, { "epoch": 0.13, "grad_norm": 0.6556766157080656, "learning_rate": 9.730443519996269e-07, "loss": 0.2006, "step": 2068 }, { "epoch": 0.13, "grad_norm": 0.899811363077273, "learning_rate": 9.730108906967923e-07, "loss": 0.2034, "step": 2069 }, { "epoch": 0.13, "grad_norm": 1.1664057054531014, "learning_rate": 9.729774092143626e-07, "loss": 0.1783, "step": 2070 }, { "epoch": 0.13, "grad_norm": 0.18792340268741953, "learning_rate": 9.729439075537655e-07, "loss": 0.099, "step": 2071 }, { "epoch": 0.13, "grad_norm": 0.3933597350424269, "learning_rate": 9.729103857164308e-07, "loss": 0.1759, "step": 2072 }, { "epoch": 0.13, "grad_norm": 0.8924769614688599, "learning_rate": 9.728768437037882e-07, "loss": 0.1931, "step": 2073 }, { "epoch": 0.13, "grad_norm": 0.7779664685121285, "learning_rate": 9.728432815172688e-07, "loss": 0.1541, "step": 2074 }, { "epoch": 0.13, "grad_norm": 0.5050616246557152, "learning_rate": 9.728096991583047e-07, "loss": 0.2209, "step": 2075 }, { "epoch": 0.13, "grad_norm": 0.9552024925810386, "learning_rate": 9.727760966283283e-07, "loss": 0.1533, "step": 2076 }, { "epoch": 0.13, "grad_norm": 1.2126531806060135, "learning_rate": 9.727424739287731e-07, "loss": 0.2265, "step": 2077 }, { "epoch": 0.13, "grad_norm": 0.2747233731372835, "learning_rate": 9.727088310610738e-07, "loss": 0.1232, "step": 2078 }, { "epoch": 0.13, "grad_norm": 0.6592036867883576, "learning_rate": 9.726751680266652e-07, "loss": 0.3436, "step": 2079 }, { "epoch": 0.13, "grad_norm": 0.6027860660602337, "learning_rate": 9.72641484826984e-07, "loss": 0.2032, "step": 2080 }, { "epoch": 0.13, "grad_norm": 0.7120638297149764, "learning_rate": 9.72607781463467e-07, "loss": 0.39, "step": 2081 }, { "epoch": 0.13, "grad_norm": 0.7869806889949933, "learning_rate": 9.725740579375516e-07, "loss": 0.1947, "step": 2082 }, { "epoch": 0.13, "grad_norm": 0.7442396837728845, "learning_rate": 9.725403142506772e-07, "loss": 0.0312, "step": 2083 }, { "epoch": 0.13, "grad_norm": 1.2172920833957057, "learning_rate": 9.725065504042832e-07, "loss": 0.2183, "step": 2084 }, { "epoch": 0.13, "grad_norm": 0.2403537132885519, "learning_rate": 9.724727663998096e-07, "loss": 0.0849, "step": 2085 }, { "epoch": 0.13, "grad_norm": 0.4990650843509188, "learning_rate": 9.724389622386982e-07, "loss": 0.19, "step": 2086 }, { "epoch": 0.13, "grad_norm": 1.4999932947105477, "learning_rate": 9.724051379223908e-07, "loss": 0.4414, "step": 2087 }, { "epoch": 0.13, "grad_norm": 0.9431875913326456, "learning_rate": 9.723712934523306e-07, "loss": 0.2161, "step": 2088 }, { "epoch": 0.13, "grad_norm": 0.9023665974170813, "learning_rate": 9.723374288299614e-07, "loss": 0.3656, "step": 2089 }, { "epoch": 0.13, "grad_norm": 0.21688367093736713, "learning_rate": 9.72303544056728e-07, "loss": 0.1726, "step": 2090 }, { "epoch": 0.13, "grad_norm": 0.5682566962497361, "learning_rate": 9.72269639134076e-07, "loss": 0.2856, "step": 2091 }, { "epoch": 0.13, "grad_norm": 0.24899213121243702, "learning_rate": 9.722357140634518e-07, "loss": 0.0443, "step": 2092 }, { "epoch": 0.13, "grad_norm": 0.8775025550605597, "learning_rate": 9.722017688463026e-07, "loss": 0.2971, "step": 2093 }, { "epoch": 0.13, "grad_norm": 0.4040510763057725, "learning_rate": 9.72167803484077e-07, "loss": 0.1474, "step": 2094 }, { "epoch": 0.13, "grad_norm": 0.5417357806702155, "learning_rate": 9.721338179782235e-07, "loss": 0.1652, "step": 2095 }, { "epoch": 0.13, "grad_norm": 0.38910553878469045, "learning_rate": 9.720998123301922e-07, "loss": 0.1546, "step": 2096 }, { "epoch": 0.13, "grad_norm": 0.7782408360767257, "learning_rate": 9.720657865414338e-07, "loss": 0.1887, "step": 2097 }, { "epoch": 0.13, "grad_norm": 0.8654013443872984, "learning_rate": 9.720317406134002e-07, "loss": 0.1594, "step": 2098 }, { "epoch": 0.13, "grad_norm": 0.6049097490226051, "learning_rate": 9.719976745475435e-07, "loss": 0.1257, "step": 2099 }, { "epoch": 0.13, "grad_norm": 1.2215042007525334, "learning_rate": 9.719635883453174e-07, "loss": 0.2492, "step": 2100 }, { "epoch": 0.13, "grad_norm": 0.6352060577546463, "learning_rate": 9.719294820081755e-07, "loss": 0.1563, "step": 2101 }, { "epoch": 0.13, "grad_norm": 0.627771964044561, "learning_rate": 9.718953555375734e-07, "loss": 0.1785, "step": 2102 }, { "epoch": 0.13, "grad_norm": 0.9944669068364482, "learning_rate": 9.718612089349669e-07, "loss": 0.2305, "step": 2103 }, { "epoch": 0.13, "grad_norm": 0.5532890803846539, "learning_rate": 9.718270422018124e-07, "loss": 0.1413, "step": 2104 }, { "epoch": 0.13, "grad_norm": 0.6236204924539288, "learning_rate": 9.71792855339568e-07, "loss": 0.0515, "step": 2105 }, { "epoch": 0.13, "grad_norm": 0.7052950318639188, "learning_rate": 9.71758648349692e-07, "loss": 0.203, "step": 2106 }, { "epoch": 0.13, "grad_norm": 1.42809166385761, "learning_rate": 9.717244212336436e-07, "loss": 0.0337, "step": 2107 }, { "epoch": 0.13, "grad_norm": 0.41174206684281756, "learning_rate": 9.716901739928831e-07, "loss": 0.1834, "step": 2108 }, { "epoch": 0.13, "grad_norm": 0.665161076998366, "learning_rate": 9.716559066288714e-07, "loss": 0.2682, "step": 2109 }, { "epoch": 0.13, "grad_norm": 0.4224887983642203, "learning_rate": 9.716216191430708e-07, "loss": 0.1781, "step": 2110 }, { "epoch": 0.13, "grad_norm": 0.2939570486585365, "learning_rate": 9.715873115369439e-07, "loss": 0.1703, "step": 2111 }, { "epoch": 0.13, "grad_norm": 0.6171708726170932, "learning_rate": 9.71552983811954e-07, "loss": 0.1676, "step": 2112 }, { "epoch": 0.13, "grad_norm": 0.8375808528722852, "learning_rate": 9.71518635969566e-07, "loss": 0.1593, "step": 2113 }, { "epoch": 0.13, "grad_norm": 0.5639128985805464, "learning_rate": 9.714842680112455e-07, "loss": 0.3167, "step": 2114 }, { "epoch": 0.13, "grad_norm": 0.550662219443129, "learning_rate": 9.714498799384578e-07, "loss": 0.2902, "step": 2115 }, { "epoch": 0.13, "grad_norm": 0.5102697864545089, "learning_rate": 9.714154717526708e-07, "loss": 0.2088, "step": 2116 }, { "epoch": 0.14, "grad_norm": 0.5303789548006846, "learning_rate": 9.713810434553519e-07, "loss": 0.1205, "step": 2117 }, { "epoch": 0.14, "grad_norm": 0.42365031034017475, "learning_rate": 9.713465950479703e-07, "loss": 0.1049, "step": 2118 }, { "epoch": 0.14, "grad_norm": 0.31375022282377896, "learning_rate": 9.713121265319952e-07, "loss": 0.2105, "step": 2119 }, { "epoch": 0.14, "grad_norm": 0.44697664741525117, "learning_rate": 9.712776379088975e-07, "loss": 0.2007, "step": 2120 }, { "epoch": 0.14, "grad_norm": 0.63302274232392, "learning_rate": 9.712431291801482e-07, "loss": 0.2584, "step": 2121 }, { "epoch": 0.14, "grad_norm": 0.9660387719003406, "learning_rate": 9.7120860034722e-07, "loss": 0.1291, "step": 2122 }, { "epoch": 0.14, "grad_norm": 1.706858797654844, "learning_rate": 9.711740514115853e-07, "loss": 0.1969, "step": 2123 }, { "epoch": 0.14, "grad_norm": 0.4814523309403148, "learning_rate": 9.711394823747185e-07, "loss": 0.1819, "step": 2124 }, { "epoch": 0.14, "grad_norm": 0.36670555841369007, "learning_rate": 9.711048932380944e-07, "loss": 0.1673, "step": 2125 }, { "epoch": 0.14, "grad_norm": 0.6323553144994762, "learning_rate": 9.710702840031885e-07, "loss": 0.4136, "step": 2126 }, { "epoch": 0.14, "grad_norm": 0.5761880647757363, "learning_rate": 9.710356546714772e-07, "loss": 0.1918, "step": 2127 }, { "epoch": 0.14, "grad_norm": 0.49994957241813537, "learning_rate": 9.71001005244438e-07, "loss": 0.1708, "step": 2128 }, { "epoch": 0.14, "grad_norm": 0.6593229239558963, "learning_rate": 9.709663357235492e-07, "loss": 0.1929, "step": 2129 }, { "epoch": 0.14, "grad_norm": 0.608489529045219, "learning_rate": 9.709316461102897e-07, "loss": 0.125, "step": 2130 }, { "epoch": 0.14, "grad_norm": 1.468802224090361, "learning_rate": 9.708969364061394e-07, "loss": 0.2931, "step": 2131 }, { "epoch": 0.14, "grad_norm": 0.49492412495848126, "learning_rate": 9.708622066125793e-07, "loss": 0.2214, "step": 2132 }, { "epoch": 0.14, "grad_norm": 0.18891886191960253, "learning_rate": 9.708274567310908e-07, "loss": 0.0844, "step": 2133 }, { "epoch": 0.14, "grad_norm": 0.7454011611673332, "learning_rate": 9.707926867631566e-07, "loss": 0.211, "step": 2134 }, { "epoch": 0.14, "grad_norm": 0.6777257527568694, "learning_rate": 9.707578967102598e-07, "loss": 0.2128, "step": 2135 }, { "epoch": 0.14, "grad_norm": 0.7101565817484278, "learning_rate": 9.70723086573885e-07, "loss": 0.0073, "step": 2136 }, { "epoch": 0.14, "grad_norm": 0.5642559087375111, "learning_rate": 9.70688256355517e-07, "loss": 0.0641, "step": 2137 }, { "epoch": 0.14, "grad_norm": 0.44727627988655927, "learning_rate": 9.706534060566418e-07, "loss": 0.1266, "step": 2138 }, { "epoch": 0.14, "grad_norm": 0.27122243561276915, "learning_rate": 9.70618535678746e-07, "loss": 0.0889, "step": 2139 }, { "epoch": 0.14, "grad_norm": 1.1558674339226116, "learning_rate": 9.705836452233174e-07, "loss": 0.4229, "step": 2140 }, { "epoch": 0.14, "grad_norm": 0.36806759305293124, "learning_rate": 9.705487346918447e-07, "loss": 0.3808, "step": 2141 }, { "epoch": 0.14, "grad_norm": 0.6306939368757434, "learning_rate": 9.705138040858169e-07, "loss": 0.0952, "step": 2142 }, { "epoch": 0.14, "grad_norm": 1.2239928866755898, "learning_rate": 9.704788534067246e-07, "loss": 0.2595, "step": 2143 }, { "epoch": 0.14, "grad_norm": 0.6745385669592355, "learning_rate": 9.704438826560584e-07, "loss": 0.1144, "step": 2144 }, { "epoch": 0.14, "grad_norm": 0.8328907271771963, "learning_rate": 9.704088918353107e-07, "loss": 0.0156, "step": 2145 }, { "epoch": 0.14, "grad_norm": 0.9253891868737265, "learning_rate": 9.703738809459738e-07, "loss": 0.1115, "step": 2146 }, { "epoch": 0.14, "grad_norm": 0.8604095819465852, "learning_rate": 9.703388499895414e-07, "loss": 0.3714, "step": 2147 }, { "epoch": 0.14, "grad_norm": 0.5903399009294115, "learning_rate": 9.703037989675086e-07, "loss": 0.2533, "step": 2148 }, { "epoch": 0.14, "grad_norm": 0.6681768886008498, "learning_rate": 9.7026872788137e-07, "loss": 0.1211, "step": 2149 }, { "epoch": 0.14, "grad_norm": 0.9656416768461943, "learning_rate": 9.702336367326222e-07, "loss": 0.3824, "step": 2150 }, { "epoch": 0.14, "grad_norm": 0.8419538800878464, "learning_rate": 9.701985255227624e-07, "loss": 0.1727, "step": 2151 }, { "epoch": 0.14, "grad_norm": 0.6389175782612608, "learning_rate": 9.701633942532879e-07, "loss": 0.3651, "step": 2152 }, { "epoch": 0.14, "grad_norm": 0.5482342468742762, "learning_rate": 9.701282429256982e-07, "loss": 0.175, "step": 2153 }, { "epoch": 0.14, "grad_norm": 0.9234820789636007, "learning_rate": 9.700930715414923e-07, "loss": 0.1268, "step": 2154 }, { "epoch": 0.14, "grad_norm": 0.4561882955178448, "learning_rate": 9.70057880102171e-07, "loss": 0.2099, "step": 2155 }, { "epoch": 0.14, "grad_norm": 0.6178272401758675, "learning_rate": 9.700226686092357e-07, "loss": 0.2364, "step": 2156 }, { "epoch": 0.14, "grad_norm": 0.3762372836218014, "learning_rate": 9.699874370641885e-07, "loss": 0.1661, "step": 2157 }, { "epoch": 0.14, "grad_norm": 0.7340596125330184, "learning_rate": 9.699521854685324e-07, "loss": 0.4084, "step": 2158 }, { "epoch": 0.14, "grad_norm": 0.4741231741355246, "learning_rate": 9.699169138237714e-07, "loss": 0.2054, "step": 2159 }, { "epoch": 0.14, "grad_norm": 0.7000852701286417, "learning_rate": 9.6988162213141e-07, "loss": 0.1913, "step": 2160 }, { "epoch": 0.14, "grad_norm": 0.8700922457417857, "learning_rate": 9.698463103929541e-07, "loss": 0.148, "step": 2161 }, { "epoch": 0.14, "grad_norm": 0.21168941718294554, "learning_rate": 9.698109786099103e-07, "loss": 0.0822, "step": 2162 }, { "epoch": 0.14, "grad_norm": 0.8618181796727525, "learning_rate": 9.697756267837855e-07, "loss": 0.2028, "step": 2163 }, { "epoch": 0.14, "grad_norm": 0.33734627838567355, "learning_rate": 9.69740254916088e-07, "loss": 0.0987, "step": 2164 }, { "epoch": 0.14, "grad_norm": 0.7313910951546378, "learning_rate": 9.697048630083271e-07, "loss": 0.1143, "step": 2165 }, { "epoch": 0.14, "grad_norm": 0.5104355299439599, "learning_rate": 9.696694510620126e-07, "loss": 0.1054, "step": 2166 }, { "epoch": 0.14, "grad_norm": 0.26417956245786783, "learning_rate": 9.696340190786551e-07, "loss": 0.013, "step": 2167 }, { "epoch": 0.14, "grad_norm": 1.1226080990347633, "learning_rate": 9.695985670597662e-07, "loss": 0.2777, "step": 2168 }, { "epoch": 0.14, "grad_norm": 0.5572058221991034, "learning_rate": 9.695630950068585e-07, "loss": 0.0854, "step": 2169 }, { "epoch": 0.14, "grad_norm": 0.5143647653811192, "learning_rate": 9.695276029214452e-07, "loss": 0.074, "step": 2170 }, { "epoch": 0.14, "grad_norm": 0.4317050042711363, "learning_rate": 9.694920908050405e-07, "loss": 0.1415, "step": 2171 }, { "epoch": 0.14, "grad_norm": 0.3311311852121749, "learning_rate": 9.694565586591593e-07, "loss": 0.118, "step": 2172 }, { "epoch": 0.14, "grad_norm": 0.6783385015164943, "learning_rate": 9.694210064853176e-07, "loss": 0.2808, "step": 2173 }, { "epoch": 0.14, "grad_norm": 0.3257814175320826, "learning_rate": 9.693854342850322e-07, "loss": 0.1957, "step": 2174 }, { "epoch": 0.14, "grad_norm": 0.7204555437328546, "learning_rate": 9.693498420598206e-07, "loss": 0.0513, "step": 2175 }, { "epoch": 0.14, "grad_norm": 0.49848661994604865, "learning_rate": 9.693142298112012e-07, "loss": 0.0496, "step": 2176 }, { "epoch": 0.14, "grad_norm": 0.4716172545068549, "learning_rate": 9.692785975406933e-07, "loss": 0.2037, "step": 2177 }, { "epoch": 0.14, "grad_norm": 1.0155619549011925, "learning_rate": 9.692429452498171e-07, "loss": 0.1208, "step": 2178 }, { "epoch": 0.14, "grad_norm": 0.5177398619634386, "learning_rate": 9.692072729400936e-07, "loss": 0.3334, "step": 2179 }, { "epoch": 0.14, "grad_norm": 0.6641217851323812, "learning_rate": 9.691715806130445e-07, "loss": 0.3065, "step": 2180 }, { "epoch": 0.14, "grad_norm": 0.24003933327188864, "learning_rate": 9.691358682701926e-07, "loss": 0.1132, "step": 2181 }, { "epoch": 0.14, "grad_norm": 0.9836596156748919, "learning_rate": 9.691001359130614e-07, "loss": 0.2834, "step": 2182 }, { "epoch": 0.14, "grad_norm": 0.3591091840601756, "learning_rate": 9.690643835431756e-07, "loss": 0.0909, "step": 2183 }, { "epoch": 0.14, "grad_norm": 0.7763253196946548, "learning_rate": 9.690286111620601e-07, "loss": 0.3036, "step": 2184 }, { "epoch": 0.14, "grad_norm": 0.6384863997698524, "learning_rate": 9.689928187712414e-07, "loss": 0.3303, "step": 2185 }, { "epoch": 0.14, "grad_norm": 1.0370332070148198, "learning_rate": 9.68957006372246e-07, "loss": 0.1551, "step": 2186 }, { "epoch": 0.14, "grad_norm": 0.6683096996660153, "learning_rate": 9.689211739666022e-07, "loss": 0.1618, "step": 2187 }, { "epoch": 0.14, "grad_norm": 0.6021385146080815, "learning_rate": 9.688853215558384e-07, "loss": 0.0951, "step": 2188 }, { "epoch": 0.14, "grad_norm": 0.4977924246182438, "learning_rate": 9.688494491414842e-07, "loss": 0.1774, "step": 2189 }, { "epoch": 0.14, "grad_norm": 0.6280800769891541, "learning_rate": 9.6881355672507e-07, "loss": 0.3283, "step": 2190 }, { "epoch": 0.14, "grad_norm": 0.49895923109815055, "learning_rate": 9.687776443081268e-07, "loss": 0.2626, "step": 2191 }, { "epoch": 0.14, "grad_norm": 0.5822084227475768, "learning_rate": 9.687417118921872e-07, "loss": 0.2192, "step": 2192 }, { "epoch": 0.14, "grad_norm": 0.40429637307764493, "learning_rate": 9.68705759478784e-07, "loss": 0.0835, "step": 2193 }, { "epoch": 0.14, "grad_norm": 0.3349807403298228, "learning_rate": 9.686697870694507e-07, "loss": 0.0688, "step": 2194 }, { "epoch": 0.14, "grad_norm": 0.7251416008599905, "learning_rate": 9.68633794665722e-07, "loss": 0.1115, "step": 2195 }, { "epoch": 0.14, "grad_norm": 0.6856654549230466, "learning_rate": 9.685977822691338e-07, "loss": 0.0439, "step": 2196 }, { "epoch": 0.14, "grad_norm": 0.4043057449881499, "learning_rate": 9.685617498812221e-07, "loss": 0.1153, "step": 2197 }, { "epoch": 0.14, "grad_norm": 0.24902936667567233, "learning_rate": 9.68525697503524e-07, "loss": 0.0159, "step": 2198 }, { "epoch": 0.14, "grad_norm": 0.3618034565837879, "learning_rate": 9.684896251375783e-07, "loss": 0.0098, "step": 2199 }, { "epoch": 0.14, "grad_norm": 0.838832455790905, "learning_rate": 9.684535327849231e-07, "loss": 0.1777, "step": 2200 }, { "epoch": 0.14, "grad_norm": 1.0008950477903942, "learning_rate": 9.684174204470985e-07, "loss": 0.44, "step": 2201 }, { "epoch": 0.14, "grad_norm": 0.17549592469279013, "learning_rate": 9.68381288125645e-07, "loss": 0.0694, "step": 2202 }, { "epoch": 0.14, "grad_norm": 0.5686764235835852, "learning_rate": 9.683451358221044e-07, "loss": 0.1453, "step": 2203 }, { "epoch": 0.14, "grad_norm": 0.5265235396622149, "learning_rate": 9.683089635380185e-07, "loss": 0.167, "step": 2204 }, { "epoch": 0.14, "grad_norm": 1.2578997011400992, "learning_rate": 9.682727712749311e-07, "loss": 0.0425, "step": 2205 }, { "epoch": 0.14, "grad_norm": 1.786053876989963, "learning_rate": 9.68236559034386e-07, "loss": 0.1647, "step": 2206 }, { "epoch": 0.14, "grad_norm": 0.4285561195686105, "learning_rate": 9.682003268179276e-07, "loss": 0.2602, "step": 2207 }, { "epoch": 0.14, "grad_norm": 0.05317384824930782, "learning_rate": 9.681640746271026e-07, "loss": 0.0014, "step": 2208 }, { "epoch": 0.14, "grad_norm": 0.3058819496896352, "learning_rate": 9.681278024634568e-07, "loss": 0.0511, "step": 2209 }, { "epoch": 0.14, "grad_norm": 0.48222105925547604, "learning_rate": 9.680915103285376e-07, "loss": 0.2603, "step": 2210 }, { "epoch": 0.14, "grad_norm": 0.35396914352780745, "learning_rate": 9.68055198223894e-07, "loss": 0.1367, "step": 2211 }, { "epoch": 0.14, "grad_norm": 0.8654080235768975, "learning_rate": 9.680188661510746e-07, "loss": 0.3336, "step": 2212 }, { "epoch": 0.14, "grad_norm": 0.5383644093618157, "learning_rate": 9.679825141116294e-07, "loss": 0.2385, "step": 2213 }, { "epoch": 0.14, "grad_norm": 0.5615487289719443, "learning_rate": 9.679461421071096e-07, "loss": 0.0606, "step": 2214 }, { "epoch": 0.14, "grad_norm": 0.8546193412942816, "learning_rate": 9.679097501390666e-07, "loss": 0.084, "step": 2215 }, { "epoch": 0.14, "grad_norm": 0.4234781567013046, "learning_rate": 9.67873338209053e-07, "loss": 0.1631, "step": 2216 }, { "epoch": 0.14, "grad_norm": 0.5009648123432349, "learning_rate": 9.678369063186222e-07, "loss": 0.3513, "step": 2217 }, { "epoch": 0.14, "grad_norm": 0.46509501902431477, "learning_rate": 9.678004544693285e-07, "loss": 0.1144, "step": 2218 }, { "epoch": 0.14, "grad_norm": 0.41297550452765464, "learning_rate": 9.677639826627271e-07, "loss": 0.1154, "step": 2219 }, { "epoch": 0.14, "grad_norm": 0.3134990194762429, "learning_rate": 9.67727490900374e-07, "loss": 0.2109, "step": 2220 }, { "epoch": 0.14, "grad_norm": 0.6121985257256408, "learning_rate": 9.676909791838257e-07, "loss": 0.2242, "step": 2221 }, { "epoch": 0.14, "grad_norm": 4.245129433818458, "learning_rate": 9.676544475146402e-07, "loss": 0.439, "step": 2222 }, { "epoch": 0.14, "grad_norm": 0.6649483068722379, "learning_rate": 9.676178958943756e-07, "loss": 0.1686, "step": 2223 }, { "epoch": 0.14, "grad_norm": 0.35886327928050354, "learning_rate": 9.675813243245918e-07, "loss": 0.131, "step": 2224 }, { "epoch": 0.14, "grad_norm": 0.4399401243529808, "learning_rate": 9.675447328068488e-07, "loss": 0.128, "step": 2225 }, { "epoch": 0.14, "grad_norm": 1.0505089769550253, "learning_rate": 9.675081213427074e-07, "loss": 0.2093, "step": 2226 }, { "epoch": 0.14, "grad_norm": 1.0391963369372383, "learning_rate": 9.674714899337298e-07, "loss": 0.1625, "step": 2227 }, { "epoch": 0.14, "grad_norm": 0.8679782758021442, "learning_rate": 9.674348385814786e-07, "loss": 0.1818, "step": 2228 }, { "epoch": 0.14, "grad_norm": 0.34390251896014173, "learning_rate": 9.673981672875178e-07, "loss": 0.1262, "step": 2229 }, { "epoch": 0.14, "grad_norm": 0.550592707040225, "learning_rate": 9.673614760534114e-07, "loss": 0.0191, "step": 2230 }, { "epoch": 0.14, "grad_norm": 0.4974107876985201, "learning_rate": 9.673247648807248e-07, "loss": 0.2038, "step": 2231 }, { "epoch": 0.14, "grad_norm": 0.5077694190800893, "learning_rate": 9.672880337710246e-07, "loss": 0.2585, "step": 2232 }, { "epoch": 0.14, "grad_norm": 0.37577197112213945, "learning_rate": 9.672512827258773e-07, "loss": 0.1661, "step": 2233 }, { "epoch": 0.14, "grad_norm": 0.5223572975879749, "learning_rate": 9.67214511746851e-07, "loss": 0.1267, "step": 2234 }, { "epoch": 0.14, "grad_norm": 0.31077655176892444, "learning_rate": 9.671777208355145e-07, "loss": 0.0997, "step": 2235 }, { "epoch": 0.14, "grad_norm": 0.4371748183684701, "learning_rate": 9.671409099934372e-07, "loss": 0.1151, "step": 2236 }, { "epoch": 0.14, "grad_norm": 0.28810427124386667, "learning_rate": 9.671040792221896e-07, "loss": 0.1184, "step": 2237 }, { "epoch": 0.14, "grad_norm": 0.6674952069968723, "learning_rate": 9.67067228523343e-07, "loss": 0.2114, "step": 2238 }, { "epoch": 0.14, "grad_norm": 0.6422148799994809, "learning_rate": 9.670303578984696e-07, "loss": 0.3188, "step": 2239 }, { "epoch": 0.14, "grad_norm": 0.6988796102022719, "learning_rate": 9.669934673491423e-07, "loss": 0.3983, "step": 2240 }, { "epoch": 0.14, "grad_norm": 1.3211078676716037, "learning_rate": 9.669565568769347e-07, "loss": 0.1478, "step": 2241 }, { "epoch": 0.14, "grad_norm": 0.882084442419305, "learning_rate": 9.669196264834219e-07, "loss": 0.3688, "step": 2242 }, { "epoch": 0.14, "grad_norm": 0.3245513035662919, "learning_rate": 9.668826761701791e-07, "loss": 0.1576, "step": 2243 }, { "epoch": 0.14, "grad_norm": 0.8463906820598696, "learning_rate": 9.668457059387826e-07, "loss": 0.1455, "step": 2244 }, { "epoch": 0.14, "grad_norm": 0.7189558460863216, "learning_rate": 9.6680871579081e-07, "loss": 0.1305, "step": 2245 }, { "epoch": 0.14, "grad_norm": 0.5163374702417902, "learning_rate": 9.667717057278393e-07, "loss": 0.2395, "step": 2246 }, { "epoch": 0.14, "grad_norm": 0.7378784092646128, "learning_rate": 9.667346757514492e-07, "loss": 0.1645, "step": 2247 }, { "epoch": 0.14, "grad_norm": 0.5902167984125476, "learning_rate": 9.666976258632195e-07, "loss": 0.2945, "step": 2248 }, { "epoch": 0.14, "grad_norm": 0.6015192434654577, "learning_rate": 9.66660556064731e-07, "loss": 0.2368, "step": 2249 }, { "epoch": 0.14, "grad_norm": 0.5495202520755843, "learning_rate": 9.66623466357565e-07, "loss": 0.1012, "step": 2250 }, { "epoch": 0.14, "grad_norm": 0.5985745751093206, "learning_rate": 9.66586356743304e-07, "loss": 0.0957, "step": 2251 }, { "epoch": 0.14, "grad_norm": 0.5664242279788742, "learning_rate": 9.665492272235309e-07, "loss": 0.4868, "step": 2252 }, { "epoch": 0.14, "grad_norm": 0.3543322807729264, "learning_rate": 9.665120777998302e-07, "loss": 0.2164, "step": 2253 }, { "epoch": 0.14, "grad_norm": 1.012639129934071, "learning_rate": 9.664749084737862e-07, "loss": 0.1544, "step": 2254 }, { "epoch": 0.14, "grad_norm": 0.81779050790222, "learning_rate": 9.664377192469848e-07, "loss": 0.3724, "step": 2255 }, { "epoch": 0.14, "grad_norm": 0.8172736485518265, "learning_rate": 9.664005101210128e-07, "loss": 0.0195, "step": 2256 }, { "epoch": 0.14, "grad_norm": 0.2967269382121912, "learning_rate": 9.663632810974574e-07, "loss": 0.148, "step": 2257 }, { "epoch": 0.14, "grad_norm": 0.45317727513112177, "learning_rate": 9.66326032177907e-07, "loss": 0.1707, "step": 2258 }, { "epoch": 0.14, "grad_norm": 0.7766974709362029, "learning_rate": 9.662887633639505e-07, "loss": 0.266, "step": 2259 }, { "epoch": 0.14, "grad_norm": 0.4984038311157407, "learning_rate": 9.66251474657178e-07, "loss": 0.0535, "step": 2260 }, { "epoch": 0.14, "grad_norm": 0.1891626931876308, "learning_rate": 9.662141660591804e-07, "loss": 0.0147, "step": 2261 }, { "epoch": 0.14, "grad_norm": 0.3164793955238129, "learning_rate": 9.661768375715491e-07, "loss": 0.2204, "step": 2262 }, { "epoch": 0.14, "grad_norm": 0.49853749721826085, "learning_rate": 9.66139489195877e-07, "loss": 0.1248, "step": 2263 }, { "epoch": 0.14, "grad_norm": 0.976423822686046, "learning_rate": 9.661021209337573e-07, "loss": 0.1819, "step": 2264 }, { "epoch": 0.14, "grad_norm": 0.32536603027870525, "learning_rate": 9.660647327867838e-07, "loss": 0.213, "step": 2265 }, { "epoch": 0.14, "grad_norm": 0.41416220367858053, "learning_rate": 9.660273247565522e-07, "loss": 0.1143, "step": 2266 }, { "epoch": 0.14, "grad_norm": 0.7376481330398571, "learning_rate": 9.659898968446578e-07, "loss": 0.1566, "step": 2267 }, { "epoch": 0.14, "grad_norm": 0.3830419810598921, "learning_rate": 9.659524490526979e-07, "loss": 0.0325, "step": 2268 }, { "epoch": 0.14, "grad_norm": 0.9413732448889611, "learning_rate": 9.659149813822697e-07, "loss": 0.3643, "step": 2269 }, { "epoch": 0.14, "grad_norm": 0.5660978306764866, "learning_rate": 9.658774938349718e-07, "loss": 0.4425, "step": 2270 }, { "epoch": 0.14, "grad_norm": 0.6829693757465235, "learning_rate": 9.658399864124035e-07, "loss": 0.2359, "step": 2271 }, { "epoch": 0.14, "grad_norm": 0.4514127261456764, "learning_rate": 9.658024591161651e-07, "loss": 0.1984, "step": 2272 }, { "epoch": 0.14, "grad_norm": 0.4498924244285713, "learning_rate": 9.657649119478572e-07, "loss": 0.0579, "step": 2273 }, { "epoch": 0.15, "grad_norm": 0.7614178656215915, "learning_rate": 9.65727344909082e-07, "loss": 0.2248, "step": 2274 }, { "epoch": 0.15, "grad_norm": 0.6076176180082014, "learning_rate": 9.65689758001442e-07, "loss": 0.121, "step": 2275 }, { "epoch": 0.15, "grad_norm": 0.28470817960489736, "learning_rate": 9.656521512265405e-07, "loss": 0.266, "step": 2276 }, { "epoch": 0.15, "grad_norm": 0.3198352442511579, "learning_rate": 9.656145245859823e-07, "loss": 0.1984, "step": 2277 }, { "epoch": 0.15, "grad_norm": 0.38328997388644065, "learning_rate": 9.655768780813727e-07, "loss": 0.2533, "step": 2278 }, { "epoch": 0.15, "grad_norm": 1.0348423197625813, "learning_rate": 9.655392117143172e-07, "loss": 0.2776, "step": 2279 }, { "epoch": 0.15, "grad_norm": 0.8350534328849376, "learning_rate": 9.655015254864233e-07, "loss": 0.0954, "step": 2280 }, { "epoch": 0.15, "grad_norm": 0.6564781601015938, "learning_rate": 9.654638193992986e-07, "loss": 0.3759, "step": 2281 }, { "epoch": 0.15, "grad_norm": 0.4569331204775347, "learning_rate": 9.654260934545518e-07, "loss": 0.0432, "step": 2282 }, { "epoch": 0.15, "grad_norm": 0.8782757935857414, "learning_rate": 9.653883476537918e-07, "loss": 0.0517, "step": 2283 }, { "epoch": 0.15, "grad_norm": 0.27739462172948237, "learning_rate": 9.653505819986297e-07, "loss": 0.1759, "step": 2284 }, { "epoch": 0.15, "grad_norm": 0.9304604462430499, "learning_rate": 9.653127964906762e-07, "loss": 0.0239, "step": 2285 }, { "epoch": 0.15, "grad_norm": 0.6302011693903139, "learning_rate": 9.652749911315433e-07, "loss": 0.1394, "step": 2286 }, { "epoch": 0.15, "grad_norm": 0.7351225115861103, "learning_rate": 9.65237165922844e-07, "loss": 0.3691, "step": 2287 }, { "epoch": 0.15, "grad_norm": 0.7122696323469048, "learning_rate": 9.65199320866192e-07, "loss": 0.2106, "step": 2288 }, { "epoch": 0.15, "grad_norm": 0.2624750293746841, "learning_rate": 9.651614559632017e-07, "loss": 0.0104, "step": 2289 }, { "epoch": 0.15, "grad_norm": 1.6971316836718546, "learning_rate": 9.651235712154887e-07, "loss": 0.331, "step": 2290 }, { "epoch": 0.15, "grad_norm": 0.552145582263909, "learning_rate": 9.650856666246692e-07, "loss": 0.2856, "step": 2291 }, { "epoch": 0.15, "grad_norm": 0.8679718247585241, "learning_rate": 9.650477421923602e-07, "loss": 0.1551, "step": 2292 }, { "epoch": 0.15, "grad_norm": 0.44643504149248464, "learning_rate": 9.650097979201795e-07, "loss": 0.0522, "step": 2293 }, { "epoch": 0.15, "grad_norm": 0.8235716914964851, "learning_rate": 9.649718338097462e-07, "loss": 0.1463, "step": 2294 }, { "epoch": 0.15, "grad_norm": 1.0343432708886806, "learning_rate": 9.649338498626795e-07, "loss": 0.3718, "step": 2295 }, { "epoch": 0.15, "grad_norm": 0.7726073379068243, "learning_rate": 9.648958460806003e-07, "loss": 0.2415, "step": 2296 }, { "epoch": 0.15, "grad_norm": 0.9999413965119969, "learning_rate": 9.648578224651299e-07, "loss": 0.2055, "step": 2297 }, { "epoch": 0.15, "grad_norm": 0.5979325658254782, "learning_rate": 9.6481977901789e-07, "loss": 0.306, "step": 2298 }, { "epoch": 0.15, "grad_norm": 0.3014136895704547, "learning_rate": 9.647817157405043e-07, "loss": 0.1481, "step": 2299 }, { "epoch": 0.15, "grad_norm": 0.4454559117522062, "learning_rate": 9.64743632634596e-07, "loss": 0.2429, "step": 2300 }, { "epoch": 0.15, "grad_norm": 0.6919515419081649, "learning_rate": 9.647055297017902e-07, "loss": 0.0867, "step": 2301 }, { "epoch": 0.15, "grad_norm": 0.5852256753102256, "learning_rate": 9.646674069437122e-07, "loss": 0.2837, "step": 2302 }, { "epoch": 0.15, "grad_norm": 0.8964603662872278, "learning_rate": 9.646292643619887e-07, "loss": 0.3714, "step": 2303 }, { "epoch": 0.15, "grad_norm": 0.8299760482054624, "learning_rate": 9.645911019582465e-07, "loss": 0.3254, "step": 2304 }, { "epoch": 0.15, "grad_norm": 0.7201842646276856, "learning_rate": 9.645529197341142e-07, "loss": 0.2404, "step": 2305 }, { "epoch": 0.15, "grad_norm": 0.4962911567973602, "learning_rate": 9.645147176912203e-07, "loss": 0.0751, "step": 2306 }, { "epoch": 0.15, "grad_norm": 0.9255300170601806, "learning_rate": 9.64476495831195e-07, "loss": 0.395, "step": 2307 }, { "epoch": 0.15, "grad_norm": 0.5418588930887981, "learning_rate": 9.644382541556684e-07, "loss": 0.119, "step": 2308 }, { "epoch": 0.15, "grad_norm": 0.8536579121334795, "learning_rate": 9.643999926662723e-07, "loss": 0.1972, "step": 2309 }, { "epoch": 0.15, "grad_norm": 0.9897793684100308, "learning_rate": 9.643617113646392e-07, "loss": 0.2981, "step": 2310 }, { "epoch": 0.15, "grad_norm": 0.6466825868896282, "learning_rate": 9.643234102524017e-07, "loss": 0.1807, "step": 2311 }, { "epoch": 0.15, "grad_norm": 0.33705126163694304, "learning_rate": 9.642850893311942e-07, "loss": 0.2265, "step": 2312 }, { "epoch": 0.15, "grad_norm": 0.5435083251237673, "learning_rate": 9.642467486026516e-07, "loss": 0.287, "step": 2313 }, { "epoch": 0.15, "grad_norm": 0.3867512644692106, "learning_rate": 9.642083880684093e-07, "loss": 0.1887, "step": 2314 }, { "epoch": 0.15, "grad_norm": 1.0036525121321793, "learning_rate": 9.64170007730104e-07, "loss": 0.2691, "step": 2315 }, { "epoch": 0.15, "grad_norm": 1.5478909722228598, "learning_rate": 9.641316075893731e-07, "loss": 0.1671, "step": 2316 }, { "epoch": 0.15, "grad_norm": 0.9728080261290837, "learning_rate": 9.640931876478546e-07, "loss": 0.0473, "step": 2317 }, { "epoch": 0.15, "grad_norm": 0.47594745891612705, "learning_rate": 9.64054747907188e-07, "loss": 0.3223, "step": 2318 }, { "epoch": 0.15, "grad_norm": 1.3104477542687964, "learning_rate": 9.64016288369013e-07, "loss": 0.1943, "step": 2319 }, { "epoch": 0.15, "grad_norm": 1.1109103011879944, "learning_rate": 9.639778090349705e-07, "loss": 0.3341, "step": 2320 }, { "epoch": 0.15, "grad_norm": 0.7667004818078823, "learning_rate": 9.639393099067015e-07, "loss": 0.2947, "step": 2321 }, { "epoch": 0.15, "grad_norm": 0.33847381927963693, "learning_rate": 9.639007909858493e-07, "loss": 0.3664, "step": 2322 }, { "epoch": 0.15, "grad_norm": 0.4719779746135633, "learning_rate": 9.638622522740565e-07, "loss": 0.2952, "step": 2323 }, { "epoch": 0.15, "grad_norm": 0.32241616455743904, "learning_rate": 9.638236937729678e-07, "loss": 0.1506, "step": 2324 }, { "epoch": 0.15, "grad_norm": 0.39757686248786983, "learning_rate": 9.637851154842279e-07, "loss": 0.1362, "step": 2325 }, { "epoch": 0.15, "grad_norm": 0.04706264417641275, "learning_rate": 9.637465174094825e-07, "loss": 0.002, "step": 2326 }, { "epoch": 0.15, "grad_norm": 0.48218257664301306, "learning_rate": 9.637078995503784e-07, "loss": 0.3685, "step": 2327 }, { "epoch": 0.15, "grad_norm": 1.6935869139609587, "learning_rate": 9.636692619085633e-07, "loss": 0.2605, "step": 2328 }, { "epoch": 0.15, "grad_norm": 1.0172783780874317, "learning_rate": 9.636306044856853e-07, "loss": 0.2496, "step": 2329 }, { "epoch": 0.15, "grad_norm": 0.7990364493575447, "learning_rate": 9.635919272833937e-07, "loss": 0.2556, "step": 2330 }, { "epoch": 0.15, "grad_norm": 0.6195611723475155, "learning_rate": 9.635532303033385e-07, "loss": 0.2784, "step": 2331 }, { "epoch": 0.15, "grad_norm": 0.969221530484212, "learning_rate": 9.635145135471708e-07, "loss": 0.1225, "step": 2332 }, { "epoch": 0.15, "grad_norm": 0.4353391986370538, "learning_rate": 9.63475777016542e-07, "loss": 0.2233, "step": 2333 }, { "epoch": 0.15, "grad_norm": 0.4104028272530483, "learning_rate": 9.634370207131047e-07, "loss": 0.1183, "step": 2334 }, { "epoch": 0.15, "grad_norm": 0.8507423609935023, "learning_rate": 9.633982446385129e-07, "loss": 0.1365, "step": 2335 }, { "epoch": 0.15, "grad_norm": 0.4205435335862078, "learning_rate": 9.633594487944202e-07, "loss": 0.0591, "step": 2336 }, { "epoch": 0.15, "grad_norm": 0.5029415640201254, "learning_rate": 9.63320633182482e-07, "loss": 0.1185, "step": 2337 }, { "epoch": 0.15, "grad_norm": 0.6350187922324667, "learning_rate": 9.632817978043542e-07, "loss": 0.3711, "step": 2338 }, { "epoch": 0.15, "grad_norm": 0.6837763626589852, "learning_rate": 9.632429426616935e-07, "loss": 0.2258, "step": 2339 }, { "epoch": 0.15, "grad_norm": 1.1247648723040353, "learning_rate": 9.632040677561577e-07, "loss": 0.1972, "step": 2340 }, { "epoch": 0.15, "grad_norm": 0.5807963694314374, "learning_rate": 9.63165173089405e-07, "loss": 0.2031, "step": 2341 }, { "epoch": 0.15, "grad_norm": 0.6817570329315167, "learning_rate": 9.631262586630952e-07, "loss": 0.1247, "step": 2342 }, { "epoch": 0.15, "grad_norm": 0.6135669974590318, "learning_rate": 9.630873244788882e-07, "loss": 0.341, "step": 2343 }, { "epoch": 0.15, "grad_norm": 0.6181313290501207, "learning_rate": 9.63048370538445e-07, "loss": 0.2672, "step": 2344 }, { "epoch": 0.15, "grad_norm": 0.4007863289709619, "learning_rate": 9.630093968434275e-07, "loss": 0.1462, "step": 2345 }, { "epoch": 0.15, "grad_norm": 1.0453733218873578, "learning_rate": 9.629704033954981e-07, "loss": 0.0337, "step": 2346 }, { "epoch": 0.15, "grad_norm": 0.4082676620728667, "learning_rate": 9.62931390196321e-07, "loss": 0.115, "step": 2347 }, { "epoch": 0.15, "grad_norm": 0.6161421143518172, "learning_rate": 9.628923572475598e-07, "loss": 0.3096, "step": 2348 }, { "epoch": 0.15, "grad_norm": 0.2977332789299097, "learning_rate": 9.628533045508803e-07, "loss": 0.0868, "step": 2349 }, { "epoch": 0.15, "grad_norm": 0.8224576365366434, "learning_rate": 9.628142321079485e-07, "loss": 0.0306, "step": 2350 }, { "epoch": 0.15, "grad_norm": 0.6319045401424148, "learning_rate": 9.62775139920431e-07, "loss": 0.2043, "step": 2351 }, { "epoch": 0.15, "grad_norm": 1.6003548495530138, "learning_rate": 9.627360279899956e-07, "loss": 0.06, "step": 2352 }, { "epoch": 0.15, "grad_norm": 0.985280074315826, "learning_rate": 9.626968963183113e-07, "loss": 0.1847, "step": 2353 }, { "epoch": 0.15, "grad_norm": 0.2260737403352769, "learning_rate": 9.626577449070473e-07, "loss": 0.0967, "step": 2354 }, { "epoch": 0.15, "grad_norm": 0.33288818264685915, "learning_rate": 9.626185737578737e-07, "loss": 0.1423, "step": 2355 }, { "epoch": 0.15, "grad_norm": 1.076048043320009, "learning_rate": 9.625793828724618e-07, "loss": 0.2161, "step": 2356 }, { "epoch": 0.15, "grad_norm": 0.28497596382596746, "learning_rate": 9.625401722524835e-07, "loss": 0.0466, "step": 2357 }, { "epoch": 0.15, "grad_norm": 0.8612805387264523, "learning_rate": 9.625009418996115e-07, "loss": 0.4057, "step": 2358 }, { "epoch": 0.15, "grad_norm": 0.896418765709942, "learning_rate": 9.624616918155197e-07, "loss": 0.1871, "step": 2359 }, { "epoch": 0.15, "grad_norm": 1.1315802655313565, "learning_rate": 9.624224220018826e-07, "loss": 0.0523, "step": 2360 }, { "epoch": 0.15, "grad_norm": 0.3753553941282118, "learning_rate": 9.623831324603752e-07, "loss": 0.1157, "step": 2361 }, { "epoch": 0.15, "grad_norm": 0.36621201404913223, "learning_rate": 9.623438231926739e-07, "loss": 0.1089, "step": 2362 }, { "epoch": 0.15, "grad_norm": 0.5573117921494466, "learning_rate": 9.623044942004557e-07, "loss": 0.1621, "step": 2363 }, { "epoch": 0.15, "grad_norm": 0.78492953971227, "learning_rate": 9.622651454853985e-07, "loss": 0.3114, "step": 2364 }, { "epoch": 0.15, "grad_norm": 0.3565610982750958, "learning_rate": 9.62225777049181e-07, "loss": 0.2818, "step": 2365 }, { "epoch": 0.15, "grad_norm": 0.3618723782921361, "learning_rate": 9.621863888934824e-07, "loss": 0.1987, "step": 2366 }, { "epoch": 0.15, "grad_norm": 0.5948519558003095, "learning_rate": 9.621469810199833e-07, "loss": 0.3976, "step": 2367 }, { "epoch": 0.15, "grad_norm": 0.657854215589905, "learning_rate": 9.621075534303654e-07, "loss": 0.5349, "step": 2368 }, { "epoch": 0.15, "grad_norm": 1.1712196716947927, "learning_rate": 9.620681061263098e-07, "loss": 0.3451, "step": 2369 }, { "epoch": 0.15, "grad_norm": 0.16272161629774692, "learning_rate": 9.620286391095003e-07, "loss": 0.1046, "step": 2370 }, { "epoch": 0.15, "grad_norm": 0.5854611664085622, "learning_rate": 9.619891523816202e-07, "loss": 0.4168, "step": 2371 }, { "epoch": 0.15, "grad_norm": 0.3570465267301881, "learning_rate": 9.61949645944354e-07, "loss": 0.0927, "step": 2372 }, { "epoch": 0.15, "grad_norm": 0.5179421249236883, "learning_rate": 9.619101197993874e-07, "loss": 0.2437, "step": 2373 }, { "epoch": 0.15, "grad_norm": 0.8658125477907648, "learning_rate": 9.618705739484064e-07, "loss": 0.4816, "step": 2374 }, { "epoch": 0.15, "grad_norm": 0.6956739182159973, "learning_rate": 9.618310083930983e-07, "loss": 0.1711, "step": 2375 }, { "epoch": 0.15, "grad_norm": 0.7735803198214356, "learning_rate": 9.617914231351508e-07, "loss": 0.2274, "step": 2376 }, { "epoch": 0.15, "grad_norm": 0.775590982406566, "learning_rate": 9.617518181762531e-07, "loss": 0.4208, "step": 2377 }, { "epoch": 0.15, "grad_norm": 0.39703609197852757, "learning_rate": 9.617121935180946e-07, "loss": 0.1647, "step": 2378 }, { "epoch": 0.15, "grad_norm": 0.3233245062122669, "learning_rate": 9.616725491623657e-07, "loss": 0.1064, "step": 2379 }, { "epoch": 0.15, "grad_norm": 0.5076525687307184, "learning_rate": 9.616328851107578e-07, "loss": 0.2319, "step": 2380 }, { "epoch": 0.15, "grad_norm": 0.42553805338670764, "learning_rate": 9.61593201364963e-07, "loss": 0.1853, "step": 2381 }, { "epoch": 0.15, "grad_norm": 0.9205030596838517, "learning_rate": 9.615534979266744e-07, "loss": 0.4135, "step": 2382 }, { "epoch": 0.15, "grad_norm": 0.5379667422988014, "learning_rate": 9.615137747975857e-07, "loss": 0.3133, "step": 2383 }, { "epoch": 0.15, "grad_norm": 0.5742670032412414, "learning_rate": 9.614740319793915e-07, "loss": 0.2185, "step": 2384 }, { "epoch": 0.15, "grad_norm": 0.2402152679106932, "learning_rate": 9.614342694737876e-07, "loss": 0.2124, "step": 2385 }, { "epoch": 0.15, "grad_norm": 0.7856792205453577, "learning_rate": 9.6139448728247e-07, "loss": 0.3608, "step": 2386 }, { "epoch": 0.15, "grad_norm": 0.4222629909090372, "learning_rate": 9.613546854071362e-07, "loss": 0.1599, "step": 2387 }, { "epoch": 0.15, "grad_norm": 0.8568168659751535, "learning_rate": 9.613148638494839e-07, "loss": 0.0264, "step": 2388 }, { "epoch": 0.15, "grad_norm": 0.248520687448224, "learning_rate": 9.612750226112122e-07, "loss": 0.25, "step": 2389 }, { "epoch": 0.15, "grad_norm": 1.1878111336995245, "learning_rate": 9.612351616940209e-07, "loss": 0.2917, "step": 2390 }, { "epoch": 0.15, "grad_norm": 0.40762221093022905, "learning_rate": 9.611952810996102e-07, "loss": 0.287, "step": 2391 }, { "epoch": 0.15, "grad_norm": 0.3397480071058737, "learning_rate": 9.611553808296818e-07, "loss": 0.1617, "step": 2392 }, { "epoch": 0.15, "grad_norm": 0.689809580736861, "learning_rate": 9.611154608859378e-07, "loss": 0.1587, "step": 2393 }, { "epoch": 0.15, "grad_norm": 0.5182216233375155, "learning_rate": 9.610755212700814e-07, "loss": 0.0876, "step": 2394 }, { "epoch": 0.15, "grad_norm": 1.223606523800506, "learning_rate": 9.610355619838162e-07, "loss": 0.0617, "step": 2395 }, { "epoch": 0.15, "grad_norm": 1.10688526188776, "learning_rate": 9.609955830288471e-07, "loss": 0.0832, "step": 2396 }, { "epoch": 0.15, "grad_norm": 0.5905240026624127, "learning_rate": 9.6095558440688e-07, "loss": 0.2074, "step": 2397 }, { "epoch": 0.15, "grad_norm": 0.8937538166310866, "learning_rate": 9.609155661196209e-07, "loss": 0.2132, "step": 2398 }, { "epoch": 0.15, "grad_norm": 0.6143162927375626, "learning_rate": 9.60875528168777e-07, "loss": 0.3599, "step": 2399 }, { "epoch": 0.15, "grad_norm": 0.433598283328746, "learning_rate": 9.608354705560567e-07, "loss": 0.1139, "step": 2400 }, { "epoch": 0.15, "grad_norm": 0.811439499470575, "learning_rate": 9.60795393283169e-07, "loss": 0.1481, "step": 2401 }, { "epoch": 0.15, "grad_norm": 0.7920695747098897, "learning_rate": 9.607552963518232e-07, "loss": 0.2026, "step": 2402 }, { "epoch": 0.15, "grad_norm": 0.9343304919547966, "learning_rate": 9.607151797637305e-07, "loss": 0.2416, "step": 2403 }, { "epoch": 0.15, "grad_norm": 0.44922222916730686, "learning_rate": 9.606750435206019e-07, "loss": 0.0723, "step": 2404 }, { "epoch": 0.15, "grad_norm": 0.9007662843357239, "learning_rate": 9.6063488762415e-07, "loss": 0.2407, "step": 2405 }, { "epoch": 0.15, "grad_norm": 1.2592833143028035, "learning_rate": 9.605947120760877e-07, "loss": 0.2817, "step": 2406 }, { "epoch": 0.15, "grad_norm": 0.8598279190527072, "learning_rate": 9.605545168781291e-07, "loss": 0.0815, "step": 2407 }, { "epoch": 0.15, "grad_norm": 1.4779322437716764, "learning_rate": 9.60514302031989e-07, "loss": 0.1776, "step": 2408 }, { "epoch": 0.15, "grad_norm": 0.5904274798959935, "learning_rate": 9.60474067539383e-07, "loss": 0.345, "step": 2409 }, { "epoch": 0.15, "grad_norm": 0.7881322561250321, "learning_rate": 9.604338134020278e-07, "loss": 0.2772, "step": 2410 }, { "epoch": 0.15, "grad_norm": 0.6838207405531467, "learning_rate": 9.603935396216404e-07, "loss": 0.3399, "step": 2411 }, { "epoch": 0.15, "grad_norm": 0.5401657859931392, "learning_rate": 9.60353246199939e-07, "loss": 0.2189, "step": 2412 }, { "epoch": 0.15, "grad_norm": 0.3508163251196253, "learning_rate": 9.603129331386425e-07, "loss": 0.1616, "step": 2413 }, { "epoch": 0.15, "grad_norm": 0.6713099313881182, "learning_rate": 9.602726004394713e-07, "loss": 0.2643, "step": 2414 }, { "epoch": 0.15, "grad_norm": 1.1978295176181208, "learning_rate": 9.602322481041457e-07, "loss": 0.1534, "step": 2415 }, { "epoch": 0.15, "grad_norm": 0.8870010417763626, "learning_rate": 9.60191876134387e-07, "loss": 0.088, "step": 2416 }, { "epoch": 0.15, "grad_norm": 0.37156038596610425, "learning_rate": 9.601514845319178e-07, "loss": 0.0091, "step": 2417 }, { "epoch": 0.15, "grad_norm": 0.5673089570323873, "learning_rate": 9.601110732984611e-07, "loss": 0.0818, "step": 2418 }, { "epoch": 0.15, "grad_norm": 0.8794165913808688, "learning_rate": 9.600706424357414e-07, "loss": 0.1543, "step": 2419 }, { "epoch": 0.15, "grad_norm": 1.8860515292708557, "learning_rate": 9.600301919454832e-07, "loss": 0.1811, "step": 2420 }, { "epoch": 0.15, "grad_norm": 0.41049406078086303, "learning_rate": 9.599897218294121e-07, "loss": 0.2829, "step": 2421 }, { "epoch": 0.15, "grad_norm": 0.4387965860439652, "learning_rate": 9.599492320892548e-07, "loss": 0.0282, "step": 2422 }, { "epoch": 0.15, "grad_norm": 0.08653651633786842, "learning_rate": 9.599087227267386e-07, "loss": 0.0023, "step": 2423 }, { "epoch": 0.15, "grad_norm": 0.5448737842767393, "learning_rate": 9.598681937435918e-07, "loss": 0.3058, "step": 2424 }, { "epoch": 0.15, "grad_norm": 0.7449450919035487, "learning_rate": 9.598276451415436e-07, "loss": 0.1391, "step": 2425 }, { "epoch": 0.15, "grad_norm": 0.5389733243028549, "learning_rate": 9.597870769223234e-07, "loss": 0.202, "step": 2426 }, { "epoch": 0.15, "grad_norm": 0.9581193222381307, "learning_rate": 9.597464890876623e-07, "loss": 0.0766, "step": 2427 }, { "epoch": 0.15, "grad_norm": 0.7255173443642816, "learning_rate": 9.597058816392917e-07, "loss": 0.2205, "step": 2428 }, { "epoch": 0.15, "grad_norm": 0.5465860010520189, "learning_rate": 9.596652545789441e-07, "loss": 0.1827, "step": 2429 }, { "epoch": 0.15, "grad_norm": 0.44458216987835736, "learning_rate": 9.59624607908353e-07, "loss": 0.0578, "step": 2430 }, { "epoch": 0.16, "grad_norm": 0.9803309693892769, "learning_rate": 9.595839416292516e-07, "loss": 0.1905, "step": 2431 }, { "epoch": 0.16, "grad_norm": 0.4503185098868732, "learning_rate": 9.595432557433758e-07, "loss": 0.1836, "step": 2432 }, { "epoch": 0.16, "grad_norm": 0.4490701976502874, "learning_rate": 9.595025502524607e-07, "loss": 0.3053, "step": 2433 }, { "epoch": 0.16, "grad_norm": 0.5301201782137251, "learning_rate": 9.594618251582433e-07, "loss": 0.1949, "step": 2434 }, { "epoch": 0.16, "grad_norm": 0.4630095575443994, "learning_rate": 9.594210804624608e-07, "loss": 0.1619, "step": 2435 }, { "epoch": 0.16, "grad_norm": 0.5665595307354613, "learning_rate": 9.593803161668511e-07, "loss": 0.1135, "step": 2436 }, { "epoch": 0.16, "grad_norm": 0.4015014708988468, "learning_rate": 9.59339532273154e-07, "loss": 0.1545, "step": 2437 }, { "epoch": 0.16, "grad_norm": 0.7615373792978193, "learning_rate": 9.59298728783109e-07, "loss": 0.2178, "step": 2438 }, { "epoch": 0.16, "grad_norm": 0.3709279526175329, "learning_rate": 9.59257905698457e-07, "loss": 0.1777, "step": 2439 }, { "epoch": 0.16, "grad_norm": 0.16915273068108697, "learning_rate": 9.592170630209393e-07, "loss": 0.0898, "step": 2440 }, { "epoch": 0.16, "grad_norm": 0.4554536599748477, "learning_rate": 9.591762007522986e-07, "loss": 0.2354, "step": 2441 }, { "epoch": 0.16, "grad_norm": 0.6277257649198804, "learning_rate": 9.591353188942782e-07, "loss": 0.2001, "step": 2442 }, { "epoch": 0.16, "grad_norm": 0.7339984100844941, "learning_rate": 9.59094417448622e-07, "loss": 0.1953, "step": 2443 }, { "epoch": 0.16, "grad_norm": 0.3210775223225176, "learning_rate": 9.590534964170751e-07, "loss": 0.0939, "step": 2444 }, { "epoch": 0.16, "grad_norm": 0.4799228540673482, "learning_rate": 9.590125558013833e-07, "loss": 0.2605, "step": 2445 }, { "epoch": 0.16, "grad_norm": 0.5003199535506793, "learning_rate": 9.589715956032931e-07, "loss": 0.1503, "step": 2446 }, { "epoch": 0.16, "grad_norm": 1.2093919587016295, "learning_rate": 9.589306158245519e-07, "loss": 0.0823, "step": 2447 }, { "epoch": 0.16, "grad_norm": 1.211062135323147, "learning_rate": 9.588896164669083e-07, "loss": 0.0798, "step": 2448 }, { "epoch": 0.16, "grad_norm": 0.8255251624391429, "learning_rate": 9.58848597532111e-07, "loss": 0.3699, "step": 2449 }, { "epoch": 0.16, "grad_norm": 0.7480180932204308, "learning_rate": 9.5880755902191e-07, "loss": 0.1169, "step": 2450 }, { "epoch": 0.16, "grad_norm": 0.5885945131637391, "learning_rate": 9.587665009380564e-07, "loss": 0.2496, "step": 2451 }, { "epoch": 0.16, "grad_norm": 0.6911377401986551, "learning_rate": 9.587254232823017e-07, "loss": 0.1732, "step": 2452 }, { "epoch": 0.16, "grad_norm": 0.2802853173063493, "learning_rate": 9.586843260563981e-07, "loss": 0.2303, "step": 2453 }, { "epoch": 0.16, "grad_norm": 0.5706821042495621, "learning_rate": 9.586432092620993e-07, "loss": 0.1628, "step": 2454 }, { "epoch": 0.16, "grad_norm": 0.5149533528375522, "learning_rate": 9.586020729011591e-07, "loss": 0.0887, "step": 2455 }, { "epoch": 0.16, "grad_norm": 0.8646381630516424, "learning_rate": 9.585609169753323e-07, "loss": 0.1842, "step": 2456 }, { "epoch": 0.16, "grad_norm": 0.5109092992198471, "learning_rate": 9.585197414863754e-07, "loss": 0.1276, "step": 2457 }, { "epoch": 0.16, "grad_norm": 0.34629214369350364, "learning_rate": 9.584785464360442e-07, "loss": 0.0915, "step": 2458 }, { "epoch": 0.16, "grad_norm": 0.7161245704669781, "learning_rate": 9.584373318260968e-07, "loss": 0.1963, "step": 2459 }, { "epoch": 0.16, "grad_norm": 0.4039344109309137, "learning_rate": 9.583960976582913e-07, "loss": 0.1153, "step": 2460 }, { "epoch": 0.16, "grad_norm": 0.7991041837940455, "learning_rate": 9.583548439343864e-07, "loss": 0.2508, "step": 2461 }, { "epoch": 0.16, "grad_norm": 0.39917016746617423, "learning_rate": 9.583135706561427e-07, "loss": 0.1676, "step": 2462 }, { "epoch": 0.16, "grad_norm": 0.4471927740248249, "learning_rate": 9.582722778253209e-07, "loss": 0.0567, "step": 2463 }, { "epoch": 0.16, "grad_norm": 0.5560947222512375, "learning_rate": 9.582309654436824e-07, "loss": 0.0954, "step": 2464 }, { "epoch": 0.16, "grad_norm": 0.7713230345468085, "learning_rate": 9.581896335129896e-07, "loss": 0.1478, "step": 2465 }, { "epoch": 0.16, "grad_norm": 0.5992686563278361, "learning_rate": 9.581482820350062e-07, "loss": 0.199, "step": 2466 }, { "epoch": 0.16, "grad_norm": 0.5839880897771591, "learning_rate": 9.581069110114959e-07, "loss": 0.0778, "step": 2467 }, { "epoch": 0.16, "grad_norm": 0.7665261128257583, "learning_rate": 9.58065520444224e-07, "loss": 0.2288, "step": 2468 }, { "epoch": 0.16, "grad_norm": 0.3608221879220619, "learning_rate": 9.58024110334956e-07, "loss": 0.1416, "step": 2469 }, { "epoch": 0.16, "grad_norm": 0.6580402781787531, "learning_rate": 9.579826806854587e-07, "loss": 0.2062, "step": 2470 }, { "epoch": 0.16, "grad_norm": 0.616037261545377, "learning_rate": 9.579412314974996e-07, "loss": 0.1413, "step": 2471 }, { "epoch": 0.16, "grad_norm": 0.3845961631809863, "learning_rate": 9.578997627728472e-07, "loss": 0.1416, "step": 2472 }, { "epoch": 0.16, "grad_norm": 0.6196184136165247, "learning_rate": 9.578582745132702e-07, "loss": 0.4322, "step": 2473 }, { "epoch": 0.16, "grad_norm": 0.6062145541363272, "learning_rate": 9.57816766720539e-07, "loss": 0.0266, "step": 2474 }, { "epoch": 0.16, "grad_norm": 0.4826225528256578, "learning_rate": 9.57775239396424e-07, "loss": 0.0091, "step": 2475 }, { "epoch": 0.16, "grad_norm": 0.39785134353497276, "learning_rate": 9.577336925426972e-07, "loss": 0.1898, "step": 2476 }, { "epoch": 0.16, "grad_norm": 0.6171680118428898, "learning_rate": 9.576921261611308e-07, "loss": 0.1495, "step": 2477 }, { "epoch": 0.16, "grad_norm": 0.7923348973742145, "learning_rate": 9.576505402534984e-07, "loss": 0.1283, "step": 2478 }, { "epoch": 0.16, "grad_norm": 0.45345901198394867, "learning_rate": 9.576089348215738e-07, "loss": 0.2268, "step": 2479 }, { "epoch": 0.16, "grad_norm": 0.48493822051526925, "learning_rate": 9.575673098671322e-07, "loss": 0.2149, "step": 2480 }, { "epoch": 0.16, "grad_norm": 0.7903018784896051, "learning_rate": 9.575256653919492e-07, "loss": 0.1499, "step": 2481 }, { "epoch": 0.16, "grad_norm": 0.3313231203351894, "learning_rate": 9.574840013978016e-07, "loss": 0.0865, "step": 2482 }, { "epoch": 0.16, "grad_norm": 0.37845476055638494, "learning_rate": 9.57442317886467e-07, "loss": 0.3144, "step": 2483 }, { "epoch": 0.16, "grad_norm": 1.8310784224755157, "learning_rate": 9.574006148597237e-07, "loss": 0.2645, "step": 2484 }, { "epoch": 0.16, "grad_norm": 0.5913650808489613, "learning_rate": 9.573588923193504e-07, "loss": 0.1877, "step": 2485 }, { "epoch": 0.16, "grad_norm": 0.39724934213196095, "learning_rate": 9.573171502671272e-07, "loss": 0.1766, "step": 2486 }, { "epoch": 0.16, "grad_norm": 0.49356068507374784, "learning_rate": 9.572753887048352e-07, "loss": 0.2308, "step": 2487 }, { "epoch": 0.16, "grad_norm": 0.6912161352337316, "learning_rate": 9.57233607634256e-07, "loss": 0.2348, "step": 2488 }, { "epoch": 0.16, "grad_norm": 0.6673408530739752, "learning_rate": 9.57191807057172e-07, "loss": 0.182, "step": 2489 }, { "epoch": 0.16, "grad_norm": 0.11580191835952931, "learning_rate": 9.57149986975366e-07, "loss": 0.0047, "step": 2490 }, { "epoch": 0.16, "grad_norm": 0.3003908593523906, "learning_rate": 9.571081473906231e-07, "loss": 0.0654, "step": 2491 }, { "epoch": 0.16, "grad_norm": 0.43900193929379083, "learning_rate": 9.570662883047273e-07, "loss": 0.131, "step": 2492 }, { "epoch": 0.16, "grad_norm": 0.5021973118101629, "learning_rate": 9.570244097194652e-07, "loss": 0.1366, "step": 2493 }, { "epoch": 0.16, "grad_norm": 0.513413478436164, "learning_rate": 9.569825116366229e-07, "loss": 0.1909, "step": 2494 }, { "epoch": 0.16, "grad_norm": 0.975627560256554, "learning_rate": 9.569405940579879e-07, "loss": 0.1584, "step": 2495 }, { "epoch": 0.16, "grad_norm": 0.9213510930240534, "learning_rate": 9.568986569853487e-07, "loss": 0.356, "step": 2496 }, { "epoch": 0.16, "grad_norm": 0.5005513591545933, "learning_rate": 9.568567004204942e-07, "loss": 0.424, "step": 2497 }, { "epoch": 0.16, "grad_norm": 0.3215748174192554, "learning_rate": 9.568147243652145e-07, "loss": 0.2754, "step": 2498 }, { "epoch": 0.16, "grad_norm": 0.592545580116721, "learning_rate": 9.567727288213004e-07, "loss": 0.2503, "step": 2499 }, { "epoch": 0.16, "grad_norm": 0.32801051559945615, "learning_rate": 9.567307137905433e-07, "loss": 0.239, "step": 2500 }, { "epoch": 0.16, "grad_norm": 0.7970070432843507, "learning_rate": 9.56688679274736e-07, "loss": 0.2516, "step": 2501 }, { "epoch": 0.16, "grad_norm": 0.6526024385015151, "learning_rate": 9.566466252756717e-07, "loss": 0.2077, "step": 2502 }, { "epoch": 0.16, "grad_norm": 0.27808512487183307, "learning_rate": 9.566045517951443e-07, "loss": 0.2123, "step": 2503 }, { "epoch": 0.16, "grad_norm": 0.6026659699645546, "learning_rate": 9.565624588349486e-07, "loss": 0.3233, "step": 2504 }, { "epoch": 0.16, "grad_norm": 0.421675142640018, "learning_rate": 9.565203463968807e-07, "loss": 0.2718, "step": 2505 }, { "epoch": 0.16, "grad_norm": 0.9978363566766066, "learning_rate": 9.564782144827372e-07, "loss": 0.2785, "step": 2506 }, { "epoch": 0.16, "grad_norm": 0.5901030590545348, "learning_rate": 9.564360630943153e-07, "loss": 0.143, "step": 2507 }, { "epoch": 0.16, "grad_norm": 0.8699076113639252, "learning_rate": 9.563938922334136e-07, "loss": 0.3662, "step": 2508 }, { "epoch": 0.16, "grad_norm": 0.5544078938630749, "learning_rate": 9.563517019018308e-07, "loss": 0.2223, "step": 2509 }, { "epoch": 0.16, "grad_norm": 0.9647226312710868, "learning_rate": 9.563094921013672e-07, "loss": 0.0912, "step": 2510 }, { "epoch": 0.16, "grad_norm": 0.4990970038813086, "learning_rate": 9.562672628338231e-07, "loss": 0.1941, "step": 2511 }, { "epoch": 0.16, "grad_norm": 0.15629989285131096, "learning_rate": 9.562250141010006e-07, "loss": 0.1158, "step": 2512 }, { "epoch": 0.16, "grad_norm": 0.9254353943652026, "learning_rate": 9.561827459047017e-07, "loss": 0.2113, "step": 2513 }, { "epoch": 0.16, "grad_norm": 0.3158188291288584, "learning_rate": 9.5614045824673e-07, "loss": 0.1268, "step": 2514 }, { "epoch": 0.16, "grad_norm": 1.4468005883944894, "learning_rate": 9.560981511288892e-07, "loss": 0.3139, "step": 2515 }, { "epoch": 0.16, "grad_norm": 0.3096031642338491, "learning_rate": 9.560558245529847e-07, "loss": 0.0759, "step": 2516 }, { "epoch": 0.16, "grad_norm": 0.5855834841151715, "learning_rate": 9.560134785208215e-07, "loss": 0.2879, "step": 2517 }, { "epoch": 0.16, "grad_norm": 0.6364203530360595, "learning_rate": 9.55971113034207e-07, "loss": 0.3691, "step": 2518 }, { "epoch": 0.16, "grad_norm": 0.5258385288479613, "learning_rate": 9.55928728094948e-07, "loss": 0.1821, "step": 2519 }, { "epoch": 0.16, "grad_norm": 0.31328462882732316, "learning_rate": 9.558863237048528e-07, "loss": 0.2089, "step": 2520 }, { "epoch": 0.16, "grad_norm": 0.4188983438063981, "learning_rate": 9.55843899865731e-07, "loss": 0.2094, "step": 2521 }, { "epoch": 0.16, "grad_norm": 0.7633257798796065, "learning_rate": 9.558014565793917e-07, "loss": 0.2042, "step": 2522 }, { "epoch": 0.16, "grad_norm": 0.3434334507804224, "learning_rate": 9.557589938476462e-07, "loss": 0.0868, "step": 2523 }, { "epoch": 0.16, "grad_norm": 1.4018431164075182, "learning_rate": 9.557165116723056e-07, "loss": 0.3267, "step": 2524 }, { "epoch": 0.16, "grad_norm": 0.44637835500273204, "learning_rate": 9.556740100551827e-07, "loss": 0.1597, "step": 2525 }, { "epoch": 0.16, "grad_norm": 0.2952894519625469, "learning_rate": 9.556314889980906e-07, "loss": 0.1849, "step": 2526 }, { "epoch": 0.16, "grad_norm": 0.606974143033971, "learning_rate": 9.55588948502843e-07, "loss": 0.2545, "step": 2527 }, { "epoch": 0.16, "grad_norm": 0.808011575425698, "learning_rate": 9.55546388571255e-07, "loss": 0.4429, "step": 2528 }, { "epoch": 0.16, "grad_norm": 0.38018435236573644, "learning_rate": 9.555038092051424e-07, "loss": 0.1824, "step": 2529 }, { "epoch": 0.16, "grad_norm": 0.5634119634022953, "learning_rate": 9.554612104063218e-07, "loss": 0.2054, "step": 2530 }, { "epoch": 0.16, "grad_norm": 0.4168603419719356, "learning_rate": 9.5541859217661e-07, "loss": 0.055, "step": 2531 }, { "epoch": 0.16, "grad_norm": 0.7034347148926832, "learning_rate": 9.55375954517826e-07, "loss": 0.4097, "step": 2532 }, { "epoch": 0.16, "grad_norm": 0.6641957985703538, "learning_rate": 9.553332974317881e-07, "loss": 0.0477, "step": 2533 }, { "epoch": 0.16, "grad_norm": 0.16677956900373406, "learning_rate": 9.552906209203164e-07, "loss": 0.093, "step": 2534 }, { "epoch": 0.16, "grad_norm": 0.672648093655843, "learning_rate": 9.552479249852314e-07, "loss": 0.2203, "step": 2535 }, { "epoch": 0.16, "grad_norm": 0.9316536883359925, "learning_rate": 9.55205209628355e-07, "loss": 0.0684, "step": 2536 }, { "epoch": 0.16, "grad_norm": 0.7425895926971369, "learning_rate": 9.551624748515093e-07, "loss": 0.3351, "step": 2537 }, { "epoch": 0.16, "grad_norm": 0.5577282767065026, "learning_rate": 9.551197206565172e-07, "loss": 0.25, "step": 2538 }, { "epoch": 0.16, "grad_norm": 0.7144229171110558, "learning_rate": 9.55076947045203e-07, "loss": 0.0449, "step": 2539 }, { "epoch": 0.16, "grad_norm": 0.6754350692010835, "learning_rate": 9.550341540193915e-07, "loss": 0.239, "step": 2540 }, { "epoch": 0.16, "grad_norm": 0.48017233439651535, "learning_rate": 9.549913415809083e-07, "loss": 0.1089, "step": 2541 }, { "epoch": 0.16, "grad_norm": 0.2157533579531004, "learning_rate": 9.549485097315797e-07, "loss": 0.0096, "step": 2542 }, { "epoch": 0.16, "grad_norm": 0.593163716860367, "learning_rate": 9.549056584732332e-07, "loss": 0.1677, "step": 2543 }, { "epoch": 0.16, "grad_norm": 0.9026525850890124, "learning_rate": 9.54862787807697e-07, "loss": 0.0445, "step": 2544 }, { "epoch": 0.16, "grad_norm": 0.7539484755867265, "learning_rate": 9.548198977367997e-07, "loss": 0.3909, "step": 2545 }, { "epoch": 0.16, "grad_norm": 0.5651341354415033, "learning_rate": 9.547769882623711e-07, "loss": 0.2107, "step": 2546 }, { "epoch": 0.16, "grad_norm": 0.5613177694374492, "learning_rate": 9.54734059386242e-07, "loss": 0.0349, "step": 2547 }, { "epoch": 0.16, "grad_norm": 1.7745166693905394, "learning_rate": 9.54691111110244e-07, "loss": 0.1507, "step": 2548 }, { "epoch": 0.16, "grad_norm": 0.5372854238568204, "learning_rate": 9.54648143436209e-07, "loss": 0.2093, "step": 2549 }, { "epoch": 0.16, "grad_norm": 0.9249320537401026, "learning_rate": 9.546051563659703e-07, "loss": 0.1975, "step": 2550 }, { "epoch": 0.16, "grad_norm": 0.7554143629377637, "learning_rate": 9.545621499013618e-07, "loss": 0.1915, "step": 2551 }, { "epoch": 0.16, "grad_norm": 0.7802398204245109, "learning_rate": 9.545191240442181e-07, "loss": 0.1643, "step": 2552 }, { "epoch": 0.16, "grad_norm": 0.8053293945513413, "learning_rate": 9.54476078796375e-07, "loss": 0.1896, "step": 2553 }, { "epoch": 0.16, "grad_norm": 0.6111219177554211, "learning_rate": 9.544330141596687e-07, "loss": 0.1185, "step": 2554 }, { "epoch": 0.16, "grad_norm": 0.7438516675839351, "learning_rate": 9.543899301359365e-07, "loss": 0.2293, "step": 2555 }, { "epoch": 0.16, "grad_norm": 1.210898332409302, "learning_rate": 9.543468267270164e-07, "loss": 0.1928, "step": 2556 }, { "epoch": 0.16, "grad_norm": 0.5612228283646611, "learning_rate": 9.543037039347473e-07, "loss": 0.2255, "step": 2557 }, { "epoch": 0.16, "grad_norm": 0.6662477311234537, "learning_rate": 9.54260561760969e-07, "loss": 0.1428, "step": 2558 }, { "epoch": 0.16, "grad_norm": 0.37963751492009234, "learning_rate": 9.54217400207522e-07, "loss": 0.2283, "step": 2559 }, { "epoch": 0.16, "grad_norm": 0.48889752434070105, "learning_rate": 9.541742192762476e-07, "loss": 0.2742, "step": 2560 }, { "epoch": 0.16, "grad_norm": 0.7399252688970139, "learning_rate": 9.541310189689879e-07, "loss": 0.3073, "step": 2561 }, { "epoch": 0.16, "grad_norm": 0.8568202974158361, "learning_rate": 9.54087799287586e-07, "loss": 0.116, "step": 2562 }, { "epoch": 0.16, "grad_norm": 0.35524417309513245, "learning_rate": 9.540445602338859e-07, "loss": 0.1225, "step": 2563 }, { "epoch": 0.16, "grad_norm": 0.8459374834491684, "learning_rate": 9.54001301809732e-07, "loss": 0.2193, "step": 2564 }, { "epoch": 0.16, "grad_norm": 0.36609078980197424, "learning_rate": 9.5395802401697e-07, "loss": 0.0446, "step": 2565 }, { "epoch": 0.16, "grad_norm": 0.6045336995181586, "learning_rate": 9.539147268574459e-07, "loss": 0.3463, "step": 2566 }, { "epoch": 0.16, "grad_norm": 0.6689902923021831, "learning_rate": 9.538714103330073e-07, "loss": 0.2253, "step": 2567 }, { "epoch": 0.16, "grad_norm": 0.9679760657231808, "learning_rate": 9.53828074445502e-07, "loss": 0.0427, "step": 2568 }, { "epoch": 0.16, "grad_norm": 0.11885571743426666, "learning_rate": 9.537847191967785e-07, "loss": 0.0941, "step": 2569 }, { "epoch": 0.16, "grad_norm": 0.5651051388610489, "learning_rate": 9.537413445886868e-07, "loss": 0.0448, "step": 2570 }, { "epoch": 0.16, "grad_norm": 0.924577730359375, "learning_rate": 9.53697950623077e-07, "loss": 0.3433, "step": 2571 }, { "epoch": 0.16, "grad_norm": 1.1859239450215893, "learning_rate": 9.536545373018009e-07, "loss": 0.1671, "step": 2572 }, { "epoch": 0.16, "grad_norm": 0.6339655036031436, "learning_rate": 9.536111046267102e-07, "loss": 0.1357, "step": 2573 }, { "epoch": 0.16, "grad_norm": 0.9970807259261836, "learning_rate": 9.535676525996577e-07, "loss": 0.1535, "step": 2574 }, { "epoch": 0.16, "grad_norm": 1.262159878337895, "learning_rate": 9.535241812224975e-07, "loss": 0.2395, "step": 2575 }, { "epoch": 0.16, "grad_norm": 0.3263401335027573, "learning_rate": 9.53480690497084e-07, "loss": 0.1853, "step": 2576 }, { "epoch": 0.16, "grad_norm": 0.7486868284212916, "learning_rate": 9.534371804252726e-07, "loss": 0.1686, "step": 2577 }, { "epoch": 0.16, "grad_norm": 0.8823603113615602, "learning_rate": 9.533936510089197e-07, "loss": 0.3605, "step": 2578 }, { "epoch": 0.16, "grad_norm": 0.5844132652817103, "learning_rate": 9.533501022498821e-07, "loss": 0.2721, "step": 2579 }, { "epoch": 0.16, "grad_norm": 0.3938624575027053, "learning_rate": 9.533065341500178e-07, "loss": 0.0185, "step": 2580 }, { "epoch": 0.16, "grad_norm": 1.0230096021188217, "learning_rate": 9.532629467111855e-07, "loss": 0.206, "step": 2581 }, { "epoch": 0.16, "grad_norm": 0.47886346771279825, "learning_rate": 9.532193399352448e-07, "loss": 0.1239, "step": 2582 }, { "epoch": 0.16, "grad_norm": 0.41974794747755956, "learning_rate": 9.531757138240559e-07, "loss": 0.231, "step": 2583 }, { "epoch": 0.16, "grad_norm": 1.4259696823285501, "learning_rate": 9.5313206837948e-07, "loss": 0.3168, "step": 2584 }, { "epoch": 0.16, "grad_norm": 0.21675652071370596, "learning_rate": 9.530884036033793e-07, "loss": 0.1029, "step": 2585 }, { "epoch": 0.16, "grad_norm": 0.163548726262206, "learning_rate": 9.530447194976163e-07, "loss": 0.0652, "step": 2586 }, { "epoch": 0.16, "grad_norm": 0.44003913761455, "learning_rate": 9.53001016064055e-07, "loss": 0.2302, "step": 2587 }, { "epoch": 0.17, "grad_norm": 0.20866023849578044, "learning_rate": 9.529572933045595e-07, "loss": 0.0638, "step": 2588 }, { "epoch": 0.17, "grad_norm": 0.7631673046005106, "learning_rate": 9.529135512209955e-07, "loss": 0.3408, "step": 2589 }, { "epoch": 0.17, "grad_norm": 1.371969954717516, "learning_rate": 9.528697898152288e-07, "loss": 0.3842, "step": 2590 }, { "epoch": 0.17, "grad_norm": 0.6568255870713744, "learning_rate": 9.528260090891266e-07, "loss": 0.2441, "step": 2591 }, { "epoch": 0.17, "grad_norm": 0.41472603573328204, "learning_rate": 9.527822090445565e-07, "loss": 0.1132, "step": 2592 }, { "epoch": 0.17, "grad_norm": 0.8264338215602798, "learning_rate": 9.527383896833872e-07, "loss": 0.3289, "step": 2593 }, { "epoch": 0.17, "grad_norm": 0.7378902329751158, "learning_rate": 9.52694551007488e-07, "loss": 0.1971, "step": 2594 }, { "epoch": 0.17, "grad_norm": 0.5173460944303994, "learning_rate": 9.526506930187292e-07, "loss": 0.1739, "step": 2595 }, { "epoch": 0.17, "grad_norm": 0.8927570683629182, "learning_rate": 9.526068157189819e-07, "loss": 0.4867, "step": 2596 }, { "epoch": 0.17, "grad_norm": 0.522756344053833, "learning_rate": 9.525629191101181e-07, "loss": 0.0821, "step": 2597 }, { "epoch": 0.17, "grad_norm": 0.9624025756487392, "learning_rate": 9.525190031940105e-07, "loss": 0.1431, "step": 2598 }, { "epoch": 0.17, "grad_norm": 0.9228900074603312, "learning_rate": 9.524750679725323e-07, "loss": 0.3242, "step": 2599 }, { "epoch": 0.17, "grad_norm": 0.31772156343051866, "learning_rate": 9.524311134475582e-07, "loss": 0.2404, "step": 2600 }, { "epoch": 0.17, "grad_norm": 0.5661715722613065, "learning_rate": 9.523871396209633e-07, "loss": 0.1122, "step": 2601 }, { "epoch": 0.17, "grad_norm": 1.888294519797739, "learning_rate": 9.523431464946236e-07, "loss": 0.2555, "step": 2602 }, { "epoch": 0.17, "grad_norm": 1.4107675325749358, "learning_rate": 9.522991340704161e-07, "loss": 0.3525, "step": 2603 }, { "epoch": 0.17, "grad_norm": 0.7403158963458948, "learning_rate": 9.522551023502181e-07, "loss": 0.2662, "step": 2604 }, { "epoch": 0.17, "grad_norm": 0.5424851219006864, "learning_rate": 9.522110513359083e-07, "loss": 0.1412, "step": 2605 }, { "epoch": 0.17, "grad_norm": 1.4147819856967123, "learning_rate": 9.521669810293661e-07, "loss": 0.0952, "step": 2606 }, { "epoch": 0.17, "grad_norm": 0.5717881857910588, "learning_rate": 9.521228914324715e-07, "loss": 0.0613, "step": 2607 }, { "epoch": 0.17, "grad_norm": 1.4241907311844593, "learning_rate": 9.520787825471055e-07, "loss": 0.1777, "step": 2608 }, { "epoch": 0.17, "grad_norm": 0.2865823410065802, "learning_rate": 9.520346543751496e-07, "loss": 0.0107, "step": 2609 }, { "epoch": 0.17, "grad_norm": 0.8866862834291714, "learning_rate": 9.519905069184869e-07, "loss": 0.2322, "step": 2610 }, { "epoch": 0.17, "grad_norm": 0.8196263524169246, "learning_rate": 9.519463401790004e-07, "loss": 0.2045, "step": 2611 }, { "epoch": 0.17, "grad_norm": 0.7649383541668356, "learning_rate": 9.519021541585748e-07, "loss": 0.3651, "step": 2612 }, { "epoch": 0.17, "grad_norm": 0.44313444995716733, "learning_rate": 9.518579488590946e-07, "loss": 0.3697, "step": 2613 }, { "epoch": 0.17, "grad_norm": 0.5828627418819751, "learning_rate": 9.51813724282446e-07, "loss": 0.2626, "step": 2614 }, { "epoch": 0.17, "grad_norm": 0.2969889590099513, "learning_rate": 9.517694804305156e-07, "loss": 0.182, "step": 2615 }, { "epoch": 0.17, "grad_norm": 0.20698729443897265, "learning_rate": 9.517252173051911e-07, "loss": 0.004, "step": 2616 }, { "epoch": 0.17, "grad_norm": 0.9516054489380347, "learning_rate": 9.516809349083608e-07, "loss": 0.1901, "step": 2617 }, { "epoch": 0.17, "grad_norm": 0.3040730187488041, "learning_rate": 9.516366332419137e-07, "loss": 0.07, "step": 2618 }, { "epoch": 0.17, "grad_norm": 1.7427736545565709, "learning_rate": 9.515923123077399e-07, "loss": 0.3721, "step": 2619 }, { "epoch": 0.17, "grad_norm": 1.4227813936269338, "learning_rate": 9.515479721077303e-07, "loss": 0.1311, "step": 2620 }, { "epoch": 0.17, "grad_norm": 0.9890664145268651, "learning_rate": 9.515036126437766e-07, "loss": 0.1963, "step": 2621 }, { "epoch": 0.17, "grad_norm": 0.4338410237920062, "learning_rate": 9.514592339177709e-07, "loss": 0.1358, "step": 2622 }, { "epoch": 0.17, "grad_norm": 0.6461712191502266, "learning_rate": 9.514148359316069e-07, "loss": 0.1093, "step": 2623 }, { "epoch": 0.17, "grad_norm": 1.0781316168167339, "learning_rate": 9.513704186871785e-07, "loss": 0.5929, "step": 2624 }, { "epoch": 0.17, "grad_norm": 0.5937921628955494, "learning_rate": 9.513259821863806e-07, "loss": 0.025, "step": 2625 }, { "epoch": 0.17, "grad_norm": 2.2050441558443006, "learning_rate": 9.512815264311092e-07, "loss": 0.3322, "step": 2626 }, { "epoch": 0.17, "grad_norm": 0.6447535671899982, "learning_rate": 9.512370514232606e-07, "loss": 0.2322, "step": 2627 }, { "epoch": 0.17, "grad_norm": 0.38982095788275767, "learning_rate": 9.511925571647322e-07, "loss": 0.1749, "step": 2628 }, { "epoch": 0.17, "grad_norm": 1.2553585210304645, "learning_rate": 9.511480436574224e-07, "loss": 0.2043, "step": 2629 }, { "epoch": 0.17, "grad_norm": 0.6165582812152801, "learning_rate": 9.511035109032301e-07, "loss": 0.0743, "step": 2630 }, { "epoch": 0.17, "grad_norm": 1.3624701319014514, "learning_rate": 9.510589589040552e-07, "loss": 0.3819, "step": 2631 }, { "epoch": 0.17, "grad_norm": 0.4195544643620065, "learning_rate": 9.510143876617985e-07, "loss": 0.1449, "step": 2632 }, { "epoch": 0.17, "grad_norm": 0.2960278562831275, "learning_rate": 9.509697971783612e-07, "loss": 0.0074, "step": 2633 }, { "epoch": 0.17, "grad_norm": 0.45621861203351, "learning_rate": 9.509251874556459e-07, "loss": 0.2407, "step": 2634 }, { "epoch": 0.17, "grad_norm": 0.9044462460962452, "learning_rate": 9.508805584955555e-07, "loss": 0.0322, "step": 2635 }, { "epoch": 0.17, "grad_norm": 0.6058690813013976, "learning_rate": 9.508359102999941e-07, "loss": 0.2087, "step": 2636 }, { "epoch": 0.17, "grad_norm": 0.7613827905763582, "learning_rate": 9.507912428708666e-07, "loss": 0.3585, "step": 2637 }, { "epoch": 0.17, "grad_norm": 0.6886394154987239, "learning_rate": 9.507465562100784e-07, "loss": 0.2638, "step": 2638 }, { "epoch": 0.17, "grad_norm": 0.7294311137348841, "learning_rate": 9.507018503195361e-07, "loss": 0.1544, "step": 2639 }, { "epoch": 0.17, "grad_norm": 0.5800813312282401, "learning_rate": 9.506571252011466e-07, "loss": 0.2576, "step": 2640 }, { "epoch": 0.17, "grad_norm": 0.982939595532959, "learning_rate": 9.506123808568185e-07, "loss": 0.1237, "step": 2641 }, { "epoch": 0.17, "grad_norm": 0.49116126395133736, "learning_rate": 9.505676172884601e-07, "loss": 0.1741, "step": 2642 }, { "epoch": 0.17, "grad_norm": 0.8696972031988437, "learning_rate": 9.505228344979817e-07, "loss": 0.2839, "step": 2643 }, { "epoch": 0.17, "grad_norm": 0.45227011206891493, "learning_rate": 9.504780324872932e-07, "loss": 0.1896, "step": 2644 }, { "epoch": 0.17, "grad_norm": 0.9564678587797452, "learning_rate": 9.504332112583064e-07, "loss": 0.3192, "step": 2645 }, { "epoch": 0.17, "grad_norm": 0.6422774084741891, "learning_rate": 9.503883708129334e-07, "loss": 0.2231, "step": 2646 }, { "epoch": 0.17, "grad_norm": 0.9070393905137192, "learning_rate": 9.50343511153087e-07, "loss": 0.2916, "step": 2647 }, { "epoch": 0.17, "grad_norm": 0.7973637615785845, "learning_rate": 9.502986322806811e-07, "loss": 0.5046, "step": 2648 }, { "epoch": 0.17, "grad_norm": 0.7107042441039871, "learning_rate": 9.502537341976305e-07, "loss": 0.1953, "step": 2649 }, { "epoch": 0.17, "grad_norm": 0.6169596807696893, "learning_rate": 9.502088169058503e-07, "loss": 0.1974, "step": 2650 }, { "epoch": 0.17, "grad_norm": 0.5224394109098526, "learning_rate": 9.501638804072569e-07, "loss": 0.2402, "step": 2651 }, { "epoch": 0.17, "grad_norm": 0.3155133421120215, "learning_rate": 9.501189247037675e-07, "loss": 0.0924, "step": 2652 }, { "epoch": 0.17, "grad_norm": 0.4912769084674077, "learning_rate": 9.500739497973e-07, "loss": 0.1181, "step": 2653 }, { "epoch": 0.17, "grad_norm": 0.8258919552020955, "learning_rate": 9.500289556897729e-07, "loss": 0.254, "step": 2654 }, { "epoch": 0.17, "grad_norm": 0.5399399057606297, "learning_rate": 9.49983942383106e-07, "loss": 0.2062, "step": 2655 }, { "epoch": 0.17, "grad_norm": 0.48393195306922543, "learning_rate": 9.499389098792196e-07, "loss": 0.1076, "step": 2656 }, { "epoch": 0.17, "grad_norm": 1.0377411795960845, "learning_rate": 9.498938581800347e-07, "loss": 0.1113, "step": 2657 }, { "epoch": 0.17, "grad_norm": 0.4560969694650829, "learning_rate": 9.498487872874733e-07, "loss": 0.3368, "step": 2658 }, { "epoch": 0.17, "grad_norm": 0.9862958881707448, "learning_rate": 9.498036972034584e-07, "loss": 0.1763, "step": 2659 }, { "epoch": 0.17, "grad_norm": 0.6833198751285896, "learning_rate": 9.497585879299137e-07, "loss": 0.28, "step": 2660 }, { "epoch": 0.17, "grad_norm": 1.4888347324512454, "learning_rate": 9.497134594687634e-07, "loss": 0.2217, "step": 2661 }, { "epoch": 0.17, "grad_norm": 0.5719202825316656, "learning_rate": 9.496683118219328e-07, "loss": 0.2603, "step": 2662 }, { "epoch": 0.17, "grad_norm": 0.7848817116848156, "learning_rate": 9.496231449913482e-07, "loss": 0.0865, "step": 2663 }, { "epoch": 0.17, "grad_norm": 0.5583436160757501, "learning_rate": 9.495779589789364e-07, "loss": 0.1155, "step": 2664 }, { "epoch": 0.17, "grad_norm": 0.29229710747176135, "learning_rate": 9.495327537866249e-07, "loss": 0.0897, "step": 2665 }, { "epoch": 0.17, "grad_norm": 0.6378875468827189, "learning_rate": 9.494875294163427e-07, "loss": 0.3491, "step": 2666 }, { "epoch": 0.17, "grad_norm": 0.7831721155048247, "learning_rate": 9.494422858700187e-07, "loss": 0.2136, "step": 2667 }, { "epoch": 0.17, "grad_norm": 0.7366203540518024, "learning_rate": 9.493970231495834e-07, "loss": 0.3171, "step": 2668 }, { "epoch": 0.17, "grad_norm": 0.3410124132656613, "learning_rate": 9.493517412569678e-07, "loss": 0.0108, "step": 2669 }, { "epoch": 0.17, "grad_norm": 2.0068122108176234, "learning_rate": 9.493064401941034e-07, "loss": 0.1751, "step": 2670 }, { "epoch": 0.17, "grad_norm": 0.8529491418660656, "learning_rate": 9.492611199629232e-07, "loss": 0.2523, "step": 2671 }, { "epoch": 0.17, "grad_norm": 0.25242722415960495, "learning_rate": 9.492157805653604e-07, "loss": 0.1797, "step": 2672 }, { "epoch": 0.17, "grad_norm": 1.085744296075026, "learning_rate": 9.491704220033494e-07, "loss": 0.1487, "step": 2673 }, { "epoch": 0.17, "grad_norm": 0.5368273019639394, "learning_rate": 9.491250442788252e-07, "loss": 0.1409, "step": 2674 }, { "epoch": 0.17, "grad_norm": 0.7625266156549131, "learning_rate": 9.490796473937238e-07, "loss": 0.2027, "step": 2675 }, { "epoch": 0.17, "grad_norm": 0.48853974831983377, "learning_rate": 9.49034231349982e-07, "loss": 0.0527, "step": 2676 }, { "epoch": 0.17, "grad_norm": 0.5674460569210614, "learning_rate": 9.48988796149537e-07, "loss": 0.1686, "step": 2677 }, { "epoch": 0.17, "grad_norm": 0.3302941832085532, "learning_rate": 9.489433417943275e-07, "loss": 0.2061, "step": 2678 }, { "epoch": 0.17, "grad_norm": 0.6617601654386271, "learning_rate": 9.488978682862925e-07, "loss": 0.0978, "step": 2679 }, { "epoch": 0.17, "grad_norm": 1.2120206993789706, "learning_rate": 9.488523756273721e-07, "loss": 0.1629, "step": 2680 }, { "epoch": 0.17, "grad_norm": 0.27118500248326494, "learning_rate": 9.488068638195071e-07, "loss": 0.1445, "step": 2681 }, { "epoch": 0.17, "grad_norm": 1.2673674372884545, "learning_rate": 9.487613328646389e-07, "loss": 0.1109, "step": 2682 }, { "epoch": 0.17, "grad_norm": 0.39638542360387147, "learning_rate": 9.487157827647101e-07, "loss": 0.1716, "step": 2683 }, { "epoch": 0.17, "grad_norm": 0.8976279257317973, "learning_rate": 9.486702135216643e-07, "loss": 0.3226, "step": 2684 }, { "epoch": 0.17, "grad_norm": 0.8303920226304105, "learning_rate": 9.486246251374449e-07, "loss": 0.253, "step": 2685 }, { "epoch": 0.17, "grad_norm": 1.5509054405484801, "learning_rate": 9.485790176139972e-07, "loss": 0.2561, "step": 2686 }, { "epoch": 0.17, "grad_norm": 0.4830660775150088, "learning_rate": 9.485333909532669e-07, "loss": 0.1147, "step": 2687 }, { "epoch": 0.17, "grad_norm": 0.8118934988535611, "learning_rate": 9.484877451572007e-07, "loss": 0.4417, "step": 2688 }, { "epoch": 0.17, "grad_norm": 0.3074240992680236, "learning_rate": 9.484420802277454e-07, "loss": 0.0119, "step": 2689 }, { "epoch": 0.17, "grad_norm": 0.4397118420494491, "learning_rate": 9.483963961668496e-07, "loss": 0.3114, "step": 2690 }, { "epoch": 0.17, "grad_norm": 0.5599381421398959, "learning_rate": 9.483506929764622e-07, "loss": 0.1553, "step": 2691 }, { "epoch": 0.17, "grad_norm": 0.5900923425201272, "learning_rate": 9.483049706585329e-07, "loss": 0.0354, "step": 2692 }, { "epoch": 0.17, "grad_norm": 0.7315701933263555, "learning_rate": 9.482592292150122e-07, "loss": 0.1048, "step": 2693 }, { "epoch": 0.17, "grad_norm": 0.5995646513358464, "learning_rate": 9.482134686478518e-07, "loss": 0.2526, "step": 2694 }, { "epoch": 0.17, "grad_norm": 0.5594556337693576, "learning_rate": 9.481676889590038e-07, "loss": 0.2493, "step": 2695 }, { "epoch": 0.17, "grad_norm": 1.006129048690075, "learning_rate": 9.481218901504214e-07, "loss": 0.4051, "step": 2696 }, { "epoch": 0.17, "grad_norm": 0.46685641387092186, "learning_rate": 9.480760722240582e-07, "loss": 0.2618, "step": 2697 }, { "epoch": 0.17, "grad_norm": 1.2050649689990065, "learning_rate": 9.480302351818689e-07, "loss": 0.1889, "step": 2698 }, { "epoch": 0.17, "grad_norm": 0.35983640486842944, "learning_rate": 9.479843790258093e-07, "loss": 0.1236, "step": 2699 }, { "epoch": 0.17, "grad_norm": 0.6665101621244433, "learning_rate": 9.479385037578354e-07, "loss": 0.3935, "step": 2700 }, { "epoch": 0.17, "grad_norm": 0.7461767823578016, "learning_rate": 9.478926093799045e-07, "loss": 0.0802, "step": 2701 }, { "epoch": 0.17, "grad_norm": 0.729629630723568, "learning_rate": 9.478466958939745e-07, "loss": 0.2092, "step": 2702 }, { "epoch": 0.17, "grad_norm": 0.5086910171019614, "learning_rate": 9.478007633020042e-07, "loss": 0.5959, "step": 2703 }, { "epoch": 0.17, "grad_norm": 1.004591934260547, "learning_rate": 9.47754811605953e-07, "loss": 0.2812, "step": 2704 }, { "epoch": 0.17, "grad_norm": 0.46586024315715674, "learning_rate": 9.477088408077816e-07, "loss": 0.1034, "step": 2705 }, { "epoch": 0.17, "grad_norm": 0.6870177141639254, "learning_rate": 9.476628509094511e-07, "loss": 0.0949, "step": 2706 }, { "epoch": 0.17, "grad_norm": 0.890797400301904, "learning_rate": 9.476168419129234e-07, "loss": 0.2011, "step": 2707 }, { "epoch": 0.17, "grad_norm": 0.4284635485018434, "learning_rate": 9.475708138201612e-07, "loss": 0.0761, "step": 2708 }, { "epoch": 0.17, "grad_norm": 0.49520868939639545, "learning_rate": 9.475247666331285e-07, "loss": 0.1412, "step": 2709 }, { "epoch": 0.17, "grad_norm": 0.7249237889122515, "learning_rate": 9.474787003537897e-07, "loss": 0.305, "step": 2710 }, { "epoch": 0.17, "grad_norm": 0.3673553451629228, "learning_rate": 9.474326149841099e-07, "loss": 0.1136, "step": 2711 }, { "epoch": 0.17, "grad_norm": 0.9513134842408071, "learning_rate": 9.473865105260554e-07, "loss": 0.0823, "step": 2712 }, { "epoch": 0.17, "grad_norm": 0.275082949196564, "learning_rate": 9.47340386981593e-07, "loss": 0.0768, "step": 2713 }, { "epoch": 0.17, "grad_norm": 0.38376522592074097, "learning_rate": 9.472942443526903e-07, "loss": 0.016, "step": 2714 }, { "epoch": 0.17, "grad_norm": 0.6354252208438675, "learning_rate": 9.472480826413161e-07, "loss": 0.2452, "step": 2715 }, { "epoch": 0.17, "grad_norm": 0.7838325564258156, "learning_rate": 9.472019018494395e-07, "loss": 0.2635, "step": 2716 }, { "epoch": 0.17, "grad_norm": 0.7046123557223944, "learning_rate": 9.471557019790308e-07, "loss": 0.1751, "step": 2717 }, { "epoch": 0.17, "grad_norm": 0.1994700866665963, "learning_rate": 9.471094830320609e-07, "loss": 0.0422, "step": 2718 }, { "epoch": 0.17, "grad_norm": 1.0426057348591593, "learning_rate": 9.470632450105018e-07, "loss": 0.1409, "step": 2719 }, { "epoch": 0.17, "grad_norm": 0.2426962738025242, "learning_rate": 9.470169879163258e-07, "loss": 0.1847, "step": 2720 }, { "epoch": 0.17, "grad_norm": 1.1177294538934748, "learning_rate": 9.469707117515066e-07, "loss": 0.2524, "step": 2721 }, { "epoch": 0.17, "grad_norm": 0.843378487215984, "learning_rate": 9.469244165180183e-07, "loss": 0.1706, "step": 2722 }, { "epoch": 0.17, "grad_norm": 0.526731905595799, "learning_rate": 9.46878102217836e-07, "loss": 0.3975, "step": 2723 }, { "epoch": 0.17, "grad_norm": 0.37146034006379797, "learning_rate": 9.468317688529354e-07, "loss": 0.1483, "step": 2724 }, { "epoch": 0.17, "grad_norm": 0.1941258775726189, "learning_rate": 9.467854164252934e-07, "loss": 0.0108, "step": 2725 }, { "epoch": 0.17, "grad_norm": 0.6248765243759603, "learning_rate": 9.467390449368873e-07, "loss": 0.1815, "step": 2726 }, { "epoch": 0.17, "grad_norm": 0.5849445940729799, "learning_rate": 9.466926543896954e-07, "loss": 0.2543, "step": 2727 }, { "epoch": 0.17, "grad_norm": 0.6364373563334739, "learning_rate": 9.466462447856971e-07, "loss": 0.1878, "step": 2728 }, { "epoch": 0.17, "grad_norm": 0.6116281503984071, "learning_rate": 9.46599816126872e-07, "loss": 0.1425, "step": 2729 }, { "epoch": 0.17, "grad_norm": 0.8351472099940584, "learning_rate": 9.465533684152009e-07, "loss": 0.1702, "step": 2730 }, { "epoch": 0.17, "grad_norm": 0.22066066406570445, "learning_rate": 9.465069016526657e-07, "loss": 0.0086, "step": 2731 }, { "epoch": 0.17, "grad_norm": 0.5012185405558273, "learning_rate": 9.464604158412483e-07, "loss": 0.3493, "step": 2732 }, { "epoch": 0.17, "grad_norm": 0.7384931294052169, "learning_rate": 9.46413910982932e-07, "loss": 0.1253, "step": 2733 }, { "epoch": 0.17, "grad_norm": 0.55046884653624, "learning_rate": 9.463673870797008e-07, "loss": 0.1763, "step": 2734 }, { "epoch": 0.17, "grad_norm": 0.6388104129942009, "learning_rate": 9.463208441335398e-07, "loss": 0.3693, "step": 2735 }, { "epoch": 0.17, "grad_norm": 0.7641560230791178, "learning_rate": 9.462742821464342e-07, "loss": 0.2299, "step": 2736 }, { "epoch": 0.17, "grad_norm": 0.2596875295580591, "learning_rate": 9.462277011203707e-07, "loss": 0.1694, "step": 2737 }, { "epoch": 0.17, "grad_norm": 0.551082981625449, "learning_rate": 9.461811010573364e-07, "loss": 0.0953, "step": 2738 }, { "epoch": 0.17, "grad_norm": 0.4609512229823886, "learning_rate": 9.461344819593193e-07, "loss": 0.0984, "step": 2739 }, { "epoch": 0.17, "grad_norm": 0.4749796653783331, "learning_rate": 9.460878438283085e-07, "loss": 0.1183, "step": 2740 }, { "epoch": 0.17, "grad_norm": 0.7080032973932419, "learning_rate": 9.460411866662935e-07, "loss": 0.3335, "step": 2741 }, { "epoch": 0.17, "grad_norm": 0.3570665673311762, "learning_rate": 9.459945104752648e-07, "loss": 0.1678, "step": 2742 }, { "epoch": 0.17, "grad_norm": 1.0982488308184704, "learning_rate": 9.459478152572138e-07, "loss": 0.1659, "step": 2743 }, { "epoch": 0.17, "grad_norm": 0.7899017343218295, "learning_rate": 9.459011010141324e-07, "loss": 0.1995, "step": 2744 }, { "epoch": 0.18, "grad_norm": 0.87676712175917, "learning_rate": 9.458543677480137e-07, "loss": 0.1066, "step": 2745 }, { "epoch": 0.18, "grad_norm": 0.6629090266480245, "learning_rate": 9.458076154608515e-07, "loss": 0.2194, "step": 2746 }, { "epoch": 0.18, "grad_norm": 0.6939508591988042, "learning_rate": 9.457608441546401e-07, "loss": 0.2803, "step": 2747 }, { "epoch": 0.18, "grad_norm": 1.0121285258968407, "learning_rate": 9.45714053831375e-07, "loss": 0.148, "step": 2748 }, { "epoch": 0.18, "grad_norm": 0.7661209179901457, "learning_rate": 9.456672444930524e-07, "loss": 0.0927, "step": 2749 }, { "epoch": 0.18, "grad_norm": 1.7991497350903882, "learning_rate": 9.456204161416692e-07, "loss": 0.0553, "step": 2750 }, { "epoch": 0.18, "grad_norm": 0.7160695877161593, "learning_rate": 9.455735687792232e-07, "loss": 0.3314, "step": 2751 }, { "epoch": 0.18, "grad_norm": 1.0368872036386638, "learning_rate": 9.455267024077131e-07, "loss": 0.1253, "step": 2752 }, { "epoch": 0.18, "grad_norm": 0.4879145975404485, "learning_rate": 9.454798170291384e-07, "loss": 0.396, "step": 2753 }, { "epoch": 0.18, "grad_norm": 0.8331207912215928, "learning_rate": 9.454329126454991e-07, "loss": 0.0842, "step": 2754 }, { "epoch": 0.18, "grad_norm": 0.47135914673985724, "learning_rate": 9.453859892587962e-07, "loss": 0.2683, "step": 2755 }, { "epoch": 0.18, "grad_norm": 0.5011380936496737, "learning_rate": 9.453390468710317e-07, "loss": 0.2232, "step": 2756 }, { "epoch": 0.18, "grad_norm": 0.5455151754771657, "learning_rate": 9.452920854842084e-07, "loss": 0.0862, "step": 2757 }, { "epoch": 0.18, "grad_norm": 0.4558905703488411, "learning_rate": 9.452451051003294e-07, "loss": 0.2177, "step": 2758 }, { "epoch": 0.18, "grad_norm": 0.7678508793696209, "learning_rate": 9.451981057213991e-07, "loss": 0.4681, "step": 2759 }, { "epoch": 0.18, "grad_norm": 0.7600235819809289, "learning_rate": 9.451510873494228e-07, "loss": 0.2709, "step": 2760 }, { "epoch": 0.18, "grad_norm": 1.2709085011050159, "learning_rate": 9.451040499864061e-07, "loss": 0.34, "step": 2761 }, { "epoch": 0.18, "grad_norm": 2.0000470715173306, "learning_rate": 9.45056993634356e-07, "loss": 0.1959, "step": 2762 }, { "epoch": 0.18, "grad_norm": 3.0978895279532295, "learning_rate": 9.450099182952797e-07, "loss": 0.2758, "step": 2763 }, { "epoch": 0.18, "grad_norm": 0.15437705270089677, "learning_rate": 9.449628239711859e-07, "loss": 0.0788, "step": 2764 }, { "epoch": 0.18, "grad_norm": 0.6322614890421141, "learning_rate": 9.449157106640834e-07, "loss": 0.2018, "step": 2765 }, { "epoch": 0.18, "grad_norm": 0.4956315422057888, "learning_rate": 9.448685783759825e-07, "loss": 0.1677, "step": 2766 }, { "epoch": 0.18, "grad_norm": 0.6298075310166793, "learning_rate": 9.448214271088936e-07, "loss": 0.1066, "step": 2767 }, { "epoch": 0.18, "grad_norm": 1.0729077712714175, "learning_rate": 9.447742568648285e-07, "loss": 0.0818, "step": 2768 }, { "epoch": 0.18, "grad_norm": 0.47651090783674244, "learning_rate": 9.447270676457994e-07, "loss": 0.3393, "step": 2769 }, { "epoch": 0.18, "grad_norm": 0.5555946853100611, "learning_rate": 9.446798594538194e-07, "loss": 0.356, "step": 2770 }, { "epoch": 0.18, "grad_norm": 0.47069576540041774, "learning_rate": 9.446326322909031e-07, "loss": 0.177, "step": 2771 }, { "epoch": 0.18, "grad_norm": 1.022874544414808, "learning_rate": 9.445853861590646e-07, "loss": 0.1407, "step": 2772 }, { "epoch": 0.18, "grad_norm": 0.76619778994947, "learning_rate": 9.445381210603198e-07, "loss": 0.1177, "step": 2773 }, { "epoch": 0.18, "grad_norm": 0.816355372826185, "learning_rate": 9.444908369966852e-07, "loss": 0.3085, "step": 2774 }, { "epoch": 0.18, "grad_norm": 1.0502165711859144, "learning_rate": 9.444435339701779e-07, "loss": 0.1333, "step": 2775 }, { "epoch": 0.18, "grad_norm": 0.4496538403505556, "learning_rate": 9.443962119828161e-07, "loss": 0.0114, "step": 2776 }, { "epoch": 0.18, "grad_norm": 0.7821882847088305, "learning_rate": 9.443488710366184e-07, "loss": 0.2463, "step": 2777 }, { "epoch": 0.18, "grad_norm": 1.2748034457238213, "learning_rate": 9.443015111336048e-07, "loss": 0.4336, "step": 2778 }, { "epoch": 0.18, "grad_norm": 0.46986003249958325, "learning_rate": 9.442541322757953e-07, "loss": 0.281, "step": 2779 }, { "epoch": 0.18, "grad_norm": 1.0312067062208974, "learning_rate": 9.442067344652117e-07, "loss": 0.3935, "step": 2780 }, { "epoch": 0.18, "grad_norm": 0.8290958117572361, "learning_rate": 9.441593177038758e-07, "loss": 0.098, "step": 2781 }, { "epoch": 0.18, "grad_norm": 0.4844868718260467, "learning_rate": 9.441118819938104e-07, "loss": 0.1814, "step": 2782 }, { "epoch": 0.18, "grad_norm": 0.6000533724746735, "learning_rate": 9.440644273370395e-07, "loss": 0.0846, "step": 2783 }, { "epoch": 0.18, "grad_norm": 1.2138730908567696, "learning_rate": 9.440169537355873e-07, "loss": 0.0233, "step": 2784 }, { "epoch": 0.18, "grad_norm": 0.5013900350971062, "learning_rate": 9.439694611914795e-07, "loss": 0.1756, "step": 2785 }, { "epoch": 0.18, "grad_norm": 0.36931333974677744, "learning_rate": 9.439219497067417e-07, "loss": 0.0095, "step": 2786 }, { "epoch": 0.18, "grad_norm": 0.3750648838577161, "learning_rate": 9.438744192834013e-07, "loss": 0.1656, "step": 2787 }, { "epoch": 0.18, "grad_norm": 0.605318119090337, "learning_rate": 9.438268699234858e-07, "loss": 0.0886, "step": 2788 }, { "epoch": 0.18, "grad_norm": 0.2959328351693167, "learning_rate": 9.437793016290239e-07, "loss": 0.115, "step": 2789 }, { "epoch": 0.18, "grad_norm": 0.4474127652051423, "learning_rate": 9.43731714402045e-07, "loss": 0.0102, "step": 2790 }, { "epoch": 0.18, "grad_norm": 0.6136794135858379, "learning_rate": 9.436841082445788e-07, "loss": 0.0886, "step": 2791 }, { "epoch": 0.18, "grad_norm": 1.1717763396087342, "learning_rate": 9.436364831586569e-07, "loss": 0.3371, "step": 2792 }, { "epoch": 0.18, "grad_norm": 0.7635911988360328, "learning_rate": 9.435888391463107e-07, "loss": 0.3099, "step": 2793 }, { "epoch": 0.18, "grad_norm": 0.4099248645231432, "learning_rate": 9.435411762095729e-07, "loss": 0.0585, "step": 2794 }, { "epoch": 0.18, "grad_norm": 1.011576269807772, "learning_rate": 9.43493494350477e-07, "loss": 0.1689, "step": 2795 }, { "epoch": 0.18, "grad_norm": 0.22326415266088995, "learning_rate": 9.434457935710569e-07, "loss": 0.0892, "step": 2796 }, { "epoch": 0.18, "grad_norm": 0.19328249836507289, "learning_rate": 9.43398073873348e-07, "loss": 0.0992, "step": 2797 }, { "epoch": 0.18, "grad_norm": 0.9261282826568348, "learning_rate": 9.433503352593859e-07, "loss": 0.2951, "step": 2798 }, { "epoch": 0.18, "grad_norm": 0.40628338413841864, "learning_rate": 9.433025777312072e-07, "loss": 0.0931, "step": 2799 }, { "epoch": 0.18, "grad_norm": 0.8013568734705643, "learning_rate": 9.432548012908495e-07, "loss": 0.2714, "step": 2800 }, { "epoch": 0.18, "grad_norm": 0.7317670146233269, "learning_rate": 9.432070059403507e-07, "loss": 0.1874, "step": 2801 }, { "epoch": 0.18, "grad_norm": 0.9686810419542108, "learning_rate": 9.431591916817502e-07, "loss": 0.1488, "step": 2802 }, { "epoch": 0.18, "grad_norm": 0.8012002521344556, "learning_rate": 9.431113585170877e-07, "loss": 0.1894, "step": 2803 }, { "epoch": 0.18, "grad_norm": 0.8454033223737674, "learning_rate": 9.430635064484038e-07, "loss": 0.2057, "step": 2804 }, { "epoch": 0.18, "grad_norm": 0.7072694430681157, "learning_rate": 9.430156354777402e-07, "loss": 0.4607, "step": 2805 }, { "epoch": 0.18, "grad_norm": 0.4874392147836718, "learning_rate": 9.42967745607139e-07, "loss": 0.1571, "step": 2806 }, { "epoch": 0.18, "grad_norm": 0.5946393715378242, "learning_rate": 9.429198368386433e-07, "loss": 0.0976, "step": 2807 }, { "epoch": 0.18, "grad_norm": 1.51813433087222, "learning_rate": 9.428719091742968e-07, "loss": 0.2874, "step": 2808 }, { "epoch": 0.18, "grad_norm": 0.4208942596352635, "learning_rate": 9.428239626161445e-07, "loss": 0.0901, "step": 2809 }, { "epoch": 0.18, "grad_norm": 0.23755054114355956, "learning_rate": 9.427759971662318e-07, "loss": 0.1652, "step": 2810 }, { "epoch": 0.18, "grad_norm": 0.8589361478411537, "learning_rate": 9.427280128266049e-07, "loss": 0.1959, "step": 2811 }, { "epoch": 0.18, "grad_norm": 0.6991347503127461, "learning_rate": 9.426800095993111e-07, "loss": 0.3735, "step": 2812 }, { "epoch": 0.18, "grad_norm": 0.7716760270043876, "learning_rate": 9.426319874863981e-07, "loss": 0.1688, "step": 2813 }, { "epoch": 0.18, "grad_norm": 0.7459044544613995, "learning_rate": 9.425839464899145e-07, "loss": 0.5136, "step": 2814 }, { "epoch": 0.18, "grad_norm": 0.7709215179212534, "learning_rate": 9.425358866119104e-07, "loss": 0.2338, "step": 2815 }, { "epoch": 0.18, "grad_norm": 0.3334461217493869, "learning_rate": 9.424878078544356e-07, "loss": 0.2086, "step": 2816 }, { "epoch": 0.18, "grad_norm": 0.7471230427630468, "learning_rate": 9.424397102195414e-07, "loss": 0.2253, "step": 2817 }, { "epoch": 0.18, "grad_norm": 0.8213268370779977, "learning_rate": 9.423915937092798e-07, "loss": 0.1502, "step": 2818 }, { "epoch": 0.18, "grad_norm": 0.4561214217091379, "learning_rate": 9.423434583257035e-07, "loss": 0.1782, "step": 2819 }, { "epoch": 0.18, "grad_norm": 0.807199616528415, "learning_rate": 9.42295304070866e-07, "loss": 0.154, "step": 2820 }, { "epoch": 0.18, "grad_norm": 0.41297631557583814, "learning_rate": 9.422471309468216e-07, "loss": 0.1923, "step": 2821 }, { "epoch": 0.18, "grad_norm": 0.5799683306955786, "learning_rate": 9.421989389556258e-07, "loss": 0.2438, "step": 2822 }, { "epoch": 0.18, "grad_norm": 1.772270986558091, "learning_rate": 9.421507280993341e-07, "loss": 0.2805, "step": 2823 }, { "epoch": 0.18, "grad_norm": 1.1538887519345769, "learning_rate": 9.421024983800037e-07, "loss": 0.1897, "step": 2824 }, { "epoch": 0.18, "grad_norm": 0.3294123485329724, "learning_rate": 9.42054249799692e-07, "loss": 0.1754, "step": 2825 }, { "epoch": 0.18, "grad_norm": 0.43713400313651707, "learning_rate": 9.420059823604571e-07, "loss": 0.0802, "step": 2826 }, { "epoch": 0.18, "grad_norm": 1.6594140278897644, "learning_rate": 9.419576960643587e-07, "loss": 0.2665, "step": 2827 }, { "epoch": 0.18, "grad_norm": 0.8470930858443656, "learning_rate": 9.419093909134563e-07, "loss": 0.2339, "step": 2828 }, { "epoch": 0.18, "grad_norm": 0.4306736376756811, "learning_rate": 9.418610669098113e-07, "loss": 0.1545, "step": 2829 }, { "epoch": 0.18, "grad_norm": 1.365434225331647, "learning_rate": 9.418127240554845e-07, "loss": 0.2879, "step": 2830 }, { "epoch": 0.18, "grad_norm": 0.7651480910982791, "learning_rate": 9.417643623525391e-07, "loss": 0.2465, "step": 2831 }, { "epoch": 0.18, "grad_norm": 0.4917679686000983, "learning_rate": 9.417159818030378e-07, "loss": 0.0449, "step": 2832 }, { "epoch": 0.18, "grad_norm": 1.1656930051141519, "learning_rate": 9.416675824090448e-07, "loss": 0.1094, "step": 2833 }, { "epoch": 0.18, "grad_norm": 0.41184955136228063, "learning_rate": 9.416191641726248e-07, "loss": 0.1345, "step": 2834 }, { "epoch": 0.18, "grad_norm": 0.4588694991771305, "learning_rate": 9.415707270958435e-07, "loss": 0.1318, "step": 2835 }, { "epoch": 0.18, "grad_norm": 1.807216627429904, "learning_rate": 9.415222711807673e-07, "loss": 0.3597, "step": 2836 }, { "epoch": 0.18, "grad_norm": 0.7407881832879403, "learning_rate": 9.414737964294634e-07, "loss": 0.2336, "step": 2837 }, { "epoch": 0.18, "grad_norm": 0.5398464107680441, "learning_rate": 9.41425302844e-07, "loss": 0.1482, "step": 2838 }, { "epoch": 0.18, "grad_norm": 1.2362484444063, "learning_rate": 9.413767904264457e-07, "loss": 0.128, "step": 2839 }, { "epoch": 0.18, "grad_norm": 0.45437418205698415, "learning_rate": 9.413282591788703e-07, "loss": 0.0624, "step": 2840 }, { "epoch": 0.18, "grad_norm": 0.37334653359496567, "learning_rate": 9.412797091033442e-07, "loss": 0.0551, "step": 2841 }, { "epoch": 0.18, "grad_norm": 0.9241025056858144, "learning_rate": 9.412311402019387e-07, "loss": 0.0881, "step": 2842 }, { "epoch": 0.18, "grad_norm": 1.2378366271539436, "learning_rate": 9.411825524767255e-07, "loss": 0.0359, "step": 2843 }, { "epoch": 0.18, "grad_norm": 1.0344821750785687, "learning_rate": 9.411339459297779e-07, "loss": 0.0915, "step": 2844 }, { "epoch": 0.18, "grad_norm": 0.9693798456261467, "learning_rate": 9.410853205631693e-07, "loss": 0.3174, "step": 2845 }, { "epoch": 0.18, "grad_norm": 0.5421670416631592, "learning_rate": 9.410366763789743e-07, "loss": 0.1069, "step": 2846 }, { "epoch": 0.18, "grad_norm": 0.2709530149224765, "learning_rate": 9.409880133792682e-07, "loss": 0.1021, "step": 2847 }, { "epoch": 0.18, "grad_norm": 0.6347101893040504, "learning_rate": 9.409393315661268e-07, "loss": 0.1221, "step": 2848 }, { "epoch": 0.18, "grad_norm": 0.8951731804134153, "learning_rate": 9.408906309416271e-07, "loss": 0.1671, "step": 2849 }, { "epoch": 0.18, "grad_norm": 0.7422000431521963, "learning_rate": 9.40841911507847e-07, "loss": 0.2216, "step": 2850 }, { "epoch": 0.18, "grad_norm": 0.670706390499011, "learning_rate": 9.407931732668645e-07, "loss": 0.185, "step": 2851 }, { "epoch": 0.18, "grad_norm": 0.2784967892109064, "learning_rate": 9.407444162207591e-07, "loss": 0.0595, "step": 2852 }, { "epoch": 0.18, "grad_norm": 0.6966841610794263, "learning_rate": 9.40695640371611e-07, "loss": 0.4388, "step": 2853 }, { "epoch": 0.18, "grad_norm": 0.8689733905563939, "learning_rate": 9.406468457215011e-07, "loss": 0.3751, "step": 2854 }, { "epoch": 0.18, "grad_norm": 1.3656774912938305, "learning_rate": 9.405980322725109e-07, "loss": 0.1813, "step": 2855 }, { "epoch": 0.18, "grad_norm": 0.6905757753799551, "learning_rate": 9.405492000267228e-07, "loss": 0.2256, "step": 2856 }, { "epoch": 0.18, "grad_norm": 1.1243153878301837, "learning_rate": 9.405003489862202e-07, "loss": 0.1498, "step": 2857 }, { "epoch": 0.18, "grad_norm": 1.7253835102938415, "learning_rate": 9.404514791530873e-07, "loss": 0.4478, "step": 2858 }, { "epoch": 0.18, "grad_norm": 0.4978015300325407, "learning_rate": 9.404025905294088e-07, "loss": 0.1165, "step": 2859 }, { "epoch": 0.18, "grad_norm": 0.44617374946937977, "learning_rate": 9.403536831172706e-07, "loss": 0.0725, "step": 2860 }, { "epoch": 0.18, "grad_norm": 0.6314616661282472, "learning_rate": 9.40304756918759e-07, "loss": 0.1408, "step": 2861 }, { "epoch": 0.18, "grad_norm": 0.6965625633951027, "learning_rate": 9.402558119359614e-07, "loss": 0.1106, "step": 2862 }, { "epoch": 0.18, "grad_norm": 0.22985774636408715, "learning_rate": 9.402068481709657e-07, "loss": 0.1237, "step": 2863 }, { "epoch": 0.18, "grad_norm": 0.6809065562402292, "learning_rate": 9.40157865625861e-07, "loss": 0.2483, "step": 2864 }, { "epoch": 0.18, "grad_norm": 0.9062160343423031, "learning_rate": 9.401088643027369e-07, "loss": 0.196, "step": 2865 }, { "epoch": 0.18, "grad_norm": 1.7100489542755926, "learning_rate": 9.400598442036839e-07, "loss": 0.063, "step": 2866 }, { "epoch": 0.18, "grad_norm": 0.42267334696169123, "learning_rate": 9.400108053307934e-07, "loss": 0.0933, "step": 2867 }, { "epoch": 0.18, "grad_norm": 0.22334642641603122, "learning_rate": 9.399617476861573e-07, "loss": 0.2095, "step": 2868 }, { "epoch": 0.18, "grad_norm": 1.2243792781462794, "learning_rate": 9.399126712718687e-07, "loss": 0.1065, "step": 2869 }, { "epoch": 0.18, "grad_norm": 0.362080305206458, "learning_rate": 9.398635760900211e-07, "loss": 0.0415, "step": 2870 }, { "epoch": 0.18, "grad_norm": 0.8801455382905071, "learning_rate": 9.398144621427093e-07, "loss": 0.2932, "step": 2871 }, { "epoch": 0.18, "grad_norm": 1.0103986879678948, "learning_rate": 9.397653294320282e-07, "loss": 0.2368, "step": 2872 }, { "epoch": 0.18, "grad_norm": 0.625343372049108, "learning_rate": 9.397161779600742e-07, "loss": 0.216, "step": 2873 }, { "epoch": 0.18, "grad_norm": 0.1910556190819925, "learning_rate": 9.396670077289441e-07, "loss": 0.1636, "step": 2874 }, { "epoch": 0.18, "grad_norm": 0.5439027466683719, "learning_rate": 9.396178187407356e-07, "loss": 0.3946, "step": 2875 }, { "epoch": 0.18, "grad_norm": 1.3236032792196466, "learning_rate": 9.395686109975473e-07, "loss": 0.1851, "step": 2876 }, { "epoch": 0.18, "grad_norm": 0.2515487957717811, "learning_rate": 9.395193845014784e-07, "loss": 0.0908, "step": 2877 }, { "epoch": 0.18, "grad_norm": 0.8810352395602445, "learning_rate": 9.39470139254629e-07, "loss": 0.2022, "step": 2878 }, { "epoch": 0.18, "grad_norm": 0.6101326355028606, "learning_rate": 9.394208752590997e-07, "loss": 0.0564, "step": 2879 }, { "epoch": 0.18, "grad_norm": 0.8622326528124292, "learning_rate": 9.393715925169929e-07, "loss": 0.1853, "step": 2880 }, { "epoch": 0.18, "grad_norm": 0.6068834151155335, "learning_rate": 9.393222910304106e-07, "loss": 0.3243, "step": 2881 }, { "epoch": 0.18, "grad_norm": 0.7504937712945394, "learning_rate": 9.392729708014562e-07, "loss": 0.1388, "step": 2882 }, { "epoch": 0.18, "grad_norm": 0.8035627430314166, "learning_rate": 9.392236318322337e-07, "loss": 0.2444, "step": 2883 }, { "epoch": 0.18, "grad_norm": 0.5813276585105368, "learning_rate": 9.391742741248483e-07, "loss": 0.0145, "step": 2884 }, { "epoch": 0.18, "grad_norm": 1.3399477342637993, "learning_rate": 9.391248976814054e-07, "loss": 0.3238, "step": 2885 }, { "epoch": 0.18, "grad_norm": 0.35618492077932135, "learning_rate": 9.390755025040118e-07, "loss": 0.1506, "step": 2886 }, { "epoch": 0.18, "grad_norm": 1.10978232209715, "learning_rate": 9.390260885947745e-07, "loss": 0.1978, "step": 2887 }, { "epoch": 0.18, "grad_norm": 0.9966430862469495, "learning_rate": 9.389766559558017e-07, "loss": 0.0761, "step": 2888 }, { "epoch": 0.18, "grad_norm": 0.5628107443022811, "learning_rate": 9.389272045892023e-07, "loss": 0.1672, "step": 2889 }, { "epoch": 0.18, "grad_norm": 0.5324227945428179, "learning_rate": 9.38877734497086e-07, "loss": 0.098, "step": 2890 }, { "epoch": 0.18, "grad_norm": 1.1450986980950073, "learning_rate": 9.388282456815632e-07, "loss": 0.0692, "step": 2891 }, { "epoch": 0.18, "grad_norm": 0.2870732688804849, "learning_rate": 9.387787381447454e-07, "loss": 0.1587, "step": 2892 }, { "epoch": 0.18, "grad_norm": 1.1026915337376895, "learning_rate": 9.387292118887444e-07, "loss": 0.2455, "step": 2893 }, { "epoch": 0.18, "grad_norm": 0.6398918224849218, "learning_rate": 9.386796669156735e-07, "loss": 0.3308, "step": 2894 }, { "epoch": 0.18, "grad_norm": 0.9118360693252334, "learning_rate": 9.386301032276461e-07, "loss": 0.1218, "step": 2895 }, { "epoch": 0.18, "grad_norm": 0.44660226248487206, "learning_rate": 9.385805208267766e-07, "loss": 0.0959, "step": 2896 }, { "epoch": 0.18, "grad_norm": 1.05321455559004, "learning_rate": 9.385309197151805e-07, "loss": 0.1418, "step": 2897 }, { "epoch": 0.18, "grad_norm": 0.6948435045112532, "learning_rate": 9.384812998949739e-07, "loss": 0.2146, "step": 2898 }, { "epoch": 0.18, "grad_norm": 0.45948361273294575, "learning_rate": 9.384316613682735e-07, "loss": 0.2599, "step": 2899 }, { "epoch": 0.18, "grad_norm": 0.3149086661546606, "learning_rate": 9.38382004137197e-07, "loss": 0.129, "step": 2900 }, { "epoch": 0.19, "grad_norm": 0.6401148842503128, "learning_rate": 9.383323282038631e-07, "loss": 0.2047, "step": 2901 }, { "epoch": 0.19, "grad_norm": 1.1647592754413674, "learning_rate": 9.382826335703908e-07, "loss": 0.1428, "step": 2902 }, { "epoch": 0.19, "grad_norm": 3.349040834073399, "learning_rate": 9.382329202389003e-07, "loss": 0.2031, "step": 2903 }, { "epoch": 0.19, "grad_norm": 0.80803777930169, "learning_rate": 9.381831882115126e-07, "loss": 0.0875, "step": 2904 }, { "epoch": 0.19, "grad_norm": 0.6901241070061314, "learning_rate": 9.381334374903491e-07, "loss": 0.3223, "step": 2905 }, { "epoch": 0.19, "grad_norm": 0.44340982598308587, "learning_rate": 9.380836680775324e-07, "loss": 0.1118, "step": 2906 }, { "epoch": 0.19, "grad_norm": 0.36681370962138576, "learning_rate": 9.380338799751858e-07, "loss": 0.3782, "step": 2907 }, { "epoch": 0.19, "grad_norm": 0.6694382343922336, "learning_rate": 9.379840731854334e-07, "loss": 0.3132, "step": 2908 }, { "epoch": 0.19, "grad_norm": 0.7581315571969631, "learning_rate": 9.379342477103998e-07, "loss": 0.1568, "step": 2909 }, { "epoch": 0.19, "grad_norm": 0.41683816538272617, "learning_rate": 9.37884403552211e-07, "loss": 0.2433, "step": 2910 }, { "epoch": 0.19, "grad_norm": 1.0928002264705523, "learning_rate": 9.378345407129931e-07, "loss": 0.356, "step": 2911 }, { "epoch": 0.19, "grad_norm": 1.6464702572252623, "learning_rate": 9.377846591948737e-07, "loss": 0.2987, "step": 2912 }, { "epoch": 0.19, "grad_norm": 2.213944566539613, "learning_rate": 9.377347589999806e-07, "loss": 0.4899, "step": 2913 }, { "epoch": 0.19, "grad_norm": 0.2569801615400901, "learning_rate": 9.376848401304428e-07, "loss": 0.215, "step": 2914 }, { "epoch": 0.19, "grad_norm": 0.7564381394785538, "learning_rate": 9.376349025883899e-07, "loss": 0.1616, "step": 2915 }, { "epoch": 0.19, "grad_norm": 0.9610078231646705, "learning_rate": 9.375849463759522e-07, "loss": 0.1232, "step": 2916 }, { "epoch": 0.19, "grad_norm": 0.3820691631100241, "learning_rate": 9.375349714952609e-07, "loss": 0.0886, "step": 2917 }, { "epoch": 0.19, "grad_norm": 0.5951588990461749, "learning_rate": 9.374849779484484e-07, "loss": 0.2954, "step": 2918 }, { "epoch": 0.19, "grad_norm": 0.44345965442842933, "learning_rate": 9.374349657376472e-07, "loss": 0.2337, "step": 2919 }, { "epoch": 0.19, "grad_norm": 0.47022348751950854, "learning_rate": 9.373849348649909e-07, "loss": 0.0834, "step": 2920 }, { "epoch": 0.19, "grad_norm": 0.5549102258857699, "learning_rate": 9.373348853326142e-07, "loss": 0.3269, "step": 2921 }, { "epoch": 0.19, "grad_norm": 1.3390363419600777, "learning_rate": 9.372848171426522e-07, "loss": 0.0601, "step": 2922 }, { "epoch": 0.19, "grad_norm": 0.4553715406375695, "learning_rate": 9.372347302972407e-07, "loss": 0.1892, "step": 2923 }, { "epoch": 0.19, "grad_norm": 2.315082537773244, "learning_rate": 9.371846247985166e-07, "loss": 0.1059, "step": 2924 }, { "epoch": 0.19, "grad_norm": 0.7620149541053305, "learning_rate": 9.371345006486176e-07, "loss": 0.1488, "step": 2925 }, { "epoch": 0.19, "grad_norm": 0.4819073686687152, "learning_rate": 9.37084357849682e-07, "loss": 0.2861, "step": 2926 }, { "epoch": 0.19, "grad_norm": 0.7497531244440875, "learning_rate": 9.370341964038492e-07, "loss": 0.1649, "step": 2927 }, { "epoch": 0.19, "grad_norm": 0.9647720741092505, "learning_rate": 9.36984016313259e-07, "loss": 0.4131, "step": 2928 }, { "epoch": 0.19, "grad_norm": 0.7008518156276032, "learning_rate": 9.369338175800521e-07, "loss": 0.3531, "step": 2929 }, { "epoch": 0.19, "grad_norm": 0.5904076191224823, "learning_rate": 9.368836002063703e-07, "loss": 0.2638, "step": 2930 }, { "epoch": 0.19, "grad_norm": 1.1396464800581088, "learning_rate": 9.368333641943558e-07, "loss": 0.0155, "step": 2931 }, { "epoch": 0.19, "grad_norm": 1.2158797865005662, "learning_rate": 9.367831095461518e-07, "loss": 0.4174, "step": 2932 }, { "epoch": 0.19, "grad_norm": 0.42457405895374317, "learning_rate": 9.367328362639024e-07, "loss": 0.0814, "step": 2933 }, { "epoch": 0.19, "grad_norm": 0.1301786172208941, "learning_rate": 9.366825443497522e-07, "loss": 0.008, "step": 2934 }, { "epoch": 0.19, "grad_norm": 0.9093203437447465, "learning_rate": 9.366322338058469e-07, "loss": 0.4459, "step": 2935 }, { "epoch": 0.19, "grad_norm": 0.49447847109671217, "learning_rate": 9.365819046343328e-07, "loss": 0.1887, "step": 2936 }, { "epoch": 0.19, "grad_norm": 3.0850813480109034, "learning_rate": 9.365315568373568e-07, "loss": 0.0585, "step": 2937 }, { "epoch": 0.19, "grad_norm": 0.4610127694846558, "learning_rate": 9.364811904170672e-07, "loss": 0.0876, "step": 2938 }, { "epoch": 0.19, "grad_norm": 1.2660699869751306, "learning_rate": 9.364308053756126e-07, "loss": 0.1479, "step": 2939 }, { "epoch": 0.19, "grad_norm": 0.5483732666481342, "learning_rate": 9.363804017151424e-07, "loss": 0.1685, "step": 2940 }, { "epoch": 0.19, "grad_norm": 0.3343442458788976, "learning_rate": 9.363299794378071e-07, "loss": 0.0201, "step": 2941 }, { "epoch": 0.19, "grad_norm": 1.001151416510769, "learning_rate": 9.362795385457578e-07, "loss": 0.2791, "step": 2942 }, { "epoch": 0.19, "grad_norm": 0.8030334382420369, "learning_rate": 9.362290790411463e-07, "loss": 0.1531, "step": 2943 }, { "epoch": 0.19, "grad_norm": 0.2317266383697126, "learning_rate": 9.361786009261252e-07, "loss": 0.0073, "step": 2944 }, { "epoch": 0.19, "grad_norm": 1.047789694645446, "learning_rate": 9.361281042028484e-07, "loss": 0.2145, "step": 2945 }, { "epoch": 0.19, "grad_norm": 0.4685516692379248, "learning_rate": 9.360775888734697e-07, "loss": 0.3019, "step": 2946 }, { "epoch": 0.19, "grad_norm": 0.7773936351369717, "learning_rate": 9.360270549401445e-07, "loss": 0.3539, "step": 2947 }, { "epoch": 0.19, "grad_norm": 0.5734200535640486, "learning_rate": 9.359765024050288e-07, "loss": 0.1606, "step": 2948 }, { "epoch": 0.19, "grad_norm": 0.7189535865509294, "learning_rate": 9.35925931270279e-07, "loss": 0.4715, "step": 2949 }, { "epoch": 0.19, "grad_norm": 0.3679596216667496, "learning_rate": 9.358753415380527e-07, "loss": 0.0498, "step": 2950 }, { "epoch": 0.19, "grad_norm": 1.1985638110274803, "learning_rate": 9.35824733210508e-07, "loss": 0.2713, "step": 2951 }, { "epoch": 0.19, "grad_norm": 0.41398293651477885, "learning_rate": 9.35774106289804e-07, "loss": 0.2053, "step": 2952 }, { "epoch": 0.19, "grad_norm": 0.6099238968783113, "learning_rate": 9.357234607781008e-07, "loss": 0.1821, "step": 2953 }, { "epoch": 0.19, "grad_norm": 0.8709531253950371, "learning_rate": 9.356727966775587e-07, "loss": 0.3359, "step": 2954 }, { "epoch": 0.19, "grad_norm": 0.2278086119851056, "learning_rate": 9.356221139903393e-07, "loss": 0.005, "step": 2955 }, { "epoch": 0.19, "grad_norm": 0.9272032589655799, "learning_rate": 9.355714127186048e-07, "loss": 0.3826, "step": 2956 }, { "epoch": 0.19, "grad_norm": 1.0236446162888828, "learning_rate": 9.355206928645183e-07, "loss": 0.2564, "step": 2957 }, { "epoch": 0.19, "grad_norm": 0.8801309550772775, "learning_rate": 9.354699544302435e-07, "loss": 0.272, "step": 2958 }, { "epoch": 0.19, "grad_norm": 0.7152614180118375, "learning_rate": 9.354191974179451e-07, "loss": 0.1935, "step": 2959 }, { "epoch": 0.19, "grad_norm": 1.242140104161178, "learning_rate": 9.353684218297884e-07, "loss": 0.1313, "step": 2960 }, { "epoch": 0.19, "grad_norm": 0.4786856065967177, "learning_rate": 9.353176276679395e-07, "loss": 0.1123, "step": 2961 }, { "epoch": 0.19, "grad_norm": 0.6804098926691428, "learning_rate": 9.352668149345657e-07, "loss": 0.3683, "step": 2962 }, { "epoch": 0.19, "grad_norm": 0.3810212011607888, "learning_rate": 9.352159836318345e-07, "loss": 0.3455, "step": 2963 }, { "epoch": 0.19, "grad_norm": 1.5196253006025056, "learning_rate": 9.351651337619144e-07, "loss": 0.2195, "step": 2964 }, { "epoch": 0.19, "grad_norm": 0.7831762124216888, "learning_rate": 9.351142653269752e-07, "loss": 0.1341, "step": 2965 }, { "epoch": 0.19, "grad_norm": 0.401533870177438, "learning_rate": 9.350633783291866e-07, "loss": 0.0186, "step": 2966 }, { "epoch": 0.19, "grad_norm": 0.2830055046461243, "learning_rate": 9.350124727707196e-07, "loss": 0.0125, "step": 2967 }, { "epoch": 0.19, "grad_norm": 0.9275444900338184, "learning_rate": 9.349615486537461e-07, "loss": 0.1766, "step": 2968 }, { "epoch": 0.19, "grad_norm": 1.4813644088477882, "learning_rate": 9.349106059804386e-07, "loss": 0.2933, "step": 2969 }, { "epoch": 0.19, "grad_norm": 0.5543308751412848, "learning_rate": 9.348596447529702e-07, "loss": 0.3231, "step": 2970 }, { "epoch": 0.19, "grad_norm": 0.3809371773300341, "learning_rate": 9.348086649735154e-07, "loss": 0.086, "step": 2971 }, { "epoch": 0.19, "grad_norm": 1.1614202413974644, "learning_rate": 9.347576666442487e-07, "loss": 0.1066, "step": 2972 }, { "epoch": 0.19, "grad_norm": 0.5684829679157605, "learning_rate": 9.347066497673461e-07, "loss": 0.0861, "step": 2973 }, { "epoch": 0.19, "grad_norm": 0.8791000751341507, "learning_rate": 9.34655614344984e-07, "loss": 0.2266, "step": 2974 }, { "epoch": 0.19, "grad_norm": 1.5369128656478885, "learning_rate": 9.346045603793394e-07, "loss": 0.1987, "step": 2975 }, { "epoch": 0.19, "grad_norm": 1.2612040497881984, "learning_rate": 9.345534878725907e-07, "loss": 0.0356, "step": 2976 }, { "epoch": 0.19, "grad_norm": 0.5417937297154061, "learning_rate": 9.345023968269167e-07, "loss": 0.1972, "step": 2977 }, { "epoch": 0.19, "grad_norm": 1.427665547698021, "learning_rate": 9.344512872444969e-07, "loss": 0.2863, "step": 2978 }, { "epoch": 0.19, "grad_norm": 1.0737672201175752, "learning_rate": 9.344001591275119e-07, "loss": 0.1526, "step": 2979 }, { "epoch": 0.19, "grad_norm": 0.7145115557551948, "learning_rate": 9.343490124781428e-07, "loss": 0.3481, "step": 2980 }, { "epoch": 0.19, "grad_norm": 1.119609581908684, "learning_rate": 9.342978472985718e-07, "loss": 0.2666, "step": 2981 }, { "epoch": 0.19, "grad_norm": 0.7464592130899599, "learning_rate": 9.342466635909815e-07, "loss": 0.2481, "step": 2982 }, { "epoch": 0.19, "grad_norm": 0.12397808819221302, "learning_rate": 9.341954613575555e-07, "loss": 0.0045, "step": 2983 }, { "epoch": 0.19, "grad_norm": 0.40409144562835825, "learning_rate": 9.341442406004784e-07, "loss": 0.1173, "step": 2984 }, { "epoch": 0.19, "grad_norm": 0.3297218341450486, "learning_rate": 9.340930013219352e-07, "loss": 0.0489, "step": 2985 }, { "epoch": 0.19, "grad_norm": 0.382178387832423, "learning_rate": 9.340417435241119e-07, "loss": 0.0356, "step": 2986 }, { "epoch": 0.19, "grad_norm": 0.7806082011581832, "learning_rate": 9.339904672091953e-07, "loss": 0.1423, "step": 2987 }, { "epoch": 0.19, "grad_norm": 0.5177359123813741, "learning_rate": 9.33939172379373e-07, "loss": 0.2016, "step": 2988 }, { "epoch": 0.19, "grad_norm": 0.31598636718736156, "learning_rate": 9.338878590368333e-07, "loss": 0.076, "step": 2989 }, { "epoch": 0.19, "grad_norm": 1.3549764100792365, "learning_rate": 9.338365271837654e-07, "loss": 0.1827, "step": 2990 }, { "epoch": 0.19, "grad_norm": 1.1013235744070637, "learning_rate": 9.337851768223588e-07, "loss": 0.3258, "step": 2991 }, { "epoch": 0.19, "grad_norm": 0.772870087027144, "learning_rate": 9.337338079548048e-07, "loss": 0.4289, "step": 2992 }, { "epoch": 0.19, "grad_norm": 0.5294229312283212, "learning_rate": 9.336824205832947e-07, "loss": 0.2537, "step": 2993 }, { "epoch": 0.19, "grad_norm": 1.1287496183325993, "learning_rate": 9.336310147100205e-07, "loss": 0.1337, "step": 2994 }, { "epoch": 0.19, "grad_norm": 0.44638174383621354, "learning_rate": 9.335795903371755e-07, "loss": 0.2678, "step": 2995 }, { "epoch": 0.19, "grad_norm": 0.593454638131262, "learning_rate": 9.335281474669538e-07, "loss": 0.2518, "step": 2996 }, { "epoch": 0.19, "grad_norm": 0.44976684179108467, "learning_rate": 9.334766861015496e-07, "loss": 0.0079, "step": 2997 }, { "epoch": 0.19, "grad_norm": 0.4337280332740164, "learning_rate": 9.334252062431587e-07, "loss": 0.3132, "step": 2998 }, { "epoch": 0.19, "grad_norm": 0.2867560830291846, "learning_rate": 9.333737078939772e-07, "loss": 0.1556, "step": 2999 }, { "epoch": 0.19, "grad_norm": 0.4448942958304109, "learning_rate": 9.333221910562022e-07, "loss": 0.1751, "step": 3000 }, { "epoch": 0.19, "grad_norm": 1.263005784438112, "learning_rate": 9.332706557320314e-07, "loss": 0.1779, "step": 3001 }, { "epoch": 0.19, "grad_norm": 0.5466353098286592, "learning_rate": 9.332191019236632e-07, "loss": 0.099, "step": 3002 }, { "epoch": 0.19, "grad_norm": 0.7976615097556736, "learning_rate": 9.331675296332975e-07, "loss": 0.1148, "step": 3003 }, { "epoch": 0.19, "grad_norm": 0.8784737078696657, "learning_rate": 9.331159388631341e-07, "loss": 0.2069, "step": 3004 }, { "epoch": 0.19, "grad_norm": 0.535056162166755, "learning_rate": 9.330643296153742e-07, "loss": 0.1783, "step": 3005 }, { "epoch": 0.19, "grad_norm": 0.239662280674292, "learning_rate": 9.330127018922193e-07, "loss": 0.1772, "step": 3006 }, { "epoch": 0.19, "grad_norm": 1.4329142584792107, "learning_rate": 9.329610556958722e-07, "loss": 0.2907, "step": 3007 }, { "epoch": 0.19, "grad_norm": 0.28893600436723366, "learning_rate": 9.32909391028536e-07, "loss": 0.1061, "step": 3008 }, { "epoch": 0.19, "grad_norm": 0.6270169813375313, "learning_rate": 9.32857707892415e-07, "loss": 0.1671, "step": 3009 }, { "epoch": 0.19, "grad_norm": 0.7369538297638233, "learning_rate": 9.328060062897138e-07, "loss": 0.0758, "step": 3010 }, { "epoch": 0.19, "grad_norm": 0.4241868162913618, "learning_rate": 9.327542862226386e-07, "loss": 0.1742, "step": 3011 }, { "epoch": 0.19, "grad_norm": 0.34388789875246983, "learning_rate": 9.327025476933954e-07, "loss": 0.0318, "step": 3012 }, { "epoch": 0.19, "grad_norm": 0.5254738570119284, "learning_rate": 9.326507907041918e-07, "loss": 0.067, "step": 3013 }, { "epoch": 0.19, "grad_norm": 0.3090281030578117, "learning_rate": 9.325990152572358e-07, "loss": 0.1806, "step": 3014 }, { "epoch": 0.19, "grad_norm": 0.573799180637348, "learning_rate": 9.32547221354736e-07, "loss": 0.1102, "step": 3015 }, { "epoch": 0.19, "grad_norm": 1.5302728596888309, "learning_rate": 9.324954089989023e-07, "loss": 0.2121, "step": 3016 }, { "epoch": 0.19, "grad_norm": 0.33753031745126294, "learning_rate": 9.324435781919449e-07, "loss": 0.1485, "step": 3017 }, { "epoch": 0.19, "grad_norm": 0.998994753179129, "learning_rate": 9.323917289360753e-07, "loss": 0.2142, "step": 3018 }, { "epoch": 0.19, "grad_norm": 0.19605758441521368, "learning_rate": 9.323398612335054e-07, "loss": 0.0065, "step": 3019 }, { "epoch": 0.19, "grad_norm": 0.4283434558037134, "learning_rate": 9.322879750864476e-07, "loss": 0.0639, "step": 3020 }, { "epoch": 0.19, "grad_norm": 0.8714663833200217, "learning_rate": 9.32236070497116e-07, "loss": 0.2954, "step": 3021 }, { "epoch": 0.19, "grad_norm": 0.858049201538456, "learning_rate": 9.321841474677247e-07, "loss": 0.1448, "step": 3022 }, { "epoch": 0.19, "grad_norm": 1.1389143844340983, "learning_rate": 9.321322060004888e-07, "loss": 0.3223, "step": 3023 }, { "epoch": 0.19, "grad_norm": 0.5234024329695265, "learning_rate": 9.320802460976245e-07, "loss": 0.1825, "step": 3024 }, { "epoch": 0.19, "grad_norm": 0.9722075998628446, "learning_rate": 9.32028267761348e-07, "loss": 0.2889, "step": 3025 }, { "epoch": 0.19, "grad_norm": 0.9437832253730604, "learning_rate": 9.319762709938775e-07, "loss": 0.3222, "step": 3026 }, { "epoch": 0.19, "grad_norm": 0.830519395355223, "learning_rate": 9.319242557974305e-07, "loss": 0.3216, "step": 3027 }, { "epoch": 0.19, "grad_norm": 0.46865160802583294, "learning_rate": 9.318722221742267e-07, "loss": 0.1342, "step": 3028 }, { "epoch": 0.19, "grad_norm": 0.7704922069197175, "learning_rate": 9.318201701264857e-07, "loss": 0.2531, "step": 3029 }, { "epoch": 0.19, "grad_norm": 0.9057929614247713, "learning_rate": 9.317680996564281e-07, "loss": 0.1031, "step": 3030 }, { "epoch": 0.19, "grad_norm": 0.6183863786282269, "learning_rate": 9.317160107662754e-07, "loss": 0.0579, "step": 3031 }, { "epoch": 0.19, "grad_norm": 0.6514179057007335, "learning_rate": 9.316639034582498e-07, "loss": 0.1904, "step": 3032 }, { "epoch": 0.19, "grad_norm": 1.2777188197898017, "learning_rate": 9.316117777345746e-07, "loss": 0.036, "step": 3033 }, { "epoch": 0.19, "grad_norm": 0.7732503085369049, "learning_rate": 9.315596335974731e-07, "loss": 0.4658, "step": 3034 }, { "epoch": 0.19, "grad_norm": 0.8356075156372184, "learning_rate": 9.3150747104917e-07, "loss": 0.2359, "step": 3035 }, { "epoch": 0.19, "grad_norm": 0.4573641176035874, "learning_rate": 9.314552900918908e-07, "loss": 0.0818, "step": 3036 }, { "epoch": 0.19, "grad_norm": 1.085807918386808, "learning_rate": 9.314030907278618e-07, "loss": 0.4449, "step": 3037 }, { "epoch": 0.19, "grad_norm": 0.8291466502959902, "learning_rate": 9.313508729593096e-07, "loss": 0.2132, "step": 3038 }, { "epoch": 0.19, "grad_norm": 3.2470427402668394, "learning_rate": 9.312986367884619e-07, "loss": 0.1351, "step": 3039 }, { "epoch": 0.19, "grad_norm": 0.8357623921356458, "learning_rate": 9.312463822175474e-07, "loss": 0.2655, "step": 3040 }, { "epoch": 0.19, "grad_norm": 0.6289347384014446, "learning_rate": 9.311941092487954e-07, "loss": 0.1375, "step": 3041 }, { "epoch": 0.19, "grad_norm": 0.6852238162700425, "learning_rate": 9.31141817884436e-07, "loss": 0.1956, "step": 3042 }, { "epoch": 0.19, "grad_norm": 1.2071504666592814, "learning_rate": 9.310895081266996e-07, "loss": 0.4316, "step": 3043 }, { "epoch": 0.19, "grad_norm": 0.672865138314787, "learning_rate": 9.310371799778184e-07, "loss": 0.1289, "step": 3044 }, { "epoch": 0.19, "grad_norm": 1.15820142374764, "learning_rate": 9.309848334400245e-07, "loss": 0.1639, "step": 3045 }, { "epoch": 0.19, "grad_norm": 0.5501107913660684, "learning_rate": 9.309324685155513e-07, "loss": 0.2278, "step": 3046 }, { "epoch": 0.19, "grad_norm": 0.8376405818747217, "learning_rate": 9.308800852066328e-07, "loss": 0.2784, "step": 3047 }, { "epoch": 0.19, "grad_norm": 0.8444680283046364, "learning_rate": 9.308276835155036e-07, "loss": 0.1992, "step": 3048 }, { "epoch": 0.19, "grad_norm": 0.9159911788834041, "learning_rate": 9.307752634443992e-07, "loss": 0.3426, "step": 3049 }, { "epoch": 0.19, "grad_norm": 1.2841327552971487, "learning_rate": 9.307228249955563e-07, "loss": 0.2586, "step": 3050 }, { "epoch": 0.19, "grad_norm": 0.3766795409358213, "learning_rate": 9.306703681712118e-07, "loss": 0.2657, "step": 3051 }, { "epoch": 0.19, "grad_norm": 0.6124945721716994, "learning_rate": 9.306178929736037e-07, "loss": 0.1604, "step": 3052 }, { "epoch": 0.19, "grad_norm": 1.0815748384024368, "learning_rate": 9.305653994049705e-07, "loss": 0.2442, "step": 3053 }, { "epoch": 0.19, "grad_norm": 1.2882148876130204, "learning_rate": 9.305128874675519e-07, "loss": 0.1155, "step": 3054 }, { "epoch": 0.19, "grad_norm": 0.3607427777409394, "learning_rate": 9.304603571635879e-07, "loss": 0.2395, "step": 3055 }, { "epoch": 0.19, "grad_norm": 0.6693896201291227, "learning_rate": 9.3040780849532e-07, "loss": 0.2456, "step": 3056 }, { "epoch": 0.19, "grad_norm": 0.31469024781178107, "learning_rate": 9.303552414649896e-07, "loss": 0.1758, "step": 3057 }, { "epoch": 0.2, "grad_norm": 0.4505317171746773, "learning_rate": 9.303026560748395e-07, "loss": 0.297, "step": 3058 }, { "epoch": 0.2, "grad_norm": 0.6597112278091493, "learning_rate": 9.302500523271131e-07, "loss": 0.1577, "step": 3059 }, { "epoch": 0.2, "grad_norm": 0.7323757246455531, "learning_rate": 9.301974302240545e-07, "loss": 0.2976, "step": 3060 }, { "epoch": 0.2, "grad_norm": 1.7159980576059044, "learning_rate": 9.301447897679087e-07, "loss": 0.2212, "step": 3061 }, { "epoch": 0.2, "grad_norm": 0.6263325445076474, "learning_rate": 9.300921309609215e-07, "loss": 0.2819, "step": 3062 }, { "epoch": 0.2, "grad_norm": 0.7504126780496241, "learning_rate": 9.300394538053394e-07, "loss": 0.203, "step": 3063 }, { "epoch": 0.2, "grad_norm": 0.8346360508181327, "learning_rate": 9.299867583034098e-07, "loss": 0.2596, "step": 3064 }, { "epoch": 0.2, "grad_norm": 0.296066955293461, "learning_rate": 9.299340444573807e-07, "loss": 0.1294, "step": 3065 }, { "epoch": 0.2, "grad_norm": 0.7019518256264955, "learning_rate": 9.298813122695009e-07, "loss": 0.1725, "step": 3066 }, { "epoch": 0.2, "grad_norm": 0.7518499734295849, "learning_rate": 9.298285617420202e-07, "loss": 0.1102, "step": 3067 }, { "epoch": 0.2, "grad_norm": 1.3364157984802234, "learning_rate": 9.29775792877189e-07, "loss": 0.1319, "step": 3068 }, { "epoch": 0.2, "grad_norm": 0.33961466118762706, "learning_rate": 9.297230056772585e-07, "loss": 0.191, "step": 3069 }, { "epoch": 0.2, "grad_norm": 0.5594391802486758, "learning_rate": 9.296702001444807e-07, "loss": 0.1917, "step": 3070 }, { "epoch": 0.2, "grad_norm": 1.2721794436919729, "learning_rate": 9.296173762811084e-07, "loss": 0.3695, "step": 3071 }, { "epoch": 0.2, "grad_norm": 0.9533258107642867, "learning_rate": 9.295645340893952e-07, "loss": 0.2842, "step": 3072 }, { "epoch": 0.2, "grad_norm": 1.073891511275848, "learning_rate": 9.295116735715955e-07, "loss": 0.518, "step": 3073 }, { "epoch": 0.2, "grad_norm": 0.9572885740850663, "learning_rate": 9.294587947299644e-07, "loss": 0.0734, "step": 3074 }, { "epoch": 0.2, "grad_norm": 1.2986012575020074, "learning_rate": 9.294058975667575e-07, "loss": 0.3546, "step": 3075 }, { "epoch": 0.2, "grad_norm": 0.4913653469206503, "learning_rate": 9.293529820842322e-07, "loss": 0.1909, "step": 3076 }, { "epoch": 0.2, "grad_norm": 0.7482298590180433, "learning_rate": 9.293000482846453e-07, "loss": 0.141, "step": 3077 }, { "epoch": 0.2, "grad_norm": 0.4274984032690888, "learning_rate": 9.292470961702555e-07, "loss": 0.273, "step": 3078 }, { "epoch": 0.2, "grad_norm": 1.0737864233379126, "learning_rate": 9.291941257433217e-07, "loss": 0.1208, "step": 3079 }, { "epoch": 0.2, "grad_norm": 0.6277167426847763, "learning_rate": 9.291411370061036e-07, "loss": 0.314, "step": 3080 }, { "epoch": 0.2, "grad_norm": 0.490536857820418, "learning_rate": 9.29088129960862e-07, "loss": 0.1106, "step": 3081 }, { "epoch": 0.2, "grad_norm": 0.9631473008227158, "learning_rate": 9.290351046098581e-07, "loss": 0.1021, "step": 3082 }, { "epoch": 0.2, "grad_norm": 1.040663390524604, "learning_rate": 9.289820609553542e-07, "loss": 0.0639, "step": 3083 }, { "epoch": 0.2, "grad_norm": 0.5977638524386644, "learning_rate": 9.289289989996132e-07, "loss": 0.2086, "step": 3084 }, { "epoch": 0.2, "grad_norm": 0.7715634833014068, "learning_rate": 9.288759187448989e-07, "loss": 0.1186, "step": 3085 }, { "epoch": 0.2, "grad_norm": 2.0831515958975575, "learning_rate": 9.288228201934758e-07, "loss": 0.1367, "step": 3086 }, { "epoch": 0.2, "grad_norm": 0.59423851730006, "learning_rate": 9.287697033476091e-07, "loss": 0.4256, "step": 3087 }, { "epoch": 0.2, "grad_norm": 2.4208760634114546, "learning_rate": 9.287165682095649e-07, "loss": 0.3848, "step": 3088 }, { "epoch": 0.2, "grad_norm": 0.23706709793319297, "learning_rate": 9.286634147816102e-07, "loss": 0.2232, "step": 3089 }, { "epoch": 0.2, "grad_norm": 1.3404176398453622, "learning_rate": 9.286102430660123e-07, "loss": 0.2283, "step": 3090 }, { "epoch": 0.2, "grad_norm": 1.1211225236398437, "learning_rate": 9.285570530650399e-07, "loss": 0.2886, "step": 3091 }, { "epoch": 0.2, "grad_norm": 0.8640493538302704, "learning_rate": 9.285038447809621e-07, "loss": 0.1125, "step": 3092 }, { "epoch": 0.2, "grad_norm": 0.7734970842558655, "learning_rate": 9.284506182160489e-07, "loss": 0.2469, "step": 3093 }, { "epoch": 0.2, "grad_norm": 0.6817783569138928, "learning_rate": 9.283973733725709e-07, "loss": 0.0149, "step": 3094 }, { "epoch": 0.2, "grad_norm": 0.746164700972667, "learning_rate": 9.283441102528e-07, "loss": 0.4098, "step": 3095 }, { "epoch": 0.2, "grad_norm": 0.6777464618768618, "learning_rate": 9.282908288590082e-07, "loss": 0.1893, "step": 3096 }, { "epoch": 0.2, "grad_norm": 0.631082316411457, "learning_rate": 9.282375291934685e-07, "loss": 0.1531, "step": 3097 }, { "epoch": 0.2, "grad_norm": 0.4404346136544487, "learning_rate": 9.281842112584552e-07, "loss": 0.0854, "step": 3098 }, { "epoch": 0.2, "grad_norm": 0.6649865624253605, "learning_rate": 9.281308750562425e-07, "loss": 0.1809, "step": 3099 }, { "epoch": 0.2, "grad_norm": 0.553830471140561, "learning_rate": 9.280775205891062e-07, "loss": 0.2709, "step": 3100 }, { "epoch": 0.2, "grad_norm": 0.787758787758307, "learning_rate": 9.280241478593221e-07, "loss": 0.31, "step": 3101 }, { "epoch": 0.2, "grad_norm": 0.36627677398165226, "learning_rate": 9.279707568691676e-07, "loss": 0.2286, "step": 3102 }, { "epoch": 0.2, "grad_norm": 0.5283829688432854, "learning_rate": 9.279173476209202e-07, "loss": 0.3185, "step": 3103 }, { "epoch": 0.2, "grad_norm": 1.6095925885429754, "learning_rate": 9.278639201168585e-07, "loss": 0.0636, "step": 3104 }, { "epoch": 0.2, "grad_norm": 1.4672601309115711, "learning_rate": 9.27810474359262e-07, "loss": 0.2262, "step": 3105 }, { "epoch": 0.2, "grad_norm": 0.8983970054704082, "learning_rate": 9.277570103504104e-07, "loss": 0.1639, "step": 3106 }, { "epoch": 0.2, "grad_norm": 0.6342052380139972, "learning_rate": 9.277035280925852e-07, "loss": 0.1156, "step": 3107 }, { "epoch": 0.2, "grad_norm": 1.0159746958773266, "learning_rate": 9.276500275880675e-07, "loss": 0.2254, "step": 3108 }, { "epoch": 0.2, "grad_norm": 1.431654174276687, "learning_rate": 9.275965088391397e-07, "loss": 0.2284, "step": 3109 }, { "epoch": 0.2, "grad_norm": 1.1935745659077637, "learning_rate": 9.275429718480856e-07, "loss": 0.1168, "step": 3110 }, { "epoch": 0.2, "grad_norm": 0.4555984205316003, "learning_rate": 9.274894166171887e-07, "loss": 0.06, "step": 3111 }, { "epoch": 0.2, "grad_norm": 1.1012201002196862, "learning_rate": 9.274358431487339e-07, "loss": 0.2859, "step": 3112 }, { "epoch": 0.2, "grad_norm": 0.9332877898438722, "learning_rate": 9.27382251445007e-07, "loss": 0.2569, "step": 3113 }, { "epoch": 0.2, "grad_norm": 0.7874397460911885, "learning_rate": 9.273286415082939e-07, "loss": 0.3878, "step": 3114 }, { "epoch": 0.2, "grad_norm": 0.7959578824051317, "learning_rate": 9.272750133408819e-07, "loss": 0.1595, "step": 3115 }, { "epoch": 0.2, "grad_norm": 0.46285371181947127, "learning_rate": 9.27221366945059e-07, "loss": 0.1919, "step": 3116 }, { "epoch": 0.2, "grad_norm": 2.1482722745697806, "learning_rate": 9.271677023231137e-07, "loss": 0.2325, "step": 3117 }, { "epoch": 0.2, "grad_norm": 1.7778395195759555, "learning_rate": 9.271140194773355e-07, "loss": 0.3524, "step": 3118 }, { "epoch": 0.2, "grad_norm": 0.5775770200927671, "learning_rate": 9.270603184100148e-07, "loss": 0.2144, "step": 3119 }, { "epoch": 0.2, "grad_norm": 0.857440018690515, "learning_rate": 9.270065991234421e-07, "loss": 0.2713, "step": 3120 }, { "epoch": 0.2, "grad_norm": 0.5705712224451414, "learning_rate": 9.269528616199097e-07, "loss": 0.2801, "step": 3121 }, { "epoch": 0.2, "grad_norm": 0.4706964326756469, "learning_rate": 9.2689910590171e-07, "loss": 0.0459, "step": 3122 }, { "epoch": 0.2, "grad_norm": 0.42521057349481917, "learning_rate": 9.268453319711362e-07, "loss": 0.1089, "step": 3123 }, { "epoch": 0.2, "grad_norm": 0.40657016370441595, "learning_rate": 9.267915398304823e-07, "loss": 0.1521, "step": 3124 }, { "epoch": 0.2, "grad_norm": 0.46261893384349917, "learning_rate": 9.267377294820435e-07, "loss": 0.1424, "step": 3125 }, { "epoch": 0.2, "grad_norm": 0.6833032160213107, "learning_rate": 9.266839009281153e-07, "loss": 0.1854, "step": 3126 }, { "epoch": 0.2, "grad_norm": 1.7736416060711386, "learning_rate": 9.266300541709942e-07, "loss": 0.3294, "step": 3127 }, { "epoch": 0.2, "grad_norm": 1.2219178648628992, "learning_rate": 9.265761892129773e-07, "loss": 0.1536, "step": 3128 }, { "epoch": 0.2, "grad_norm": 0.6033314381868828, "learning_rate": 9.265223060563626e-07, "loss": 0.1082, "step": 3129 }, { "epoch": 0.2, "grad_norm": 0.262137011317028, "learning_rate": 9.26468404703449e-07, "loss": 0.1726, "step": 3130 }, { "epoch": 0.2, "grad_norm": 0.7195111645442679, "learning_rate": 9.264144851565359e-07, "loss": 0.1617, "step": 3131 }, { "epoch": 0.2, "grad_norm": 0.3230029545712064, "learning_rate": 9.263605474179237e-07, "loss": 0.0923, "step": 3132 }, { "epoch": 0.2, "grad_norm": 2.956611115674006, "learning_rate": 9.263065914899133e-07, "loss": 0.3059, "step": 3133 }, { "epoch": 0.2, "grad_norm": 0.359438984188222, "learning_rate": 9.262526173748069e-07, "loss": 0.0781, "step": 3134 }, { "epoch": 0.2, "grad_norm": 0.36141643563992937, "learning_rate": 9.261986250749067e-07, "loss": 0.1543, "step": 3135 }, { "epoch": 0.2, "grad_norm": 0.9103019185795369, "learning_rate": 9.261446145925167e-07, "loss": 0.2738, "step": 3136 }, { "epoch": 0.2, "grad_norm": 1.3150390424059855, "learning_rate": 9.260905859299407e-07, "loss": 0.3519, "step": 3137 }, { "epoch": 0.2, "grad_norm": 0.4051852584242612, "learning_rate": 9.260365390894837e-07, "loss": 0.0811, "step": 3138 }, { "epoch": 0.2, "grad_norm": 0.8460199513689234, "learning_rate": 9.259824740734516e-07, "loss": 0.122, "step": 3139 }, { "epoch": 0.2, "grad_norm": 0.9071104525547069, "learning_rate": 9.259283908841506e-07, "loss": 0.3743, "step": 3140 }, { "epoch": 0.2, "grad_norm": 0.9524789406700434, "learning_rate": 9.258742895238885e-07, "loss": 0.1818, "step": 3141 }, { "epoch": 0.2, "grad_norm": 0.6664822300195525, "learning_rate": 9.25820169994973e-07, "loss": 0.1822, "step": 3142 }, { "epoch": 0.2, "grad_norm": 0.6172938015588753, "learning_rate": 9.25766032299713e-07, "loss": 0.2441, "step": 3143 }, { "epoch": 0.2, "grad_norm": 1.849651525952206, "learning_rate": 9.257118764404181e-07, "loss": 0.107, "step": 3144 }, { "epoch": 0.2, "grad_norm": 0.7729040587341335, "learning_rate": 9.256577024193989e-07, "loss": 0.3767, "step": 3145 }, { "epoch": 0.2, "grad_norm": 0.6564250188192128, "learning_rate": 9.256035102389663e-07, "loss": 0.0797, "step": 3146 }, { "epoch": 0.2, "grad_norm": 3.4430327980532014, "learning_rate": 9.255492999014324e-07, "loss": 0.2038, "step": 3147 }, { "epoch": 0.2, "grad_norm": 0.6259749299365427, "learning_rate": 9.254950714091101e-07, "loss": 0.0472, "step": 3148 }, { "epoch": 0.2, "grad_norm": 1.575732959471976, "learning_rate": 9.254408247643125e-07, "loss": 0.123, "step": 3149 }, { "epoch": 0.2, "grad_norm": 0.7782952435832244, "learning_rate": 9.253865599693543e-07, "loss": 0.1398, "step": 3150 }, { "epoch": 0.2, "grad_norm": 0.5640252414464564, "learning_rate": 9.253322770265501e-07, "loss": 0.1967, "step": 3151 }, { "epoch": 0.2, "grad_norm": 0.6524550108989349, "learning_rate": 9.252779759382158e-07, "loss": 0.268, "step": 3152 }, { "epoch": 0.2, "grad_norm": 0.640403865390597, "learning_rate": 9.252236567066685e-07, "loss": 0.2419, "step": 3153 }, { "epoch": 0.2, "grad_norm": 1.1765964313625707, "learning_rate": 9.251693193342249e-07, "loss": 0.3511, "step": 3154 }, { "epoch": 0.2, "grad_norm": 1.7874003331630535, "learning_rate": 9.251149638232037e-07, "loss": 0.1543, "step": 3155 }, { "epoch": 0.2, "grad_norm": 1.1031380268003386, "learning_rate": 9.250605901759233e-07, "loss": 0.2093, "step": 3156 }, { "epoch": 0.2, "grad_norm": 0.7965048234082887, "learning_rate": 9.250061983947038e-07, "loss": 0.3321, "step": 3157 }, { "epoch": 0.2, "grad_norm": 0.6196023978433362, "learning_rate": 9.249517884818654e-07, "loss": 0.4197, "step": 3158 }, { "epoch": 0.2, "grad_norm": 0.5654349449389893, "learning_rate": 9.248973604397295e-07, "loss": 0.1719, "step": 3159 }, { "epoch": 0.2, "grad_norm": 0.698012012240679, "learning_rate": 9.248429142706181e-07, "loss": 0.2616, "step": 3160 }, { "epoch": 0.2, "grad_norm": 0.2680825715026603, "learning_rate": 9.247884499768539e-07, "loss": 0.1239, "step": 3161 }, { "epoch": 0.2, "grad_norm": 0.4076732361684476, "learning_rate": 9.247339675607605e-07, "loss": 0.1892, "step": 3162 }, { "epoch": 0.2, "grad_norm": 0.3428337471960096, "learning_rate": 9.246794670246623e-07, "loss": 0.0122, "step": 3163 }, { "epoch": 0.2, "grad_norm": 0.6622795978962026, "learning_rate": 9.246249483708842e-07, "loss": 0.4124, "step": 3164 }, { "epoch": 0.2, "grad_norm": 0.43485167127355784, "learning_rate": 9.24570411601752e-07, "loss": 0.1117, "step": 3165 }, { "epoch": 0.2, "grad_norm": 0.5284284378374157, "learning_rate": 9.24515856719593e-07, "loss": 0.1605, "step": 3166 }, { "epoch": 0.2, "grad_norm": 0.451271690731251, "learning_rate": 9.244612837267338e-07, "loss": 0.1461, "step": 3167 }, { "epoch": 0.2, "grad_norm": 0.41283869920204125, "learning_rate": 9.244066926255031e-07, "loss": 0.0468, "step": 3168 }, { "epoch": 0.2, "grad_norm": 0.859544709261444, "learning_rate": 9.243520834182297e-07, "loss": 0.0915, "step": 3169 }, { "epoch": 0.2, "grad_norm": 0.5412777735384318, "learning_rate": 9.242974561072436e-07, "loss": 0.1003, "step": 3170 }, { "epoch": 0.2, "grad_norm": 0.8852762174299448, "learning_rate": 9.242428106948748e-07, "loss": 0.035, "step": 3171 }, { "epoch": 0.2, "grad_norm": 0.46427628358900325, "learning_rate": 9.241881471834549e-07, "loss": 0.2438, "step": 3172 }, { "epoch": 0.2, "grad_norm": 0.3303918204521397, "learning_rate": 9.24133465575316e-07, "loss": 0.1165, "step": 3173 }, { "epoch": 0.2, "grad_norm": 0.3878750718711695, "learning_rate": 9.240787658727909e-07, "loss": 0.3216, "step": 3174 }, { "epoch": 0.2, "grad_norm": 0.5824463727444047, "learning_rate": 9.240240480782129e-07, "loss": 0.1372, "step": 3175 }, { "epoch": 0.2, "grad_norm": 0.4037450558744446, "learning_rate": 9.239693121939168e-07, "loss": 0.1651, "step": 3176 }, { "epoch": 0.2, "grad_norm": 0.8191030154457623, "learning_rate": 9.239145582222376e-07, "loss": 0.2761, "step": 3177 }, { "epoch": 0.2, "grad_norm": 0.4330613171027677, "learning_rate": 9.238597861655111e-07, "loss": 0.1141, "step": 3178 }, { "epoch": 0.2, "grad_norm": 1.9178433176483896, "learning_rate": 9.23804996026074e-07, "loss": 0.2528, "step": 3179 }, { "epoch": 0.2, "grad_norm": 0.8536592757608799, "learning_rate": 9.237501878062638e-07, "loss": 0.4177, "step": 3180 }, { "epoch": 0.2, "grad_norm": 0.5320097823585402, "learning_rate": 9.236953615084189e-07, "loss": 0.3115, "step": 3181 }, { "epoch": 0.2, "grad_norm": 0.5953625757265195, "learning_rate": 9.23640517134878e-07, "loss": 0.1737, "step": 3182 }, { "epoch": 0.2, "grad_norm": 0.8007362504841291, "learning_rate": 9.23585654687981e-07, "loss": 0.0391, "step": 3183 }, { "epoch": 0.2, "grad_norm": 0.8458625569678068, "learning_rate": 9.235307741700685e-07, "loss": 0.0691, "step": 3184 }, { "epoch": 0.2, "grad_norm": 0.7845374611852582, "learning_rate": 9.234758755834818e-07, "loss": 0.4652, "step": 3185 }, { "epoch": 0.2, "grad_norm": 0.9736760960263428, "learning_rate": 9.234209589305629e-07, "loss": 0.1573, "step": 3186 }, { "epoch": 0.2, "grad_norm": 1.2100880068248985, "learning_rate": 9.233660242136548e-07, "loss": 0.2671, "step": 3187 }, { "epoch": 0.2, "grad_norm": 1.8863558069875754, "learning_rate": 9.233110714351009e-07, "loss": 0.285, "step": 3188 }, { "epoch": 0.2, "grad_norm": 0.387485326428505, "learning_rate": 9.232561005972459e-07, "loss": 0.1589, "step": 3189 }, { "epoch": 0.2, "grad_norm": 0.7961383788030786, "learning_rate": 9.232011117024348e-07, "loss": 0.2707, "step": 3190 }, { "epoch": 0.2, "grad_norm": 1.9879379142928602, "learning_rate": 9.231461047530133e-07, "loss": 0.2961, "step": 3191 }, { "epoch": 0.2, "grad_norm": 0.7928095490159558, "learning_rate": 9.230910797513285e-07, "loss": 0.143, "step": 3192 }, { "epoch": 0.2, "grad_norm": 0.7905290172441409, "learning_rate": 9.230360366997277e-07, "loss": 0.3047, "step": 3193 }, { "epoch": 0.2, "grad_norm": 1.171167431695269, "learning_rate": 9.229809756005592e-07, "loss": 0.0411, "step": 3194 }, { "epoch": 0.2, "grad_norm": 0.7363608158765037, "learning_rate": 9.229258964561719e-07, "loss": 0.2254, "step": 3195 }, { "epoch": 0.2, "grad_norm": 1.054144945273629, "learning_rate": 9.228707992689157e-07, "loss": 0.2566, "step": 3196 }, { "epoch": 0.2, "grad_norm": 0.1728080823178616, "learning_rate": 9.228156840411411e-07, "loss": 0.0789, "step": 3197 }, { "epoch": 0.2, "grad_norm": 0.4382911752143824, "learning_rate": 9.227605507751997e-07, "loss": 0.0144, "step": 3198 }, { "epoch": 0.2, "grad_norm": 1.4021783476774723, "learning_rate": 9.227053994734431e-07, "loss": 0.4258, "step": 3199 }, { "epoch": 0.2, "grad_norm": 0.469696850201351, "learning_rate": 9.226502301382244e-07, "loss": 0.0046, "step": 3200 }, { "epoch": 0.2, "grad_norm": 0.7784926479436306, "learning_rate": 9.225950427718974e-07, "loss": 0.187, "step": 3201 }, { "epoch": 0.2, "grad_norm": 0.8830903774871972, "learning_rate": 9.225398373768163e-07, "loss": 0.2908, "step": 3202 }, { "epoch": 0.2, "grad_norm": 0.7850095466288786, "learning_rate": 9.224846139553362e-07, "loss": 0.2986, "step": 3203 }, { "epoch": 0.2, "grad_norm": 0.7171573984108145, "learning_rate": 9.224293725098132e-07, "loss": 0.136, "step": 3204 }, { "epoch": 0.2, "grad_norm": 0.7147338692888996, "learning_rate": 9.223741130426041e-07, "loss": 0.2694, "step": 3205 }, { "epoch": 0.2, "grad_norm": 0.5449525920143408, "learning_rate": 9.223188355560663e-07, "loss": 0.1357, "step": 3206 }, { "epoch": 0.2, "grad_norm": 0.8683597603208748, "learning_rate": 9.222635400525578e-07, "loss": 0.0655, "step": 3207 }, { "epoch": 0.2, "grad_norm": 0.7005967370533396, "learning_rate": 9.22208226534438e-07, "loss": 0.1175, "step": 3208 }, { "epoch": 0.2, "grad_norm": 0.43738091256188927, "learning_rate": 9.221528950040663e-07, "loss": 0.1073, "step": 3209 }, { "epoch": 0.2, "grad_norm": 0.8581921996171491, "learning_rate": 9.220975454638036e-07, "loss": 0.081, "step": 3210 }, { "epoch": 0.2, "grad_norm": 1.1351383388479153, "learning_rate": 9.220421779160111e-07, "loss": 0.0515, "step": 3211 }, { "epoch": 0.2, "grad_norm": 0.650205733208947, "learning_rate": 9.219867923630509e-07, "loss": 0.1263, "step": 3212 }, { "epoch": 0.2, "grad_norm": 0.5094960839844335, "learning_rate": 9.219313888072859e-07, "loss": 0.1935, "step": 3213 }, { "epoch": 0.2, "grad_norm": 2.3775566066303138, "learning_rate": 9.218759672510795e-07, "loss": 0.29, "step": 3214 }, { "epoch": 0.21, "grad_norm": 0.5839181438936653, "learning_rate": 9.218205276967963e-07, "loss": 0.121, "step": 3215 }, { "epoch": 0.21, "grad_norm": 0.619965388979965, "learning_rate": 9.217650701468015e-07, "loss": 0.2328, "step": 3216 }, { "epoch": 0.21, "grad_norm": 0.4594208800221896, "learning_rate": 9.217095946034609e-07, "loss": 0.1759, "step": 3217 }, { "epoch": 0.21, "grad_norm": 0.894595064160595, "learning_rate": 9.216541010691412e-07, "loss": 0.0834, "step": 3218 }, { "epoch": 0.21, "grad_norm": 0.6950540322671729, "learning_rate": 9.215985895462101e-07, "loss": 0.0271, "step": 3219 }, { "epoch": 0.21, "grad_norm": 0.7352186234647778, "learning_rate": 9.215430600370356e-07, "loss": 0.1694, "step": 3220 }, { "epoch": 0.21, "grad_norm": 0.44343233299035845, "learning_rate": 9.214875125439865e-07, "loss": 0.1934, "step": 3221 }, { "epoch": 0.21, "grad_norm": 1.4432183421572362, "learning_rate": 9.21431947069433e-07, "loss": 0.1698, "step": 3222 }, { "epoch": 0.21, "grad_norm": 0.48105287720482787, "learning_rate": 9.213763636157454e-07, "loss": 0.1607, "step": 3223 }, { "epoch": 0.21, "grad_norm": 0.31192982752598486, "learning_rate": 9.213207621852952e-07, "loss": 0.0758, "step": 3224 }, { "epoch": 0.21, "grad_norm": 0.6726849577678, "learning_rate": 9.212651427804543e-07, "loss": 0.1775, "step": 3225 }, { "epoch": 0.21, "grad_norm": 0.6263180529191299, "learning_rate": 9.212095054035955e-07, "loss": 0.0706, "step": 3226 }, { "epoch": 0.21, "grad_norm": 0.4635604648015689, "learning_rate": 9.211538500570923e-07, "loss": 0.1287, "step": 3227 }, { "epoch": 0.21, "grad_norm": 0.8474045210307591, "learning_rate": 9.210981767433195e-07, "loss": 0.3819, "step": 3228 }, { "epoch": 0.21, "grad_norm": 0.6874972538717036, "learning_rate": 9.210424854646519e-07, "loss": 0.2192, "step": 3229 }, { "epoch": 0.21, "grad_norm": 0.7691094876067304, "learning_rate": 9.209867762234653e-07, "loss": 0.022, "step": 3230 }, { "epoch": 0.21, "grad_norm": 0.696244205564151, "learning_rate": 9.209310490221367e-07, "loss": 0.3037, "step": 3231 }, { "epoch": 0.21, "grad_norm": 0.8153250419759999, "learning_rate": 9.208753038630434e-07, "loss": 0.1564, "step": 3232 }, { "epoch": 0.21, "grad_norm": 0.9541435679121205, "learning_rate": 9.208195407485634e-07, "loss": 0.0973, "step": 3233 }, { "epoch": 0.21, "grad_norm": 0.8509587442576104, "learning_rate": 9.20763759681076e-07, "loss": 0.1268, "step": 3234 }, { "epoch": 0.21, "grad_norm": 0.2920801299379249, "learning_rate": 9.207079606629606e-07, "loss": 0.1542, "step": 3235 }, { "epoch": 0.21, "grad_norm": 1.1822533011617806, "learning_rate": 9.206521436965981e-07, "loss": 0.1604, "step": 3236 }, { "epoch": 0.21, "grad_norm": 1.0059318658786853, "learning_rate": 9.205963087843693e-07, "loss": 0.2971, "step": 3237 }, { "epoch": 0.21, "grad_norm": 0.6283855641188356, "learning_rate": 9.205404559286567e-07, "loss": 0.2701, "step": 3238 }, { "epoch": 0.21, "grad_norm": 0.6352247159513633, "learning_rate": 9.204845851318427e-07, "loss": 0.2549, "step": 3239 }, { "epoch": 0.21, "grad_norm": 1.0524122518502468, "learning_rate": 9.204286963963111e-07, "loss": 0.3879, "step": 3240 }, { "epoch": 0.21, "grad_norm": 0.9223261899788481, "learning_rate": 9.20372789724446e-07, "loss": 0.1511, "step": 3241 }, { "epoch": 0.21, "grad_norm": 1.0375883721197088, "learning_rate": 9.203168651186329e-07, "loss": 0.1093, "step": 3242 }, { "epoch": 0.21, "grad_norm": 0.6249258115181283, "learning_rate": 9.202609225812571e-07, "loss": 0.1495, "step": 3243 }, { "epoch": 0.21, "grad_norm": 0.3723623613833438, "learning_rate": 9.202049621147055e-07, "loss": 0.2153, "step": 3244 }, { "epoch": 0.21, "grad_norm": 1.116965781580051, "learning_rate": 9.201489837213658e-07, "loss": 0.1362, "step": 3245 }, { "epoch": 0.21, "grad_norm": 1.5384575646385588, "learning_rate": 9.200929874036257e-07, "loss": 0.2366, "step": 3246 }, { "epoch": 0.21, "grad_norm": 1.0716566854155367, "learning_rate": 9.200369731638741e-07, "loss": 0.0693, "step": 3247 }, { "epoch": 0.21, "grad_norm": 0.6249185693333995, "learning_rate": 9.19980941004501e-07, "loss": 0.1311, "step": 3248 }, { "epoch": 0.21, "grad_norm": 0.7468108907428338, "learning_rate": 9.199248909278967e-07, "loss": 0.1688, "step": 3249 }, { "epoch": 0.21, "grad_norm": 1.3635267021327973, "learning_rate": 9.198688229364524e-07, "loss": 0.0944, "step": 3250 }, { "epoch": 0.21, "grad_norm": 0.49514635687869285, "learning_rate": 9.198127370325601e-07, "loss": 0.1282, "step": 3251 }, { "epoch": 0.21, "grad_norm": 0.5079375544928111, "learning_rate": 9.197566332186124e-07, "loss": 0.106, "step": 3252 }, { "epoch": 0.21, "grad_norm": 1.4813235463590242, "learning_rate": 9.197005114970029e-07, "loss": 0.0962, "step": 3253 }, { "epoch": 0.21, "grad_norm": 0.4454324149510626, "learning_rate": 9.19644371870126e-07, "loss": 0.2286, "step": 3254 }, { "epoch": 0.21, "grad_norm": 0.4247545785031421, "learning_rate": 9.195882143403766e-07, "loss": 0.0118, "step": 3255 }, { "epoch": 0.21, "grad_norm": 0.6139334715147486, "learning_rate": 9.195320389101504e-07, "loss": 0.3141, "step": 3256 }, { "epoch": 0.21, "grad_norm": 0.8679100346942271, "learning_rate": 9.194758455818441e-07, "loss": 0.1987, "step": 3257 }, { "epoch": 0.21, "grad_norm": 0.7583339385851732, "learning_rate": 9.194196343578549e-07, "loss": 0.4295, "step": 3258 }, { "epoch": 0.21, "grad_norm": 0.5185716101109296, "learning_rate": 9.193634052405811e-07, "loss": 0.2579, "step": 3259 }, { "epoch": 0.21, "grad_norm": 2.2887558954245897, "learning_rate": 9.193071582324213e-07, "loss": 0.0536, "step": 3260 }, { "epoch": 0.21, "grad_norm": 0.7758585411009997, "learning_rate": 9.192508933357752e-07, "loss": 0.171, "step": 3261 }, { "epoch": 0.21, "grad_norm": 0.2598361492938699, "learning_rate": 9.191946105530432e-07, "loss": 0.0823, "step": 3262 }, { "epoch": 0.21, "grad_norm": 0.5276822194378585, "learning_rate": 9.191383098866265e-07, "loss": 0.1623, "step": 3263 }, { "epoch": 0.21, "grad_norm": 0.6953044661041045, "learning_rate": 9.190819913389269e-07, "loss": 0.2203, "step": 3264 }, { "epoch": 0.21, "grad_norm": 0.27018655248726875, "learning_rate": 9.190256549123471e-07, "loss": 0.2548, "step": 3265 }, { "epoch": 0.21, "grad_norm": 0.6395899427747753, "learning_rate": 9.189693006092905e-07, "loss": 0.0718, "step": 3266 }, { "epoch": 0.21, "grad_norm": 0.22490555307165516, "learning_rate": 9.189129284321614e-07, "loss": 0.0889, "step": 3267 }, { "epoch": 0.21, "grad_norm": 0.9837568311853337, "learning_rate": 9.188565383833647e-07, "loss": 0.1677, "step": 3268 }, { "epoch": 0.21, "grad_norm": 0.6026477433004154, "learning_rate": 9.188001304653058e-07, "loss": 0.1218, "step": 3269 }, { "epoch": 0.21, "grad_norm": 0.8915853470145135, "learning_rate": 9.187437046803915e-07, "loss": 0.1409, "step": 3270 }, { "epoch": 0.21, "grad_norm": 0.9289081429824383, "learning_rate": 9.18687261031029e-07, "loss": 0.2263, "step": 3271 }, { "epoch": 0.21, "grad_norm": 0.7102800021580643, "learning_rate": 9.186307995196263e-07, "loss": 0.0423, "step": 3272 }, { "epoch": 0.21, "grad_norm": 0.40979456304112044, "learning_rate": 9.185743201485922e-07, "loss": 0.2591, "step": 3273 }, { "epoch": 0.21, "grad_norm": 0.8633246845478875, "learning_rate": 9.185178229203361e-07, "loss": 0.1852, "step": 3274 }, { "epoch": 0.21, "grad_norm": 0.36422496372365826, "learning_rate": 9.184613078372685e-07, "loss": 0.2195, "step": 3275 }, { "epoch": 0.21, "grad_norm": 0.6511353010145003, "learning_rate": 9.184047749018e-07, "loss": 0.1936, "step": 3276 }, { "epoch": 0.21, "grad_norm": 0.6264697924714663, "learning_rate": 9.183482241163429e-07, "loss": 0.1882, "step": 3277 }, { "epoch": 0.21, "grad_norm": 0.6652060262657263, "learning_rate": 9.182916554833096e-07, "loss": 0.2467, "step": 3278 }, { "epoch": 0.21, "grad_norm": 2.013861147834192, "learning_rate": 9.182350690051132e-07, "loss": 0.2543, "step": 3279 }, { "epoch": 0.21, "grad_norm": 0.5113679897931713, "learning_rate": 9.181784646841683e-07, "loss": 0.317, "step": 3280 }, { "epoch": 0.21, "grad_norm": 0.5882751125353862, "learning_rate": 9.181218425228893e-07, "loss": 0.1704, "step": 3281 }, { "epoch": 0.21, "grad_norm": 0.9396655086336284, "learning_rate": 9.180652025236919e-07, "loss": 0.1283, "step": 3282 }, { "epoch": 0.21, "grad_norm": 0.40541933418651066, "learning_rate": 9.180085446889927e-07, "loss": 0.1214, "step": 3283 }, { "epoch": 0.21, "grad_norm": 1.1760734602911802, "learning_rate": 9.179518690212085e-07, "loss": 0.1954, "step": 3284 }, { "epoch": 0.21, "grad_norm": 0.46856952626879744, "learning_rate": 9.178951755227575e-07, "loss": 0.0292, "step": 3285 }, { "epoch": 0.21, "grad_norm": 0.7802914955954653, "learning_rate": 9.178384641960582e-07, "loss": 0.4202, "step": 3286 }, { "epoch": 0.21, "grad_norm": 0.7315321948269884, "learning_rate": 9.1778173504353e-07, "loss": 0.1565, "step": 3287 }, { "epoch": 0.21, "grad_norm": 1.222393850481576, "learning_rate": 9.177249880675933e-07, "loss": 0.2781, "step": 3288 }, { "epoch": 0.21, "grad_norm": 0.666436708168778, "learning_rate": 9.176682232706687e-07, "loss": 0.1222, "step": 3289 }, { "epoch": 0.21, "grad_norm": 1.9280795230892156, "learning_rate": 9.176114406551783e-07, "loss": 0.1178, "step": 3290 }, { "epoch": 0.21, "grad_norm": 0.7398973842010642, "learning_rate": 9.175546402235441e-07, "loss": 0.326, "step": 3291 }, { "epoch": 0.21, "grad_norm": 0.4088669314826049, "learning_rate": 9.174978219781897e-07, "loss": 0.2255, "step": 3292 }, { "epoch": 0.21, "grad_norm": 0.175157809384668, "learning_rate": 9.174409859215388e-07, "loss": 0.0047, "step": 3293 }, { "epoch": 0.21, "grad_norm": 0.9253171979955276, "learning_rate": 9.173841320560164e-07, "loss": 0.0849, "step": 3294 }, { "epoch": 0.21, "grad_norm": 0.7178912350948362, "learning_rate": 9.173272603840478e-07, "loss": 0.115, "step": 3295 }, { "epoch": 0.21, "grad_norm": 0.8153411201239142, "learning_rate": 9.172703709080593e-07, "loss": 0.1246, "step": 3296 }, { "epoch": 0.21, "grad_norm": 0.5726353685004575, "learning_rate": 9.172134636304782e-07, "loss": 0.37, "step": 3297 }, { "epoch": 0.21, "grad_norm": 0.460500779942402, "learning_rate": 9.171565385537318e-07, "loss": 0.1719, "step": 3298 }, { "epoch": 0.21, "grad_norm": 1.2420851980112937, "learning_rate": 9.17099595680249e-07, "loss": 0.182, "step": 3299 }, { "epoch": 0.21, "grad_norm": 0.4513015773792623, "learning_rate": 9.170426350124589e-07, "loss": 0.2616, "step": 3300 }, { "epoch": 0.21, "grad_norm": 0.5483890191093747, "learning_rate": 9.169856565527916e-07, "loss": 0.0853, "step": 3301 }, { "epoch": 0.21, "grad_norm": 1.3191579094376202, "learning_rate": 9.169286603036781e-07, "loss": 0.2658, "step": 3302 }, { "epoch": 0.21, "grad_norm": 0.49489582655847625, "learning_rate": 9.168716462675497e-07, "loss": 0.2081, "step": 3303 }, { "epoch": 0.21, "grad_norm": 0.40434010307233975, "learning_rate": 9.168146144468388e-07, "loss": 0.1118, "step": 3304 }, { "epoch": 0.21, "grad_norm": 1.965669223985352, "learning_rate": 9.167575648439787e-07, "loss": 0.0482, "step": 3305 }, { "epoch": 0.21, "grad_norm": 0.8037598988165185, "learning_rate": 9.16700497461403e-07, "loss": 0.1939, "step": 3306 }, { "epoch": 0.21, "grad_norm": 1.0228184358035761, "learning_rate": 9.166434123015462e-07, "loss": 0.1571, "step": 3307 }, { "epoch": 0.21, "grad_norm": 1.106800100492122, "learning_rate": 9.165863093668442e-07, "loss": 0.2343, "step": 3308 }, { "epoch": 0.21, "grad_norm": 0.13701452775901876, "learning_rate": 9.165291886597328e-07, "loss": 0.0035, "step": 3309 }, { "epoch": 0.21, "grad_norm": 0.6820776536663483, "learning_rate": 9.164720501826488e-07, "loss": 0.1915, "step": 3310 }, { "epoch": 0.21, "grad_norm": 0.5055141620505854, "learning_rate": 9.164148939380299e-07, "loss": 0.3251, "step": 3311 }, { "epoch": 0.21, "grad_norm": 0.730238558757734, "learning_rate": 9.163577199283144e-07, "loss": 0.261, "step": 3312 }, { "epoch": 0.21, "grad_norm": 1.3908591679499098, "learning_rate": 9.163005281559418e-07, "loss": 0.2514, "step": 3313 }, { "epoch": 0.21, "grad_norm": 0.7861487704535053, "learning_rate": 9.162433186233516e-07, "loss": 0.2354, "step": 3314 }, { "epoch": 0.21, "grad_norm": 0.6252351727346455, "learning_rate": 9.161860913329848e-07, "loss": 0.144, "step": 3315 }, { "epoch": 0.21, "grad_norm": 4.939100655055246, "learning_rate": 9.161288462872827e-07, "loss": 0.2447, "step": 3316 }, { "epoch": 0.21, "grad_norm": 0.9484435047301384, "learning_rate": 9.160715834886876e-07, "loss": 0.3138, "step": 3317 }, { "epoch": 0.21, "grad_norm": 0.9739724864529796, "learning_rate": 9.160143029396421e-07, "loss": 0.1434, "step": 3318 }, { "epoch": 0.21, "grad_norm": 1.4506480021370867, "learning_rate": 9.159570046425902e-07, "loss": 0.0983, "step": 3319 }, { "epoch": 0.21, "grad_norm": 0.5178633961922393, "learning_rate": 9.158996885999763e-07, "loss": 0.1196, "step": 3320 }, { "epoch": 0.21, "grad_norm": 0.9579539480681765, "learning_rate": 9.158423548142458e-07, "loss": 0.2286, "step": 3321 }, { "epoch": 0.21, "grad_norm": 0.3888085819064977, "learning_rate": 9.157850032878445e-07, "loss": 0.0631, "step": 3322 }, { "epoch": 0.21, "grad_norm": 0.845659596007509, "learning_rate": 9.157276340232189e-07, "loss": 0.167, "step": 3323 }, { "epoch": 0.21, "grad_norm": 0.3534485994896798, "learning_rate": 9.156702470228168e-07, "loss": 0.0901, "step": 3324 }, { "epoch": 0.21, "grad_norm": 1.1605635089106716, "learning_rate": 9.156128422890864e-07, "loss": 0.3227, "step": 3325 }, { "epoch": 0.21, "grad_norm": 0.7428022722660884, "learning_rate": 9.155554198244766e-07, "loss": 0.1184, "step": 3326 }, { "epoch": 0.21, "grad_norm": 1.2918648195664704, "learning_rate": 9.154979796314373e-07, "loss": 0.3346, "step": 3327 }, { "epoch": 0.21, "grad_norm": 0.5856853294500018, "learning_rate": 9.15440521712419e-07, "loss": 0.1636, "step": 3328 }, { "epoch": 0.21, "grad_norm": 0.9437189009731898, "learning_rate": 9.153830460698727e-07, "loss": 0.0998, "step": 3329 }, { "epoch": 0.21, "grad_norm": 0.11880129973324605, "learning_rate": 9.153255527062508e-07, "loss": 0.0034, "step": 3330 }, { "epoch": 0.21, "grad_norm": 0.7106187310767957, "learning_rate": 9.152680416240058e-07, "loss": 0.1795, "step": 3331 }, { "epoch": 0.21, "grad_norm": 1.165237470034804, "learning_rate": 9.152105128255913e-07, "loss": 0.1602, "step": 3332 }, { "epoch": 0.21, "grad_norm": 0.5943122879158438, "learning_rate": 9.151529663134618e-07, "loss": 0.2253, "step": 3333 }, { "epoch": 0.21, "grad_norm": 0.6323695057504283, "learning_rate": 9.150954020900721e-07, "loss": 0.27, "step": 3334 }, { "epoch": 0.21, "grad_norm": 0.6057181664144572, "learning_rate": 9.150378201578783e-07, "loss": 0.0806, "step": 3335 }, { "epoch": 0.21, "grad_norm": 0.8036668462639066, "learning_rate": 9.149802205193364e-07, "loss": 0.3558, "step": 3336 }, { "epoch": 0.21, "grad_norm": 0.9407285413340083, "learning_rate": 9.149226031769044e-07, "loss": 0.5969, "step": 3337 }, { "epoch": 0.21, "grad_norm": 1.0538561578320227, "learning_rate": 9.148649681330399e-07, "loss": 0.2665, "step": 3338 }, { "epoch": 0.21, "grad_norm": 0.9601589805852963, "learning_rate": 9.148073153902018e-07, "loss": 0.1061, "step": 3339 }, { "epoch": 0.21, "grad_norm": 1.2809480204308987, "learning_rate": 9.1474964495085e-07, "loss": 0.3411, "step": 3340 }, { "epoch": 0.21, "grad_norm": 0.9292083215413531, "learning_rate": 9.146919568174443e-07, "loss": 0.4065, "step": 3341 }, { "epoch": 0.21, "grad_norm": 0.8696332294107295, "learning_rate": 9.146342509924463e-07, "loss": 0.1685, "step": 3342 }, { "epoch": 0.21, "grad_norm": 0.8419173746837841, "learning_rate": 9.145765274783176e-07, "loss": 0.3281, "step": 3343 }, { "epoch": 0.21, "grad_norm": 0.7068294963354437, "learning_rate": 9.145187862775208e-07, "loss": 0.3983, "step": 3344 }, { "epoch": 0.21, "grad_norm": 2.7314959557750793, "learning_rate": 9.144610273925193e-07, "loss": 0.1848, "step": 3345 }, { "epoch": 0.21, "grad_norm": 0.33549357986216816, "learning_rate": 9.144032508257772e-07, "loss": 0.0108, "step": 3346 }, { "epoch": 0.21, "grad_norm": 1.2287606074263027, "learning_rate": 9.143454565797593e-07, "loss": 0.3093, "step": 3347 }, { "epoch": 0.21, "grad_norm": 0.5120194785272824, "learning_rate": 9.142876446569314e-07, "loss": 0.1189, "step": 3348 }, { "epoch": 0.21, "grad_norm": 0.7351605139084882, "learning_rate": 9.142298150597597e-07, "loss": 0.2479, "step": 3349 }, { "epoch": 0.21, "grad_norm": 0.8778220062242337, "learning_rate": 9.141719677907114e-07, "loss": 0.2634, "step": 3350 }, { "epoch": 0.21, "grad_norm": 0.6896967732039392, "learning_rate": 9.141141028522544e-07, "loss": 0.4148, "step": 3351 }, { "epoch": 0.21, "grad_norm": 0.14780695521541093, "learning_rate": 9.140562202468571e-07, "loss": 0.0727, "step": 3352 }, { "epoch": 0.21, "grad_norm": 0.33412376814886113, "learning_rate": 9.139983199769894e-07, "loss": 0.1369, "step": 3353 }, { "epoch": 0.21, "grad_norm": 0.4228120604743642, "learning_rate": 9.139404020451209e-07, "loss": 0.3021, "step": 3354 }, { "epoch": 0.21, "grad_norm": 0.5890829771220137, "learning_rate": 9.138824664537228e-07, "loss": 0.3531, "step": 3355 }, { "epoch": 0.21, "grad_norm": 0.9781033684600996, "learning_rate": 9.138245132052667e-07, "loss": 0.1932, "step": 3356 }, { "epoch": 0.21, "grad_norm": 0.224919100610762, "learning_rate": 9.13766542302225e-07, "loss": 0.1376, "step": 3357 }, { "epoch": 0.21, "grad_norm": 0.6696608362091178, "learning_rate": 9.137085537470706e-07, "loss": 0.3244, "step": 3358 }, { "epoch": 0.21, "grad_norm": 0.43231195722747895, "learning_rate": 9.136505475422778e-07, "loss": 0.0095, "step": 3359 }, { "epoch": 0.21, "grad_norm": 0.6223939786173917, "learning_rate": 9.135925236903212e-07, "loss": 0.1022, "step": 3360 }, { "epoch": 0.21, "grad_norm": 0.5430507895496046, "learning_rate": 9.135344821936759e-07, "loss": 0.3908, "step": 3361 }, { "epoch": 0.21, "grad_norm": 0.4061619624480858, "learning_rate": 9.134764230548184e-07, "loss": 0.0202, "step": 3362 }, { "epoch": 0.21, "grad_norm": 0.7312764284057695, "learning_rate": 9.134183462762255e-07, "loss": 0.132, "step": 3363 }, { "epoch": 0.21, "grad_norm": 0.4608353146168241, "learning_rate": 9.133602518603749e-07, "loss": 0.4335, "step": 3364 }, { "epoch": 0.21, "grad_norm": 0.726778267690586, "learning_rate": 9.133021398097449e-07, "loss": 0.3721, "step": 3365 }, { "epoch": 0.21, "grad_norm": 0.3008911729782962, "learning_rate": 9.132440101268149e-07, "loss": 0.0331, "step": 3366 }, { "epoch": 0.21, "grad_norm": 0.34111277979521704, "learning_rate": 9.131858628140647e-07, "loss": 0.1128, "step": 3367 }, { "epoch": 0.21, "grad_norm": 0.6935948489725566, "learning_rate": 9.131276978739748e-07, "loss": 0.0844, "step": 3368 }, { "epoch": 0.21, "grad_norm": 1.2863030783735738, "learning_rate": 9.130695153090271e-07, "loss": 0.1082, "step": 3369 }, { "epoch": 0.21, "grad_norm": 0.28620158719811806, "learning_rate": 9.130113151217033e-07, "loss": 0.0241, "step": 3370 }, { "epoch": 0.21, "grad_norm": 0.8812706385404825, "learning_rate": 9.129530973144866e-07, "loss": 0.2179, "step": 3371 }, { "epoch": 0.22, "grad_norm": 0.56321103348772, "learning_rate": 9.128948618898606e-07, "loss": 0.3122, "step": 3372 }, { "epoch": 0.22, "grad_norm": 0.7809217075713873, "learning_rate": 9.128366088503098e-07, "loss": 0.2553, "step": 3373 }, { "epoch": 0.22, "grad_norm": 2.487430715921176, "learning_rate": 9.127783381983194e-07, "loss": 0.1502, "step": 3374 }, { "epoch": 0.22, "grad_norm": 0.5128932989123394, "learning_rate": 9.127200499363752e-07, "loss": 0.0713, "step": 3375 }, { "epoch": 0.22, "grad_norm": 0.686968120630709, "learning_rate": 9.126617440669641e-07, "loss": 0.2624, "step": 3376 }, { "epoch": 0.22, "grad_norm": 0.9774532076092192, "learning_rate": 9.126034205925733e-07, "loss": 0.1044, "step": 3377 }, { "epoch": 0.22, "grad_norm": 0.7031551533349818, "learning_rate": 9.125450795156912e-07, "loss": 0.0799, "step": 3378 }, { "epoch": 0.22, "grad_norm": 0.550485547328557, "learning_rate": 9.124867208388067e-07, "loss": 0.2822, "step": 3379 }, { "epoch": 0.22, "grad_norm": 0.6038386615340078, "learning_rate": 9.124283445644097e-07, "loss": 0.1177, "step": 3380 }, { "epoch": 0.22, "grad_norm": 0.3482277354928197, "learning_rate": 9.123699506949901e-07, "loss": 0.0413, "step": 3381 }, { "epoch": 0.22, "grad_norm": 0.5948768393364658, "learning_rate": 9.123115392330396e-07, "loss": 0.1652, "step": 3382 }, { "epoch": 0.22, "grad_norm": 0.3894779516719231, "learning_rate": 9.1225311018105e-07, "loss": 0.1263, "step": 3383 }, { "epoch": 0.22, "grad_norm": 0.4785026138141128, "learning_rate": 9.121946635415139e-07, "loss": 0.0096, "step": 3384 }, { "epoch": 0.22, "grad_norm": 0.8184672825827085, "learning_rate": 9.121361993169249e-07, "loss": 0.0957, "step": 3385 }, { "epoch": 0.22, "grad_norm": 1.246811980644127, "learning_rate": 9.120777175097771e-07, "loss": 0.2403, "step": 3386 }, { "epoch": 0.22, "grad_norm": 0.9640966324191959, "learning_rate": 9.120192181225656e-07, "loss": 0.3179, "step": 3387 }, { "epoch": 0.22, "grad_norm": 0.6895051692302739, "learning_rate": 9.119607011577859e-07, "loss": 0.2984, "step": 3388 }, { "epoch": 0.22, "grad_norm": 0.29280048192975544, "learning_rate": 9.119021666179345e-07, "loss": 0.134, "step": 3389 }, { "epoch": 0.22, "grad_norm": 2.137237951547494, "learning_rate": 9.118436145055089e-07, "loss": 0.1653, "step": 3390 }, { "epoch": 0.22, "grad_norm": 0.5688838425370352, "learning_rate": 9.117850448230065e-07, "loss": 0.1128, "step": 3391 }, { "epoch": 0.22, "grad_norm": 0.4400530005215225, "learning_rate": 9.117264575729265e-07, "loss": 0.0096, "step": 3392 }, { "epoch": 0.22, "grad_norm": 0.7937961745154153, "learning_rate": 9.116678527577679e-07, "loss": 0.1149, "step": 3393 }, { "epoch": 0.22, "grad_norm": 1.2148334514226202, "learning_rate": 9.116092303800314e-07, "loss": 0.1107, "step": 3394 }, { "epoch": 0.22, "grad_norm": 0.9967046538924188, "learning_rate": 9.115505904422175e-07, "loss": 0.4086, "step": 3395 }, { "epoch": 0.22, "grad_norm": 1.4439013094625506, "learning_rate": 9.114919329468282e-07, "loss": 0.2029, "step": 3396 }, { "epoch": 0.22, "grad_norm": 0.5850941191041946, "learning_rate": 9.114332578963657e-07, "loss": 0.0204, "step": 3397 }, { "epoch": 0.22, "grad_norm": 0.5596567682767649, "learning_rate": 9.113745652933336e-07, "loss": 0.2591, "step": 3398 }, { "epoch": 0.22, "grad_norm": 0.7153097443238435, "learning_rate": 9.113158551402354e-07, "loss": 0.3206, "step": 3399 }, { "epoch": 0.22, "grad_norm": 0.660737198000805, "learning_rate": 9.11257127439576e-07, "loss": 0.3275, "step": 3400 }, { "epoch": 0.22, "grad_norm": 0.8420471598318644, "learning_rate": 9.111983821938607e-07, "loss": 0.2957, "step": 3401 }, { "epoch": 0.22, "grad_norm": 0.3974637733227828, "learning_rate": 9.11139619405596e-07, "loss": 0.1292, "step": 3402 }, { "epoch": 0.22, "grad_norm": 0.8618460911519606, "learning_rate": 9.110808390772885e-07, "loss": 0.2779, "step": 3403 }, { "epoch": 0.22, "grad_norm": 0.5857774826771328, "learning_rate": 9.110220412114461e-07, "loss": 0.2278, "step": 3404 }, { "epoch": 0.22, "grad_norm": 1.1973854569132587, "learning_rate": 9.10963225810577e-07, "loss": 0.1789, "step": 3405 }, { "epoch": 0.22, "grad_norm": 0.6026320213695966, "learning_rate": 9.109043928771909e-07, "loss": 0.1854, "step": 3406 }, { "epoch": 0.22, "grad_norm": 0.7815164386452087, "learning_rate": 9.10845542413797e-07, "loss": 0.2431, "step": 3407 }, { "epoch": 0.22, "grad_norm": 0.4577216473190088, "learning_rate": 9.107866744229066e-07, "loss": 0.1924, "step": 3408 }, { "epoch": 0.22, "grad_norm": 0.40027900877199785, "learning_rate": 9.107277889070309e-07, "loss": 0.0155, "step": 3409 }, { "epoch": 0.22, "grad_norm": 2.293075533621946, "learning_rate": 9.106688858686819e-07, "loss": 0.1404, "step": 3410 }, { "epoch": 0.22, "grad_norm": 1.0836027098191086, "learning_rate": 9.106099653103727e-07, "loss": 0.2441, "step": 3411 }, { "epoch": 0.22, "grad_norm": 0.23668753584277077, "learning_rate": 9.10551027234617e-07, "loss": 0.1216, "step": 3412 }, { "epoch": 0.22, "grad_norm": 0.58709382814804, "learning_rate": 9.104920716439293e-07, "loss": 0.2864, "step": 3413 }, { "epoch": 0.22, "grad_norm": 0.5794583746193465, "learning_rate": 9.104330985408244e-07, "loss": 0.2686, "step": 3414 }, { "epoch": 0.22, "grad_norm": 0.8895225246835184, "learning_rate": 9.103741079278186e-07, "loss": 0.5283, "step": 3415 }, { "epoch": 0.22, "grad_norm": 0.39283921122768195, "learning_rate": 9.103150998074283e-07, "loss": 0.2204, "step": 3416 }, { "epoch": 0.22, "grad_norm": 1.804370593211932, "learning_rate": 9.10256074182171e-07, "loss": 0.1024, "step": 3417 }, { "epoch": 0.22, "grad_norm": 0.7469095195242345, "learning_rate": 9.101970310545649e-07, "loss": 0.1825, "step": 3418 }, { "epoch": 0.22, "grad_norm": 0.4301125149502763, "learning_rate": 9.101379704271288e-07, "loss": 0.1813, "step": 3419 }, { "epoch": 0.22, "grad_norm": 0.43787974520146306, "learning_rate": 9.100788923023826e-07, "loss": 0.1324, "step": 3420 }, { "epoch": 0.22, "grad_norm": 0.9814626283804289, "learning_rate": 9.100197966828462e-07, "loss": 0.1023, "step": 3421 }, { "epoch": 0.22, "grad_norm": 0.6053470594817225, "learning_rate": 9.099606835710413e-07, "loss": 0.305, "step": 3422 }, { "epoch": 0.22, "grad_norm": 0.43098791540108977, "learning_rate": 9.099015529694893e-07, "loss": 0.0927, "step": 3423 }, { "epoch": 0.22, "grad_norm": 0.30846510323918225, "learning_rate": 9.098424048807131e-07, "loss": 0.2379, "step": 3424 }, { "epoch": 0.22, "grad_norm": 0.3731493271743987, "learning_rate": 9.097832393072362e-07, "loss": 0.1818, "step": 3425 }, { "epoch": 0.22, "grad_norm": 0.6252877550256376, "learning_rate": 9.097240562515824e-07, "loss": 0.1592, "step": 3426 }, { "epoch": 0.22, "grad_norm": 0.5703630961706992, "learning_rate": 9.096648557162767e-07, "loss": 0.3112, "step": 3427 }, { "epoch": 0.22, "grad_norm": 1.0771455845190558, "learning_rate": 9.096056377038448e-07, "loss": 0.2474, "step": 3428 }, { "epoch": 0.22, "grad_norm": 0.6758815883228315, "learning_rate": 9.095464022168129e-07, "loss": 0.2477, "step": 3429 }, { "epoch": 0.22, "grad_norm": 0.3800595338507114, "learning_rate": 9.094871492577081e-07, "loss": 0.2124, "step": 3430 }, { "epoch": 0.22, "grad_norm": 0.6086154417384398, "learning_rate": 9.094278788290586e-07, "loss": 0.142, "step": 3431 }, { "epoch": 0.22, "grad_norm": 0.8861271522221894, "learning_rate": 9.093685909333925e-07, "loss": 0.081, "step": 3432 }, { "epoch": 0.22, "grad_norm": 0.5888344856273661, "learning_rate": 9.093092855732395e-07, "loss": 0.5064, "step": 3433 }, { "epoch": 0.22, "grad_norm": 0.5248938488932281, "learning_rate": 9.092499627511295e-07, "loss": 0.0358, "step": 3434 }, { "epoch": 0.22, "grad_norm": 0.6760893053749472, "learning_rate": 9.091906224695935e-07, "loss": 0.1522, "step": 3435 }, { "epoch": 0.22, "grad_norm": 0.5565931103970779, "learning_rate": 9.091312647311629e-07, "loss": 0.2553, "step": 3436 }, { "epoch": 0.22, "grad_norm": 0.43821355945024576, "learning_rate": 9.0907188953837e-07, "loss": 0.115, "step": 3437 }, { "epoch": 0.22, "grad_norm": 2.0487219325921577, "learning_rate": 9.09012496893748e-07, "loss": 0.2093, "step": 3438 }, { "epoch": 0.22, "grad_norm": 0.710696025931875, "learning_rate": 9.089530867998307e-07, "loss": 0.1013, "step": 3439 }, { "epoch": 0.22, "grad_norm": 3.76015426839559, "learning_rate": 9.088936592591528e-07, "loss": 0.2028, "step": 3440 }, { "epoch": 0.22, "grad_norm": 0.6190455024758854, "learning_rate": 9.088342142742492e-07, "loss": 0.3572, "step": 3441 }, { "epoch": 0.22, "grad_norm": 1.0968735196658128, "learning_rate": 9.087747518476561e-07, "loss": 0.0332, "step": 3442 }, { "epoch": 0.22, "grad_norm": 0.5591905620825228, "learning_rate": 9.087152719819104e-07, "loss": 0.2026, "step": 3443 }, { "epoch": 0.22, "grad_norm": 0.8116329837785369, "learning_rate": 9.086557746795497e-07, "loss": 0.2132, "step": 3444 }, { "epoch": 0.22, "grad_norm": 0.5818075691140604, "learning_rate": 9.085962599431121e-07, "loss": 0.2607, "step": 3445 }, { "epoch": 0.22, "grad_norm": 0.7708689610124019, "learning_rate": 9.085367277751366e-07, "loss": 0.29, "step": 3446 }, { "epoch": 0.22, "grad_norm": 0.8386997305596612, "learning_rate": 9.084771781781631e-07, "loss": 0.0975, "step": 3447 }, { "epoch": 0.22, "grad_norm": 0.2847712370763844, "learning_rate": 9.08417611154732e-07, "loss": 0.0138, "step": 3448 }, { "epoch": 0.22, "grad_norm": 0.5567018486648099, "learning_rate": 9.083580267073846e-07, "loss": 0.2348, "step": 3449 }, { "epoch": 0.22, "grad_norm": 0.14978507688318166, "learning_rate": 9.082984248386629e-07, "loss": 0.0757, "step": 3450 }, { "epoch": 0.22, "grad_norm": 0.8480235839216598, "learning_rate": 9.082388055511096e-07, "loss": 0.1654, "step": 3451 }, { "epoch": 0.22, "grad_norm": 0.758476965822796, "learning_rate": 9.081791688472682e-07, "loss": 0.0173, "step": 3452 }, { "epoch": 0.22, "grad_norm": 0.28901291714694527, "learning_rate": 9.081195147296829e-07, "loss": 0.1958, "step": 3453 }, { "epoch": 0.22, "grad_norm": 1.5632435502419064, "learning_rate": 9.080598432008986e-07, "loss": 0.1931, "step": 3454 }, { "epoch": 0.22, "grad_norm": 0.20853929637542068, "learning_rate": 9.080001542634612e-07, "loss": 0.0078, "step": 3455 }, { "epoch": 0.22, "grad_norm": 0.8147606927698112, "learning_rate": 9.079404479199168e-07, "loss": 0.2416, "step": 3456 }, { "epoch": 0.22, "grad_norm": 0.5896403903375568, "learning_rate": 9.078807241728131e-07, "loss": 0.0463, "step": 3457 }, { "epoch": 0.22, "grad_norm": 0.6193532850598485, "learning_rate": 9.078209830246977e-07, "loss": 0.3053, "step": 3458 }, { "epoch": 0.22, "grad_norm": 0.3673911368542017, "learning_rate": 9.077612244781195e-07, "loss": 0.095, "step": 3459 }, { "epoch": 0.22, "grad_norm": 0.4741756122471617, "learning_rate": 9.077014485356274e-07, "loss": 0.2309, "step": 3460 }, { "epoch": 0.22, "grad_norm": 0.6886654317580722, "learning_rate": 9.076416551997721e-07, "loss": 0.1479, "step": 3461 }, { "epoch": 0.22, "grad_norm": 0.3884602630498089, "learning_rate": 9.075818444731044e-07, "loss": 0.0143, "step": 3462 }, { "epoch": 0.22, "grad_norm": 0.9800609006038236, "learning_rate": 9.075220163581758e-07, "loss": 0.2561, "step": 3463 }, { "epoch": 0.22, "grad_norm": 0.38942150556075106, "learning_rate": 9.074621708575387e-07, "loss": 0.103, "step": 3464 }, { "epoch": 0.22, "grad_norm": 1.3611265364250646, "learning_rate": 9.074023079737465e-07, "loss": 0.2226, "step": 3465 }, { "epoch": 0.22, "grad_norm": 0.8001913346072398, "learning_rate": 9.073424277093527e-07, "loss": 0.4161, "step": 3466 }, { "epoch": 0.22, "grad_norm": 0.44584243740872953, "learning_rate": 9.07282530066912e-07, "loss": 0.1643, "step": 3467 }, { "epoch": 0.22, "grad_norm": 0.44721985428880234, "learning_rate": 9.0722261504898e-07, "loss": 0.0276, "step": 3468 }, { "epoch": 0.22, "grad_norm": 0.5476807711359044, "learning_rate": 9.071626826581124e-07, "loss": 0.3451, "step": 3469 }, { "epoch": 0.22, "grad_norm": 0.4448469967113406, "learning_rate": 9.071027328968665e-07, "loss": 0.2502, "step": 3470 }, { "epoch": 0.22, "grad_norm": 0.9780446987211733, "learning_rate": 9.070427657677994e-07, "loss": 0.1259, "step": 3471 }, { "epoch": 0.22, "grad_norm": 0.6524682311126996, "learning_rate": 9.069827812734698e-07, "loss": 0.1111, "step": 3472 }, { "epoch": 0.22, "grad_norm": 0.8261236053892801, "learning_rate": 9.069227794164366e-07, "loss": 0.273, "step": 3473 }, { "epoch": 0.22, "grad_norm": 0.8456024771978315, "learning_rate": 9.068627601992598e-07, "loss": 0.1448, "step": 3474 }, { "epoch": 0.22, "grad_norm": 0.4122556009766779, "learning_rate": 9.068027236244995e-07, "loss": 0.2236, "step": 3475 }, { "epoch": 0.22, "grad_norm": 0.8702978327038665, "learning_rate": 9.067426696947174e-07, "loss": 0.3429, "step": 3476 }, { "epoch": 0.22, "grad_norm": 1.6658536681051026, "learning_rate": 9.066825984124751e-07, "loss": 0.2262, "step": 3477 }, { "epoch": 0.22, "grad_norm": 0.8015228154440659, "learning_rate": 9.066225097803358e-07, "loss": 0.1998, "step": 3478 }, { "epoch": 0.22, "grad_norm": 0.674366712686864, "learning_rate": 9.065624038008628e-07, "loss": 0.1484, "step": 3479 }, { "epoch": 0.22, "grad_norm": 0.6267862760521721, "learning_rate": 9.065022804766204e-07, "loss": 0.107, "step": 3480 }, { "epoch": 0.22, "grad_norm": 1.0302001191660715, "learning_rate": 9.064421398101733e-07, "loss": 0.3326, "step": 3481 }, { "epoch": 0.22, "grad_norm": 0.7957291490691056, "learning_rate": 9.063819818040878e-07, "loss": 0.1871, "step": 3482 }, { "epoch": 0.22, "grad_norm": 1.1249923368632135, "learning_rate": 9.063218064609299e-07, "loss": 0.2669, "step": 3483 }, { "epoch": 0.22, "grad_norm": 0.7913429341548515, "learning_rate": 9.062616137832668e-07, "loss": 0.4369, "step": 3484 }, { "epoch": 0.22, "grad_norm": 0.4232030837204642, "learning_rate": 9.062014037736667e-07, "loss": 0.3205, "step": 3485 }, { "epoch": 0.22, "grad_norm": 0.5464013784120936, "learning_rate": 9.061411764346982e-07, "loss": 0.0086, "step": 3486 }, { "epoch": 0.22, "grad_norm": 0.47319299397048015, "learning_rate": 9.060809317689306e-07, "loss": 0.2368, "step": 3487 }, { "epoch": 0.22, "grad_norm": 1.461021949249322, "learning_rate": 9.060206697789341e-07, "loss": 0.1098, "step": 3488 }, { "epoch": 0.22, "grad_norm": 0.50489660755318, "learning_rate": 9.059603904672797e-07, "loss": 0.1486, "step": 3489 }, { "epoch": 0.22, "grad_norm": 0.7062686015839136, "learning_rate": 9.059000938365388e-07, "loss": 0.378, "step": 3490 }, { "epoch": 0.22, "grad_norm": 0.2318923336010885, "learning_rate": 9.058397798892841e-07, "loss": 0.0119, "step": 3491 }, { "epoch": 0.22, "grad_norm": 6.119039946903587, "learning_rate": 9.057794486280885e-07, "loss": 0.1205, "step": 3492 }, { "epoch": 0.22, "grad_norm": 1.1552504432905428, "learning_rate": 9.057191000555259e-07, "loss": 0.2216, "step": 3493 }, { "epoch": 0.22, "grad_norm": 0.18240345717577552, "learning_rate": 9.056587341741708e-07, "loss": 0.0044, "step": 3494 }, { "epoch": 0.22, "grad_norm": 1.920239819563848, "learning_rate": 9.055983509865988e-07, "loss": 0.1902, "step": 3495 }, { "epoch": 0.22, "grad_norm": 0.5578636167243833, "learning_rate": 9.055379504953857e-07, "loss": 0.2409, "step": 3496 }, { "epoch": 0.22, "grad_norm": 0.32673939571085087, "learning_rate": 9.054775327031084e-07, "loss": 0.1857, "step": 3497 }, { "epoch": 0.22, "grad_norm": 0.3054597557355878, "learning_rate": 9.054170976123444e-07, "loss": 0.074, "step": 3498 }, { "epoch": 0.22, "grad_norm": 0.47660839361366525, "learning_rate": 9.05356645225672e-07, "loss": 0.1878, "step": 3499 }, { "epoch": 0.22, "grad_norm": 0.8488780011047519, "learning_rate": 9.052961755456704e-07, "loss": 0.2275, "step": 3500 }, { "epoch": 0.22, "grad_norm": 0.4759906315349322, "learning_rate": 9.052356885749191e-07, "loss": 0.2249, "step": 3501 }, { "epoch": 0.22, "grad_norm": 0.18884537303613233, "learning_rate": 9.051751843159987e-07, "loss": 0.1303, "step": 3502 }, { "epoch": 0.22, "grad_norm": 0.8426818040273766, "learning_rate": 9.051146627714905e-07, "loss": 0.188, "step": 3503 }, { "epoch": 0.22, "grad_norm": 1.1731863384220949, "learning_rate": 9.050541239439763e-07, "loss": 0.2124, "step": 3504 }, { "epoch": 0.22, "grad_norm": 2.3368678322200815, "learning_rate": 9.04993567836039e-07, "loss": 0.4364, "step": 3505 }, { "epoch": 0.22, "grad_norm": 0.621984000338872, "learning_rate": 9.049329944502619e-07, "loss": 0.3331, "step": 3506 }, { "epoch": 0.22, "grad_norm": 1.0557024485074031, "learning_rate": 9.048724037892293e-07, "loss": 0.2421, "step": 3507 }, { "epoch": 0.22, "grad_norm": 1.034919908349202, "learning_rate": 9.04811795855526e-07, "loss": 0.216, "step": 3508 }, { "epoch": 0.22, "grad_norm": 0.5535103811207018, "learning_rate": 9.047511706517377e-07, "loss": 0.2032, "step": 3509 }, { "epoch": 0.22, "grad_norm": 0.422120351200221, "learning_rate": 9.046905281804509e-07, "loss": 0.2738, "step": 3510 }, { "epoch": 0.22, "grad_norm": 1.001188790890657, "learning_rate": 9.046298684442525e-07, "loss": 0.3171, "step": 3511 }, { "epoch": 0.22, "grad_norm": 0.7747187881056723, "learning_rate": 9.045691914457305e-07, "loss": 0.0981, "step": 3512 }, { "epoch": 0.22, "grad_norm": 1.279504743833741, "learning_rate": 9.045084971874737e-07, "loss": 0.0469, "step": 3513 }, { "epoch": 0.22, "grad_norm": 0.5824157956468737, "learning_rate": 9.044477856720712e-07, "loss": 0.2563, "step": 3514 }, { "epoch": 0.22, "grad_norm": 3.0540488650606123, "learning_rate": 9.04387056902113e-07, "loss": 0.199, "step": 3515 }, { "epoch": 0.22, "grad_norm": 1.0224820944299369, "learning_rate": 9.043263108801901e-07, "loss": 0.3534, "step": 3516 }, { "epoch": 0.22, "grad_norm": 0.480155624685507, "learning_rate": 9.04265547608894e-07, "loss": 0.0166, "step": 3517 }, { "epoch": 0.22, "grad_norm": 2.1695620230090724, "learning_rate": 9.042047670908169e-07, "loss": 0.1602, "step": 3518 }, { "epoch": 0.22, "grad_norm": 0.616620524514955, "learning_rate": 9.041439693285519e-07, "loss": 0.1087, "step": 3519 }, { "epoch": 0.22, "grad_norm": 1.0588946185105392, "learning_rate": 9.040831543246928e-07, "loss": 0.1376, "step": 3520 }, { "epoch": 0.22, "grad_norm": 0.43301704960439397, "learning_rate": 9.040223220818339e-07, "loss": 0.1515, "step": 3521 }, { "epoch": 0.22, "grad_norm": 0.9372290561504759, "learning_rate": 9.039614726025706e-07, "loss": 0.0296, "step": 3522 }, { "epoch": 0.22, "grad_norm": 0.7290055521646502, "learning_rate": 9.039006058894988e-07, "loss": 0.1478, "step": 3523 }, { "epoch": 0.22, "grad_norm": 0.6009524441824383, "learning_rate": 9.038397219452154e-07, "loss": 0.0492, "step": 3524 }, { "epoch": 0.22, "grad_norm": 0.3510990732077877, "learning_rate": 9.037788207723174e-07, "loss": 0.1141, "step": 3525 }, { "epoch": 0.22, "grad_norm": 0.1877054520282975, "learning_rate": 9.037179023734034e-07, "loss": 0.0938, "step": 3526 }, { "epoch": 0.22, "grad_norm": 0.38774478221254344, "learning_rate": 9.036569667510719e-07, "loss": 0.2048, "step": 3527 }, { "epoch": 0.22, "grad_norm": 0.2650309446210396, "learning_rate": 9.035960139079229e-07, "loss": 0.1019, "step": 3528 }, { "epoch": 0.23, "grad_norm": 1.4097990888608176, "learning_rate": 9.035350438465566e-07, "loss": 0.0862, "step": 3529 }, { "epoch": 0.23, "grad_norm": 0.4170593866752147, "learning_rate": 9.034740565695741e-07, "loss": 0.0864, "step": 3530 }, { "epoch": 0.23, "grad_norm": 0.49179527875071183, "learning_rate": 9.034130520795773e-07, "loss": 0.0727, "step": 3531 }, { "epoch": 0.23, "grad_norm": 0.4844474975087014, "learning_rate": 9.033520303791686e-07, "loss": 0.1681, "step": 3532 }, { "epoch": 0.23, "grad_norm": 1.7568547366344485, "learning_rate": 9.032909914709516e-07, "loss": 0.065, "step": 3533 }, { "epoch": 0.23, "grad_norm": 0.439624171337634, "learning_rate": 9.032299353575301e-07, "loss": 0.1562, "step": 3534 }, { "epoch": 0.23, "grad_norm": 0.40402549015647976, "learning_rate": 9.03168862041509e-07, "loss": 0.108, "step": 3535 }, { "epoch": 0.23, "grad_norm": 0.7998233890169933, "learning_rate": 9.031077715254937e-07, "loss": 0.2988, "step": 3536 }, { "epoch": 0.23, "grad_norm": 0.3299364680259563, "learning_rate": 9.030466638120907e-07, "loss": 0.1662, "step": 3537 }, { "epoch": 0.23, "grad_norm": 0.33398480048584933, "learning_rate": 9.029855389039067e-07, "loss": 0.0746, "step": 3538 }, { "epoch": 0.23, "grad_norm": 0.5046925690501163, "learning_rate": 9.029243968035495e-07, "loss": 0.1415, "step": 3539 }, { "epoch": 0.23, "grad_norm": 1.0899228427771, "learning_rate": 9.028632375136276e-07, "loss": 0.2744, "step": 3540 }, { "epoch": 0.23, "grad_norm": 1.4820328028191452, "learning_rate": 9.028020610367499e-07, "loss": 0.0907, "step": 3541 }, { "epoch": 0.23, "grad_norm": 1.6686129981117492, "learning_rate": 9.027408673755268e-07, "loss": 0.1155, "step": 3542 }, { "epoch": 0.23, "grad_norm": 0.7304402378877142, "learning_rate": 9.026796565325687e-07, "loss": 0.0507, "step": 3543 }, { "epoch": 0.23, "grad_norm": 0.6368166231070489, "learning_rate": 9.026184285104867e-07, "loss": 0.3022, "step": 3544 }, { "epoch": 0.23, "grad_norm": 0.6703179137627501, "learning_rate": 9.025571833118935e-07, "loss": 0.2356, "step": 3545 }, { "epoch": 0.23, "grad_norm": 0.8085542338999495, "learning_rate": 9.024959209394014e-07, "loss": 0.2126, "step": 3546 }, { "epoch": 0.23, "grad_norm": 2.228714239918408, "learning_rate": 9.024346413956243e-07, "loss": 0.2395, "step": 3547 }, { "epoch": 0.23, "grad_norm": 1.0296238013379133, "learning_rate": 9.023733446831762e-07, "loss": 0.1334, "step": 3548 }, { "epoch": 0.23, "grad_norm": 0.8240975558791944, "learning_rate": 9.023120308046726e-07, "loss": 0.2793, "step": 3549 }, { "epoch": 0.23, "grad_norm": 0.5901663841476886, "learning_rate": 9.022506997627289e-07, "loss": 0.1835, "step": 3550 }, { "epoch": 0.23, "grad_norm": 0.4576929666443724, "learning_rate": 9.021893515599618e-07, "loss": 0.107, "step": 3551 }, { "epoch": 0.23, "grad_norm": 0.7028228227971861, "learning_rate": 9.021279861989884e-07, "loss": 0.1563, "step": 3552 }, { "epoch": 0.23, "grad_norm": 0.49652258624001755, "learning_rate": 9.020666036824267e-07, "loss": 0.1915, "step": 3553 }, { "epoch": 0.23, "grad_norm": 1.0911481865292731, "learning_rate": 9.020052040128955e-07, "loss": 0.2879, "step": 3554 }, { "epoch": 0.23, "grad_norm": 0.9767828769964186, "learning_rate": 9.019437871930143e-07, "loss": 0.2459, "step": 3555 }, { "epoch": 0.23, "grad_norm": 0.6713058897409105, "learning_rate": 9.018823532254028e-07, "loss": 0.1628, "step": 3556 }, { "epoch": 0.23, "grad_norm": 0.8181149095416154, "learning_rate": 9.018209021126824e-07, "loss": 0.1507, "step": 3557 }, { "epoch": 0.23, "grad_norm": 1.2652378181965303, "learning_rate": 9.017594338574745e-07, "loss": 0.2984, "step": 3558 }, { "epoch": 0.23, "grad_norm": 0.5574580406275796, "learning_rate": 9.016979484624017e-07, "loss": 0.2797, "step": 3559 }, { "epoch": 0.23, "grad_norm": 1.085960464776571, "learning_rate": 9.016364459300867e-07, "loss": 0.0731, "step": 3560 }, { "epoch": 0.23, "grad_norm": 0.9820920115828107, "learning_rate": 9.015749262631535e-07, "loss": 0.1038, "step": 3561 }, { "epoch": 0.23, "grad_norm": 0.7786666678562189, "learning_rate": 9.015133894642268e-07, "loss": 0.1636, "step": 3562 }, { "epoch": 0.23, "grad_norm": 1.8077732798249364, "learning_rate": 9.014518355359318e-07, "loss": 0.2149, "step": 3563 }, { "epoch": 0.23, "grad_norm": 0.8379422447571268, "learning_rate": 9.013902644808944e-07, "loss": 0.0319, "step": 3564 }, { "epoch": 0.23, "grad_norm": 0.8020231600160077, "learning_rate": 9.013286763017414e-07, "loss": 0.0149, "step": 3565 }, { "epoch": 0.23, "grad_norm": 0.7451250982089039, "learning_rate": 9.012670710011003e-07, "loss": 0.1621, "step": 3566 }, { "epoch": 0.23, "grad_norm": 0.5775504911964549, "learning_rate": 9.012054485815993e-07, "loss": 0.0363, "step": 3567 }, { "epoch": 0.23, "grad_norm": 0.36901694160006543, "learning_rate": 9.011438090458674e-07, "loss": 0.1155, "step": 3568 }, { "epoch": 0.23, "grad_norm": 0.4530151377449247, "learning_rate": 9.010821523965342e-07, "loss": 0.4444, "step": 3569 }, { "epoch": 0.23, "grad_norm": 0.7453481219095509, "learning_rate": 9.0102047863623e-07, "loss": 0.401, "step": 3570 }, { "epoch": 0.23, "grad_norm": 0.6608854831066162, "learning_rate": 9.009587877675862e-07, "loss": 0.0626, "step": 3571 }, { "epoch": 0.23, "grad_norm": 1.6146155111136735, "learning_rate": 9.008970797932343e-07, "loss": 0.1994, "step": 3572 }, { "epoch": 0.23, "grad_norm": 0.6827877979600937, "learning_rate": 9.008353547158072e-07, "loss": 0.1298, "step": 3573 }, { "epoch": 0.23, "grad_norm": 0.44509956643235676, "learning_rate": 9.00773612537938e-07, "loss": 0.0597, "step": 3574 }, { "epoch": 0.23, "grad_norm": 0.9247401941630845, "learning_rate": 9.007118532622608e-07, "loss": 0.3066, "step": 3575 }, { "epoch": 0.23, "grad_norm": 0.3500663477620438, "learning_rate": 9.006500768914106e-07, "loss": 0.2817, "step": 3576 }, { "epoch": 0.23, "grad_norm": 1.147320025553286, "learning_rate": 9.005882834280226e-07, "loss": 0.335, "step": 3577 }, { "epoch": 0.23, "grad_norm": 0.5504171426043073, "learning_rate": 9.005264728747331e-07, "loss": 0.3613, "step": 3578 }, { "epoch": 0.23, "grad_norm": 0.8790360380964415, "learning_rate": 9.004646452341792e-07, "loss": 0.0743, "step": 3579 }, { "epoch": 0.23, "grad_norm": 1.3644271116628688, "learning_rate": 9.004028005089985e-07, "loss": 0.1868, "step": 3580 }, { "epoch": 0.23, "grad_norm": 0.34683762899779425, "learning_rate": 9.003409387018293e-07, "loss": 0.0939, "step": 3581 }, { "epoch": 0.23, "grad_norm": 1.7087190263976482, "learning_rate": 9.00279059815311e-07, "loss": 0.1915, "step": 3582 }, { "epoch": 0.23, "grad_norm": 0.9889991485407398, "learning_rate": 9.002171638520833e-07, "loss": 0.2909, "step": 3583 }, { "epoch": 0.23, "grad_norm": 0.6306306038822274, "learning_rate": 9.001552508147868e-07, "loss": 0.3875, "step": 3584 }, { "epoch": 0.23, "grad_norm": 0.8414396902575082, "learning_rate": 9.000933207060629e-07, "loss": 0.2494, "step": 3585 }, { "epoch": 0.23, "grad_norm": 0.3442777423239455, "learning_rate": 9.000313735285537e-07, "loss": 0.241, "step": 3586 }, { "epoch": 0.23, "grad_norm": 0.7755224462971616, "learning_rate": 8.999694092849019e-07, "loss": 0.0217, "step": 3587 }, { "epoch": 0.23, "grad_norm": 0.7497411582538355, "learning_rate": 8.999074279777511e-07, "loss": 0.269, "step": 3588 }, { "epoch": 0.23, "grad_norm": 0.9658463537603914, "learning_rate": 8.998454296097456e-07, "loss": 0.216, "step": 3589 }, { "epoch": 0.23, "grad_norm": 1.156068956355357, "learning_rate": 8.997834141835301e-07, "loss": 0.0136, "step": 3590 }, { "epoch": 0.23, "grad_norm": 1.0337821522188728, "learning_rate": 8.997213817017506e-07, "loss": 0.1946, "step": 3591 }, { "epoch": 0.23, "grad_norm": 0.5773588168066646, "learning_rate": 8.996593321670533e-07, "loss": 0.203, "step": 3592 }, { "epoch": 0.23, "grad_norm": 0.5244756314694453, "learning_rate": 8.995972655820856e-07, "loss": 0.2456, "step": 3593 }, { "epoch": 0.23, "grad_norm": 0.4893468963190216, "learning_rate": 8.995351819494952e-07, "loss": 0.0508, "step": 3594 }, { "epoch": 0.23, "grad_norm": 0.8466957374097075, "learning_rate": 8.994730812719307e-07, "loss": 0.1263, "step": 3595 }, { "epoch": 0.23, "grad_norm": 0.7415694023113186, "learning_rate": 8.994109635520416e-07, "loss": 0.2457, "step": 3596 }, { "epoch": 0.23, "grad_norm": 0.7099841326077181, "learning_rate": 8.99348828792478e-07, "loss": 0.0197, "step": 3597 }, { "epoch": 0.23, "grad_norm": 0.38683694772595284, "learning_rate": 8.992866769958904e-07, "loss": 0.1075, "step": 3598 }, { "epoch": 0.23, "grad_norm": 0.17439063080097897, "learning_rate": 8.992245081649304e-07, "loss": 0.0243, "step": 3599 }, { "epoch": 0.23, "grad_norm": 2.970067514021893, "learning_rate": 8.991623223022505e-07, "loss": 0.1047, "step": 3600 }, { "epoch": 0.23, "grad_norm": 1.4837965208476653, "learning_rate": 8.991001194105034e-07, "loss": 0.0782, "step": 3601 }, { "epoch": 0.23, "grad_norm": 0.9435958016207453, "learning_rate": 8.990378994923431e-07, "loss": 0.2051, "step": 3602 }, { "epoch": 0.23, "grad_norm": 1.2429304708913325, "learning_rate": 8.989756625504237e-07, "loss": 0.1081, "step": 3603 }, { "epoch": 0.23, "grad_norm": 1.055464231568157, "learning_rate": 8.989134085874006e-07, "loss": 0.2532, "step": 3604 }, { "epoch": 0.23, "grad_norm": 2.569220773678353, "learning_rate": 8.988511376059295e-07, "loss": 0.1961, "step": 3605 }, { "epoch": 0.23, "grad_norm": 0.24562876287549157, "learning_rate": 8.98788849608667e-07, "loss": 0.0949, "step": 3606 }, { "epoch": 0.23, "grad_norm": 0.5056491008363143, "learning_rate": 8.987265445982706e-07, "loss": 0.1763, "step": 3607 }, { "epoch": 0.23, "grad_norm": 0.9208834173216598, "learning_rate": 8.986642225773984e-07, "loss": 0.3181, "step": 3608 }, { "epoch": 0.23, "grad_norm": 0.5927033106464928, "learning_rate": 8.986018835487088e-07, "loss": 0.1896, "step": 3609 }, { "epoch": 0.23, "grad_norm": 0.8667824774310198, "learning_rate": 8.985395275148618e-07, "loss": 0.1819, "step": 3610 }, { "epoch": 0.23, "grad_norm": 0.5911908766796049, "learning_rate": 8.984771544785172e-07, "loss": 0.4034, "step": 3611 }, { "epoch": 0.23, "grad_norm": 0.730668819028163, "learning_rate": 8.984147644423361e-07, "loss": 0.1424, "step": 3612 }, { "epoch": 0.23, "grad_norm": 0.5961925230821833, "learning_rate": 8.983523574089805e-07, "loss": 0.3239, "step": 3613 }, { "epoch": 0.23, "grad_norm": 0.3782501076675327, "learning_rate": 8.982899333811123e-07, "loss": 0.2761, "step": 3614 }, { "epoch": 0.23, "grad_norm": 1.225164008271313, "learning_rate": 8.98227492361395e-07, "loss": 0.1838, "step": 3615 }, { "epoch": 0.23, "grad_norm": 0.6473365017567108, "learning_rate": 8.981650343524923e-07, "loss": 0.0759, "step": 3616 }, { "epoch": 0.23, "grad_norm": 1.7565623573857385, "learning_rate": 8.981025593570689e-07, "loss": 0.2515, "step": 3617 }, { "epoch": 0.23, "grad_norm": 0.4702202017001563, "learning_rate": 8.980400673777899e-07, "loss": 0.0947, "step": 3618 }, { "epoch": 0.23, "grad_norm": 0.9656628407236097, "learning_rate": 8.979775584173215e-07, "loss": 0.2115, "step": 3619 }, { "epoch": 0.23, "grad_norm": 0.860858477890622, "learning_rate": 8.979150324783304e-07, "loss": 0.0973, "step": 3620 }, { "epoch": 0.23, "grad_norm": 1.0181446681122626, "learning_rate": 8.978524895634842e-07, "loss": 0.1194, "step": 3621 }, { "epoch": 0.23, "grad_norm": 2.53271215351201, "learning_rate": 8.97789929675451e-07, "loss": 0.4167, "step": 3622 }, { "epoch": 0.23, "grad_norm": 0.6576937400855337, "learning_rate": 8.977273528168995e-07, "loss": 0.2884, "step": 3623 }, { "epoch": 0.23, "grad_norm": 0.4513117586626686, "learning_rate": 8.976647589905e-07, "loss": 0.1806, "step": 3624 }, { "epoch": 0.23, "grad_norm": 0.8760306204582416, "learning_rate": 8.976021481989222e-07, "loss": 0.2436, "step": 3625 }, { "epoch": 0.23, "grad_norm": 0.5021420846358876, "learning_rate": 8.975395204448375e-07, "loss": 0.2186, "step": 3626 }, { "epoch": 0.23, "grad_norm": 0.6725821463708842, "learning_rate": 8.974768757309178e-07, "loss": 0.234, "step": 3627 }, { "epoch": 0.23, "grad_norm": 0.5985002495509291, "learning_rate": 8.974142140598355e-07, "loss": 0.2526, "step": 3628 }, { "epoch": 0.23, "grad_norm": 0.07129142280051656, "learning_rate": 8.97351535434264e-07, "loss": 0.003, "step": 3629 }, { "epoch": 0.23, "grad_norm": 0.7747541248706957, "learning_rate": 8.972888398568771e-07, "loss": 0.0597, "step": 3630 }, { "epoch": 0.23, "grad_norm": 0.42375668303332786, "learning_rate": 8.972261273303496e-07, "loss": 0.1236, "step": 3631 }, { "epoch": 0.23, "grad_norm": 0.20468387464282548, "learning_rate": 8.971633978573572e-07, "loss": 0.151, "step": 3632 }, { "epoch": 0.23, "grad_norm": 0.650357402418647, "learning_rate": 8.971006514405757e-07, "loss": 0.1957, "step": 3633 }, { "epoch": 0.23, "grad_norm": 0.5187919697079516, "learning_rate": 8.970378880826821e-07, "loss": 0.1054, "step": 3634 }, { "epoch": 0.23, "grad_norm": 1.0042025818428884, "learning_rate": 8.969751077863541e-07, "loss": 0.2455, "step": 3635 }, { "epoch": 0.23, "grad_norm": 3.1524725878424387, "learning_rate": 8.969123105542701e-07, "loss": 0.1395, "step": 3636 }, { "epoch": 0.23, "grad_norm": 0.5317960185564282, "learning_rate": 8.968494963891088e-07, "loss": 0.1611, "step": 3637 }, { "epoch": 0.23, "grad_norm": 0.2729045166459671, "learning_rate": 8.967866652935505e-07, "loss": 0.19, "step": 3638 }, { "epoch": 0.23, "grad_norm": 1.7719217778106593, "learning_rate": 8.967238172702752e-07, "loss": 0.2054, "step": 3639 }, { "epoch": 0.23, "grad_norm": 1.0165043542325558, "learning_rate": 8.966609523219644e-07, "loss": 0.1063, "step": 3640 }, { "epoch": 0.23, "grad_norm": 0.9964120352545338, "learning_rate": 8.965980704513001e-07, "loss": 0.4258, "step": 3641 }, { "epoch": 0.23, "grad_norm": 0.5789623426836484, "learning_rate": 8.965351716609646e-07, "loss": 0.2288, "step": 3642 }, { "epoch": 0.23, "grad_norm": 0.4845448513474563, "learning_rate": 8.964722559536417e-07, "loss": 0.1012, "step": 3643 }, { "epoch": 0.23, "grad_norm": 0.6260612248673366, "learning_rate": 8.964093233320154e-07, "loss": 0.1403, "step": 3644 }, { "epoch": 0.23, "grad_norm": 0.7093553891900518, "learning_rate": 8.963463737987705e-07, "loss": 0.1666, "step": 3645 }, { "epoch": 0.23, "grad_norm": 0.9548331172676473, "learning_rate": 8.962834073565923e-07, "loss": 0.1197, "step": 3646 }, { "epoch": 0.23, "grad_norm": 0.6320170371849005, "learning_rate": 8.962204240081675e-07, "loss": 0.2755, "step": 3647 }, { "epoch": 0.23, "grad_norm": 0.8136771418377878, "learning_rate": 8.961574237561829e-07, "loss": 0.178, "step": 3648 }, { "epoch": 0.23, "grad_norm": 0.6243822576557541, "learning_rate": 8.960944066033262e-07, "loss": 0.336, "step": 3649 }, { "epoch": 0.23, "grad_norm": 0.5325262107975081, "learning_rate": 8.960313725522859e-07, "loss": 0.131, "step": 3650 }, { "epoch": 0.23, "grad_norm": 0.18395353413330576, "learning_rate": 8.959683216057511e-07, "loss": 0.1292, "step": 3651 }, { "epoch": 0.23, "grad_norm": 0.9914847779468045, "learning_rate": 8.959052537664117e-07, "loss": 0.1831, "step": 3652 }, { "epoch": 0.23, "grad_norm": 0.38738104396809164, "learning_rate": 8.958421690369583e-07, "loss": 0.1754, "step": 3653 }, { "epoch": 0.23, "grad_norm": 0.836668169740809, "learning_rate": 8.957790674200822e-07, "loss": 0.2014, "step": 3654 }, { "epoch": 0.23, "grad_norm": 0.6888123401638223, "learning_rate": 8.957159489184756e-07, "loss": 0.2075, "step": 3655 }, { "epoch": 0.23, "grad_norm": 0.40301539098331646, "learning_rate": 8.956528135348309e-07, "loss": 0.049, "step": 3656 }, { "epoch": 0.23, "grad_norm": 0.1883763894185228, "learning_rate": 8.955896612718419e-07, "loss": 0.0882, "step": 3657 }, { "epoch": 0.23, "grad_norm": 0.41368478323780855, "learning_rate": 8.955264921322028e-07, "loss": 0.273, "step": 3658 }, { "epoch": 0.23, "grad_norm": 0.8492349494450537, "learning_rate": 8.954633061186085e-07, "loss": 0.1135, "step": 3659 }, { "epoch": 0.23, "grad_norm": 0.9084133800749307, "learning_rate": 8.954001032337544e-07, "loss": 0.237, "step": 3660 }, { "epoch": 0.23, "grad_norm": 0.9855625340864884, "learning_rate": 8.953368834803371e-07, "loss": 0.2454, "step": 3661 }, { "epoch": 0.23, "grad_norm": 0.5264867254320804, "learning_rate": 8.952736468610537e-07, "loss": 0.1048, "step": 3662 }, { "epoch": 0.23, "grad_norm": 0.3548027213828942, "learning_rate": 8.952103933786018e-07, "loss": 0.1395, "step": 3663 }, { "epoch": 0.23, "grad_norm": 0.20752820812496128, "learning_rate": 8.951471230356802e-07, "loss": 0.0762, "step": 3664 }, { "epoch": 0.23, "grad_norm": 0.2104146494183132, "learning_rate": 8.950838358349879e-07, "loss": 0.0713, "step": 3665 }, { "epoch": 0.23, "grad_norm": 0.5097759774422576, "learning_rate": 8.950205317792248e-07, "loss": 0.149, "step": 3666 }, { "epoch": 0.23, "grad_norm": 0.8002322322693497, "learning_rate": 8.949572108710919e-07, "loss": 0.1094, "step": 3667 }, { "epoch": 0.23, "grad_norm": 0.7567734818145712, "learning_rate": 8.948938731132905e-07, "loss": 0.1855, "step": 3668 }, { "epoch": 0.23, "grad_norm": 0.5485931173642535, "learning_rate": 8.948305185085224e-07, "loss": 0.1999, "step": 3669 }, { "epoch": 0.23, "grad_norm": 0.7758814921664122, "learning_rate": 8.947671470594909e-07, "loss": 0.253, "step": 3670 }, { "epoch": 0.23, "grad_norm": 1.548740515899794, "learning_rate": 8.947037587688991e-07, "loss": 0.1348, "step": 3671 }, { "epoch": 0.23, "grad_norm": 0.8562468637051345, "learning_rate": 8.946403536394517e-07, "loss": 0.2659, "step": 3672 }, { "epoch": 0.23, "grad_norm": 0.5972999892965324, "learning_rate": 8.945769316738534e-07, "loss": 0.3071, "step": 3673 }, { "epoch": 0.23, "grad_norm": 0.7637563834327782, "learning_rate": 8.945134928748099e-07, "loss": 0.2423, "step": 3674 }, { "epoch": 0.23, "grad_norm": 1.724880431718904, "learning_rate": 8.944500372450279e-07, "loss": 0.0644, "step": 3675 }, { "epoch": 0.23, "grad_norm": 0.29369274739420687, "learning_rate": 8.943865647872142e-07, "loss": 0.0833, "step": 3676 }, { "epoch": 0.23, "grad_norm": 0.5174022107305446, "learning_rate": 8.943230755040769e-07, "loss": 0.0989, "step": 3677 }, { "epoch": 0.23, "grad_norm": 0.3386383112516127, "learning_rate": 8.942595693983246e-07, "loss": 0.1398, "step": 3678 }, { "epoch": 0.23, "grad_norm": 1.2894587813608773, "learning_rate": 8.941960464726664e-07, "loss": 0.2248, "step": 3679 }, { "epoch": 0.23, "grad_norm": 0.9263771756047618, "learning_rate": 8.941325067298125e-07, "loss": 0.1979, "step": 3680 }, { "epoch": 0.23, "grad_norm": 0.8364227497770703, "learning_rate": 8.940689501724736e-07, "loss": 0.2238, "step": 3681 }, { "epoch": 0.23, "grad_norm": 0.6979653931635937, "learning_rate": 8.940053768033608e-07, "loss": 0.1571, "step": 3682 }, { "epoch": 0.23, "grad_norm": 0.11305024212531636, "learning_rate": 8.93941786625187e-07, "loss": 0.0059, "step": 3683 }, { "epoch": 0.23, "grad_norm": 0.5546405488659987, "learning_rate": 8.938781796406645e-07, "loss": 0.4754, "step": 3684 }, { "epoch": 0.23, "grad_norm": 0.8013374007740004, "learning_rate": 8.93814555852507e-07, "loss": 0.223, "step": 3685 }, { "epoch": 0.24, "grad_norm": 0.36380398795993935, "learning_rate": 8.937509152634288e-07, "loss": 0.1786, "step": 3686 }, { "epoch": 0.24, "grad_norm": 0.30294934011358565, "learning_rate": 8.936872578761452e-07, "loss": 0.0062, "step": 3687 }, { "epoch": 0.24, "grad_norm": 1.113872932046635, "learning_rate": 8.936235836933716e-07, "loss": 0.1251, "step": 3688 }, { "epoch": 0.24, "grad_norm": 0.927337177241909, "learning_rate": 8.935598927178247e-07, "loss": 0.198, "step": 3689 }, { "epoch": 0.24, "grad_norm": 0.9345968878431729, "learning_rate": 8.934961849522218e-07, "loss": 0.1778, "step": 3690 }, { "epoch": 0.24, "grad_norm": 0.7948996619172269, "learning_rate": 8.934324603992803e-07, "loss": 0.0913, "step": 3691 }, { "epoch": 0.24, "grad_norm": 0.8982354655223829, "learning_rate": 8.933687190617194e-07, "loss": 0.1007, "step": 3692 }, { "epoch": 0.24, "grad_norm": 0.72415753707952, "learning_rate": 8.933049609422581e-07, "loss": 0.3315, "step": 3693 }, { "epoch": 0.24, "grad_norm": 0.506559311550516, "learning_rate": 8.932411860436165e-07, "loss": 0.1811, "step": 3694 }, { "epoch": 0.24, "grad_norm": 0.5399864366168942, "learning_rate": 8.931773943685155e-07, "loss": 0.0679, "step": 3695 }, { "epoch": 0.24, "grad_norm": 0.5290569932581717, "learning_rate": 8.931135859196762e-07, "loss": 0.2001, "step": 3696 }, { "epoch": 0.24, "grad_norm": 0.683327435889844, "learning_rate": 8.930497606998213e-07, "loss": 0.2632, "step": 3697 }, { "epoch": 0.24, "grad_norm": 0.5840598372568698, "learning_rate": 8.929859187116734e-07, "loss": 0.1184, "step": 3698 }, { "epoch": 0.24, "grad_norm": 0.5773771090751436, "learning_rate": 8.929220599579562e-07, "loss": 0.1974, "step": 3699 }, { "epoch": 0.24, "grad_norm": 2.7974491427480053, "learning_rate": 8.92858184441394e-07, "loss": 0.1526, "step": 3700 }, { "epoch": 0.24, "grad_norm": 0.87679290499142, "learning_rate": 8.92794292164712e-07, "loss": 0.1365, "step": 3701 }, { "epoch": 0.24, "grad_norm": 0.44484177771603206, "learning_rate": 8.927303831306358e-07, "loss": 0.0774, "step": 3702 }, { "epoch": 0.24, "grad_norm": 0.819815447823791, "learning_rate": 8.926664573418922e-07, "loss": 0.177, "step": 3703 }, { "epoch": 0.24, "grad_norm": 0.9231950718037863, "learning_rate": 8.92602514801208e-07, "loss": 0.2868, "step": 3704 }, { "epoch": 0.24, "grad_norm": 0.20792229694152603, "learning_rate": 8.925385555113111e-07, "loss": 0.0374, "step": 3705 }, { "epoch": 0.24, "grad_norm": 0.6290562676820869, "learning_rate": 8.924745794749307e-07, "loss": 0.1801, "step": 3706 }, { "epoch": 0.24, "grad_norm": 1.8703829044456655, "learning_rate": 8.924105866947955e-07, "loss": 0.1889, "step": 3707 }, { "epoch": 0.24, "grad_norm": 0.38764591570315005, "learning_rate": 8.923465771736359e-07, "loss": 0.3859, "step": 3708 }, { "epoch": 0.24, "grad_norm": 0.7098724276435888, "learning_rate": 8.922825509141827e-07, "loss": 0.2385, "step": 3709 }, { "epoch": 0.24, "grad_norm": 0.38401416793688486, "learning_rate": 8.922185079191671e-07, "loss": 0.0083, "step": 3710 }, { "epoch": 0.24, "grad_norm": 1.0987334480298931, "learning_rate": 8.921544481913217e-07, "loss": 0.3234, "step": 3711 }, { "epoch": 0.24, "grad_norm": 0.5960848589499238, "learning_rate": 8.920903717333789e-07, "loss": 0.1536, "step": 3712 }, { "epoch": 0.24, "grad_norm": 0.7386602970647814, "learning_rate": 8.92026278548073e-07, "loss": 0.2815, "step": 3713 }, { "epoch": 0.24, "grad_norm": 0.31023055252201687, "learning_rate": 8.919621686381378e-07, "loss": 0.0413, "step": 3714 }, { "epoch": 0.24, "grad_norm": 1.0399407344066338, "learning_rate": 8.918980420063086e-07, "loss": 0.1246, "step": 3715 }, { "epoch": 0.24, "grad_norm": 0.2737335065949902, "learning_rate": 8.918338986553211e-07, "loss": 0.0326, "step": 3716 }, { "epoch": 0.24, "grad_norm": 1.296067112610091, "learning_rate": 8.917697385879117e-07, "loss": 0.3811, "step": 3717 }, { "epoch": 0.24, "grad_norm": 0.794927169010393, "learning_rate": 8.917055618068178e-07, "loss": 0.2224, "step": 3718 }, { "epoch": 0.24, "grad_norm": 0.4387865933496512, "learning_rate": 8.916413683147772e-07, "loss": 0.1924, "step": 3719 }, { "epoch": 0.24, "grad_norm": 0.5285970623958716, "learning_rate": 8.915771581145285e-07, "loss": 0.1733, "step": 3720 }, { "epoch": 0.24, "grad_norm": 0.6351532285632555, "learning_rate": 8.915129312088112e-07, "loss": 0.0195, "step": 3721 }, { "epoch": 0.24, "grad_norm": 0.42340552146390065, "learning_rate": 8.914486876003649e-07, "loss": 0.1661, "step": 3722 }, { "epoch": 0.24, "grad_norm": 0.8304222957107547, "learning_rate": 8.913844272919309e-07, "loss": 0.3068, "step": 3723 }, { "epoch": 0.24, "grad_norm": 0.5835541018828425, "learning_rate": 8.913201502862504e-07, "loss": 0.1241, "step": 3724 }, { "epoch": 0.24, "grad_norm": 1.0814322182376175, "learning_rate": 8.912558565860657e-07, "loss": 0.1055, "step": 3725 }, { "epoch": 0.24, "grad_norm": 0.5133578413313381, "learning_rate": 8.911915461941196e-07, "loss": 0.2432, "step": 3726 }, { "epoch": 0.24, "grad_norm": 0.7605543377735867, "learning_rate": 8.911272191131559e-07, "loss": 0.016, "step": 3727 }, { "epoch": 0.24, "grad_norm": 0.6770768411894138, "learning_rate": 8.910628753459184e-07, "loss": 0.1394, "step": 3728 }, { "epoch": 0.24, "grad_norm": 0.40152719956764704, "learning_rate": 8.909985148951528e-07, "loss": 0.0609, "step": 3729 }, { "epoch": 0.24, "grad_norm": 0.7574561410672707, "learning_rate": 8.909341377636044e-07, "loss": 0.0764, "step": 3730 }, { "epoch": 0.24, "grad_norm": 0.7746749103878682, "learning_rate": 8.908697439540198e-07, "loss": 0.0302, "step": 3731 }, { "epoch": 0.24, "grad_norm": 0.6624940135187026, "learning_rate": 8.908053334691463e-07, "loss": 0.0921, "step": 3732 }, { "epoch": 0.24, "grad_norm": 0.328521198911137, "learning_rate": 8.907409063117317e-07, "loss": 0.2881, "step": 3733 }, { "epoch": 0.24, "grad_norm": 0.9408310520518564, "learning_rate": 8.906764624845244e-07, "loss": 0.0253, "step": 3734 }, { "epoch": 0.24, "grad_norm": 1.829655235082486, "learning_rate": 8.906120019902739e-07, "loss": 0.0305, "step": 3735 }, { "epoch": 0.24, "grad_norm": 0.5149580058749262, "learning_rate": 8.905475248317302e-07, "loss": 0.3189, "step": 3736 }, { "epoch": 0.24, "grad_norm": 0.16660177430372192, "learning_rate": 8.904830310116439e-07, "loss": 0.0031, "step": 3737 }, { "epoch": 0.24, "grad_norm": 0.3968282733334314, "learning_rate": 8.904185205327666e-07, "loss": 0.1126, "step": 3738 }, { "epoch": 0.24, "grad_norm": 0.4412258546287834, "learning_rate": 8.903539933978504e-07, "loss": 0.2691, "step": 3739 }, { "epoch": 0.24, "grad_norm": 0.4155714819587609, "learning_rate": 8.902894496096481e-07, "loss": 0.1032, "step": 3740 }, { "epoch": 0.24, "grad_norm": 0.6939420353806862, "learning_rate": 8.902248891709132e-07, "loss": 0.1945, "step": 3741 }, { "epoch": 0.24, "grad_norm": 0.31966914646427064, "learning_rate": 8.901603120844003e-07, "loss": 0.108, "step": 3742 }, { "epoch": 0.24, "grad_norm": 0.7902264693969758, "learning_rate": 8.900957183528639e-07, "loss": 0.3815, "step": 3743 }, { "epoch": 0.24, "grad_norm": 1.0140197309363976, "learning_rate": 8.900311079790601e-07, "loss": 0.1647, "step": 3744 }, { "epoch": 0.24, "grad_norm": 0.3103666139597173, "learning_rate": 8.899664809657453e-07, "loss": 0.0161, "step": 3745 }, { "epoch": 0.24, "grad_norm": 0.9515611665852027, "learning_rate": 8.899018373156763e-07, "loss": 0.1369, "step": 3746 }, { "epoch": 0.24, "grad_norm": 0.5987886609029958, "learning_rate": 8.898371770316111e-07, "loss": 0.2235, "step": 3747 }, { "epoch": 0.24, "grad_norm": 0.6756562521449005, "learning_rate": 8.897725001163083e-07, "loss": 0.058, "step": 3748 }, { "epoch": 0.24, "grad_norm": 0.31828381595214017, "learning_rate": 8.897078065725272e-07, "loss": 0.2259, "step": 3749 }, { "epoch": 0.24, "grad_norm": 0.7350619858446041, "learning_rate": 8.896430964030277e-07, "loss": 0.0763, "step": 3750 }, { "epoch": 0.24, "grad_norm": 1.3275744319752052, "learning_rate": 8.895783696105703e-07, "loss": 0.0234, "step": 3751 }, { "epoch": 0.24, "grad_norm": 0.9536052494238427, "learning_rate": 8.895136261979166e-07, "loss": 0.0572, "step": 3752 }, { "epoch": 0.24, "grad_norm": 0.6724851710180773, "learning_rate": 8.894488661678285e-07, "loss": 0.0616, "step": 3753 }, { "epoch": 0.24, "grad_norm": 0.2867622074934325, "learning_rate": 8.893840895230689e-07, "loss": 0.0287, "step": 3754 }, { "epoch": 0.24, "grad_norm": 1.579933443117816, "learning_rate": 8.893192962664012e-07, "loss": 0.0795, "step": 3755 }, { "epoch": 0.24, "grad_norm": 0.7035797738352091, "learning_rate": 8.892544864005898e-07, "loss": 0.035, "step": 3756 }, { "epoch": 0.24, "grad_norm": 0.42956073102948233, "learning_rate": 8.891896599283994e-07, "loss": 0.3451, "step": 3757 }, { "epoch": 0.24, "grad_norm": 0.8172843223769409, "learning_rate": 8.891248168525957e-07, "loss": 0.1576, "step": 3758 }, { "epoch": 0.24, "grad_norm": 0.8120322099943349, "learning_rate": 8.890599571759454e-07, "loss": 0.4472, "step": 3759 }, { "epoch": 0.24, "grad_norm": 1.6618354528280581, "learning_rate": 8.88995080901215e-07, "loss": 0.1247, "step": 3760 }, { "epoch": 0.24, "grad_norm": 0.9775245784699352, "learning_rate": 8.889301880311724e-07, "loss": 0.1023, "step": 3761 }, { "epoch": 0.24, "grad_norm": 0.555236487849536, "learning_rate": 8.888652785685861e-07, "loss": 0.1841, "step": 3762 }, { "epoch": 0.24, "grad_norm": 0.3382029675723734, "learning_rate": 8.888003525162256e-07, "loss": 0.1968, "step": 3763 }, { "epoch": 0.24, "grad_norm": 0.8481347461852061, "learning_rate": 8.887354098768602e-07, "loss": 0.0487, "step": 3764 }, { "epoch": 0.24, "grad_norm": 0.8186472691297958, "learning_rate": 8.886704506532609e-07, "loss": 0.1845, "step": 3765 }, { "epoch": 0.24, "grad_norm": 0.5869573020823922, "learning_rate": 8.886054748481988e-07, "loss": 0.1403, "step": 3766 }, { "epoch": 0.24, "grad_norm": 1.4056982892023109, "learning_rate": 8.885404824644459e-07, "loss": 0.1476, "step": 3767 }, { "epoch": 0.24, "grad_norm": 0.8142308314391018, "learning_rate": 8.88475473504775e-07, "loss": 0.2135, "step": 3768 }, { "epoch": 0.24, "grad_norm": 0.7653636598760375, "learning_rate": 8.884104479719594e-07, "loss": 0.1933, "step": 3769 }, { "epoch": 0.24, "grad_norm": 0.6010367539071723, "learning_rate": 8.883454058687734e-07, "loss": 0.1709, "step": 3770 }, { "epoch": 0.24, "grad_norm": 0.8750845813632222, "learning_rate": 8.882803471979916e-07, "loss": 0.2474, "step": 3771 }, { "epoch": 0.24, "grad_norm": 0.9290887108245592, "learning_rate": 8.882152719623898e-07, "loss": 0.1797, "step": 3772 }, { "epoch": 0.24, "grad_norm": 1.1949534146437264, "learning_rate": 8.881501801647439e-07, "loss": 0.0995, "step": 3773 }, { "epoch": 0.24, "grad_norm": 0.6126499073367744, "learning_rate": 8.880850718078312e-07, "loss": 0.3397, "step": 3774 }, { "epoch": 0.24, "grad_norm": 0.892068876882014, "learning_rate": 8.880199468944291e-07, "loss": 0.1426, "step": 3775 }, { "epoch": 0.24, "grad_norm": 0.6419376933464616, "learning_rate": 8.87954805427316e-07, "loss": 0.2155, "step": 3776 }, { "epoch": 0.24, "grad_norm": 0.20500359466811582, "learning_rate": 8.878896474092712e-07, "loss": 0.0904, "step": 3777 }, { "epoch": 0.24, "grad_norm": 0.5216916846480123, "learning_rate": 8.878244728430742e-07, "loss": 0.1431, "step": 3778 }, { "epoch": 0.24, "grad_norm": 2.3122812443037244, "learning_rate": 8.877592817315054e-07, "loss": 0.1966, "step": 3779 }, { "epoch": 0.24, "grad_norm": 0.7561955810362375, "learning_rate": 8.876940740773463e-07, "loss": 0.4138, "step": 3780 }, { "epoch": 0.24, "grad_norm": 1.1604726993973071, "learning_rate": 8.876288498833786e-07, "loss": 0.3771, "step": 3781 }, { "epoch": 0.24, "grad_norm": 0.7917105456265333, "learning_rate": 8.87563609152385e-07, "loss": 0.0606, "step": 3782 }, { "epoch": 0.24, "grad_norm": 1.3562561210111628, "learning_rate": 8.874983518871486e-07, "loss": 0.2973, "step": 3783 }, { "epoch": 0.24, "grad_norm": 0.5272699290899061, "learning_rate": 8.874330780904537e-07, "loss": 0.1819, "step": 3784 }, { "epoch": 0.24, "grad_norm": 0.5617773042352198, "learning_rate": 8.873677877650847e-07, "loss": 0.2149, "step": 3785 }, { "epoch": 0.24, "grad_norm": 0.7927284887821636, "learning_rate": 8.873024809138272e-07, "loss": 0.2816, "step": 3786 }, { "epoch": 0.24, "grad_norm": 1.127433478609849, "learning_rate": 8.872371575394674e-07, "loss": 0.3469, "step": 3787 }, { "epoch": 0.24, "grad_norm": 0.8124809502485485, "learning_rate": 8.87171817644792e-07, "loss": 0.1047, "step": 3788 }, { "epoch": 0.24, "grad_norm": 0.6961022059067764, "learning_rate": 8.871064612325885e-07, "loss": 0.2903, "step": 3789 }, { "epoch": 0.24, "grad_norm": 0.8602851144623159, "learning_rate": 8.870410883056451e-07, "loss": 0.026, "step": 3790 }, { "epoch": 0.24, "grad_norm": 0.9576690687775362, "learning_rate": 8.869756988667508e-07, "loss": 0.2327, "step": 3791 }, { "epoch": 0.24, "grad_norm": 0.38831604542025516, "learning_rate": 8.869102929186953e-07, "loss": 0.1492, "step": 3792 }, { "epoch": 0.24, "grad_norm": 1.4334388429938352, "learning_rate": 8.868448704642691e-07, "loss": 0.267, "step": 3793 }, { "epoch": 0.24, "grad_norm": 1.055976164987003, "learning_rate": 8.867794315062629e-07, "loss": 0.3208, "step": 3794 }, { "epoch": 0.24, "grad_norm": 0.5403932677561274, "learning_rate": 8.867139760474687e-07, "loss": 0.1477, "step": 3795 }, { "epoch": 0.24, "grad_norm": 0.21410656598523806, "learning_rate": 8.866485040906788e-07, "loss": 0.0974, "step": 3796 }, { "epoch": 0.24, "grad_norm": 0.9468380997808159, "learning_rate": 8.865830156386866e-07, "loss": 0.2684, "step": 3797 }, { "epoch": 0.24, "grad_norm": 0.9282333660947354, "learning_rate": 8.865175106942857e-07, "loss": 0.3556, "step": 3798 }, { "epoch": 0.24, "grad_norm": 0.33742281295306603, "learning_rate": 8.864519892602709e-07, "loss": 0.0425, "step": 3799 }, { "epoch": 0.24, "grad_norm": 1.2564181620645343, "learning_rate": 8.863864513394372e-07, "loss": 0.227, "step": 3800 }, { "epoch": 0.24, "grad_norm": 0.023836021967167408, "learning_rate": 8.863208969345809e-07, "loss": 0.0004, "step": 3801 }, { "epoch": 0.24, "grad_norm": 1.4932632983783014, "learning_rate": 8.862553260484984e-07, "loss": 0.0972, "step": 3802 }, { "epoch": 0.24, "grad_norm": 0.8612358583508158, "learning_rate": 8.861897386839874e-07, "loss": 0.2424, "step": 3803 }, { "epoch": 0.24, "grad_norm": 0.8281799329071436, "learning_rate": 8.861241348438457e-07, "loss": 0.2149, "step": 3804 }, { "epoch": 0.24, "grad_norm": 0.3504376151949932, "learning_rate": 8.860585145308722e-07, "loss": 0.0925, "step": 3805 }, { "epoch": 0.24, "grad_norm": 0.7966390297010175, "learning_rate": 8.859928777478664e-07, "loss": 0.2819, "step": 3806 }, { "epoch": 0.24, "grad_norm": 0.8976778103953185, "learning_rate": 8.859272244976286e-07, "loss": 0.0704, "step": 3807 }, { "epoch": 0.24, "grad_norm": 0.6317080723498039, "learning_rate": 8.858615547829594e-07, "loss": 0.1435, "step": 3808 }, { "epoch": 0.24, "grad_norm": 1.633199222329602, "learning_rate": 8.857958686066607e-07, "loss": 0.2054, "step": 3809 }, { "epoch": 0.24, "grad_norm": 3.2357002728651416, "learning_rate": 8.857301659715347e-07, "loss": 0.1984, "step": 3810 }, { "epoch": 0.24, "grad_norm": 1.4089302300435955, "learning_rate": 8.856644468803845e-07, "loss": 0.3983, "step": 3811 }, { "epoch": 0.24, "grad_norm": 1.4135613464162196, "learning_rate": 8.855987113360134e-07, "loss": 0.176, "step": 3812 }, { "epoch": 0.24, "grad_norm": 0.37719396902233837, "learning_rate": 8.855329593412264e-07, "loss": 0.0489, "step": 3813 }, { "epoch": 0.24, "grad_norm": 1.8875365007259433, "learning_rate": 8.854671908988283e-07, "loss": 0.1269, "step": 3814 }, { "epoch": 0.24, "grad_norm": 1.0181689889722372, "learning_rate": 8.854014060116249e-07, "loss": 0.2733, "step": 3815 }, { "epoch": 0.24, "grad_norm": 0.5273144421567434, "learning_rate": 8.853356046824228e-07, "loss": 0.1581, "step": 3816 }, { "epoch": 0.24, "grad_norm": 0.9671347036238022, "learning_rate": 8.852697869140292e-07, "loss": 0.1124, "step": 3817 }, { "epoch": 0.24, "grad_norm": 0.7650637101924961, "learning_rate": 8.85203952709252e-07, "loss": 0.2541, "step": 3818 }, { "epoch": 0.24, "grad_norm": 0.4237115447194473, "learning_rate": 8.851381020708998e-07, "loss": 0.0422, "step": 3819 }, { "epoch": 0.24, "grad_norm": 0.6233864538524351, "learning_rate": 8.850722350017818e-07, "loss": 0.0644, "step": 3820 }, { "epoch": 0.24, "grad_norm": 0.428489971635069, "learning_rate": 8.850063515047083e-07, "loss": 0.183, "step": 3821 }, { "epoch": 0.24, "grad_norm": 0.8953283052132391, "learning_rate": 8.8494045158249e-07, "loss": 0.3399, "step": 3822 }, { "epoch": 0.24, "grad_norm": 0.6535801662496468, "learning_rate": 8.848745352379381e-07, "loss": 0.1782, "step": 3823 }, { "epoch": 0.24, "grad_norm": 0.49997927614318116, "learning_rate": 8.848086024738648e-07, "loss": 0.1637, "step": 3824 }, { "epoch": 0.24, "grad_norm": 0.6410418209206888, "learning_rate": 8.847426532930829e-07, "loss": 0.1569, "step": 3825 }, { "epoch": 0.24, "grad_norm": 0.9459033036201963, "learning_rate": 8.846766876984061e-07, "loss": 0.195, "step": 3826 }, { "epoch": 0.24, "grad_norm": 0.6702740291675603, "learning_rate": 8.846107056926484e-07, "loss": 0.4062, "step": 3827 }, { "epoch": 0.24, "grad_norm": 0.4524134624553657, "learning_rate": 8.84544707278625e-07, "loss": 0.1911, "step": 3828 }, { "epoch": 0.24, "grad_norm": 0.9767117423338574, "learning_rate": 8.844786924591512e-07, "loss": 0.0574, "step": 3829 }, { "epoch": 0.24, "grad_norm": 0.881510543980618, "learning_rate": 8.844126612370435e-07, "loss": 0.0707, "step": 3830 }, { "epoch": 0.24, "grad_norm": 0.7627701634454506, "learning_rate": 8.84346613615119e-07, "loss": 0.057, "step": 3831 }, { "epoch": 0.24, "grad_norm": 0.2173122088258248, "learning_rate": 8.842805495961952e-07, "loss": 0.002, "step": 3832 }, { "epoch": 0.24, "grad_norm": 0.580379117277505, "learning_rate": 8.842144691830906e-07, "loss": 0.1342, "step": 3833 }, { "epoch": 0.24, "grad_norm": 1.120180075118133, "learning_rate": 8.841483723786246e-07, "loss": 0.2354, "step": 3834 }, { "epoch": 0.24, "grad_norm": 0.4914165517828563, "learning_rate": 8.840822591856167e-07, "loss": 0.4501, "step": 3835 }, { "epoch": 0.24, "grad_norm": 1.0371190255382727, "learning_rate": 8.840161296068876e-07, "loss": 0.2488, "step": 3836 }, { "epoch": 0.24, "grad_norm": 0.5973299600775365, "learning_rate": 8.839499836452582e-07, "loss": 0.3872, "step": 3837 }, { "epoch": 0.24, "grad_norm": 0.395174487770906, "learning_rate": 8.838838213035509e-07, "loss": 0.1156, "step": 3838 }, { "epoch": 0.24, "grad_norm": 3.356218367510238, "learning_rate": 8.83817642584588e-07, "loss": 0.1328, "step": 3839 }, { "epoch": 0.24, "grad_norm": 1.0232113436227914, "learning_rate": 8.837514474911929e-07, "loss": 0.0872, "step": 3840 }, { "epoch": 0.24, "grad_norm": 0.6967371947473936, "learning_rate": 8.836852360261895e-07, "loss": 0.1095, "step": 3841 }, { "epoch": 0.25, "grad_norm": 2.6899814270235796, "learning_rate": 8.836190081924027e-07, "loss": 0.0583, "step": 3842 }, { "epoch": 0.25, "grad_norm": 0.47253467787743975, "learning_rate": 8.835527639926579e-07, "loss": 0.1843, "step": 3843 }, { "epoch": 0.25, "grad_norm": 0.6822384251805668, "learning_rate": 8.834865034297812e-07, "loss": 0.2154, "step": 3844 }, { "epoch": 0.25, "grad_norm": 0.5155496044580115, "learning_rate": 8.834202265065993e-07, "loss": 0.0798, "step": 3845 }, { "epoch": 0.25, "grad_norm": 0.6233805862568065, "learning_rate": 8.833539332259396e-07, "loss": 0.2562, "step": 3846 }, { "epoch": 0.25, "grad_norm": 0.745255601651051, "learning_rate": 8.832876235906306e-07, "loss": 0.1589, "step": 3847 }, { "epoch": 0.25, "grad_norm": 0.18299751295070765, "learning_rate": 8.832212976035012e-07, "loss": 0.0722, "step": 3848 }, { "epoch": 0.25, "grad_norm": 0.37131939273704917, "learning_rate": 8.831549552673807e-07, "loss": 0.0921, "step": 3849 }, { "epoch": 0.25, "grad_norm": 1.0402267070631617, "learning_rate": 8.830885965850998e-07, "loss": 0.1206, "step": 3850 }, { "epoch": 0.25, "grad_norm": 0.8733116472389192, "learning_rate": 8.83022221559489e-07, "loss": 0.3168, "step": 3851 }, { "epoch": 0.25, "grad_norm": 2.158788411492282, "learning_rate": 8.829558301933804e-07, "loss": 0.1649, "step": 3852 }, { "epoch": 0.25, "grad_norm": 0.7594693834484741, "learning_rate": 8.828894224896062e-07, "loss": 0.3117, "step": 3853 }, { "epoch": 0.25, "grad_norm": 1.00607259640321, "learning_rate": 8.828229984509996e-07, "loss": 0.3921, "step": 3854 }, { "epoch": 0.25, "grad_norm": 1.3712145253287933, "learning_rate": 8.827565580803941e-07, "loss": 0.1097, "step": 3855 }, { "epoch": 0.25, "grad_norm": 1.5518431096903318, "learning_rate": 8.826901013806245e-07, "loss": 0.0852, "step": 3856 }, { "epoch": 0.25, "grad_norm": 7.260101596858923, "learning_rate": 8.826236283545259e-07, "loss": 0.2245, "step": 3857 }, { "epoch": 0.25, "grad_norm": 0.9216122789470282, "learning_rate": 8.825571390049343e-07, "loss": 0.141, "step": 3858 }, { "epoch": 0.25, "grad_norm": 0.6574770388498139, "learning_rate": 8.824906333346858e-07, "loss": 0.1655, "step": 3859 }, { "epoch": 0.25, "grad_norm": 0.4896930052088437, "learning_rate": 8.824241113466182e-07, "loss": 0.129, "step": 3860 }, { "epoch": 0.25, "grad_norm": 1.045338539757439, "learning_rate": 8.823575730435693e-07, "loss": 0.1246, "step": 3861 }, { "epoch": 0.25, "grad_norm": 1.5777459068356137, "learning_rate": 8.822910184283776e-07, "loss": 0.2317, "step": 3862 }, { "epoch": 0.25, "grad_norm": 1.7493117669883436, "learning_rate": 8.822244475038825e-07, "loss": 0.2838, "step": 3863 }, { "epoch": 0.25, "grad_norm": 0.7158063307446045, "learning_rate": 8.821578602729241e-07, "loss": 0.0901, "step": 3864 }, { "epoch": 0.25, "grad_norm": 0.9323696081863153, "learning_rate": 8.820912567383432e-07, "loss": 0.4398, "step": 3865 }, { "epoch": 0.25, "grad_norm": 0.5992314213267559, "learning_rate": 8.820246369029812e-07, "loss": 0.106, "step": 3866 }, { "epoch": 0.25, "grad_norm": 0.5060150979246018, "learning_rate": 8.819580007696802e-07, "loss": 0.1455, "step": 3867 }, { "epoch": 0.25, "grad_norm": 1.1236957189997694, "learning_rate": 8.818913483412831e-07, "loss": 0.3267, "step": 3868 }, { "epoch": 0.25, "grad_norm": 0.7399667473326889, "learning_rate": 8.818246796206332e-07, "loss": 0.0643, "step": 3869 }, { "epoch": 0.25, "grad_norm": 0.5064265662293846, "learning_rate": 8.817579946105751e-07, "loss": 0.148, "step": 3870 }, { "epoch": 0.25, "grad_norm": 0.6474326731435445, "learning_rate": 8.816912933139535e-07, "loss": 0.2105, "step": 3871 }, { "epoch": 0.25, "grad_norm": 0.8501109047346274, "learning_rate": 8.81624575733614e-07, "loss": 0.3672, "step": 3872 }, { "epoch": 0.25, "grad_norm": 0.7323724092358175, "learning_rate": 8.81557841872403e-07, "loss": 0.317, "step": 3873 }, { "epoch": 0.25, "grad_norm": 1.2862806210495028, "learning_rate": 8.814910917331673e-07, "loss": 0.0497, "step": 3874 }, { "epoch": 0.25, "grad_norm": 0.3478016951604009, "learning_rate": 8.814243253187548e-07, "loss": 0.0718, "step": 3875 }, { "epoch": 0.25, "grad_norm": 0.38831128658614433, "learning_rate": 8.813575426320139e-07, "loss": 0.1864, "step": 3876 }, { "epoch": 0.25, "grad_norm": 1.9050264415368248, "learning_rate": 8.812907436757935e-07, "loss": 0.2218, "step": 3877 }, { "epoch": 0.25, "grad_norm": 0.9137963310217068, "learning_rate": 8.812239284529435e-07, "loss": 0.0833, "step": 3878 }, { "epoch": 0.25, "grad_norm": 2.2065487505746293, "learning_rate": 8.811570969663144e-07, "loss": 0.1434, "step": 3879 }, { "epoch": 0.25, "grad_norm": 0.5826485797134789, "learning_rate": 8.810902492187573e-07, "loss": 0.1305, "step": 3880 }, { "epoch": 0.25, "grad_norm": 0.5433145356257765, "learning_rate": 8.81023385213124e-07, "loss": 0.1828, "step": 3881 }, { "epoch": 0.25, "grad_norm": 0.8426871437234196, "learning_rate": 8.809565049522671e-07, "loss": 0.2539, "step": 3882 }, { "epoch": 0.25, "grad_norm": 0.6029357850617248, "learning_rate": 8.8088960843904e-07, "loss": 0.082, "step": 3883 }, { "epoch": 0.25, "grad_norm": 0.8689748220497369, "learning_rate": 8.808226956762967e-07, "loss": 0.3289, "step": 3884 }, { "epoch": 0.25, "grad_norm": 0.5100034663399725, "learning_rate": 8.807557666668914e-07, "loss": 0.3517, "step": 3885 }, { "epoch": 0.25, "grad_norm": 0.9071223821693056, "learning_rate": 8.806888214136798e-07, "loss": 0.1622, "step": 3886 }, { "epoch": 0.25, "grad_norm": 0.25651385324206816, "learning_rate": 8.806218599195177e-07, "loss": 0.1169, "step": 3887 }, { "epoch": 0.25, "grad_norm": 1.6205044009298561, "learning_rate": 8.805548821872619e-07, "loss": 0.3588, "step": 3888 }, { "epoch": 0.25, "grad_norm": 0.2051808882190857, "learning_rate": 8.8048788821977e-07, "loss": 0.0084, "step": 3889 }, { "epoch": 0.25, "grad_norm": 0.5313437251721552, "learning_rate": 8.804208780198997e-07, "loss": 0.156, "step": 3890 }, { "epoch": 0.25, "grad_norm": 0.5608721803888229, "learning_rate": 8.8035385159051e-07, "loss": 0.1737, "step": 3891 }, { "epoch": 0.25, "grad_norm": 2.114285425950021, "learning_rate": 8.802868089344605e-07, "loss": 0.0337, "step": 3892 }, { "epoch": 0.25, "grad_norm": 1.1108144278333296, "learning_rate": 8.802197500546112e-07, "loss": 0.2733, "step": 3893 }, { "epoch": 0.25, "grad_norm": 0.6756606510327932, "learning_rate": 8.801526749538231e-07, "loss": 0.0114, "step": 3894 }, { "epoch": 0.25, "grad_norm": 0.7827544871261352, "learning_rate": 8.800855836349577e-07, "loss": 0.1278, "step": 3895 }, { "epoch": 0.25, "grad_norm": 0.8810978403334839, "learning_rate": 8.800184761008771e-07, "loss": 0.2304, "step": 3896 }, { "epoch": 0.25, "grad_norm": 1.0625358915168617, "learning_rate": 8.799513523544444e-07, "loss": 0.0853, "step": 3897 }, { "epoch": 0.25, "grad_norm": 0.8936459555487971, "learning_rate": 8.798842123985233e-07, "loss": 0.1544, "step": 3898 }, { "epoch": 0.25, "grad_norm": 0.27459104503815157, "learning_rate": 8.798170562359779e-07, "loss": 0.1053, "step": 3899 }, { "epoch": 0.25, "grad_norm": 1.0829617560998321, "learning_rate": 8.797498838696735e-07, "loss": 0.3077, "step": 3900 }, { "epoch": 0.25, "grad_norm": 0.4402056171559821, "learning_rate": 8.796826953024756e-07, "loss": 0.1567, "step": 3901 }, { "epoch": 0.25, "grad_norm": 0.625065524314885, "learning_rate": 8.796154905372506e-07, "loss": 0.1401, "step": 3902 }, { "epoch": 0.25, "grad_norm": 0.09889685619163836, "learning_rate": 8.795482695768656e-07, "loss": 0.006, "step": 3903 }, { "epoch": 0.25, "grad_norm": 0.40167191233341654, "learning_rate": 8.794810324241886e-07, "loss": 0.1823, "step": 3904 }, { "epoch": 0.25, "grad_norm": 0.9797198670885936, "learning_rate": 8.794137790820879e-07, "loss": 0.0842, "step": 3905 }, { "epoch": 0.25, "grad_norm": 0.6342464441119157, "learning_rate": 8.793465095534327e-07, "loss": 0.0054, "step": 3906 }, { "epoch": 0.25, "grad_norm": 0.8674351748185777, "learning_rate": 8.792792238410926e-07, "loss": 0.2681, "step": 3907 }, { "epoch": 0.25, "grad_norm": 0.8127929286039683, "learning_rate": 8.792119219479386e-07, "loss": 0.3621, "step": 3908 }, { "epoch": 0.25, "grad_norm": 0.998942057555394, "learning_rate": 8.791446038768415e-07, "loss": 0.1985, "step": 3909 }, { "epoch": 0.25, "grad_norm": 0.733145838965004, "learning_rate": 8.790772696306736e-07, "loss": 0.0689, "step": 3910 }, { "epoch": 0.25, "grad_norm": 0.8314182062716361, "learning_rate": 8.790099192123073e-07, "loss": 0.3592, "step": 3911 }, { "epoch": 0.25, "grad_norm": 1.429362481326908, "learning_rate": 8.789425526246159e-07, "loss": 0.0961, "step": 3912 }, { "epoch": 0.25, "grad_norm": 0.5162858269850434, "learning_rate": 8.788751698704734e-07, "loss": 0.0957, "step": 3913 }, { "epoch": 0.25, "grad_norm": 0.778633990635694, "learning_rate": 8.788077709527546e-07, "loss": 0.1796, "step": 3914 }, { "epoch": 0.25, "grad_norm": 0.2358032788236904, "learning_rate": 8.787403558743347e-07, "loss": 0.0903, "step": 3915 }, { "epoch": 0.25, "grad_norm": 0.6581315004821515, "learning_rate": 8.786729246380899e-07, "loss": 0.006, "step": 3916 }, { "epoch": 0.25, "grad_norm": 0.9809002158680535, "learning_rate": 8.786054772468968e-07, "loss": 0.1373, "step": 3917 }, { "epoch": 0.25, "grad_norm": 1.0868837241043616, "learning_rate": 8.785380137036331e-07, "loss": 0.3455, "step": 3918 }, { "epoch": 0.25, "grad_norm": 0.33965775754026445, "learning_rate": 8.784705340111767e-07, "loss": 0.1726, "step": 3919 }, { "epoch": 0.25, "grad_norm": 0.3881610503078679, "learning_rate": 8.784030381724066e-07, "loss": 0.1477, "step": 3920 }, { "epoch": 0.25, "grad_norm": 1.2779562197772847, "learning_rate": 8.783355261902021e-07, "loss": 0.3554, "step": 3921 }, { "epoch": 0.25, "grad_norm": 0.46884013910490463, "learning_rate": 8.782679980674436e-07, "loss": 0.1424, "step": 3922 }, { "epoch": 0.25, "grad_norm": 0.4142878280564671, "learning_rate": 8.782004538070118e-07, "loss": 0.049, "step": 3923 }, { "epoch": 0.25, "grad_norm": 0.435630693591566, "learning_rate": 8.781328934117885e-07, "loss": 0.1453, "step": 3924 }, { "epoch": 0.25, "grad_norm": 0.7895980903877988, "learning_rate": 8.780653168846556e-07, "loss": 0.1049, "step": 3925 }, { "epoch": 0.25, "grad_norm": 1.078528448372595, "learning_rate": 8.779977242284964e-07, "loss": 0.1944, "step": 3926 }, { "epoch": 0.25, "grad_norm": 0.3513130449104446, "learning_rate": 8.779301154461945e-07, "loss": 0.0848, "step": 3927 }, { "epoch": 0.25, "grad_norm": 0.6341401280500651, "learning_rate": 8.778624905406339e-07, "loss": 0.0727, "step": 3928 }, { "epoch": 0.25, "grad_norm": 1.1356196801368217, "learning_rate": 8.777948495147e-07, "loss": 0.0199, "step": 3929 }, { "epoch": 0.25, "grad_norm": 1.036735641930418, "learning_rate": 8.777271923712783e-07, "loss": 0.1751, "step": 3930 }, { "epoch": 0.25, "grad_norm": 0.9435075619147448, "learning_rate": 8.776595191132553e-07, "loss": 0.3591, "step": 3931 }, { "epoch": 0.25, "grad_norm": 0.6932852345179985, "learning_rate": 8.775918297435181e-07, "loss": 0.2179, "step": 3932 }, { "epoch": 0.25, "grad_norm": 0.6888035519656425, "learning_rate": 8.775241242649543e-07, "loss": 0.0518, "step": 3933 }, { "epoch": 0.25, "grad_norm": 0.5971998529801377, "learning_rate": 8.774564026804525e-07, "loss": 0.231, "step": 3934 }, { "epoch": 0.25, "grad_norm": 1.1419428339338078, "learning_rate": 8.773886649929017e-07, "loss": 0.1401, "step": 3935 }, { "epoch": 0.25, "grad_norm": 0.627408617573541, "learning_rate": 8.773209112051918e-07, "loss": 0.1987, "step": 3936 }, { "epoch": 0.25, "grad_norm": 0.9453981933126301, "learning_rate": 8.772531413202133e-07, "loss": 0.2172, "step": 3937 }, { "epoch": 0.25, "grad_norm": 1.3180858625553908, "learning_rate": 8.771853553408575e-07, "loss": 0.1285, "step": 3938 }, { "epoch": 0.25, "grad_norm": 0.3865235113430008, "learning_rate": 8.771175532700162e-07, "loss": 0.158, "step": 3939 }, { "epoch": 0.25, "grad_norm": 0.9173031682855154, "learning_rate": 8.770497351105819e-07, "loss": 0.2518, "step": 3940 }, { "epoch": 0.25, "grad_norm": 0.6388942602289808, "learning_rate": 8.76981900865448e-07, "loss": 0.1793, "step": 3941 }, { "epoch": 0.25, "grad_norm": 0.5680970316244751, "learning_rate": 8.769140505375083e-07, "loss": 0.1978, "step": 3942 }, { "epoch": 0.25, "grad_norm": 0.9833493507997556, "learning_rate": 8.768461841296577e-07, "loss": 0.2289, "step": 3943 }, { "epoch": 0.25, "grad_norm": 0.8079163924290765, "learning_rate": 8.767783016447912e-07, "loss": 0.0793, "step": 3944 }, { "epoch": 0.25, "grad_norm": 0.5906476527055019, "learning_rate": 8.767104030858049e-07, "loss": 0.1805, "step": 3945 }, { "epoch": 0.25, "grad_norm": 0.6493009863255066, "learning_rate": 8.766424884555956e-07, "loss": 0.2852, "step": 3946 }, { "epoch": 0.25, "grad_norm": 1.8273038894585243, "learning_rate": 8.765745577570606e-07, "loss": 0.2025, "step": 3947 }, { "epoch": 0.25, "grad_norm": 0.44501179601301144, "learning_rate": 8.765066109930979e-07, "loss": 0.1043, "step": 3948 }, { "epoch": 0.25, "grad_norm": 0.5612978662180629, "learning_rate": 8.764386481666062e-07, "loss": 0.0897, "step": 3949 }, { "epoch": 0.25, "grad_norm": 0.6603623168005797, "learning_rate": 8.763706692804852e-07, "loss": 0.3171, "step": 3950 }, { "epoch": 0.25, "grad_norm": 1.1833506083656713, "learning_rate": 8.763026743376347e-07, "loss": 0.264, "step": 3951 }, { "epoch": 0.25, "grad_norm": 0.456151883137029, "learning_rate": 8.762346633409559e-07, "loss": 0.2565, "step": 3952 }, { "epoch": 0.25, "grad_norm": 0.9147030031313685, "learning_rate": 8.761666362933497e-07, "loss": 0.2614, "step": 3953 }, { "epoch": 0.25, "grad_norm": 0.6258556487063822, "learning_rate": 8.760985931977189e-07, "loss": 0.3442, "step": 3954 }, { "epoch": 0.25, "grad_norm": 0.5065709907473496, "learning_rate": 8.76030534056966e-07, "loss": 0.3571, "step": 3955 }, { "epoch": 0.25, "grad_norm": 0.7141061511668348, "learning_rate": 8.759624588739945e-07, "loss": 0.0737, "step": 3956 }, { "epoch": 0.25, "grad_norm": 0.8226379117225698, "learning_rate": 8.758943676517088e-07, "loss": 0.1746, "step": 3957 }, { "epoch": 0.25, "grad_norm": 0.564638146772833, "learning_rate": 8.758262603930137e-07, "loss": 0.1079, "step": 3958 }, { "epoch": 0.25, "grad_norm": 0.27651579401960985, "learning_rate": 8.757581371008148e-07, "loss": 0.1224, "step": 3959 }, { "epoch": 0.25, "grad_norm": 0.3603241503001796, "learning_rate": 8.756899977780185e-07, "loss": 0.1384, "step": 3960 }, { "epoch": 0.25, "grad_norm": 0.7128421237886637, "learning_rate": 8.756218424275315e-07, "loss": 0.3432, "step": 3961 }, { "epoch": 0.25, "grad_norm": 0.5427821502755281, "learning_rate": 8.755536710522617e-07, "loss": 0.109, "step": 3962 }, { "epoch": 0.25, "grad_norm": 0.8949546193401502, "learning_rate": 8.754854836551173e-07, "loss": 0.1083, "step": 3963 }, { "epoch": 0.25, "grad_norm": 1.7523697441707426, "learning_rate": 8.754172802390074e-07, "loss": 0.2896, "step": 3964 }, { "epoch": 0.25, "grad_norm": 0.470811108919015, "learning_rate": 8.753490608068416e-07, "loss": 0.204, "step": 3965 }, { "epoch": 0.25, "grad_norm": 0.859132997973097, "learning_rate": 8.752808253615305e-07, "loss": 0.1744, "step": 3966 }, { "epoch": 0.25, "grad_norm": 0.7903062271790547, "learning_rate": 8.752125739059848e-07, "loss": 0.0353, "step": 3967 }, { "epoch": 0.25, "grad_norm": 0.6433512424106262, "learning_rate": 8.751443064431164e-07, "loss": 0.3293, "step": 3968 }, { "epoch": 0.25, "grad_norm": 0.9272328463302985, "learning_rate": 8.750760229758378e-07, "loss": 0.0907, "step": 3969 }, { "epoch": 0.25, "grad_norm": 1.0245838538405079, "learning_rate": 8.750077235070621e-07, "loss": 0.3167, "step": 3970 }, { "epoch": 0.25, "grad_norm": 0.6949014063101808, "learning_rate": 8.74939408039703e-07, "loss": 0.3784, "step": 3971 }, { "epoch": 0.25, "grad_norm": 0.16563874919046356, "learning_rate": 8.748710765766752e-07, "loss": 0.0028, "step": 3972 }, { "epoch": 0.25, "grad_norm": 1.2013521468154433, "learning_rate": 8.748027291208934e-07, "loss": 0.0904, "step": 3973 }, { "epoch": 0.25, "grad_norm": 0.6037748857188424, "learning_rate": 8.747343656752739e-07, "loss": 0.4494, "step": 3974 }, { "epoch": 0.25, "grad_norm": 0.3484933915549889, "learning_rate": 8.74665986242733e-07, "loss": 0.3483, "step": 3975 }, { "epoch": 0.25, "grad_norm": 0.8444821672923177, "learning_rate": 8.74597590826188e-07, "loss": 0.1801, "step": 3976 }, { "epoch": 0.25, "grad_norm": 0.5820251833319969, "learning_rate": 8.745291794285568e-07, "loss": 0.1341, "step": 3977 }, { "epoch": 0.25, "grad_norm": 1.5458408601507707, "learning_rate": 8.744607520527577e-07, "loss": 0.1478, "step": 3978 }, { "epoch": 0.25, "grad_norm": 0.5041824124099171, "learning_rate": 8.743923087017102e-07, "loss": 0.2239, "step": 3979 }, { "epoch": 0.25, "grad_norm": 0.6601179844297378, "learning_rate": 8.743238493783343e-07, "loss": 0.3004, "step": 3980 }, { "epoch": 0.25, "grad_norm": 0.8275809964913583, "learning_rate": 8.742553740855505e-07, "loss": 0.2218, "step": 3981 }, { "epoch": 0.25, "grad_norm": 0.3065065175720807, "learning_rate": 8.741868828262802e-07, "loss": 0.145, "step": 3982 }, { "epoch": 0.25, "grad_norm": 0.8819701262901135, "learning_rate": 8.741183756034449e-07, "loss": 0.2202, "step": 3983 }, { "epoch": 0.25, "grad_norm": 1.0010176950752636, "learning_rate": 8.74049852419968e-07, "loss": 0.3801, "step": 3984 }, { "epoch": 0.25, "grad_norm": 0.5025045189329125, "learning_rate": 8.739813132787723e-07, "loss": 0.008, "step": 3985 }, { "epoch": 0.25, "grad_norm": 0.6235355600080691, "learning_rate": 8.73912758182782e-07, "loss": 0.1102, "step": 3986 }, { "epoch": 0.25, "grad_norm": 0.9359451485156095, "learning_rate": 8.738441871349219e-07, "loss": 0.1621, "step": 3987 }, { "epoch": 0.25, "grad_norm": 0.9931543106272482, "learning_rate": 8.737756001381169e-07, "loss": 0.1736, "step": 3988 }, { "epoch": 0.25, "grad_norm": 0.48887747494279876, "learning_rate": 8.737069971952937e-07, "loss": 0.3141, "step": 3989 }, { "epoch": 0.25, "grad_norm": 1.3276454607633839, "learning_rate": 8.736383783093787e-07, "loss": 0.4351, "step": 3990 }, { "epoch": 0.25, "grad_norm": 0.6281135719184645, "learning_rate": 8.735697434832993e-07, "loss": 0.1919, "step": 3991 }, { "epoch": 0.25, "grad_norm": 0.9004611249581926, "learning_rate": 8.735010927199837e-07, "loss": 0.1233, "step": 3992 }, { "epoch": 0.25, "grad_norm": 0.6245121951560472, "learning_rate": 8.734324260223608e-07, "loss": 0.01, "step": 3993 }, { "epoch": 0.25, "grad_norm": 0.8564528634864154, "learning_rate": 8.733637433933598e-07, "loss": 0.3092, "step": 3994 }, { "epoch": 0.25, "grad_norm": 0.6624523979672308, "learning_rate": 8.732950448359109e-07, "loss": 0.2709, "step": 3995 }, { "epoch": 0.25, "grad_norm": 0.876141690078714, "learning_rate": 8.732263303529451e-07, "loss": 0.2849, "step": 3996 }, { "epoch": 0.25, "grad_norm": 1.0187704625173444, "learning_rate": 8.731575999473936e-07, "loss": 0.2479, "step": 3997 }, { "epoch": 0.25, "grad_norm": 0.9978460960576566, "learning_rate": 8.73088853622189e-07, "loss": 0.146, "step": 3998 }, { "epoch": 0.26, "grad_norm": 0.8034763387545482, "learning_rate": 8.730200913802637e-07, "loss": 0.1926, "step": 3999 }, { "epoch": 0.26, "grad_norm": 1.4719686255204056, "learning_rate": 8.729513132245514e-07, "loss": 0.231, "step": 4000 }, { "epoch": 0.26, "grad_norm": 1.3168863902789227, "learning_rate": 8.728825191579865e-07, "loss": 0.0533, "step": 4001 }, { "epoch": 0.26, "grad_norm": 0.5907737292904186, "learning_rate": 8.728137091835038e-07, "loss": 0.1546, "step": 4002 }, { "epoch": 0.26, "grad_norm": 0.43436093732337505, "learning_rate": 8.727448833040386e-07, "loss": 0.1933, "step": 4003 }, { "epoch": 0.26, "grad_norm": 1.182335143143738, "learning_rate": 8.726760415225274e-07, "loss": 0.0765, "step": 4004 }, { "epoch": 0.26, "grad_norm": 0.8972510251624722, "learning_rate": 8.726071838419072e-07, "loss": 0.1038, "step": 4005 }, { "epoch": 0.26, "grad_norm": 0.6094007452363491, "learning_rate": 8.725383102651153e-07, "loss": 0.1785, "step": 4006 }, { "epoch": 0.26, "grad_norm": 0.29551837758871335, "learning_rate": 8.724694207950905e-07, "loss": 0.1089, "step": 4007 }, { "epoch": 0.26, "grad_norm": 1.1253683787788942, "learning_rate": 8.724005154347713e-07, "loss": 0.2786, "step": 4008 }, { "epoch": 0.26, "grad_norm": 0.423186108471077, "learning_rate": 8.723315941870974e-07, "loss": 0.1108, "step": 4009 }, { "epoch": 0.26, "grad_norm": 0.48852532601750126, "learning_rate": 8.722626570550093e-07, "loss": 0.2561, "step": 4010 }, { "epoch": 0.26, "grad_norm": 0.7463282720614175, "learning_rate": 8.72193704041448e-07, "loss": 0.3525, "step": 4011 }, { "epoch": 0.26, "grad_norm": 0.5450143580604226, "learning_rate": 8.72124735149355e-07, "loss": 0.2108, "step": 4012 }, { "epoch": 0.26, "grad_norm": 0.9253499962593161, "learning_rate": 8.720557503816728e-07, "loss": 0.3795, "step": 4013 }, { "epoch": 0.26, "grad_norm": 0.46562963399667523, "learning_rate": 8.719867497413443e-07, "loss": 0.007, "step": 4014 }, { "epoch": 0.26, "grad_norm": 0.5332682655484945, "learning_rate": 8.719177332313131e-07, "loss": 0.186, "step": 4015 }, { "epoch": 0.26, "grad_norm": 0.5535338783673729, "learning_rate": 8.71848700854524e-07, "loss": 0.1613, "step": 4016 }, { "epoch": 0.26, "grad_norm": 0.6403299260618557, "learning_rate": 8.717796526139217e-07, "loss": 0.2925, "step": 4017 }, { "epoch": 0.26, "grad_norm": 0.6254917532022537, "learning_rate": 8.71710588512452e-07, "loss": 0.2384, "step": 4018 }, { "epoch": 0.26, "grad_norm": 0.5750364342194938, "learning_rate": 8.716415085530615e-07, "loss": 0.0331, "step": 4019 }, { "epoch": 0.26, "grad_norm": 0.4569063266582358, "learning_rate": 8.71572412738697e-07, "loss": 0.3298, "step": 4020 }, { "epoch": 0.26, "grad_norm": 0.891163346975924, "learning_rate": 8.715033010723065e-07, "loss": 0.0961, "step": 4021 }, { "epoch": 0.26, "grad_norm": 1.3413829687791063, "learning_rate": 8.714341735568384e-07, "loss": 0.0372, "step": 4022 }, { "epoch": 0.26, "grad_norm": 0.49756565405090214, "learning_rate": 8.713650301952418e-07, "loss": 0.0951, "step": 4023 }, { "epoch": 0.26, "grad_norm": 0.9182125512484611, "learning_rate": 8.712958709904664e-07, "loss": 0.2331, "step": 4024 }, { "epoch": 0.26, "grad_norm": 0.9725612485583259, "learning_rate": 8.712266959454628e-07, "loss": 0.2, "step": 4025 }, { "epoch": 0.26, "grad_norm": 1.4010945358686009, "learning_rate": 8.711575050631821e-07, "loss": 0.031, "step": 4026 }, { "epoch": 0.26, "grad_norm": 0.5290640232322119, "learning_rate": 8.71088298346576e-07, "loss": 0.2232, "step": 4027 }, { "epoch": 0.26, "grad_norm": 0.22726065289959244, "learning_rate": 8.710190757985971e-07, "loss": 0.0997, "step": 4028 }, { "epoch": 0.26, "grad_norm": 1.5089060046635212, "learning_rate": 8.709498374221987e-07, "loss": 0.0809, "step": 4029 }, { "epoch": 0.26, "grad_norm": 0.8505876302845143, "learning_rate": 8.708805832203345e-07, "loss": 0.1974, "step": 4030 }, { "epoch": 0.26, "grad_norm": 1.5800888585351849, "learning_rate": 8.708113131959591e-07, "loss": 0.3174, "step": 4031 }, { "epoch": 0.26, "grad_norm": 1.2127749732338962, "learning_rate": 8.707420273520276e-07, "loss": 0.2511, "step": 4032 }, { "epoch": 0.26, "grad_norm": 0.9619904283254103, "learning_rate": 8.706727256914959e-07, "loss": 0.2079, "step": 4033 }, { "epoch": 0.26, "grad_norm": 0.788683859357756, "learning_rate": 8.706034082173205e-07, "loss": 0.2252, "step": 4034 }, { "epoch": 0.26, "grad_norm": 2.8666167015743347, "learning_rate": 8.70534074932459e-07, "loss": 0.2964, "step": 4035 }, { "epoch": 0.26, "grad_norm": 0.8339818264528626, "learning_rate": 8.704647258398688e-07, "loss": 0.1802, "step": 4036 }, { "epoch": 0.26, "grad_norm": 1.7500161946388975, "learning_rate": 8.703953609425087e-07, "loss": 0.234, "step": 4037 }, { "epoch": 0.26, "grad_norm": 1.0373680778071752, "learning_rate": 8.703259802433378e-07, "loss": 0.0323, "step": 4038 }, { "epoch": 0.26, "grad_norm": 0.7686844705049926, "learning_rate": 8.702565837453163e-07, "loss": 0.1601, "step": 4039 }, { "epoch": 0.26, "grad_norm": 0.34216488586559624, "learning_rate": 8.701871714514046e-07, "loss": 0.0436, "step": 4040 }, { "epoch": 0.26, "grad_norm": 0.7622924070033409, "learning_rate": 8.701177433645639e-07, "loss": 0.3274, "step": 4041 }, { "epoch": 0.26, "grad_norm": 1.876673436189718, "learning_rate": 8.700482994877563e-07, "loss": 0.1475, "step": 4042 }, { "epoch": 0.26, "grad_norm": 0.3006986467282531, "learning_rate": 8.699788398239442e-07, "loss": 0.0065, "step": 4043 }, { "epoch": 0.26, "grad_norm": 0.35755714554411305, "learning_rate": 8.699093643760913e-07, "loss": 0.1083, "step": 4044 }, { "epoch": 0.26, "grad_norm": 1.9828179439260103, "learning_rate": 8.698398731471612e-07, "loss": 0.1531, "step": 4045 }, { "epoch": 0.26, "grad_norm": 0.39576422761612184, "learning_rate": 8.697703661401185e-07, "loss": 0.2006, "step": 4046 }, { "epoch": 0.26, "grad_norm": 0.8437275424897863, "learning_rate": 8.697008433579289e-07, "loss": 0.2849, "step": 4047 }, { "epoch": 0.26, "grad_norm": 1.7544173146891293, "learning_rate": 8.69631304803558e-07, "loss": 0.3707, "step": 4048 }, { "epoch": 0.26, "grad_norm": 0.7168024231112956, "learning_rate": 8.695617504799726e-07, "loss": 0.1958, "step": 4049 }, { "epoch": 0.26, "grad_norm": 0.19259379309971653, "learning_rate": 8.694921803901401e-07, "loss": 0.0996, "step": 4050 }, { "epoch": 0.26, "grad_norm": 0.5194265600276591, "learning_rate": 8.694225945370282e-07, "loss": 0.0215, "step": 4051 }, { "epoch": 0.26, "grad_norm": 0.6012828951597724, "learning_rate": 8.693529929236058e-07, "loss": 0.2555, "step": 4052 }, { "epoch": 0.26, "grad_norm": 0.8690152717732422, "learning_rate": 8.692833755528424e-07, "loss": 0.0856, "step": 4053 }, { "epoch": 0.26, "grad_norm": 1.3277992090830388, "learning_rate": 8.692137424277079e-07, "loss": 0.3708, "step": 4054 }, { "epoch": 0.26, "grad_norm": 0.5039056616220217, "learning_rate": 8.691440935511726e-07, "loss": 0.3282, "step": 4055 }, { "epoch": 0.26, "grad_norm": 0.42331557662746544, "learning_rate": 8.690744289262086e-07, "loss": 0.3823, "step": 4056 }, { "epoch": 0.26, "grad_norm": 0.9142575813248571, "learning_rate": 8.690047485557873e-07, "loss": 0.2637, "step": 4057 }, { "epoch": 0.26, "grad_norm": 3.7196566229620287, "learning_rate": 8.689350524428817e-07, "loss": 0.24, "step": 4058 }, { "epoch": 0.26, "grad_norm": 1.2910743427622435, "learning_rate": 8.688653405904651e-07, "loss": 0.2829, "step": 4059 }, { "epoch": 0.26, "grad_norm": 0.9113426193291279, "learning_rate": 8.687956130015115e-07, "loss": 0.2902, "step": 4060 }, { "epoch": 0.26, "grad_norm": 0.5598130609352595, "learning_rate": 8.687258696789957e-07, "loss": 0.1831, "step": 4061 }, { "epoch": 0.26, "grad_norm": 0.5193925121894146, "learning_rate": 8.686561106258932e-07, "loss": 0.0891, "step": 4062 }, { "epoch": 0.26, "grad_norm": 0.6753090305556863, "learning_rate": 8.685863358451797e-07, "loss": 0.2076, "step": 4063 }, { "epoch": 0.26, "grad_norm": 0.3933141989346814, "learning_rate": 8.685165453398323e-07, "loss": 0.1361, "step": 4064 }, { "epoch": 0.26, "grad_norm": 0.9869841152758954, "learning_rate": 8.684467391128283e-07, "loss": 0.0818, "step": 4065 }, { "epoch": 0.26, "grad_norm": 0.5617361231289953, "learning_rate": 8.683769171671456e-07, "loss": 0.0773, "step": 4066 }, { "epoch": 0.26, "grad_norm": 0.9474663227051654, "learning_rate": 8.683070795057632e-07, "loss": 0.1526, "step": 4067 }, { "epoch": 0.26, "grad_norm": 0.08787256262239636, "learning_rate": 8.682372261316603e-07, "loss": 0.0033, "step": 4068 }, { "epoch": 0.26, "grad_norm": 0.5999741487371676, "learning_rate": 8.681673570478172e-07, "loss": 0.0251, "step": 4069 }, { "epoch": 0.26, "grad_norm": 0.5186519952801143, "learning_rate": 8.680974722572144e-07, "loss": 0.1501, "step": 4070 }, { "epoch": 0.26, "grad_norm": 0.4551390871962548, "learning_rate": 8.680275717628336e-07, "loss": 0.185, "step": 4071 }, { "epoch": 0.26, "grad_norm": 1.3686354176626132, "learning_rate": 8.679576555676566e-07, "loss": 0.1716, "step": 4072 }, { "epoch": 0.26, "grad_norm": 0.7757396827823145, "learning_rate": 8.678877236746664e-07, "loss": 0.1621, "step": 4073 }, { "epoch": 0.26, "grad_norm": 0.8828172389719617, "learning_rate": 8.678177760868466e-07, "loss": 0.2308, "step": 4074 }, { "epoch": 0.26, "grad_norm": 0.6983117036471638, "learning_rate": 8.677478128071808e-07, "loss": 0.2866, "step": 4075 }, { "epoch": 0.26, "grad_norm": 0.38771195245275397, "learning_rate": 8.676778338386541e-07, "loss": 0.1309, "step": 4076 }, { "epoch": 0.26, "grad_norm": 0.6801236831527018, "learning_rate": 8.676078391842518e-07, "loss": 0.2795, "step": 4077 }, { "epoch": 0.26, "grad_norm": 1.5694146176378214, "learning_rate": 8.675378288469603e-07, "loss": 0.175, "step": 4078 }, { "epoch": 0.26, "grad_norm": 1.6219395904385054, "learning_rate": 8.674678028297659e-07, "loss": 0.247, "step": 4079 }, { "epoch": 0.26, "grad_norm": 0.34342221249927024, "learning_rate": 8.673977611356566e-07, "loss": 0.1954, "step": 4080 }, { "epoch": 0.26, "grad_norm": 0.7600292127363575, "learning_rate": 8.673277037676201e-07, "loss": 0.3134, "step": 4081 }, { "epoch": 0.26, "grad_norm": 0.3481644163119903, "learning_rate": 8.672576307286455e-07, "loss": 0.0081, "step": 4082 }, { "epoch": 0.26, "grad_norm": 1.6503751638113178, "learning_rate": 8.671875420217218e-07, "loss": 0.0743, "step": 4083 }, { "epoch": 0.26, "grad_norm": 1.1873273700527, "learning_rate": 8.671174376498396e-07, "loss": 0.3154, "step": 4084 }, { "epoch": 0.26, "grad_norm": 0.8396698016655777, "learning_rate": 8.670473176159896e-07, "loss": 0.0073, "step": 4085 }, { "epoch": 0.26, "grad_norm": 0.6112612695587594, "learning_rate": 8.66977181923163e-07, "loss": 0.3248, "step": 4086 }, { "epoch": 0.26, "grad_norm": 0.7350065130972876, "learning_rate": 8.669070305743522e-07, "loss": 0.1309, "step": 4087 }, { "epoch": 0.26, "grad_norm": 1.0898361003584103, "learning_rate": 8.668368635725498e-07, "loss": 0.2625, "step": 4088 }, { "epoch": 0.26, "grad_norm": 0.5157372002681769, "learning_rate": 8.667666809207494e-07, "loss": 0.1469, "step": 4089 }, { "epoch": 0.26, "grad_norm": 0.7806264136291773, "learning_rate": 8.666964826219452e-07, "loss": 0.221, "step": 4090 }, { "epoch": 0.26, "grad_norm": 0.913072198088876, "learning_rate": 8.666262686791317e-07, "loss": 0.4681, "step": 4091 }, { "epoch": 0.26, "grad_norm": 0.764650493860681, "learning_rate": 8.665560390953048e-07, "loss": 0.4531, "step": 4092 }, { "epoch": 0.26, "grad_norm": 0.8470516436514497, "learning_rate": 8.664857938734603e-07, "loss": 0.4609, "step": 4093 }, { "epoch": 0.26, "grad_norm": 2.3627619097956996, "learning_rate": 8.664155330165951e-07, "loss": 0.315, "step": 4094 }, { "epoch": 0.26, "grad_norm": 0.9197571070088587, "learning_rate": 8.663452565277066e-07, "loss": 0.4106, "step": 4095 }, { "epoch": 0.26, "grad_norm": 0.4429333737292287, "learning_rate": 8.66274964409793e-07, "loss": 0.1465, "step": 4096 }, { "epoch": 0.26, "grad_norm": 1.3301824337049717, "learning_rate": 8.662046566658534e-07, "loss": 0.155, "step": 4097 }, { "epoch": 0.26, "grad_norm": 0.8237105848216212, "learning_rate": 8.661343332988868e-07, "loss": 0.3154, "step": 4098 }, { "epoch": 0.26, "grad_norm": 0.8642760805583275, "learning_rate": 8.660639943118935e-07, "loss": 0.2097, "step": 4099 }, { "epoch": 0.26, "grad_norm": 0.9988221169075769, "learning_rate": 8.659936397078742e-07, "loss": 0.1076, "step": 4100 }, { "epoch": 0.26, "grad_norm": 1.08165401706748, "learning_rate": 8.659232694898306e-07, "loss": 0.1478, "step": 4101 }, { "epoch": 0.26, "grad_norm": 0.9392110429916637, "learning_rate": 8.658528836607648e-07, "loss": 0.2727, "step": 4102 }, { "epoch": 0.26, "grad_norm": 0.6255102925376163, "learning_rate": 8.657824822236794e-07, "loss": 0.1134, "step": 4103 }, { "epoch": 0.26, "grad_norm": 1.531671919199018, "learning_rate": 8.657120651815781e-07, "loss": 0.097, "step": 4104 }, { "epoch": 0.26, "grad_norm": 1.0349825853821242, "learning_rate": 8.656416325374649e-07, "loss": 0.248, "step": 4105 }, { "epoch": 0.26, "grad_norm": 1.41708388544839, "learning_rate": 8.655711842943446e-07, "loss": 0.2295, "step": 4106 }, { "epoch": 0.26, "grad_norm": 1.31534042476054, "learning_rate": 8.655007204552227e-07, "loss": 0.3639, "step": 4107 }, { "epoch": 0.26, "grad_norm": 0.40837283380562023, "learning_rate": 8.654302410231054e-07, "loss": 0.2332, "step": 4108 }, { "epoch": 0.26, "grad_norm": 0.30675613563980453, "learning_rate": 8.653597460009993e-07, "loss": 0.0117, "step": 4109 }, { "epoch": 0.26, "grad_norm": 0.5425657633918686, "learning_rate": 8.652892353919119e-07, "loss": 0.0441, "step": 4110 }, { "epoch": 0.26, "grad_norm": 0.4927982659656652, "learning_rate": 8.652187091988516e-07, "loss": 0.1758, "step": 4111 }, { "epoch": 0.26, "grad_norm": 0.8854424625965419, "learning_rate": 8.651481674248267e-07, "loss": 0.4199, "step": 4112 }, { "epoch": 0.26, "grad_norm": 0.7812687945958526, "learning_rate": 8.650776100728471e-07, "loss": 0.1715, "step": 4113 }, { "epoch": 0.26, "grad_norm": 2.0582769701423547, "learning_rate": 8.650070371459228e-07, "loss": 0.0757, "step": 4114 }, { "epoch": 0.26, "grad_norm": 0.3886860276188469, "learning_rate": 8.649364486470646e-07, "loss": 0.1217, "step": 4115 }, { "epoch": 0.26, "grad_norm": 0.7357733802014849, "learning_rate": 8.648658445792838e-07, "loss": 0.3296, "step": 4116 }, { "epoch": 0.26, "grad_norm": 3.7205992295519654, "learning_rate": 8.647952249455925e-07, "loss": 0.1298, "step": 4117 }, { "epoch": 0.26, "grad_norm": 1.2698102001347191, "learning_rate": 8.647245897490036e-07, "loss": 0.0348, "step": 4118 }, { "epoch": 0.26, "grad_norm": 1.0515583970453326, "learning_rate": 8.646539389925307e-07, "loss": 0.1677, "step": 4119 }, { "epoch": 0.26, "grad_norm": 0.5393363410624852, "learning_rate": 8.645832726791876e-07, "loss": 0.1455, "step": 4120 }, { "epoch": 0.26, "grad_norm": 0.7251985075075046, "learning_rate": 8.645125908119892e-07, "loss": 0.1008, "step": 4121 }, { "epoch": 0.26, "grad_norm": 0.8636367474351939, "learning_rate": 8.644418933939508e-07, "loss": 0.3264, "step": 4122 }, { "epoch": 0.26, "grad_norm": 0.6678994209994875, "learning_rate": 8.643711804280888e-07, "loss": 0.2842, "step": 4123 }, { "epoch": 0.26, "grad_norm": 0.4542562535018774, "learning_rate": 8.643004519174197e-07, "loss": 0.078, "step": 4124 }, { "epoch": 0.26, "grad_norm": 0.569213117890287, "learning_rate": 8.642297078649608e-07, "loss": 0.2563, "step": 4125 }, { "epoch": 0.26, "grad_norm": 0.886986849481526, "learning_rate": 8.641589482737306e-07, "loss": 0.1211, "step": 4126 }, { "epoch": 0.26, "grad_norm": 1.3409962692945223, "learning_rate": 8.640881731467475e-07, "loss": 0.1253, "step": 4127 }, { "epoch": 0.26, "grad_norm": 1.0903431349800219, "learning_rate": 8.640173824870311e-07, "loss": 0.0725, "step": 4128 }, { "epoch": 0.26, "grad_norm": 0.9233903438867485, "learning_rate": 8.639465762976013e-07, "loss": 0.2513, "step": 4129 }, { "epoch": 0.26, "grad_norm": 0.731236143951161, "learning_rate": 8.638757545814789e-07, "loss": 0.0069, "step": 4130 }, { "epoch": 0.26, "grad_norm": 0.9535380261485513, "learning_rate": 8.638049173416855e-07, "loss": 0.2661, "step": 4131 }, { "epoch": 0.26, "grad_norm": 1.341231042585357, "learning_rate": 8.637340645812429e-07, "loss": 0.1597, "step": 4132 }, { "epoch": 0.26, "grad_norm": 0.8995777054348525, "learning_rate": 8.636631963031739e-07, "loss": 0.2024, "step": 4133 }, { "epoch": 0.26, "grad_norm": 0.16401430139744694, "learning_rate": 8.635923125105018e-07, "loss": 0.0235, "step": 4134 }, { "epoch": 0.26, "grad_norm": 1.843344059109508, "learning_rate": 8.63521413206251e-07, "loss": 0.0331, "step": 4135 }, { "epoch": 0.26, "grad_norm": 0.6263545356874659, "learning_rate": 8.634504983934456e-07, "loss": 0.3176, "step": 4136 }, { "epoch": 0.26, "grad_norm": 0.4808943140162613, "learning_rate": 8.633795680751116e-07, "loss": 0.1106, "step": 4137 }, { "epoch": 0.26, "grad_norm": 1.6112905735795604, "learning_rate": 8.633086222542746e-07, "loss": 0.1585, "step": 4138 }, { "epoch": 0.26, "grad_norm": 0.5330909187707842, "learning_rate": 8.632376609339615e-07, "loss": 0.0261, "step": 4139 }, { "epoch": 0.26, "grad_norm": 0.5105147410937386, "learning_rate": 8.631666841171995e-07, "loss": 0.225, "step": 4140 }, { "epoch": 0.26, "grad_norm": 0.4487846225878657, "learning_rate": 8.630956918070167e-07, "loss": 0.022, "step": 4141 }, { "epoch": 0.26, "grad_norm": 0.8339169275027136, "learning_rate": 8.630246840064419e-07, "loss": 0.2695, "step": 4142 }, { "epoch": 0.26, "grad_norm": 1.0898084150738283, "learning_rate": 8.629536607185041e-07, "loss": 0.3548, "step": 4143 }, { "epoch": 0.26, "grad_norm": 1.2029661526671163, "learning_rate": 8.628826219462337e-07, "loss": 0.0799, "step": 4144 }, { "epoch": 0.26, "grad_norm": 0.9993333429873376, "learning_rate": 8.62811567692661e-07, "loss": 0.3005, "step": 4145 }, { "epoch": 0.26, "grad_norm": 0.4544022280862907, "learning_rate": 8.627404979608177e-07, "loss": 0.1412, "step": 4146 }, { "epoch": 0.26, "grad_norm": 2.992980608984845, "learning_rate": 8.626694127537354e-07, "loss": 0.1137, "step": 4147 }, { "epoch": 0.26, "grad_norm": 1.205693774665616, "learning_rate": 8.625983120744469e-07, "loss": 0.1241, "step": 4148 }, { "epoch": 0.26, "grad_norm": 1.1628846331352964, "learning_rate": 8.625271959259856e-07, "loss": 0.2407, "step": 4149 }, { "epoch": 0.26, "grad_norm": 2.680904559539922, "learning_rate": 8.624560643113852e-07, "loss": 0.3343, "step": 4150 }, { "epoch": 0.26, "grad_norm": 1.0543011777212534, "learning_rate": 8.623849172336805e-07, "loss": 0.1855, "step": 4151 }, { "epoch": 0.26, "grad_norm": 0.8079673985740236, "learning_rate": 8.623137546959068e-07, "loss": 0.3205, "step": 4152 }, { "epoch": 0.26, "grad_norm": 0.8083249109115577, "learning_rate": 8.622425767011e-07, "loss": 0.2182, "step": 4153 }, { "epoch": 0.26, "grad_norm": 0.5073675655689701, "learning_rate": 8.621713832522966e-07, "loss": 0.2256, "step": 4154 }, { "epoch": 0.26, "grad_norm": 0.6951868169192322, "learning_rate": 8.62100174352534e-07, "loss": 0.2103, "step": 4155 }, { "epoch": 0.27, "grad_norm": 0.46143325777616173, "learning_rate": 8.620289500048501e-07, "loss": 0.0863, "step": 4156 }, { "epoch": 0.27, "grad_norm": 0.8654396278114236, "learning_rate": 8.619577102122833e-07, "loss": 0.21, "step": 4157 }, { "epoch": 0.27, "grad_norm": 4.844883718788873, "learning_rate": 8.61886454977873e-07, "loss": 0.1231, "step": 4158 }, { "epoch": 0.27, "grad_norm": 0.9131056581684664, "learning_rate": 8.618151843046591e-07, "loss": 0.2919, "step": 4159 }, { "epoch": 0.27, "grad_norm": 0.7942533845691214, "learning_rate": 8.617438981956819e-07, "loss": 0.1762, "step": 4160 }, { "epoch": 0.27, "grad_norm": 0.5094814576754809, "learning_rate": 8.616725966539831e-07, "loss": 0.1071, "step": 4161 }, { "epoch": 0.27, "grad_norm": 0.3741130726984353, "learning_rate": 8.616012796826041e-07, "loss": 0.1949, "step": 4162 }, { "epoch": 0.27, "grad_norm": 0.2950232822520885, "learning_rate": 8.615299472845876e-07, "loss": 0.1617, "step": 4163 }, { "epoch": 0.27, "grad_norm": 1.1629432705817841, "learning_rate": 8.614585994629769e-07, "loss": 0.072, "step": 4164 }, { "epoch": 0.27, "grad_norm": 0.43310283494739926, "learning_rate": 8.613872362208157e-07, "loss": 0.2264, "step": 4165 }, { "epoch": 0.27, "grad_norm": 0.53920155113599, "learning_rate": 8.613158575611484e-07, "loss": 0.1356, "step": 4166 }, { "epoch": 0.27, "grad_norm": 0.588776252123757, "learning_rate": 8.612444634870204e-07, "loss": 0.1094, "step": 4167 }, { "epoch": 0.27, "grad_norm": 1.255630474149303, "learning_rate": 8.611730540014772e-07, "loss": 0.125, "step": 4168 }, { "epoch": 0.27, "grad_norm": 0.1919129974843073, "learning_rate": 8.611016291075656e-07, "loss": 0.1151, "step": 4169 }, { "epoch": 0.27, "grad_norm": 0.23080007066420352, "learning_rate": 8.610301888083327e-07, "loss": 0.129, "step": 4170 }, { "epoch": 0.27, "grad_norm": 0.6946707730211346, "learning_rate": 8.60958733106826e-07, "loss": 0.591, "step": 4171 }, { "epoch": 0.27, "grad_norm": 0.45516192914906256, "learning_rate": 8.608872620060943e-07, "loss": 0.1313, "step": 4172 }, { "epoch": 0.27, "grad_norm": 0.8978727104176235, "learning_rate": 8.608157755091864e-07, "loss": 0.4418, "step": 4173 }, { "epoch": 0.27, "grad_norm": 1.8782263943138546, "learning_rate": 8.607442736191521e-07, "loss": 0.2521, "step": 4174 }, { "epoch": 0.27, "grad_norm": 0.8028398729137047, "learning_rate": 8.606727563390421e-07, "loss": 0.188, "step": 4175 }, { "epoch": 0.27, "grad_norm": 0.21838660358954995, "learning_rate": 8.606012236719073e-07, "loss": 0.1483, "step": 4176 }, { "epoch": 0.27, "grad_norm": 1.1902942487618677, "learning_rate": 8.605296756207993e-07, "loss": 0.2007, "step": 4177 }, { "epoch": 0.27, "grad_norm": 0.28907669710924344, "learning_rate": 8.604581121887706e-07, "loss": 0.1095, "step": 4178 }, { "epoch": 0.27, "grad_norm": 1.318027359896157, "learning_rate": 8.60386533378874e-07, "loss": 0.3918, "step": 4179 }, { "epoch": 0.27, "grad_norm": 1.751457980254541, "learning_rate": 8.603149391941638e-07, "loss": 0.2503, "step": 4180 }, { "epoch": 0.27, "grad_norm": 0.5886811753906173, "learning_rate": 8.602433296376937e-07, "loss": 0.348, "step": 4181 }, { "epoch": 0.27, "grad_norm": 1.5060503883283605, "learning_rate": 8.601717047125191e-07, "loss": 0.1095, "step": 4182 }, { "epoch": 0.27, "grad_norm": 1.2121079761848257, "learning_rate": 8.601000644216955e-07, "loss": 0.223, "step": 4183 }, { "epoch": 0.27, "grad_norm": 1.4177256732661798, "learning_rate": 8.600284087682792e-07, "loss": 0.4123, "step": 4184 }, { "epoch": 0.27, "grad_norm": 1.930620527742801, "learning_rate": 8.599567377553274e-07, "loss": 0.1856, "step": 4185 }, { "epoch": 0.27, "grad_norm": 1.5694074812344485, "learning_rate": 8.598850513858975e-07, "loss": 0.1965, "step": 4186 }, { "epoch": 0.27, "grad_norm": 1.343892461363804, "learning_rate": 8.598133496630477e-07, "loss": 0.1279, "step": 4187 }, { "epoch": 0.27, "grad_norm": 0.5437361057509675, "learning_rate": 8.597416325898373e-07, "loss": 0.0913, "step": 4188 }, { "epoch": 0.27, "grad_norm": 1.159113119356025, "learning_rate": 8.596699001693255e-07, "loss": 0.6262, "step": 4189 }, { "epoch": 0.27, "grad_norm": 0.4849247105383991, "learning_rate": 8.595981524045729e-07, "loss": 0.2941, "step": 4190 }, { "epoch": 0.27, "grad_norm": 2.164368321820298, "learning_rate": 8.595263892986402e-07, "loss": 0.1936, "step": 4191 }, { "epoch": 0.27, "grad_norm": 1.2799470303055762, "learning_rate": 8.594546108545888e-07, "loss": 0.4101, "step": 4192 }, { "epoch": 0.27, "grad_norm": 1.0023004339422021, "learning_rate": 8.593828170754813e-07, "loss": 0.1801, "step": 4193 }, { "epoch": 0.27, "grad_norm": 1.4085905230365117, "learning_rate": 8.593110079643803e-07, "loss": 0.2139, "step": 4194 }, { "epoch": 0.27, "grad_norm": 1.5349107399844006, "learning_rate": 8.592391835243495e-07, "loss": 0.4644, "step": 4195 }, { "epoch": 0.27, "grad_norm": 0.6752446175755001, "learning_rate": 8.591673437584528e-07, "loss": 0.0281, "step": 4196 }, { "epoch": 0.27, "grad_norm": 0.18305378645340323, "learning_rate": 8.590954886697553e-07, "loss": 0.0904, "step": 4197 }, { "epoch": 0.27, "grad_norm": 1.26359003237301, "learning_rate": 8.590236182613224e-07, "loss": 0.0882, "step": 4198 }, { "epoch": 0.27, "grad_norm": 0.38274271577641134, "learning_rate": 8.589517325362201e-07, "loss": 0.0959, "step": 4199 }, { "epoch": 0.27, "grad_norm": 0.41296319207016935, "learning_rate": 8.588798314975154e-07, "loss": 0.1613, "step": 4200 }, { "epoch": 0.27, "grad_norm": 1.2904919767391199, "learning_rate": 8.588079151482756e-07, "loss": 0.1069, "step": 4201 }, { "epoch": 0.27, "grad_norm": 1.5138270595551349, "learning_rate": 8.58735983491569e-07, "loss": 0.2439, "step": 4202 }, { "epoch": 0.27, "grad_norm": 2.0077304062080574, "learning_rate": 8.58664036530464e-07, "loss": 0.2434, "step": 4203 }, { "epoch": 0.27, "grad_norm": 0.81302314365293, "learning_rate": 8.585920742680304e-07, "loss": 0.4034, "step": 4204 }, { "epoch": 0.27, "grad_norm": 3.4243340845075125, "learning_rate": 8.585200967073378e-07, "loss": 0.2181, "step": 4205 }, { "epoch": 0.27, "grad_norm": 1.412047738650341, "learning_rate": 8.584481038514572e-07, "loss": 0.1865, "step": 4206 }, { "epoch": 0.27, "grad_norm": 0.7237220104875748, "learning_rate": 8.583760957034601e-07, "loss": 0.3277, "step": 4207 }, { "epoch": 0.27, "grad_norm": 0.7640779684273645, "learning_rate": 8.583040722664183e-07, "loss": 0.0173, "step": 4208 }, { "epoch": 0.27, "grad_norm": 0.6214385840338672, "learning_rate": 8.582320335434045e-07, "loss": 0.2223, "step": 4209 }, { "epoch": 0.27, "grad_norm": 0.9811865311502546, "learning_rate": 8.581599795374918e-07, "loss": 0.1144, "step": 4210 }, { "epoch": 0.27, "grad_norm": 0.2670950809825193, "learning_rate": 8.580879102517547e-07, "loss": 0.1149, "step": 4211 }, { "epoch": 0.27, "grad_norm": 1.1956863785449852, "learning_rate": 8.580158256892672e-07, "loss": 0.3023, "step": 4212 }, { "epoch": 0.27, "grad_norm": 1.2071355650323525, "learning_rate": 8.57943725853105e-07, "loss": 0.4585, "step": 4213 }, { "epoch": 0.27, "grad_norm": 0.6563889522905176, "learning_rate": 8.578716107463439e-07, "loss": 0.2213, "step": 4214 }, { "epoch": 0.27, "grad_norm": 1.215855606348245, "learning_rate": 8.577994803720605e-07, "loss": 0.1861, "step": 4215 }, { "epoch": 0.27, "grad_norm": 0.8469151423347601, "learning_rate": 8.57727334733332e-07, "loss": 0.2754, "step": 4216 }, { "epoch": 0.27, "grad_norm": 0.9923423963328847, "learning_rate": 8.576551738332362e-07, "loss": 0.2037, "step": 4217 }, { "epoch": 0.27, "grad_norm": 1.971817001544454, "learning_rate": 8.575829976748519e-07, "loss": 0.1913, "step": 4218 }, { "epoch": 0.27, "grad_norm": 1.8981390336960575, "learning_rate": 8.575108062612579e-07, "loss": 0.1501, "step": 4219 }, { "epoch": 0.27, "grad_norm": 1.2470736773943565, "learning_rate": 8.574385995955342e-07, "loss": 0.2306, "step": 4220 }, { "epoch": 0.27, "grad_norm": 0.8319293764527479, "learning_rate": 8.573663776807614e-07, "loss": 0.318, "step": 4221 }, { "epoch": 0.27, "grad_norm": 2.5835820336388515, "learning_rate": 8.572941405200205e-07, "loss": 0.2691, "step": 4222 }, { "epoch": 0.27, "grad_norm": 1.0016540452258798, "learning_rate": 8.572218881163932e-07, "loss": 0.4274, "step": 4223 }, { "epoch": 0.27, "grad_norm": 5.360160874124541, "learning_rate": 8.571496204729623e-07, "loss": 0.0397, "step": 4224 }, { "epoch": 0.27, "grad_norm": 0.42308800478398084, "learning_rate": 8.570773375928104e-07, "loss": 0.2743, "step": 4225 }, { "epoch": 0.27, "grad_norm": 2.8029133247581695, "learning_rate": 8.570050394790216e-07, "loss": 0.4695, "step": 4226 }, { "epoch": 0.27, "grad_norm": 0.9014662793121879, "learning_rate": 8.569327261346801e-07, "loss": 0.2776, "step": 4227 }, { "epoch": 0.27, "grad_norm": 0.420264671288587, "learning_rate": 8.568603975628709e-07, "loss": 0.2015, "step": 4228 }, { "epoch": 0.27, "grad_norm": 1.1059528845144877, "learning_rate": 8.567880537666799e-07, "loss": 0.2098, "step": 4229 }, { "epoch": 0.27, "grad_norm": 1.256259511897296, "learning_rate": 8.567156947491932e-07, "loss": 0.2977, "step": 4230 }, { "epoch": 0.27, "grad_norm": 0.23574349099195155, "learning_rate": 8.56643320513498e-07, "loss": 0.0856, "step": 4231 }, { "epoch": 0.27, "grad_norm": 0.341729503021465, "learning_rate": 8.565709310626816e-07, "loss": 0.1234, "step": 4232 }, { "epoch": 0.27, "grad_norm": 1.454600428281282, "learning_rate": 8.564985263998325e-07, "loss": 0.2918, "step": 4233 }, { "epoch": 0.27, "grad_norm": 0.7518635243267678, "learning_rate": 8.564261065280398e-07, "loss": 0.0767, "step": 4234 }, { "epoch": 0.27, "grad_norm": 0.8384725035293839, "learning_rate": 8.563536714503928e-07, "loss": 0.1559, "step": 4235 }, { "epoch": 0.27, "grad_norm": 0.800838341046286, "learning_rate": 8.562812211699816e-07, "loss": 0.1743, "step": 4236 }, { "epoch": 0.27, "grad_norm": 0.8013847848128576, "learning_rate": 8.562087556898976e-07, "loss": 0.2045, "step": 4237 }, { "epoch": 0.27, "grad_norm": 0.31272994473471644, "learning_rate": 8.56136275013232e-07, "loss": 0.2826, "step": 4238 }, { "epoch": 0.27, "grad_norm": 0.49798282015021056, "learning_rate": 8.560637791430768e-07, "loss": 0.1582, "step": 4239 }, { "epoch": 0.27, "grad_norm": 1.621320727025723, "learning_rate": 8.559912680825252e-07, "loss": 0.3903, "step": 4240 }, { "epoch": 0.27, "grad_norm": 0.28427224685192864, "learning_rate": 8.559187418346702e-07, "loss": 0.166, "step": 4241 }, { "epoch": 0.27, "grad_norm": 0.9318865664038639, "learning_rate": 8.558462004026063e-07, "loss": 0.2234, "step": 4242 }, { "epoch": 0.27, "grad_norm": 0.4894180794341328, "learning_rate": 8.557736437894283e-07, "loss": 0.1665, "step": 4243 }, { "epoch": 0.27, "grad_norm": 1.5977000699130905, "learning_rate": 8.557010719982314e-07, "loss": 0.3489, "step": 4244 }, { "epoch": 0.27, "grad_norm": 1.8276768852244196, "learning_rate": 8.556284850321116e-07, "loss": 0.4377, "step": 4245 }, { "epoch": 0.27, "grad_norm": 0.5917687601179266, "learning_rate": 8.555558828941658e-07, "loss": 0.1854, "step": 4246 }, { "epoch": 0.27, "grad_norm": 1.0923027668181713, "learning_rate": 8.554832655874913e-07, "loss": 0.1182, "step": 4247 }, { "epoch": 0.27, "grad_norm": 0.9908405100430573, "learning_rate": 8.554106331151861e-07, "loss": 0.1269, "step": 4248 }, { "epoch": 0.27, "grad_norm": 0.9048833651868197, "learning_rate": 8.553379854803488e-07, "loss": 0.196, "step": 4249 }, { "epoch": 0.27, "grad_norm": 7.05747604125339, "learning_rate": 8.552653226860787e-07, "loss": 0.1749, "step": 4250 }, { "epoch": 0.27, "grad_norm": 0.6215436704290188, "learning_rate": 8.551926447354758e-07, "loss": 0.1114, "step": 4251 }, { "epoch": 0.27, "grad_norm": 0.5683414048674594, "learning_rate": 8.551199516316407e-07, "loss": 0.3162, "step": 4252 }, { "epoch": 0.27, "grad_norm": 0.5778537672359699, "learning_rate": 8.550472433776744e-07, "loss": 0.1325, "step": 4253 }, { "epoch": 0.27, "grad_norm": 0.3504835916634373, "learning_rate": 8.549745199766791e-07, "loss": 0.1099, "step": 4254 }, { "epoch": 0.27, "grad_norm": 0.18648009972585985, "learning_rate": 8.549017814317572e-07, "loss": 0.0076, "step": 4255 }, { "epoch": 0.27, "grad_norm": 0.7118260180527443, "learning_rate": 8.548290277460118e-07, "loss": 0.4571, "step": 4256 }, { "epoch": 0.27, "grad_norm": 2.7519858159781565, "learning_rate": 8.547562589225468e-07, "loss": 0.2112, "step": 4257 }, { "epoch": 0.27, "grad_norm": 1.2938195918426418, "learning_rate": 8.546834749644666e-07, "loss": 0.2733, "step": 4258 }, { "epoch": 0.27, "grad_norm": 0.3827592266894607, "learning_rate": 8.546106758748763e-07, "loss": 0.0178, "step": 4259 }, { "epoch": 0.27, "grad_norm": 0.8115191964053644, "learning_rate": 8.545378616568819e-07, "loss": 0.1191, "step": 4260 }, { "epoch": 0.27, "grad_norm": 0.756630858578341, "learning_rate": 8.544650323135895e-07, "loss": 0.007, "step": 4261 }, { "epoch": 0.27, "grad_norm": 0.8229417280241946, "learning_rate": 8.543921878481063e-07, "loss": 0.2933, "step": 4262 }, { "epoch": 0.27, "grad_norm": 1.1633109845165468, "learning_rate": 8.543193282635399e-07, "loss": 0.209, "step": 4263 }, { "epoch": 0.27, "grad_norm": 0.7959031999077241, "learning_rate": 8.542464535629987e-07, "loss": 0.0123, "step": 4264 }, { "epoch": 0.27, "grad_norm": 0.32068562583454563, "learning_rate": 8.541735637495917e-07, "loss": 0.161, "step": 4265 }, { "epoch": 0.27, "grad_norm": 3.1121281956030695, "learning_rate": 8.541006588264286e-07, "loss": 0.1259, "step": 4266 }, { "epoch": 0.27, "grad_norm": 0.7871506502797747, "learning_rate": 8.540277387966193e-07, "loss": 0.1888, "step": 4267 }, { "epoch": 0.27, "grad_norm": 0.3010533321259848, "learning_rate": 8.539548036632751e-07, "loss": 0.1007, "step": 4268 }, { "epoch": 0.27, "grad_norm": 0.6552887264914901, "learning_rate": 8.538818534295075e-07, "loss": 0.0432, "step": 4269 }, { "epoch": 0.27, "grad_norm": 1.4525615232710638, "learning_rate": 8.538088880984285e-07, "loss": 0.1468, "step": 4270 }, { "epoch": 0.27, "grad_norm": 1.7399268411262727, "learning_rate": 8.537359076731512e-07, "loss": 0.1885, "step": 4271 }, { "epoch": 0.27, "grad_norm": 0.3202130344918199, "learning_rate": 8.53662912156789e-07, "loss": 0.0391, "step": 4272 }, { "epoch": 0.27, "grad_norm": 0.5685864471737712, "learning_rate": 8.535899015524559e-07, "loss": 0.1587, "step": 4273 }, { "epoch": 0.27, "grad_norm": 0.6777539456984496, "learning_rate": 8.535168758632669e-07, "loss": 0.1231, "step": 4274 }, { "epoch": 0.27, "grad_norm": 0.2755821239711442, "learning_rate": 8.534438350923373e-07, "loss": 0.1061, "step": 4275 }, { "epoch": 0.27, "grad_norm": 0.5404488903288633, "learning_rate": 8.533707792427831e-07, "loss": 0.2662, "step": 4276 }, { "epoch": 0.27, "grad_norm": 2.6068717340366714, "learning_rate": 8.532977083177213e-07, "loss": 0.2832, "step": 4277 }, { "epoch": 0.27, "grad_norm": 0.6639554778520406, "learning_rate": 8.532246223202688e-07, "loss": 0.1044, "step": 4278 }, { "epoch": 0.27, "grad_norm": 0.1836937541968538, "learning_rate": 8.53151521253544e-07, "loss": 0.1085, "step": 4279 }, { "epoch": 0.27, "grad_norm": 0.4925808917133856, "learning_rate": 8.530784051206653e-07, "loss": 0.1829, "step": 4280 }, { "epoch": 0.27, "grad_norm": 0.7013712454806474, "learning_rate": 8.530052739247521e-07, "loss": 0.1747, "step": 4281 }, { "epoch": 0.27, "grad_norm": 0.36553437198870564, "learning_rate": 8.529321276689244e-07, "loss": 0.218, "step": 4282 }, { "epoch": 0.27, "grad_norm": 1.1997752663695453, "learning_rate": 8.528589663563024e-07, "loss": 0.224, "step": 4283 }, { "epoch": 0.27, "grad_norm": 1.7877892821714088, "learning_rate": 8.527857899900077e-07, "loss": 0.0509, "step": 4284 }, { "epoch": 0.27, "grad_norm": 0.5189093467424057, "learning_rate": 8.527125985731621e-07, "loss": 0.1515, "step": 4285 }, { "epoch": 0.27, "grad_norm": 0.794490678121487, "learning_rate": 8.526393921088877e-07, "loss": 0.0195, "step": 4286 }, { "epoch": 0.27, "grad_norm": 0.9463137408634047, "learning_rate": 8.525661706003082e-07, "loss": 0.1803, "step": 4287 }, { "epoch": 0.27, "grad_norm": 1.733742604782018, "learning_rate": 8.524929340505471e-07, "loss": 0.3151, "step": 4288 }, { "epoch": 0.27, "grad_norm": 0.888426562772552, "learning_rate": 8.524196824627288e-07, "loss": 0.3374, "step": 4289 }, { "epoch": 0.27, "grad_norm": 0.5635952352373081, "learning_rate": 8.523464158399783e-07, "loss": 0.0777, "step": 4290 }, { "epoch": 0.27, "grad_norm": 0.9068369267471855, "learning_rate": 8.522731341854215e-07, "loss": 0.1167, "step": 4291 }, { "epoch": 0.27, "grad_norm": 0.16553678920009054, "learning_rate": 8.521998375021846e-07, "loss": 0.0062, "step": 4292 }, { "epoch": 0.27, "grad_norm": 0.751025825975747, "learning_rate": 8.521265257933947e-07, "loss": 0.2014, "step": 4293 }, { "epoch": 0.27, "grad_norm": 1.5086890057313618, "learning_rate": 8.520531990621792e-07, "loss": 0.0529, "step": 4294 }, { "epoch": 0.27, "grad_norm": 0.5195701383131921, "learning_rate": 8.519798573116666e-07, "loss": 0.1368, "step": 4295 }, { "epoch": 0.27, "grad_norm": 0.4886367835918936, "learning_rate": 8.519065005449857e-07, "loss": 0.3366, "step": 4296 }, { "epoch": 0.27, "grad_norm": 1.345860065984868, "learning_rate": 8.51833128765266e-07, "loss": 0.2093, "step": 4297 }, { "epoch": 0.27, "grad_norm": 0.6163713218739876, "learning_rate": 8.51759741975638e-07, "loss": 0.1095, "step": 4298 }, { "epoch": 0.27, "grad_norm": 1.2497052208533497, "learning_rate": 8.51686340179232e-07, "loss": 0.1417, "step": 4299 }, { "epoch": 0.27, "grad_norm": 2.0159735011566458, "learning_rate": 8.516129233791799e-07, "loss": 0.1864, "step": 4300 }, { "epoch": 0.27, "grad_norm": 0.48030318363378866, "learning_rate": 8.515394915786136e-07, "loss": 0.0638, "step": 4301 }, { "epoch": 0.27, "grad_norm": 0.7073627873066558, "learning_rate": 8.51466044780666e-07, "loss": 0.5101, "step": 4302 }, { "epoch": 0.27, "grad_norm": 0.42053252529749324, "learning_rate": 8.513925829884703e-07, "loss": 0.244, "step": 4303 }, { "epoch": 0.27, "grad_norm": 0.7435353546687125, "learning_rate": 8.513191062051607e-07, "loss": 0.1459, "step": 4304 }, { "epoch": 0.27, "grad_norm": 0.47268336912859865, "learning_rate": 8.512456144338716e-07, "loss": 0.3455, "step": 4305 }, { "epoch": 0.27, "grad_norm": 0.7420844477051433, "learning_rate": 8.511721076777387e-07, "loss": 0.169, "step": 4306 }, { "epoch": 0.27, "grad_norm": 0.9693912136338695, "learning_rate": 8.510985859398976e-07, "loss": 0.4203, "step": 4307 }, { "epoch": 0.27, "grad_norm": 2.2386222437024794, "learning_rate": 8.510250492234852e-07, "loss": 0.0616, "step": 4308 }, { "epoch": 0.27, "grad_norm": 0.9317269982300498, "learning_rate": 8.509514975316384e-07, "loss": 0.2988, "step": 4309 }, { "epoch": 0.27, "grad_norm": 0.3509798007559648, "learning_rate": 8.508779308674952e-07, "loss": 0.1942, "step": 4310 }, { "epoch": 0.27, "grad_norm": 0.8425443795757862, "learning_rate": 8.508043492341943e-07, "loss": 0.1486, "step": 4311 }, { "epoch": 0.27, "grad_norm": 0.3790680364923795, "learning_rate": 8.507307526348744e-07, "loss": 0.0867, "step": 4312 }, { "epoch": 0.28, "grad_norm": 0.6909365406472099, "learning_rate": 8.506571410726758e-07, "loss": 0.1258, "step": 4313 }, { "epoch": 0.28, "grad_norm": 1.112657815616951, "learning_rate": 8.505835145507385e-07, "loss": 0.1601, "step": 4314 }, { "epoch": 0.28, "grad_norm": 0.8192066693388006, "learning_rate": 8.505098730722038e-07, "loss": 0.0203, "step": 4315 }, { "epoch": 0.28, "grad_norm": 2.023798586627928, "learning_rate": 8.504362166402131e-07, "loss": 0.41, "step": 4316 }, { "epoch": 0.28, "grad_norm": 1.4184967784167302, "learning_rate": 8.503625452579091e-07, "loss": 0.1804, "step": 4317 }, { "epoch": 0.28, "grad_norm": 1.2020954289720547, "learning_rate": 8.502888589284346e-07, "loss": 0.1918, "step": 4318 }, { "epoch": 0.28, "grad_norm": 0.9166679015975862, "learning_rate": 8.502151576549332e-07, "loss": 0.0869, "step": 4319 }, { "epoch": 0.28, "grad_norm": 0.7368593006208842, "learning_rate": 8.50141441440549e-07, "loss": 0.0869, "step": 4320 }, { "epoch": 0.28, "grad_norm": 0.9989025002647027, "learning_rate": 8.500677102884273e-07, "loss": 0.0395, "step": 4321 }, { "epoch": 0.28, "grad_norm": 0.8430282057065523, "learning_rate": 8.499939642017131e-07, "loss": 0.3408, "step": 4322 }, { "epoch": 0.28, "grad_norm": 0.4278914053106401, "learning_rate": 8.49920203183553e-07, "loss": 0.2442, "step": 4323 }, { "epoch": 0.28, "grad_norm": 0.8225644744674708, "learning_rate": 8.498464272370936e-07, "loss": 0.3567, "step": 4324 }, { "epoch": 0.28, "grad_norm": 2.8640436344809874, "learning_rate": 8.497726363654823e-07, "loss": 0.1426, "step": 4325 }, { "epoch": 0.28, "grad_norm": 1.3736799735416585, "learning_rate": 8.496988305718671e-07, "loss": 0.2412, "step": 4326 }, { "epoch": 0.28, "grad_norm": 0.7029669234188077, "learning_rate": 8.496250098593968e-07, "loss": 0.2283, "step": 4327 }, { "epoch": 0.28, "grad_norm": 1.4224616227847302, "learning_rate": 8.495511742312209e-07, "loss": 0.439, "step": 4328 }, { "epoch": 0.28, "grad_norm": 0.585775683575167, "learning_rate": 8.494773236904891e-07, "loss": 0.2986, "step": 4329 }, { "epoch": 0.28, "grad_norm": 0.6835802848282123, "learning_rate": 8.494034582403522e-07, "loss": 0.2607, "step": 4330 }, { "epoch": 0.28, "grad_norm": 0.2957920462701642, "learning_rate": 8.493295778839614e-07, "loss": 0.0239, "step": 4331 }, { "epoch": 0.28, "grad_norm": 0.5438956127282539, "learning_rate": 8.492556826244686e-07, "loss": 0.0947, "step": 4332 }, { "epoch": 0.28, "grad_norm": 0.9298790936968806, "learning_rate": 8.491817724650261e-07, "loss": 0.2266, "step": 4333 }, { "epoch": 0.28, "grad_norm": 0.6943324528089971, "learning_rate": 8.491078474087873e-07, "loss": 0.4088, "step": 4334 }, { "epoch": 0.28, "grad_norm": 0.32700302844867757, "learning_rate": 8.49033907458906e-07, "loss": 0.4245, "step": 4335 }, { "epoch": 0.28, "grad_norm": 1.598715524527703, "learning_rate": 8.489599526185366e-07, "loss": 0.2194, "step": 4336 }, { "epoch": 0.28, "grad_norm": 1.7196597826352433, "learning_rate": 8.488859828908341e-07, "loss": 0.291, "step": 4337 }, { "epoch": 0.28, "grad_norm": 5.118949806060416, "learning_rate": 8.488119982789541e-07, "loss": 0.1923, "step": 4338 }, { "epoch": 0.28, "grad_norm": 0.5843986075098508, "learning_rate": 8.487379987860533e-07, "loss": 0.0867, "step": 4339 }, { "epoch": 0.28, "grad_norm": 0.9454531375694244, "learning_rate": 8.486639844152881e-07, "loss": 0.3082, "step": 4340 }, { "epoch": 0.28, "grad_norm": 0.9987469523016238, "learning_rate": 8.485899551698166e-07, "loss": 0.3858, "step": 4341 }, { "epoch": 0.28, "grad_norm": 0.40715029117676954, "learning_rate": 8.485159110527969e-07, "loss": 0.1751, "step": 4342 }, { "epoch": 0.28, "grad_norm": 0.499792871528072, "learning_rate": 8.484418520673878e-07, "loss": 0.0946, "step": 4343 }, { "epoch": 0.28, "grad_norm": 2.262308483993313, "learning_rate": 8.483677782167489e-07, "loss": 0.1008, "step": 4344 }, { "epoch": 0.28, "grad_norm": 0.4171213140551655, "learning_rate": 8.482936895040402e-07, "loss": 0.1927, "step": 4345 }, { "epoch": 0.28, "grad_norm": 0.5700755835615721, "learning_rate": 8.482195859324225e-07, "loss": 0.3416, "step": 4346 }, { "epoch": 0.28, "grad_norm": 0.525540193469076, "learning_rate": 8.481454675050572e-07, "loss": 0.0943, "step": 4347 }, { "epoch": 0.28, "grad_norm": 1.7161781408228356, "learning_rate": 8.480713342251065e-07, "loss": 0.1144, "step": 4348 }, { "epoch": 0.28, "grad_norm": 0.48178956519723415, "learning_rate": 8.47997186095733e-07, "loss": 0.0822, "step": 4349 }, { "epoch": 0.28, "grad_norm": 0.4131590219410887, "learning_rate": 8.479230231201e-07, "loss": 0.1703, "step": 4350 }, { "epoch": 0.28, "grad_norm": 0.4610693527349393, "learning_rate": 8.478488453013713e-07, "loss": 0.0302, "step": 4351 }, { "epoch": 0.28, "grad_norm": 0.572453851246035, "learning_rate": 8.477746526427117e-07, "loss": 0.2856, "step": 4352 }, { "epoch": 0.28, "grad_norm": 1.0447256574844934, "learning_rate": 8.477004451472862e-07, "loss": 0.1349, "step": 4353 }, { "epoch": 0.28, "grad_norm": 1.1982465611298891, "learning_rate": 8.476262228182608e-07, "loss": 0.3739, "step": 4354 }, { "epoch": 0.28, "grad_norm": 0.524392966559799, "learning_rate": 8.475519856588019e-07, "loss": 0.1272, "step": 4355 }, { "epoch": 0.28, "grad_norm": 0.40241971349987826, "learning_rate": 8.474777336720766e-07, "loss": 0.1078, "step": 4356 }, { "epoch": 0.28, "grad_norm": 0.6715665943043577, "learning_rate": 8.474034668612528e-07, "loss": 0.3504, "step": 4357 }, { "epoch": 0.28, "grad_norm": 1.0836109662832707, "learning_rate": 8.473291852294986e-07, "loss": 0.0272, "step": 4358 }, { "epoch": 0.28, "grad_norm": 0.2938766956526506, "learning_rate": 8.472548887799833e-07, "loss": 0.0779, "step": 4359 }, { "epoch": 0.28, "grad_norm": 0.816943845780774, "learning_rate": 8.471805775158762e-07, "loss": 0.2372, "step": 4360 }, { "epoch": 0.28, "grad_norm": 4.75841659991107, "learning_rate": 8.471062514403478e-07, "loss": 0.1443, "step": 4361 }, { "epoch": 0.28, "grad_norm": 0.47206538795515435, "learning_rate": 8.470319105565689e-07, "loss": 0.2239, "step": 4362 }, { "epoch": 0.28, "grad_norm": 1.411996083151186, "learning_rate": 8.469575548677111e-07, "loss": 0.2346, "step": 4363 }, { "epoch": 0.28, "grad_norm": 1.9391920864355068, "learning_rate": 8.468831843769466e-07, "loss": 0.0221, "step": 4364 }, { "epoch": 0.28, "grad_norm": 0.49146848990345166, "learning_rate": 8.468087990874479e-07, "loss": 0.175, "step": 4365 }, { "epoch": 0.28, "grad_norm": 0.5375892507560971, "learning_rate": 8.46734399002389e-07, "loss": 0.0121, "step": 4366 }, { "epoch": 0.28, "grad_norm": 1.0964068343453515, "learning_rate": 8.466599841249435e-07, "loss": 0.3299, "step": 4367 }, { "epoch": 0.28, "grad_norm": 0.5268867637463387, "learning_rate": 8.465855544582861e-07, "loss": 0.0925, "step": 4368 }, { "epoch": 0.28, "grad_norm": 0.5820936578181061, "learning_rate": 8.465111100055922e-07, "loss": 0.2211, "step": 4369 }, { "epoch": 0.28, "grad_norm": 0.45649807209567056, "learning_rate": 8.46436650770038e-07, "loss": 0.1642, "step": 4370 }, { "epoch": 0.28, "grad_norm": 1.3095959954200143, "learning_rate": 8.463621767547997e-07, "loss": 0.0989, "step": 4371 }, { "epoch": 0.28, "grad_norm": 3.032850155819076, "learning_rate": 8.462876879630547e-07, "loss": 0.0369, "step": 4372 }, { "epoch": 0.28, "grad_norm": 0.7607701314968326, "learning_rate": 8.462131843979808e-07, "loss": 0.1293, "step": 4373 }, { "epoch": 0.28, "grad_norm": 0.47250294306162127, "learning_rate": 8.461386660627563e-07, "loss": 0.0511, "step": 4374 }, { "epoch": 0.28, "grad_norm": 0.8788969238131757, "learning_rate": 8.460641329605607e-07, "loss": 0.2171, "step": 4375 }, { "epoch": 0.28, "grad_norm": 1.72297044819535, "learning_rate": 8.459895850945735e-07, "loss": 0.2224, "step": 4376 }, { "epoch": 0.28, "grad_norm": 1.4394715100207545, "learning_rate": 8.45915022467975e-07, "loss": 0.1797, "step": 4377 }, { "epoch": 0.28, "grad_norm": 0.3353120173309568, "learning_rate": 8.458404450839462e-07, "loss": 0.1442, "step": 4378 }, { "epoch": 0.28, "grad_norm": 0.6724250722787823, "learning_rate": 8.457658529456689e-07, "loss": 0.2112, "step": 4379 }, { "epoch": 0.28, "grad_norm": 0.8829546336921816, "learning_rate": 8.456912460563253e-07, "loss": 0.1709, "step": 4380 }, { "epoch": 0.28, "grad_norm": 0.6053766450016911, "learning_rate": 8.456166244190981e-07, "loss": 0.2125, "step": 4381 }, { "epoch": 0.28, "grad_norm": 1.1657277703372586, "learning_rate": 8.455419880371709e-07, "loss": 0.2216, "step": 4382 }, { "epoch": 0.28, "grad_norm": 0.5844690456974323, "learning_rate": 8.45467336913728e-07, "loss": 0.152, "step": 4383 }, { "epoch": 0.28, "grad_norm": 0.6908481820040442, "learning_rate": 8.453926710519539e-07, "loss": 0.1361, "step": 4384 }, { "epoch": 0.28, "grad_norm": 0.45749695868996737, "learning_rate": 8.453179904550343e-07, "loss": 0.0473, "step": 4385 }, { "epoch": 0.28, "grad_norm": 1.3220879391832774, "learning_rate": 8.452432951261548e-07, "loss": 0.2768, "step": 4386 }, { "epoch": 0.28, "grad_norm": 0.7971791292876084, "learning_rate": 8.451685850685025e-07, "loss": 0.0975, "step": 4387 }, { "epoch": 0.28, "grad_norm": 0.8164272301833383, "learning_rate": 8.450938602852644e-07, "loss": 0.1351, "step": 4388 }, { "epoch": 0.28, "grad_norm": 5.015208488226374, "learning_rate": 8.450191207796285e-07, "loss": 0.2406, "step": 4389 }, { "epoch": 0.28, "grad_norm": 1.0417216634065258, "learning_rate": 8.449443665547833e-07, "loss": 0.2907, "step": 4390 }, { "epoch": 0.28, "grad_norm": 0.38297480315899257, "learning_rate": 8.44869597613918e-07, "loss": 0.2583, "step": 4391 }, { "epoch": 0.28, "grad_norm": 1.8137261665206976, "learning_rate": 8.447948139602225e-07, "loss": 0.1434, "step": 4392 }, { "epoch": 0.28, "grad_norm": 0.8724312544785039, "learning_rate": 8.44720015596887e-07, "loss": 0.118, "step": 4393 }, { "epoch": 0.28, "grad_norm": 1.307410897826849, "learning_rate": 8.446452025271027e-07, "loss": 0.1632, "step": 4394 }, { "epoch": 0.28, "grad_norm": 0.17111584161678214, "learning_rate": 8.445703747540613e-07, "loss": 0.0935, "step": 4395 }, { "epoch": 0.28, "grad_norm": 1.286088804021365, "learning_rate": 8.444955322809548e-07, "loss": 0.212, "step": 4396 }, { "epoch": 0.28, "grad_norm": 0.9218959587526094, "learning_rate": 8.444206751109766e-07, "loss": 0.2434, "step": 4397 }, { "epoch": 0.28, "grad_norm": 0.608305385898315, "learning_rate": 8.443458032473201e-07, "loss": 0.2542, "step": 4398 }, { "epoch": 0.28, "grad_norm": 0.7082549088828657, "learning_rate": 8.442709166931792e-07, "loss": 0.0726, "step": 4399 }, { "epoch": 0.28, "grad_norm": 0.6564970763572002, "learning_rate": 8.44196015451749e-07, "loss": 0.0889, "step": 4400 }, { "epoch": 0.28, "grad_norm": 1.5232565841103223, "learning_rate": 8.441210995262249e-07, "loss": 0.1873, "step": 4401 }, { "epoch": 0.28, "grad_norm": 0.6616604078127134, "learning_rate": 8.440461689198029e-07, "loss": 0.2932, "step": 4402 }, { "epoch": 0.28, "grad_norm": 0.7353454228622197, "learning_rate": 8.439712236356797e-07, "loss": 0.224, "step": 4403 }, { "epoch": 0.28, "grad_norm": 1.0530861035964485, "learning_rate": 8.438962636770526e-07, "loss": 0.2295, "step": 4404 }, { "epoch": 0.28, "grad_norm": 1.2168156362266176, "learning_rate": 8.438212890471197e-07, "loss": 0.3593, "step": 4405 }, { "epoch": 0.28, "grad_norm": 0.48474040718209976, "learning_rate": 8.437462997490793e-07, "loss": 0.006, "step": 4406 }, { "epoch": 0.28, "grad_norm": 1.7252387637225124, "learning_rate": 8.436712957861308e-07, "loss": 0.0512, "step": 4407 }, { "epoch": 0.28, "grad_norm": 0.5362488869203919, "learning_rate": 8.435962771614739e-07, "loss": 0.1622, "step": 4408 }, { "epoch": 0.28, "grad_norm": 0.7438715173511999, "learning_rate": 8.435212438783091e-07, "loss": 0.269, "step": 4409 }, { "epoch": 0.28, "grad_norm": 1.120037485777825, "learning_rate": 8.434461959398376e-07, "loss": 0.1691, "step": 4410 }, { "epoch": 0.28, "grad_norm": 0.47257631506788905, "learning_rate": 8.433711333492608e-07, "loss": 0.2809, "step": 4411 }, { "epoch": 0.28, "grad_norm": 8.824371668720856, "learning_rate": 8.432960561097814e-07, "loss": 0.3591, "step": 4412 }, { "epoch": 0.28, "grad_norm": 0.6413136399100594, "learning_rate": 8.43220964224602e-07, "loss": 0.1596, "step": 4413 }, { "epoch": 0.28, "grad_norm": 0.30765493077013994, "learning_rate": 8.431458576969263e-07, "loss": 0.1539, "step": 4414 }, { "epoch": 0.28, "grad_norm": 1.817059073970843, "learning_rate": 8.430707365299585e-07, "loss": 0.3303, "step": 4415 }, { "epoch": 0.28, "grad_norm": 1.219533996366765, "learning_rate": 8.429956007269035e-07, "loss": 0.0526, "step": 4416 }, { "epoch": 0.28, "grad_norm": 0.9711890001749441, "learning_rate": 8.429204502909666e-07, "loss": 0.2074, "step": 4417 }, { "epoch": 0.28, "grad_norm": 0.5516315541274354, "learning_rate": 8.428452852253538e-07, "loss": 0.1186, "step": 4418 }, { "epoch": 0.28, "grad_norm": 1.382794531359862, "learning_rate": 8.427701055332721e-07, "loss": 0.1941, "step": 4419 }, { "epoch": 0.28, "grad_norm": 1.3404852144539467, "learning_rate": 8.426949112179287e-07, "loss": 0.1036, "step": 4420 }, { "epoch": 0.28, "grad_norm": 0.6916350369653512, "learning_rate": 8.426197022825313e-07, "loss": 0.1553, "step": 4421 }, { "epoch": 0.28, "grad_norm": 0.8270012871780776, "learning_rate": 8.425444787302887e-07, "loss": 0.1212, "step": 4422 }, { "epoch": 0.28, "grad_norm": 1.246385309263991, "learning_rate": 8.4246924056441e-07, "loss": 0.2484, "step": 4423 }, { "epoch": 0.28, "grad_norm": 0.673906575969224, "learning_rate": 8.423939877881053e-07, "loss": 0.1861, "step": 4424 }, { "epoch": 0.28, "grad_norm": 0.7879070177283408, "learning_rate": 8.423187204045845e-07, "loss": 0.339, "step": 4425 }, { "epoch": 0.28, "grad_norm": 0.348886836199494, "learning_rate": 8.422434384170591e-07, "loss": 0.1119, "step": 4426 }, { "epoch": 0.28, "grad_norm": 0.6745275690026145, "learning_rate": 8.421681418287405e-07, "loss": 0.3574, "step": 4427 }, { "epoch": 0.28, "grad_norm": 0.43383469870148433, "learning_rate": 8.420928306428412e-07, "loss": 0.123, "step": 4428 }, { "epoch": 0.28, "grad_norm": 0.7763078183179507, "learning_rate": 8.420175048625742e-07, "loss": 0.1554, "step": 4429 }, { "epoch": 0.28, "grad_norm": 0.7748784863518428, "learning_rate": 8.419421644911526e-07, "loss": 0.0832, "step": 4430 }, { "epoch": 0.28, "grad_norm": 0.6505815947911184, "learning_rate": 8.418668095317911e-07, "loss": 0.248, "step": 4431 }, { "epoch": 0.28, "grad_norm": 0.5520398041901158, "learning_rate": 8.417914399877042e-07, "loss": 0.1497, "step": 4432 }, { "epoch": 0.28, "grad_norm": 1.0835156592478004, "learning_rate": 8.417160558621073e-07, "loss": 0.3796, "step": 4433 }, { "epoch": 0.28, "grad_norm": 0.5971967168656509, "learning_rate": 8.416406571582167e-07, "loss": 0.242, "step": 4434 }, { "epoch": 0.28, "grad_norm": 0.916643175209122, "learning_rate": 8.415652438792486e-07, "loss": 0.3095, "step": 4435 }, { "epoch": 0.28, "grad_norm": 0.775772313594287, "learning_rate": 8.414898160284207e-07, "loss": 0.1477, "step": 4436 }, { "epoch": 0.28, "grad_norm": 0.9152256704284947, "learning_rate": 8.414143736089509e-07, "loss": 0.0795, "step": 4437 }, { "epoch": 0.28, "grad_norm": 1.0285415379650333, "learning_rate": 8.413389166240574e-07, "loss": 0.3897, "step": 4438 }, { "epoch": 0.28, "grad_norm": 0.6570578741704823, "learning_rate": 8.412634450769598e-07, "loss": 0.181, "step": 4439 }, { "epoch": 0.28, "grad_norm": 1.2676443027375748, "learning_rate": 8.411879589708775e-07, "loss": 0.1724, "step": 4440 }, { "epoch": 0.28, "grad_norm": 0.7798554241070158, "learning_rate": 8.411124583090307e-07, "loss": 0.0496, "step": 4441 }, { "epoch": 0.28, "grad_norm": 0.5629140076740263, "learning_rate": 8.410369430946412e-07, "loss": 0.2308, "step": 4442 }, { "epoch": 0.28, "grad_norm": 0.780273770448044, "learning_rate": 8.409614133309297e-07, "loss": 0.2241, "step": 4443 }, { "epoch": 0.28, "grad_norm": 0.5670293097966727, "learning_rate": 8.408858690211191e-07, "loss": 0.3358, "step": 4444 }, { "epoch": 0.28, "grad_norm": 0.5042621290216677, "learning_rate": 8.408103101684321e-07, "loss": 0.142, "step": 4445 }, { "epoch": 0.28, "grad_norm": 0.2387795036670779, "learning_rate": 8.40734736776092e-07, "loss": 0.0405, "step": 4446 }, { "epoch": 0.28, "grad_norm": 0.7124827909652498, "learning_rate": 8.406591488473232e-07, "loss": 0.2548, "step": 4447 }, { "epoch": 0.28, "grad_norm": 0.5194044697111061, "learning_rate": 8.405835463853502e-07, "loss": 0.1652, "step": 4448 }, { "epoch": 0.28, "grad_norm": 0.5701851869266625, "learning_rate": 8.405079293933985e-07, "loss": 0.0565, "step": 4449 }, { "epoch": 0.28, "grad_norm": 1.0910496102773226, "learning_rate": 8.404322978746941e-07, "loss": 0.4179, "step": 4450 }, { "epoch": 0.28, "grad_norm": 0.6895464900298918, "learning_rate": 8.403566518324634e-07, "loss": 0.1947, "step": 4451 }, { "epoch": 0.28, "grad_norm": 0.6471583129533248, "learning_rate": 8.402809912699338e-07, "loss": 0.3407, "step": 4452 }, { "epoch": 0.28, "grad_norm": 2.0732438394333603, "learning_rate": 8.402053161903331e-07, "loss": 0.2065, "step": 4453 }, { "epoch": 0.28, "grad_norm": 0.47199993676472796, "learning_rate": 8.401296265968896e-07, "loss": 0.0839, "step": 4454 }, { "epoch": 0.28, "grad_norm": 0.6270475259464904, "learning_rate": 8.400539224928325e-07, "loss": 0.2564, "step": 4455 }, { "epoch": 0.28, "grad_norm": 1.084180740135046, "learning_rate": 8.399782038813916e-07, "loss": 0.1497, "step": 4456 }, { "epoch": 0.28, "grad_norm": 0.5701749049464911, "learning_rate": 8.399024707657969e-07, "loss": 0.1572, "step": 4457 }, { "epoch": 0.28, "grad_norm": 0.6142287732929258, "learning_rate": 8.398267231492797e-07, "loss": 0.0791, "step": 4458 }, { "epoch": 0.28, "grad_norm": 0.42223800371297737, "learning_rate": 8.397509610350712e-07, "loss": 0.1058, "step": 4459 }, { "epoch": 0.28, "grad_norm": 0.18619950043951156, "learning_rate": 8.396751844264038e-07, "loss": 0.002, "step": 4460 }, { "epoch": 0.28, "grad_norm": 1.0950956996937822, "learning_rate": 8.395993933265101e-07, "loss": 0.295, "step": 4461 }, { "epoch": 0.28, "grad_norm": 1.1258189986203382, "learning_rate": 8.395235877386236e-07, "loss": 0.1735, "step": 4462 }, { "epoch": 0.28, "grad_norm": 0.6911988427258985, "learning_rate": 8.394477676659784e-07, "loss": 0.2315, "step": 4463 }, { "epoch": 0.28, "grad_norm": 0.8369721166034685, "learning_rate": 8.39371933111809e-07, "loss": 0.4423, "step": 4464 }, { "epoch": 0.28, "grad_norm": 1.2261142755252696, "learning_rate": 8.392960840793508e-07, "loss": 0.346, "step": 4465 }, { "epoch": 0.28, "grad_norm": 0.6335384642094622, "learning_rate": 8.392202205718395e-07, "loss": 0.1489, "step": 4466 }, { "epoch": 0.28, "grad_norm": 0.5759397326677361, "learning_rate": 8.391443425925118e-07, "loss": 0.1198, "step": 4467 }, { "epoch": 0.28, "grad_norm": 0.8138314939314969, "learning_rate": 8.390684501446046e-07, "loss": 0.3187, "step": 4468 }, { "epoch": 0.28, "grad_norm": 0.9922259606701328, "learning_rate": 8.389925432313556e-07, "loss": 0.2434, "step": 4469 }, { "epoch": 0.29, "grad_norm": 1.9439010131970127, "learning_rate": 8.389166218560033e-07, "loss": 0.281, "step": 4470 }, { "epoch": 0.29, "grad_norm": 0.928976400579361, "learning_rate": 8.388406860217867e-07, "loss": 0.1049, "step": 4471 }, { "epoch": 0.29, "grad_norm": 3.8344175767333075, "learning_rate": 8.387647357319452e-07, "loss": 0.1803, "step": 4472 }, { "epoch": 0.29, "grad_norm": 0.3053728221696224, "learning_rate": 8.38688770989719e-07, "loss": 0.2961, "step": 4473 }, { "epoch": 0.29, "grad_norm": 1.6094112650877064, "learning_rate": 8.386127917983492e-07, "loss": 0.2262, "step": 4474 }, { "epoch": 0.29, "grad_norm": 0.39069129879624453, "learning_rate": 8.385367981610769e-07, "loss": 0.0516, "step": 4475 }, { "epoch": 0.29, "grad_norm": 0.33031838581037737, "learning_rate": 8.384607900811441e-07, "loss": 0.1315, "step": 4476 }, { "epoch": 0.29, "grad_norm": 2.073025608080206, "learning_rate": 8.383847675617938e-07, "loss": 0.1077, "step": 4477 }, { "epoch": 0.29, "grad_norm": 0.47848041576443184, "learning_rate": 8.383087306062689e-07, "loss": 0.1378, "step": 4478 }, { "epoch": 0.29, "grad_norm": 3.092192500525181, "learning_rate": 8.382326792178136e-07, "loss": 0.1555, "step": 4479 }, { "epoch": 0.29, "grad_norm": 0.4073851225514589, "learning_rate": 8.381566133996722e-07, "loss": 0.1103, "step": 4480 }, { "epoch": 0.29, "grad_norm": 0.5885160317150906, "learning_rate": 8.3808053315509e-07, "loss": 0.1013, "step": 4481 }, { "epoch": 0.29, "grad_norm": 1.4601262119205156, "learning_rate": 8.380044384873126e-07, "loss": 0.0265, "step": 4482 }, { "epoch": 0.29, "grad_norm": 0.4609037119067469, "learning_rate": 8.379283293995862e-07, "loss": 0.1188, "step": 4483 }, { "epoch": 0.29, "grad_norm": 0.7100195224733987, "learning_rate": 8.378522058951582e-07, "loss": 0.1741, "step": 4484 }, { "epoch": 0.29, "grad_norm": 0.9272890709236896, "learning_rate": 8.377760679772759e-07, "loss": 0.2117, "step": 4485 }, { "epoch": 0.29, "grad_norm": 4.396616398343421, "learning_rate": 8.376999156491873e-07, "loss": 0.2864, "step": 4486 }, { "epoch": 0.29, "grad_norm": 0.9904552251003063, "learning_rate": 8.376237489141416e-07, "loss": 0.3021, "step": 4487 }, { "epoch": 0.29, "grad_norm": 0.668455169482055, "learning_rate": 8.37547567775388e-07, "loss": 0.4558, "step": 4488 }, { "epoch": 0.29, "grad_norm": 1.3726506649987562, "learning_rate": 8.374713722361766e-07, "loss": 0.3314, "step": 4489 }, { "epoch": 0.29, "grad_norm": 0.50682511573133, "learning_rate": 8.373951622997581e-07, "loss": 0.2216, "step": 4490 }, { "epoch": 0.29, "grad_norm": 0.6726497042068822, "learning_rate": 8.373189379693837e-07, "loss": 0.2344, "step": 4491 }, { "epoch": 0.29, "grad_norm": 1.609016184030008, "learning_rate": 8.372426992483054e-07, "loss": 0.3915, "step": 4492 }, { "epoch": 0.29, "grad_norm": 1.1723781811917382, "learning_rate": 8.371664461397756e-07, "loss": 0.3616, "step": 4493 }, { "epoch": 0.29, "grad_norm": 0.5127086883005437, "learning_rate": 8.370901786470474e-07, "loss": 0.1553, "step": 4494 }, { "epoch": 0.29, "grad_norm": 0.67868702046723, "learning_rate": 8.370138967733744e-07, "loss": 0.1686, "step": 4495 }, { "epoch": 0.29, "grad_norm": 1.0072948668837383, "learning_rate": 8.369376005220113e-07, "loss": 0.3462, "step": 4496 }, { "epoch": 0.29, "grad_norm": 1.9595075385103153, "learning_rate": 8.368612898962126e-07, "loss": 0.1108, "step": 4497 }, { "epoch": 0.29, "grad_norm": 3.602483397325656, "learning_rate": 8.367849648992342e-07, "loss": 0.0887, "step": 4498 }, { "epoch": 0.29, "grad_norm": 1.374804428447395, "learning_rate": 8.367086255343322e-07, "loss": 0.4079, "step": 4499 }, { "epoch": 0.29, "grad_norm": 0.874141071120875, "learning_rate": 8.366322718047634e-07, "loss": 0.299, "step": 4500 }, { "epoch": 0.29, "grad_norm": 1.2232745684819886, "learning_rate": 8.36555903713785e-07, "loss": 0.3907, "step": 4501 }, { "epoch": 0.29, "grad_norm": 0.9824008189289976, "learning_rate": 8.364795212646553e-07, "loss": 0.2109, "step": 4502 }, { "epoch": 0.29, "grad_norm": 1.2373988796750734, "learning_rate": 8.364031244606329e-07, "loss": 0.0894, "step": 4503 }, { "epoch": 0.29, "grad_norm": 1.2971308108811155, "learning_rate": 8.363267133049768e-07, "loss": 0.145, "step": 4504 }, { "epoch": 0.29, "grad_norm": 0.6923696073736749, "learning_rate": 8.362502878009472e-07, "loss": 0.1401, "step": 4505 }, { "epoch": 0.29, "grad_norm": 0.8011960752370959, "learning_rate": 8.361738479518041e-07, "loss": 0.2743, "step": 4506 }, { "epoch": 0.29, "grad_norm": 0.9141753106075413, "learning_rate": 8.360973937608091e-07, "loss": 0.2063, "step": 4507 }, { "epoch": 0.29, "grad_norm": 0.9111172786342697, "learning_rate": 8.360209252312236e-07, "loss": 0.3673, "step": 4508 }, { "epoch": 0.29, "grad_norm": 0.21164414406498477, "learning_rate": 8.359444423663099e-07, "loss": 0.1103, "step": 4509 }, { "epoch": 0.29, "grad_norm": 1.2468528092728297, "learning_rate": 8.358679451693309e-07, "loss": 0.1975, "step": 4510 }, { "epoch": 0.29, "grad_norm": 0.3175666792175712, "learning_rate": 8.357914336435503e-07, "loss": 0.1048, "step": 4511 }, { "epoch": 0.29, "grad_norm": 0.41181223836699016, "learning_rate": 8.357149077922321e-07, "loss": 0.003, "step": 4512 }, { "epoch": 0.29, "grad_norm": 1.0454166567368108, "learning_rate": 8.356383676186409e-07, "loss": 0.3953, "step": 4513 }, { "epoch": 0.29, "grad_norm": 0.6133492766439936, "learning_rate": 8.355618131260424e-07, "loss": 0.1993, "step": 4514 }, { "epoch": 0.29, "grad_norm": 0.5328039269781779, "learning_rate": 8.354852443177023e-07, "loss": 0.0975, "step": 4515 }, { "epoch": 0.29, "grad_norm": 0.5075796705113474, "learning_rate": 8.354086611968871e-07, "loss": 0.1081, "step": 4516 }, { "epoch": 0.29, "grad_norm": 0.6732698907190383, "learning_rate": 8.353320637668644e-07, "loss": 0.1204, "step": 4517 }, { "epoch": 0.29, "grad_norm": 0.8895655181978893, "learning_rate": 8.352554520309017e-07, "loss": 0.2238, "step": 4518 }, { "epoch": 0.29, "grad_norm": 0.36560431686336975, "learning_rate": 8.351788259922676e-07, "loss": 0.1056, "step": 4519 }, { "epoch": 0.29, "grad_norm": 0.565130468908776, "learning_rate": 8.351021856542308e-07, "loss": 0.2859, "step": 4520 }, { "epoch": 0.29, "grad_norm": 0.7907019642042231, "learning_rate": 8.350255310200611e-07, "loss": 0.1872, "step": 4521 }, { "epoch": 0.29, "grad_norm": 0.9747517445554856, "learning_rate": 8.349488620930289e-07, "loss": 0.1934, "step": 4522 }, { "epoch": 0.29, "grad_norm": 0.9964218880977478, "learning_rate": 8.348721788764048e-07, "loss": 0.4043, "step": 4523 }, { "epoch": 0.29, "grad_norm": 3.487042549188682, "learning_rate": 8.347954813734605e-07, "loss": 0.1708, "step": 4524 }, { "epoch": 0.29, "grad_norm": 0.8260511257866386, "learning_rate": 8.347187695874678e-07, "loss": 0.0331, "step": 4525 }, { "epoch": 0.29, "grad_norm": 2.2344444427781944, "learning_rate": 8.346420435216996e-07, "loss": 0.1984, "step": 4526 }, { "epoch": 0.29, "grad_norm": 3.441167080806813, "learning_rate": 8.34565303179429e-07, "loss": 0.239, "step": 4527 }, { "epoch": 0.29, "grad_norm": 1.164332167445458, "learning_rate": 8.344885485639302e-07, "loss": 0.1012, "step": 4528 }, { "epoch": 0.29, "grad_norm": 0.774718838597696, "learning_rate": 8.344117796784773e-07, "loss": 0.1094, "step": 4529 }, { "epoch": 0.29, "grad_norm": 0.5371450454458172, "learning_rate": 8.343349965263457e-07, "loss": 0.2501, "step": 4530 }, { "epoch": 0.29, "grad_norm": 0.5861537448207714, "learning_rate": 8.342581991108112e-07, "loss": 0.1648, "step": 4531 }, { "epoch": 0.29, "grad_norm": 0.7117029575482539, "learning_rate": 8.341813874351499e-07, "loss": 0.1814, "step": 4532 }, { "epoch": 0.29, "grad_norm": 0.49056028703580873, "learning_rate": 8.341045615026388e-07, "loss": 0.1694, "step": 4533 }, { "epoch": 0.29, "grad_norm": 0.48242432652565337, "learning_rate": 8.340277213165554e-07, "loss": 0.2004, "step": 4534 }, { "epoch": 0.29, "grad_norm": 0.922951045137883, "learning_rate": 8.33950866880178e-07, "loss": 0.1574, "step": 4535 }, { "epoch": 0.29, "grad_norm": 0.3845890214990481, "learning_rate": 8.338739981967853e-07, "loss": 0.2496, "step": 4536 }, { "epoch": 0.29, "grad_norm": 1.1293397170787918, "learning_rate": 8.337971152696565e-07, "loss": 0.2678, "step": 4537 }, { "epoch": 0.29, "grad_norm": 0.9948576701873844, "learning_rate": 8.33720218102072e-07, "loss": 0.1849, "step": 4538 }, { "epoch": 0.29, "grad_norm": 0.5148305889359297, "learning_rate": 8.336433066973121e-07, "loss": 0.1714, "step": 4539 }, { "epoch": 0.29, "grad_norm": 1.1373115981841952, "learning_rate": 8.33566381058658e-07, "loss": 0.1738, "step": 4540 }, { "epoch": 0.29, "grad_norm": 0.6299221472633558, "learning_rate": 8.334894411893913e-07, "loss": 0.4132, "step": 4541 }, { "epoch": 0.29, "grad_norm": 0.7694773051375428, "learning_rate": 8.33412487092795e-07, "loss": 0.189, "step": 4542 }, { "epoch": 0.29, "grad_norm": 0.9193780317310255, "learning_rate": 8.333355187721515e-07, "loss": 0.5219, "step": 4543 }, { "epoch": 0.29, "grad_norm": 0.3733278175440414, "learning_rate": 8.332585362307447e-07, "loss": 0.1146, "step": 4544 }, { "epoch": 0.29, "grad_norm": 0.39541841523059135, "learning_rate": 8.331815394718589e-07, "loss": 0.1744, "step": 4545 }, { "epoch": 0.29, "grad_norm": 0.5466051194717116, "learning_rate": 8.331045284987789e-07, "loss": 0.1114, "step": 4546 }, { "epoch": 0.29, "grad_norm": 0.5900345574222747, "learning_rate": 8.3302750331479e-07, "loss": 0.114, "step": 4547 }, { "epoch": 0.29, "grad_norm": 0.9652657532302261, "learning_rate": 8.329504639231783e-07, "loss": 0.5105, "step": 4548 }, { "epoch": 0.29, "grad_norm": 0.9475445901641059, "learning_rate": 8.328734103272306e-07, "loss": 0.3432, "step": 4549 }, { "epoch": 0.29, "grad_norm": 0.6756970679755269, "learning_rate": 8.32796342530234e-07, "loss": 0.3576, "step": 4550 }, { "epoch": 0.29, "grad_norm": 0.7712110044567839, "learning_rate": 8.327192605354765e-07, "loss": 0.278, "step": 4551 }, { "epoch": 0.29, "grad_norm": 0.9991316278556664, "learning_rate": 8.326421643462465e-07, "loss": 0.3683, "step": 4552 }, { "epoch": 0.29, "grad_norm": 0.4824304657385116, "learning_rate": 8.325650539658329e-07, "loss": 0.2923, "step": 4553 }, { "epoch": 0.29, "grad_norm": 0.6166678229650034, "learning_rate": 8.324879293975257e-07, "loss": 0.2142, "step": 4554 }, { "epoch": 0.29, "grad_norm": 2.9273594079410605, "learning_rate": 8.32410790644615e-07, "loss": 0.1816, "step": 4555 }, { "epoch": 0.29, "grad_norm": 1.2826245959745397, "learning_rate": 8.323336377103917e-07, "loss": 0.3449, "step": 4556 }, { "epoch": 0.29, "grad_norm": 0.6230622406872659, "learning_rate": 8.322564705981474e-07, "loss": 0.6153, "step": 4557 }, { "epoch": 0.29, "grad_norm": 0.3643263834950898, "learning_rate": 8.321792893111741e-07, "loss": 0.1063, "step": 4558 }, { "epoch": 0.29, "grad_norm": 1.0041656352019737, "learning_rate": 8.321020938527645e-07, "loss": 0.3541, "step": 4559 }, { "epoch": 0.29, "grad_norm": 0.875425035071312, "learning_rate": 8.320248842262121e-07, "loss": 0.2437, "step": 4560 }, { "epoch": 0.29, "grad_norm": 3.158182413474993, "learning_rate": 8.319476604348106e-07, "loss": 0.0853, "step": 4561 }, { "epoch": 0.29, "grad_norm": 1.1630444341751074, "learning_rate": 8.318704224818547e-07, "loss": 0.2336, "step": 4562 }, { "epoch": 0.29, "grad_norm": 0.741685929124548, "learning_rate": 8.317931703706393e-07, "loss": 0.2526, "step": 4563 }, { "epoch": 0.29, "grad_norm": 0.1489538592135694, "learning_rate": 8.317159041044604e-07, "loss": 0.0017, "step": 4564 }, { "epoch": 0.29, "grad_norm": 0.5622834596398911, "learning_rate": 8.316386236866142e-07, "loss": 0.2137, "step": 4565 }, { "epoch": 0.29, "grad_norm": 2.4250045625899355, "learning_rate": 8.315613291203976e-07, "loss": 0.1515, "step": 4566 }, { "epoch": 0.29, "grad_norm": 0.9054182827221595, "learning_rate": 8.314840204091082e-07, "loss": 0.3344, "step": 4567 }, { "epoch": 0.29, "grad_norm": 1.0175592120730144, "learning_rate": 8.314066975560441e-07, "loss": 0.3002, "step": 4568 }, { "epoch": 0.29, "grad_norm": 0.7709846955208957, "learning_rate": 8.31329360564504e-07, "loss": 0.2873, "step": 4569 }, { "epoch": 0.29, "grad_norm": 1.1544419692367198, "learning_rate": 8.312520094377872e-07, "loss": 0.0196, "step": 4570 }, { "epoch": 0.29, "grad_norm": 0.860307808512491, "learning_rate": 8.311746441791941e-07, "loss": 0.2551, "step": 4571 }, { "epoch": 0.29, "grad_norm": 0.36251735774144683, "learning_rate": 8.310972647920247e-07, "loss": 0.4366, "step": 4572 }, { "epoch": 0.29, "grad_norm": 4.193437641023994, "learning_rate": 8.310198712795806e-07, "loss": 0.1652, "step": 4573 }, { "epoch": 0.29, "grad_norm": 0.4583817126495332, "learning_rate": 8.309424636451632e-07, "loss": 0.0906, "step": 4574 }, { "epoch": 0.29, "grad_norm": 0.57855699259455, "learning_rate": 8.308650418920751e-07, "loss": 0.3078, "step": 4575 }, { "epoch": 0.29, "grad_norm": 0.6839847152348562, "learning_rate": 8.307876060236191e-07, "loss": 0.1316, "step": 4576 }, { "epoch": 0.29, "grad_norm": 0.7829264233824589, "learning_rate": 8.307101560430989e-07, "loss": 0.2309, "step": 4577 }, { "epoch": 0.29, "grad_norm": 1.2290653221772467, "learning_rate": 8.306326919538186e-07, "loss": 0.1449, "step": 4578 }, { "epoch": 0.29, "grad_norm": 0.7162220578243333, "learning_rate": 8.305552137590831e-07, "loss": 0.206, "step": 4579 }, { "epoch": 0.29, "grad_norm": 0.9666480360558779, "learning_rate": 8.304777214621976e-07, "loss": 0.1349, "step": 4580 }, { "epoch": 0.29, "grad_norm": 1.1651099365529307, "learning_rate": 8.304002150664682e-07, "loss": 0.2618, "step": 4581 }, { "epoch": 0.29, "grad_norm": 0.15035581837467768, "learning_rate": 8.303226945752014e-07, "loss": 0.0857, "step": 4582 }, { "epoch": 0.29, "grad_norm": 0.6239645041727994, "learning_rate": 8.302451599917044e-07, "loss": 0.3263, "step": 4583 }, { "epoch": 0.29, "grad_norm": 1.3917517143758265, "learning_rate": 8.301676113192852e-07, "loss": 0.28, "step": 4584 }, { "epoch": 0.29, "grad_norm": 0.3866381229855671, "learning_rate": 8.300900485612518e-07, "loss": 0.0685, "step": 4585 }, { "epoch": 0.29, "grad_norm": 0.4654222159644312, "learning_rate": 8.300124717209134e-07, "loss": 0.2056, "step": 4586 }, { "epoch": 0.29, "grad_norm": 0.6494826203489248, "learning_rate": 8.299348808015795e-07, "loss": 0.0846, "step": 4587 }, { "epoch": 0.29, "grad_norm": 0.8289824743122928, "learning_rate": 8.298572758065602e-07, "loss": 0.1841, "step": 4588 }, { "epoch": 0.29, "grad_norm": 0.35527394307717897, "learning_rate": 8.297796567391668e-07, "loss": 0.1913, "step": 4589 }, { "epoch": 0.29, "grad_norm": 0.6139715393711419, "learning_rate": 8.297020236027101e-07, "loss": 0.2243, "step": 4590 }, { "epoch": 0.29, "grad_norm": 1.037744256135214, "learning_rate": 8.296243764005022e-07, "loss": 0.0647, "step": 4591 }, { "epoch": 0.29, "grad_norm": 0.40674151977736805, "learning_rate": 8.295467151358559e-07, "loss": 0.098, "step": 4592 }, { "epoch": 0.29, "grad_norm": 2.3260904276436847, "learning_rate": 8.294690398120842e-07, "loss": 0.1931, "step": 4593 }, { "epoch": 0.29, "grad_norm": 1.3688959676407841, "learning_rate": 8.29391350432501e-07, "loss": 0.1907, "step": 4594 }, { "epoch": 0.29, "grad_norm": 0.7784545250374711, "learning_rate": 8.293136470004206e-07, "loss": 0.2911, "step": 4595 }, { "epoch": 0.29, "grad_norm": 0.7557252652780297, "learning_rate": 8.29235929519158e-07, "loss": 0.1921, "step": 4596 }, { "epoch": 0.29, "grad_norm": 0.703423784255771, "learning_rate": 8.291581979920288e-07, "loss": 0.3141, "step": 4597 }, { "epoch": 0.29, "grad_norm": 0.5752349269239349, "learning_rate": 8.290804524223491e-07, "loss": 0.0607, "step": 4598 }, { "epoch": 0.29, "grad_norm": 0.9029668416968109, "learning_rate": 8.29002692813436e-07, "loss": 0.1323, "step": 4599 }, { "epoch": 0.29, "grad_norm": 2.2662017446158336, "learning_rate": 8.289249191686063e-07, "loss": 0.3032, "step": 4600 }, { "epoch": 0.29, "grad_norm": 0.7251310077502929, "learning_rate": 8.288471314911786e-07, "loss": 0.1822, "step": 4601 }, { "epoch": 0.29, "grad_norm": 0.11923768026772609, "learning_rate": 8.287693297844711e-07, "loss": 0.0059, "step": 4602 }, { "epoch": 0.29, "grad_norm": 0.6914129724847398, "learning_rate": 8.286915140518032e-07, "loss": 0.081, "step": 4603 }, { "epoch": 0.29, "grad_norm": 0.5500244242234917, "learning_rate": 8.286136842964944e-07, "loss": 0.3442, "step": 4604 }, { "epoch": 0.29, "grad_norm": 0.5948898502347938, "learning_rate": 8.285358405218654e-07, "loss": 0.204, "step": 4605 }, { "epoch": 0.29, "grad_norm": 0.5806936398897938, "learning_rate": 8.284579827312368e-07, "loss": 0.1383, "step": 4606 }, { "epoch": 0.29, "grad_norm": 0.2924043730778653, "learning_rate": 8.283801109279305e-07, "loss": 0.007, "step": 4607 }, { "epoch": 0.29, "grad_norm": 0.888021047655419, "learning_rate": 8.283022251152685e-07, "loss": 0.2459, "step": 4608 }, { "epoch": 0.29, "grad_norm": 0.4635528743666432, "learning_rate": 8.282243252965737e-07, "loss": 0.2149, "step": 4609 }, { "epoch": 0.29, "grad_norm": 1.7244105226734352, "learning_rate": 8.281464114751692e-07, "loss": 0.2014, "step": 4610 }, { "epoch": 0.29, "grad_norm": 2.4907907933620943, "learning_rate": 8.280684836543793e-07, "loss": 0.1493, "step": 4611 }, { "epoch": 0.29, "grad_norm": 1.6492772531144928, "learning_rate": 8.279905418375283e-07, "loss": 0.1192, "step": 4612 }, { "epoch": 0.29, "grad_norm": 1.1096932116011753, "learning_rate": 8.279125860279415e-07, "loss": 0.4534, "step": 4613 }, { "epoch": 0.29, "grad_norm": 0.5776216364701081, "learning_rate": 8.278346162289445e-07, "loss": 0.1065, "step": 4614 }, { "epoch": 0.29, "grad_norm": 0.7320969443695299, "learning_rate": 8.277566324438639e-07, "loss": 0.3739, "step": 4615 }, { "epoch": 0.29, "grad_norm": 1.5258340744849652, "learning_rate": 8.276786346760262e-07, "loss": 0.2819, "step": 4616 }, { "epoch": 0.29, "grad_norm": 2.405493453938572, "learning_rate": 8.276006229287594e-07, "loss": 0.0787, "step": 4617 }, { "epoch": 0.29, "grad_norm": 1.9396627536486883, "learning_rate": 8.275225972053917e-07, "loss": 0.0345, "step": 4618 }, { "epoch": 0.29, "grad_norm": 0.8877227374545172, "learning_rate": 8.274445575092512e-07, "loss": 0.0973, "step": 4619 }, { "epoch": 0.29, "grad_norm": 1.189231121656869, "learning_rate": 8.273665038436679e-07, "loss": 0.111, "step": 4620 }, { "epoch": 0.29, "grad_norm": 2.423798873505761, "learning_rate": 8.272884362119712e-07, "loss": 0.2148, "step": 4621 }, { "epoch": 0.29, "grad_norm": 2.554313924010514, "learning_rate": 8.272103546174921e-07, "loss": 0.0598, "step": 4622 }, { "epoch": 0.29, "grad_norm": 0.46607967107758846, "learning_rate": 8.271322590635615e-07, "loss": 0.1261, "step": 4623 }, { "epoch": 0.29, "grad_norm": 1.0127076897066063, "learning_rate": 8.27054149553511e-07, "loss": 0.2251, "step": 4624 }, { "epoch": 0.29, "grad_norm": 0.9804034283357304, "learning_rate": 8.269760260906731e-07, "loss": 0.1116, "step": 4625 }, { "epoch": 0.3, "grad_norm": 0.7108680094980235, "learning_rate": 8.268978886783805e-07, "loss": 0.311, "step": 4626 }, { "epoch": 0.3, "grad_norm": 0.47837300243419517, "learning_rate": 8.268197373199669e-07, "loss": 0.0157, "step": 4627 }, { "epoch": 0.3, "grad_norm": 0.8288377237677884, "learning_rate": 8.267415720187662e-07, "loss": 0.1729, "step": 4628 }, { "epoch": 0.3, "grad_norm": 1.7902554788474534, "learning_rate": 8.266633927781135e-07, "loss": 0.3295, "step": 4629 }, { "epoch": 0.3, "grad_norm": 1.550425026591602, "learning_rate": 8.265851996013436e-07, "loss": 0.0477, "step": 4630 }, { "epoch": 0.3, "grad_norm": 0.7396059757718584, "learning_rate": 8.265069924917924e-07, "loss": 0.0871, "step": 4631 }, { "epoch": 0.3, "grad_norm": 0.6696288100632679, "learning_rate": 8.264287714527969e-07, "loss": 0.0493, "step": 4632 }, { "epoch": 0.3, "grad_norm": 0.5719594532710218, "learning_rate": 8.263505364876937e-07, "loss": 0.1919, "step": 4633 }, { "epoch": 0.3, "grad_norm": 0.7864193915426383, "learning_rate": 8.262722875998204e-07, "loss": 0.1095, "step": 4634 }, { "epoch": 0.3, "grad_norm": 0.9594146892344215, "learning_rate": 8.261940247925154e-07, "loss": 0.1684, "step": 4635 }, { "epoch": 0.3, "grad_norm": 1.6935617213124876, "learning_rate": 8.261157480691178e-07, "loss": 0.3149, "step": 4636 }, { "epoch": 0.3, "grad_norm": 1.38494366850966, "learning_rate": 8.260374574329668e-07, "loss": 0.1426, "step": 4637 }, { "epoch": 0.3, "grad_norm": 0.7821993996292126, "learning_rate": 8.259591528874022e-07, "loss": 0.1792, "step": 4638 }, { "epoch": 0.3, "grad_norm": 0.5951570960976268, "learning_rate": 8.25880834435765e-07, "loss": 0.1402, "step": 4639 }, { "epoch": 0.3, "grad_norm": 1.3405780359458321, "learning_rate": 8.258025020813963e-07, "loss": 0.1172, "step": 4640 }, { "epoch": 0.3, "grad_norm": 0.6995719513746204, "learning_rate": 8.25724155827638e-07, "loss": 0.0942, "step": 4641 }, { "epoch": 0.3, "grad_norm": 1.2475965460830065, "learning_rate": 8.256457956778324e-07, "loss": 0.1576, "step": 4642 }, { "epoch": 0.3, "grad_norm": 0.6485662326946411, "learning_rate": 8.255674216353224e-07, "loss": 0.0878, "step": 4643 }, { "epoch": 0.3, "grad_norm": 0.9023323330171991, "learning_rate": 8.254890337034519e-07, "loss": 0.3953, "step": 4644 }, { "epoch": 0.3, "grad_norm": 1.1302258505314244, "learning_rate": 8.254106318855648e-07, "loss": 0.0984, "step": 4645 }, { "epoch": 0.3, "grad_norm": 0.3699190511740946, "learning_rate": 8.25332216185006e-07, "loss": 0.0037, "step": 4646 }, { "epoch": 0.3, "grad_norm": 0.336241396869838, "learning_rate": 8.252537866051208e-07, "loss": 0.3013, "step": 4647 }, { "epoch": 0.3, "grad_norm": 1.0047472333264649, "learning_rate": 8.251753431492553e-07, "loss": 0.0824, "step": 4648 }, { "epoch": 0.3, "grad_norm": 0.7740210959644076, "learning_rate": 8.250968858207559e-07, "loss": 0.1275, "step": 4649 }, { "epoch": 0.3, "grad_norm": 0.6199947318242983, "learning_rate": 8.2501841462297e-07, "loss": 0.1217, "step": 4650 }, { "epoch": 0.3, "grad_norm": 0.8882994311764234, "learning_rate": 8.24939929559245e-07, "loss": 0.1321, "step": 4651 }, { "epoch": 0.3, "grad_norm": 0.889286149368149, "learning_rate": 8.248614306329295e-07, "loss": 0.3605, "step": 4652 }, { "epoch": 0.3, "grad_norm": 2.649976919422674, "learning_rate": 8.247829178473722e-07, "loss": 0.145, "step": 4653 }, { "epoch": 0.3, "grad_norm": 0.9265333723529212, "learning_rate": 8.247043912059228e-07, "loss": 0.2944, "step": 4654 }, { "epoch": 0.3, "grad_norm": 0.2498562724746331, "learning_rate": 8.246258507119313e-07, "loss": 0.0102, "step": 4655 }, { "epoch": 0.3, "grad_norm": 2.876708751259279, "learning_rate": 8.245472963687484e-07, "loss": 0.0302, "step": 4656 }, { "epoch": 0.3, "grad_norm": 0.8298423753477335, "learning_rate": 8.244687281797254e-07, "loss": 0.4475, "step": 4657 }, { "epoch": 0.3, "grad_norm": 0.6697123832860922, "learning_rate": 8.243901461482143e-07, "loss": 0.096, "step": 4658 }, { "epoch": 0.3, "grad_norm": 0.49985156868772807, "learning_rate": 8.243115502775675e-07, "loss": 0.1561, "step": 4659 }, { "epoch": 0.3, "grad_norm": 0.8748840466630781, "learning_rate": 8.242329405711379e-07, "loss": 0.3036, "step": 4660 }, { "epoch": 0.3, "grad_norm": 0.4727218219758068, "learning_rate": 8.241543170322793e-07, "loss": 0.1853, "step": 4661 }, { "epoch": 0.3, "grad_norm": 0.6569204405804752, "learning_rate": 8.240756796643459e-07, "loss": 0.1419, "step": 4662 }, { "epoch": 0.3, "grad_norm": 0.7974699904017554, "learning_rate": 8.239970284706925e-07, "loss": 0.1973, "step": 4663 }, { "epoch": 0.3, "grad_norm": 2.2116589416155774, "learning_rate": 8.239183634546746e-07, "loss": 0.0917, "step": 4664 }, { "epoch": 0.3, "grad_norm": 1.9486569978203159, "learning_rate": 8.238396846196481e-07, "loss": 0.2162, "step": 4665 }, { "epoch": 0.3, "grad_norm": 0.585708348381636, "learning_rate": 8.237609919689696e-07, "loss": 0.2014, "step": 4666 }, { "epoch": 0.3, "grad_norm": 0.7825492050299566, "learning_rate": 8.236822855059965e-07, "loss": 0.1459, "step": 4667 }, { "epoch": 0.3, "grad_norm": 0.8328841281850551, "learning_rate": 8.236035652340864e-07, "loss": 0.4653, "step": 4668 }, { "epoch": 0.3, "grad_norm": 0.7004520417155415, "learning_rate": 8.235248311565977e-07, "loss": 0.0492, "step": 4669 }, { "epoch": 0.3, "grad_norm": 2.5439465978243794, "learning_rate": 8.234460832768893e-07, "loss": 0.266, "step": 4670 }, { "epoch": 0.3, "grad_norm": 0.40523210034565077, "learning_rate": 8.233673215983205e-07, "loss": 0.2039, "step": 4671 }, { "epoch": 0.3, "grad_norm": 0.8095650622236058, "learning_rate": 8.23288546124252e-07, "loss": 0.0136, "step": 4672 }, { "epoch": 0.3, "grad_norm": 0.9191328261543144, "learning_rate": 8.232097568580443e-07, "loss": 0.0245, "step": 4673 }, { "epoch": 0.3, "grad_norm": 1.0014710037084338, "learning_rate": 8.231309538030585e-07, "loss": 0.1874, "step": 4674 }, { "epoch": 0.3, "grad_norm": 0.8227465043781447, "learning_rate": 8.230521369626567e-07, "loss": 0.1479, "step": 4675 }, { "epoch": 0.3, "grad_norm": 0.8263908331466868, "learning_rate": 8.229733063402012e-07, "loss": 0.0944, "step": 4676 }, { "epoch": 0.3, "grad_norm": 1.2417722896625074, "learning_rate": 8.228944619390554e-07, "loss": 0.1235, "step": 4677 }, { "epoch": 0.3, "grad_norm": 0.5775048761976556, "learning_rate": 8.228156037625826e-07, "loss": 0.2096, "step": 4678 }, { "epoch": 0.3, "grad_norm": 0.14880514998075245, "learning_rate": 8.227367318141471e-07, "loss": 0.0576, "step": 4679 }, { "epoch": 0.3, "grad_norm": 0.8514689445420394, "learning_rate": 8.226578460971141e-07, "loss": 0.2681, "step": 4680 }, { "epoch": 0.3, "grad_norm": 0.353638450122576, "learning_rate": 8.225789466148487e-07, "loss": 0.0365, "step": 4681 }, { "epoch": 0.3, "grad_norm": 1.3520576242029907, "learning_rate": 8.225000333707169e-07, "loss": 0.2149, "step": 4682 }, { "epoch": 0.3, "grad_norm": 1.2241922239463783, "learning_rate": 8.224211063680852e-07, "loss": 0.0974, "step": 4683 }, { "epoch": 0.3, "grad_norm": 1.0618219535173103, "learning_rate": 8.223421656103212e-07, "loss": 0.1163, "step": 4684 }, { "epoch": 0.3, "grad_norm": 1.0534455178092248, "learning_rate": 8.222632111007924e-07, "loss": 0.1829, "step": 4685 }, { "epoch": 0.3, "grad_norm": 0.5360778566064571, "learning_rate": 8.221842428428672e-07, "loss": 0.3163, "step": 4686 }, { "epoch": 0.3, "grad_norm": 0.407809610791214, "learning_rate": 8.221052608399144e-07, "loss": 0.0037, "step": 4687 }, { "epoch": 0.3, "grad_norm": 0.5739234045881964, "learning_rate": 8.220262650953037e-07, "loss": 0.1646, "step": 4688 }, { "epoch": 0.3, "grad_norm": 0.4669324054169145, "learning_rate": 8.219472556124052e-07, "loss": 0.4065, "step": 4689 }, { "epoch": 0.3, "grad_norm": 0.5825268662027375, "learning_rate": 8.218682323945895e-07, "loss": 0.2036, "step": 4690 }, { "epoch": 0.3, "grad_norm": 4.4307318733168914, "learning_rate": 8.217891954452281e-07, "loss": 0.3523, "step": 4691 }, { "epoch": 0.3, "grad_norm": 1.0414104009912146, "learning_rate": 8.217101447676928e-07, "loss": 0.4357, "step": 4692 }, { "epoch": 0.3, "grad_norm": 0.35544067754905573, "learning_rate": 8.216310803653559e-07, "loss": 0.2828, "step": 4693 }, { "epoch": 0.3, "grad_norm": 0.760208699033786, "learning_rate": 8.215520022415905e-07, "loss": 0.2042, "step": 4694 }, { "epoch": 0.3, "grad_norm": 0.5296508099772066, "learning_rate": 8.214729103997704e-07, "loss": 0.2238, "step": 4695 }, { "epoch": 0.3, "grad_norm": 1.5131090269687375, "learning_rate": 8.213938048432696e-07, "loss": 0.152, "step": 4696 }, { "epoch": 0.3, "grad_norm": 2.266830854764602, "learning_rate": 8.213146855754632e-07, "loss": 0.1553, "step": 4697 }, { "epoch": 0.3, "grad_norm": 0.7225666498136933, "learning_rate": 8.212355525997261e-07, "loss": 0.2243, "step": 4698 }, { "epoch": 0.3, "grad_norm": 1.1794197600511782, "learning_rate": 8.211564059194347e-07, "loss": 0.0153, "step": 4699 }, { "epoch": 0.3, "grad_norm": 0.8186364889420014, "learning_rate": 8.210772455379656e-07, "loss": 0.1994, "step": 4700 }, { "epoch": 0.3, "grad_norm": 0.8741105299284755, "learning_rate": 8.209980714586955e-07, "loss": 0.3851, "step": 4701 }, { "epoch": 0.3, "grad_norm": 0.9295748485479789, "learning_rate": 8.209188836850024e-07, "loss": 0.145, "step": 4702 }, { "epoch": 0.3, "grad_norm": 0.8603070684898152, "learning_rate": 8.208396822202647e-07, "loss": 0.1078, "step": 4703 }, { "epoch": 0.3, "grad_norm": 2.1274263718944066, "learning_rate": 8.207604670678612e-07, "loss": 0.0527, "step": 4704 }, { "epoch": 0.3, "grad_norm": 0.4563964387411023, "learning_rate": 8.206812382311712e-07, "loss": 0.2161, "step": 4705 }, { "epoch": 0.3, "grad_norm": 0.7493729818410559, "learning_rate": 8.20601995713575e-07, "loss": 0.4807, "step": 4706 }, { "epoch": 0.3, "grad_norm": 0.8077790117145862, "learning_rate": 8.205227395184533e-07, "loss": 0.0051, "step": 4707 }, { "epoch": 0.3, "grad_norm": 2.3994268722811354, "learning_rate": 8.204434696491871e-07, "loss": 0.0114, "step": 4708 }, { "epoch": 0.3, "grad_norm": 1.9107950272394667, "learning_rate": 8.203641861091582e-07, "loss": 0.0118, "step": 4709 }, { "epoch": 0.3, "grad_norm": 0.7385816562045615, "learning_rate": 8.202848889017493e-07, "loss": 0.2511, "step": 4710 }, { "epoch": 0.3, "grad_norm": 0.14026937237341053, "learning_rate": 8.202055780303431e-07, "loss": 0.0637, "step": 4711 }, { "epoch": 0.3, "grad_norm": 1.0653851814224091, "learning_rate": 8.201262534983232e-07, "loss": 0.156, "step": 4712 }, { "epoch": 0.3, "grad_norm": 0.8707820745859838, "learning_rate": 8.200469153090739e-07, "loss": 0.202, "step": 4713 }, { "epoch": 0.3, "grad_norm": 5.559567352341435, "learning_rate": 8.199675634659798e-07, "loss": 0.2786, "step": 4714 }, { "epoch": 0.3, "grad_norm": 0.8831184985543606, "learning_rate": 8.198881979724262e-07, "loss": 0.023, "step": 4715 }, { "epoch": 0.3, "grad_norm": 0.7523378082412079, "learning_rate": 8.19808818831799e-07, "loss": 0.2363, "step": 4716 }, { "epoch": 0.3, "grad_norm": 1.2402894662560693, "learning_rate": 8.197294260474848e-07, "loss": 0.2434, "step": 4717 }, { "epoch": 0.3, "grad_norm": 1.9662177624727837, "learning_rate": 8.196500196228704e-07, "loss": 0.2969, "step": 4718 }, { "epoch": 0.3, "grad_norm": 1.8447923730615003, "learning_rate": 8.195705995613436e-07, "loss": 0.0784, "step": 4719 }, { "epoch": 0.3, "grad_norm": 1.1200294678811002, "learning_rate": 8.194911658662927e-07, "loss": 0.356, "step": 4720 }, { "epoch": 0.3, "grad_norm": 0.4122942864613039, "learning_rate": 8.194117185411062e-07, "loss": 0.1679, "step": 4721 }, { "epoch": 0.3, "grad_norm": 0.5700926088637928, "learning_rate": 8.193322575891739e-07, "loss": 0.3432, "step": 4722 }, { "epoch": 0.3, "grad_norm": 0.5156157924268693, "learning_rate": 8.192527830138856e-07, "loss": 0.0841, "step": 4723 }, { "epoch": 0.3, "grad_norm": 0.4370874859474639, "learning_rate": 8.191732948186316e-07, "loss": 0.239, "step": 4724 }, { "epoch": 0.3, "grad_norm": 1.093057648590071, "learning_rate": 8.190937930068033e-07, "loss": 0.2754, "step": 4725 }, { "epoch": 0.3, "grad_norm": 1.2555123969253632, "learning_rate": 8.190142775817923e-07, "loss": 0.3539, "step": 4726 }, { "epoch": 0.3, "grad_norm": 0.41472778104884717, "learning_rate": 8.189347485469911e-07, "loss": 0.1164, "step": 4727 }, { "epoch": 0.3, "grad_norm": 1.8913508137970223, "learning_rate": 8.188552059057923e-07, "loss": 0.071, "step": 4728 }, { "epoch": 0.3, "grad_norm": 1.278132125183237, "learning_rate": 8.187756496615895e-07, "loss": 0.17, "step": 4729 }, { "epoch": 0.3, "grad_norm": 0.8443359027948496, "learning_rate": 8.186960798177765e-07, "loss": 0.2811, "step": 4730 }, { "epoch": 0.3, "grad_norm": 0.5137199003709494, "learning_rate": 8.18616496377748e-07, "loss": 0.0148, "step": 4731 }, { "epoch": 0.3, "grad_norm": 0.848923482668164, "learning_rate": 8.185368993448993e-07, "loss": 0.0536, "step": 4732 }, { "epoch": 0.3, "grad_norm": 0.6488707373978129, "learning_rate": 8.184572887226263e-07, "loss": 0.0111, "step": 4733 }, { "epoch": 0.3, "grad_norm": 0.27362005094595204, "learning_rate": 8.183776645143252e-07, "loss": 0.2206, "step": 4734 }, { "epoch": 0.3, "grad_norm": 0.2135317880173803, "learning_rate": 8.182980267233927e-07, "loss": 0.016, "step": 4735 }, { "epoch": 0.3, "grad_norm": 3.5562540739822275, "learning_rate": 8.182183753532268e-07, "loss": 0.3809, "step": 4736 }, { "epoch": 0.3, "grad_norm": 0.4851006141667179, "learning_rate": 8.18138710407225e-07, "loss": 0.1858, "step": 4737 }, { "epoch": 0.3, "grad_norm": 0.8018425586726883, "learning_rate": 8.180590318887866e-07, "loss": 0.1847, "step": 4738 }, { "epoch": 0.3, "grad_norm": 0.3074670578761211, "learning_rate": 8.179793398013102e-07, "loss": 0.0349, "step": 4739 }, { "epoch": 0.3, "grad_norm": 1.998361311194355, "learning_rate": 8.178996341481961e-07, "loss": 0.1196, "step": 4740 }, { "epoch": 0.3, "grad_norm": 0.668081158103692, "learning_rate": 8.178199149328446e-07, "loss": 0.2594, "step": 4741 }, { "epoch": 0.3, "grad_norm": 0.5350107573420417, "learning_rate": 8.177401821586567e-07, "loss": 0.2036, "step": 4742 }, { "epoch": 0.3, "grad_norm": 0.3575336661143542, "learning_rate": 8.176604358290338e-07, "loss": 0.1067, "step": 4743 }, { "epoch": 0.3, "grad_norm": 0.34226669859146513, "learning_rate": 8.17580675947378e-07, "loss": 0.1925, "step": 4744 }, { "epoch": 0.3, "grad_norm": 0.5353285593550388, "learning_rate": 8.175009025170922e-07, "loss": 0.2514, "step": 4745 }, { "epoch": 0.3, "grad_norm": 2.4845438326924945, "learning_rate": 8.174211155415798e-07, "loss": 0.2897, "step": 4746 }, { "epoch": 0.3, "grad_norm": 1.6661578403739732, "learning_rate": 8.173413150242444e-07, "loss": 0.2717, "step": 4747 }, { "epoch": 0.3, "grad_norm": 0.46315836179789466, "learning_rate": 8.172615009684905e-07, "loss": 0.2408, "step": 4748 }, { "epoch": 0.3, "grad_norm": 0.8786415967748011, "learning_rate": 8.171816733777232e-07, "loss": 0.1703, "step": 4749 }, { "epoch": 0.3, "grad_norm": 0.3677112592552782, "learning_rate": 8.171018322553482e-07, "loss": 0.1777, "step": 4750 }, { "epoch": 0.3, "grad_norm": 2.531046619906957, "learning_rate": 8.170219776047715e-07, "loss": 0.3927, "step": 4751 }, { "epoch": 0.3, "grad_norm": 0.6553025686291939, "learning_rate": 8.169421094294e-07, "loss": 0.28, "step": 4752 }, { "epoch": 0.3, "grad_norm": 0.7449111866321586, "learning_rate": 8.168622277326409e-07, "loss": 0.308, "step": 4753 }, { "epoch": 0.3, "grad_norm": 0.49963409292098165, "learning_rate": 8.167823325179023e-07, "loss": 0.1949, "step": 4754 }, { "epoch": 0.3, "grad_norm": 1.4718151113525169, "learning_rate": 8.167024237885927e-07, "loss": 0.2263, "step": 4755 }, { "epoch": 0.3, "grad_norm": 0.45236383778448064, "learning_rate": 8.166225015481208e-07, "loss": 0.267, "step": 4756 }, { "epoch": 0.3, "grad_norm": 0.6146614210267652, "learning_rate": 8.165425657998966e-07, "loss": 0.1569, "step": 4757 }, { "epoch": 0.3, "grad_norm": 0.6358796438298233, "learning_rate": 8.164626165473302e-07, "loss": 0.243, "step": 4758 }, { "epoch": 0.3, "grad_norm": 0.720794840933295, "learning_rate": 8.163826537938323e-07, "loss": 0.3877, "step": 4759 }, { "epoch": 0.3, "grad_norm": 0.6339632423895353, "learning_rate": 8.163026775428146e-07, "loss": 0.1155, "step": 4760 }, { "epoch": 0.3, "grad_norm": 0.7540032251864254, "learning_rate": 8.162226877976886e-07, "loss": 0.1105, "step": 4761 }, { "epoch": 0.3, "grad_norm": 0.7409483817849162, "learning_rate": 8.161426845618671e-07, "loss": 0.1984, "step": 4762 }, { "epoch": 0.3, "grad_norm": 0.8535988151051185, "learning_rate": 8.160626678387632e-07, "loss": 0.2088, "step": 4763 }, { "epoch": 0.3, "grad_norm": 1.0317250470698331, "learning_rate": 8.159826376317906e-07, "loss": 0.2576, "step": 4764 }, { "epoch": 0.3, "grad_norm": 0.7235580007157351, "learning_rate": 8.159025939443634e-07, "loss": 0.2651, "step": 4765 }, { "epoch": 0.3, "grad_norm": 0.42120452784390594, "learning_rate": 8.158225367798966e-07, "loss": 0.2216, "step": 4766 }, { "epoch": 0.3, "grad_norm": 0.2914330147067489, "learning_rate": 8.157424661418054e-07, "loss": 0.0033, "step": 4767 }, { "epoch": 0.3, "grad_norm": 3.3571703923846106, "learning_rate": 8.156623820335058e-07, "loss": 0.3403, "step": 4768 }, { "epoch": 0.3, "grad_norm": 1.9458826621621397, "learning_rate": 8.155822844584145e-07, "loss": 0.2017, "step": 4769 }, { "epoch": 0.3, "grad_norm": 0.8301991987674295, "learning_rate": 8.155021734199486e-07, "loss": 0.0587, "step": 4770 }, { "epoch": 0.3, "grad_norm": 0.13018554076094138, "learning_rate": 8.154220489215256e-07, "loss": 0.0026, "step": 4771 }, { "epoch": 0.3, "grad_norm": 0.4448594031418288, "learning_rate": 8.153419109665641e-07, "loss": 0.1517, "step": 4772 }, { "epoch": 0.3, "grad_norm": 0.5194526639357139, "learning_rate": 8.152617595584825e-07, "loss": 0.1278, "step": 4773 }, { "epoch": 0.3, "grad_norm": 1.1607749301962398, "learning_rate": 8.151815947007007e-07, "loss": 0.4137, "step": 4774 }, { "epoch": 0.3, "grad_norm": 3.8357605391741285, "learning_rate": 8.151014163966384e-07, "loss": 0.0445, "step": 4775 }, { "epoch": 0.3, "grad_norm": 0.5136069215013145, "learning_rate": 8.150212246497164e-07, "loss": 0.099, "step": 4776 }, { "epoch": 0.3, "grad_norm": 1.9389764063044246, "learning_rate": 8.149410194633554e-07, "loss": 0.0236, "step": 4777 }, { "epoch": 0.3, "grad_norm": 0.23778314579438078, "learning_rate": 8.148608008409775e-07, "loss": 0.0924, "step": 4778 }, { "epoch": 0.3, "grad_norm": 0.7102426735803246, "learning_rate": 8.14780568786005e-07, "loss": 0.0872, "step": 4779 }, { "epoch": 0.3, "grad_norm": 1.6122280657389112, "learning_rate": 8.147003233018605e-07, "loss": 0.1284, "step": 4780 }, { "epoch": 0.3, "grad_norm": 0.5522734828408397, "learning_rate": 8.146200643919676e-07, "loss": 0.162, "step": 4781 }, { "epoch": 0.3, "grad_norm": 0.3747575208807832, "learning_rate": 8.145397920597505e-07, "loss": 0.0953, "step": 4782 }, { "epoch": 0.31, "grad_norm": 0.7088931277898864, "learning_rate": 8.144595063086335e-07, "loss": 0.0691, "step": 4783 }, { "epoch": 0.31, "grad_norm": 0.6193736346032409, "learning_rate": 8.143792071420417e-07, "loss": 0.0099, "step": 4784 }, { "epoch": 0.31, "grad_norm": 1.0455965018983127, "learning_rate": 8.142988945634009e-07, "loss": 0.1422, "step": 4785 }, { "epoch": 0.31, "grad_norm": 0.6153189687685371, "learning_rate": 8.142185685761375e-07, "loss": 0.1013, "step": 4786 }, { "epoch": 0.31, "grad_norm": 0.5786634886782436, "learning_rate": 8.141382291836783e-07, "loss": 0.1876, "step": 4787 }, { "epoch": 0.31, "grad_norm": 0.33442578786895083, "learning_rate": 8.140578763894508e-07, "loss": 0.0659, "step": 4788 }, { "epoch": 0.31, "grad_norm": 0.7147668461843721, "learning_rate": 8.139775101968829e-07, "loss": 0.164, "step": 4789 }, { "epoch": 0.31, "grad_norm": 4.151225999367275, "learning_rate": 8.138971306094033e-07, "loss": 0.3153, "step": 4790 }, { "epoch": 0.31, "grad_norm": 1.0706416998260024, "learning_rate": 8.13816737630441e-07, "loss": 0.0871, "step": 4791 }, { "epoch": 0.31, "grad_norm": 0.584298758890585, "learning_rate": 8.137363312634258e-07, "loss": 0.1895, "step": 4792 }, { "epoch": 0.31, "grad_norm": 0.6246688718089787, "learning_rate": 8.136559115117881e-07, "loss": 0.2332, "step": 4793 }, { "epoch": 0.31, "grad_norm": 1.2684598818504136, "learning_rate": 8.135754783789587e-07, "loss": 0.0355, "step": 4794 }, { "epoch": 0.31, "grad_norm": 1.5161168130043927, "learning_rate": 8.134950318683691e-07, "loss": 0.1363, "step": 4795 }, { "epoch": 0.31, "grad_norm": 0.645203944916089, "learning_rate": 8.134145719834511e-07, "loss": 0.1471, "step": 4796 }, { "epoch": 0.31, "grad_norm": 0.7895951644051908, "learning_rate": 8.133340987276375e-07, "loss": 0.2596, "step": 4797 }, { "epoch": 0.31, "grad_norm": 0.6105455471126823, "learning_rate": 8.132536121043613e-07, "loss": 0.1016, "step": 4798 }, { "epoch": 0.31, "grad_norm": 0.7463146149380439, "learning_rate": 8.131731121170563e-07, "loss": 0.4144, "step": 4799 }, { "epoch": 0.31, "grad_norm": 0.703424109933384, "learning_rate": 8.130925987691568e-07, "loss": 0.215, "step": 4800 }, { "epoch": 0.31, "grad_norm": 1.1396604059930846, "learning_rate": 8.130120720640976e-07, "loss": 0.3308, "step": 4801 }, { "epoch": 0.31, "grad_norm": 1.1795720050065013, "learning_rate": 8.129315320053143e-07, "loss": 0.1339, "step": 4802 }, { "epoch": 0.31, "grad_norm": 0.8063374908340881, "learning_rate": 8.128509785962427e-07, "loss": 0.2179, "step": 4803 }, { "epoch": 0.31, "grad_norm": 0.38986296951702754, "learning_rate": 8.127704118403194e-07, "loss": 0.2449, "step": 4804 }, { "epoch": 0.31, "grad_norm": 1.3440784561137122, "learning_rate": 8.126898317409816e-07, "loss": 0.1668, "step": 4805 }, { "epoch": 0.31, "grad_norm": 0.27625082109998883, "learning_rate": 8.12609238301667e-07, "loss": 0.1278, "step": 4806 }, { "epoch": 0.31, "grad_norm": 0.5367519695412856, "learning_rate": 8.125286315258139e-07, "loss": 0.1201, "step": 4807 }, { "epoch": 0.31, "grad_norm": 0.3586161860975536, "learning_rate": 8.124480114168611e-07, "loss": 0.1583, "step": 4808 }, { "epoch": 0.31, "grad_norm": 0.5083697358079993, "learning_rate": 8.12367377978248e-07, "loss": 0.1727, "step": 4809 }, { "epoch": 0.31, "grad_norm": 0.37507640697416395, "learning_rate": 8.122867312134147e-07, "loss": 0.2108, "step": 4810 }, { "epoch": 0.31, "grad_norm": 0.9980083401077641, "learning_rate": 8.122060711258017e-07, "loss": 0.2243, "step": 4811 }, { "epoch": 0.31, "grad_norm": 0.8726441713134545, "learning_rate": 8.121253977188499e-07, "loss": 0.266, "step": 4812 }, { "epoch": 0.31, "grad_norm": 0.8992933782380833, "learning_rate": 8.120447109960014e-07, "loss": 0.3619, "step": 4813 }, { "epoch": 0.31, "grad_norm": 0.26986011613380834, "learning_rate": 8.11964010960698e-07, "loss": 0.0722, "step": 4814 }, { "epoch": 0.31, "grad_norm": 0.8749899382429204, "learning_rate": 8.11883297616383e-07, "loss": 0.2008, "step": 4815 }, { "epoch": 0.31, "grad_norm": 0.41960032165463595, "learning_rate": 8.118025709664994e-07, "loss": 0.1805, "step": 4816 }, { "epoch": 0.31, "grad_norm": 0.5619040980184855, "learning_rate": 8.117218310144913e-07, "loss": 0.3494, "step": 4817 }, { "epoch": 0.31, "grad_norm": 0.3700513371571906, "learning_rate": 8.116410777638034e-07, "loss": 0.06, "step": 4818 }, { "epoch": 0.31, "grad_norm": 0.669604798969257, "learning_rate": 8.115603112178806e-07, "loss": 0.2491, "step": 4819 }, { "epoch": 0.31, "grad_norm": 1.265929445107414, "learning_rate": 8.114795313801686e-07, "loss": 0.1283, "step": 4820 }, { "epoch": 0.31, "grad_norm": 2.125882682580364, "learning_rate": 8.113987382541136e-07, "loss": 0.0843, "step": 4821 }, { "epoch": 0.31, "grad_norm": 0.5303452032060689, "learning_rate": 8.113179318431624e-07, "loss": 0.3097, "step": 4822 }, { "epoch": 0.31, "grad_norm": 0.6740244575310982, "learning_rate": 8.112371121507625e-07, "loss": 0.1511, "step": 4823 }, { "epoch": 0.31, "grad_norm": 1.8220889534531641, "learning_rate": 8.111562791803617e-07, "loss": 0.132, "step": 4824 }, { "epoch": 0.31, "grad_norm": 5.358675217109917, "learning_rate": 8.110754329354086e-07, "loss": 0.3406, "step": 4825 }, { "epoch": 0.31, "grad_norm": 4.891636297348018, "learning_rate": 8.10994573419352e-07, "loss": 0.2714, "step": 4826 }, { "epoch": 0.31, "grad_norm": 0.2897380692260539, "learning_rate": 8.109137006356419e-07, "loss": 0.1168, "step": 4827 }, { "epoch": 0.31, "grad_norm": 1.487431741184058, "learning_rate": 8.108328145877282e-07, "loss": 0.3414, "step": 4828 }, { "epoch": 0.31, "grad_norm": 0.661817392387723, "learning_rate": 8.107519152790619e-07, "loss": 0.3947, "step": 4829 }, { "epoch": 0.31, "grad_norm": 1.0208401780032037, "learning_rate": 8.10671002713094e-07, "loss": 0.2575, "step": 4830 }, { "epoch": 0.31, "grad_norm": 1.2116693190295287, "learning_rate": 8.105900768932767e-07, "loss": 0.3512, "step": 4831 }, { "epoch": 0.31, "grad_norm": 1.4987501829098024, "learning_rate": 8.105091378230624e-07, "loss": 0.0436, "step": 4832 }, { "epoch": 0.31, "grad_norm": 0.8342103669152526, "learning_rate": 8.10428185505904e-07, "loss": 0.5072, "step": 4833 }, { "epoch": 0.31, "grad_norm": 0.8757327035704235, "learning_rate": 8.103472199452553e-07, "loss": 0.4112, "step": 4834 }, { "epoch": 0.31, "grad_norm": 0.5415438431193039, "learning_rate": 8.102662411445702e-07, "loss": 0.2089, "step": 4835 }, { "epoch": 0.31, "grad_norm": 0.4508114582543965, "learning_rate": 8.101852491073036e-07, "loss": 0.262, "step": 4836 }, { "epoch": 0.31, "grad_norm": 1.5815245402928737, "learning_rate": 8.101042438369108e-07, "loss": 0.1776, "step": 4837 }, { "epoch": 0.31, "grad_norm": 0.8965872238094595, "learning_rate": 8.100232253368474e-07, "loss": 0.33, "step": 4838 }, { "epoch": 0.31, "grad_norm": 0.35410971342035186, "learning_rate": 8.099421936105702e-07, "loss": 0.0168, "step": 4839 }, { "epoch": 0.31, "grad_norm": 2.2150666514128075, "learning_rate": 8.098611486615357e-07, "loss": 0.0392, "step": 4840 }, { "epoch": 0.31, "grad_norm": 1.033711989512499, "learning_rate": 8.097800904932018e-07, "loss": 0.2871, "step": 4841 }, { "epoch": 0.31, "grad_norm": 1.0445909649421055, "learning_rate": 8.096990191090265e-07, "loss": 0.211, "step": 4842 }, { "epoch": 0.31, "grad_norm": 3.588543664066317, "learning_rate": 8.096179345124685e-07, "loss": 0.1082, "step": 4843 }, { "epoch": 0.31, "grad_norm": 0.914117310489967, "learning_rate": 8.09536836706987e-07, "loss": 0.0876, "step": 4844 }, { "epoch": 0.31, "grad_norm": 0.36171155517429354, "learning_rate": 8.094557256960419e-07, "loss": 0.1091, "step": 4845 }, { "epoch": 0.31, "grad_norm": 1.9676226298196176, "learning_rate": 8.093746014830933e-07, "loss": 0.0858, "step": 4846 }, { "epoch": 0.31, "grad_norm": 0.6714200761593854, "learning_rate": 8.092934640716023e-07, "loss": 0.2552, "step": 4847 }, { "epoch": 0.31, "grad_norm": 0.7545950416077731, "learning_rate": 8.092123134650304e-07, "loss": 0.3534, "step": 4848 }, { "epoch": 0.31, "grad_norm": 0.4545882543203872, "learning_rate": 8.091311496668396e-07, "loss": 0.0389, "step": 4849 }, { "epoch": 0.31, "grad_norm": 1.4086507364432175, "learning_rate": 8.090499726804924e-07, "loss": 0.3278, "step": 4850 }, { "epoch": 0.31, "grad_norm": 0.6553200148490134, "learning_rate": 8.089687825094524e-07, "loss": 0.1967, "step": 4851 }, { "epoch": 0.31, "grad_norm": 0.9493328856123927, "learning_rate": 8.088875791571829e-07, "loss": 0.0726, "step": 4852 }, { "epoch": 0.31, "grad_norm": 0.7316791064460415, "learning_rate": 8.088063626271482e-07, "loss": 0.3481, "step": 4853 }, { "epoch": 0.31, "grad_norm": 0.8532338216162878, "learning_rate": 8.087251329228135e-07, "loss": 0.4165, "step": 4854 }, { "epoch": 0.31, "grad_norm": 1.182696331300221, "learning_rate": 8.086438900476437e-07, "loss": 0.0125, "step": 4855 }, { "epoch": 0.31, "grad_norm": 1.2246967929530859, "learning_rate": 8.085626340051054e-07, "loss": 0.0899, "step": 4856 }, { "epoch": 0.31, "grad_norm": 1.5421316152513138, "learning_rate": 8.084813647986648e-07, "loss": 0.1918, "step": 4857 }, { "epoch": 0.31, "grad_norm": 0.5652342853883778, "learning_rate": 8.08400082431789e-07, "loss": 0.1306, "step": 4858 }, { "epoch": 0.31, "grad_norm": 0.585916229816176, "learning_rate": 8.083187869079458e-07, "loss": 0.1555, "step": 4859 }, { "epoch": 0.31, "grad_norm": 0.4553496465574186, "learning_rate": 8.082374782306032e-07, "loss": 0.1156, "step": 4860 }, { "epoch": 0.31, "grad_norm": 0.2827301344247135, "learning_rate": 8.081561564032302e-07, "loss": 0.2535, "step": 4861 }, { "epoch": 0.31, "grad_norm": 0.4708930856967913, "learning_rate": 8.080748214292961e-07, "loss": 0.1788, "step": 4862 }, { "epoch": 0.31, "grad_norm": 0.895992974866067, "learning_rate": 8.079934733122707e-07, "loss": 0.2384, "step": 4863 }, { "epoch": 0.31, "grad_norm": 0.9405193118592018, "learning_rate": 8.079121120556247e-07, "loss": 0.0869, "step": 4864 }, { "epoch": 0.31, "grad_norm": 0.7232272466958303, "learning_rate": 8.07830737662829e-07, "loss": 0.1209, "step": 4865 }, { "epoch": 0.31, "grad_norm": 2.066643556053722, "learning_rate": 8.077493501373554e-07, "loss": 0.1455, "step": 4866 }, { "epoch": 0.31, "grad_norm": 0.47089877034493316, "learning_rate": 8.076679494826757e-07, "loss": 0.141, "step": 4867 }, { "epoch": 0.31, "grad_norm": 1.121471486827043, "learning_rate": 8.075865357022628e-07, "loss": 0.046, "step": 4868 }, { "epoch": 0.31, "grad_norm": 2.158549015956193, "learning_rate": 8.075051087995899e-07, "loss": 0.1862, "step": 4869 }, { "epoch": 0.31, "grad_norm": 0.9730069506178508, "learning_rate": 8.074236687781309e-07, "loss": 0.4972, "step": 4870 }, { "epoch": 0.31, "grad_norm": 1.0793488888012408, "learning_rate": 8.073422156413603e-07, "loss": 0.373, "step": 4871 }, { "epoch": 0.31, "grad_norm": 1.1993090734989529, "learning_rate": 8.072607493927528e-07, "loss": 0.0676, "step": 4872 }, { "epoch": 0.31, "grad_norm": 0.5660302550557087, "learning_rate": 8.071792700357842e-07, "loss": 0.2421, "step": 4873 }, { "epoch": 0.31, "grad_norm": 0.7597048493635756, "learning_rate": 8.070977775739304e-07, "loss": 0.2999, "step": 4874 }, { "epoch": 0.31, "grad_norm": 0.38808110568411536, "learning_rate": 8.070162720106679e-07, "loss": 0.2452, "step": 4875 }, { "epoch": 0.31, "grad_norm": 0.8209025724927607, "learning_rate": 8.069347533494744e-07, "loss": 0.1285, "step": 4876 }, { "epoch": 0.31, "grad_norm": 0.8209839213021729, "learning_rate": 8.068532215938269e-07, "loss": 0.2401, "step": 4877 }, { "epoch": 0.31, "grad_norm": 0.9038983135784068, "learning_rate": 8.067716767472044e-07, "loss": 0.1235, "step": 4878 }, { "epoch": 0.31, "grad_norm": 0.7128631175040728, "learning_rate": 8.066901188130854e-07, "loss": 0.3511, "step": 4879 }, { "epoch": 0.31, "grad_norm": 0.7170598170129175, "learning_rate": 8.066085477949494e-07, "loss": 0.0227, "step": 4880 }, { "epoch": 0.31, "grad_norm": 0.62173391419443, "learning_rate": 8.065269636962763e-07, "loss": 0.4253, "step": 4881 }, { "epoch": 0.31, "grad_norm": 6.947325453129527, "learning_rate": 8.06445366520547e-07, "loss": 0.1273, "step": 4882 }, { "epoch": 0.31, "grad_norm": 1.0286287147679303, "learning_rate": 8.063637562712421e-07, "loss": 0.1125, "step": 4883 }, { "epoch": 0.31, "grad_norm": 1.187943991890367, "learning_rate": 8.062821329518435e-07, "loss": 0.1838, "step": 4884 }, { "epoch": 0.31, "grad_norm": 0.2909373566917518, "learning_rate": 8.062004965658336e-07, "loss": 0.0796, "step": 4885 }, { "epoch": 0.31, "grad_norm": 0.11145047618391998, "learning_rate": 8.061188471166947e-07, "loss": 0.0973, "step": 4886 }, { "epoch": 0.31, "grad_norm": 0.5975929567822871, "learning_rate": 8.060371846079106e-07, "loss": 0.2333, "step": 4887 }, { "epoch": 0.31, "grad_norm": 0.8888907966553493, "learning_rate": 8.059555090429649e-07, "loss": 0.3266, "step": 4888 }, { "epoch": 0.31, "grad_norm": 0.6178344911289139, "learning_rate": 8.058738204253421e-07, "loss": 0.4046, "step": 4889 }, { "epoch": 0.31, "grad_norm": 0.3801275160196431, "learning_rate": 8.057921187585273e-07, "loss": 0.2519, "step": 4890 }, { "epoch": 0.31, "grad_norm": 1.8939016166809852, "learning_rate": 8.057104040460061e-07, "loss": 0.2632, "step": 4891 }, { "epoch": 0.31, "grad_norm": 0.6356544303041343, "learning_rate": 8.056286762912643e-07, "loss": 0.1726, "step": 4892 }, { "epoch": 0.31, "grad_norm": 0.3755835281239818, "learning_rate": 8.055469354977889e-07, "loss": 0.1538, "step": 4893 }, { "epoch": 0.31, "grad_norm": 0.45209701749726516, "learning_rate": 8.054651816690669e-07, "loss": 0.0788, "step": 4894 }, { "epoch": 0.31, "grad_norm": 0.584728209799264, "learning_rate": 8.053834148085864e-07, "loss": 0.1629, "step": 4895 }, { "epoch": 0.31, "grad_norm": 0.6760059551912412, "learning_rate": 8.053016349198354e-07, "loss": 0.1113, "step": 4896 }, { "epoch": 0.31, "grad_norm": 0.36028504095486713, "learning_rate": 8.052198420063029e-07, "loss": 0.0753, "step": 4897 }, { "epoch": 0.31, "grad_norm": 0.3252513191684513, "learning_rate": 8.051380360714783e-07, "loss": 0.1073, "step": 4898 }, { "epoch": 0.31, "grad_norm": 0.9650826262026042, "learning_rate": 8.050562171188519e-07, "loss": 0.3438, "step": 4899 }, { "epoch": 0.31, "grad_norm": 0.6943397455835096, "learning_rate": 8.049743851519139e-07, "loss": 0.0816, "step": 4900 }, { "epoch": 0.31, "grad_norm": 0.6936148521736064, "learning_rate": 8.048925401741555e-07, "loss": 0.2057, "step": 4901 }, { "epoch": 0.31, "grad_norm": 1.5132755435461887, "learning_rate": 8.048106821890686e-07, "loss": 0.1608, "step": 4902 }, { "epoch": 0.31, "grad_norm": 0.6639518584758515, "learning_rate": 8.04728811200145e-07, "loss": 0.37, "step": 4903 }, { "epoch": 0.31, "grad_norm": 0.7177421838414897, "learning_rate": 8.046469272108779e-07, "loss": 0.2359, "step": 4904 }, { "epoch": 0.31, "grad_norm": 1.5432046176468095, "learning_rate": 8.045650302247604e-07, "loss": 0.2579, "step": 4905 }, { "epoch": 0.31, "grad_norm": 1.2688296023595715, "learning_rate": 8.044831202452864e-07, "loss": 0.111, "step": 4906 }, { "epoch": 0.31, "grad_norm": 0.1354848906039784, "learning_rate": 8.044011972759507e-07, "loss": 0.0047, "step": 4907 }, { "epoch": 0.31, "grad_norm": 0.3150780133923562, "learning_rate": 8.043192613202479e-07, "loss": 0.0636, "step": 4908 }, { "epoch": 0.31, "grad_norm": 0.43616701414274484, "learning_rate": 8.042373123816735e-07, "loss": 0.1428, "step": 4909 }, { "epoch": 0.31, "grad_norm": 2.4719856867245222, "learning_rate": 8.041553504637237e-07, "loss": 0.1362, "step": 4910 }, { "epoch": 0.31, "grad_norm": 2.52111897260746, "learning_rate": 8.040733755698955e-07, "loss": 0.1843, "step": 4911 }, { "epoch": 0.31, "grad_norm": 0.8024011424709545, "learning_rate": 8.039913877036855e-07, "loss": 0.3262, "step": 4912 }, { "epoch": 0.31, "grad_norm": 0.9411942662342757, "learning_rate": 8.03909386868592e-07, "loss": 0.1619, "step": 4913 }, { "epoch": 0.31, "grad_norm": 4.379404535635887, "learning_rate": 8.038273730681131e-07, "loss": 0.1127, "step": 4914 }, { "epoch": 0.31, "grad_norm": 0.3482281884152491, "learning_rate": 8.037453463057476e-07, "loss": 0.2595, "step": 4915 }, { "epoch": 0.31, "grad_norm": 1.5181406813260363, "learning_rate": 8.036633065849952e-07, "loss": 0.1973, "step": 4916 }, { "epoch": 0.31, "grad_norm": 2.9929727899045164, "learning_rate": 8.035812539093556e-07, "loss": 0.2721, "step": 4917 }, { "epoch": 0.31, "grad_norm": 0.41468132224453363, "learning_rate": 8.034991882823295e-07, "loss": 0.1691, "step": 4918 }, { "epoch": 0.31, "grad_norm": 0.7564116362954902, "learning_rate": 8.034171097074178e-07, "loss": 0.2941, "step": 4919 }, { "epoch": 0.31, "grad_norm": 1.0419087637830673, "learning_rate": 8.033350181881223e-07, "loss": 0.3469, "step": 4920 }, { "epoch": 0.31, "grad_norm": 0.9600973468805437, "learning_rate": 8.032529137279452e-07, "loss": 0.3099, "step": 4921 }, { "epoch": 0.31, "grad_norm": 0.8859863115949071, "learning_rate": 8.03170796330389e-07, "loss": 0.2808, "step": 4922 }, { "epoch": 0.31, "grad_norm": 0.3729753295058565, "learning_rate": 8.030886659989575e-07, "loss": 0.1751, "step": 4923 }, { "epoch": 0.31, "grad_norm": 1.7550129699030261, "learning_rate": 8.03006522737154e-07, "loss": 0.294, "step": 4924 }, { "epoch": 0.31, "grad_norm": 1.1835190904054413, "learning_rate": 8.029243665484832e-07, "loss": 0.2282, "step": 4925 }, { "epoch": 0.31, "grad_norm": 0.4322986351748098, "learning_rate": 8.028421974364499e-07, "loss": 0.3293, "step": 4926 }, { "epoch": 0.31, "grad_norm": 0.6129394757652573, "learning_rate": 8.027600154045597e-07, "loss": 0.2775, "step": 4927 }, { "epoch": 0.31, "grad_norm": 0.6999076153066456, "learning_rate": 8.026778204563186e-07, "loss": 0.3713, "step": 4928 }, { "epoch": 0.31, "grad_norm": 0.528830069210588, "learning_rate": 8.025956125952333e-07, "loss": 0.1196, "step": 4929 }, { "epoch": 0.31, "grad_norm": 1.061599656854315, "learning_rate": 8.025133918248108e-07, "loss": 0.2902, "step": 4930 }, { "epoch": 0.31, "grad_norm": 0.5694961102214035, "learning_rate": 8.024311581485588e-07, "loss": 0.0822, "step": 4931 }, { "epoch": 0.31, "grad_norm": 0.5561115290492764, "learning_rate": 8.023489115699857e-07, "loss": 0.1893, "step": 4932 }, { "epoch": 0.31, "grad_norm": 0.6327645403669329, "learning_rate": 8.022666520926003e-07, "loss": 0.3066, "step": 4933 }, { "epoch": 0.31, "grad_norm": 0.589508861362452, "learning_rate": 8.021843797199119e-07, "loss": 0.0339, "step": 4934 }, { "epoch": 0.31, "grad_norm": 0.8778632804172244, "learning_rate": 8.021020944554304e-07, "loss": 0.0416, "step": 4935 }, { "epoch": 0.31, "grad_norm": 0.7568867874491414, "learning_rate": 8.020197963026662e-07, "loss": 0.333, "step": 4936 }, { "epoch": 0.31, "grad_norm": 0.5578812210514541, "learning_rate": 8.019374852651302e-07, "loss": 0.1473, "step": 4937 }, { "epoch": 0.31, "grad_norm": 0.30723336690661857, "learning_rate": 8.018551613463344e-07, "loss": 0.2139, "step": 4938 }, { "epoch": 0.31, "grad_norm": 0.2492561977176489, "learning_rate": 8.017728245497903e-07, "loss": 0.1933, "step": 4939 }, { "epoch": 0.32, "grad_norm": 0.6182145102174467, "learning_rate": 8.016904748790112e-07, "loss": 0.1276, "step": 4940 }, { "epoch": 0.32, "grad_norm": 0.31288799637361114, "learning_rate": 8.016081123375097e-07, "loss": 0.0768, "step": 4941 }, { "epoch": 0.32, "grad_norm": 0.578768112587766, "learning_rate": 8.015257369287999e-07, "loss": 0.2145, "step": 4942 }, { "epoch": 0.32, "grad_norm": 0.6243010586183911, "learning_rate": 8.014433486563961e-07, "loss": 0.2143, "step": 4943 }, { "epoch": 0.32, "grad_norm": 0.5772522433540507, "learning_rate": 8.01360947523813e-07, "loss": 0.1336, "step": 4944 }, { "epoch": 0.32, "grad_norm": 3.154009492996312, "learning_rate": 8.01278533534566e-07, "loss": 0.1029, "step": 4945 }, { "epoch": 0.32, "grad_norm": 0.5637675702015749, "learning_rate": 8.011961066921712e-07, "loss": 0.3088, "step": 4946 }, { "epoch": 0.32, "grad_norm": 0.8447865031556825, "learning_rate": 8.01113667000145e-07, "loss": 0.2675, "step": 4947 }, { "epoch": 0.32, "grad_norm": 0.8427574294223372, "learning_rate": 8.010312144620045e-07, "loss": 0.0268, "step": 4948 }, { "epoch": 0.32, "grad_norm": 0.6887712855980462, "learning_rate": 8.009487490812671e-07, "loss": 0.1334, "step": 4949 }, { "epoch": 0.32, "grad_norm": 1.150689543029537, "learning_rate": 8.008662708614513e-07, "loss": 0.11, "step": 4950 }, { "epoch": 0.32, "grad_norm": 0.19335587316152167, "learning_rate": 8.007837798060754e-07, "loss": 0.07, "step": 4951 }, { "epoch": 0.32, "grad_norm": 5.580050594643492, "learning_rate": 8.007012759186589e-07, "loss": 0.2524, "step": 4952 }, { "epoch": 0.32, "grad_norm": 0.3589227727435151, "learning_rate": 8.006187592027213e-07, "loss": 0.1275, "step": 4953 }, { "epoch": 0.32, "grad_norm": 1.0979953933170263, "learning_rate": 8.005362296617833e-07, "loss": 0.4041, "step": 4954 }, { "epoch": 0.32, "grad_norm": 0.8801438577089142, "learning_rate": 8.004536872993655e-07, "loss": 0.0907, "step": 4955 }, { "epoch": 0.32, "grad_norm": 0.6296143796044339, "learning_rate": 8.003711321189895e-07, "loss": 0.0866, "step": 4956 }, { "epoch": 0.32, "grad_norm": 0.7793038701861162, "learning_rate": 8.00288564124177e-07, "loss": 0.2095, "step": 4957 }, { "epoch": 0.32, "grad_norm": 0.37715338537872434, "learning_rate": 8.002059833184509e-07, "loss": 0.0245, "step": 4958 }, { "epoch": 0.32, "grad_norm": 0.7644760400658014, "learning_rate": 8.001233897053339e-07, "loss": 0.3113, "step": 4959 }, { "epoch": 0.32, "grad_norm": 0.6204781145389356, "learning_rate": 8.000407832883498e-07, "loss": 0.2237, "step": 4960 }, { "epoch": 0.32, "grad_norm": 0.6320265579175591, "learning_rate": 7.999581640710229e-07, "loss": 0.1138, "step": 4961 }, { "epoch": 0.32, "grad_norm": 0.8650039911746136, "learning_rate": 7.998755320568777e-07, "loss": 0.0433, "step": 4962 }, { "epoch": 0.32, "grad_norm": 0.4204365329912512, "learning_rate": 7.997928872494393e-07, "loss": 0.0049, "step": 4963 }, { "epoch": 0.32, "grad_norm": 0.5922103444989494, "learning_rate": 7.997102296522338e-07, "loss": 0.3073, "step": 4964 }, { "epoch": 0.32, "grad_norm": 0.4635430753988371, "learning_rate": 7.996275592687873e-07, "loss": 0.1315, "step": 4965 }, { "epoch": 0.32, "grad_norm": 1.4221298468018966, "learning_rate": 7.995448761026269e-07, "loss": 0.0955, "step": 4966 }, { "epoch": 0.32, "grad_norm": 0.7395847734958292, "learning_rate": 7.994621801572799e-07, "loss": 0.0599, "step": 4967 }, { "epoch": 0.32, "grad_norm": 2.738676426321658, "learning_rate": 7.993794714362743e-07, "loss": 0.2013, "step": 4968 }, { "epoch": 0.32, "grad_norm": 1.726919285608865, "learning_rate": 7.992967499431386e-07, "loss": 0.3061, "step": 4969 }, { "epoch": 0.32, "grad_norm": 1.6451475246976672, "learning_rate": 7.992140156814018e-07, "loss": 0.1609, "step": 4970 }, { "epoch": 0.32, "grad_norm": 0.7994856861678853, "learning_rate": 7.991312686545937e-07, "loss": 0.2259, "step": 4971 }, { "epoch": 0.32, "grad_norm": 0.5418123855404976, "learning_rate": 7.990485088662444e-07, "loss": 0.0922, "step": 4972 }, { "epoch": 0.32, "grad_norm": 1.2629885242231957, "learning_rate": 7.989657363198844e-07, "loss": 0.3647, "step": 4973 }, { "epoch": 0.32, "grad_norm": 0.7995087081555955, "learning_rate": 7.988829510190451e-07, "loss": 0.3445, "step": 4974 }, { "epoch": 0.32, "grad_norm": 0.6500852431645989, "learning_rate": 7.988001529672586e-07, "loss": 0.0953, "step": 4975 }, { "epoch": 0.32, "grad_norm": 0.4354991781289608, "learning_rate": 7.987173421680566e-07, "loss": 0.3204, "step": 4976 }, { "epoch": 0.32, "grad_norm": 0.45323271759854744, "learning_rate": 7.986345186249724e-07, "loss": 0.1694, "step": 4977 }, { "epoch": 0.32, "grad_norm": 0.6257471185124001, "learning_rate": 7.985516823415393e-07, "loss": 0.2966, "step": 4978 }, { "epoch": 0.32, "grad_norm": 1.9748700051738195, "learning_rate": 7.984688333212911e-07, "loss": 0.3093, "step": 4979 }, { "epoch": 0.32, "grad_norm": 1.766471716127174, "learning_rate": 7.983859715677626e-07, "loss": 0.0133, "step": 4980 }, { "epoch": 0.32, "grad_norm": 1.780145636884804, "learning_rate": 7.983030970844886e-07, "loss": 0.1167, "step": 4981 }, { "epoch": 0.32, "grad_norm": 1.514590505070607, "learning_rate": 7.98220209875005e-07, "loss": 0.2056, "step": 4982 }, { "epoch": 0.32, "grad_norm": 0.7500044643537416, "learning_rate": 7.981373099428477e-07, "loss": 0.1141, "step": 4983 }, { "epoch": 0.32, "grad_norm": 1.9443011383275393, "learning_rate": 7.980543972915534e-07, "loss": 0.0075, "step": 4984 }, { "epoch": 0.32, "grad_norm": 0.3823654877624576, "learning_rate": 7.979714719246594e-07, "loss": 0.1604, "step": 4985 }, { "epoch": 0.32, "grad_norm": 0.5027893630783101, "learning_rate": 7.978885338457033e-07, "loss": 0.1724, "step": 4986 }, { "epoch": 0.32, "grad_norm": 0.490008379196501, "learning_rate": 7.978055830582235e-07, "loss": 0.1139, "step": 4987 }, { "epoch": 0.32, "grad_norm": 3.825093526811515, "learning_rate": 7.97722619565759e-07, "loss": 0.1198, "step": 4988 }, { "epoch": 0.32, "grad_norm": 1.312071666363919, "learning_rate": 7.976396433718491e-07, "loss": 0.1773, "step": 4989 }, { "epoch": 0.32, "grad_norm": 0.620296038300229, "learning_rate": 7.975566544800336e-07, "loss": 0.3288, "step": 4990 }, { "epoch": 0.32, "grad_norm": 0.5364438222770149, "learning_rate": 7.97473652893853e-07, "loss": 0.229, "step": 4991 }, { "epoch": 0.32, "grad_norm": 0.6438409456228207, "learning_rate": 7.973906386168484e-07, "loss": 0.3674, "step": 4992 }, { "epoch": 0.32, "grad_norm": 1.1231179404140257, "learning_rate": 7.973076116525613e-07, "loss": 0.2368, "step": 4993 }, { "epoch": 0.32, "grad_norm": 1.486323983764233, "learning_rate": 7.972245720045339e-07, "loss": 0.0239, "step": 4994 }, { "epoch": 0.32, "grad_norm": 0.5149627780660425, "learning_rate": 7.971415196763087e-07, "loss": 0.1882, "step": 4995 }, { "epoch": 0.32, "grad_norm": 0.48207292341649965, "learning_rate": 7.97058454671429e-07, "loss": 0.2135, "step": 4996 }, { "epoch": 0.32, "grad_norm": 0.451393549295626, "learning_rate": 7.969753769934385e-07, "loss": 0.1189, "step": 4997 }, { "epoch": 0.32, "grad_norm": 1.8268227433301185, "learning_rate": 7.968922866458812e-07, "loss": 0.0217, "step": 4998 }, { "epoch": 0.32, "grad_norm": 1.8272394906628622, "learning_rate": 7.968091836323024e-07, "loss": 0.1134, "step": 4999 }, { "epoch": 0.32, "grad_norm": 0.47506817233651355, "learning_rate": 7.967260679562469e-07, "loss": 0.2445, "step": 5000 }, { "epoch": 0.32, "grad_norm": 0.5423948336077351, "learning_rate": 7.966429396212609e-07, "loss": 0.3635, "step": 5001 }, { "epoch": 0.32, "grad_norm": 2.06826896756584, "learning_rate": 7.96559798630891e-07, "loss": 0.2838, "step": 5002 }, { "epoch": 0.32, "grad_norm": 0.416550947442645, "learning_rate": 7.964766449886837e-07, "loss": 0.1613, "step": 5003 }, { "epoch": 0.32, "grad_norm": 0.4322128171498775, "learning_rate": 7.963934786981869e-07, "loss": 0.0897, "step": 5004 }, { "epoch": 0.32, "grad_norm": 0.3111004790221444, "learning_rate": 7.963102997629483e-07, "loss": 0.0062, "step": 5005 }, { "epoch": 0.32, "grad_norm": 0.4275767116908408, "learning_rate": 7.962271081865168e-07, "loss": 0.088, "step": 5006 }, { "epoch": 0.32, "grad_norm": 0.6975253562041295, "learning_rate": 7.961439039724413e-07, "loss": 0.1739, "step": 5007 }, { "epoch": 0.32, "grad_norm": 0.4395987336849303, "learning_rate": 7.960606871242715e-07, "loss": 0.0743, "step": 5008 }, { "epoch": 0.32, "grad_norm": 1.1549968254191867, "learning_rate": 7.959774576455575e-07, "loss": 0.3855, "step": 5009 }, { "epoch": 0.32, "grad_norm": 0.9447495405402501, "learning_rate": 7.958942155398505e-07, "loss": 0.2621, "step": 5010 }, { "epoch": 0.32, "grad_norm": 0.9002141884337671, "learning_rate": 7.958109608107013e-07, "loss": 0.2056, "step": 5011 }, { "epoch": 0.32, "grad_norm": 0.47126443233971976, "learning_rate": 7.957276934616617e-07, "loss": 0.1012, "step": 5012 }, { "epoch": 0.32, "grad_norm": 0.8064655018044103, "learning_rate": 7.956444134962845e-07, "loss": 0.209, "step": 5013 }, { "epoch": 0.32, "grad_norm": 1.5132628000304005, "learning_rate": 7.955611209181221e-07, "loss": 0.3676, "step": 5014 }, { "epoch": 0.32, "grad_norm": 0.7680638476418366, "learning_rate": 7.954778157307282e-07, "loss": 0.2247, "step": 5015 }, { "epoch": 0.32, "grad_norm": 0.3801852438888622, "learning_rate": 7.953944979376566e-07, "loss": 0.1818, "step": 5016 }, { "epoch": 0.32, "grad_norm": 1.0267432775653922, "learning_rate": 7.953111675424621e-07, "loss": 0.1018, "step": 5017 }, { "epoch": 0.32, "grad_norm": 0.3157116719404107, "learning_rate": 7.952278245486994e-07, "loss": 0.0912, "step": 5018 }, { "epoch": 0.32, "grad_norm": 0.677428114293082, "learning_rate": 7.951444689599243e-07, "loss": 0.062, "step": 5019 }, { "epoch": 0.32, "grad_norm": 0.538623523501602, "learning_rate": 7.950611007796926e-07, "loss": 0.1322, "step": 5020 }, { "epoch": 0.32, "grad_norm": 1.734947706556767, "learning_rate": 7.949777200115614e-07, "loss": 0.3189, "step": 5021 }, { "epoch": 0.32, "grad_norm": 0.5005699296473773, "learning_rate": 7.948943266590877e-07, "loss": 0.1052, "step": 5022 }, { "epoch": 0.32, "grad_norm": 0.956372170598805, "learning_rate": 7.948109207258291e-07, "loss": 0.038, "step": 5023 }, { "epoch": 0.32, "grad_norm": 0.29887402833314614, "learning_rate": 7.947275022153442e-07, "loss": 0.1527, "step": 5024 }, { "epoch": 0.32, "grad_norm": 0.5773722082686125, "learning_rate": 7.946440711311913e-07, "loss": 0.0651, "step": 5025 }, { "epoch": 0.32, "grad_norm": 0.7473159928478327, "learning_rate": 7.9456062747693e-07, "loss": 0.3616, "step": 5026 }, { "epoch": 0.32, "grad_norm": 1.1540229629180792, "learning_rate": 7.944771712561205e-07, "loss": 0.1379, "step": 5027 }, { "epoch": 0.32, "grad_norm": 2.402763511775212, "learning_rate": 7.943937024723227e-07, "loss": 0.1207, "step": 5028 }, { "epoch": 0.32, "grad_norm": 3.1898707403402686, "learning_rate": 7.943102211290978e-07, "loss": 0.157, "step": 5029 }, { "epoch": 0.32, "grad_norm": 3.4718896143639624, "learning_rate": 7.942267272300073e-07, "loss": 0.3093, "step": 5030 }, { "epoch": 0.32, "grad_norm": 0.6827378006526623, "learning_rate": 7.941432207786129e-07, "loss": 0.3476, "step": 5031 }, { "epoch": 0.32, "grad_norm": 0.838382983533253, "learning_rate": 7.940597017784777e-07, "loss": 0.5095, "step": 5032 }, { "epoch": 0.32, "grad_norm": 10.25771237914452, "learning_rate": 7.939761702331643e-07, "loss": 0.0783, "step": 5033 }, { "epoch": 0.32, "grad_norm": 0.6697000274673348, "learning_rate": 7.938926261462365e-07, "loss": 0.1997, "step": 5034 }, { "epoch": 0.32, "grad_norm": 0.45422559182740535, "learning_rate": 7.938090695212586e-07, "loss": 0.1168, "step": 5035 }, { "epoch": 0.32, "grad_norm": 0.749265570823233, "learning_rate": 7.937255003617951e-07, "loss": 0.177, "step": 5036 }, { "epoch": 0.32, "grad_norm": 3.2582617138738326, "learning_rate": 7.936419186714112e-07, "loss": 0.2065, "step": 5037 }, { "epoch": 0.32, "grad_norm": 2.1733341664774195, "learning_rate": 7.935583244536729e-07, "loss": 0.1761, "step": 5038 }, { "epoch": 0.32, "grad_norm": 1.290772020618876, "learning_rate": 7.934747177121462e-07, "loss": 0.1028, "step": 5039 }, { "epoch": 0.32, "grad_norm": 0.6410520876912739, "learning_rate": 7.933910984503982e-07, "loss": 0.2144, "step": 5040 }, { "epoch": 0.32, "grad_norm": 1.6934250674178204, "learning_rate": 7.933074666719961e-07, "loss": 0.0993, "step": 5041 }, { "epoch": 0.32, "grad_norm": 1.047608512089884, "learning_rate": 7.932238223805078e-07, "loss": 0.2721, "step": 5042 }, { "epoch": 0.32, "grad_norm": 0.18622970671123631, "learning_rate": 7.93140165579502e-07, "loss": 0.0026, "step": 5043 }, { "epoch": 0.32, "grad_norm": 0.4924091168199843, "learning_rate": 7.930564962725474e-07, "loss": 0.0448, "step": 5044 }, { "epoch": 0.32, "grad_norm": 1.0139061701689729, "learning_rate": 7.929728144632134e-07, "loss": 0.2306, "step": 5045 }, { "epoch": 0.32, "grad_norm": 0.42514525358423894, "learning_rate": 7.928891201550702e-07, "loss": 0.1029, "step": 5046 }, { "epoch": 0.32, "grad_norm": 0.6606443876500947, "learning_rate": 7.928054133516884e-07, "loss": 0.0526, "step": 5047 }, { "epoch": 0.32, "grad_norm": 0.5533387802104182, "learning_rate": 7.92721694056639e-07, "loss": 0.2507, "step": 5048 }, { "epoch": 0.32, "grad_norm": 4.392260737879013, "learning_rate": 7.926379622734938e-07, "loss": 0.0962, "step": 5049 }, { "epoch": 0.32, "grad_norm": 0.6639063362367161, "learning_rate": 7.925542180058248e-07, "loss": 0.059, "step": 5050 }, { "epoch": 0.32, "grad_norm": 0.5527057815532743, "learning_rate": 7.924704612572048e-07, "loss": 0.1333, "step": 5051 }, { "epoch": 0.32, "grad_norm": 0.8813921275691163, "learning_rate": 7.923866920312067e-07, "loss": 0.245, "step": 5052 }, { "epoch": 0.32, "grad_norm": 0.6499958517425046, "learning_rate": 7.923029103314049e-07, "loss": 0.2011, "step": 5053 }, { "epoch": 0.32, "grad_norm": 0.6815565286916628, "learning_rate": 7.922191161613731e-07, "loss": 0.0448, "step": 5054 }, { "epoch": 0.32, "grad_norm": 1.9708873600600691, "learning_rate": 7.921353095246866e-07, "loss": 0.2493, "step": 5055 }, { "epoch": 0.32, "grad_norm": 0.3973793893255023, "learning_rate": 7.920514904249204e-07, "loss": 0.095, "step": 5056 }, { "epoch": 0.32, "grad_norm": 0.4297452153589135, "learning_rate": 7.919676588656505e-07, "loss": 0.1796, "step": 5057 }, { "epoch": 0.32, "grad_norm": 0.8032506156239824, "learning_rate": 7.918838148504535e-07, "loss": 0.1595, "step": 5058 }, { "epoch": 0.32, "grad_norm": 0.622115335804525, "learning_rate": 7.91799958382906e-07, "loss": 0.1206, "step": 5059 }, { "epoch": 0.32, "grad_norm": 0.7769521696973315, "learning_rate": 7.917160894665858e-07, "loss": 0.1069, "step": 5060 }, { "epoch": 0.32, "grad_norm": 0.2420123982376446, "learning_rate": 7.916322081050709e-07, "loss": 0.1345, "step": 5061 }, { "epoch": 0.32, "grad_norm": 2.3595717039836064, "learning_rate": 7.915483143019395e-07, "loss": 0.1815, "step": 5062 }, { "epoch": 0.32, "grad_norm": 0.5631361585386891, "learning_rate": 7.91464408060771e-07, "loss": 0.2543, "step": 5063 }, { "epoch": 0.32, "grad_norm": 0.5781748938014992, "learning_rate": 7.91380489385145e-07, "loss": 0.321, "step": 5064 }, { "epoch": 0.32, "grad_norm": 0.4852625379275442, "learning_rate": 7.912965582786415e-07, "loss": 0.2118, "step": 5065 }, { "epoch": 0.32, "grad_norm": 0.5333212149403556, "learning_rate": 7.912126147448413e-07, "loss": 0.1468, "step": 5066 }, { "epoch": 0.32, "grad_norm": 0.18167122602406438, "learning_rate": 7.911286587873256e-07, "loss": 0.1121, "step": 5067 }, { "epoch": 0.32, "grad_norm": 0.3149254689417239, "learning_rate": 7.910446904096759e-07, "loss": 0.1077, "step": 5068 }, { "epoch": 0.32, "grad_norm": 6.907826203685545, "learning_rate": 7.909607096154749e-07, "loss": 0.318, "step": 5069 }, { "epoch": 0.32, "grad_norm": 1.2544560723678784, "learning_rate": 7.908767164083049e-07, "loss": 0.3798, "step": 5070 }, { "epoch": 0.32, "grad_norm": 3.1243881852506736, "learning_rate": 7.907927107917495e-07, "loss": 0.1088, "step": 5071 }, { "epoch": 0.32, "grad_norm": 1.4039041042046467, "learning_rate": 7.907086927693925e-07, "loss": 0.1204, "step": 5072 }, { "epoch": 0.32, "grad_norm": 1.337441881657516, "learning_rate": 7.906246623448183e-07, "loss": 0.2662, "step": 5073 }, { "epoch": 0.32, "grad_norm": 1.3869484615283691, "learning_rate": 7.905406195216117e-07, "loss": 0.194, "step": 5074 }, { "epoch": 0.32, "grad_norm": 1.3772397922865434, "learning_rate": 7.904565643033583e-07, "loss": 0.1906, "step": 5075 }, { "epoch": 0.32, "grad_norm": 1.075791385822625, "learning_rate": 7.90372496693644e-07, "loss": 0.2967, "step": 5076 }, { "epoch": 0.32, "grad_norm": 1.1951627905017583, "learning_rate": 7.902884166960551e-07, "loss": 0.0529, "step": 5077 }, { "epoch": 0.32, "grad_norm": 0.4299172986861356, "learning_rate": 7.902043243141789e-07, "loss": 0.1034, "step": 5078 }, { "epoch": 0.32, "grad_norm": 0.27911503588386616, "learning_rate": 7.901202195516028e-07, "loss": 0.0044, "step": 5079 }, { "epoch": 0.32, "grad_norm": 0.3465537627965502, "learning_rate": 7.90036102411915e-07, "loss": 0.0845, "step": 5080 }, { "epoch": 0.32, "grad_norm": 1.8558313789097676, "learning_rate": 7.89951972898704e-07, "loss": 0.169, "step": 5081 }, { "epoch": 0.32, "grad_norm": 2.4629701720174717, "learning_rate": 7.898678310155589e-07, "loss": 0.1314, "step": 5082 }, { "epoch": 0.32, "grad_norm": 0.5101844974842896, "learning_rate": 7.897836767660695e-07, "loss": 0.1962, "step": 5083 }, { "epoch": 0.32, "grad_norm": 0.5551973788374136, "learning_rate": 7.896995101538259e-07, "loss": 0.1197, "step": 5084 }, { "epoch": 0.32, "grad_norm": 0.5410604563063769, "learning_rate": 7.896153311824188e-07, "loss": 0.245, "step": 5085 }, { "epoch": 0.32, "grad_norm": 1.950281909471845, "learning_rate": 7.895311398554394e-07, "loss": 0.2554, "step": 5086 }, { "epoch": 0.32, "grad_norm": 0.8591973063541584, "learning_rate": 7.894469361764798e-07, "loss": 0.1654, "step": 5087 }, { "epoch": 0.32, "grad_norm": 0.522447476544494, "learning_rate": 7.893627201491318e-07, "loss": 0.2683, "step": 5088 }, { "epoch": 0.32, "grad_norm": 0.5860219876403301, "learning_rate": 7.892784917769884e-07, "loss": 0.1917, "step": 5089 }, { "epoch": 0.32, "grad_norm": 0.3691056995316721, "learning_rate": 7.891942510636431e-07, "loss": 0.1302, "step": 5090 }, { "epoch": 0.32, "grad_norm": 1.5598536136114467, "learning_rate": 7.891099980126899e-07, "loss": 0.4858, "step": 5091 }, { "epoch": 0.32, "grad_norm": 0.6224550751677805, "learning_rate": 7.890257326277227e-07, "loss": 0.1352, "step": 5092 }, { "epoch": 0.32, "grad_norm": 0.5249354254426418, "learning_rate": 7.889414549123369e-07, "loss": 0.0301, "step": 5093 }, { "epoch": 0.32, "grad_norm": 0.5549734263590133, "learning_rate": 7.888571648701277e-07, "loss": 0.4364, "step": 5094 }, { "epoch": 0.32, "grad_norm": 1.252075031879599, "learning_rate": 7.887728625046912e-07, "loss": 0.2175, "step": 5095 }, { "epoch": 0.32, "grad_norm": 0.577590550688623, "learning_rate": 7.88688547819624e-07, "loss": 0.2646, "step": 5096 }, { "epoch": 0.33, "grad_norm": 0.9450411854298872, "learning_rate": 7.886042208185229e-07, "loss": 0.2347, "step": 5097 }, { "epoch": 0.33, "grad_norm": 0.5053901836705134, "learning_rate": 7.885198815049857e-07, "loss": 0.2114, "step": 5098 }, { "epoch": 0.33, "grad_norm": 2.3266340655180504, "learning_rate": 7.884355298826102e-07, "loss": 0.1107, "step": 5099 }, { "epoch": 0.33, "grad_norm": 0.9160849313524885, "learning_rate": 7.883511659549952e-07, "loss": 0.1369, "step": 5100 }, { "epoch": 0.33, "grad_norm": 0.8819874018164324, "learning_rate": 7.882667897257398e-07, "loss": 0.3088, "step": 5101 }, { "epoch": 0.33, "grad_norm": 0.6339437987921024, "learning_rate": 7.881824011984437e-07, "loss": 0.2548, "step": 5102 }, { "epoch": 0.33, "grad_norm": 0.18967550245166595, "learning_rate": 7.880980003767071e-07, "loss": 0.0126, "step": 5103 }, { "epoch": 0.33, "grad_norm": 0.7208254564609355, "learning_rate": 7.880135872641305e-07, "loss": 0.2752, "step": 5104 }, { "epoch": 0.33, "grad_norm": 0.4640709049335785, "learning_rate": 7.879291618643154e-07, "loss": 0.1532, "step": 5105 }, { "epoch": 0.33, "grad_norm": 0.8370361202927852, "learning_rate": 7.878447241808634e-07, "loss": 0.0573, "step": 5106 }, { "epoch": 0.33, "grad_norm": 0.7902582978217804, "learning_rate": 7.877602742173768e-07, "loss": 0.1405, "step": 5107 }, { "epoch": 0.33, "grad_norm": 0.6673761349895175, "learning_rate": 7.876758119774585e-07, "loss": 0.0331, "step": 5108 }, { "epoch": 0.33, "grad_norm": 0.9481514783975585, "learning_rate": 7.875913374647116e-07, "loss": 0.2157, "step": 5109 }, { "epoch": 0.33, "grad_norm": 0.7219554855726629, "learning_rate": 7.875068506827403e-07, "loss": 0.1573, "step": 5110 }, { "epoch": 0.33, "grad_norm": 0.723677411654189, "learning_rate": 7.874223516351487e-07, "loss": 0.1056, "step": 5111 }, { "epoch": 0.33, "grad_norm": 2.314401714303733, "learning_rate": 7.873378403255419e-07, "loss": 0.209, "step": 5112 }, { "epoch": 0.33, "grad_norm": 1.9923260613096916, "learning_rate": 7.872533167575251e-07, "loss": 0.1989, "step": 5113 }, { "epoch": 0.33, "grad_norm": 1.3756478802285395, "learning_rate": 7.871687809347045e-07, "loss": 0.2573, "step": 5114 }, { "epoch": 0.33, "grad_norm": 0.960812771431285, "learning_rate": 7.870842328606862e-07, "loss": 0.1823, "step": 5115 }, { "epoch": 0.33, "grad_norm": 0.7207169323889557, "learning_rate": 7.869996725390775e-07, "loss": 0.2595, "step": 5116 }, { "epoch": 0.33, "grad_norm": 0.7463231305737968, "learning_rate": 7.869150999734859e-07, "loss": 0.0896, "step": 5117 }, { "epoch": 0.33, "grad_norm": 0.819749616163352, "learning_rate": 7.868305151675192e-07, "loss": 0.2348, "step": 5118 }, { "epoch": 0.33, "grad_norm": 1.1100931362103976, "learning_rate": 7.867459181247863e-07, "loss": 0.2892, "step": 5119 }, { "epoch": 0.33, "grad_norm": 0.511772839504525, "learning_rate": 7.866613088488961e-07, "loss": 0.2312, "step": 5120 }, { "epoch": 0.33, "grad_norm": 1.3402164407726527, "learning_rate": 7.86576687343458e-07, "loss": 0.0916, "step": 5121 }, { "epoch": 0.33, "grad_norm": 0.9386732144001584, "learning_rate": 7.864920536120825e-07, "loss": 0.5051, "step": 5122 }, { "epoch": 0.33, "grad_norm": 0.5137049325530657, "learning_rate": 7.864074076583799e-07, "loss": 0.2519, "step": 5123 }, { "epoch": 0.33, "grad_norm": 3.1003313269417925, "learning_rate": 7.863227494859619e-07, "loss": 0.0859, "step": 5124 }, { "epoch": 0.33, "grad_norm": 0.6060528816479874, "learning_rate": 7.862380790984396e-07, "loss": 0.2056, "step": 5125 }, { "epoch": 0.33, "grad_norm": 0.6734469477166636, "learning_rate": 7.861533964994254e-07, "loss": 0.1005, "step": 5126 }, { "epoch": 0.33, "grad_norm": 0.9326064729223768, "learning_rate": 7.860687016925321e-07, "loss": 0.1176, "step": 5127 }, { "epoch": 0.33, "grad_norm": 0.5140312436999763, "learning_rate": 7.859839946813729e-07, "loss": 0.1963, "step": 5128 }, { "epoch": 0.33, "grad_norm": 1.046058161699342, "learning_rate": 7.858992754695617e-07, "loss": 0.1158, "step": 5129 }, { "epoch": 0.33, "grad_norm": 0.5555687076704139, "learning_rate": 7.858145440607124e-07, "loss": 0.2217, "step": 5130 }, { "epoch": 0.33, "grad_norm": 1.0269123056921516, "learning_rate": 7.857298004584403e-07, "loss": 0.1093, "step": 5131 }, { "epoch": 0.33, "grad_norm": 2.51625517947423, "learning_rate": 7.856450446663605e-07, "loss": 0.187, "step": 5132 }, { "epoch": 0.33, "grad_norm": 1.529660144764917, "learning_rate": 7.855602766880889e-07, "loss": 0.1998, "step": 5133 }, { "epoch": 0.33, "grad_norm": 1.7936212130851568, "learning_rate": 7.854754965272419e-07, "loss": 0.1397, "step": 5134 }, { "epoch": 0.33, "grad_norm": 0.38627722105736007, "learning_rate": 7.853907041874361e-07, "loss": 0.2149, "step": 5135 }, { "epoch": 0.33, "grad_norm": 0.7981630064468679, "learning_rate": 7.853058996722895e-07, "loss": 0.1107, "step": 5136 }, { "epoch": 0.33, "grad_norm": 0.4979561136612249, "learning_rate": 7.852210829854193e-07, "loss": 0.2579, "step": 5137 }, { "epoch": 0.33, "grad_norm": 2.6970440182009874, "learning_rate": 7.851362541304446e-07, "loss": 0.2837, "step": 5138 }, { "epoch": 0.33, "grad_norm": 0.5434395421011616, "learning_rate": 7.85051413110984e-07, "loss": 0.0616, "step": 5139 }, { "epoch": 0.33, "grad_norm": 0.9180870135310615, "learning_rate": 7.849665599306572e-07, "loss": 0.3237, "step": 5140 }, { "epoch": 0.33, "grad_norm": 0.7764686954870078, "learning_rate": 7.848816945930841e-07, "loss": 0.1006, "step": 5141 }, { "epoch": 0.33, "grad_norm": 2.104607388805604, "learning_rate": 7.84796817101885e-07, "loss": 0.2477, "step": 5142 }, { "epoch": 0.33, "grad_norm": 1.474006178564293, "learning_rate": 7.847119274606814e-07, "loss": 0.2312, "step": 5143 }, { "epoch": 0.33, "grad_norm": 0.7354791717268591, "learning_rate": 7.846270256730945e-07, "loss": 0.4119, "step": 5144 }, { "epoch": 0.33, "grad_norm": 0.8495632459382707, "learning_rate": 7.845421117427466e-07, "loss": 0.2803, "step": 5145 }, { "epoch": 0.33, "grad_norm": 0.9405873283152801, "learning_rate": 7.844571856732601e-07, "loss": 0.275, "step": 5146 }, { "epoch": 0.33, "grad_norm": 1.1669588160239925, "learning_rate": 7.843722474682583e-07, "loss": 0.258, "step": 5147 }, { "epoch": 0.33, "grad_norm": 0.41844449084585966, "learning_rate": 7.842872971313648e-07, "loss": 0.0111, "step": 5148 }, { "epoch": 0.33, "grad_norm": 0.18090898926874738, "learning_rate": 7.842023346662036e-07, "loss": 0.0212, "step": 5149 }, { "epoch": 0.33, "grad_norm": 0.9171412220481466, "learning_rate": 7.841173600763996e-07, "loss": 0.2245, "step": 5150 }, { "epoch": 0.33, "grad_norm": 0.40540167800773685, "learning_rate": 7.840323733655778e-07, "loss": 0.1728, "step": 5151 }, { "epoch": 0.33, "grad_norm": 0.37619284814137943, "learning_rate": 7.839473745373642e-07, "loss": 0.1055, "step": 5152 }, { "epoch": 0.33, "grad_norm": 2.264378141577581, "learning_rate": 7.838623635953845e-07, "loss": 0.0205, "step": 5153 }, { "epoch": 0.33, "grad_norm": 0.866356084279496, "learning_rate": 7.83777340543266e-07, "loss": 0.1617, "step": 5154 }, { "epoch": 0.33, "grad_norm": 0.7757190835594977, "learning_rate": 7.836923053846354e-07, "loss": 0.2728, "step": 5155 }, { "epoch": 0.33, "grad_norm": 0.3825698622205671, "learning_rate": 7.836072581231209e-07, "loss": 0.1509, "step": 5156 }, { "epoch": 0.33, "grad_norm": 0.8802356116921449, "learning_rate": 7.835221987623506e-07, "loss": 0.2452, "step": 5157 }, { "epoch": 0.33, "grad_norm": 0.3535140613776956, "learning_rate": 7.834371273059534e-07, "loss": 0.2698, "step": 5158 }, { "epoch": 0.33, "grad_norm": 0.7063955805418285, "learning_rate": 7.833520437575585e-07, "loss": 0.4772, "step": 5159 }, { "epoch": 0.33, "grad_norm": 0.8677992657140369, "learning_rate": 7.832669481207958e-07, "loss": 0.1712, "step": 5160 }, { "epoch": 0.33, "grad_norm": 1.0564033872324807, "learning_rate": 7.831818403992958e-07, "loss": 0.2168, "step": 5161 }, { "epoch": 0.33, "grad_norm": 0.1737715122182201, "learning_rate": 7.830967205966892e-07, "loss": 0.0726, "step": 5162 }, { "epoch": 0.33, "grad_norm": 0.4813717911308954, "learning_rate": 7.830115887166072e-07, "loss": 0.0935, "step": 5163 }, { "epoch": 0.33, "grad_norm": 0.21965268421480677, "learning_rate": 7.82926444762682e-07, "loss": 0.0828, "step": 5164 }, { "epoch": 0.33, "grad_norm": 0.6091583475193905, "learning_rate": 7.82841288738546e-07, "loss": 0.1261, "step": 5165 }, { "epoch": 0.33, "grad_norm": 0.41979584647571394, "learning_rate": 7.82756120647832e-07, "loss": 0.012, "step": 5166 }, { "epoch": 0.33, "grad_norm": 0.3935984589136889, "learning_rate": 7.826709404941735e-07, "loss": 0.0103, "step": 5167 }, { "epoch": 0.33, "grad_norm": 0.822421978783359, "learning_rate": 7.825857482812044e-07, "loss": 0.1331, "step": 5168 }, { "epoch": 0.33, "grad_norm": 1.1806423913775659, "learning_rate": 7.825005440125593e-07, "loss": 0.2662, "step": 5169 }, { "epoch": 0.33, "grad_norm": 0.15141360722514163, "learning_rate": 7.824153276918731e-07, "loss": 0.0022, "step": 5170 }, { "epoch": 0.33, "grad_norm": 2.003885880421857, "learning_rate": 7.82330099322781e-07, "loss": 0.3928, "step": 5171 }, { "epoch": 0.33, "grad_norm": 0.43733114030772724, "learning_rate": 7.822448589089197e-07, "loss": 0.1057, "step": 5172 }, { "epoch": 0.33, "grad_norm": 1.6463146289694512, "learning_rate": 7.821596064539251e-07, "loss": 0.1843, "step": 5173 }, { "epoch": 0.33, "grad_norm": 0.7131295128071986, "learning_rate": 7.820743419614345e-07, "loss": 0.2946, "step": 5174 }, { "epoch": 0.33, "grad_norm": 0.3734179210841508, "learning_rate": 7.819890654350855e-07, "loss": 0.0989, "step": 5175 }, { "epoch": 0.33, "grad_norm": 1.0249790109579513, "learning_rate": 7.819037768785159e-07, "loss": 0.3991, "step": 5176 }, { "epoch": 0.33, "grad_norm": 0.4857390485229206, "learning_rate": 7.818184762953648e-07, "loss": 0.0315, "step": 5177 }, { "epoch": 0.33, "grad_norm": 1.364107220300501, "learning_rate": 7.817331636892708e-07, "loss": 0.0135, "step": 5178 }, { "epoch": 0.33, "grad_norm": 1.6163913761357585, "learning_rate": 7.816478390638737e-07, "loss": 0.0153, "step": 5179 }, { "epoch": 0.33, "grad_norm": 0.7892463179352985, "learning_rate": 7.815625024228136e-07, "loss": 0.198, "step": 5180 }, { "epoch": 0.33, "grad_norm": 0.4749420067595575, "learning_rate": 7.814771537697311e-07, "loss": 0.1432, "step": 5181 }, { "epoch": 0.33, "grad_norm": 0.6439197328856424, "learning_rate": 7.813917931082675e-07, "loss": 0.1727, "step": 5182 }, { "epoch": 0.33, "grad_norm": 0.9286148777239651, "learning_rate": 7.813064204420643e-07, "loss": 0.2073, "step": 5183 }, { "epoch": 0.33, "grad_norm": 0.5687769427647333, "learning_rate": 7.812210357747635e-07, "loss": 0.1604, "step": 5184 }, { "epoch": 0.33, "grad_norm": 1.1008413881744683, "learning_rate": 7.811356391100081e-07, "loss": 0.2216, "step": 5185 }, { "epoch": 0.33, "grad_norm": 1.2699507225473614, "learning_rate": 7.810502304514413e-07, "loss": 0.2641, "step": 5186 }, { "epoch": 0.33, "grad_norm": 2.1134241983281936, "learning_rate": 7.809648098027066e-07, "loss": 0.3247, "step": 5187 }, { "epoch": 0.33, "grad_norm": 4.625339701996283, "learning_rate": 7.808793771674484e-07, "loss": 0.2065, "step": 5188 }, { "epoch": 0.33, "grad_norm": 2.3914872799377487, "learning_rate": 7.807939325493112e-07, "loss": 0.2097, "step": 5189 }, { "epoch": 0.33, "grad_norm": 7.376255327123015, "learning_rate": 7.807084759519404e-07, "loss": 0.1322, "step": 5190 }, { "epoch": 0.33, "grad_norm": 0.6763194881524864, "learning_rate": 7.806230073789818e-07, "loss": 0.3152, "step": 5191 }, { "epoch": 0.33, "grad_norm": 0.863660255909624, "learning_rate": 7.805375268340814e-07, "loss": 0.0923, "step": 5192 }, { "epoch": 0.33, "grad_norm": 0.8275494096837781, "learning_rate": 7.804520343208865e-07, "loss": 0.2021, "step": 5193 }, { "epoch": 0.33, "grad_norm": 3.269752146604219, "learning_rate": 7.803665298430437e-07, "loss": 0.1851, "step": 5194 }, { "epoch": 0.33, "grad_norm": 0.699971489648117, "learning_rate": 7.802810134042013e-07, "loss": 0.1585, "step": 5195 }, { "epoch": 0.33, "grad_norm": 4.795300966736381, "learning_rate": 7.801954850080074e-07, "loss": 0.105, "step": 5196 }, { "epoch": 0.33, "grad_norm": 0.5764921562418399, "learning_rate": 7.801099446581107e-07, "loss": 0.2153, "step": 5197 }, { "epoch": 0.33, "grad_norm": 0.5092916387115642, "learning_rate": 7.800243923581609e-07, "loss": 0.0748, "step": 5198 }, { "epoch": 0.33, "grad_norm": 0.45864637235860056, "learning_rate": 7.799388281118076e-07, "loss": 0.1253, "step": 5199 }, { "epoch": 0.33, "grad_norm": 0.2302589845756009, "learning_rate": 7.79853251922701e-07, "loss": 0.1033, "step": 5200 }, { "epoch": 0.33, "grad_norm": 0.6796973251578036, "learning_rate": 7.797676637944921e-07, "loss": 0.2755, "step": 5201 }, { "epoch": 0.33, "grad_norm": 0.9209543229656066, "learning_rate": 7.796820637308323e-07, "loss": 0.3127, "step": 5202 }, { "epoch": 0.33, "grad_norm": 0.5949097161132205, "learning_rate": 7.795964517353733e-07, "loss": 0.3117, "step": 5203 }, { "epoch": 0.33, "grad_norm": 0.6011857914950647, "learning_rate": 7.795108278117678e-07, "loss": 0.0846, "step": 5204 }, { "epoch": 0.33, "grad_norm": 1.7723605848731894, "learning_rate": 7.794251919636685e-07, "loss": 0.1317, "step": 5205 }, { "epoch": 0.33, "grad_norm": 0.42760420529026383, "learning_rate": 7.793395441947287e-07, "loss": 0.1835, "step": 5206 }, { "epoch": 0.33, "grad_norm": 0.8185202619388978, "learning_rate": 7.792538845086024e-07, "loss": 0.1726, "step": 5207 }, { "epoch": 0.33, "grad_norm": 1.3600276855074998, "learning_rate": 7.79168212908944e-07, "loss": 0.194, "step": 5208 }, { "epoch": 0.33, "grad_norm": 1.1362800072731472, "learning_rate": 7.790825293994086e-07, "loss": 0.2714, "step": 5209 }, { "epoch": 0.33, "grad_norm": 1.039725504216318, "learning_rate": 7.789968339836514e-07, "loss": 0.2195, "step": 5210 }, { "epoch": 0.33, "grad_norm": 1.2588782562309624, "learning_rate": 7.789111266653283e-07, "loss": 0.3284, "step": 5211 }, { "epoch": 0.33, "grad_norm": 1.9196694656825979, "learning_rate": 7.78825407448096e-07, "loss": 0.0946, "step": 5212 }, { "epoch": 0.33, "grad_norm": 0.8374289363909836, "learning_rate": 7.787396763356111e-07, "loss": 0.3104, "step": 5213 }, { "epoch": 0.33, "grad_norm": 0.7024580538454515, "learning_rate": 7.786539333315315e-07, "loss": 0.1579, "step": 5214 }, { "epoch": 0.33, "grad_norm": 0.7842770416764756, "learning_rate": 7.785681784395148e-07, "loss": 0.2168, "step": 5215 }, { "epoch": 0.33, "grad_norm": 0.9262774147299662, "learning_rate": 7.784824116632196e-07, "loss": 0.4278, "step": 5216 }, { "epoch": 0.33, "grad_norm": 0.8510804981741832, "learning_rate": 7.78396633006305e-07, "loss": 0.261, "step": 5217 }, { "epoch": 0.33, "grad_norm": 0.5361973129473463, "learning_rate": 7.783108424724303e-07, "loss": 0.399, "step": 5218 }, { "epoch": 0.33, "grad_norm": 4.024259909503765, "learning_rate": 7.782250400652556e-07, "loss": 0.0766, "step": 5219 }, { "epoch": 0.33, "grad_norm": 0.6120064942509154, "learning_rate": 7.781392257884415e-07, "loss": 0.1121, "step": 5220 }, { "epoch": 0.33, "grad_norm": 0.3654686798068923, "learning_rate": 7.780533996456489e-07, "loss": 0.0905, "step": 5221 }, { "epoch": 0.33, "grad_norm": 0.6877879776820551, "learning_rate": 7.779675616405391e-07, "loss": 0.2864, "step": 5222 }, { "epoch": 0.33, "grad_norm": 0.8962972809336581, "learning_rate": 7.778817117767747e-07, "loss": 0.2471, "step": 5223 }, { "epoch": 0.33, "grad_norm": 1.1438333180741294, "learning_rate": 7.777958500580175e-07, "loss": 0.2553, "step": 5224 }, { "epoch": 0.33, "grad_norm": 6.01063537119916, "learning_rate": 7.777099764879311e-07, "loss": 0.1714, "step": 5225 }, { "epoch": 0.33, "grad_norm": 0.547708033777952, "learning_rate": 7.776240910701787e-07, "loss": 0.2227, "step": 5226 }, { "epoch": 0.33, "grad_norm": 0.9399754562836553, "learning_rate": 7.775381938084245e-07, "loss": 0.1651, "step": 5227 }, { "epoch": 0.33, "grad_norm": 0.573631678102885, "learning_rate": 7.77452284706333e-07, "loss": 0.2619, "step": 5228 }, { "epoch": 0.33, "grad_norm": 0.8042545184195301, "learning_rate": 7.773663637675694e-07, "loss": 0.3167, "step": 5229 }, { "epoch": 0.33, "grad_norm": 2.7307787292330183, "learning_rate": 7.77280430995799e-07, "loss": 0.357, "step": 5230 }, { "epoch": 0.33, "grad_norm": 0.41078368085224987, "learning_rate": 7.771944863946882e-07, "loss": 0.1782, "step": 5231 }, { "epoch": 0.33, "grad_norm": 0.5626768386257761, "learning_rate": 7.771085299679033e-07, "loss": 0.1987, "step": 5232 }, { "epoch": 0.33, "grad_norm": 0.30757453219627906, "learning_rate": 7.770225617191115e-07, "loss": 0.1097, "step": 5233 }, { "epoch": 0.33, "grad_norm": 0.7249081777321619, "learning_rate": 7.769365816519802e-07, "loss": 0.0807, "step": 5234 }, { "epoch": 0.33, "grad_norm": 1.9128744385671228, "learning_rate": 7.768505897701777e-07, "loss": 0.2948, "step": 5235 }, { "epoch": 0.33, "grad_norm": 2.5775201829528736, "learning_rate": 7.767645860773725e-07, "loss": 0.1744, "step": 5236 }, { "epoch": 0.33, "grad_norm": 0.7369713102491516, "learning_rate": 7.766785705772338e-07, "loss": 0.1896, "step": 5237 }, { "epoch": 0.33, "grad_norm": 0.35607296837526103, "learning_rate": 7.765925432734309e-07, "loss": 0.186, "step": 5238 }, { "epoch": 0.33, "grad_norm": 0.4936113410080766, "learning_rate": 7.765065041696341e-07, "loss": 0.1866, "step": 5239 }, { "epoch": 0.33, "grad_norm": 0.6865003619440185, "learning_rate": 7.764204532695141e-07, "loss": 0.17, "step": 5240 }, { "epoch": 0.33, "grad_norm": 0.3432388715961265, "learning_rate": 7.763343905767419e-07, "loss": 0.1504, "step": 5241 }, { "epoch": 0.33, "grad_norm": 0.6067028995860944, "learning_rate": 7.762483160949888e-07, "loss": 0.1617, "step": 5242 }, { "epoch": 0.33, "grad_norm": 0.9284293087567614, "learning_rate": 7.761622298279276e-07, "loss": 0.1262, "step": 5243 }, { "epoch": 0.33, "grad_norm": 0.6042827755347948, "learning_rate": 7.760761317792303e-07, "loss": 0.0869, "step": 5244 }, { "epoch": 0.33, "grad_norm": 0.6440464766518622, "learning_rate": 7.759900219525703e-07, "loss": 0.3204, "step": 5245 }, { "epoch": 0.33, "grad_norm": 0.6139440459427701, "learning_rate": 7.759039003516211e-07, "loss": 0.0814, "step": 5246 }, { "epoch": 0.33, "grad_norm": 0.3709636354163115, "learning_rate": 7.758177669800568e-07, "loss": 0.1332, "step": 5247 }, { "epoch": 0.33, "grad_norm": 0.5544713127409657, "learning_rate": 7.757316218415523e-07, "loss": 0.2422, "step": 5248 }, { "epoch": 0.33, "grad_norm": 1.157099304716162, "learning_rate": 7.756454649397824e-07, "loss": 0.052, "step": 5249 }, { "epoch": 0.33, "grad_norm": 0.6744716019691676, "learning_rate": 7.75559296278423e-07, "loss": 0.1596, "step": 5250 }, { "epoch": 0.33, "grad_norm": 0.6174406562037895, "learning_rate": 7.754731158611498e-07, "loss": 0.3179, "step": 5251 }, { "epoch": 0.33, "grad_norm": 0.5190619052521371, "learning_rate": 7.753869236916399e-07, "loss": 0.3108, "step": 5252 }, { "epoch": 0.33, "grad_norm": 0.9681623585174963, "learning_rate": 7.753007197735703e-07, "loss": 0.0746, "step": 5253 }, { "epoch": 0.34, "grad_norm": 0.7248986660787387, "learning_rate": 7.752145041106184e-07, "loss": 0.2915, "step": 5254 }, { "epoch": 0.34, "grad_norm": 0.8032978124242338, "learning_rate": 7.751282767064626e-07, "loss": 0.1974, "step": 5255 }, { "epoch": 0.34, "grad_norm": 0.7604321560764785, "learning_rate": 7.750420375647815e-07, "loss": 0.3739, "step": 5256 }, { "epoch": 0.34, "grad_norm": 3.8691280985179755, "learning_rate": 7.74955786689254e-07, "loss": 0.3987, "step": 5257 }, { "epoch": 0.34, "grad_norm": 0.7048945497271688, "learning_rate": 7.7486952408356e-07, "loss": 0.1943, "step": 5258 }, { "epoch": 0.34, "grad_norm": 5.26554239065647, "learning_rate": 7.747832497513795e-07, "loss": 0.2652, "step": 5259 }, { "epoch": 0.34, "grad_norm": 0.43491317356013665, "learning_rate": 7.746969636963933e-07, "loss": 0.2595, "step": 5260 }, { "epoch": 0.34, "grad_norm": 1.6478578418172165, "learning_rate": 7.746106659222823e-07, "loss": 0.25, "step": 5261 }, { "epoch": 0.34, "grad_norm": 2.1504782887633436, "learning_rate": 7.745243564327283e-07, "loss": 0.1144, "step": 5262 }, { "epoch": 0.34, "grad_norm": 4.101033691732244, "learning_rate": 7.744380352314134e-07, "loss": 0.0991, "step": 5263 }, { "epoch": 0.34, "grad_norm": 0.8055943990726835, "learning_rate": 7.743517023220203e-07, "loss": 0.3228, "step": 5264 }, { "epoch": 0.34, "grad_norm": 0.7353106485944705, "learning_rate": 7.742653577082318e-07, "loss": 0.1819, "step": 5265 }, { "epoch": 0.34, "grad_norm": 1.0442489787901628, "learning_rate": 7.741790013937321e-07, "loss": 0.3745, "step": 5266 }, { "epoch": 0.34, "grad_norm": 2.4147080737370854, "learning_rate": 7.740926333822049e-07, "loss": 0.298, "step": 5267 }, { "epoch": 0.34, "grad_norm": 0.4203104667063722, "learning_rate": 7.74006253677335e-07, "loss": 0.2799, "step": 5268 }, { "epoch": 0.34, "grad_norm": 0.5690448686724064, "learning_rate": 7.739198622828073e-07, "loss": 0.0067, "step": 5269 }, { "epoch": 0.34, "grad_norm": 0.37927647647532053, "learning_rate": 7.738334592023079e-07, "loss": 0.2907, "step": 5270 }, { "epoch": 0.34, "grad_norm": 1.9868200960221822, "learning_rate": 7.737470444395226e-07, "loss": 0.3532, "step": 5271 }, { "epoch": 0.34, "grad_norm": 2.577393204628036, "learning_rate": 7.73660617998138e-07, "loss": 0.3059, "step": 5272 }, { "epoch": 0.34, "grad_norm": 1.33371205203811, "learning_rate": 7.735741798818414e-07, "loss": 0.1558, "step": 5273 }, { "epoch": 0.34, "grad_norm": 0.547719657318448, "learning_rate": 7.734877300943202e-07, "loss": 0.1946, "step": 5274 }, { "epoch": 0.34, "grad_norm": 0.9383937743435579, "learning_rate": 7.734012686392628e-07, "loss": 0.1982, "step": 5275 }, { "epoch": 0.34, "grad_norm": 0.9772115285261509, "learning_rate": 7.733147955203576e-07, "loss": 0.1183, "step": 5276 }, { "epoch": 0.34, "grad_norm": 0.6408425941334519, "learning_rate": 7.732283107412938e-07, "loss": 0.2941, "step": 5277 }, { "epoch": 0.34, "grad_norm": 3.2603429994239863, "learning_rate": 7.731418143057611e-07, "loss": 0.2918, "step": 5278 }, { "epoch": 0.34, "grad_norm": 1.190932591688707, "learning_rate": 7.730553062174494e-07, "loss": 0.4235, "step": 5279 }, { "epoch": 0.34, "grad_norm": 0.9899504115425867, "learning_rate": 7.729687864800494e-07, "loss": 0.0565, "step": 5280 }, { "epoch": 0.34, "grad_norm": 1.0326428444784788, "learning_rate": 7.728822550972522e-07, "loss": 0.016, "step": 5281 }, { "epoch": 0.34, "grad_norm": 3.7889003525642555, "learning_rate": 7.727957120727495e-07, "loss": 0.0446, "step": 5282 }, { "epoch": 0.34, "grad_norm": 0.4504325124157612, "learning_rate": 7.727091574102334e-07, "loss": 0.1024, "step": 5283 }, { "epoch": 0.34, "grad_norm": 0.5993252725635362, "learning_rate": 7.726225911133965e-07, "loss": 0.2273, "step": 5284 }, { "epoch": 0.34, "grad_norm": 0.6508119301918696, "learning_rate": 7.725360131859317e-07, "loss": 0.0937, "step": 5285 }, { "epoch": 0.34, "grad_norm": 1.7281171500449641, "learning_rate": 7.724494236315327e-07, "loss": 0.3123, "step": 5286 }, { "epoch": 0.34, "grad_norm": 0.7887649925558102, "learning_rate": 7.723628224538937e-07, "loss": 0.0142, "step": 5287 }, { "epoch": 0.34, "grad_norm": 0.3721712379544822, "learning_rate": 7.722762096567089e-07, "loss": 0.1807, "step": 5288 }, { "epoch": 0.34, "grad_norm": 0.570330342971187, "learning_rate": 7.721895852436739e-07, "loss": 0.1508, "step": 5289 }, { "epoch": 0.34, "grad_norm": 1.017985400053795, "learning_rate": 7.72102949218484e-07, "loss": 0.1733, "step": 5290 }, { "epoch": 0.34, "grad_norm": 1.6570959670651932, "learning_rate": 7.720163015848352e-07, "loss": 0.1342, "step": 5291 }, { "epoch": 0.34, "grad_norm": 0.31935624622912157, "learning_rate": 7.719296423464243e-07, "loss": 0.0046, "step": 5292 }, { "epoch": 0.34, "grad_norm": 1.02227589469212, "learning_rate": 7.718429715069481e-07, "loss": 0.2811, "step": 5293 }, { "epoch": 0.34, "grad_norm": 1.0304873467200701, "learning_rate": 7.717562890701043e-07, "loss": 0.278, "step": 5294 }, { "epoch": 0.34, "grad_norm": 2.0786198227661057, "learning_rate": 7.716695950395908e-07, "loss": 0.2424, "step": 5295 }, { "epoch": 0.34, "grad_norm": 1.161837307128368, "learning_rate": 7.715828894191063e-07, "loss": 0.1475, "step": 5296 }, { "epoch": 0.34, "grad_norm": 2.0594379087178116, "learning_rate": 7.714961722123498e-07, "loss": 0.0915, "step": 5297 }, { "epoch": 0.34, "grad_norm": 1.613491070317713, "learning_rate": 7.71409443423021e-07, "loss": 0.1684, "step": 5298 }, { "epoch": 0.34, "grad_norm": 0.48638051637562585, "learning_rate": 7.713227030548195e-07, "loss": 0.1002, "step": 5299 }, { "epoch": 0.34, "grad_norm": 1.9718802165761276, "learning_rate": 7.712359511114461e-07, "loss": 0.2009, "step": 5300 }, { "epoch": 0.34, "grad_norm": 1.755756688591537, "learning_rate": 7.711491875966019e-07, "loss": 0.2435, "step": 5301 }, { "epoch": 0.34, "grad_norm": 0.4700821926865363, "learning_rate": 7.710624125139882e-07, "loss": 0.1341, "step": 5302 }, { "epoch": 0.34, "grad_norm": 0.7815767874893984, "learning_rate": 7.70975625867307e-07, "loss": 0.2079, "step": 5303 }, { "epoch": 0.34, "grad_norm": 0.41626744975865426, "learning_rate": 7.708888276602609e-07, "loss": 0.2388, "step": 5304 }, { "epoch": 0.34, "grad_norm": 1.498746829276991, "learning_rate": 7.70802017896553e-07, "loss": 0.2425, "step": 5305 }, { "epoch": 0.34, "grad_norm": 0.38162158973715743, "learning_rate": 7.707151965798866e-07, "loss": 0.2501, "step": 5306 }, { "epoch": 0.34, "grad_norm": 1.016607126681126, "learning_rate": 7.706283637139657e-07, "loss": 0.2299, "step": 5307 }, { "epoch": 0.34, "grad_norm": 0.7391466955217589, "learning_rate": 7.705415193024947e-07, "loss": 0.1713, "step": 5308 }, { "epoch": 0.34, "grad_norm": 4.020352974328961, "learning_rate": 7.704546633491787e-07, "loss": 0.0339, "step": 5309 }, { "epoch": 0.34, "grad_norm": 0.6292426340417446, "learning_rate": 7.703677958577231e-07, "loss": 0.217, "step": 5310 }, { "epoch": 0.34, "grad_norm": 4.829214248599567, "learning_rate": 7.702809168318337e-07, "loss": 0.1083, "step": 5311 }, { "epoch": 0.34, "grad_norm": 0.5662842095404212, "learning_rate": 7.701940262752171e-07, "loss": 0.2041, "step": 5312 }, { "epoch": 0.34, "grad_norm": 0.650322089205414, "learning_rate": 7.701071241915802e-07, "loss": 0.2903, "step": 5313 }, { "epoch": 0.34, "grad_norm": 0.5903015047276998, "learning_rate": 7.700202105846303e-07, "loss": 0.3205, "step": 5314 }, { "epoch": 0.34, "grad_norm": 2.0010473667544635, "learning_rate": 7.699332854580756e-07, "loss": 0.1309, "step": 5315 }, { "epoch": 0.34, "grad_norm": 0.267577849455619, "learning_rate": 7.698463488156241e-07, "loss": 0.1097, "step": 5316 }, { "epoch": 0.34, "grad_norm": 0.8435166915977652, "learning_rate": 7.69759400660985e-07, "loss": 0.0634, "step": 5317 }, { "epoch": 0.34, "grad_norm": 1.0060092200291906, "learning_rate": 7.696724409978677e-07, "loss": 0.1808, "step": 5318 }, { "epoch": 0.34, "grad_norm": 0.6870243220663309, "learning_rate": 7.695854698299819e-07, "loss": 0.0972, "step": 5319 }, { "epoch": 0.34, "grad_norm": 0.5387727882044846, "learning_rate": 7.694984871610379e-07, "loss": 0.349, "step": 5320 }, { "epoch": 0.34, "grad_norm": 5.325458812570029, "learning_rate": 7.694114929947469e-07, "loss": 0.1727, "step": 5321 }, { "epoch": 0.34, "grad_norm": 0.5208017490544538, "learning_rate": 7.693244873348197e-07, "loss": 0.1576, "step": 5322 }, { "epoch": 0.34, "grad_norm": 0.49418080354995575, "learning_rate": 7.692374701849687e-07, "loss": 0.0074, "step": 5323 }, { "epoch": 0.34, "grad_norm": 0.7438736074259235, "learning_rate": 7.691504415489058e-07, "loss": 0.2522, "step": 5324 }, { "epoch": 0.34, "grad_norm": 1.271699322155713, "learning_rate": 7.690634014303441e-07, "loss": 0.3671, "step": 5325 }, { "epoch": 0.34, "grad_norm": 0.8152545212787211, "learning_rate": 7.689763498329969e-07, "loss": 0.2574, "step": 5326 }, { "epoch": 0.34, "grad_norm": 0.9140083684797955, "learning_rate": 7.688892867605778e-07, "loss": 0.1236, "step": 5327 }, { "epoch": 0.34, "grad_norm": 0.6380556696314743, "learning_rate": 7.688022122168012e-07, "loss": 0.319, "step": 5328 }, { "epoch": 0.34, "grad_norm": 0.6314997933841358, "learning_rate": 7.68715126205382e-07, "loss": 0.3729, "step": 5329 }, { "epoch": 0.34, "grad_norm": 0.8182762182779711, "learning_rate": 7.686280287300352e-07, "loss": 0.0136, "step": 5330 }, { "epoch": 0.34, "grad_norm": 0.3156776685054188, "learning_rate": 7.685409197944768e-07, "loss": 0.0872, "step": 5331 }, { "epoch": 0.34, "grad_norm": 0.831457989118222, "learning_rate": 7.684537994024228e-07, "loss": 0.1813, "step": 5332 }, { "epoch": 0.34, "grad_norm": 0.7341932180942895, "learning_rate": 7.683666675575901e-07, "loss": 0.1579, "step": 5333 }, { "epoch": 0.34, "grad_norm": 0.6616775004102179, "learning_rate": 7.682795242636958e-07, "loss": 0.1285, "step": 5334 }, { "epoch": 0.34, "grad_norm": 8.092916168305884, "learning_rate": 7.681923695244578e-07, "loss": 0.0534, "step": 5335 }, { "epoch": 0.34, "grad_norm": 3.1168417053413657, "learning_rate": 7.681052033435942e-07, "loss": 0.2868, "step": 5336 }, { "epoch": 0.34, "grad_norm": 10.866520975312506, "learning_rate": 7.680180257248235e-07, "loss": 0.2316, "step": 5337 }, { "epoch": 0.34, "grad_norm": 1.3393368998848683, "learning_rate": 7.679308366718652e-07, "loss": 0.1601, "step": 5338 }, { "epoch": 0.34, "grad_norm": 0.427738334861401, "learning_rate": 7.678436361884388e-07, "loss": 0.2265, "step": 5339 }, { "epoch": 0.34, "grad_norm": 2.454088902427455, "learning_rate": 7.677564242782644e-07, "loss": 0.2034, "step": 5340 }, { "epoch": 0.34, "grad_norm": 0.4295246616964228, "learning_rate": 7.676692009450626e-07, "loss": 0.0805, "step": 5341 }, { "epoch": 0.34, "grad_norm": 1.056853686692961, "learning_rate": 7.675819661925547e-07, "loss": 0.3044, "step": 5342 }, { "epoch": 0.34, "grad_norm": 0.9531499999715173, "learning_rate": 7.674947200244622e-07, "loss": 0.1795, "step": 5343 }, { "epoch": 0.34, "grad_norm": 4.417559042119717, "learning_rate": 7.67407462444507e-07, "loss": 0.1089, "step": 5344 }, { "epoch": 0.34, "grad_norm": 1.6925665703216968, "learning_rate": 7.673201934564122e-07, "loss": 0.1718, "step": 5345 }, { "epoch": 0.34, "grad_norm": 1.2011792930434808, "learning_rate": 7.672329130639005e-07, "loss": 0.3044, "step": 5346 }, { "epoch": 0.34, "grad_norm": 0.5730655337707138, "learning_rate": 7.671456212706956e-07, "loss": 0.1644, "step": 5347 }, { "epoch": 0.34, "grad_norm": 1.3752020337562845, "learning_rate": 7.670583180805213e-07, "loss": 0.1373, "step": 5348 }, { "epoch": 0.34, "grad_norm": 0.6841256839270571, "learning_rate": 7.669710034971024e-07, "loss": 0.2897, "step": 5349 }, { "epoch": 0.34, "grad_norm": 0.4700650050761052, "learning_rate": 7.668836775241638e-07, "loss": 0.0782, "step": 5350 }, { "epoch": 0.34, "grad_norm": 3.6904821541206747, "learning_rate": 7.667963401654308e-07, "loss": 0.3029, "step": 5351 }, { "epoch": 0.34, "grad_norm": 0.25120294168718676, "learning_rate": 7.667089914246299e-07, "loss": 0.1555, "step": 5352 }, { "epoch": 0.34, "grad_norm": 0.77480443285652, "learning_rate": 7.666216313054871e-07, "loss": 0.2512, "step": 5353 }, { "epoch": 0.34, "grad_norm": 0.9440982341360855, "learning_rate": 7.665342598117296e-07, "loss": 0.4071, "step": 5354 }, { "epoch": 0.34, "grad_norm": 0.5085383472991699, "learning_rate": 7.664468769470847e-07, "loss": 0.3125, "step": 5355 }, { "epoch": 0.34, "grad_norm": 0.48361497281116317, "learning_rate": 7.663594827152805e-07, "loss": 0.1925, "step": 5356 }, { "epoch": 0.34, "grad_norm": 0.3529085530710126, "learning_rate": 7.662720771200452e-07, "loss": 0.2793, "step": 5357 }, { "epoch": 0.34, "grad_norm": 0.5916127837381652, "learning_rate": 7.66184660165108e-07, "loss": 0.1994, "step": 5358 }, { "epoch": 0.34, "grad_norm": 0.4505598947898049, "learning_rate": 7.660972318541981e-07, "loss": 0.1177, "step": 5359 }, { "epoch": 0.34, "grad_norm": 0.1316654222220077, "learning_rate": 7.660097921910451e-07, "loss": 0.002, "step": 5360 }, { "epoch": 0.34, "grad_norm": 0.4002509006801685, "learning_rate": 7.659223411793799e-07, "loss": 0.1185, "step": 5361 }, { "epoch": 0.34, "grad_norm": 0.40040102302262925, "learning_rate": 7.658348788229329e-07, "loss": 0.2263, "step": 5362 }, { "epoch": 0.34, "grad_norm": 15.19910629247316, "learning_rate": 7.657474051254356e-07, "loss": 0.3614, "step": 5363 }, { "epoch": 0.34, "grad_norm": 1.2013994840857332, "learning_rate": 7.656599200906197e-07, "loss": 0.1998, "step": 5364 }, { "epoch": 0.34, "grad_norm": 1.6985400370745665, "learning_rate": 7.655724237222177e-07, "loss": 0.1717, "step": 5365 }, { "epoch": 0.34, "grad_norm": 0.8095266896357952, "learning_rate": 7.654849160239623e-07, "loss": 0.216, "step": 5366 }, { "epoch": 0.34, "grad_norm": 0.264126168233839, "learning_rate": 7.653973969995865e-07, "loss": 0.195, "step": 5367 }, { "epoch": 0.34, "grad_norm": 1.1730880943862239, "learning_rate": 7.653098666528244e-07, "loss": 0.2856, "step": 5368 }, { "epoch": 0.34, "grad_norm": 6.68317277219934, "learning_rate": 7.652223249874098e-07, "loss": 0.2053, "step": 5369 }, { "epoch": 0.34, "grad_norm": 0.44636271056922516, "learning_rate": 7.651347720070777e-07, "loss": 0.1098, "step": 5370 }, { "epoch": 0.34, "grad_norm": 0.35610538276850995, "learning_rate": 7.650472077155634e-07, "loss": 0.2497, "step": 5371 }, { "epoch": 0.34, "grad_norm": 0.6266563931129285, "learning_rate": 7.649596321166024e-07, "loss": 0.3499, "step": 5372 }, { "epoch": 0.34, "grad_norm": 0.16752537143108334, "learning_rate": 7.648720452139308e-07, "loss": 0.0047, "step": 5373 }, { "epoch": 0.34, "grad_norm": 2.633535854388574, "learning_rate": 7.647844470112854e-07, "loss": 0.2081, "step": 5374 }, { "epoch": 0.34, "grad_norm": 0.6073753413259145, "learning_rate": 7.646968375124032e-07, "loss": 0.297, "step": 5375 }, { "epoch": 0.34, "grad_norm": 0.899192877962476, "learning_rate": 7.646092167210216e-07, "loss": 0.1677, "step": 5376 }, { "epoch": 0.34, "grad_norm": 0.4810626287457248, "learning_rate": 7.64521584640879e-07, "loss": 0.2114, "step": 5377 }, { "epoch": 0.34, "grad_norm": 6.842217120586512, "learning_rate": 7.644339412757138e-07, "loss": 0.2486, "step": 5378 }, { "epoch": 0.34, "grad_norm": 3.236412666233146, "learning_rate": 7.643462866292651e-07, "loss": 0.1284, "step": 5379 }, { "epoch": 0.34, "grad_norm": 0.465679153010764, "learning_rate": 7.642586207052726e-07, "loss": 0.0819, "step": 5380 }, { "epoch": 0.34, "grad_norm": 0.5932907918056111, "learning_rate": 7.641709435074759e-07, "loss": 0.025, "step": 5381 }, { "epoch": 0.34, "grad_norm": 1.8587739825305312, "learning_rate": 7.640832550396157e-07, "loss": 0.1432, "step": 5382 }, { "epoch": 0.34, "grad_norm": 0.7548112413610587, "learning_rate": 7.639955553054331e-07, "loss": 0.2997, "step": 5383 }, { "epoch": 0.34, "grad_norm": 1.3740770436734397, "learning_rate": 7.639078443086693e-07, "loss": 0.0577, "step": 5384 }, { "epoch": 0.34, "grad_norm": 0.6269091369820854, "learning_rate": 7.638201220530663e-07, "loss": 0.219, "step": 5385 }, { "epoch": 0.34, "grad_norm": 0.35717292554144603, "learning_rate": 7.637323885423667e-07, "loss": 0.1343, "step": 5386 }, { "epoch": 0.34, "grad_norm": 2.3400871097740708, "learning_rate": 7.63644643780313e-07, "loss": 0.3339, "step": 5387 }, { "epoch": 0.34, "grad_norm": 0.8314382332385768, "learning_rate": 7.635568877706491e-07, "loss": 0.1248, "step": 5388 }, { "epoch": 0.34, "grad_norm": 0.7448008465431624, "learning_rate": 7.634691205171185e-07, "loss": 0.2172, "step": 5389 }, { "epoch": 0.34, "grad_norm": 1.3429403669183726, "learning_rate": 7.633813420234654e-07, "loss": 0.1637, "step": 5390 }, { "epoch": 0.34, "grad_norm": 0.5481036834158419, "learning_rate": 7.632935522934349e-07, "loss": 0.3444, "step": 5391 }, { "epoch": 0.34, "grad_norm": 0.26179972085160513, "learning_rate": 7.632057513307721e-07, "loss": 0.0127, "step": 5392 }, { "epoch": 0.34, "grad_norm": 0.7934907645401681, "learning_rate": 7.63117939139223e-07, "loss": 0.3575, "step": 5393 }, { "epoch": 0.34, "grad_norm": 0.4847344941338364, "learning_rate": 7.630301157225335e-07, "loss": 0.0863, "step": 5394 }, { "epoch": 0.34, "grad_norm": 0.5144340151696197, "learning_rate": 7.629422810844506e-07, "loss": 0.058, "step": 5395 }, { "epoch": 0.34, "grad_norm": 0.5197423783363038, "learning_rate": 7.628544352287213e-07, "loss": 0.1952, "step": 5396 }, { "epoch": 0.34, "grad_norm": 0.9357355154274088, "learning_rate": 7.627665781590936e-07, "loss": 0.0964, "step": 5397 }, { "epoch": 0.34, "grad_norm": 0.7982943321960051, "learning_rate": 7.626787098793153e-07, "loss": 0.2457, "step": 5398 }, { "epoch": 0.34, "grad_norm": 0.7873596990257173, "learning_rate": 7.625908303931352e-07, "loss": 0.1931, "step": 5399 }, { "epoch": 0.34, "grad_norm": 0.03574043489678248, "learning_rate": 7.625029397043024e-07, "loss": 0.0002, "step": 5400 }, { "epoch": 0.34, "grad_norm": 1.3735738747832986, "learning_rate": 7.624150378165665e-07, "loss": 0.3125, "step": 5401 }, { "epoch": 0.34, "grad_norm": 0.6538545634079106, "learning_rate": 7.623271247336776e-07, "loss": 0.1629, "step": 5402 }, { "epoch": 0.34, "grad_norm": 1.2481701307765698, "learning_rate": 7.622392004593861e-07, "loss": 0.2579, "step": 5403 }, { "epoch": 0.34, "grad_norm": 1.1906190333031517, "learning_rate": 7.621512649974434e-07, "loss": 0.3404, "step": 5404 }, { "epoch": 0.34, "grad_norm": 0.7951479589285008, "learning_rate": 7.620633183516004e-07, "loss": 0.2589, "step": 5405 }, { "epoch": 0.34, "grad_norm": 0.6350964950267514, "learning_rate": 7.619753605256096e-07, "loss": 0.1353, "step": 5406 }, { "epoch": 0.34, "grad_norm": 0.14833055215320196, "learning_rate": 7.618873915232233e-07, "loss": 0.0045, "step": 5407 }, { "epoch": 0.34, "grad_norm": 0.7273594822670096, "learning_rate": 7.617994113481944e-07, "loss": 0.2311, "step": 5408 }, { "epoch": 0.34, "grad_norm": 0.3214943997490827, "learning_rate": 7.617114200042764e-07, "loss": 0.131, "step": 5409 }, { "epoch": 0.35, "grad_norm": 0.7704313806013201, "learning_rate": 7.61623417495223e-07, "loss": 0.1477, "step": 5410 }, { "epoch": 0.35, "grad_norm": 0.48631190898778454, "learning_rate": 7.615354038247887e-07, "loss": 0.0818, "step": 5411 }, { "epoch": 0.35, "grad_norm": 0.6814791525704665, "learning_rate": 7.614473789967284e-07, "loss": 0.3285, "step": 5412 }, { "epoch": 0.35, "grad_norm": 0.5000201149916452, "learning_rate": 7.613593430147973e-07, "loss": 0.3307, "step": 5413 }, { "epoch": 0.35, "grad_norm": 6.086899312102441, "learning_rate": 7.612712958827511e-07, "loss": 0.2334, "step": 5414 }, { "epoch": 0.35, "grad_norm": 0.43445168035263737, "learning_rate": 7.611832376043464e-07, "loss": 0.1203, "step": 5415 }, { "epoch": 0.35, "grad_norm": 0.5063496868375928, "learning_rate": 7.610951681833397e-07, "loss": 0.1298, "step": 5416 }, { "epoch": 0.35, "grad_norm": 2.1891589968534975, "learning_rate": 7.610070876234882e-07, "loss": 0.1901, "step": 5417 }, { "epoch": 0.35, "grad_norm": 0.5705667873301133, "learning_rate": 7.609189959285497e-07, "loss": 0.0862, "step": 5418 }, { "epoch": 0.35, "grad_norm": 2.165052832144716, "learning_rate": 7.608308931022822e-07, "loss": 0.2605, "step": 5419 }, { "epoch": 0.35, "grad_norm": 3.0442779890428175, "learning_rate": 7.607427791484447e-07, "loss": 0.1809, "step": 5420 }, { "epoch": 0.35, "grad_norm": 0.4496070838672782, "learning_rate": 7.606546540707959e-07, "loss": 0.1794, "step": 5421 }, { "epoch": 0.35, "grad_norm": 0.6030649034819161, "learning_rate": 7.605665178730956e-07, "loss": 0.1341, "step": 5422 }, { "epoch": 0.35, "grad_norm": 0.2806532213380455, "learning_rate": 7.604783705591039e-07, "loss": 0.1071, "step": 5423 }, { "epoch": 0.35, "grad_norm": 0.747730965098597, "learning_rate": 7.603902121325811e-07, "loss": 0.262, "step": 5424 }, { "epoch": 0.35, "grad_norm": 0.26019060436072894, "learning_rate": 7.603020425972886e-07, "loss": 0.119, "step": 5425 }, { "epoch": 0.35, "grad_norm": 0.6580863305263909, "learning_rate": 7.602138619569876e-07, "loss": 0.0574, "step": 5426 }, { "epoch": 0.35, "grad_norm": 0.586572500302497, "learning_rate": 7.601256702154402e-07, "loss": 0.1988, "step": 5427 }, { "epoch": 0.35, "grad_norm": 0.9429699165236303, "learning_rate": 7.600374673764087e-07, "loss": 0.1048, "step": 5428 }, { "epoch": 0.35, "grad_norm": 0.6030129263368172, "learning_rate": 7.599492534436562e-07, "loss": 0.1095, "step": 5429 }, { "epoch": 0.35, "grad_norm": 0.3970639490193903, "learning_rate": 7.598610284209459e-07, "loss": 0.123, "step": 5430 }, { "epoch": 0.35, "grad_norm": 0.8875642730833075, "learning_rate": 7.597727923120419e-07, "loss": 0.3333, "step": 5431 }, { "epoch": 0.35, "grad_norm": 0.7935667196936881, "learning_rate": 7.596845451207081e-07, "loss": 0.1299, "step": 5432 }, { "epoch": 0.35, "grad_norm": 1.8845982663534275, "learning_rate": 7.595962868507098e-07, "loss": 0.0206, "step": 5433 }, { "epoch": 0.35, "grad_norm": 0.6958167365070135, "learning_rate": 7.595080175058119e-07, "loss": 0.4635, "step": 5434 }, { "epoch": 0.35, "grad_norm": 0.8434032033922428, "learning_rate": 7.594197370897806e-07, "loss": 0.138, "step": 5435 }, { "epoch": 0.35, "grad_norm": 0.2340305106813205, "learning_rate": 7.593314456063815e-07, "loss": 0.1236, "step": 5436 }, { "epoch": 0.35, "grad_norm": 0.3510436566896165, "learning_rate": 7.592431430593818e-07, "loss": 0.2035, "step": 5437 }, { "epoch": 0.35, "grad_norm": 1.113828575830915, "learning_rate": 7.591548294525482e-07, "loss": 0.2367, "step": 5438 }, { "epoch": 0.35, "grad_norm": 0.6365103004291444, "learning_rate": 7.590665047896489e-07, "loss": 0.0133, "step": 5439 }, { "epoch": 0.35, "grad_norm": 3.5961683705584258, "learning_rate": 7.589781690744515e-07, "loss": 0.0773, "step": 5440 }, { "epoch": 0.35, "grad_norm": 0.7132547038521998, "learning_rate": 7.588898223107249e-07, "loss": 0.1689, "step": 5441 }, { "epoch": 0.35, "grad_norm": 0.4314675017187079, "learning_rate": 7.588014645022381e-07, "loss": 0.109, "step": 5442 }, { "epoch": 0.35, "grad_norm": 5.498069862488393, "learning_rate": 7.587130956527605e-07, "loss": 0.3384, "step": 5443 }, { "epoch": 0.35, "grad_norm": 0.4025239974395892, "learning_rate": 7.586247157660623e-07, "loss": 0.0816, "step": 5444 }, { "epoch": 0.35, "grad_norm": 2.006154927034557, "learning_rate": 7.585363248459138e-07, "loss": 0.0924, "step": 5445 }, { "epoch": 0.35, "grad_norm": 0.5946390055624198, "learning_rate": 7.584479228960858e-07, "loss": 0.4267, "step": 5446 }, { "epoch": 0.35, "grad_norm": 2.1102941873243526, "learning_rate": 7.583595099203499e-07, "loss": 0.1461, "step": 5447 }, { "epoch": 0.35, "grad_norm": 0.8735929599987753, "learning_rate": 7.582710859224779e-07, "loss": 0.1238, "step": 5448 }, { "epoch": 0.35, "grad_norm": 2.5119734516248617, "learning_rate": 7.581826509062422e-07, "loss": 0.0727, "step": 5449 }, { "epoch": 0.35, "grad_norm": 0.5092960882392609, "learning_rate": 7.580942048754158e-07, "loss": 0.218, "step": 5450 }, { "epoch": 0.35, "grad_norm": 0.48091907921323257, "learning_rate": 7.580057478337716e-07, "loss": 0.135, "step": 5451 }, { "epoch": 0.35, "grad_norm": 2.6804820882221856, "learning_rate": 7.579172797850835e-07, "loss": 0.1032, "step": 5452 }, { "epoch": 0.35, "grad_norm": 0.5761016630247517, "learning_rate": 7.578288007331259e-07, "loss": 0.2332, "step": 5453 }, { "epoch": 0.35, "grad_norm": 2.148788013242087, "learning_rate": 7.577403106816733e-07, "loss": 0.3384, "step": 5454 }, { "epoch": 0.35, "grad_norm": 1.577660597165397, "learning_rate": 7.576518096345008e-07, "loss": 0.2448, "step": 5455 }, { "epoch": 0.35, "grad_norm": 1.1541021511978142, "learning_rate": 7.575632975953844e-07, "loss": 0.1887, "step": 5456 }, { "epoch": 0.35, "grad_norm": 0.9556198088872199, "learning_rate": 7.574747745680998e-07, "loss": 0.4103, "step": 5457 }, { "epoch": 0.35, "grad_norm": 6.011746950565354, "learning_rate": 7.573862405564238e-07, "loss": 0.235, "step": 5458 }, { "epoch": 0.35, "grad_norm": 4.867669886562826, "learning_rate": 7.572976955641333e-07, "loss": 0.0455, "step": 5459 }, { "epoch": 0.35, "grad_norm": 1.1562166733665182, "learning_rate": 7.57209139595006e-07, "loss": 0.2442, "step": 5460 }, { "epoch": 0.35, "grad_norm": 0.9807165096759748, "learning_rate": 7.571205726528196e-07, "loss": 0.2519, "step": 5461 }, { "epoch": 0.35, "grad_norm": 0.19813535693980622, "learning_rate": 7.570319947413528e-07, "loss": 0.0902, "step": 5462 }, { "epoch": 0.35, "grad_norm": 0.33052386627680735, "learning_rate": 7.569434058643843e-07, "loss": 0.1445, "step": 5463 }, { "epoch": 0.35, "grad_norm": 1.195067167890204, "learning_rate": 7.568548060256937e-07, "loss": 0.1691, "step": 5464 }, { "epoch": 0.35, "grad_norm": 0.7083560570882403, "learning_rate": 7.567661952290607e-07, "loss": 0.1655, "step": 5465 }, { "epoch": 0.35, "grad_norm": 0.6414384746879106, "learning_rate": 7.566775734782656e-07, "loss": 0.2678, "step": 5466 }, { "epoch": 0.35, "grad_norm": 6.237016991388835, "learning_rate": 7.565889407770891e-07, "loss": 0.1284, "step": 5467 }, { "epoch": 0.35, "grad_norm": 1.1814716207331748, "learning_rate": 7.565002971293127e-07, "loss": 0.3381, "step": 5468 }, { "epoch": 0.35, "grad_norm": 1.222609216444924, "learning_rate": 7.564116425387181e-07, "loss": 0.1326, "step": 5469 }, { "epoch": 0.35, "grad_norm": 0.3834778682885506, "learning_rate": 7.563229770090873e-07, "loss": 0.4125, "step": 5470 }, { "epoch": 0.35, "grad_norm": 0.8188635420752655, "learning_rate": 7.562343005442031e-07, "loss": 0.0042, "step": 5471 }, { "epoch": 0.35, "grad_norm": 0.3317451274996792, "learning_rate": 7.561456131478486e-07, "loss": 0.1304, "step": 5472 }, { "epoch": 0.35, "grad_norm": 0.7369737169901256, "learning_rate": 7.56056914823807e-07, "loss": 0.2119, "step": 5473 }, { "epoch": 0.35, "grad_norm": 1.7048416421697092, "learning_rate": 7.55968205575863e-07, "loss": 0.2323, "step": 5474 }, { "epoch": 0.35, "grad_norm": 0.8939613133381339, "learning_rate": 7.558794854078006e-07, "loss": 0.1165, "step": 5475 }, { "epoch": 0.35, "grad_norm": 0.9367642014346794, "learning_rate": 7.55790754323405e-07, "loss": 0.2342, "step": 5476 }, { "epoch": 0.35, "grad_norm": 0.8430705717483711, "learning_rate": 7.557020123264615e-07, "loss": 0.2196, "step": 5477 }, { "epoch": 0.35, "grad_norm": 0.5027771057273157, "learning_rate": 7.556132594207564e-07, "loss": 0.0873, "step": 5478 }, { "epoch": 0.35, "grad_norm": 0.45055304957519526, "learning_rate": 7.555244956100757e-07, "loss": 0.1712, "step": 5479 }, { "epoch": 0.35, "grad_norm": 0.5384406883428974, "learning_rate": 7.554357208982063e-07, "loss": 0.2173, "step": 5480 }, { "epoch": 0.35, "grad_norm": 0.5123732613510469, "learning_rate": 7.553469352889355e-07, "loss": 0.2544, "step": 5481 }, { "epoch": 0.35, "grad_norm": 0.6622802529235681, "learning_rate": 7.552581387860513e-07, "loss": 0.172, "step": 5482 }, { "epoch": 0.35, "grad_norm": 1.246609448365193, "learning_rate": 7.551693313933416e-07, "loss": 0.3406, "step": 5483 }, { "epoch": 0.35, "grad_norm": 0.6176914522877467, "learning_rate": 7.550805131145954e-07, "loss": 0.2154, "step": 5484 }, { "epoch": 0.35, "grad_norm": 0.5857405908457256, "learning_rate": 7.549916839536017e-07, "loss": 0.1204, "step": 5485 }, { "epoch": 0.35, "grad_norm": 0.9430934698659614, "learning_rate": 7.549028439141502e-07, "loss": 0.1439, "step": 5486 }, { "epoch": 0.35, "grad_norm": 0.5069939926317235, "learning_rate": 7.548139930000308e-07, "loss": 0.132, "step": 5487 }, { "epoch": 0.35, "grad_norm": 0.9469093464280597, "learning_rate": 7.547251312150344e-07, "loss": 0.1164, "step": 5488 }, { "epoch": 0.35, "grad_norm": 0.38428685088550063, "learning_rate": 7.546362585629517e-07, "loss": 0.1245, "step": 5489 }, { "epoch": 0.35, "grad_norm": 0.4850082210504356, "learning_rate": 7.545473750475744e-07, "loss": 0.0876, "step": 5490 }, { "epoch": 0.35, "grad_norm": 0.814698988719398, "learning_rate": 7.544584806726944e-07, "loss": 0.2527, "step": 5491 }, { "epoch": 0.35, "grad_norm": 0.8193787101128907, "learning_rate": 7.54369575442104e-07, "loss": 0.0961, "step": 5492 }, { "epoch": 0.35, "grad_norm": 0.6859605299798347, "learning_rate": 7.542806593595961e-07, "loss": 0.2168, "step": 5493 }, { "epoch": 0.35, "grad_norm": 0.46656504276993166, "learning_rate": 7.541917324289644e-07, "loss": 0.0417, "step": 5494 }, { "epoch": 0.35, "grad_norm": 0.6236552184627319, "learning_rate": 7.541027946540022e-07, "loss": 0.1295, "step": 5495 }, { "epoch": 0.35, "grad_norm": 0.8537973831497453, "learning_rate": 7.540138460385039e-07, "loss": 0.1608, "step": 5496 }, { "epoch": 0.35, "grad_norm": 1.2793862954463233, "learning_rate": 7.539248865862644e-07, "loss": 0.1617, "step": 5497 }, { "epoch": 0.35, "grad_norm": 0.6398392521187805, "learning_rate": 7.538359163010789e-07, "loss": 0.1793, "step": 5498 }, { "epoch": 0.35, "grad_norm": 0.4287984261789411, "learning_rate": 7.537469351867429e-07, "loss": 0.0101, "step": 5499 }, { "epoch": 0.35, "grad_norm": 1.6166844168596135, "learning_rate": 7.536579432470525e-07, "loss": 0.3597, "step": 5500 }, { "epoch": 0.35, "grad_norm": 0.4182742071772365, "learning_rate": 7.535689404858041e-07, "loss": 0.2603, "step": 5501 }, { "epoch": 0.35, "grad_norm": 0.3854342258052628, "learning_rate": 7.534799269067951e-07, "loss": 0.1638, "step": 5502 }, { "epoch": 0.35, "grad_norm": 0.8940115750825707, "learning_rate": 7.53390902513823e-07, "loss": 0.2861, "step": 5503 }, { "epoch": 0.35, "grad_norm": 4.30640763425162, "learning_rate": 7.533018673106855e-07, "loss": 0.2263, "step": 5504 }, { "epoch": 0.35, "grad_norm": 0.7424977747555532, "learning_rate": 7.532128213011813e-07, "loss": 0.0734, "step": 5505 }, { "epoch": 0.35, "grad_norm": 1.7383830668521845, "learning_rate": 7.531237644891089e-07, "loss": 0.3017, "step": 5506 }, { "epoch": 0.35, "grad_norm": 1.6425962515759647, "learning_rate": 7.530346968782679e-07, "loss": 0.1868, "step": 5507 }, { "epoch": 0.35, "grad_norm": 0.8891034027526099, "learning_rate": 7.529456184724582e-07, "loss": 0.2093, "step": 5508 }, { "epoch": 0.35, "grad_norm": 0.6308586312027443, "learning_rate": 7.528565292754798e-07, "loss": 0.2022, "step": 5509 }, { "epoch": 0.35, "grad_norm": 1.2966416101669163, "learning_rate": 7.527674292911337e-07, "loss": 0.346, "step": 5510 }, { "epoch": 0.35, "grad_norm": 4.565257841276505, "learning_rate": 7.526783185232207e-07, "loss": 0.2415, "step": 5511 }, { "epoch": 0.35, "grad_norm": 0.6378618019879686, "learning_rate": 7.525891969755429e-07, "loss": 0.2266, "step": 5512 }, { "epoch": 0.35, "grad_norm": 0.7370565367478511, "learning_rate": 7.525000646519022e-07, "loss": 0.0854, "step": 5513 }, { "epoch": 0.35, "grad_norm": 0.13803143539431034, "learning_rate": 7.52410921556101e-07, "loss": 0.0438, "step": 5514 }, { "epoch": 0.35, "grad_norm": 0.8618364879921652, "learning_rate": 7.523217676919427e-07, "loss": 0.3591, "step": 5515 }, { "epoch": 0.35, "grad_norm": 1.3538662856414543, "learning_rate": 7.522326030632303e-07, "loss": 0.3528, "step": 5516 }, { "epoch": 0.35, "grad_norm": 0.4175646798417754, "learning_rate": 7.521434276737682e-07, "loss": 0.2288, "step": 5517 }, { "epoch": 0.35, "grad_norm": 0.7815260591885742, "learning_rate": 7.520542415273605e-07, "loss": 0.2603, "step": 5518 }, { "epoch": 0.35, "grad_norm": 2.6129913870949957, "learning_rate": 7.51965044627812e-07, "loss": 0.079, "step": 5519 }, { "epoch": 0.35, "grad_norm": 0.9206101002044337, "learning_rate": 7.518758369789284e-07, "loss": 0.0695, "step": 5520 }, { "epoch": 0.35, "grad_norm": 0.9850240890306616, "learning_rate": 7.517866185845152e-07, "loss": 0.3234, "step": 5521 }, { "epoch": 0.35, "grad_norm": 0.4658518357104681, "learning_rate": 7.516973894483788e-07, "loss": 0.3014, "step": 5522 }, { "epoch": 0.35, "grad_norm": 0.5990441004132712, "learning_rate": 7.516081495743258e-07, "loss": 0.0746, "step": 5523 }, { "epoch": 0.35, "grad_norm": 0.9125143811157321, "learning_rate": 7.515188989661631e-07, "loss": 0.098, "step": 5524 }, { "epoch": 0.35, "grad_norm": 1.6742846516022356, "learning_rate": 7.514296376276988e-07, "loss": 0.1169, "step": 5525 }, { "epoch": 0.35, "grad_norm": 0.9410200416897807, "learning_rate": 7.513403655627407e-07, "loss": 0.2208, "step": 5526 }, { "epoch": 0.35, "grad_norm": 1.1831979924336038, "learning_rate": 7.512510827750973e-07, "loss": 0.336, "step": 5527 }, { "epoch": 0.35, "grad_norm": 0.6164531950287837, "learning_rate": 7.511617892685775e-07, "loss": 0.2676, "step": 5528 }, { "epoch": 0.35, "grad_norm": 1.0073977437619148, "learning_rate": 7.51072485046991e-07, "loss": 0.0981, "step": 5529 }, { "epoch": 0.35, "grad_norm": 0.5920372762894701, "learning_rate": 7.509831701141476e-07, "loss": 0.2343, "step": 5530 }, { "epoch": 0.35, "grad_norm": 2.5246433394510883, "learning_rate": 7.508938444738575e-07, "loss": 0.2047, "step": 5531 }, { "epoch": 0.35, "grad_norm": 0.41019274446527026, "learning_rate": 7.508045081299317e-07, "loss": 0.3106, "step": 5532 }, { "epoch": 0.35, "grad_norm": 0.9708985114565645, "learning_rate": 7.507151610861815e-07, "loss": 0.2222, "step": 5533 }, { "epoch": 0.35, "grad_norm": 1.4165937334867051, "learning_rate": 7.506258033464183e-07, "loss": 0.3681, "step": 5534 }, { "epoch": 0.35, "grad_norm": 3.665154380980617, "learning_rate": 7.505364349144547e-07, "loss": 0.0905, "step": 5535 }, { "epoch": 0.35, "grad_norm": 0.3393190699864382, "learning_rate": 7.504470557941032e-07, "loss": 0.1616, "step": 5536 }, { "epoch": 0.35, "grad_norm": 1.3690012066375732, "learning_rate": 7.503576659891767e-07, "loss": 0.3203, "step": 5537 }, { "epoch": 0.35, "grad_norm": 0.8934533090948807, "learning_rate": 7.502682655034889e-07, "loss": 0.2346, "step": 5538 }, { "epoch": 0.35, "grad_norm": 0.39530251328778704, "learning_rate": 7.501788543408538e-07, "loss": 0.2292, "step": 5539 }, { "epoch": 0.35, "grad_norm": 0.7845545391057316, "learning_rate": 7.50089432505086e-07, "loss": 0.4175, "step": 5540 }, { "epoch": 0.35, "grad_norm": 0.6618787956586863, "learning_rate": 7.5e-07, "loss": 0.3286, "step": 5541 }, { "epoch": 0.35, "grad_norm": 0.7129523944298292, "learning_rate": 7.499105568294117e-07, "loss": 0.3006, "step": 5542 }, { "epoch": 0.35, "grad_norm": 0.34117821356147626, "learning_rate": 7.498211029971364e-07, "loss": 0.0433, "step": 5543 }, { "epoch": 0.35, "grad_norm": 0.31330844388871015, "learning_rate": 7.497316385069907e-07, "loss": 0.0605, "step": 5544 }, { "epoch": 0.35, "grad_norm": 0.38548290144148534, "learning_rate": 7.496421633627914e-07, "loss": 0.2189, "step": 5545 }, { "epoch": 0.35, "grad_norm": 1.0065995220738866, "learning_rate": 7.495526775683555e-07, "loss": 0.2171, "step": 5546 }, { "epoch": 0.35, "grad_norm": 0.5711721107906526, "learning_rate": 7.494631811275007e-07, "loss": 0.1296, "step": 5547 }, { "epoch": 0.35, "grad_norm": 1.3489269810369782, "learning_rate": 7.493736740440451e-07, "loss": 0.2914, "step": 5548 }, { "epoch": 0.35, "grad_norm": 0.951365099114088, "learning_rate": 7.492841563218073e-07, "loss": 0.3966, "step": 5549 }, { "epoch": 0.35, "grad_norm": 3.8814859294325017, "learning_rate": 7.491946279646063e-07, "loss": 0.2021, "step": 5550 }, { "epoch": 0.35, "grad_norm": 0.7517411318173365, "learning_rate": 7.491050889762615e-07, "loss": 0.0111, "step": 5551 }, { "epoch": 0.35, "grad_norm": 2.061138678911251, "learning_rate": 7.490155393605928e-07, "loss": 0.1306, "step": 5552 }, { "epoch": 0.35, "grad_norm": 1.0569228540036513, "learning_rate": 7.489259791214207e-07, "loss": 0.1079, "step": 5553 }, { "epoch": 0.35, "grad_norm": 0.9278994889987051, "learning_rate": 7.488364082625658e-07, "loss": 0.3206, "step": 5554 }, { "epoch": 0.35, "grad_norm": 0.8308183209066643, "learning_rate": 7.487468267878496e-07, "loss": 0.3183, "step": 5555 }, { "epoch": 0.35, "grad_norm": 1.509597860789319, "learning_rate": 7.486572347010936e-07, "loss": 0.2289, "step": 5556 }, { "epoch": 0.35, "grad_norm": 1.1048010166411033, "learning_rate": 7.485676320061203e-07, "loss": 0.214, "step": 5557 }, { "epoch": 0.35, "grad_norm": 0.39917487915009037, "learning_rate": 7.48478018706752e-07, "loss": 0.1675, "step": 5558 }, { "epoch": 0.35, "grad_norm": 0.9550023711294141, "learning_rate": 7.48388394806812e-07, "loss": 0.2732, "step": 5559 }, { "epoch": 0.35, "grad_norm": 0.29584155540136137, "learning_rate": 7.482987603101236e-07, "loss": 0.0789, "step": 5560 }, { "epoch": 0.35, "grad_norm": 0.6734566651477544, "learning_rate": 7.482091152205111e-07, "loss": 0.1139, "step": 5561 }, { "epoch": 0.35, "grad_norm": 0.2411371701448759, "learning_rate": 7.481194595417987e-07, "loss": 0.0923, "step": 5562 }, { "epoch": 0.35, "grad_norm": 0.5948113824946445, "learning_rate": 7.480297932778115e-07, "loss": 0.3879, "step": 5563 }, { "epoch": 0.35, "grad_norm": 1.2976684576031656, "learning_rate": 7.479401164323744e-07, "loss": 0.0527, "step": 5564 }, { "epoch": 0.35, "grad_norm": 0.6931557936213576, "learning_rate": 7.478504290093137e-07, "loss": 0.3137, "step": 5565 }, { "epoch": 0.35, "grad_norm": 1.93424600996979, "learning_rate": 7.477607310124556e-07, "loss": 0.0462, "step": 5566 }, { "epoch": 0.36, "grad_norm": 0.7233646757544797, "learning_rate": 7.476710224456267e-07, "loss": 0.2318, "step": 5567 }, { "epoch": 0.36, "grad_norm": 1.7154052090167797, "learning_rate": 7.475813033126539e-07, "loss": 0.2672, "step": 5568 }, { "epoch": 0.36, "grad_norm": 0.36258857569078284, "learning_rate": 7.47491573617365e-07, "loss": 0.153, "step": 5569 }, { "epoch": 0.36, "grad_norm": 1.030209076820536, "learning_rate": 7.474018333635881e-07, "loss": 0.2758, "step": 5570 }, { "epoch": 0.36, "grad_norm": 0.9478448659022397, "learning_rate": 7.473120825551516e-07, "loss": 0.2027, "step": 5571 }, { "epoch": 0.36, "grad_norm": 1.7993747337189498, "learning_rate": 7.472223211958845e-07, "loss": 0.0878, "step": 5572 }, { "epoch": 0.36, "grad_norm": 8.318048620247762, "learning_rate": 7.471325492896163e-07, "loss": 0.2963, "step": 5573 }, { "epoch": 0.36, "grad_norm": 0.7229115735007899, "learning_rate": 7.470427668401766e-07, "loss": 0.1638, "step": 5574 }, { "epoch": 0.36, "grad_norm": 0.8616759970056704, "learning_rate": 7.469529738513959e-07, "loss": 0.3767, "step": 5575 }, { "epoch": 0.36, "grad_norm": 0.2840704025590366, "learning_rate": 7.468631703271049e-07, "loss": 0.1306, "step": 5576 }, { "epoch": 0.36, "grad_norm": 0.24844192009676036, "learning_rate": 7.467733562711349e-07, "loss": 0.0068, "step": 5577 }, { "epoch": 0.36, "grad_norm": 0.7622158886912915, "learning_rate": 7.466835316873173e-07, "loss": 0.1614, "step": 5578 }, { "epoch": 0.36, "grad_norm": 0.5074933879665838, "learning_rate": 7.465936965794844e-07, "loss": 0.0773, "step": 5579 }, { "epoch": 0.36, "grad_norm": 1.2259056590904156, "learning_rate": 7.465038509514687e-07, "loss": 0.3512, "step": 5580 }, { "epoch": 0.36, "grad_norm": 1.346760402131009, "learning_rate": 7.464139948071032e-07, "loss": 0.1872, "step": 5581 }, { "epoch": 0.36, "grad_norm": 0.4691340629334052, "learning_rate": 7.463241281502213e-07, "loss": 0.2107, "step": 5582 }, { "epoch": 0.36, "grad_norm": 0.627102704587824, "learning_rate": 7.462342509846569e-07, "loss": 0.1586, "step": 5583 }, { "epoch": 0.36, "grad_norm": 0.2568205072347259, "learning_rate": 7.461443633142445e-07, "loss": 0.0881, "step": 5584 }, { "epoch": 0.36, "grad_norm": 0.4491214798196706, "learning_rate": 7.460544651428186e-07, "loss": 0.3717, "step": 5585 }, { "epoch": 0.36, "grad_norm": 1.5255583347139716, "learning_rate": 7.459645564742147e-07, "loss": 0.192, "step": 5586 }, { "epoch": 0.36, "grad_norm": 0.7139963882199662, "learning_rate": 7.458746373122682e-07, "loss": 0.2344, "step": 5587 }, { "epoch": 0.36, "grad_norm": 0.8754339092552733, "learning_rate": 7.457847076608154e-07, "loss": 0.2091, "step": 5588 }, { "epoch": 0.36, "grad_norm": 3.737726411806768, "learning_rate": 7.456947675236931e-07, "loss": 0.227, "step": 5589 }, { "epoch": 0.36, "grad_norm": 1.943797248690084, "learning_rate": 7.45604816904738e-07, "loss": 0.4343, "step": 5590 }, { "epoch": 0.36, "grad_norm": 1.5659900317588396, "learning_rate": 7.455148558077875e-07, "loss": 0.107, "step": 5591 }, { "epoch": 0.36, "grad_norm": 4.007616680055804, "learning_rate": 7.454248842366799e-07, "loss": 0.1941, "step": 5592 }, { "epoch": 0.36, "grad_norm": 0.41591064194367167, "learning_rate": 7.453349021952533e-07, "loss": 0.0516, "step": 5593 }, { "epoch": 0.36, "grad_norm": 1.3143605152013824, "learning_rate": 7.452449096873467e-07, "loss": 0.0915, "step": 5594 }, { "epoch": 0.36, "grad_norm": 0.8047655118185578, "learning_rate": 7.451549067167993e-07, "loss": 0.179, "step": 5595 }, { "epoch": 0.36, "grad_norm": 1.744229908964181, "learning_rate": 7.450648932874506e-07, "loss": 0.0828, "step": 5596 }, { "epoch": 0.36, "grad_norm": 0.6446813391299382, "learning_rate": 7.449748694031411e-07, "loss": 0.1499, "step": 5597 }, { "epoch": 0.36, "grad_norm": 0.9662495016901049, "learning_rate": 7.44884835067711e-07, "loss": 0.3574, "step": 5598 }, { "epoch": 0.36, "grad_norm": 0.7946516559915121, "learning_rate": 7.447947902850015e-07, "loss": 0.3775, "step": 5599 }, { "epoch": 0.36, "grad_norm": 0.6918885825520503, "learning_rate": 7.447047350588542e-07, "loss": 0.2905, "step": 5600 }, { "epoch": 0.36, "grad_norm": 0.9162217735464907, "learning_rate": 7.44614669393111e-07, "loss": 0.2766, "step": 5601 }, { "epoch": 0.36, "grad_norm": 0.5856221977423952, "learning_rate": 7.445245932916145e-07, "loss": 0.1782, "step": 5602 }, { "epoch": 0.36, "grad_norm": 1.130971782366612, "learning_rate": 7.44434506758207e-07, "loss": 0.3936, "step": 5603 }, { "epoch": 0.36, "grad_norm": 0.49956457392869424, "learning_rate": 7.443444097967322e-07, "loss": 0.1684, "step": 5604 }, { "epoch": 0.36, "grad_norm": 5.442124914605853, "learning_rate": 7.442543024110336e-07, "loss": 0.1308, "step": 5605 }, { "epoch": 0.36, "grad_norm": 1.1012306541307841, "learning_rate": 7.441641846049556e-07, "loss": 0.157, "step": 5606 }, { "epoch": 0.36, "grad_norm": 0.8128936184046678, "learning_rate": 7.440740563823424e-07, "loss": 0.2805, "step": 5607 }, { "epoch": 0.36, "grad_norm": 0.0958500784278856, "learning_rate": 7.439839177470395e-07, "loss": 0.0014, "step": 5608 }, { "epoch": 0.36, "grad_norm": 1.0622095849821385, "learning_rate": 7.438937687028922e-07, "loss": 0.0322, "step": 5609 }, { "epoch": 0.36, "grad_norm": 0.82240783570984, "learning_rate": 7.438036092537464e-07, "loss": 0.1379, "step": 5610 }, { "epoch": 0.36, "grad_norm": 1.0064401223486799, "learning_rate": 7.437134394034486e-07, "loss": 0.271, "step": 5611 }, { "epoch": 0.36, "grad_norm": 0.5824506377365629, "learning_rate": 7.436232591558453e-07, "loss": 0.1038, "step": 5612 }, { "epoch": 0.36, "grad_norm": 0.6278024259582801, "learning_rate": 7.435330685147842e-07, "loss": 0.3068, "step": 5613 }, { "epoch": 0.36, "grad_norm": 7.263689566913914, "learning_rate": 7.434428674841129e-07, "loss": 0.4099, "step": 5614 }, { "epoch": 0.36, "grad_norm": 0.536778118306459, "learning_rate": 7.433526560676795e-07, "loss": 0.2519, "step": 5615 }, { "epoch": 0.36, "grad_norm": 0.22056175117818586, "learning_rate": 7.432624342693325e-07, "loss": 0.0012, "step": 5616 }, { "epoch": 0.36, "grad_norm": 1.0107420371992206, "learning_rate": 7.431722020929209e-07, "loss": 0.3657, "step": 5617 }, { "epoch": 0.36, "grad_norm": 0.38133028570314137, "learning_rate": 7.430819595422944e-07, "loss": 0.0995, "step": 5618 }, { "epoch": 0.36, "grad_norm": 0.3274933073862944, "learning_rate": 7.429917066213029e-07, "loss": 0.1068, "step": 5619 }, { "epoch": 0.36, "grad_norm": 0.7391526671153332, "learning_rate": 7.429014433337968e-07, "loss": 0.2051, "step": 5620 }, { "epoch": 0.36, "grad_norm": 0.6382017648543542, "learning_rate": 7.428111696836268e-07, "loss": 0.0841, "step": 5621 }, { "epoch": 0.36, "grad_norm": 0.5479588869795209, "learning_rate": 7.427208856746443e-07, "loss": 0.0117, "step": 5622 }, { "epoch": 0.36, "grad_norm": 1.4681924472219252, "learning_rate": 7.426305913107007e-07, "loss": 0.2606, "step": 5623 }, { "epoch": 0.36, "grad_norm": 0.4349478615381459, "learning_rate": 7.425402865956484e-07, "loss": 0.2065, "step": 5624 }, { "epoch": 0.36, "grad_norm": 0.48133390750990807, "learning_rate": 7.424499715333398e-07, "loss": 0.2311, "step": 5625 }, { "epoch": 0.36, "grad_norm": 0.43393289337501645, "learning_rate": 7.42359646127628e-07, "loss": 0.0131, "step": 5626 }, { "epoch": 0.36, "grad_norm": 0.9901231319832863, "learning_rate": 7.422693103823667e-07, "loss": 0.3646, "step": 5627 }, { "epoch": 0.36, "grad_norm": 0.8897654773571841, "learning_rate": 7.421789643014095e-07, "loss": 0.2595, "step": 5628 }, { "epoch": 0.36, "grad_norm": 0.08700536088908316, "learning_rate": 7.420886078886109e-07, "loss": 0.0035, "step": 5629 }, { "epoch": 0.36, "grad_norm": 0.5091640443719467, "learning_rate": 7.419982411478255e-07, "loss": 0.2817, "step": 5630 }, { "epoch": 0.36, "grad_norm": 0.6308944245010688, "learning_rate": 7.419078640829087e-07, "loss": 0.3434, "step": 5631 }, { "epoch": 0.36, "grad_norm": 1.325752180559273, "learning_rate": 7.418174766977161e-07, "loss": 0.0618, "step": 5632 }, { "epoch": 0.36, "grad_norm": 0.7016784882125594, "learning_rate": 7.417270789961039e-07, "loss": 0.2734, "step": 5633 }, { "epoch": 0.36, "grad_norm": 0.7981139381820834, "learning_rate": 7.416366709819286e-07, "loss": 0.2542, "step": 5634 }, { "epoch": 0.36, "grad_norm": 0.570908425410927, "learning_rate": 7.415462526590471e-07, "loss": 0.315, "step": 5635 }, { "epoch": 0.36, "grad_norm": 0.6039784607978905, "learning_rate": 7.414558240313169e-07, "loss": 0.0198, "step": 5636 }, { "epoch": 0.36, "grad_norm": 0.8918662710400138, "learning_rate": 7.413653851025958e-07, "loss": 0.0737, "step": 5637 }, { "epoch": 0.36, "grad_norm": 0.6018072937725762, "learning_rate": 7.412749358767422e-07, "loss": 0.1989, "step": 5638 }, { "epoch": 0.36, "grad_norm": 0.5927248964447291, "learning_rate": 7.41184476357615e-07, "loss": 0.1069, "step": 5639 }, { "epoch": 0.36, "grad_norm": 0.8822401615052278, "learning_rate": 7.410940065490731e-07, "loss": 0.1038, "step": 5640 }, { "epoch": 0.36, "grad_norm": 0.9121659841341959, "learning_rate": 7.410035264549761e-07, "loss": 0.1719, "step": 5641 }, { "epoch": 0.36, "grad_norm": 1.2428128379739538, "learning_rate": 7.409130360791842e-07, "loss": 0.2085, "step": 5642 }, { "epoch": 0.36, "grad_norm": 1.9114363804896441, "learning_rate": 7.408225354255579e-07, "loss": 0.2893, "step": 5643 }, { "epoch": 0.36, "grad_norm": 0.56843863463519, "learning_rate": 7.407320244979581e-07, "loss": 0.1195, "step": 5644 }, { "epoch": 0.36, "grad_norm": 0.5664476519349504, "learning_rate": 7.406415033002463e-07, "loss": 0.1656, "step": 5645 }, { "epoch": 0.36, "grad_norm": 0.8187165423109634, "learning_rate": 7.405509718362841e-07, "loss": 0.0665, "step": 5646 }, { "epoch": 0.36, "grad_norm": 0.7366153668689416, "learning_rate": 7.404604301099339e-07, "loss": 0.2392, "step": 5647 }, { "epoch": 0.36, "grad_norm": 0.8249155852385768, "learning_rate": 7.403698781250586e-07, "loss": 0.4827, "step": 5648 }, { "epoch": 0.36, "grad_norm": 0.18021192622080656, "learning_rate": 7.402793158855209e-07, "loss": 0.0045, "step": 5649 }, { "epoch": 0.36, "grad_norm": 0.6927277860411692, "learning_rate": 7.401887433951847e-07, "loss": 0.1217, "step": 5650 }, { "epoch": 0.36, "grad_norm": 0.3272280206798197, "learning_rate": 7.400981606579138e-07, "loss": 0.0934, "step": 5651 }, { "epoch": 0.36, "grad_norm": 0.2544410262624939, "learning_rate": 7.400075676775724e-07, "loss": 0.1007, "step": 5652 }, { "epoch": 0.36, "grad_norm": 2.2623609796207327, "learning_rate": 7.39916964458026e-07, "loss": 0.2208, "step": 5653 }, { "epoch": 0.36, "grad_norm": 1.3304452078577227, "learning_rate": 7.398263510031395e-07, "loss": 0.2133, "step": 5654 }, { "epoch": 0.36, "grad_norm": 0.6820556533519123, "learning_rate": 7.397357273167788e-07, "loss": 0.2927, "step": 5655 }, { "epoch": 0.36, "grad_norm": 3.988366638314936, "learning_rate": 7.396450934028101e-07, "loss": 0.2815, "step": 5656 }, { "epoch": 0.36, "grad_norm": 2.792737750103525, "learning_rate": 7.395544492650999e-07, "loss": 0.077, "step": 5657 }, { "epoch": 0.36, "grad_norm": 1.1168716330631443, "learning_rate": 7.394637949075154e-07, "loss": 0.3105, "step": 5658 }, { "epoch": 0.36, "grad_norm": 0.6715781478947498, "learning_rate": 7.393731303339239e-07, "loss": 0.2448, "step": 5659 }, { "epoch": 0.36, "grad_norm": 1.4642124289382994, "learning_rate": 7.392824555481935e-07, "loss": 0.0388, "step": 5660 }, { "epoch": 0.36, "grad_norm": 0.3997446258465748, "learning_rate": 7.391917705541925e-07, "loss": 0.1816, "step": 5661 }, { "epoch": 0.36, "grad_norm": 3.179714691245469, "learning_rate": 7.391010753557898e-07, "loss": 0.1698, "step": 5662 }, { "epoch": 0.36, "grad_norm": 5.5078712312561215, "learning_rate": 7.390103699568546e-07, "loss": 0.0489, "step": 5663 }, { "epoch": 0.36, "grad_norm": 0.8097312463539856, "learning_rate": 7.389196543612566e-07, "loss": 0.2355, "step": 5664 }, { "epoch": 0.36, "grad_norm": 0.612219173660267, "learning_rate": 7.388289285728657e-07, "loss": 0.1088, "step": 5665 }, { "epoch": 0.36, "grad_norm": 0.4675509341118895, "learning_rate": 7.387381925955527e-07, "loss": 0.1119, "step": 5666 }, { "epoch": 0.36, "grad_norm": 0.36521656834890354, "learning_rate": 7.386474464331884e-07, "loss": 0.0157, "step": 5667 }, { "epoch": 0.36, "grad_norm": 0.6613329380880284, "learning_rate": 7.385566900896444e-07, "loss": 0.4098, "step": 5668 }, { "epoch": 0.36, "grad_norm": 1.308144804980323, "learning_rate": 7.384659235687923e-07, "loss": 0.1153, "step": 5669 }, { "epoch": 0.36, "grad_norm": 0.2843589636687059, "learning_rate": 7.383751468745045e-07, "loss": 0.2318, "step": 5670 }, { "epoch": 0.36, "grad_norm": 1.9875939890431915, "learning_rate": 7.382843600106539e-07, "loss": 0.3433, "step": 5671 }, { "epoch": 0.36, "grad_norm": 0.4268691867817186, "learning_rate": 7.381935629811133e-07, "loss": 0.1946, "step": 5672 }, { "epoch": 0.36, "grad_norm": 0.5107892003036639, "learning_rate": 7.381027557897567e-07, "loss": 0.0937, "step": 5673 }, { "epoch": 0.36, "grad_norm": 3.27515494830063, "learning_rate": 7.380119384404578e-07, "loss": 0.0747, "step": 5674 }, { "epoch": 0.36, "grad_norm": 0.6790032271875316, "learning_rate": 7.379211109370911e-07, "loss": 0.2643, "step": 5675 }, { "epoch": 0.36, "grad_norm": 0.26761219533808595, "learning_rate": 7.378302732835316e-07, "loss": 0.1076, "step": 5676 }, { "epoch": 0.36, "grad_norm": 0.500631776750934, "learning_rate": 7.377394254836547e-07, "loss": 0.1016, "step": 5677 }, { "epoch": 0.36, "grad_norm": 0.5614007688444749, "learning_rate": 7.376485675413356e-07, "loss": 0.1787, "step": 5678 }, { "epoch": 0.36, "grad_norm": 0.3439127880289776, "learning_rate": 7.375576994604511e-07, "loss": 0.0058, "step": 5679 }, { "epoch": 0.36, "grad_norm": 1.158611270169246, "learning_rate": 7.374668212448776e-07, "loss": 0.2079, "step": 5680 }, { "epoch": 0.36, "grad_norm": 0.9345290844976936, "learning_rate": 7.373759328984921e-07, "loss": 0.371, "step": 5681 }, { "epoch": 0.36, "grad_norm": 0.6145023831308265, "learning_rate": 7.37285034425172e-07, "loss": 0.1397, "step": 5682 }, { "epoch": 0.36, "grad_norm": 1.2459527759727258, "learning_rate": 7.371941258287955e-07, "loss": 0.0973, "step": 5683 }, { "epoch": 0.36, "grad_norm": 0.9089091616003668, "learning_rate": 7.371032071132408e-07, "loss": 0.2132, "step": 5684 }, { "epoch": 0.36, "grad_norm": 1.1794201041196493, "learning_rate": 7.370122782823866e-07, "loss": 0.3221, "step": 5685 }, { "epoch": 0.36, "grad_norm": 0.886339245117712, "learning_rate": 7.36921339340112e-07, "loss": 0.0274, "step": 5686 }, { "epoch": 0.36, "grad_norm": 0.6004655532216744, "learning_rate": 7.368303902902969e-07, "loss": 0.2572, "step": 5687 }, { "epoch": 0.36, "grad_norm": 1.5554489179893731, "learning_rate": 7.367394311368212e-07, "loss": 0.1274, "step": 5688 }, { "epoch": 0.36, "grad_norm": 1.8241326705184715, "learning_rate": 7.366484618835656e-07, "loss": 0.115, "step": 5689 }, { "epoch": 0.36, "grad_norm": 5.236697076427162, "learning_rate": 7.36557482534411e-07, "loss": 0.0819, "step": 5690 }, { "epoch": 0.36, "grad_norm": 1.0167983250243307, "learning_rate": 7.364664930932384e-07, "loss": 0.0316, "step": 5691 }, { "epoch": 0.36, "grad_norm": 0.6488208347208061, "learning_rate": 7.3637549356393e-07, "loss": 0.3033, "step": 5692 }, { "epoch": 0.36, "grad_norm": 0.6161355625411944, "learning_rate": 7.362844839503677e-07, "loss": 0.2882, "step": 5693 }, { "epoch": 0.36, "grad_norm": 1.2956359600938927, "learning_rate": 7.361934642564345e-07, "loss": 0.5045, "step": 5694 }, { "epoch": 0.36, "grad_norm": 0.3618528380705309, "learning_rate": 7.361024344860132e-07, "loss": 0.332, "step": 5695 }, { "epoch": 0.36, "grad_norm": 1.1060362685573237, "learning_rate": 7.360113946429873e-07, "loss": 0.1582, "step": 5696 }, { "epoch": 0.36, "grad_norm": 0.5879530344047207, "learning_rate": 7.35920344731241e-07, "loss": 0.1334, "step": 5697 }, { "epoch": 0.36, "grad_norm": 3.851411422399082, "learning_rate": 7.358292847546585e-07, "loss": 0.1131, "step": 5698 }, { "epoch": 0.36, "grad_norm": 0.5272105328609933, "learning_rate": 7.357382147171247e-07, "loss": 0.2389, "step": 5699 }, { "epoch": 0.36, "grad_norm": 0.5566793042569607, "learning_rate": 7.356471346225248e-07, "loss": 0.0871, "step": 5700 }, { "epoch": 0.36, "grad_norm": 0.5260969300371358, "learning_rate": 7.355560444747444e-07, "loss": 0.1068, "step": 5701 }, { "epoch": 0.36, "grad_norm": 0.43081534978532504, "learning_rate": 7.354649442776696e-07, "loss": 0.0796, "step": 5702 }, { "epoch": 0.36, "grad_norm": 0.9839624499303066, "learning_rate": 7.35373834035187e-07, "loss": 0.1926, "step": 5703 }, { "epoch": 0.36, "grad_norm": 0.4249842854795375, "learning_rate": 7.352827137511835e-07, "loss": 0.048, "step": 5704 }, { "epoch": 0.36, "grad_norm": 0.4826094211081446, "learning_rate": 7.351915834295462e-07, "loss": 0.2404, "step": 5705 }, { "epoch": 0.36, "grad_norm": 0.46693183919675874, "learning_rate": 7.351004430741633e-07, "loss": 0.1472, "step": 5706 }, { "epoch": 0.36, "grad_norm": 3.4642519926183546, "learning_rate": 7.350092926889229e-07, "loss": 0.2596, "step": 5707 }, { "epoch": 0.36, "grad_norm": 0.3382804651693839, "learning_rate": 7.349181322777137e-07, "loss": 0.0568, "step": 5708 }, { "epoch": 0.36, "grad_norm": 0.2676612799329265, "learning_rate": 7.348269618444247e-07, "loss": 0.0702, "step": 5709 }, { "epoch": 0.36, "grad_norm": 0.9893205741285498, "learning_rate": 7.347357813929454e-07, "loss": 0.3175, "step": 5710 }, { "epoch": 0.36, "grad_norm": 0.8570067933630025, "learning_rate": 7.346445909271658e-07, "loss": 0.2645, "step": 5711 }, { "epoch": 0.36, "grad_norm": 0.9840025305888707, "learning_rate": 7.345533904509763e-07, "loss": 0.0598, "step": 5712 }, { "epoch": 0.36, "grad_norm": 0.4118979936901152, "learning_rate": 7.344621799682675e-07, "loss": 0.1159, "step": 5713 }, { "epoch": 0.36, "grad_norm": 3.171307740778579, "learning_rate": 7.343709594829311e-07, "loss": 0.0209, "step": 5714 }, { "epoch": 0.36, "grad_norm": 0.6712994625698699, "learning_rate": 7.34279728998858e-07, "loss": 0.1095, "step": 5715 }, { "epoch": 0.36, "grad_norm": 1.0261338381513776, "learning_rate": 7.34188488519941e-07, "loss": 0.28, "step": 5716 }, { "epoch": 0.36, "grad_norm": 0.4612067449269904, "learning_rate": 7.340972380500722e-07, "loss": 0.244, "step": 5717 }, { "epoch": 0.36, "grad_norm": 1.6316223035935755, "learning_rate": 7.340059775931447e-07, "loss": 0.46, "step": 5718 }, { "epoch": 0.36, "grad_norm": 0.3100217606040487, "learning_rate": 7.339147071530518e-07, "loss": 0.1125, "step": 5719 }, { "epoch": 0.36, "grad_norm": 1.087385815884791, "learning_rate": 7.338234267336872e-07, "loss": 0.26, "step": 5720 }, { "epoch": 0.36, "grad_norm": 0.678636488255624, "learning_rate": 7.337321363389452e-07, "loss": 0.2709, "step": 5721 }, { "epoch": 0.36, "grad_norm": 0.62897584509711, "learning_rate": 7.336408359727203e-07, "loss": 0.2842, "step": 5722 }, { "epoch": 0.36, "grad_norm": 0.18081937509925167, "learning_rate": 7.335495256389077e-07, "loss": 0.0797, "step": 5723 }, { "epoch": 0.37, "grad_norm": 0.6151101410382191, "learning_rate": 7.334582053414029e-07, "loss": 0.1554, "step": 5724 }, { "epoch": 0.37, "grad_norm": 0.6673374149633937, "learning_rate": 7.333668750841016e-07, "loss": 0.1558, "step": 5725 }, { "epoch": 0.37, "grad_norm": 3.1063472558336795, "learning_rate": 7.332755348709005e-07, "loss": 0.1048, "step": 5726 }, { "epoch": 0.37, "grad_norm": 1.206564300336258, "learning_rate": 7.331841847056961e-07, "loss": 0.0903, "step": 5727 }, { "epoch": 0.37, "grad_norm": 0.5047740906055621, "learning_rate": 7.330928245923856e-07, "loss": 0.0925, "step": 5728 }, { "epoch": 0.37, "grad_norm": 1.3432459942569583, "learning_rate": 7.330014545348665e-07, "loss": 0.058, "step": 5729 }, { "epoch": 0.37, "grad_norm": 0.5192291835658449, "learning_rate": 7.329100745370371e-07, "loss": 0.1255, "step": 5730 }, { "epoch": 0.37, "grad_norm": 0.37345549224087526, "learning_rate": 7.328186846027958e-07, "loss": 0.1841, "step": 5731 }, { "epoch": 0.37, "grad_norm": 0.7453052128979122, "learning_rate": 7.327272847360411e-07, "loss": 0.3567, "step": 5732 }, { "epoch": 0.37, "grad_norm": 1.0458302309578975, "learning_rate": 7.326358749406729e-07, "loss": 0.2815, "step": 5733 }, { "epoch": 0.37, "grad_norm": 0.9210884382714356, "learning_rate": 7.325444552205903e-07, "loss": 0.1767, "step": 5734 }, { "epoch": 0.37, "grad_norm": 0.4306498508361315, "learning_rate": 7.32453025579694e-07, "loss": 0.2736, "step": 5735 }, { "epoch": 0.37, "grad_norm": 0.6874778263152205, "learning_rate": 7.323615860218842e-07, "loss": 0.271, "step": 5736 }, { "epoch": 0.37, "grad_norm": 4.233931078266332, "learning_rate": 7.322701365510622e-07, "loss": 0.3315, "step": 5737 }, { "epoch": 0.37, "grad_norm": 0.8323186982992874, "learning_rate": 7.321786771711291e-07, "loss": 0.3319, "step": 5738 }, { "epoch": 0.37, "grad_norm": 0.7889333812689606, "learning_rate": 7.32087207885987e-07, "loss": 0.3679, "step": 5739 }, { "epoch": 0.37, "grad_norm": 1.7308759383840684, "learning_rate": 7.31995728699538e-07, "loss": 0.3541, "step": 5740 }, { "epoch": 0.37, "grad_norm": 1.686361723910737, "learning_rate": 7.319042396156848e-07, "loss": 0.2275, "step": 5741 }, { "epoch": 0.37, "grad_norm": 0.5245958672103136, "learning_rate": 7.318127406383307e-07, "loss": 0.3172, "step": 5742 }, { "epoch": 0.37, "grad_norm": 5.226124069878523, "learning_rate": 7.317212317713789e-07, "loss": 0.1312, "step": 5743 }, { "epoch": 0.37, "grad_norm": 1.120338026666057, "learning_rate": 7.316297130187336e-07, "loss": 0.2483, "step": 5744 }, { "epoch": 0.37, "grad_norm": 0.689993816657338, "learning_rate": 7.315381843842994e-07, "loss": 0.2007, "step": 5745 }, { "epoch": 0.37, "grad_norm": 0.22364874179971245, "learning_rate": 7.314466458719805e-07, "loss": 0.0673, "step": 5746 }, { "epoch": 0.37, "grad_norm": 0.3927315160954293, "learning_rate": 7.313550974856824e-07, "loss": 0.1532, "step": 5747 }, { "epoch": 0.37, "grad_norm": 1.0515841969074804, "learning_rate": 7.312635392293108e-07, "loss": 0.3764, "step": 5748 }, { "epoch": 0.37, "grad_norm": 0.3917197689690394, "learning_rate": 7.311719711067716e-07, "loss": 0.1184, "step": 5749 }, { "epoch": 0.37, "grad_norm": 0.6463061072605664, "learning_rate": 7.310803931219717e-07, "loss": 0.0841, "step": 5750 }, { "epoch": 0.37, "grad_norm": 0.9927125562299818, "learning_rate": 7.309888052788174e-07, "loss": 0.2835, "step": 5751 }, { "epoch": 0.37, "grad_norm": 0.1886007504177757, "learning_rate": 7.308972075812165e-07, "loss": 0.0942, "step": 5752 }, { "epoch": 0.37, "grad_norm": 0.9151289201691571, "learning_rate": 7.308056000330766e-07, "loss": 0.1421, "step": 5753 }, { "epoch": 0.37, "grad_norm": 0.900235705381781, "learning_rate": 7.307139826383058e-07, "loss": 0.1209, "step": 5754 }, { "epoch": 0.37, "grad_norm": 0.5837265002658238, "learning_rate": 7.306223554008126e-07, "loss": 0.1662, "step": 5755 }, { "epoch": 0.37, "grad_norm": 2.064992461090195, "learning_rate": 7.305307183245062e-07, "loss": 0.0825, "step": 5756 }, { "epoch": 0.37, "grad_norm": 0.42029654362577756, "learning_rate": 7.304390714132958e-07, "loss": 0.1253, "step": 5757 }, { "epoch": 0.37, "grad_norm": 0.22787478007896503, "learning_rate": 7.303474146710915e-07, "loss": 0.0995, "step": 5758 }, { "epoch": 0.37, "grad_norm": 0.4476001216086352, "learning_rate": 7.302557481018034e-07, "loss": 0.1563, "step": 5759 }, { "epoch": 0.37, "grad_norm": 1.20644091302951, "learning_rate": 7.301640717093423e-07, "loss": 0.0606, "step": 5760 }, { "epoch": 0.37, "grad_norm": 0.932890582717694, "learning_rate": 7.30072385497619e-07, "loss": 0.1974, "step": 5761 }, { "epoch": 0.37, "grad_norm": 0.775157928985653, "learning_rate": 7.299806894705455e-07, "loss": 0.1684, "step": 5762 }, { "epoch": 0.37, "grad_norm": 0.5930366332677323, "learning_rate": 7.298889836320334e-07, "loss": 0.1152, "step": 5763 }, { "epoch": 0.37, "grad_norm": 1.5754587056495242, "learning_rate": 7.29797267985995e-07, "loss": 0.1185, "step": 5764 }, { "epoch": 0.37, "grad_norm": 0.4145190021337893, "learning_rate": 7.297055425363432e-07, "loss": 0.0865, "step": 5765 }, { "epoch": 0.37, "grad_norm": 4.459475753926773, "learning_rate": 7.296138072869913e-07, "loss": 0.1672, "step": 5766 }, { "epoch": 0.37, "grad_norm": 2.327574824334071, "learning_rate": 7.295220622418527e-07, "loss": 0.0507, "step": 5767 }, { "epoch": 0.37, "grad_norm": 1.345594717162949, "learning_rate": 7.294303074048415e-07, "loss": 0.0843, "step": 5768 }, { "epoch": 0.37, "grad_norm": 1.0252372310202145, "learning_rate": 7.293385427798721e-07, "loss": 0.2572, "step": 5769 }, { "epoch": 0.37, "grad_norm": 0.6216556517457695, "learning_rate": 7.292467683708596e-07, "loss": 0.2336, "step": 5770 }, { "epoch": 0.37, "grad_norm": 0.6782792319158881, "learning_rate": 7.291549841817192e-07, "loss": 0.2633, "step": 5771 }, { "epoch": 0.37, "grad_norm": 0.5688118833042158, "learning_rate": 7.290631902163664e-07, "loss": 0.0075, "step": 5772 }, { "epoch": 0.37, "grad_norm": 0.3895485852110559, "learning_rate": 7.289713864787175e-07, "loss": 0.4064, "step": 5773 }, { "epoch": 0.37, "grad_norm": 2.506150204514353, "learning_rate": 7.288795729726889e-07, "loss": 0.0723, "step": 5774 }, { "epoch": 0.37, "grad_norm": 1.0517219096754018, "learning_rate": 7.287877497021977e-07, "loss": 0.1546, "step": 5775 }, { "epoch": 0.37, "grad_norm": 0.8877197643238348, "learning_rate": 7.28695916671161e-07, "loss": 0.0975, "step": 5776 }, { "epoch": 0.37, "grad_norm": 0.6454200695293008, "learning_rate": 7.286040738834968e-07, "loss": 0.1935, "step": 5777 }, { "epoch": 0.37, "grad_norm": 0.715357209162237, "learning_rate": 7.285122213431233e-07, "loss": 0.0033, "step": 5778 }, { "epoch": 0.37, "grad_norm": 0.4877099297085567, "learning_rate": 7.28420359053959e-07, "loss": 0.1364, "step": 5779 }, { "epoch": 0.37, "grad_norm": 0.888481172298206, "learning_rate": 7.283284870199231e-07, "loss": 0.231, "step": 5780 }, { "epoch": 0.37, "grad_norm": 0.7195130961315708, "learning_rate": 7.28236605244935e-07, "loss": 0.301, "step": 5781 }, { "epoch": 0.37, "grad_norm": 0.998004839873859, "learning_rate": 7.281447137329144e-07, "loss": 0.2926, "step": 5782 }, { "epoch": 0.37, "grad_norm": 1.0589799086919884, "learning_rate": 7.280528124877817e-07, "loss": 0.0335, "step": 5783 }, { "epoch": 0.37, "grad_norm": 1.246847115705105, "learning_rate": 7.279609015134577e-07, "loss": 0.2298, "step": 5784 }, { "epoch": 0.37, "grad_norm": 0.5127667301057278, "learning_rate": 7.278689808138632e-07, "loss": 0.0162, "step": 5785 }, { "epoch": 0.37, "grad_norm": 0.5686118131564267, "learning_rate": 7.2777705039292e-07, "loss": 0.3105, "step": 5786 }, { "epoch": 0.37, "grad_norm": 1.5504845596655386, "learning_rate": 7.276851102545499e-07, "loss": 0.1512, "step": 5787 }, { "epoch": 0.37, "grad_norm": 0.3507589777277134, "learning_rate": 7.275931604026752e-07, "loss": 0.2006, "step": 5788 }, { "epoch": 0.37, "grad_norm": 0.6363302877756208, "learning_rate": 7.275012008412191e-07, "loss": 0.2754, "step": 5789 }, { "epoch": 0.37, "grad_norm": 0.7886439160855335, "learning_rate": 7.27409231574104e-07, "loss": 0.0081, "step": 5790 }, { "epoch": 0.37, "grad_norm": 0.9040161559453388, "learning_rate": 7.273172526052542e-07, "loss": 0.3219, "step": 5791 }, { "epoch": 0.37, "grad_norm": 6.417099807574797, "learning_rate": 7.272252639385935e-07, "loss": 0.3511, "step": 5792 }, { "epoch": 0.37, "grad_norm": 0.803623343303704, "learning_rate": 7.27133265578046e-07, "loss": 0.2845, "step": 5793 }, { "epoch": 0.37, "grad_norm": 12.176265351119403, "learning_rate": 7.270412575275368e-07, "loss": 0.1648, "step": 5794 }, { "epoch": 0.37, "grad_norm": 0.553487019276648, "learning_rate": 7.269492397909913e-07, "loss": 0.2341, "step": 5795 }, { "epoch": 0.37, "grad_norm": 0.20324779002863755, "learning_rate": 7.268572123723351e-07, "loss": 0.0031, "step": 5796 }, { "epoch": 0.37, "grad_norm": 0.21900193556265898, "learning_rate": 7.267651752754939e-07, "loss": 0.2145, "step": 5797 }, { "epoch": 0.37, "grad_norm": 0.8876393913576334, "learning_rate": 7.266731285043948e-07, "loss": 0.2838, "step": 5798 }, { "epoch": 0.37, "grad_norm": 6.9612015987432265, "learning_rate": 7.265810720629642e-07, "loss": 0.0532, "step": 5799 }, { "epoch": 0.37, "grad_norm": 1.6036271570510925, "learning_rate": 7.264890059551295e-07, "loss": 0.1828, "step": 5800 }, { "epoch": 0.37, "grad_norm": 0.8418700313466142, "learning_rate": 7.263969301848187e-07, "loss": 0.2084, "step": 5801 }, { "epoch": 0.37, "grad_norm": 1.3243730146921566, "learning_rate": 7.263048447559596e-07, "loss": 0.1082, "step": 5802 }, { "epoch": 0.37, "grad_norm": 0.4295623478554113, "learning_rate": 7.262127496724809e-07, "loss": 0.1655, "step": 5803 }, { "epoch": 0.37, "grad_norm": 0.42034912858230894, "learning_rate": 7.261206449383115e-07, "loss": 0.1166, "step": 5804 }, { "epoch": 0.37, "grad_norm": 0.792203867453036, "learning_rate": 7.260285305573809e-07, "loss": 0.3044, "step": 5805 }, { "epoch": 0.37, "grad_norm": 1.1834953034758922, "learning_rate": 7.259364065336188e-07, "loss": 0.2387, "step": 5806 }, { "epoch": 0.37, "grad_norm": 1.1441268848969504, "learning_rate": 7.258442728709554e-07, "loss": 0.082, "step": 5807 }, { "epoch": 0.37, "grad_norm": 7.547858405168101, "learning_rate": 7.257521295733213e-07, "loss": 0.203, "step": 5808 }, { "epoch": 0.37, "grad_norm": 0.6498259210423912, "learning_rate": 7.256599766446476e-07, "loss": 0.1696, "step": 5809 }, { "epoch": 0.37, "grad_norm": 0.589797651636558, "learning_rate": 7.255678140888657e-07, "loss": 0.203, "step": 5810 }, { "epoch": 0.37, "grad_norm": 3.295665441714581, "learning_rate": 7.254756419099073e-07, "loss": 0.0987, "step": 5811 }, { "epoch": 0.37, "grad_norm": 0.1615737150670548, "learning_rate": 7.253834601117048e-07, "loss": 0.0033, "step": 5812 }, { "epoch": 0.37, "grad_norm": 0.44912207714477587, "learning_rate": 7.252912686981907e-07, "loss": 0.1508, "step": 5813 }, { "epoch": 0.37, "grad_norm": 0.963656407308307, "learning_rate": 7.251990676732984e-07, "loss": 0.137, "step": 5814 }, { "epoch": 0.37, "grad_norm": 0.7939091592666753, "learning_rate": 7.251068570409611e-07, "loss": 0.1983, "step": 5815 }, { "epoch": 0.37, "grad_norm": 0.24188510757011006, "learning_rate": 7.250146368051126e-07, "loss": 0.0978, "step": 5816 }, { "epoch": 0.37, "grad_norm": 1.0629619612234744, "learning_rate": 7.249224069696876e-07, "loss": 0.1746, "step": 5817 }, { "epoch": 0.37, "grad_norm": 0.9571145455150913, "learning_rate": 7.248301675386204e-07, "loss": 0.18, "step": 5818 }, { "epoch": 0.37, "grad_norm": 1.0775381212771094, "learning_rate": 7.247379185158463e-07, "loss": 0.1911, "step": 5819 }, { "epoch": 0.37, "grad_norm": 3.4392557445096235, "learning_rate": 7.246456599053008e-07, "loss": 0.2503, "step": 5820 }, { "epoch": 0.37, "grad_norm": 0.9725169780983117, "learning_rate": 7.245533917109198e-07, "loss": 0.3065, "step": 5821 }, { "epoch": 0.37, "grad_norm": 5.966288576702168, "learning_rate": 7.244611139366398e-07, "loss": 0.2274, "step": 5822 }, { "epoch": 0.37, "grad_norm": 0.7998015479631919, "learning_rate": 7.243688265863974e-07, "loss": 0.318, "step": 5823 }, { "epoch": 0.37, "grad_norm": 1.1673036168895607, "learning_rate": 7.2427652966413e-07, "loss": 0.2272, "step": 5824 }, { "epoch": 0.37, "grad_norm": 1.2907306020411378, "learning_rate": 7.241842231737748e-07, "loss": 0.08, "step": 5825 }, { "epoch": 0.37, "grad_norm": 0.9708427215621421, "learning_rate": 7.2409190711927e-07, "loss": 0.3598, "step": 5826 }, { "epoch": 0.37, "grad_norm": 0.6238604424541833, "learning_rate": 7.239995815045541e-07, "loss": 0.5109, "step": 5827 }, { "epoch": 0.37, "grad_norm": 3.3065175720140885, "learning_rate": 7.239072463335657e-07, "loss": 0.1389, "step": 5828 }, { "epoch": 0.37, "grad_norm": 2.0029626689565476, "learning_rate": 7.238149016102439e-07, "loss": 0.3771, "step": 5829 }, { "epoch": 0.37, "grad_norm": 0.19797498975210448, "learning_rate": 7.237225473385286e-07, "loss": 0.0397, "step": 5830 }, { "epoch": 0.37, "grad_norm": 1.432444886934499, "learning_rate": 7.236301835223597e-07, "loss": 0.179, "step": 5831 }, { "epoch": 0.37, "grad_norm": 0.7383596119254882, "learning_rate": 7.235378101656775e-07, "loss": 0.2657, "step": 5832 }, { "epoch": 0.37, "grad_norm": 3.9716360262510553, "learning_rate": 7.234454272724231e-07, "loss": 0.0608, "step": 5833 }, { "epoch": 0.37, "grad_norm": 0.8742398751449905, "learning_rate": 7.233530348465376e-07, "loss": 0.3217, "step": 5834 }, { "epoch": 0.37, "grad_norm": 1.296015653419755, "learning_rate": 7.232606328919626e-07, "loss": 0.2541, "step": 5835 }, { "epoch": 0.37, "grad_norm": 1.0472261813186758, "learning_rate": 7.231682214126401e-07, "loss": 0.2328, "step": 5836 }, { "epoch": 0.37, "grad_norm": 0.7530299828248986, "learning_rate": 7.230758004125127e-07, "loss": 0.1346, "step": 5837 }, { "epoch": 0.37, "grad_norm": 1.4179811070545931, "learning_rate": 7.229833698955232e-07, "loss": 0.0977, "step": 5838 }, { "epoch": 0.37, "grad_norm": 0.5539109258430226, "learning_rate": 7.228909298656149e-07, "loss": 0.1684, "step": 5839 }, { "epoch": 0.37, "grad_norm": 0.685320802630804, "learning_rate": 7.227984803267315e-07, "loss": 0.1562, "step": 5840 }, { "epoch": 0.37, "grad_norm": 0.46337319748195305, "learning_rate": 7.22706021282817e-07, "loss": 0.1288, "step": 5841 }, { "epoch": 0.37, "grad_norm": 4.22438321986461, "learning_rate": 7.226135527378161e-07, "loss": 0.0241, "step": 5842 }, { "epoch": 0.37, "grad_norm": 0.6880371677332253, "learning_rate": 7.225210746956733e-07, "loss": 0.2242, "step": 5843 }, { "epoch": 0.37, "grad_norm": 0.3904455356259318, "learning_rate": 7.224285871603342e-07, "loss": 0.1122, "step": 5844 }, { "epoch": 0.37, "grad_norm": 2.80368536327891, "learning_rate": 7.223360901357445e-07, "loss": 0.0082, "step": 5845 }, { "epoch": 0.37, "grad_norm": 0.12360576823331884, "learning_rate": 7.222435836258503e-07, "loss": 0.0081, "step": 5846 }, { "epoch": 0.37, "grad_norm": 1.072400722055551, "learning_rate": 7.221510676345979e-07, "loss": 0.482, "step": 5847 }, { "epoch": 0.37, "grad_norm": 2.7497709231459546, "learning_rate": 7.220585421659344e-07, "loss": 0.2764, "step": 5848 }, { "epoch": 0.37, "grad_norm": 2.1114486730556554, "learning_rate": 7.21966007223807e-07, "loss": 0.1383, "step": 5849 }, { "epoch": 0.37, "grad_norm": 0.9980044127105586, "learning_rate": 7.218734628121638e-07, "loss": 0.1708, "step": 5850 }, { "epoch": 0.37, "grad_norm": 1.0969380583317159, "learning_rate": 7.217809089349524e-07, "loss": 0.2902, "step": 5851 }, { "epoch": 0.37, "grad_norm": 0.6849231119905266, "learning_rate": 7.216883455961218e-07, "loss": 0.0865, "step": 5852 }, { "epoch": 0.37, "grad_norm": 1.243019603258892, "learning_rate": 7.215957727996207e-07, "loss": 0.1, "step": 5853 }, { "epoch": 0.37, "grad_norm": 0.385221654833512, "learning_rate": 7.215031905493983e-07, "loss": 0.1145, "step": 5854 }, { "epoch": 0.37, "grad_norm": 0.5605233878714039, "learning_rate": 7.214105988494045e-07, "loss": 0.1167, "step": 5855 }, { "epoch": 0.37, "grad_norm": 0.6201025446143363, "learning_rate": 7.213179977035897e-07, "loss": 0.0043, "step": 5856 }, { "epoch": 0.37, "grad_norm": 0.6588082719258256, "learning_rate": 7.212253871159041e-07, "loss": 0.3062, "step": 5857 }, { "epoch": 0.37, "grad_norm": 6.448959617441987, "learning_rate": 7.211327670902988e-07, "loss": 0.2483, "step": 5858 }, { "epoch": 0.37, "grad_norm": 0.7645063728979504, "learning_rate": 7.210401376307252e-07, "loss": 0.3142, "step": 5859 }, { "epoch": 0.37, "grad_norm": 0.3454183970448466, "learning_rate": 7.209474987411346e-07, "loss": 0.0788, "step": 5860 }, { "epoch": 0.37, "grad_norm": 0.489328950945489, "learning_rate": 7.208548504254799e-07, "loss": 0.0732, "step": 5861 }, { "epoch": 0.37, "grad_norm": 1.0839647157281016, "learning_rate": 7.207621926877133e-07, "loss": 0.2387, "step": 5862 }, { "epoch": 0.37, "grad_norm": 0.6501698188666848, "learning_rate": 7.206695255317876e-07, "loss": 0.456, "step": 5863 }, { "epoch": 0.37, "grad_norm": 0.7720724698021013, "learning_rate": 7.205768489616565e-07, "loss": 0.3358, "step": 5864 }, { "epoch": 0.37, "grad_norm": 0.7459872200113052, "learning_rate": 7.204841629812734e-07, "loss": 0.2244, "step": 5865 }, { "epoch": 0.37, "grad_norm": 0.6919461914751762, "learning_rate": 7.203914675945928e-07, "loss": 0.2421, "step": 5866 }, { "epoch": 0.37, "grad_norm": 0.528182902739734, "learning_rate": 7.202987628055693e-07, "loss": 0.2035, "step": 5867 }, { "epoch": 0.37, "grad_norm": 0.46861676408355124, "learning_rate": 7.202060486181575e-07, "loss": 0.0723, "step": 5868 }, { "epoch": 0.37, "grad_norm": 2.879367632376332, "learning_rate": 7.201133250363132e-07, "loss": 0.1566, "step": 5869 }, { "epoch": 0.37, "grad_norm": 0.38307353665079696, "learning_rate": 7.200205920639918e-07, "loss": 0.0125, "step": 5870 }, { "epoch": 0.37, "grad_norm": 0.473801322198328, "learning_rate": 7.199278497051497e-07, "loss": 0.3076, "step": 5871 }, { "epoch": 0.37, "grad_norm": 0.39376928629463165, "learning_rate": 7.198350979637434e-07, "loss": 0.1229, "step": 5872 }, { "epoch": 0.37, "grad_norm": 4.502550421033535, "learning_rate": 7.197423368437299e-07, "loss": 0.3354, "step": 5873 }, { "epoch": 0.37, "grad_norm": 0.5897333491847417, "learning_rate": 7.196495663490665e-07, "loss": 0.1523, "step": 5874 }, { "epoch": 0.37, "grad_norm": 1.159565031612828, "learning_rate": 7.195567864837111e-07, "loss": 0.2187, "step": 5875 }, { "epoch": 0.37, "grad_norm": 2.502572097228924, "learning_rate": 7.194639972516218e-07, "loss": 0.2899, "step": 5876 }, { "epoch": 0.37, "grad_norm": 0.5176055018510678, "learning_rate": 7.193711986567573e-07, "loss": 0.0791, "step": 5877 }, { "epoch": 0.37, "grad_norm": 1.3059301873472071, "learning_rate": 7.192783907030765e-07, "loss": 0.1164, "step": 5878 }, { "epoch": 0.37, "grad_norm": 0.7189273682628163, "learning_rate": 7.191855733945386e-07, "loss": 0.1507, "step": 5879 }, { "epoch": 0.37, "grad_norm": 0.6557773111881142, "learning_rate": 7.190927467351037e-07, "loss": 0.3814, "step": 5880 }, { "epoch": 0.38, "grad_norm": 1.1147535581280337, "learning_rate": 7.189999107287317e-07, "loss": 0.1315, "step": 5881 }, { "epoch": 0.38, "grad_norm": 0.9928493129046877, "learning_rate": 7.189070653793833e-07, "loss": 0.0101, "step": 5882 }, { "epoch": 0.38, "grad_norm": 0.37211700710997386, "learning_rate": 7.188142106910193e-07, "loss": 0.1829, "step": 5883 }, { "epoch": 0.38, "grad_norm": 6.165946089905001, "learning_rate": 7.187213466676013e-07, "loss": 0.3081, "step": 5884 }, { "epoch": 0.38, "grad_norm": 0.4124492140865974, "learning_rate": 7.18628473313091e-07, "loss": 0.0811, "step": 5885 }, { "epoch": 0.38, "grad_norm": 0.8954038009244101, "learning_rate": 7.185355906314505e-07, "loss": 0.1387, "step": 5886 }, { "epoch": 0.38, "grad_norm": 1.3092639590667032, "learning_rate": 7.184426986266423e-07, "loss": 0.2749, "step": 5887 }, { "epoch": 0.38, "grad_norm": 0.6949233311868814, "learning_rate": 7.183497973026296e-07, "loss": 0.0865, "step": 5888 }, { "epoch": 0.38, "grad_norm": 3.2710940156925137, "learning_rate": 7.182568866633756e-07, "loss": 0.1948, "step": 5889 }, { "epoch": 0.38, "grad_norm": 1.1978573352474637, "learning_rate": 7.18163966712844e-07, "loss": 0.2208, "step": 5890 }, { "epoch": 0.38, "grad_norm": 0.6818154892923289, "learning_rate": 7.18071037454999e-07, "loss": 0.2935, "step": 5891 }, { "epoch": 0.38, "grad_norm": 1.4738591744705973, "learning_rate": 7.179780988938051e-07, "loss": 0.1539, "step": 5892 }, { "epoch": 0.38, "grad_norm": 1.1680365976824745, "learning_rate": 7.178851510332274e-07, "loss": 0.2514, "step": 5893 }, { "epoch": 0.38, "grad_norm": 0.5345444115984965, "learning_rate": 7.177921938772311e-07, "loss": 0.3865, "step": 5894 }, { "epoch": 0.38, "grad_norm": 0.4549782245854918, "learning_rate": 7.17699227429782e-07, "loss": 0.2943, "step": 5895 }, { "epoch": 0.38, "grad_norm": 0.5654763469609636, "learning_rate": 7.176062516948463e-07, "loss": 0.0977, "step": 5896 }, { "epoch": 0.38, "grad_norm": 0.46045765447375575, "learning_rate": 7.175132666763905e-07, "loss": 0.1541, "step": 5897 }, { "epoch": 0.38, "grad_norm": 0.5610590384086495, "learning_rate": 7.174202723783814e-07, "loss": 0.0925, "step": 5898 }, { "epoch": 0.38, "grad_norm": 0.2928975090718757, "learning_rate": 7.173272688047865e-07, "loss": 0.121, "step": 5899 }, { "epoch": 0.38, "grad_norm": 0.9139770998882433, "learning_rate": 7.172342559595732e-07, "loss": 0.066, "step": 5900 }, { "epoch": 0.38, "grad_norm": 1.485855768140534, "learning_rate": 7.1714123384671e-07, "loss": 0.1278, "step": 5901 }, { "epoch": 0.38, "grad_norm": 2.444138561462059, "learning_rate": 7.170482024701651e-07, "loss": 0.0571, "step": 5902 }, { "epoch": 0.38, "grad_norm": 2.6205883371938983, "learning_rate": 7.169551618339078e-07, "loss": 0.243, "step": 5903 }, { "epoch": 0.38, "grad_norm": 0.6260682375338533, "learning_rate": 7.168621119419072e-07, "loss": 0.1723, "step": 5904 }, { "epoch": 0.38, "grad_norm": 2.1867454917890057, "learning_rate": 7.167690527981327e-07, "loss": 0.3286, "step": 5905 }, { "epoch": 0.38, "grad_norm": 0.407792610972802, "learning_rate": 7.166759844065548e-07, "loss": 0.1464, "step": 5906 }, { "epoch": 0.38, "grad_norm": 1.7570667036092302, "learning_rate": 7.165829067711439e-07, "loss": 0.1031, "step": 5907 }, { "epoch": 0.38, "grad_norm": 7.360062034053246, "learning_rate": 7.164898198958706e-07, "loss": 0.1911, "step": 5908 }, { "epoch": 0.38, "grad_norm": 0.6199722405916185, "learning_rate": 7.163967237847066e-07, "loss": 0.159, "step": 5909 }, { "epoch": 0.38, "grad_norm": 2.3338296173886413, "learning_rate": 7.163036184416231e-07, "loss": 0.366, "step": 5910 }, { "epoch": 0.38, "grad_norm": 1.213067992527338, "learning_rate": 7.162105038705926e-07, "loss": 0.1768, "step": 5911 }, { "epoch": 0.38, "grad_norm": 1.480109946037491, "learning_rate": 7.161173800755874e-07, "loss": 0.1274, "step": 5912 }, { "epoch": 0.38, "grad_norm": 0.6505464364524993, "learning_rate": 7.160242470605803e-07, "loss": 0.0872, "step": 5913 }, { "epoch": 0.38, "grad_norm": 1.6077104443243688, "learning_rate": 7.159311048295444e-07, "loss": 0.2574, "step": 5914 }, { "epoch": 0.38, "grad_norm": 0.7781919438857764, "learning_rate": 7.158379533864537e-07, "loss": 0.167, "step": 5915 }, { "epoch": 0.38, "grad_norm": 1.0321930163705826, "learning_rate": 7.15744792735282e-07, "loss": 0.1722, "step": 5916 }, { "epoch": 0.38, "grad_norm": 1.5674156331427114, "learning_rate": 7.156516228800035e-07, "loss": 0.1605, "step": 5917 }, { "epoch": 0.38, "grad_norm": 0.7950157651962791, "learning_rate": 7.155584438245935e-07, "loss": 0.1945, "step": 5918 }, { "epoch": 0.38, "grad_norm": 2.091492675135481, "learning_rate": 7.154652555730267e-07, "loss": 0.0534, "step": 5919 }, { "epoch": 0.38, "grad_norm": 0.6350082291984965, "learning_rate": 7.153720581292793e-07, "loss": 0.2207, "step": 5920 }, { "epoch": 0.38, "grad_norm": 0.6751077283045518, "learning_rate": 7.152788514973267e-07, "loss": 0.1156, "step": 5921 }, { "epoch": 0.38, "grad_norm": 0.7838490625511402, "learning_rate": 7.151856356811456e-07, "loss": 0.2122, "step": 5922 }, { "epoch": 0.38, "grad_norm": 0.6838821417234526, "learning_rate": 7.150924106847127e-07, "loss": 0.2095, "step": 5923 }, { "epoch": 0.38, "grad_norm": 0.6116515680506596, "learning_rate": 7.149991765120054e-07, "loss": 0.4388, "step": 5924 }, { "epoch": 0.38, "grad_norm": 1.0175319634749764, "learning_rate": 7.149059331670008e-07, "loss": 0.1162, "step": 5925 }, { "epoch": 0.38, "grad_norm": 0.4889298794243602, "learning_rate": 7.14812680653677e-07, "loss": 0.0855, "step": 5926 }, { "epoch": 0.38, "grad_norm": 1.1329174315251007, "learning_rate": 7.147194189760124e-07, "loss": 0.2025, "step": 5927 }, { "epoch": 0.38, "grad_norm": 0.38708904205903855, "learning_rate": 7.146261481379858e-07, "loss": 0.0041, "step": 5928 }, { "epoch": 0.38, "grad_norm": 0.4835130500903364, "learning_rate": 7.145328681435764e-07, "loss": 0.1677, "step": 5929 }, { "epoch": 0.38, "grad_norm": 1.7288205730665984, "learning_rate": 7.144395789967635e-07, "loss": 0.3148, "step": 5930 }, { "epoch": 0.38, "grad_norm": 0.9432187522235403, "learning_rate": 7.14346280701527e-07, "loss": 0.0094, "step": 5931 }, { "epoch": 0.38, "grad_norm": 1.0728331646521974, "learning_rate": 7.142529732618474e-07, "loss": 0.2973, "step": 5932 }, { "epoch": 0.38, "grad_norm": 5.564085645081182, "learning_rate": 7.141596566817052e-07, "loss": 0.365, "step": 5933 }, { "epoch": 0.38, "grad_norm": 0.912222774090835, "learning_rate": 7.140663309650816e-07, "loss": 0.326, "step": 5934 }, { "epoch": 0.38, "grad_norm": 1.7604164267103952, "learning_rate": 7.13972996115958e-07, "loss": 0.2514, "step": 5935 }, { "epoch": 0.38, "grad_norm": 0.436229042738186, "learning_rate": 7.138796521383162e-07, "loss": 0.112, "step": 5936 }, { "epoch": 0.38, "grad_norm": 4.998368663674957, "learning_rate": 7.137862990361382e-07, "loss": 0.0441, "step": 5937 }, { "epoch": 0.38, "grad_norm": 0.4927003810543219, "learning_rate": 7.136929368134074e-07, "loss": 0.0082, "step": 5938 }, { "epoch": 0.38, "grad_norm": 0.7354966554476302, "learning_rate": 7.135995654741062e-07, "loss": 0.4271, "step": 5939 }, { "epoch": 0.38, "grad_norm": 0.74617529396601, "learning_rate": 7.135061850222179e-07, "loss": 0.1556, "step": 5940 }, { "epoch": 0.38, "grad_norm": 1.435093383695401, "learning_rate": 7.134127954617268e-07, "loss": 0.178, "step": 5941 }, { "epoch": 0.38, "grad_norm": 0.477204625878189, "learning_rate": 7.133193967966168e-07, "loss": 0.2543, "step": 5942 }, { "epoch": 0.38, "grad_norm": 0.43863566913845464, "learning_rate": 7.132259890308725e-07, "loss": 0.183, "step": 5943 }, { "epoch": 0.38, "grad_norm": 0.44991637776734, "learning_rate": 7.131325721684788e-07, "loss": 0.378, "step": 5944 }, { "epoch": 0.38, "grad_norm": 0.5281526357455267, "learning_rate": 7.130391462134211e-07, "loss": 0.2169, "step": 5945 }, { "epoch": 0.38, "grad_norm": 0.8521604255758248, "learning_rate": 7.129457111696852e-07, "loss": 0.174, "step": 5946 }, { "epoch": 0.38, "grad_norm": 0.68207597659719, "learning_rate": 7.128522670412571e-07, "loss": 0.0522, "step": 5947 }, { "epoch": 0.38, "grad_norm": 0.2533660514279892, "learning_rate": 7.127588138321235e-07, "loss": 0.0686, "step": 5948 }, { "epoch": 0.38, "grad_norm": 0.2090779232570901, "learning_rate": 7.126653515462713e-07, "loss": 0.0796, "step": 5949 }, { "epoch": 0.38, "grad_norm": 1.2050745694824858, "learning_rate": 7.125718801876876e-07, "loss": 0.2794, "step": 5950 }, { "epoch": 0.38, "grad_norm": 1.6509178274863041, "learning_rate": 7.1247839976036e-07, "loss": 0.0936, "step": 5951 }, { "epoch": 0.38, "grad_norm": 0.8250706157425587, "learning_rate": 7.123849102682771e-07, "loss": 0.265, "step": 5952 }, { "epoch": 0.38, "grad_norm": 0.8781829729621038, "learning_rate": 7.122914117154267e-07, "loss": 0.3591, "step": 5953 }, { "epoch": 0.38, "grad_norm": 10.066038436900582, "learning_rate": 7.12197904105798e-07, "loss": 0.1235, "step": 5954 }, { "epoch": 0.38, "grad_norm": 2.3810299623815854, "learning_rate": 7.121043874433801e-07, "loss": 0.1342, "step": 5955 }, { "epoch": 0.38, "grad_norm": 1.1710195322965773, "learning_rate": 7.120108617321627e-07, "loss": 0.1984, "step": 5956 }, { "epoch": 0.38, "grad_norm": 0.49842423271635744, "learning_rate": 7.119173269761357e-07, "loss": 0.2331, "step": 5957 }, { "epoch": 0.38, "grad_norm": 4.023827614331166, "learning_rate": 7.118237831792895e-07, "loss": 0.3535, "step": 5958 }, { "epoch": 0.38, "grad_norm": 0.25458583003373897, "learning_rate": 7.117302303456149e-07, "loss": 0.0158, "step": 5959 }, { "epoch": 0.38, "grad_norm": 0.6364437863317047, "learning_rate": 7.116366684791032e-07, "loss": 0.0611, "step": 5960 }, { "epoch": 0.38, "grad_norm": 1.823260968022451, "learning_rate": 7.115430975837456e-07, "loss": 0.1022, "step": 5961 }, { "epoch": 0.38, "grad_norm": 4.751828298169778, "learning_rate": 7.114495176635343e-07, "loss": 0.0226, "step": 5962 }, { "epoch": 0.38, "grad_norm": 0.630816834039371, "learning_rate": 7.113559287224614e-07, "loss": 0.1887, "step": 5963 }, { "epoch": 0.38, "grad_norm": 0.45860696416686153, "learning_rate": 7.112623307645198e-07, "loss": 0.2074, "step": 5964 }, { "epoch": 0.38, "grad_norm": 0.6064617076623829, "learning_rate": 7.111687237937024e-07, "loss": 0.2096, "step": 5965 }, { "epoch": 0.38, "grad_norm": 0.9400640168041525, "learning_rate": 7.11075107814003e-07, "loss": 0.276, "step": 5966 }, { "epoch": 0.38, "grad_norm": 0.9060897632718069, "learning_rate": 7.10981482829415e-07, "loss": 0.0865, "step": 5967 }, { "epoch": 0.38, "grad_norm": 0.7413702713438625, "learning_rate": 7.108878488439327e-07, "loss": 0.2822, "step": 5968 }, { "epoch": 0.38, "grad_norm": 1.7268002955728479, "learning_rate": 7.10794205861551e-07, "loss": 0.1332, "step": 5969 }, { "epoch": 0.38, "grad_norm": 0.5774531281108808, "learning_rate": 7.107005538862646e-07, "loss": 0.0733, "step": 5970 }, { "epoch": 0.38, "grad_norm": 3.129077254375828, "learning_rate": 7.10606892922069e-07, "loss": 0.0939, "step": 5971 }, { "epoch": 0.38, "grad_norm": 1.471993788958997, "learning_rate": 7.1051322297296e-07, "loss": 0.2458, "step": 5972 }, { "epoch": 0.38, "grad_norm": 0.21795629225975438, "learning_rate": 7.104195440429338e-07, "loss": 0.0039, "step": 5973 }, { "epoch": 0.38, "grad_norm": 1.0986424896095943, "learning_rate": 7.103258561359868e-07, "loss": 0.3232, "step": 5974 }, { "epoch": 0.38, "grad_norm": 0.5780048420019451, "learning_rate": 7.102321592561161e-07, "loss": 0.2073, "step": 5975 }, { "epoch": 0.38, "grad_norm": 0.2990008380893296, "learning_rate": 7.101384534073186e-07, "loss": 0.1052, "step": 5976 }, { "epoch": 0.38, "grad_norm": 1.3446679506575092, "learning_rate": 7.100447385935924e-07, "loss": 0.3446, "step": 5977 }, { "epoch": 0.38, "grad_norm": 0.7417729866746887, "learning_rate": 7.099510148189353e-07, "loss": 0.3005, "step": 5978 }, { "epoch": 0.38, "grad_norm": 4.380731448893932, "learning_rate": 7.098572820873461e-07, "loss": 0.0407, "step": 5979 }, { "epoch": 0.38, "grad_norm": 0.6480951396834793, "learning_rate": 7.097635404028233e-07, "loss": 0.2887, "step": 5980 }, { "epoch": 0.38, "grad_norm": 4.659078677383878, "learning_rate": 7.096697897693661e-07, "loss": 0.3541, "step": 5981 }, { "epoch": 0.38, "grad_norm": 1.1218545597779377, "learning_rate": 7.095760301909742e-07, "loss": 0.3356, "step": 5982 }, { "epoch": 0.38, "grad_norm": 0.9847545022936296, "learning_rate": 7.094822616716476e-07, "loss": 0.1656, "step": 5983 }, { "epoch": 0.38, "grad_norm": 1.1206415516885497, "learning_rate": 7.093884842153866e-07, "loss": 0.5928, "step": 5984 }, { "epoch": 0.38, "grad_norm": 5.9162260514925755, "learning_rate": 7.092946978261918e-07, "loss": 0.1928, "step": 5985 }, { "epoch": 0.38, "grad_norm": 0.7143376489897715, "learning_rate": 7.092009025080647e-07, "loss": 0.1116, "step": 5986 }, { "epoch": 0.38, "grad_norm": 1.5437738671263492, "learning_rate": 7.091070982650063e-07, "loss": 0.2653, "step": 5987 }, { "epoch": 0.38, "grad_norm": 1.2406481074654894, "learning_rate": 7.090132851010189e-07, "loss": 0.3299, "step": 5988 }, { "epoch": 0.38, "grad_norm": 0.9467414831579638, "learning_rate": 7.089194630201045e-07, "loss": 0.2875, "step": 5989 }, { "epoch": 0.38, "grad_norm": 1.9279443760306558, "learning_rate": 7.088256320262658e-07, "loss": 0.0794, "step": 5990 }, { "epoch": 0.38, "grad_norm": 0.5185733427488309, "learning_rate": 7.087317921235059e-07, "loss": 0.1306, "step": 5991 }, { "epoch": 0.38, "grad_norm": 0.7948092221675529, "learning_rate": 7.086379433158282e-07, "loss": 0.0462, "step": 5992 }, { "epoch": 0.38, "grad_norm": 0.6098031442702017, "learning_rate": 7.085440856072364e-07, "loss": 0.1068, "step": 5993 }, { "epoch": 0.38, "grad_norm": 2.106216605920332, "learning_rate": 7.084502190017346e-07, "loss": 0.0095, "step": 5994 }, { "epoch": 0.38, "grad_norm": 0.2519067909669138, "learning_rate": 7.083563435033275e-07, "loss": 0.1246, "step": 5995 }, { "epoch": 0.38, "grad_norm": 1.1356303064312276, "learning_rate": 7.0826245911602e-07, "loss": 0.2158, "step": 5996 }, { "epoch": 0.38, "grad_norm": 0.5893033155310382, "learning_rate": 7.081685658438172e-07, "loss": 0.236, "step": 5997 }, { "epoch": 0.38, "grad_norm": 0.5284526499896604, "learning_rate": 7.080746636907249e-07, "loss": 0.1444, "step": 5998 }, { "epoch": 0.38, "grad_norm": 0.62541020470911, "learning_rate": 7.079807526607492e-07, "loss": 0.1926, "step": 5999 }, { "epoch": 0.38, "grad_norm": 0.6633854058116887, "learning_rate": 7.078868327578965e-07, "loss": 0.3817, "step": 6000 }, { "epoch": 0.38, "grad_norm": 2.5390019204594863, "learning_rate": 7.077929039861737e-07, "loss": 0.1851, "step": 6001 }, { "epoch": 0.38, "grad_norm": 2.1361359268069435, "learning_rate": 7.076989663495877e-07, "loss": 0.1746, "step": 6002 }, { "epoch": 0.38, "grad_norm": 0.10970779392084333, "learning_rate": 7.076050198521464e-07, "loss": 0.0664, "step": 6003 }, { "epoch": 0.38, "grad_norm": 1.0860967601909939, "learning_rate": 7.075110644978577e-07, "loss": 0.4086, "step": 6004 }, { "epoch": 0.38, "grad_norm": 0.5685774998213641, "learning_rate": 7.074171002907296e-07, "loss": 0.3996, "step": 6005 }, { "epoch": 0.38, "grad_norm": 0.6201711996331208, "learning_rate": 7.073231272347713e-07, "loss": 0.3049, "step": 6006 }, { "epoch": 0.38, "grad_norm": 0.6000797086416605, "learning_rate": 7.072291453339915e-07, "loss": 0.3039, "step": 6007 }, { "epoch": 0.38, "grad_norm": 0.45780747046117626, "learning_rate": 7.071351545923998e-07, "loss": 0.2359, "step": 6008 }, { "epoch": 0.38, "grad_norm": 0.3257811079413989, "learning_rate": 7.07041155014006e-07, "loss": 0.1005, "step": 6009 }, { "epoch": 0.38, "grad_norm": 1.0872698006568964, "learning_rate": 7.069471466028203e-07, "loss": 0.1744, "step": 6010 }, { "epoch": 0.38, "grad_norm": 0.280298811781577, "learning_rate": 7.068531293628533e-07, "loss": 0.0576, "step": 6011 }, { "epoch": 0.38, "grad_norm": 3.0526762193027266, "learning_rate": 7.06759103298116e-07, "loss": 0.1753, "step": 6012 }, { "epoch": 0.38, "grad_norm": 1.288961530840641, "learning_rate": 7.066650684126198e-07, "loss": 0.3042, "step": 6013 }, { "epoch": 0.38, "grad_norm": 0.37518201346987406, "learning_rate": 7.065710247103762e-07, "loss": 0.1116, "step": 6014 }, { "epoch": 0.38, "grad_norm": 3.2338419063587356, "learning_rate": 7.064769721953975e-07, "loss": 0.0568, "step": 6015 }, { "epoch": 0.38, "grad_norm": 1.3789297878621667, "learning_rate": 7.06382910871696e-07, "loss": 0.1485, "step": 6016 }, { "epoch": 0.38, "grad_norm": 2.220911917100636, "learning_rate": 7.062888407432847e-07, "loss": 0.145, "step": 6017 }, { "epoch": 0.38, "grad_norm": 0.44760179866017075, "learning_rate": 7.061947618141768e-07, "loss": 0.2021, "step": 6018 }, { "epoch": 0.38, "grad_norm": 0.3631122735822044, "learning_rate": 7.061006740883858e-07, "loss": 0.0854, "step": 6019 }, { "epoch": 0.38, "grad_norm": 0.6197074357619311, "learning_rate": 7.060065775699257e-07, "loss": 0.1229, "step": 6020 }, { "epoch": 0.38, "grad_norm": 7.303806414473023, "learning_rate": 7.059124722628112e-07, "loss": 0.1706, "step": 6021 }, { "epoch": 0.38, "grad_norm": 0.5110289241404611, "learning_rate": 7.058183581710564e-07, "loss": 0.0506, "step": 6022 }, { "epoch": 0.38, "grad_norm": 1.2294421418777262, "learning_rate": 7.057242352986767e-07, "loss": 0.0219, "step": 6023 }, { "epoch": 0.38, "grad_norm": 2.587852148027603, "learning_rate": 7.056301036496874e-07, "loss": 0.1163, "step": 6024 }, { "epoch": 0.38, "grad_norm": 0.6857314604523593, "learning_rate": 7.055359632281048e-07, "loss": 0.5359, "step": 6025 }, { "epoch": 0.38, "grad_norm": 0.923768138429377, "learning_rate": 7.054418140379448e-07, "loss": 0.2454, "step": 6026 }, { "epoch": 0.38, "grad_norm": 0.47052950296915724, "learning_rate": 7.053476560832239e-07, "loss": 0.1317, "step": 6027 }, { "epoch": 0.38, "grad_norm": 0.4121696819836364, "learning_rate": 7.052534893679593e-07, "loss": 0.2421, "step": 6028 }, { "epoch": 0.38, "grad_norm": 0.44023866777409837, "learning_rate": 7.051593138961681e-07, "loss": 0.2292, "step": 6029 }, { "epoch": 0.38, "grad_norm": 0.696574520204921, "learning_rate": 7.050651296718683e-07, "loss": 0.0751, "step": 6030 }, { "epoch": 0.38, "grad_norm": 0.7402758210906683, "learning_rate": 7.049709366990777e-07, "loss": 0.2056, "step": 6031 }, { "epoch": 0.38, "grad_norm": 0.4702207701856258, "learning_rate": 7.04876734981815e-07, "loss": 0.0254, "step": 6032 }, { "epoch": 0.38, "grad_norm": 5.999792419930243, "learning_rate": 7.047825245240988e-07, "loss": 0.1965, "step": 6033 }, { "epoch": 0.38, "grad_norm": 0.4877844417113965, "learning_rate": 7.046883053299486e-07, "loss": 0.116, "step": 6034 }, { "epoch": 0.38, "grad_norm": 1.479980954195937, "learning_rate": 7.045940774033838e-07, "loss": 0.0313, "step": 6035 }, { "epoch": 0.38, "grad_norm": 0.6086161206512105, "learning_rate": 7.044998407484243e-07, "loss": 0.1707, "step": 6036 }, { "epoch": 0.38, "grad_norm": 1.187171812083103, "learning_rate": 7.044055953690905e-07, "loss": 0.2214, "step": 6037 }, { "epoch": 0.39, "grad_norm": 1.0984291500373335, "learning_rate": 7.043113412694031e-07, "loss": 0.3816, "step": 6038 }, { "epoch": 0.39, "grad_norm": 3.3096972089826218, "learning_rate": 7.042170784533832e-07, "loss": 0.1404, "step": 6039 }, { "epoch": 0.39, "grad_norm": 0.4580931602231195, "learning_rate": 7.041228069250522e-07, "loss": 0.1525, "step": 6040 }, { "epoch": 0.39, "grad_norm": 0.8939244281693258, "learning_rate": 7.040285266884318e-07, "loss": 0.1209, "step": 6041 }, { "epoch": 0.39, "grad_norm": 0.39145821050631974, "learning_rate": 7.039342377475443e-07, "loss": 0.0901, "step": 6042 }, { "epoch": 0.39, "grad_norm": 1.4035249113873034, "learning_rate": 7.038399401064124e-07, "loss": 0.0596, "step": 6043 }, { "epoch": 0.39, "grad_norm": 0.4781807350668425, "learning_rate": 7.037456337690588e-07, "loss": 0.1057, "step": 6044 }, { "epoch": 0.39, "grad_norm": 0.5610597985770469, "learning_rate": 7.03651318739507e-07, "loss": 0.0974, "step": 6045 }, { "epoch": 0.39, "grad_norm": 0.77816916366953, "learning_rate": 7.035569950217806e-07, "loss": 0.1531, "step": 6046 }, { "epoch": 0.39, "grad_norm": 0.5232881218806097, "learning_rate": 7.034626626199034e-07, "loss": 0.322, "step": 6047 }, { "epoch": 0.39, "grad_norm": 0.38390568318509016, "learning_rate": 7.033683215379002e-07, "loss": 0.0462, "step": 6048 }, { "epoch": 0.39, "grad_norm": 0.47140231927303317, "learning_rate": 7.032739717797954e-07, "loss": 0.2785, "step": 6049 }, { "epoch": 0.39, "grad_norm": 0.22127341481379223, "learning_rate": 7.031796133496144e-07, "loss": 0.1747, "step": 6050 }, { "epoch": 0.39, "grad_norm": 0.5056716221063638, "learning_rate": 7.030852462513826e-07, "loss": 0.3887, "step": 6051 }, { "epoch": 0.39, "grad_norm": 0.6106568240598306, "learning_rate": 7.02990870489126e-07, "loss": 0.4211, "step": 6052 }, { "epoch": 0.39, "grad_norm": 0.6996628918092257, "learning_rate": 7.028964860668706e-07, "loss": 0.3672, "step": 6053 }, { "epoch": 0.39, "grad_norm": 0.8179165141596455, "learning_rate": 7.028020929886436e-07, "loss": 0.2225, "step": 6054 }, { "epoch": 0.39, "grad_norm": 0.6077679931879819, "learning_rate": 7.027076912584714e-07, "loss": 0.1728, "step": 6055 }, { "epoch": 0.39, "grad_norm": 1.433169476401399, "learning_rate": 7.026132808803816e-07, "loss": 0.0204, "step": 6056 }, { "epoch": 0.39, "grad_norm": 0.7197853818572455, "learning_rate": 7.02518861858402e-07, "loss": 0.2011, "step": 6057 }, { "epoch": 0.39, "grad_norm": 0.6432525605362424, "learning_rate": 7.024244341965606e-07, "loss": 0.3238, "step": 6058 }, { "epoch": 0.39, "grad_norm": 0.6331576072836341, "learning_rate": 7.023299978988859e-07, "loss": 0.2177, "step": 6059 }, { "epoch": 0.39, "grad_norm": 6.527315811383846, "learning_rate": 7.022355529694068e-07, "loss": 0.0996, "step": 6060 }, { "epoch": 0.39, "grad_norm": 2.5933556118618397, "learning_rate": 7.021410994121524e-07, "loss": 0.0674, "step": 6061 }, { "epoch": 0.39, "grad_norm": 1.0699772808004508, "learning_rate": 7.020466372311525e-07, "loss": 0.0994, "step": 6062 }, { "epoch": 0.39, "grad_norm": 1.8203860558455764, "learning_rate": 7.019521664304369e-07, "loss": 0.1917, "step": 6063 }, { "epoch": 0.39, "grad_norm": 0.7259338690054316, "learning_rate": 7.018576870140357e-07, "loss": 0.1829, "step": 6064 }, { "epoch": 0.39, "grad_norm": 0.7431332243039687, "learning_rate": 7.017631989859799e-07, "loss": 0.194, "step": 6065 }, { "epoch": 0.39, "grad_norm": 0.9385818688452157, "learning_rate": 7.016687023503004e-07, "loss": 0.1326, "step": 6066 }, { "epoch": 0.39, "grad_norm": 0.6791666522907975, "learning_rate": 7.015741971110287e-07, "loss": 0.1821, "step": 6067 }, { "epoch": 0.39, "grad_norm": 0.7781131213115616, "learning_rate": 7.014796832721965e-07, "loss": 0.1181, "step": 6068 }, { "epoch": 0.39, "grad_norm": 0.5342813351196428, "learning_rate": 7.013851608378358e-07, "loss": 0.1441, "step": 6069 }, { "epoch": 0.39, "grad_norm": 0.4942220602028391, "learning_rate": 7.012906298119796e-07, "loss": 0.0107, "step": 6070 }, { "epoch": 0.39, "grad_norm": 3.515678796282028, "learning_rate": 7.011960901986603e-07, "loss": 0.1908, "step": 6071 }, { "epoch": 0.39, "grad_norm": 0.5427986787855831, "learning_rate": 7.011015420019115e-07, "loss": 0.1059, "step": 6072 }, { "epoch": 0.39, "grad_norm": 0.3379644858261285, "learning_rate": 7.010069852257665e-07, "loss": 0.013, "step": 6073 }, { "epoch": 0.39, "grad_norm": 0.744299123554008, "learning_rate": 7.009124198742595e-07, "loss": 0.0254, "step": 6074 }, { "epoch": 0.39, "grad_norm": 0.9154454539648509, "learning_rate": 7.008178459514249e-07, "loss": 0.2579, "step": 6075 }, { "epoch": 0.39, "grad_norm": 1.0282065266041691, "learning_rate": 7.007232634612972e-07, "loss": 0.1421, "step": 6076 }, { "epoch": 0.39, "grad_norm": 1.5030692551752733, "learning_rate": 7.006286724079115e-07, "loss": 0.1216, "step": 6077 }, { "epoch": 0.39, "grad_norm": 0.7340609318070733, "learning_rate": 7.005340727953034e-07, "loss": 0.3215, "step": 6078 }, { "epoch": 0.39, "grad_norm": 0.7045726026405857, "learning_rate": 7.004394646275086e-07, "loss": 0.3236, "step": 6079 }, { "epoch": 0.39, "grad_norm": 0.2988262294681189, "learning_rate": 7.003448479085634e-07, "loss": 0.096, "step": 6080 }, { "epoch": 0.39, "grad_norm": 7.053010225464449, "learning_rate": 7.002502226425041e-07, "loss": 0.116, "step": 6081 }, { "epoch": 0.39, "grad_norm": 0.8059010644650134, "learning_rate": 7.001555888333679e-07, "loss": 0.2569, "step": 6082 }, { "epoch": 0.39, "grad_norm": 0.5653028761079574, "learning_rate": 7.000609464851918e-07, "loss": 0.2474, "step": 6083 }, { "epoch": 0.39, "grad_norm": 1.2069537818478193, "learning_rate": 6.999662956020136e-07, "loss": 0.3237, "step": 6084 }, { "epoch": 0.39, "grad_norm": 0.44003547281224015, "learning_rate": 6.998716361878712e-07, "loss": 0.4035, "step": 6085 }, { "epoch": 0.39, "grad_norm": 0.9190976304451878, "learning_rate": 6.99776968246803e-07, "loss": 0.1739, "step": 6086 }, { "epoch": 0.39, "grad_norm": 1.1500311324465582, "learning_rate": 6.996822917828476e-07, "loss": 0.2147, "step": 6087 }, { "epoch": 0.39, "grad_norm": 0.03477222308533221, "learning_rate": 6.995876068000445e-07, "loss": 0.0008, "step": 6088 }, { "epoch": 0.39, "grad_norm": 0.5832092753911268, "learning_rate": 6.994929133024329e-07, "loss": 0.3132, "step": 6089 }, { "epoch": 0.39, "grad_norm": 8.524669557833631, "learning_rate": 6.993982112940525e-07, "loss": 0.1944, "step": 6090 }, { "epoch": 0.39, "grad_norm": 1.5149018659822298, "learning_rate": 6.993035007789434e-07, "loss": 0.3219, "step": 6091 }, { "epoch": 0.39, "grad_norm": 0.1683874690062729, "learning_rate": 6.992087817611466e-07, "loss": 0.0672, "step": 6092 }, { "epoch": 0.39, "grad_norm": 2.6868417573536876, "learning_rate": 6.991140542447024e-07, "loss": 0.1113, "step": 6093 }, { "epoch": 0.39, "grad_norm": 1.306435480036509, "learning_rate": 6.990193182336524e-07, "loss": 0.1352, "step": 6094 }, { "epoch": 0.39, "grad_norm": 3.981389326213064, "learning_rate": 6.989245737320383e-07, "loss": 0.1822, "step": 6095 }, { "epoch": 0.39, "grad_norm": 1.5881073089154503, "learning_rate": 6.988298207439021e-07, "loss": 0.4433, "step": 6096 }, { "epoch": 0.39, "grad_norm": 0.862365536486719, "learning_rate": 6.98735059273286e-07, "loss": 0.2503, "step": 6097 }, { "epoch": 0.39, "grad_norm": 2.137412049406784, "learning_rate": 6.986402893242326e-07, "loss": 0.0186, "step": 6098 }, { "epoch": 0.39, "grad_norm": 0.629167955438788, "learning_rate": 6.985455109007853e-07, "loss": 0.0771, "step": 6099 }, { "epoch": 0.39, "grad_norm": 0.7568199747826508, "learning_rate": 6.984507240069873e-07, "loss": 0.0125, "step": 6100 }, { "epoch": 0.39, "grad_norm": 0.4294833206324395, "learning_rate": 6.983559286468825e-07, "loss": 0.1637, "step": 6101 }, { "epoch": 0.39, "grad_norm": 1.0130177508255256, "learning_rate": 6.982611248245152e-07, "loss": 0.4066, "step": 6102 }, { "epoch": 0.39, "grad_norm": 0.6991349913435341, "learning_rate": 6.981663125439295e-07, "loss": 0.3924, "step": 6103 }, { "epoch": 0.39, "grad_norm": 0.8832818984152153, "learning_rate": 6.980714918091706e-07, "loss": 0.3424, "step": 6104 }, { "epoch": 0.39, "grad_norm": 1.3879801519145525, "learning_rate": 6.979766626242838e-07, "loss": 0.0624, "step": 6105 }, { "epoch": 0.39, "grad_norm": 1.195726083938898, "learning_rate": 6.978818249933145e-07, "loss": 0.1041, "step": 6106 }, { "epoch": 0.39, "grad_norm": 1.0888581955067818, "learning_rate": 6.977869789203088e-07, "loss": 0.247, "step": 6107 }, { "epoch": 0.39, "grad_norm": 3.1743268342712296, "learning_rate": 6.97692124409313e-07, "loss": 0.0383, "step": 6108 }, { "epoch": 0.39, "grad_norm": 1.0650161679738679, "learning_rate": 6.975972614643737e-07, "loss": 0.1798, "step": 6109 }, { "epoch": 0.39, "grad_norm": 1.9407940742659258, "learning_rate": 6.97502390089538e-07, "loss": 0.3623, "step": 6110 }, { "epoch": 0.39, "grad_norm": 0.9933576601280183, "learning_rate": 6.974075102888535e-07, "loss": 0.2004, "step": 6111 }, { "epoch": 0.39, "grad_norm": 0.9204704042847702, "learning_rate": 6.973126220663675e-07, "loss": 0.3363, "step": 6112 }, { "epoch": 0.39, "grad_norm": 1.229552263715744, "learning_rate": 6.972177254261285e-07, "loss": 0.3976, "step": 6113 }, { "epoch": 0.39, "grad_norm": 0.7396302309870308, "learning_rate": 6.971228203721848e-07, "loss": 0.1982, "step": 6114 }, { "epoch": 0.39, "grad_norm": 0.5209664358944699, "learning_rate": 6.970279069085855e-07, "loss": 0.144, "step": 6115 }, { "epoch": 0.39, "grad_norm": 1.4425491786711748, "learning_rate": 6.969329850393795e-07, "loss": 0.1312, "step": 6116 }, { "epoch": 0.39, "grad_norm": 0.5263695534515982, "learning_rate": 6.968380547686166e-07, "loss": 0.3222, "step": 6117 }, { "epoch": 0.39, "grad_norm": 1.575979932582315, "learning_rate": 6.967431161003465e-07, "loss": 0.0941, "step": 6118 }, { "epoch": 0.39, "grad_norm": 4.222430021897893, "learning_rate": 6.966481690386195e-07, "loss": 0.0992, "step": 6119 }, { "epoch": 0.39, "grad_norm": 1.2576809504348785, "learning_rate": 6.965532135874863e-07, "loss": 0.1708, "step": 6120 }, { "epoch": 0.39, "grad_norm": 6.854626612892088, "learning_rate": 6.96458249750998e-07, "loss": 0.0214, "step": 6121 }, { "epoch": 0.39, "grad_norm": 1.2937559918030428, "learning_rate": 6.963632775332055e-07, "loss": 0.3294, "step": 6122 }, { "epoch": 0.39, "grad_norm": 0.6462685025279785, "learning_rate": 6.962682969381613e-07, "loss": 0.1446, "step": 6123 }, { "epoch": 0.39, "grad_norm": 2.144011777287466, "learning_rate": 6.961733079699168e-07, "loss": 0.3612, "step": 6124 }, { "epoch": 0.39, "grad_norm": 1.762259905785336, "learning_rate": 6.960783106325246e-07, "loss": 0.4583, "step": 6125 }, { "epoch": 0.39, "grad_norm": 0.5472923641710189, "learning_rate": 6.959833049300375e-07, "loss": 0.1341, "step": 6126 }, { "epoch": 0.39, "grad_norm": 0.618147425453013, "learning_rate": 6.958882908665087e-07, "loss": 0.2312, "step": 6127 }, { "epoch": 0.39, "grad_norm": 10.039500895902682, "learning_rate": 6.957932684459915e-07, "loss": 0.2508, "step": 6128 }, { "epoch": 0.39, "grad_norm": 0.7293971604524371, "learning_rate": 6.9569823767254e-07, "loss": 0.2275, "step": 6129 }, { "epoch": 0.39, "grad_norm": 0.8612548679002621, "learning_rate": 6.956031985502084e-07, "loss": 0.0322, "step": 6130 }, { "epoch": 0.39, "grad_norm": 0.8993388064077994, "learning_rate": 6.955081510830509e-07, "loss": 0.1982, "step": 6131 }, { "epoch": 0.39, "grad_norm": 0.8921071696384599, "learning_rate": 6.954130952751227e-07, "loss": 0.1631, "step": 6132 }, { "epoch": 0.39, "grad_norm": 0.44758189450209807, "learning_rate": 6.953180311304792e-07, "loss": 0.2095, "step": 6133 }, { "epoch": 0.39, "grad_norm": 1.0596157588948227, "learning_rate": 6.952229586531756e-07, "loss": 0.2035, "step": 6134 }, { "epoch": 0.39, "grad_norm": 0.43274357682038095, "learning_rate": 6.951278778472682e-07, "loss": 0.0727, "step": 6135 }, { "epoch": 0.39, "grad_norm": 0.32998110733984454, "learning_rate": 6.950327887168133e-07, "loss": 0.2334, "step": 6136 }, { "epoch": 0.39, "grad_norm": 0.6791760685309348, "learning_rate": 6.949376912658678e-07, "loss": 0.2407, "step": 6137 }, { "epoch": 0.39, "grad_norm": 0.4853614325615889, "learning_rate": 6.948425854984883e-07, "loss": 0.1118, "step": 6138 }, { "epoch": 0.39, "grad_norm": 1.2479386879988221, "learning_rate": 6.947474714187324e-07, "loss": 0.1549, "step": 6139 }, { "epoch": 0.39, "grad_norm": 1.3361662390743687, "learning_rate": 6.946523490306578e-07, "loss": 0.0888, "step": 6140 }, { "epoch": 0.39, "grad_norm": 0.6868660305286954, "learning_rate": 6.945572183383229e-07, "loss": 0.1573, "step": 6141 }, { "epoch": 0.39, "grad_norm": 1.2057914628517277, "learning_rate": 6.944620793457857e-07, "loss": 0.3429, "step": 6142 }, { "epoch": 0.39, "grad_norm": 0.7712588457012892, "learning_rate": 6.943669320571055e-07, "loss": 0.0063, "step": 6143 }, { "epoch": 0.39, "grad_norm": 1.214776392904596, "learning_rate": 6.942717764763412e-07, "loss": 0.2139, "step": 6144 }, { "epoch": 0.39, "grad_norm": 10.824250979215668, "learning_rate": 6.941766126075524e-07, "loss": 0.1055, "step": 6145 }, { "epoch": 0.39, "grad_norm": 2.429547489762958, "learning_rate": 6.94081440454799e-07, "loss": 0.1091, "step": 6146 }, { "epoch": 0.39, "grad_norm": 0.9529996714874591, "learning_rate": 6.939862600221411e-07, "loss": 0.2029, "step": 6147 }, { "epoch": 0.39, "grad_norm": 1.5255961179150546, "learning_rate": 6.938910713136393e-07, "loss": 0.405, "step": 6148 }, { "epoch": 0.39, "grad_norm": 0.6362585419101171, "learning_rate": 6.937958743333548e-07, "loss": 0.1283, "step": 6149 }, { "epoch": 0.39, "grad_norm": 2.465940031687446, "learning_rate": 6.937006690853486e-07, "loss": 0.0199, "step": 6150 }, { "epoch": 0.39, "grad_norm": 0.7828230822884971, "learning_rate": 6.936054555736825e-07, "loss": 0.2407, "step": 6151 }, { "epoch": 0.39, "grad_norm": 0.25168434736024076, "learning_rate": 6.935102338024185e-07, "loss": 0.174, "step": 6152 }, { "epoch": 0.39, "grad_norm": 1.8794224204941419, "learning_rate": 6.93415003775619e-07, "loss": 0.3587, "step": 6153 }, { "epoch": 0.39, "grad_norm": 0.9803000681134058, "learning_rate": 6.933197654973466e-07, "loss": 0.1463, "step": 6154 }, { "epoch": 0.39, "grad_norm": 0.20458817097033785, "learning_rate": 6.932245189716643e-07, "loss": 0.0215, "step": 6155 }, { "epoch": 0.39, "grad_norm": 1.1878195279602228, "learning_rate": 6.931292642026356e-07, "loss": 0.1178, "step": 6156 }, { "epoch": 0.39, "grad_norm": 0.9650560839732295, "learning_rate": 6.930340011943244e-07, "loss": 0.3217, "step": 6157 }, { "epoch": 0.39, "grad_norm": 0.6340945096257533, "learning_rate": 6.929387299507944e-07, "loss": 0.4892, "step": 6158 }, { "epoch": 0.39, "grad_norm": 1.655075799508818, "learning_rate": 6.928434504761105e-07, "loss": 0.0638, "step": 6159 }, { "epoch": 0.39, "grad_norm": 4.61875768161717, "learning_rate": 6.927481627743373e-07, "loss": 0.1523, "step": 6160 }, { "epoch": 0.39, "grad_norm": 1.033864607713213, "learning_rate": 6.926528668495399e-07, "loss": 0.2278, "step": 6161 }, { "epoch": 0.39, "grad_norm": 0.7826141802236259, "learning_rate": 6.925575627057841e-07, "loss": 0.0551, "step": 6162 }, { "epoch": 0.39, "grad_norm": 0.28373260478851425, "learning_rate": 6.924622503471356e-07, "loss": 0.0812, "step": 6163 }, { "epoch": 0.39, "grad_norm": 2.8340090495261956, "learning_rate": 6.923669297776604e-07, "loss": 0.1287, "step": 6164 }, { "epoch": 0.39, "grad_norm": 1.672714393111128, "learning_rate": 6.922716010014255e-07, "loss": 0.1098, "step": 6165 }, { "epoch": 0.39, "grad_norm": 0.871746698251231, "learning_rate": 6.921762640224974e-07, "loss": 0.0985, "step": 6166 }, { "epoch": 0.39, "grad_norm": 1.7736099022362137, "learning_rate": 6.920809188449435e-07, "loss": 0.0043, "step": 6167 }, { "epoch": 0.39, "grad_norm": 0.8113946228779848, "learning_rate": 6.919855654728316e-07, "loss": 0.1173, "step": 6168 }, { "epoch": 0.39, "grad_norm": 0.9167850203021984, "learning_rate": 6.918902039102296e-07, "loss": 0.2546, "step": 6169 }, { "epoch": 0.39, "grad_norm": 0.7498944438835116, "learning_rate": 6.917948341612056e-07, "loss": 0.2329, "step": 6170 }, { "epoch": 0.39, "grad_norm": 8.063631914034943, "learning_rate": 6.916994562298285e-07, "loss": 0.1669, "step": 6171 }, { "epoch": 0.39, "grad_norm": 0.7192687269786702, "learning_rate": 6.916040701201674e-07, "loss": 0.2198, "step": 6172 }, { "epoch": 0.39, "grad_norm": 0.7899638685431615, "learning_rate": 6.915086758362914e-07, "loss": 0.2831, "step": 6173 }, { "epoch": 0.39, "grad_norm": 0.9522419273413391, "learning_rate": 6.914132733822701e-07, "loss": 0.0687, "step": 6174 }, { "epoch": 0.39, "grad_norm": 0.3403160937259742, "learning_rate": 6.913178627621739e-07, "loss": 0.0447, "step": 6175 }, { "epoch": 0.39, "grad_norm": 0.8785572537418758, "learning_rate": 6.912224439800731e-07, "loss": 0.08, "step": 6176 }, { "epoch": 0.39, "grad_norm": 1.0105327342004704, "learning_rate": 6.911270170400384e-07, "loss": 0.1012, "step": 6177 }, { "epoch": 0.39, "grad_norm": 2.7582801548032934, "learning_rate": 6.91031581946141e-07, "loss": 0.2048, "step": 6178 }, { "epoch": 0.39, "grad_norm": 0.4176320377241375, "learning_rate": 6.909361387024522e-07, "loss": 0.0817, "step": 6179 }, { "epoch": 0.39, "grad_norm": 6.719948092491281, "learning_rate": 6.908406873130439e-07, "loss": 0.1972, "step": 6180 }, { "epoch": 0.39, "grad_norm": 1.4633957144557999, "learning_rate": 6.907452277819883e-07, "loss": 0.2768, "step": 6181 }, { "epoch": 0.39, "grad_norm": 0.384205034374505, "learning_rate": 6.906497601133579e-07, "loss": 0.1005, "step": 6182 }, { "epoch": 0.39, "grad_norm": 0.5906550470369817, "learning_rate": 6.905542843112253e-07, "loss": 0.1545, "step": 6183 }, { "epoch": 0.39, "grad_norm": 0.5678397989120448, "learning_rate": 6.90458800379664e-07, "loss": 0.2816, "step": 6184 }, { "epoch": 0.39, "grad_norm": 1.384474132225101, "learning_rate": 6.903633083227474e-07, "loss": 0.1302, "step": 6185 }, { "epoch": 0.39, "grad_norm": 1.982603229099718, "learning_rate": 6.902678081445494e-07, "loss": 0.0161, "step": 6186 }, { "epoch": 0.39, "grad_norm": 1.2659266276101535, "learning_rate": 6.901722998491441e-07, "loss": 0.0176, "step": 6187 }, { "epoch": 0.39, "grad_norm": 1.2677240536185697, "learning_rate": 6.900767834406063e-07, "loss": 0.1869, "step": 6188 }, { "epoch": 0.39, "grad_norm": 2.5057187867373214, "learning_rate": 6.899812589230108e-07, "loss": 0.1465, "step": 6189 }, { "epoch": 0.39, "grad_norm": 2.6191932612231814, "learning_rate": 6.89885726300433e-07, "loss": 0.3231, "step": 6190 }, { "epoch": 0.39, "grad_norm": 1.2446369380745481, "learning_rate": 6.897901855769483e-07, "loss": 0.0954, "step": 6191 }, { "epoch": 0.39, "grad_norm": 0.8173892522714576, "learning_rate": 6.896946367566327e-07, "loss": 0.1784, "step": 6192 }, { "epoch": 0.39, "grad_norm": 0.5691305531822278, "learning_rate": 6.895990798435625e-07, "loss": 0.2155, "step": 6193 }, { "epoch": 0.4, "grad_norm": 1.0434859744339169, "learning_rate": 6.895035148418144e-07, "loss": 0.5934, "step": 6194 }, { "epoch": 0.4, "grad_norm": 0.35781916837890215, "learning_rate": 6.894079417554655e-07, "loss": 0.0663, "step": 6195 }, { "epoch": 0.4, "grad_norm": 0.24015978873153349, "learning_rate": 6.893123605885931e-07, "loss": 0.0623, "step": 6196 }, { "epoch": 0.4, "grad_norm": 1.321693853670986, "learning_rate": 6.892167713452748e-07, "loss": 0.3533, "step": 6197 }, { "epoch": 0.4, "grad_norm": 0.48782737986281427, "learning_rate": 6.891211740295887e-07, "loss": 0.1524, "step": 6198 }, { "epoch": 0.4, "grad_norm": 0.11333829615908207, "learning_rate": 6.890255686456133e-07, "loss": 0.0053, "step": 6199 }, { "epoch": 0.4, "grad_norm": 0.9010655888481797, "learning_rate": 6.889299551974268e-07, "loss": 0.3647, "step": 6200 }, { "epoch": 0.4, "grad_norm": 0.5065096572806629, "learning_rate": 6.888343336891087e-07, "loss": 0.2117, "step": 6201 }, { "epoch": 0.4, "grad_norm": 5.14211318909038, "learning_rate": 6.887387041247384e-07, "loss": 0.1118, "step": 6202 }, { "epoch": 0.4, "grad_norm": 0.3927341176803692, "learning_rate": 6.886430665083955e-07, "loss": 0.0819, "step": 6203 }, { "epoch": 0.4, "grad_norm": 1.3924321143043787, "learning_rate": 6.885474208441601e-07, "loss": 0.127, "step": 6204 }, { "epoch": 0.4, "grad_norm": 0.4337249428796377, "learning_rate": 6.88451767136113e-07, "loss": 0.2451, "step": 6205 }, { "epoch": 0.4, "grad_norm": 0.6311639888403874, "learning_rate": 6.883561053883344e-07, "loss": 0.401, "step": 6206 }, { "epoch": 0.4, "grad_norm": 0.5139037001185284, "learning_rate": 6.882604356049058e-07, "loss": 0.1032, "step": 6207 }, { "epoch": 0.4, "grad_norm": 1.137946904279486, "learning_rate": 6.881647577899086e-07, "loss": 0.2727, "step": 6208 }, { "epoch": 0.4, "grad_norm": 1.146831100302851, "learning_rate": 6.880690719474245e-07, "loss": 0.0998, "step": 6209 }, { "epoch": 0.4, "grad_norm": 0.8310608478545772, "learning_rate": 6.879733780815357e-07, "loss": 0.1059, "step": 6210 }, { "epoch": 0.4, "grad_norm": 0.4979772168898703, "learning_rate": 6.878776761963248e-07, "loss": 0.0443, "step": 6211 }, { "epoch": 0.4, "grad_norm": 0.8899238443742646, "learning_rate": 6.877819662958743e-07, "loss": 0.292, "step": 6212 }, { "epoch": 0.4, "grad_norm": 0.4890042180484366, "learning_rate": 6.87686248384268e-07, "loss": 0.3342, "step": 6213 }, { "epoch": 0.4, "grad_norm": 0.6739477447958372, "learning_rate": 6.875905224655889e-07, "loss": 0.2706, "step": 6214 }, { "epoch": 0.4, "grad_norm": 0.5369417685652146, "learning_rate": 6.874947885439211e-07, "loss": 0.0872, "step": 6215 }, { "epoch": 0.4, "grad_norm": 0.12216211068175181, "learning_rate": 6.873990466233486e-07, "loss": 0.0638, "step": 6216 }, { "epoch": 0.4, "grad_norm": 1.9266919734418062, "learning_rate": 6.87303296707956e-07, "loss": 0.0298, "step": 6217 }, { "epoch": 0.4, "grad_norm": 0.9563162868912487, "learning_rate": 6.872075388018284e-07, "loss": 0.072, "step": 6218 }, { "epoch": 0.4, "grad_norm": 0.86366765529282, "learning_rate": 6.871117729090508e-07, "loss": 0.2834, "step": 6219 }, { "epoch": 0.4, "grad_norm": 1.183708277852859, "learning_rate": 6.870159990337086e-07, "loss": 0.0882, "step": 6220 }, { "epoch": 0.4, "grad_norm": 7.285448526287848, "learning_rate": 6.869202171798881e-07, "loss": 0.2821, "step": 6221 }, { "epoch": 0.4, "grad_norm": 1.9273322831659865, "learning_rate": 6.868244273516755e-07, "loss": 0.2233, "step": 6222 }, { "epoch": 0.4, "grad_norm": 8.179773481144634, "learning_rate": 6.867286295531571e-07, "loss": 0.2397, "step": 6223 }, { "epoch": 0.4, "grad_norm": 0.8681641867231622, "learning_rate": 6.8663282378842e-07, "loss": 0.1859, "step": 6224 }, { "epoch": 0.4, "grad_norm": 0.5727729885534764, "learning_rate": 6.865370100615515e-07, "loss": 0.265, "step": 6225 }, { "epoch": 0.4, "grad_norm": 0.30902591923894296, "learning_rate": 6.864411883766393e-07, "loss": 0.1712, "step": 6226 }, { "epoch": 0.4, "grad_norm": 1.1296518840080736, "learning_rate": 6.863453587377711e-07, "loss": 0.3299, "step": 6227 }, { "epoch": 0.4, "grad_norm": 3.940590276393374, "learning_rate": 6.862495211490352e-07, "loss": 0.1227, "step": 6228 }, { "epoch": 0.4, "grad_norm": 0.4717924375144069, "learning_rate": 6.861536756145205e-07, "loss": 0.183, "step": 6229 }, { "epoch": 0.4, "grad_norm": 2.908760307610912, "learning_rate": 6.860578221383155e-07, "loss": 0.0143, "step": 6230 }, { "epoch": 0.4, "grad_norm": 0.9651197902918841, "learning_rate": 6.859619607245101e-07, "loss": 0.1343, "step": 6231 }, { "epoch": 0.4, "grad_norm": 0.9555572567907259, "learning_rate": 6.858660913771934e-07, "loss": 0.0517, "step": 6232 }, { "epoch": 0.4, "grad_norm": 1.0019228992172688, "learning_rate": 6.857702141004558e-07, "loss": 0.1124, "step": 6233 }, { "epoch": 0.4, "grad_norm": 1.375353658408546, "learning_rate": 6.856743288983873e-07, "loss": 0.1988, "step": 6234 }, { "epoch": 0.4, "grad_norm": 1.5454114092696687, "learning_rate": 6.855784357750786e-07, "loss": 0.4377, "step": 6235 }, { "epoch": 0.4, "grad_norm": 0.2153189426097419, "learning_rate": 6.854825347346209e-07, "loss": 0.0625, "step": 6236 }, { "epoch": 0.4, "grad_norm": 5.931577506079022, "learning_rate": 6.853866257811054e-07, "loss": 0.0245, "step": 6237 }, { "epoch": 0.4, "grad_norm": 7.686939894574234, "learning_rate": 6.852907089186236e-07, "loss": 0.0939, "step": 6238 }, { "epoch": 0.4, "grad_norm": 0.8131809197901665, "learning_rate": 6.851947841512679e-07, "loss": 0.2382, "step": 6239 }, { "epoch": 0.4, "grad_norm": 2.497959138251734, "learning_rate": 6.850988514831303e-07, "loss": 0.0311, "step": 6240 }, { "epoch": 0.4, "grad_norm": 4.275365788480474, "learning_rate": 6.850029109183038e-07, "loss": 0.1505, "step": 6241 }, { "epoch": 0.4, "grad_norm": 0.2547152339384144, "learning_rate": 6.84906962460881e-07, "loss": 0.1546, "step": 6242 }, { "epoch": 0.4, "grad_norm": 0.3994564711131799, "learning_rate": 6.848110061149555e-07, "loss": 0.0257, "step": 6243 }, { "epoch": 0.4, "grad_norm": 1.0345026380540454, "learning_rate": 6.84715041884621e-07, "loss": 0.3793, "step": 6244 }, { "epoch": 0.4, "grad_norm": 0.43390028267813124, "learning_rate": 6.846190697739714e-07, "loss": 0.0355, "step": 6245 }, { "epoch": 0.4, "grad_norm": 0.4501212776602618, "learning_rate": 6.845230897871012e-07, "loss": 0.1599, "step": 6246 }, { "epoch": 0.4, "grad_norm": 2.580929022002219, "learning_rate": 6.84427101928105e-07, "loss": 0.15, "step": 6247 }, { "epoch": 0.4, "grad_norm": 0.7918955130027382, "learning_rate": 6.84331106201078e-07, "loss": 0.1475, "step": 6248 }, { "epoch": 0.4, "grad_norm": 0.7569238348928278, "learning_rate": 6.842351026101154e-07, "loss": 0.107, "step": 6249 }, { "epoch": 0.4, "grad_norm": 0.4995717604473063, "learning_rate": 6.84139091159313e-07, "loss": 0.2009, "step": 6250 }, { "epoch": 0.4, "grad_norm": 0.9157557633717965, "learning_rate": 6.840430718527667e-07, "loss": 0.003, "step": 6251 }, { "epoch": 0.4, "grad_norm": 0.7124468562585222, "learning_rate": 6.839470446945732e-07, "loss": 0.2808, "step": 6252 }, { "epoch": 0.4, "grad_norm": 1.2051956401988941, "learning_rate": 6.838510096888288e-07, "loss": 0.2728, "step": 6253 }, { "epoch": 0.4, "grad_norm": 0.5212681126804175, "learning_rate": 6.837549668396309e-07, "loss": 0.0165, "step": 6254 }, { "epoch": 0.4, "grad_norm": 0.7271535467615731, "learning_rate": 6.836589161510766e-07, "loss": 0.1922, "step": 6255 }, { "epoch": 0.4, "grad_norm": 1.9817213786470782, "learning_rate": 6.835628576272637e-07, "loss": 0.244, "step": 6256 }, { "epoch": 0.4, "grad_norm": 1.0020861976946314, "learning_rate": 6.834667912722904e-07, "loss": 0.2997, "step": 6257 }, { "epoch": 0.4, "grad_norm": 0.44966370145242784, "learning_rate": 6.833707170902549e-07, "loss": 0.1575, "step": 6258 }, { "epoch": 0.4, "grad_norm": 0.5248286096091686, "learning_rate": 6.832746350852561e-07, "loss": 0.2467, "step": 6259 }, { "epoch": 0.4, "grad_norm": 0.6208019392122317, "learning_rate": 6.831785452613927e-07, "loss": 0.2825, "step": 6260 }, { "epoch": 0.4, "grad_norm": 1.057503488458636, "learning_rate": 6.830824476227646e-07, "loss": 0.4394, "step": 6261 }, { "epoch": 0.4, "grad_norm": 0.4542964719792616, "learning_rate": 6.82986342173471e-07, "loss": 0.3187, "step": 6262 }, { "epoch": 0.4, "grad_norm": 1.0216584528054076, "learning_rate": 6.828902289176124e-07, "loss": 0.0623, "step": 6263 }, { "epoch": 0.4, "grad_norm": 0.3721115803693167, "learning_rate": 6.827941078592888e-07, "loss": 0.2275, "step": 6264 }, { "epoch": 0.4, "grad_norm": 0.6876065045926866, "learning_rate": 6.826979790026012e-07, "loss": 0.2629, "step": 6265 }, { "epoch": 0.4, "grad_norm": 0.5959189359618354, "learning_rate": 6.826018423516505e-07, "loss": 0.0692, "step": 6266 }, { "epoch": 0.4, "grad_norm": 1.0987326793714454, "learning_rate": 6.825056979105381e-07, "loss": 0.3505, "step": 6267 }, { "epoch": 0.4, "grad_norm": 2.5305220700049564, "learning_rate": 6.82409545683366e-07, "loss": 0.2905, "step": 6268 }, { "epoch": 0.4, "grad_norm": 0.9076133219416157, "learning_rate": 6.823133856742358e-07, "loss": 0.3421, "step": 6269 }, { "epoch": 0.4, "grad_norm": 1.0344516297215987, "learning_rate": 6.822172178872501e-07, "loss": 0.1861, "step": 6270 }, { "epoch": 0.4, "grad_norm": 0.6141548408873324, "learning_rate": 6.821210423265115e-07, "loss": 0.1325, "step": 6271 }, { "epoch": 0.4, "grad_norm": 12.259281383914091, "learning_rate": 6.820248589961231e-07, "loss": 0.0573, "step": 6272 }, { "epoch": 0.4, "grad_norm": 0.1936161211338814, "learning_rate": 6.819286679001883e-07, "loss": 0.0052, "step": 6273 }, { "epoch": 0.4, "grad_norm": 0.4761750848766932, "learning_rate": 6.818324690428109e-07, "loss": 0.0708, "step": 6274 }, { "epoch": 0.4, "grad_norm": 0.30141760885628427, "learning_rate": 6.817362624280948e-07, "loss": 0.1181, "step": 6275 }, { "epoch": 0.4, "grad_norm": 1.27387131059069, "learning_rate": 6.816400480601444e-07, "loss": 0.111, "step": 6276 }, { "epoch": 0.4, "grad_norm": 5.78459022870707, "learning_rate": 6.815438259430645e-07, "loss": 0.2472, "step": 6277 }, { "epoch": 0.4, "grad_norm": 0.9573281104683642, "learning_rate": 6.8144759608096e-07, "loss": 0.1301, "step": 6278 }, { "epoch": 0.4, "grad_norm": 5.028878427849481, "learning_rate": 6.813513584779362e-07, "loss": 0.2653, "step": 6279 }, { "epoch": 0.4, "grad_norm": 0.381367328172131, "learning_rate": 6.81255113138099e-07, "loss": 0.0072, "step": 6280 }, { "epoch": 0.4, "grad_norm": 1.4528935142791577, "learning_rate": 6.811588600655542e-07, "loss": 0.1284, "step": 6281 }, { "epoch": 0.4, "grad_norm": 4.827426492833447, "learning_rate": 6.810625992644083e-07, "loss": 0.156, "step": 6282 }, { "epoch": 0.4, "grad_norm": 0.5734812805451183, "learning_rate": 6.80966330738768e-07, "loss": 0.0737, "step": 6283 }, { "epoch": 0.4, "grad_norm": 0.4426379072669722, "learning_rate": 6.808700544927402e-07, "loss": 0.1289, "step": 6284 }, { "epoch": 0.4, "grad_norm": 0.5192769835580945, "learning_rate": 6.807737705304323e-07, "loss": 0.2355, "step": 6285 }, { "epoch": 0.4, "grad_norm": 0.6008560869470537, "learning_rate": 6.806774788559519e-07, "loss": 0.2676, "step": 6286 }, { "epoch": 0.4, "grad_norm": 0.9184380307491457, "learning_rate": 6.80581179473407e-07, "loss": 0.4086, "step": 6287 }, { "epoch": 0.4, "grad_norm": 0.24984547522423572, "learning_rate": 6.804848723869061e-07, "loss": 0.2237, "step": 6288 }, { "epoch": 0.4, "grad_norm": 0.5476870152107928, "learning_rate": 6.803885576005577e-07, "loss": 0.1659, "step": 6289 }, { "epoch": 0.4, "grad_norm": 1.5843547615793472, "learning_rate": 6.802922351184707e-07, "loss": 0.1208, "step": 6290 }, { "epoch": 0.4, "grad_norm": 0.36539251437592996, "learning_rate": 6.801959049447545e-07, "loss": 0.1999, "step": 6291 }, { "epoch": 0.4, "grad_norm": 8.163480671211163, "learning_rate": 6.800995670835188e-07, "loss": 0.1565, "step": 6292 }, { "epoch": 0.4, "grad_norm": 15.468156813312886, "learning_rate": 6.800032215388736e-07, "loss": 0.1801, "step": 6293 }, { "epoch": 0.4, "grad_norm": 0.5391643643765122, "learning_rate": 6.79906868314929e-07, "loss": 0.3423, "step": 6294 }, { "epoch": 0.4, "grad_norm": 1.1876241664389382, "learning_rate": 6.798105074157958e-07, "loss": 0.1654, "step": 6295 }, { "epoch": 0.4, "grad_norm": 0.8193904871120007, "learning_rate": 6.797141388455851e-07, "loss": 0.2495, "step": 6296 }, { "epoch": 0.4, "grad_norm": 0.3866432513975203, "learning_rate": 6.796177626084078e-07, "loss": 0.164, "step": 6297 }, { "epoch": 0.4, "grad_norm": 1.0882381860432704, "learning_rate": 6.795213787083755e-07, "loss": 0.1788, "step": 6298 }, { "epoch": 0.4, "grad_norm": 0.6830235621905923, "learning_rate": 6.794249871496005e-07, "loss": 0.3599, "step": 6299 }, { "epoch": 0.4, "grad_norm": 4.695270607090052, "learning_rate": 6.793285879361948e-07, "loss": 0.0877, "step": 6300 }, { "epoch": 0.4, "grad_norm": 0.7194837379626305, "learning_rate": 6.79232181072271e-07, "loss": 0.3207, "step": 6301 }, { "epoch": 0.4, "grad_norm": 0.9357192862350031, "learning_rate": 6.79135766561942e-07, "loss": 0.3821, "step": 6302 }, { "epoch": 0.4, "grad_norm": 0.5450760651607603, "learning_rate": 6.790393444093213e-07, "loss": 0.1295, "step": 6303 }, { "epoch": 0.4, "grad_norm": 0.5605722159660916, "learning_rate": 6.789429146185222e-07, "loss": 0.0503, "step": 6304 }, { "epoch": 0.4, "grad_norm": 1.223088798425785, "learning_rate": 6.788464771936585e-07, "loss": 0.5387, "step": 6305 }, { "epoch": 0.4, "grad_norm": 0.428314502941871, "learning_rate": 6.787500321388447e-07, "loss": 0.004, "step": 6306 }, { "epoch": 0.4, "grad_norm": 1.0015165823477454, "learning_rate": 6.786535794581951e-07, "loss": 0.1085, "step": 6307 }, { "epoch": 0.4, "grad_norm": 1.178132416984318, "learning_rate": 6.785571191558247e-07, "loss": 0.1638, "step": 6308 }, { "epoch": 0.4, "grad_norm": 0.7832864868245555, "learning_rate": 6.784606512358486e-07, "loss": 0.1841, "step": 6309 }, { "epoch": 0.4, "grad_norm": 1.2342575567020215, "learning_rate": 6.783641757023825e-07, "loss": 0.3512, "step": 6310 }, { "epoch": 0.4, "grad_norm": 0.02782269276322806, "learning_rate": 6.78267692559542e-07, "loss": 0.0007, "step": 6311 }, { "epoch": 0.4, "grad_norm": 0.9632128514314175, "learning_rate": 6.781712018114434e-07, "loss": 0.2452, "step": 6312 }, { "epoch": 0.4, "grad_norm": 0.9387459585892254, "learning_rate": 6.780747034622032e-07, "loss": 0.1231, "step": 6313 }, { "epoch": 0.4, "grad_norm": 0.6726956733244531, "learning_rate": 6.77978197515938e-07, "loss": 0.1672, "step": 6314 }, { "epoch": 0.4, "grad_norm": 0.8011892718369154, "learning_rate": 6.778816839767654e-07, "loss": 0.2529, "step": 6315 }, { "epoch": 0.4, "grad_norm": 1.654576615629492, "learning_rate": 6.777851628488022e-07, "loss": 0.1318, "step": 6316 }, { "epoch": 0.4, "grad_norm": 0.468742702068239, "learning_rate": 6.776886341361668e-07, "loss": 0.1593, "step": 6317 }, { "epoch": 0.4, "grad_norm": 0.8942879143321072, "learning_rate": 6.77592097842977e-07, "loss": 0.3876, "step": 6318 }, { "epoch": 0.4, "grad_norm": 0.6368684574518247, "learning_rate": 6.774955539733514e-07, "loss": 0.0998, "step": 6319 }, { "epoch": 0.4, "grad_norm": 0.7566350153543336, "learning_rate": 6.773990025314086e-07, "loss": 0.1656, "step": 6320 }, { "epoch": 0.4, "grad_norm": 0.731691102948197, "learning_rate": 6.773024435212677e-07, "loss": 0.0195, "step": 6321 }, { "epoch": 0.4, "grad_norm": 0.6343300683315546, "learning_rate": 6.772058769470483e-07, "loss": 0.2368, "step": 6322 }, { "epoch": 0.4, "grad_norm": 3.180929949863677, "learning_rate": 6.771093028128699e-07, "loss": 0.0249, "step": 6323 }, { "epoch": 0.4, "grad_norm": 2.20453988497507, "learning_rate": 6.770127211228526e-07, "loss": 0.1609, "step": 6324 }, { "epoch": 0.4, "grad_norm": 5.817803980307327, "learning_rate": 6.769161318811165e-07, "loss": 0.1692, "step": 6325 }, { "epoch": 0.4, "grad_norm": 0.6893446552490462, "learning_rate": 6.76819535091783e-07, "loss": 0.2116, "step": 6326 }, { "epoch": 0.4, "grad_norm": 7.248398235457054, "learning_rate": 6.767229307589723e-07, "loss": 0.1265, "step": 6327 }, { "epoch": 0.4, "grad_norm": 1.1731612265180096, "learning_rate": 6.766263188868063e-07, "loss": 0.4075, "step": 6328 }, { "epoch": 0.4, "grad_norm": 2.1095975619987564, "learning_rate": 6.765296994794065e-07, "loss": 0.341, "step": 6329 }, { "epoch": 0.4, "grad_norm": 1.232819800035905, "learning_rate": 6.764330725408948e-07, "loss": 0.4152, "step": 6330 }, { "epoch": 0.4, "grad_norm": 0.40808648334532943, "learning_rate": 6.763364380753936e-07, "loss": 0.0156, "step": 6331 }, { "epoch": 0.4, "grad_norm": 0.3680753344624959, "learning_rate": 6.762397960870255e-07, "loss": 0.1849, "step": 6332 }, { "epoch": 0.4, "grad_norm": 0.9161980014580507, "learning_rate": 6.761431465799133e-07, "loss": 0.2478, "step": 6333 }, { "epoch": 0.4, "grad_norm": 0.7829457137275232, "learning_rate": 6.760464895581804e-07, "loss": 0.3173, "step": 6334 }, { "epoch": 0.4, "grad_norm": 4.025450958864549, "learning_rate": 6.759498250259503e-07, "loss": 0.2282, "step": 6335 }, { "epoch": 0.4, "grad_norm": 0.622090011675234, "learning_rate": 6.75853152987347e-07, "loss": 0.2927, "step": 6336 }, { "epoch": 0.4, "grad_norm": 0.5802494290831592, "learning_rate": 6.757564734464948e-07, "loss": 0.1875, "step": 6337 }, { "epoch": 0.4, "grad_norm": 0.4361417755347666, "learning_rate": 6.756597864075181e-07, "loss": 0.05, "step": 6338 }, { "epoch": 0.4, "grad_norm": 2.166029941431076, "learning_rate": 6.755630918745417e-07, "loss": 0.166, "step": 6339 }, { "epoch": 0.4, "grad_norm": 0.7587754413586166, "learning_rate": 6.754663898516909e-07, "loss": 0.2583, "step": 6340 }, { "epoch": 0.4, "grad_norm": 0.35510815215948655, "learning_rate": 6.753696803430913e-07, "loss": 0.1415, "step": 6341 }, { "epoch": 0.4, "grad_norm": 0.942727015461539, "learning_rate": 6.752729633528684e-07, "loss": 0.296, "step": 6342 }, { "epoch": 0.4, "grad_norm": 0.9292487618931184, "learning_rate": 6.751762388851486e-07, "loss": 0.1719, "step": 6343 }, { "epoch": 0.4, "grad_norm": 0.5921404678657156, "learning_rate": 6.750795069440582e-07, "loss": 0.0233, "step": 6344 }, { "epoch": 0.4, "grad_norm": 1.063019208120071, "learning_rate": 6.749827675337243e-07, "loss": 0.1282, "step": 6345 }, { "epoch": 0.4, "grad_norm": 0.6059948271698828, "learning_rate": 6.748860206582738e-07, "loss": 0.3899, "step": 6346 }, { "epoch": 0.4, "grad_norm": 2.819963819050505, "learning_rate": 6.747892663218339e-07, "loss": 0.0624, "step": 6347 }, { "epoch": 0.4, "grad_norm": 1.087721441077157, "learning_rate": 6.746925045285327e-07, "loss": 0.0145, "step": 6348 }, { "epoch": 0.4, "grad_norm": 0.1544841294131863, "learning_rate": 6.745957352824979e-07, "loss": 0.0223, "step": 6349 }, { "epoch": 0.4, "grad_norm": 0.9026185574747579, "learning_rate": 6.744989585878582e-07, "loss": 0.0029, "step": 6350 }, { "epoch": 0.41, "grad_norm": 2.920157172626653, "learning_rate": 6.744021744487422e-07, "loss": 0.3463, "step": 6351 }, { "epoch": 0.41, "grad_norm": 1.8754340127993847, "learning_rate": 6.743053828692787e-07, "loss": 0.2398, "step": 6352 }, { "epoch": 0.41, "grad_norm": 1.125188320794302, "learning_rate": 6.742085838535972e-07, "loss": 0.2402, "step": 6353 }, { "epoch": 0.41, "grad_norm": 0.09075343192105742, "learning_rate": 6.741117774058271e-07, "loss": 0.0033, "step": 6354 }, { "epoch": 0.41, "grad_norm": 0.591763171934437, "learning_rate": 6.740149635300989e-07, "loss": 0.3425, "step": 6355 }, { "epoch": 0.41, "grad_norm": 0.9332673410017857, "learning_rate": 6.739181422305424e-07, "loss": 0.3002, "step": 6356 }, { "epoch": 0.41, "grad_norm": 0.6561116635671101, "learning_rate": 6.738213135112884e-07, "loss": 0.1401, "step": 6357 }, { "epoch": 0.41, "grad_norm": 0.5671755029146652, "learning_rate": 6.737244773764677e-07, "loss": 0.2599, "step": 6358 }, { "epoch": 0.41, "grad_norm": 0.9630947590414323, "learning_rate": 6.736276338302115e-07, "loss": 0.2172, "step": 6359 }, { "epoch": 0.41, "grad_norm": 1.1197194309821634, "learning_rate": 6.735307828766514e-07, "loss": 0.1828, "step": 6360 }, { "epoch": 0.41, "grad_norm": 0.4267255933308179, "learning_rate": 6.734339245199194e-07, "loss": 0.2289, "step": 6361 }, { "epoch": 0.41, "grad_norm": 0.061760439965799295, "learning_rate": 6.733370587641473e-07, "loss": 0.0005, "step": 6362 }, { "epoch": 0.41, "grad_norm": 1.0401649801124897, "learning_rate": 6.732401856134681e-07, "loss": 0.2416, "step": 6363 }, { "epoch": 0.41, "grad_norm": 1.1427021284904402, "learning_rate": 6.731433050720143e-07, "loss": 0.1625, "step": 6364 }, { "epoch": 0.41, "grad_norm": 0.7584669024460573, "learning_rate": 6.730464171439189e-07, "loss": 0.5968, "step": 6365 }, { "epoch": 0.41, "grad_norm": 0.947389546604894, "learning_rate": 6.729495218333156e-07, "loss": 0.3292, "step": 6366 }, { "epoch": 0.41, "grad_norm": 1.2671454166673617, "learning_rate": 6.728526191443379e-07, "loss": 0.3977, "step": 6367 }, { "epoch": 0.41, "grad_norm": 1.2807435233821538, "learning_rate": 6.7275570908112e-07, "loss": 0.1983, "step": 6368 }, { "epoch": 0.41, "grad_norm": 1.1223775443224318, "learning_rate": 6.726587916477963e-07, "loss": 0.2572, "step": 6369 }, { "epoch": 0.41, "grad_norm": 0.7796094429732707, "learning_rate": 6.725618668485015e-07, "loss": 0.2362, "step": 6370 }, { "epoch": 0.41, "grad_norm": 0.8569312131457432, "learning_rate": 6.724649346873705e-07, "loss": 0.0895, "step": 6371 }, { "epoch": 0.41, "grad_norm": 0.3162810885051341, "learning_rate": 6.723679951685387e-07, "loss": 0.1953, "step": 6372 }, { "epoch": 0.41, "grad_norm": 1.0160207518359947, "learning_rate": 6.722710482961418e-07, "loss": 0.1238, "step": 6373 }, { "epoch": 0.41, "grad_norm": 4.239643357815333, "learning_rate": 6.721740940743155e-07, "loss": 0.101, "step": 6374 }, { "epoch": 0.41, "grad_norm": 0.7511605584213781, "learning_rate": 6.720771325071965e-07, "loss": 0.2265, "step": 6375 }, { "epoch": 0.41, "grad_norm": 1.875460261507989, "learning_rate": 6.719801635989209e-07, "loss": 0.1086, "step": 6376 }, { "epoch": 0.41, "grad_norm": 2.21785125615534, "learning_rate": 6.71883187353626e-07, "loss": 0.0764, "step": 6377 }, { "epoch": 0.41, "grad_norm": 6.68719826755472, "learning_rate": 6.717862037754486e-07, "loss": 0.2404, "step": 6378 }, { "epoch": 0.41, "grad_norm": 6.571688675166393, "learning_rate": 6.716892128685264e-07, "loss": 0.1843, "step": 6379 }, { "epoch": 0.41, "grad_norm": 0.6185389766513376, "learning_rate": 6.715922146369973e-07, "loss": 0.3114, "step": 6380 }, { "epoch": 0.41, "grad_norm": 0.3319873843100709, "learning_rate": 6.714952090849995e-07, "loss": 0.1966, "step": 6381 }, { "epoch": 0.41, "grad_norm": 0.2561438784387966, "learning_rate": 6.713981962166712e-07, "loss": 0.1524, "step": 6382 }, { "epoch": 0.41, "grad_norm": 0.28233751339411856, "learning_rate": 6.713011760361513e-07, "loss": 0.1737, "step": 6383 }, { "epoch": 0.41, "grad_norm": 1.182331344622677, "learning_rate": 6.712041485475789e-07, "loss": 0.1968, "step": 6384 }, { "epoch": 0.41, "grad_norm": 2.972702489962563, "learning_rate": 6.711071137550934e-07, "loss": 0.2662, "step": 6385 }, { "epoch": 0.41, "grad_norm": 0.7964588870798529, "learning_rate": 6.710100716628344e-07, "loss": 0.0531, "step": 6386 }, { "epoch": 0.41, "grad_norm": 1.4359035223098955, "learning_rate": 6.709130222749419e-07, "loss": 0.2233, "step": 6387 }, { "epoch": 0.41, "grad_norm": 0.8217549713020542, "learning_rate": 6.708159655955563e-07, "loss": 0.4644, "step": 6388 }, { "epoch": 0.41, "grad_norm": 1.3280261803432758, "learning_rate": 6.707189016288184e-07, "loss": 0.4257, "step": 6389 }, { "epoch": 0.41, "grad_norm": 1.3041206893016233, "learning_rate": 6.706218303788688e-07, "loss": 0.1558, "step": 6390 }, { "epoch": 0.41, "grad_norm": 0.7141351038072162, "learning_rate": 6.70524751849849e-07, "loss": 0.4258, "step": 6391 }, { "epoch": 0.41, "grad_norm": 1.1079707205512248, "learning_rate": 6.704276660459007e-07, "loss": 0.3083, "step": 6392 }, { "epoch": 0.41, "grad_norm": 1.2512500793703196, "learning_rate": 6.703305729711652e-07, "loss": 0.3654, "step": 6393 }, { "epoch": 0.41, "grad_norm": 0.8928412086560439, "learning_rate": 6.702334726297852e-07, "loss": 0.0603, "step": 6394 }, { "epoch": 0.41, "grad_norm": 0.5169301511702493, "learning_rate": 6.70136365025903e-07, "loss": 0.1779, "step": 6395 }, { "epoch": 0.41, "grad_norm": 0.9494093079091375, "learning_rate": 6.700392501636616e-07, "loss": 0.2459, "step": 6396 }, { "epoch": 0.41, "grad_norm": 0.3858153397375629, "learning_rate": 6.699421280472037e-07, "loss": 0.1034, "step": 6397 }, { "epoch": 0.41, "grad_norm": 0.8402655369890025, "learning_rate": 6.698449986806731e-07, "loss": 0.0212, "step": 6398 }, { "epoch": 0.41, "grad_norm": 0.4053758742325924, "learning_rate": 6.697478620682136e-07, "loss": 0.0908, "step": 6399 }, { "epoch": 0.41, "grad_norm": 0.6037583497943138, "learning_rate": 6.696507182139689e-07, "loss": 0.0792, "step": 6400 }, { "epoch": 0.41, "grad_norm": 1.0995171185001902, "learning_rate": 6.695535671220835e-07, "loss": 0.2685, "step": 6401 }, { "epoch": 0.41, "grad_norm": 0.7153908915965723, "learning_rate": 6.694564087967022e-07, "loss": 0.1978, "step": 6402 }, { "epoch": 0.41, "grad_norm": 5.480691566042909, "learning_rate": 6.693592432419697e-07, "loss": 0.0978, "step": 6403 }, { "epoch": 0.41, "grad_norm": 0.3212146592714469, "learning_rate": 6.692620704620315e-07, "loss": 0.1111, "step": 6404 }, { "epoch": 0.41, "grad_norm": 0.9104411249066503, "learning_rate": 6.691648904610331e-07, "loss": 0.223, "step": 6405 }, { "epoch": 0.41, "grad_norm": 0.725987599836831, "learning_rate": 6.690677032431206e-07, "loss": 0.1841, "step": 6406 }, { "epoch": 0.41, "grad_norm": 0.167513301779837, "learning_rate": 6.689705088124397e-07, "loss": 0.0675, "step": 6407 }, { "epoch": 0.41, "grad_norm": 0.6746871248555427, "learning_rate": 6.688733071731375e-07, "loss": 0.2058, "step": 6408 }, { "epoch": 0.41, "grad_norm": 3.4429761523137734, "learning_rate": 6.687760983293605e-07, "loss": 0.3075, "step": 6409 }, { "epoch": 0.41, "grad_norm": 0.8222056307600253, "learning_rate": 6.686788822852557e-07, "loss": 0.2678, "step": 6410 }, { "epoch": 0.41, "grad_norm": 0.6403125674895418, "learning_rate": 6.685816590449708e-07, "loss": 0.1277, "step": 6411 }, { "epoch": 0.41, "grad_norm": 6.387080088376487, "learning_rate": 6.684844286126534e-07, "loss": 0.175, "step": 6412 }, { "epoch": 0.41, "grad_norm": 0.8035832009531326, "learning_rate": 6.683871909924516e-07, "loss": 0.164, "step": 6413 }, { "epoch": 0.41, "grad_norm": 0.7419355566774356, "learning_rate": 6.682899461885136e-07, "loss": 0.3492, "step": 6414 }, { "epoch": 0.41, "grad_norm": 0.7583044093642305, "learning_rate": 6.681926942049882e-07, "loss": 0.1684, "step": 6415 }, { "epoch": 0.41, "grad_norm": 1.540524158474532, "learning_rate": 6.680954350460247e-07, "loss": 0.2001, "step": 6416 }, { "epoch": 0.41, "grad_norm": 1.3589512671052704, "learning_rate": 6.679981687157717e-07, "loss": 0.1495, "step": 6417 }, { "epoch": 0.41, "grad_norm": 0.6716513360157987, "learning_rate": 6.679008952183791e-07, "loss": 0.2796, "step": 6418 }, { "epoch": 0.41, "grad_norm": 0.2655350732043996, "learning_rate": 6.67803614557997e-07, "loss": 0.1029, "step": 6419 }, { "epoch": 0.41, "grad_norm": 0.7685597814453371, "learning_rate": 6.677063267387752e-07, "loss": 0.078, "step": 6420 }, { "epoch": 0.41, "grad_norm": 0.37207951166087705, "learning_rate": 6.676090317648645e-07, "loss": 0.1047, "step": 6421 }, { "epoch": 0.41, "grad_norm": 0.6874716827952146, "learning_rate": 6.675117296404155e-07, "loss": 0.2572, "step": 6422 }, { "epoch": 0.41, "grad_norm": 2.1641264249149006, "learning_rate": 6.674144203695793e-07, "loss": 0.0321, "step": 6423 }, { "epoch": 0.41, "grad_norm": 0.783936055476026, "learning_rate": 6.673171039565075e-07, "loss": 0.3694, "step": 6424 }, { "epoch": 0.41, "grad_norm": 8.06002758520689, "learning_rate": 6.672197804053515e-07, "loss": 0.3867, "step": 6425 }, { "epoch": 0.41, "grad_norm": 0.1685342360811017, "learning_rate": 6.671224497202636e-07, "loss": 0.0897, "step": 6426 }, { "epoch": 0.41, "grad_norm": 3.2502365801792226, "learning_rate": 6.670251119053962e-07, "loss": 0.2772, "step": 6427 }, { "epoch": 0.41, "grad_norm": 0.8456925694631241, "learning_rate": 6.669277669649017e-07, "loss": 0.2723, "step": 6428 }, { "epoch": 0.41, "grad_norm": 0.2186130984426744, "learning_rate": 6.66830414902933e-07, "loss": 0.1497, "step": 6429 }, { "epoch": 0.41, "grad_norm": 1.672158032240156, "learning_rate": 6.667330557236435e-07, "loss": 0.2074, "step": 6430 }, { "epoch": 0.41, "grad_norm": 0.8898479913339835, "learning_rate": 6.666356894311866e-07, "loss": 0.0757, "step": 6431 }, { "epoch": 0.41, "grad_norm": 0.6753965207327566, "learning_rate": 6.665383160297162e-07, "loss": 0.2126, "step": 6432 }, { "epoch": 0.41, "grad_norm": 0.6426171782885012, "learning_rate": 6.664409355233867e-07, "loss": 0.2228, "step": 6433 }, { "epoch": 0.41, "grad_norm": 0.9163429384997365, "learning_rate": 6.66343547916352e-07, "loss": 0.2849, "step": 6434 }, { "epoch": 0.41, "grad_norm": 0.39900204553834123, "learning_rate": 6.662461532127673e-07, "loss": 0.133, "step": 6435 }, { "epoch": 0.41, "grad_norm": 0.5645169244933328, "learning_rate": 6.661487514167874e-07, "loss": 0.2357, "step": 6436 }, { "epoch": 0.41, "grad_norm": 1.0206422537663962, "learning_rate": 6.660513425325679e-07, "loss": 0.116, "step": 6437 }, { "epoch": 0.41, "grad_norm": 3.2466906121473285, "learning_rate": 6.659539265642642e-07, "loss": 0.0162, "step": 6438 }, { "epoch": 0.41, "grad_norm": 0.30870560313402473, "learning_rate": 6.658565035160325e-07, "loss": 0.1962, "step": 6439 }, { "epoch": 0.41, "grad_norm": 2.20218703221726, "learning_rate": 6.657590733920289e-07, "loss": 0.1525, "step": 6440 }, { "epoch": 0.41, "grad_norm": 0.23113188580448643, "learning_rate": 6.656616361964099e-07, "loss": 0.0881, "step": 6441 }, { "epoch": 0.41, "grad_norm": 0.49620995616366453, "learning_rate": 6.655641919333325e-07, "loss": 0.3741, "step": 6442 }, { "epoch": 0.41, "grad_norm": 0.5141069284573254, "learning_rate": 6.654667406069539e-07, "loss": 0.0724, "step": 6443 }, { "epoch": 0.41, "grad_norm": 0.7985520950661176, "learning_rate": 6.653692822214316e-07, "loss": 0.3261, "step": 6444 }, { "epoch": 0.41, "grad_norm": 0.8930237310932487, "learning_rate": 6.652718167809232e-07, "loss": 0.0418, "step": 6445 }, { "epoch": 0.41, "grad_norm": 0.8823951379759444, "learning_rate": 6.65174344289587e-07, "loss": 0.2082, "step": 6446 }, { "epoch": 0.41, "grad_norm": 1.3241973155351798, "learning_rate": 6.650768647515812e-07, "loss": 0.0664, "step": 6447 }, { "epoch": 0.41, "grad_norm": 0.6709408431913578, "learning_rate": 6.649793781710644e-07, "loss": 0.2816, "step": 6448 }, { "epoch": 0.41, "grad_norm": 0.7087149931517718, "learning_rate": 6.648818845521955e-07, "loss": 0.2196, "step": 6449 }, { "epoch": 0.41, "grad_norm": 0.7660689037529811, "learning_rate": 6.647843838991342e-07, "loss": 0.115, "step": 6450 }, { "epoch": 0.41, "grad_norm": 1.586727923499886, "learning_rate": 6.646868762160398e-07, "loss": 0.255, "step": 6451 }, { "epoch": 0.41, "grad_norm": 0.2765990842381686, "learning_rate": 6.645893615070722e-07, "loss": 0.0083, "step": 6452 }, { "epoch": 0.41, "grad_norm": 0.7482934174593467, "learning_rate": 6.644918397763914e-07, "loss": 0.2262, "step": 6453 }, { "epoch": 0.41, "grad_norm": 1.3505831849518155, "learning_rate": 6.643943110281583e-07, "loss": 0.1289, "step": 6454 }, { "epoch": 0.41, "grad_norm": 0.5051971188853868, "learning_rate": 6.642967752665333e-07, "loss": 0.1454, "step": 6455 }, { "epoch": 0.41, "grad_norm": 0.45116601841098775, "learning_rate": 6.641992324956775e-07, "loss": 0.1051, "step": 6456 }, { "epoch": 0.41, "grad_norm": 9.6467710453751, "learning_rate": 6.641016827197526e-07, "loss": 0.1847, "step": 6457 }, { "epoch": 0.41, "grad_norm": 0.8112876594939268, "learning_rate": 6.640041259429199e-07, "loss": 0.5393, "step": 6458 }, { "epoch": 0.41, "grad_norm": 0.8930198854204855, "learning_rate": 6.639065621693414e-07, "loss": 0.325, "step": 6459 }, { "epoch": 0.41, "grad_norm": 1.7377636794694982, "learning_rate": 6.638089914031794e-07, "loss": 0.3848, "step": 6460 }, { "epoch": 0.41, "grad_norm": 1.1674386532537577, "learning_rate": 6.637114136485968e-07, "loss": 0.298, "step": 6461 }, { "epoch": 0.41, "grad_norm": 0.598930227126228, "learning_rate": 6.636138289097561e-07, "loss": 0.1786, "step": 6462 }, { "epoch": 0.41, "grad_norm": 2.377681268256108, "learning_rate": 6.635162371908205e-07, "loss": 0.1953, "step": 6463 }, { "epoch": 0.41, "grad_norm": 12.51374160520077, "learning_rate": 6.634186384959536e-07, "loss": 0.1956, "step": 6464 }, { "epoch": 0.41, "grad_norm": 1.5770264650314494, "learning_rate": 6.63321032829319e-07, "loss": 0.1165, "step": 6465 }, { "epoch": 0.41, "grad_norm": 1.4165259930628278, "learning_rate": 6.632234201950808e-07, "loss": 0.0081, "step": 6466 }, { "epoch": 0.41, "grad_norm": 1.5479311939948879, "learning_rate": 6.631258005974034e-07, "loss": 0.1396, "step": 6467 }, { "epoch": 0.41, "grad_norm": 0.7595715487242493, "learning_rate": 6.630281740404513e-07, "loss": 0.3016, "step": 6468 }, { "epoch": 0.41, "grad_norm": 0.39108477600863556, "learning_rate": 6.629305405283897e-07, "loss": 0.3183, "step": 6469 }, { "epoch": 0.41, "grad_norm": 5.949277306145099, "learning_rate": 6.628329000653837e-07, "loss": 0.1225, "step": 6470 }, { "epoch": 0.41, "grad_norm": 0.7967134536685371, "learning_rate": 6.62735252655599e-07, "loss": 0.2556, "step": 6471 }, { "epoch": 0.41, "grad_norm": 1.3221524500716266, "learning_rate": 6.62637598303201e-07, "loss": 0.4512, "step": 6472 }, { "epoch": 0.41, "grad_norm": 5.538877138746729, "learning_rate": 6.625399370123562e-07, "loss": 0.3627, "step": 6473 }, { "epoch": 0.41, "grad_norm": 2.466976161261532, "learning_rate": 6.624422687872311e-07, "loss": 0.0839, "step": 6474 }, { "epoch": 0.41, "grad_norm": 1.0441176222594615, "learning_rate": 6.623445936319922e-07, "loss": 0.3385, "step": 6475 }, { "epoch": 0.41, "grad_norm": 0.9286328959903735, "learning_rate": 6.622469115508065e-07, "loss": 0.3544, "step": 6476 }, { "epoch": 0.41, "grad_norm": 0.8921096068236626, "learning_rate": 6.621492225478413e-07, "loss": 0.217, "step": 6477 }, { "epoch": 0.41, "grad_norm": 0.527601776872973, "learning_rate": 6.620515266272645e-07, "loss": 0.2838, "step": 6478 }, { "epoch": 0.41, "grad_norm": 0.5407391774707727, "learning_rate": 6.619538237932437e-07, "loss": 0.0094, "step": 6479 }, { "epoch": 0.41, "grad_norm": 3.0682816568887366, "learning_rate": 6.618561140499472e-07, "loss": 0.1509, "step": 6480 }, { "epoch": 0.41, "grad_norm": 0.767621032382869, "learning_rate": 6.617583974015436e-07, "loss": 0.4076, "step": 6481 }, { "epoch": 0.41, "grad_norm": 0.8183856655656446, "learning_rate": 6.616606738522016e-07, "loss": 0.2044, "step": 6482 }, { "epoch": 0.41, "grad_norm": 0.29348998718732544, "learning_rate": 6.615629434060902e-07, "loss": 0.0747, "step": 6483 }, { "epoch": 0.41, "grad_norm": 0.316376771796467, "learning_rate": 6.614652060673789e-07, "loss": 0.2296, "step": 6484 }, { "epoch": 0.41, "grad_norm": 0.49404427248597543, "learning_rate": 6.613674618402373e-07, "loss": 0.3097, "step": 6485 }, { "epoch": 0.41, "grad_norm": 0.635776322474302, "learning_rate": 6.612697107288352e-07, "loss": 0.1519, "step": 6486 }, { "epoch": 0.41, "grad_norm": 0.4708852073170756, "learning_rate": 6.611719527373433e-07, "loss": 0.0105, "step": 6487 }, { "epoch": 0.41, "grad_norm": 1.6560381143197433, "learning_rate": 6.610741878699319e-07, "loss": 0.2706, "step": 6488 }, { "epoch": 0.41, "grad_norm": 0.683823648475233, "learning_rate": 6.609764161307718e-07, "loss": 0.345, "step": 6489 }, { "epoch": 0.41, "grad_norm": 0.5789976564007079, "learning_rate": 6.608786375240342e-07, "loss": 0.2959, "step": 6490 }, { "epoch": 0.41, "grad_norm": 0.47281055515844767, "learning_rate": 6.607808520538904e-07, "loss": 0.0035, "step": 6491 }, { "epoch": 0.41, "grad_norm": 1.0032031279807376, "learning_rate": 6.606830597245123e-07, "loss": 0.1048, "step": 6492 }, { "epoch": 0.41, "grad_norm": 0.6261579156018745, "learning_rate": 6.605852605400719e-07, "loss": 0.1656, "step": 6493 }, { "epoch": 0.41, "grad_norm": 0.732854958715936, "learning_rate": 6.604874545047414e-07, "loss": 0.2638, "step": 6494 }, { "epoch": 0.41, "grad_norm": 3.780507289556038, "learning_rate": 6.603896416226935e-07, "loss": 0.4101, "step": 6495 }, { "epoch": 0.41, "grad_norm": 1.3972146105870629, "learning_rate": 6.60291821898101e-07, "loss": 0.2275, "step": 6496 }, { "epoch": 0.41, "grad_norm": 0.9831244538642729, "learning_rate": 6.601939953351373e-07, "loss": 0.3621, "step": 6497 }, { "epoch": 0.41, "grad_norm": 0.9525721240066384, "learning_rate": 6.600961619379757e-07, "loss": 0.1266, "step": 6498 }, { "epoch": 0.41, "grad_norm": 2.801474755548807, "learning_rate": 6.599983217107899e-07, "loss": 0.0536, "step": 6499 }, { "epoch": 0.41, "grad_norm": 0.9934987509011964, "learning_rate": 6.599004746577541e-07, "loss": 0.1469, "step": 6500 }, { "epoch": 0.41, "grad_norm": 4.473780673185841, "learning_rate": 6.598026207830427e-07, "loss": 0.206, "step": 6501 }, { "epoch": 0.41, "grad_norm": 0.5678875570797841, "learning_rate": 6.597047600908301e-07, "loss": 0.1189, "step": 6502 }, { "epoch": 0.41, "grad_norm": 0.981066779593897, "learning_rate": 6.596068925852915e-07, "loss": 0.2041, "step": 6503 }, { "epoch": 0.41, "grad_norm": 1.9243978928505816, "learning_rate": 6.59509018270602e-07, "loss": 0.303, "step": 6504 }, { "epoch": 0.41, "grad_norm": 0.6919267581567077, "learning_rate": 6.594111371509371e-07, "loss": 0.2919, "step": 6505 }, { "epoch": 0.41, "grad_norm": 1.7933123534219113, "learning_rate": 6.593132492304727e-07, "loss": 0.3159, "step": 6506 }, { "epoch": 0.41, "grad_norm": 2.729456866562622, "learning_rate": 6.592153545133847e-07, "loss": 0.0478, "step": 6507 }, { "epoch": 0.42, "grad_norm": 1.3389561586035512, "learning_rate": 6.591174530038497e-07, "loss": 0.163, "step": 6508 }, { "epoch": 0.42, "grad_norm": 0.3966142880288903, "learning_rate": 6.590195447060442e-07, "loss": 0.1249, "step": 6509 }, { "epoch": 0.42, "grad_norm": 0.47325938802164785, "learning_rate": 6.589216296241454e-07, "loss": 0.1079, "step": 6510 }, { "epoch": 0.42, "grad_norm": 0.38768715086358946, "learning_rate": 6.588237077623305e-07, "loss": 0.1644, "step": 6511 }, { "epoch": 0.42, "grad_norm": 0.9449971313198207, "learning_rate": 6.587257791247767e-07, "loss": 0.3849, "step": 6512 }, { "epoch": 0.42, "grad_norm": 0.4173316413548671, "learning_rate": 6.586278437156621e-07, "loss": 0.1504, "step": 6513 }, { "epoch": 0.42, "grad_norm": 0.7588325020050893, "learning_rate": 6.585299015391648e-07, "loss": 0.3681, "step": 6514 }, { "epoch": 0.42, "grad_norm": 7.643512989101004, "learning_rate": 6.584319525994633e-07, "loss": 0.1995, "step": 6515 }, { "epoch": 0.42, "grad_norm": 6.448581609866346, "learning_rate": 6.583339969007363e-07, "loss": 0.1167, "step": 6516 }, { "epoch": 0.42, "grad_norm": 7.179579918401583, "learning_rate": 6.582360344471626e-07, "loss": 0.03, "step": 6517 }, { "epoch": 0.42, "grad_norm": 0.9476712906991313, "learning_rate": 6.581380652429215e-07, "loss": 0.0128, "step": 6518 }, { "epoch": 0.42, "grad_norm": 0.5618076951985692, "learning_rate": 6.580400892921928e-07, "loss": 0.0394, "step": 6519 }, { "epoch": 0.42, "grad_norm": 0.7208144385368301, "learning_rate": 6.579421065991562e-07, "loss": 0.1988, "step": 6520 }, { "epoch": 0.42, "grad_norm": 0.6700607999321094, "learning_rate": 6.578441171679916e-07, "loss": 0.1638, "step": 6521 }, { "epoch": 0.42, "grad_norm": 0.6036084279517975, "learning_rate": 6.577461210028798e-07, "loss": 0.104, "step": 6522 }, { "epoch": 0.42, "grad_norm": 1.2579511343596985, "learning_rate": 6.576481181080014e-07, "loss": 0.2006, "step": 6523 }, { "epoch": 0.42, "grad_norm": 5.995596720419675, "learning_rate": 6.575501084875373e-07, "loss": 0.0842, "step": 6524 }, { "epoch": 0.42, "grad_norm": 0.7921197772652121, "learning_rate": 6.574520921456687e-07, "loss": 0.1853, "step": 6525 }, { "epoch": 0.42, "grad_norm": 0.8754186202166979, "learning_rate": 6.573540690865777e-07, "loss": 0.2886, "step": 6526 }, { "epoch": 0.42, "grad_norm": 0.6026451594831287, "learning_rate": 6.572560393144456e-07, "loss": 0.1946, "step": 6527 }, { "epoch": 0.42, "grad_norm": 0.9409002386965092, "learning_rate": 6.571580028334546e-07, "loss": 0.298, "step": 6528 }, { "epoch": 0.42, "grad_norm": 0.6088540029934685, "learning_rate": 6.570599596477874e-07, "loss": 0.0904, "step": 6529 }, { "epoch": 0.42, "grad_norm": 0.4002230965927323, "learning_rate": 6.569619097616268e-07, "loss": 0.0855, "step": 6530 }, { "epoch": 0.42, "grad_norm": 0.3362088504980511, "learning_rate": 6.568638531791554e-07, "loss": 0.0082, "step": 6531 }, { "epoch": 0.42, "grad_norm": 1.3475426728193038, "learning_rate": 6.567657899045566e-07, "loss": 0.0606, "step": 6532 }, { "epoch": 0.42, "grad_norm": 5.105330272258604, "learning_rate": 6.566677199420142e-07, "loss": 0.3037, "step": 6533 }, { "epoch": 0.42, "grad_norm": 1.7076940168194243, "learning_rate": 6.565696432957119e-07, "loss": 0.2003, "step": 6534 }, { "epoch": 0.42, "grad_norm": 0.9592712351857866, "learning_rate": 6.564715599698338e-07, "loss": 0.154, "step": 6535 }, { "epoch": 0.42, "grad_norm": 1.0719706917957044, "learning_rate": 6.563734699685646e-07, "loss": 0.3982, "step": 6536 }, { "epoch": 0.42, "grad_norm": 0.5285085284835996, "learning_rate": 6.562753732960886e-07, "loss": 0.2912, "step": 6537 }, { "epoch": 0.42, "grad_norm": 0.709800823003546, "learning_rate": 6.56177269956591e-07, "loss": 0.3394, "step": 6538 }, { "epoch": 0.42, "grad_norm": 0.8931223092415999, "learning_rate": 6.560791599542572e-07, "loss": 0.1268, "step": 6539 }, { "epoch": 0.42, "grad_norm": 9.20830405696897, "learning_rate": 6.559810432932727e-07, "loss": 0.1853, "step": 6540 }, { "epoch": 0.42, "grad_norm": 0.6305902483585847, "learning_rate": 6.558829199778233e-07, "loss": 0.2657, "step": 6541 }, { "epoch": 0.42, "grad_norm": 0.7724616563658449, "learning_rate": 6.557847900120952e-07, "loss": 0.2717, "step": 6542 }, { "epoch": 0.42, "grad_norm": 0.4791742121706651, "learning_rate": 6.55686653400275e-07, "loss": 0.134, "step": 6543 }, { "epoch": 0.42, "grad_norm": 0.9442269367318035, "learning_rate": 6.555885101465489e-07, "loss": 0.2108, "step": 6544 }, { "epoch": 0.42, "grad_norm": 0.4039040723449807, "learning_rate": 6.554903602551043e-07, "loss": 0.1719, "step": 6545 }, { "epoch": 0.42, "grad_norm": 1.3915708401672107, "learning_rate": 6.553922037301283e-07, "loss": 0.3847, "step": 6546 }, { "epoch": 0.42, "grad_norm": 0.38248881133172413, "learning_rate": 6.552940405758084e-07, "loss": 0.0838, "step": 6547 }, { "epoch": 0.42, "grad_norm": 2.5619254010366976, "learning_rate": 6.551958707963328e-07, "loss": 0.226, "step": 6548 }, { "epoch": 0.42, "grad_norm": 1.2534025099978723, "learning_rate": 6.550976943958891e-07, "loss": 0.1677, "step": 6549 }, { "epoch": 0.42, "grad_norm": 0.831115820788772, "learning_rate": 6.549995113786662e-07, "loss": 0.1906, "step": 6550 }, { "epoch": 0.42, "grad_norm": 7.0313492870443195, "learning_rate": 6.549013217488525e-07, "loss": 0.2437, "step": 6551 }, { "epoch": 0.42, "grad_norm": 2.123844449278096, "learning_rate": 6.54803125510637e-07, "loss": 0.1735, "step": 6552 }, { "epoch": 0.42, "grad_norm": 0.9805224328146542, "learning_rate": 6.547049226682089e-07, "loss": 0.2336, "step": 6553 }, { "epoch": 0.42, "grad_norm": 0.6122253035320749, "learning_rate": 6.546067132257579e-07, "loss": 0.1658, "step": 6554 }, { "epoch": 0.42, "grad_norm": 0.9216797554993842, "learning_rate": 6.545084971874736e-07, "loss": 0.2078, "step": 6555 }, { "epoch": 0.42, "grad_norm": 0.6379449880822752, "learning_rate": 6.544102745575463e-07, "loss": 0.2793, "step": 6556 }, { "epoch": 0.42, "grad_norm": 2.3316942329819206, "learning_rate": 6.543120453401664e-07, "loss": 0.0257, "step": 6557 }, { "epoch": 0.42, "grad_norm": 0.3965400725147025, "learning_rate": 6.542138095395243e-07, "loss": 0.0984, "step": 6558 }, { "epoch": 0.42, "grad_norm": 1.2872385639535027, "learning_rate": 6.541155671598111e-07, "loss": 0.248, "step": 6559 }, { "epoch": 0.42, "grad_norm": 4.246088391524185, "learning_rate": 6.54017318205218e-07, "loss": 0.1558, "step": 6560 }, { "epoch": 0.42, "grad_norm": 0.6877516861733453, "learning_rate": 6.539190626799364e-07, "loss": 0.0134, "step": 6561 }, { "epoch": 0.42, "grad_norm": 0.443626408272202, "learning_rate": 6.538208005881583e-07, "loss": 0.1173, "step": 6562 }, { "epoch": 0.42, "grad_norm": 1.273995148271526, "learning_rate": 6.537225319340757e-07, "loss": 0.197, "step": 6563 }, { "epoch": 0.42, "grad_norm": 0.5114725362639235, "learning_rate": 6.536242567218807e-07, "loss": 0.2292, "step": 6564 }, { "epoch": 0.42, "grad_norm": 2.243953362163476, "learning_rate": 6.53525974955766e-07, "loss": 0.0133, "step": 6565 }, { "epoch": 0.42, "grad_norm": 0.5488081363315258, "learning_rate": 6.534276866399247e-07, "loss": 0.0538, "step": 6566 }, { "epoch": 0.42, "grad_norm": 0.9886178392048715, "learning_rate": 6.5332939177855e-07, "loss": 0.2797, "step": 6567 }, { "epoch": 0.42, "grad_norm": 0.8601829829661732, "learning_rate": 6.53231090375835e-07, "loss": 0.3236, "step": 6568 }, { "epoch": 0.42, "grad_norm": 0.5067175979204344, "learning_rate": 6.531327824359738e-07, "loss": 0.2217, "step": 6569 }, { "epoch": 0.42, "grad_norm": 0.75280071450143, "learning_rate": 6.530344679631602e-07, "loss": 0.2105, "step": 6570 }, { "epoch": 0.42, "grad_norm": 0.9963648393536968, "learning_rate": 6.529361469615887e-07, "loss": 0.2076, "step": 6571 }, { "epoch": 0.42, "grad_norm": 1.120861879069957, "learning_rate": 6.528378194354536e-07, "loss": 0.1174, "step": 6572 }, { "epoch": 0.42, "grad_norm": 0.4431707215135499, "learning_rate": 6.527394853889499e-07, "loss": 0.0998, "step": 6573 }, { "epoch": 0.42, "grad_norm": 0.09461529127731924, "learning_rate": 6.526411448262726e-07, "loss": 0.005, "step": 6574 }, { "epoch": 0.42, "grad_norm": 0.8224445636242497, "learning_rate": 6.525427977516173e-07, "loss": 0.4131, "step": 6575 }, { "epoch": 0.42, "grad_norm": 0.6901181559297348, "learning_rate": 6.524444441691795e-07, "loss": 0.3649, "step": 6576 }, { "epoch": 0.42, "grad_norm": 0.35761199002053246, "learning_rate": 6.523460840831554e-07, "loss": 0.2469, "step": 6577 }, { "epoch": 0.42, "grad_norm": 1.5334737367261122, "learning_rate": 6.522477174977411e-07, "loss": 0.0843, "step": 6578 }, { "epoch": 0.42, "grad_norm": 0.5179009731488441, "learning_rate": 6.52149344417133e-07, "loss": 0.0151, "step": 6579 }, { "epoch": 0.42, "grad_norm": 0.6488619418859829, "learning_rate": 6.520509648455282e-07, "loss": 0.2846, "step": 6580 }, { "epoch": 0.42, "grad_norm": 0.3682507192636819, "learning_rate": 6.519525787871234e-07, "loss": 0.095, "step": 6581 }, { "epoch": 0.42, "grad_norm": 1.1212866152391388, "learning_rate": 6.518541862461162e-07, "loss": 0.0726, "step": 6582 }, { "epoch": 0.42, "grad_norm": 8.54516622159933, "learning_rate": 6.517557872267041e-07, "loss": 0.1133, "step": 6583 }, { "epoch": 0.42, "grad_norm": 0.6169215072686566, "learning_rate": 6.516573817330851e-07, "loss": 0.1888, "step": 6584 }, { "epoch": 0.42, "grad_norm": 0.6833676158879121, "learning_rate": 6.515589697694574e-07, "loss": 0.1507, "step": 6585 }, { "epoch": 0.42, "grad_norm": 0.5188710826658951, "learning_rate": 6.514605513400193e-07, "loss": 0.4211, "step": 6586 }, { "epoch": 0.42, "grad_norm": 0.3199304249283735, "learning_rate": 6.513621264489696e-07, "loss": 0.2161, "step": 6587 }, { "epoch": 0.42, "grad_norm": 0.9024744484290665, "learning_rate": 6.512636951005073e-07, "loss": 0.2223, "step": 6588 }, { "epoch": 0.42, "grad_norm": 1.2358497207831372, "learning_rate": 6.511652572988316e-07, "loss": 0.2116, "step": 6589 }, { "epoch": 0.42, "grad_norm": 0.790714556211648, "learning_rate": 6.510668130481423e-07, "loss": 0.2035, "step": 6590 }, { "epoch": 0.42, "grad_norm": 0.5837610043799196, "learning_rate": 6.50968362352639e-07, "loss": 0.2612, "step": 6591 }, { "epoch": 0.42, "grad_norm": 2.1322110048786116, "learning_rate": 6.508699052165218e-07, "loss": 0.0731, "step": 6592 }, { "epoch": 0.42, "grad_norm": 0.4869642528488591, "learning_rate": 6.507714416439914e-07, "loss": 0.1773, "step": 6593 }, { "epoch": 0.42, "grad_norm": 0.46931346360923487, "learning_rate": 6.506729716392479e-07, "loss": 0.1304, "step": 6594 }, { "epoch": 0.42, "grad_norm": 0.6495464461000598, "learning_rate": 6.505744952064927e-07, "loss": 0.211, "step": 6595 }, { "epoch": 0.42, "grad_norm": 0.6816189436042995, "learning_rate": 6.504760123499269e-07, "loss": 0.2026, "step": 6596 }, { "epoch": 0.42, "grad_norm": 0.5359782743913805, "learning_rate": 6.503775230737518e-07, "loss": 0.1804, "step": 6597 }, { "epoch": 0.42, "grad_norm": 0.6466741031098954, "learning_rate": 6.502790273821694e-07, "loss": 0.2872, "step": 6598 }, { "epoch": 0.42, "grad_norm": 3.059231198735174, "learning_rate": 6.501805252793817e-07, "loss": 0.2318, "step": 6599 }, { "epoch": 0.42, "grad_norm": 0.1495449829824636, "learning_rate": 6.500820167695905e-07, "loss": 0.0118, "step": 6600 }, { "epoch": 0.42, "grad_norm": 0.6556357119339661, "learning_rate": 6.49983501856999e-07, "loss": 0.2049, "step": 6601 }, { "epoch": 0.42, "grad_norm": 0.19084012033555592, "learning_rate": 6.498849805458098e-07, "loss": 0.0892, "step": 6602 }, { "epoch": 0.42, "grad_norm": 0.7542708776617518, "learning_rate": 6.49786452840226e-07, "loss": 0.0684, "step": 6603 }, { "epoch": 0.42, "grad_norm": 1.130488531154757, "learning_rate": 6.496879187444509e-07, "loss": 0.0361, "step": 6604 }, { "epoch": 0.42, "grad_norm": 0.12030908783750106, "learning_rate": 6.495893782626884e-07, "loss": 0.0038, "step": 6605 }, { "epoch": 0.42, "grad_norm": 4.472643673466841, "learning_rate": 6.494908313991424e-07, "loss": 0.027, "step": 6606 }, { "epoch": 0.42, "grad_norm": 6.756578264745721, "learning_rate": 6.493922781580169e-07, "loss": 0.1752, "step": 6607 }, { "epoch": 0.42, "grad_norm": 0.47065007556631094, "learning_rate": 6.492937185435165e-07, "loss": 0.0365, "step": 6608 }, { "epoch": 0.42, "grad_norm": 0.37773434203172523, "learning_rate": 6.491951525598461e-07, "loss": 0.1043, "step": 6609 }, { "epoch": 0.42, "grad_norm": 16.170231038508277, "learning_rate": 6.490965802112103e-07, "loss": 0.0348, "step": 6610 }, { "epoch": 0.42, "grad_norm": 3.2153154323505864, "learning_rate": 6.489980015018147e-07, "loss": 0.2384, "step": 6611 }, { "epoch": 0.42, "grad_norm": 1.1887446464012499, "learning_rate": 6.488994164358651e-07, "loss": 0.4168, "step": 6612 }, { "epoch": 0.42, "grad_norm": 0.5097010072238237, "learning_rate": 6.488008250175669e-07, "loss": 0.0474, "step": 6613 }, { "epoch": 0.42, "grad_norm": 1.7804920096956791, "learning_rate": 6.487022272511264e-07, "loss": 0.1254, "step": 6614 }, { "epoch": 0.42, "grad_norm": 0.5381319677601095, "learning_rate": 6.486036231407499e-07, "loss": 0.0628, "step": 6615 }, { "epoch": 0.42, "grad_norm": 0.3956450165002046, "learning_rate": 6.485050126906442e-07, "loss": 0.1665, "step": 6616 }, { "epoch": 0.42, "grad_norm": 0.6006757979680589, "learning_rate": 6.48406395905016e-07, "loss": 0.1678, "step": 6617 }, { "epoch": 0.42, "grad_norm": 0.5749528585225581, "learning_rate": 6.483077727880726e-07, "loss": 0.2657, "step": 6618 }, { "epoch": 0.42, "grad_norm": 2.546013603761911, "learning_rate": 6.482091433440215e-07, "loss": 0.1231, "step": 6619 }, { "epoch": 0.42, "grad_norm": 0.6563493718514495, "learning_rate": 6.481105075770705e-07, "loss": 0.3289, "step": 6620 }, { "epoch": 0.42, "grad_norm": 0.3557252399707117, "learning_rate": 6.480118654914275e-07, "loss": 0.2553, "step": 6621 }, { "epoch": 0.42, "grad_norm": 1.3836554168728923, "learning_rate": 6.479132170913009e-07, "loss": 0.2631, "step": 6622 }, { "epoch": 0.42, "grad_norm": 0.49305934072685575, "learning_rate": 6.478145623808988e-07, "loss": 0.2529, "step": 6623 }, { "epoch": 0.42, "grad_norm": 0.9065870549860774, "learning_rate": 6.477159013644306e-07, "loss": 0.2566, "step": 6624 }, { "epoch": 0.42, "grad_norm": 1.220369398802296, "learning_rate": 6.476172340461051e-07, "loss": 0.3382, "step": 6625 }, { "epoch": 0.42, "grad_norm": 0.5111977616318292, "learning_rate": 6.475185604301314e-07, "loss": 0.1724, "step": 6626 }, { "epoch": 0.42, "grad_norm": 0.4111482409555583, "learning_rate": 6.474198805207196e-07, "loss": 0.1109, "step": 6627 }, { "epoch": 0.42, "grad_norm": 15.147696513130137, "learning_rate": 6.473211943220792e-07, "loss": 0.1227, "step": 6628 }, { "epoch": 0.42, "grad_norm": 0.9306742720330724, "learning_rate": 6.472225018384205e-07, "loss": 0.2259, "step": 6629 }, { "epoch": 0.42, "grad_norm": 0.26695730537907403, "learning_rate": 6.471238030739541e-07, "loss": 0.191, "step": 6630 }, { "epoch": 0.42, "grad_norm": 1.1497587790002861, "learning_rate": 6.470250980328903e-07, "loss": 0.3335, "step": 6631 }, { "epoch": 0.42, "grad_norm": 0.5381242770023748, "learning_rate": 6.469263867194404e-07, "loss": 0.0837, "step": 6632 }, { "epoch": 0.42, "grad_norm": 1.1718839718459892, "learning_rate": 6.468276691378154e-07, "loss": 0.2356, "step": 6633 }, { "epoch": 0.42, "grad_norm": 1.0734400553710504, "learning_rate": 6.467289452922268e-07, "loss": 0.229, "step": 6634 }, { "epoch": 0.42, "grad_norm": 1.4721460541689104, "learning_rate": 6.466302151868865e-07, "loss": 0.3775, "step": 6635 }, { "epoch": 0.42, "grad_norm": 0.47354152135182404, "learning_rate": 6.465314788260065e-07, "loss": 0.3041, "step": 6636 }, { "epoch": 0.42, "grad_norm": 1.213183190688514, "learning_rate": 6.46432736213799e-07, "loss": 0.0726, "step": 6637 }, { "epoch": 0.42, "grad_norm": 4.691683528170075, "learning_rate": 6.463339873544766e-07, "loss": 0.0671, "step": 6638 }, { "epoch": 0.42, "grad_norm": 0.7382855304101535, "learning_rate": 6.462352322522523e-07, "loss": 0.3539, "step": 6639 }, { "epoch": 0.42, "grad_norm": 2.0412966731957, "learning_rate": 6.461364709113389e-07, "loss": 0.1656, "step": 6640 }, { "epoch": 0.42, "grad_norm": 0.9203636723708595, "learning_rate": 6.460377033359499e-07, "loss": 0.2078, "step": 6641 }, { "epoch": 0.42, "grad_norm": 0.5885890438419307, "learning_rate": 6.459389295302989e-07, "loss": 0.0815, "step": 6642 }, { "epoch": 0.42, "grad_norm": 0.7677984394111165, "learning_rate": 6.458401494985997e-07, "loss": 0.3011, "step": 6643 }, { "epoch": 0.42, "grad_norm": 0.2514408018419238, "learning_rate": 6.457413632450666e-07, "loss": 0.0802, "step": 6644 }, { "epoch": 0.42, "grad_norm": 1.9141602880990805, "learning_rate": 6.45642570773914e-07, "loss": 0.2407, "step": 6645 }, { "epoch": 0.42, "grad_norm": 1.0567396478833757, "learning_rate": 6.455437720893564e-07, "loss": 0.3146, "step": 6646 }, { "epoch": 0.42, "grad_norm": 0.7719336200227167, "learning_rate": 6.454449671956091e-07, "loss": 0.1949, "step": 6647 }, { "epoch": 0.42, "grad_norm": 0.799251195275632, "learning_rate": 6.45346156096887e-07, "loss": 0.296, "step": 6648 }, { "epoch": 0.42, "grad_norm": 5.86904514165989, "learning_rate": 6.452473387974058e-07, "loss": 0.368, "step": 6649 }, { "epoch": 0.42, "grad_norm": 0.5941697259217461, "learning_rate": 6.45148515301381e-07, "loss": 0.2574, "step": 6650 }, { "epoch": 0.42, "grad_norm": 1.471621454658878, "learning_rate": 6.45049685613029e-07, "loss": 0.1181, "step": 6651 }, { "epoch": 0.42, "grad_norm": 0.5072800655176259, "learning_rate": 6.449508497365656e-07, "loss": 0.3983, "step": 6652 }, { "epoch": 0.42, "grad_norm": 0.44085371993939076, "learning_rate": 6.448520076762076e-07, "loss": 0.0789, "step": 6653 }, { "epoch": 0.42, "grad_norm": 12.275489586224218, "learning_rate": 6.447531594361719e-07, "loss": 0.0508, "step": 6654 }, { "epoch": 0.42, "grad_norm": 1.630060685326117, "learning_rate": 6.446543050206752e-07, "loss": 0.1589, "step": 6655 }, { "epoch": 0.42, "grad_norm": 0.2328741430616827, "learning_rate": 6.445554444339352e-07, "loss": 0.0978, "step": 6656 }, { "epoch": 0.42, "grad_norm": 0.5372530982254144, "learning_rate": 6.444565776801693e-07, "loss": 0.1315, "step": 6657 }, { "epoch": 0.42, "grad_norm": 0.7599789551403485, "learning_rate": 6.443577047635956e-07, "loss": 0.1224, "step": 6658 }, { "epoch": 0.42, "grad_norm": 4.0296851642204246, "learning_rate": 6.442588256884318e-07, "loss": 0.2358, "step": 6659 }, { "epoch": 0.42, "grad_norm": 2.108297544744153, "learning_rate": 6.441599404588966e-07, "loss": 0.12, "step": 6660 }, { "epoch": 0.42, "grad_norm": 0.501292539605557, "learning_rate": 6.440610490792084e-07, "loss": 0.2431, "step": 6661 }, { "epoch": 0.42, "grad_norm": 0.34171415167564934, "learning_rate": 6.439621515535863e-07, "loss": 0.1894, "step": 6662 }, { "epoch": 0.42, "grad_norm": 0.682733308349673, "learning_rate": 6.438632478862494e-07, "loss": 0.3229, "step": 6663 }, { "epoch": 0.42, "grad_norm": 12.390017699026032, "learning_rate": 6.437643380814171e-07, "loss": 0.3022, "step": 6664 }, { "epoch": 0.43, "grad_norm": 1.8681554634213695, "learning_rate": 6.436654221433093e-07, "loss": 0.0442, "step": 6665 }, { "epoch": 0.43, "grad_norm": 0.541158370693776, "learning_rate": 6.435665000761458e-07, "loss": 0.2862, "step": 6666 }, { "epoch": 0.43, "grad_norm": 0.390095187041583, "learning_rate": 6.434675718841468e-07, "loss": 0.2874, "step": 6667 }, { "epoch": 0.43, "grad_norm": 1.4536828965465176, "learning_rate": 6.433686375715327e-07, "loss": 0.2102, "step": 6668 }, { "epoch": 0.43, "grad_norm": 1.1446732248591567, "learning_rate": 6.432696971425243e-07, "loss": 0.2566, "step": 6669 }, { "epoch": 0.43, "grad_norm": 0.757703886679042, "learning_rate": 6.431707506013426e-07, "loss": 0.2267, "step": 6670 }, { "epoch": 0.43, "grad_norm": 1.4140671835415346, "learning_rate": 6.430717979522088e-07, "loss": 0.2333, "step": 6671 }, { "epoch": 0.43, "grad_norm": 0.6067960560052531, "learning_rate": 6.429728391993445e-07, "loss": 0.2523, "step": 6672 }, { "epoch": 0.43, "grad_norm": 0.7960205313213627, "learning_rate": 6.428738743469717e-07, "loss": 0.2754, "step": 6673 }, { "epoch": 0.43, "grad_norm": 0.7451427062897477, "learning_rate": 6.427749033993119e-07, "loss": 0.228, "step": 6674 }, { "epoch": 0.43, "grad_norm": 1.0022757694742077, "learning_rate": 6.42675926360588e-07, "loss": 0.4864, "step": 6675 }, { "epoch": 0.43, "grad_norm": 3.1426768082927397, "learning_rate": 6.425769432350221e-07, "loss": 0.1139, "step": 6676 }, { "epoch": 0.43, "grad_norm": 2.124319062949054, "learning_rate": 6.424779540268372e-07, "loss": 0.1503, "step": 6677 }, { "epoch": 0.43, "grad_norm": 1.2308310821855686, "learning_rate": 6.423789587402564e-07, "loss": 0.1021, "step": 6678 }, { "epoch": 0.43, "grad_norm": 0.5066897111623873, "learning_rate": 6.422799573795031e-07, "loss": 0.3278, "step": 6679 }, { "epoch": 0.43, "grad_norm": 0.5059648558799121, "learning_rate": 6.421809499488006e-07, "loss": 0.1839, "step": 6680 }, { "epoch": 0.43, "grad_norm": 0.27275453426585833, "learning_rate": 6.420819364523731e-07, "loss": 0.0828, "step": 6681 }, { "epoch": 0.43, "grad_norm": 0.5761906999379414, "learning_rate": 6.419829168944444e-07, "loss": 0.1035, "step": 6682 }, { "epoch": 0.43, "grad_norm": 0.7912710360378307, "learning_rate": 6.418838912792393e-07, "loss": 0.0861, "step": 6683 }, { "epoch": 0.43, "grad_norm": 1.572790723941205, "learning_rate": 6.41784859610982e-07, "loss": 0.1856, "step": 6684 }, { "epoch": 0.43, "grad_norm": 1.2006324218480453, "learning_rate": 6.416858218938975e-07, "loss": 0.199, "step": 6685 }, { "epoch": 0.43, "grad_norm": 1.181230130596287, "learning_rate": 6.415867781322112e-07, "loss": 0.3226, "step": 6686 }, { "epoch": 0.43, "grad_norm": 0.9367056371259241, "learning_rate": 6.414877283301482e-07, "loss": 0.3043, "step": 6687 }, { "epoch": 0.43, "grad_norm": 0.780971038545777, "learning_rate": 6.413886724919343e-07, "loss": 0.3924, "step": 6688 }, { "epoch": 0.43, "grad_norm": 0.1464454703347422, "learning_rate": 6.412896106217955e-07, "loss": 0.0669, "step": 6689 }, { "epoch": 0.43, "grad_norm": 0.6590059178969544, "learning_rate": 6.411905427239577e-07, "loss": 0.3126, "step": 6690 }, { "epoch": 0.43, "grad_norm": 0.5853783605081505, "learning_rate": 6.410914688026475e-07, "loss": 0.2109, "step": 6691 }, { "epoch": 0.43, "grad_norm": 1.1179016763262029, "learning_rate": 6.409923888620918e-07, "loss": 0.2518, "step": 6692 }, { "epoch": 0.43, "grad_norm": 13.581686640631824, "learning_rate": 6.408933029065173e-07, "loss": 0.2171, "step": 6693 }, { "epoch": 0.43, "grad_norm": 0.8759997525338321, "learning_rate": 6.407942109401514e-07, "loss": 0.141, "step": 6694 }, { "epoch": 0.43, "grad_norm": 0.5080766936981634, "learning_rate": 6.406951129672212e-07, "loss": 0.2806, "step": 6695 }, { "epoch": 0.43, "grad_norm": 1.0399523992915547, "learning_rate": 6.405960089919548e-07, "loss": 0.0772, "step": 6696 }, { "epoch": 0.43, "grad_norm": 0.6478695764754306, "learning_rate": 6.404968990185799e-07, "loss": 0.0619, "step": 6697 }, { "epoch": 0.43, "grad_norm": 7.273136086090483, "learning_rate": 6.403977830513248e-07, "loss": 0.2038, "step": 6698 }, { "epoch": 0.43, "grad_norm": 0.7704261948601646, "learning_rate": 6.402986610944182e-07, "loss": 0.2672, "step": 6699 }, { "epoch": 0.43, "grad_norm": 1.0132380244586805, "learning_rate": 6.401995331520886e-07, "loss": 0.1967, "step": 6700 }, { "epoch": 0.43, "grad_norm": 2.9905749152200145, "learning_rate": 6.401003992285652e-07, "loss": 0.0433, "step": 6701 }, { "epoch": 0.43, "grad_norm": 1.4633532929933657, "learning_rate": 6.400012593280771e-07, "loss": 0.1597, "step": 6702 }, { "epoch": 0.43, "grad_norm": 1.9126576229653443, "learning_rate": 6.399021134548537e-07, "loss": 0.0904, "step": 6703 }, { "epoch": 0.43, "grad_norm": 0.7200646969220672, "learning_rate": 6.39802961613125e-07, "loss": 0.3084, "step": 6704 }, { "epoch": 0.43, "grad_norm": 1.1758572937507459, "learning_rate": 6.39703803807121e-07, "loss": 0.0533, "step": 6705 }, { "epoch": 0.43, "grad_norm": 1.4498625526282167, "learning_rate": 6.396046400410718e-07, "loss": 0.0553, "step": 6706 }, { "epoch": 0.43, "grad_norm": 0.6364404529115247, "learning_rate": 6.39505470319208e-07, "loss": 0.226, "step": 6707 }, { "epoch": 0.43, "grad_norm": 1.7772069739561727, "learning_rate": 6.394062946457604e-07, "loss": 0.4006, "step": 6708 }, { "epoch": 0.43, "grad_norm": 0.6028916535151102, "learning_rate": 6.3930711302496e-07, "loss": 0.3125, "step": 6709 }, { "epoch": 0.43, "grad_norm": 0.6758249587863409, "learning_rate": 6.39207925461038e-07, "loss": 0.4262, "step": 6710 }, { "epoch": 0.43, "grad_norm": 7.127001295949069, "learning_rate": 6.391087319582263e-07, "loss": 0.2229, "step": 6711 }, { "epoch": 0.43, "grad_norm": 0.3262722172903861, "learning_rate": 6.390095325207564e-07, "loss": 0.0041, "step": 6712 }, { "epoch": 0.43, "grad_norm": 0.7507862581580848, "learning_rate": 6.389103271528605e-07, "loss": 0.145, "step": 6713 }, { "epoch": 0.43, "grad_norm": 0.6605094691567882, "learning_rate": 6.388111158587706e-07, "loss": 0.2998, "step": 6714 }, { "epoch": 0.43, "grad_norm": 3.5563965705038187, "learning_rate": 6.387118986427195e-07, "loss": 0.0762, "step": 6715 }, { "epoch": 0.43, "grad_norm": 0.8925033905398138, "learning_rate": 6.386126755089398e-07, "loss": 0.2854, "step": 6716 }, { "epoch": 0.43, "grad_norm": 5.237973136924914, "learning_rate": 6.385134464616648e-07, "loss": 0.0446, "step": 6717 }, { "epoch": 0.43, "grad_norm": 0.5150703661256893, "learning_rate": 6.384142115051279e-07, "loss": 0.1314, "step": 6718 }, { "epoch": 0.43, "grad_norm": 0.4835942761407444, "learning_rate": 6.383149706435625e-07, "loss": 0.1088, "step": 6719 }, { "epoch": 0.43, "grad_norm": 1.6085024289437382, "learning_rate": 6.382157238812023e-07, "loss": 0.1894, "step": 6720 }, { "epoch": 0.43, "grad_norm": 0.6893796463969709, "learning_rate": 6.381164712222814e-07, "loss": 0.1025, "step": 6721 }, { "epoch": 0.43, "grad_norm": 0.3454877105562411, "learning_rate": 6.380172126710344e-07, "loss": 0.0908, "step": 6722 }, { "epoch": 0.43, "grad_norm": 0.4616723698580447, "learning_rate": 6.379179482316954e-07, "loss": 0.2442, "step": 6723 }, { "epoch": 0.43, "grad_norm": 0.9968537970426457, "learning_rate": 6.378186779084995e-07, "loss": 0.3062, "step": 6724 }, { "epoch": 0.43, "grad_norm": 1.6244526051281594, "learning_rate": 6.377194017056819e-07, "loss": 0.2975, "step": 6725 }, { "epoch": 0.43, "grad_norm": 0.7509501104222818, "learning_rate": 6.376201196274777e-07, "loss": 0.1483, "step": 6726 }, { "epoch": 0.43, "grad_norm": 0.3151371133817729, "learning_rate": 6.375208316781226e-07, "loss": 0.1096, "step": 6727 }, { "epoch": 0.43, "grad_norm": 0.3383792185360287, "learning_rate": 6.374215378618523e-07, "loss": 0.0132, "step": 6728 }, { "epoch": 0.43, "grad_norm": 0.6491694219196131, "learning_rate": 6.373222381829031e-07, "loss": 0.1896, "step": 6729 }, { "epoch": 0.43, "grad_norm": 1.3156566306106663, "learning_rate": 6.37222932645511e-07, "loss": 0.2652, "step": 6730 }, { "epoch": 0.43, "grad_norm": 0.8979187862266332, "learning_rate": 6.371236212539129e-07, "loss": 0.2058, "step": 6731 }, { "epoch": 0.43, "grad_norm": 1.278954544093297, "learning_rate": 6.370243040123452e-07, "loss": 0.3163, "step": 6732 }, { "epoch": 0.43, "grad_norm": 0.6705358804955086, "learning_rate": 6.369249809250454e-07, "loss": 0.2729, "step": 6733 }, { "epoch": 0.43, "grad_norm": 0.706961289716489, "learning_rate": 6.368256519962506e-07, "loss": 0.0283, "step": 6734 }, { "epoch": 0.43, "grad_norm": 1.1270196563346673, "learning_rate": 6.367263172301984e-07, "loss": 0.043, "step": 6735 }, { "epoch": 0.43, "grad_norm": 0.27520589895700176, "learning_rate": 6.366269766311269e-07, "loss": 0.1172, "step": 6736 }, { "epoch": 0.43, "grad_norm": 1.6130876314593725, "learning_rate": 6.365276302032737e-07, "loss": 0.4631, "step": 6737 }, { "epoch": 0.43, "grad_norm": 1.9158684740988248, "learning_rate": 6.364282779508774e-07, "loss": 0.0629, "step": 6738 }, { "epoch": 0.43, "grad_norm": 1.5385877544612692, "learning_rate": 6.363289198781765e-07, "loss": 0.1979, "step": 6739 }, { "epoch": 0.43, "grad_norm": 2.7091496753358872, "learning_rate": 6.362295559894099e-07, "loss": 0.1839, "step": 6740 }, { "epoch": 0.43, "grad_norm": 1.9930991041229553, "learning_rate": 6.361301862888164e-07, "loss": 0.3691, "step": 6741 }, { "epoch": 0.43, "grad_norm": 0.5466540366244256, "learning_rate": 6.360308107806357e-07, "loss": 0.1623, "step": 6742 }, { "epoch": 0.43, "grad_norm": 1.13058829593568, "learning_rate": 6.35931429469107e-07, "loss": 0.266, "step": 6743 }, { "epoch": 0.43, "grad_norm": 0.8362409781450588, "learning_rate": 6.358320423584704e-07, "loss": 0.3643, "step": 6744 }, { "epoch": 0.43, "grad_norm": 0.8712235975431387, "learning_rate": 6.357326494529657e-07, "loss": 0.2947, "step": 6745 }, { "epoch": 0.43, "grad_norm": 5.790006196204553, "learning_rate": 6.356332507568333e-07, "loss": 0.1578, "step": 6746 }, { "epoch": 0.43, "grad_norm": 0.5730066080214747, "learning_rate": 6.355338462743138e-07, "loss": 0.1548, "step": 6747 }, { "epoch": 0.43, "grad_norm": 2.454384903442159, "learning_rate": 6.35434436009648e-07, "loss": 0.3031, "step": 6748 }, { "epoch": 0.43, "grad_norm": 0.6514055731437256, "learning_rate": 6.353350199670771e-07, "loss": 0.1687, "step": 6749 }, { "epoch": 0.43, "grad_norm": 0.18309743262848088, "learning_rate": 6.352355981508419e-07, "loss": 0.1015, "step": 6750 }, { "epoch": 0.43, "grad_norm": 0.8656871711227753, "learning_rate": 6.351361705651842e-07, "loss": 0.3523, "step": 6751 }, { "epoch": 0.43, "grad_norm": 0.8071333822294513, "learning_rate": 6.350367372143459e-07, "loss": 0.261, "step": 6752 }, { "epoch": 0.43, "grad_norm": 0.6646761374076501, "learning_rate": 6.34937298102569e-07, "loss": 0.2523, "step": 6753 }, { "epoch": 0.43, "grad_norm": 0.5172036440857891, "learning_rate": 6.348378532340957e-07, "loss": 0.4282, "step": 6754 }, { "epoch": 0.43, "grad_norm": 0.8127899949717996, "learning_rate": 6.347384026131683e-07, "loss": 0.2521, "step": 6755 }, { "epoch": 0.43, "grad_norm": 1.1329997645487928, "learning_rate": 6.3463894624403e-07, "loss": 0.2012, "step": 6756 }, { "epoch": 0.43, "grad_norm": 4.088470560050196, "learning_rate": 6.345394841309237e-07, "loss": 0.0251, "step": 6757 }, { "epoch": 0.43, "grad_norm": 0.8617685245678508, "learning_rate": 6.344400162780923e-07, "loss": 0.0105, "step": 6758 }, { "epoch": 0.43, "grad_norm": 0.5893830867155143, "learning_rate": 6.343405426897797e-07, "loss": 0.2246, "step": 6759 }, { "epoch": 0.43, "grad_norm": 1.7666749117852034, "learning_rate": 6.342410633702294e-07, "loss": 0.1976, "step": 6760 }, { "epoch": 0.43, "grad_norm": 0.43331517845402123, "learning_rate": 6.341415783236854e-07, "loss": 0.1695, "step": 6761 }, { "epoch": 0.43, "grad_norm": 0.9868818836290725, "learning_rate": 6.340420875543921e-07, "loss": 0.0934, "step": 6762 }, { "epoch": 0.43, "grad_norm": 0.6897306726627255, "learning_rate": 6.33942591066594e-07, "loss": 0.2653, "step": 6763 }, { "epoch": 0.43, "grad_norm": 4.0389408405008735, "learning_rate": 6.338430888645356e-07, "loss": 0.0828, "step": 6764 }, { "epoch": 0.43, "grad_norm": 0.5074205963952, "learning_rate": 6.33743580952462e-07, "loss": 0.2088, "step": 6765 }, { "epoch": 0.43, "grad_norm": 0.47124719780596425, "learning_rate": 6.336440673346184e-07, "loss": 0.1126, "step": 6766 }, { "epoch": 0.43, "grad_norm": 0.4583817460931665, "learning_rate": 6.335445480152503e-07, "loss": 0.1736, "step": 6767 }, { "epoch": 0.43, "grad_norm": 0.3462864839528916, "learning_rate": 6.334450229986031e-07, "loss": 0.1249, "step": 6768 }, { "epoch": 0.43, "grad_norm": 0.36479249709676476, "learning_rate": 6.33345492288923e-07, "loss": 0.0074, "step": 6769 }, { "epoch": 0.43, "grad_norm": 0.2588922594358886, "learning_rate": 6.332459558904563e-07, "loss": 0.1877, "step": 6770 }, { "epoch": 0.43, "grad_norm": 0.899914125571852, "learning_rate": 6.331464138074491e-07, "loss": 0.3181, "step": 6771 }, { "epoch": 0.43, "grad_norm": 0.934832204196358, "learning_rate": 6.330468660441484e-07, "loss": 0.2004, "step": 6772 }, { "epoch": 0.43, "grad_norm": 0.9039667094154356, "learning_rate": 6.329473126048008e-07, "loss": 0.0988, "step": 6773 }, { "epoch": 0.43, "grad_norm": 1.1220408080787108, "learning_rate": 6.328477534936537e-07, "loss": 0.2913, "step": 6774 }, { "epoch": 0.43, "grad_norm": 0.7439464280070911, "learning_rate": 6.327481887149542e-07, "loss": 0.2032, "step": 6775 }, { "epoch": 0.43, "grad_norm": 2.2175894949275574, "learning_rate": 6.326486182729504e-07, "loss": 0.217, "step": 6776 }, { "epoch": 0.43, "grad_norm": 0.6617656036990464, "learning_rate": 6.325490421718897e-07, "loss": 0.0769, "step": 6777 }, { "epoch": 0.43, "grad_norm": 0.6548727496560991, "learning_rate": 6.324494604160205e-07, "loss": 0.1635, "step": 6778 }, { "epoch": 0.43, "grad_norm": 1.1961538380748058, "learning_rate": 6.323498730095909e-07, "loss": 0.34, "step": 6779 }, { "epoch": 0.43, "grad_norm": 0.7573309400627748, "learning_rate": 6.322502799568496e-07, "loss": 0.1469, "step": 6780 }, { "epoch": 0.43, "grad_norm": 0.4620618967612187, "learning_rate": 6.321506812620457e-07, "loss": 0.1377, "step": 6781 }, { "epoch": 0.43, "grad_norm": 0.9670347054280445, "learning_rate": 6.32051076929428e-07, "loss": 0.2661, "step": 6782 }, { "epoch": 0.43, "grad_norm": 2.6153038264053357, "learning_rate": 6.319514669632459e-07, "loss": 0.1095, "step": 6783 }, { "epoch": 0.43, "grad_norm": 1.1576094055504325, "learning_rate": 6.318518513677491e-07, "loss": 0.2552, "step": 6784 }, { "epoch": 0.43, "grad_norm": 2.3510418092236915, "learning_rate": 6.317522301471872e-07, "loss": 0.2902, "step": 6785 }, { "epoch": 0.43, "grad_norm": 0.609354472764255, "learning_rate": 6.316526033058103e-07, "loss": 0.2841, "step": 6786 }, { "epoch": 0.43, "grad_norm": 1.3021652967977673, "learning_rate": 6.315529708478685e-07, "loss": 0.1702, "step": 6787 }, { "epoch": 0.43, "grad_norm": 0.9888446225038058, "learning_rate": 6.314533327776126e-07, "loss": 0.2604, "step": 6788 }, { "epoch": 0.43, "grad_norm": 3.0346951448584885, "learning_rate": 6.313536890992935e-07, "loss": 0.4024, "step": 6789 }, { "epoch": 0.43, "grad_norm": 0.5675104587584527, "learning_rate": 6.312540398171617e-07, "loss": 0.0111, "step": 6790 }, { "epoch": 0.43, "grad_norm": 0.5672563634103571, "learning_rate": 6.311543849354689e-07, "loss": 0.327, "step": 6791 }, { "epoch": 0.43, "grad_norm": 3.443557818405537, "learning_rate": 6.310547244584663e-07, "loss": 0.0626, "step": 6792 }, { "epoch": 0.43, "grad_norm": 0.3011797489014841, "learning_rate": 6.309550583904057e-07, "loss": 0.1006, "step": 6793 }, { "epoch": 0.43, "grad_norm": 1.246214236196623, "learning_rate": 6.308553867355391e-07, "loss": 0.1736, "step": 6794 }, { "epoch": 0.43, "grad_norm": 1.446954270409426, "learning_rate": 6.307557094981184e-07, "loss": 0.1546, "step": 6795 }, { "epoch": 0.43, "grad_norm": 1.7072287569490263, "learning_rate": 6.306560266823966e-07, "loss": 0.1176, "step": 6796 }, { "epoch": 0.43, "grad_norm": 0.9312694871096633, "learning_rate": 6.305563382926259e-07, "loss": 0.1041, "step": 6797 }, { "epoch": 0.43, "grad_norm": 1.1974276895069347, "learning_rate": 6.304566443330594e-07, "loss": 0.1647, "step": 6798 }, { "epoch": 0.43, "grad_norm": 0.719215210065318, "learning_rate": 6.303569448079502e-07, "loss": 0.1296, "step": 6799 }, { "epoch": 0.43, "grad_norm": 8.842410190704364, "learning_rate": 6.302572397215516e-07, "loss": 0.0253, "step": 6800 }, { "epoch": 0.43, "grad_norm": 0.30814150918820393, "learning_rate": 6.301575290781174e-07, "loss": 0.0688, "step": 6801 }, { "epoch": 0.43, "grad_norm": 0.6158718607868237, "learning_rate": 6.300578128819015e-07, "loss": 0.2403, "step": 6802 }, { "epoch": 0.43, "grad_norm": 1.4780560018664006, "learning_rate": 6.299580911371576e-07, "loss": 0.1857, "step": 6803 }, { "epoch": 0.43, "grad_norm": 0.9397332241649011, "learning_rate": 6.298583638481403e-07, "loss": 0.2389, "step": 6804 }, { "epoch": 0.43, "grad_norm": 4.923574127674357, "learning_rate": 6.297586310191043e-07, "loss": 0.0653, "step": 6805 }, { "epoch": 0.43, "grad_norm": 0.5311185850515917, "learning_rate": 6.296588926543042e-07, "loss": 0.295, "step": 6806 }, { "epoch": 0.43, "grad_norm": 1.6618138251404375, "learning_rate": 6.29559148757995e-07, "loss": 0.2583, "step": 6807 }, { "epoch": 0.43, "grad_norm": 0.7704545834260043, "learning_rate": 6.294593993344322e-07, "loss": 0.2924, "step": 6808 }, { "epoch": 0.43, "grad_norm": 3.225263236600781, "learning_rate": 6.29359644387871e-07, "loss": 0.218, "step": 6809 }, { "epoch": 0.43, "grad_norm": 1.5587407216636802, "learning_rate": 6.292598839225674e-07, "loss": 0.3032, "step": 6810 }, { "epoch": 0.43, "grad_norm": 1.0127580991201515, "learning_rate": 6.291601179427774e-07, "loss": 0.3185, "step": 6811 }, { "epoch": 0.43, "grad_norm": 0.511789852653221, "learning_rate": 6.290603464527571e-07, "loss": 0.0501, "step": 6812 }, { "epoch": 0.43, "grad_norm": 7.280016632547844, "learning_rate": 6.28960569456763e-07, "loss": 0.2233, "step": 6813 }, { "epoch": 0.43, "grad_norm": 0.7443266284461287, "learning_rate": 6.288607869590517e-07, "loss": 0.2224, "step": 6814 }, { "epoch": 0.43, "grad_norm": 1.1274665943383784, "learning_rate": 6.2876099896388e-07, "loss": 0.3323, "step": 6815 }, { "epoch": 0.43, "grad_norm": 0.5260091674169767, "learning_rate": 6.286612054755054e-07, "loss": 0.1671, "step": 6816 }, { "epoch": 0.43, "grad_norm": 0.6868699193481335, "learning_rate": 6.285614064981853e-07, "loss": 0.02, "step": 6817 }, { "epoch": 0.43, "grad_norm": 0.732733327052659, "learning_rate": 6.284616020361771e-07, "loss": 0.0463, "step": 6818 }, { "epoch": 0.43, "grad_norm": 0.21229347455748196, "learning_rate": 6.283617920937388e-07, "loss": 0.0889, "step": 6819 }, { "epoch": 0.43, "grad_norm": 1.7182442012876338, "learning_rate": 6.282619766751282e-07, "loss": 0.2928, "step": 6820 }, { "epoch": 0.43, "grad_norm": 1.2240411603638657, "learning_rate": 6.281621557846039e-07, "loss": 0.1031, "step": 6821 }, { "epoch": 0.44, "grad_norm": 0.49941767733144304, "learning_rate": 6.280623294264242e-07, "loss": 0.2916, "step": 6822 }, { "epoch": 0.44, "grad_norm": 2.4504738802027255, "learning_rate": 6.279624976048483e-07, "loss": 0.3901, "step": 6823 }, { "epoch": 0.44, "grad_norm": 1.8574063668748844, "learning_rate": 6.278626603241351e-07, "loss": 0.1969, "step": 6824 }, { "epoch": 0.44, "grad_norm": 0.6650372944583962, "learning_rate": 6.277628175885435e-07, "loss": 0.1371, "step": 6825 }, { "epoch": 0.44, "grad_norm": 1.8725049317109095, "learning_rate": 6.276629694023335e-07, "loss": 0.0056, "step": 6826 }, { "epoch": 0.44, "grad_norm": 1.7852613308837884, "learning_rate": 6.275631157697646e-07, "loss": 0.1149, "step": 6827 }, { "epoch": 0.44, "grad_norm": 0.5960671203452759, "learning_rate": 6.274632566950966e-07, "loss": 0.2283, "step": 6828 }, { "epoch": 0.44, "grad_norm": 1.3019755436344658, "learning_rate": 6.273633921825899e-07, "loss": 0.3012, "step": 6829 }, { "epoch": 0.44, "grad_norm": 10.425201121885749, "learning_rate": 6.272635222365049e-07, "loss": 0.2481, "step": 6830 }, { "epoch": 0.44, "grad_norm": 0.8176663616259935, "learning_rate": 6.271636468611021e-07, "loss": 0.1553, "step": 6831 }, { "epoch": 0.44, "grad_norm": 0.6115331284522859, "learning_rate": 6.270637660606426e-07, "loss": 0.0967, "step": 6832 }, { "epoch": 0.44, "grad_norm": 0.9737249811992744, "learning_rate": 6.269638798393875e-07, "loss": 0.281, "step": 6833 }, { "epoch": 0.44, "grad_norm": 18.043001773579363, "learning_rate": 6.268639882015978e-07, "loss": 0.1687, "step": 6834 }, { "epoch": 0.44, "grad_norm": 0.6132946727237054, "learning_rate": 6.267640911515355e-07, "loss": 0.2331, "step": 6835 }, { "epoch": 0.44, "grad_norm": 0.5091497400318866, "learning_rate": 6.266641886934622e-07, "loss": 0.1694, "step": 6836 }, { "epoch": 0.44, "grad_norm": 2.3970003631326264, "learning_rate": 6.2656428083164e-07, "loss": 0.3293, "step": 6837 }, { "epoch": 0.44, "grad_norm": 1.505052794904146, "learning_rate": 6.264643675703312e-07, "loss": 0.226, "step": 6838 }, { "epoch": 0.44, "grad_norm": 5.9167611183008, "learning_rate": 6.263644489137982e-07, "loss": 0.1118, "step": 6839 }, { "epoch": 0.44, "grad_norm": 1.0811616980231809, "learning_rate": 6.262645248663037e-07, "loss": 0.2247, "step": 6840 }, { "epoch": 0.44, "grad_norm": 1.6414905323992222, "learning_rate": 6.261645954321108e-07, "loss": 0.1955, "step": 6841 }, { "epoch": 0.44, "grad_norm": 1.3250585746642969, "learning_rate": 6.260646606154827e-07, "loss": 0.1739, "step": 6842 }, { "epoch": 0.44, "grad_norm": 0.7866168030369379, "learning_rate": 6.259647204206827e-07, "loss": 0.3972, "step": 6843 }, { "epoch": 0.44, "grad_norm": 0.5581515988682273, "learning_rate": 6.258647748519746e-07, "loss": 0.2474, "step": 6844 }, { "epoch": 0.44, "grad_norm": 0.7993384137029356, "learning_rate": 6.257648239136221e-07, "loss": 0.3265, "step": 6845 }, { "epoch": 0.44, "grad_norm": 0.8276376844992642, "learning_rate": 6.256648676098895e-07, "loss": 0.2602, "step": 6846 }, { "epoch": 0.44, "grad_norm": 1.3657207400181275, "learning_rate": 6.25564905945041e-07, "loss": 0.3829, "step": 6847 }, { "epoch": 0.44, "grad_norm": 0.4485079253704959, "learning_rate": 6.254649389233409e-07, "loss": 0.176, "step": 6848 }, { "epoch": 0.44, "grad_norm": 0.43576652917857184, "learning_rate": 6.253649665490545e-07, "loss": 0.0893, "step": 6849 }, { "epoch": 0.44, "grad_norm": 0.5289305109863717, "learning_rate": 6.252649888264468e-07, "loss": 0.1441, "step": 6850 }, { "epoch": 0.44, "grad_norm": 1.469843352456335, "learning_rate": 6.251650057597826e-07, "loss": 0.1063, "step": 6851 }, { "epoch": 0.44, "grad_norm": 1.1768701925903495, "learning_rate": 6.250650173533278e-07, "loss": 0.2213, "step": 6852 }, { "epoch": 0.44, "grad_norm": 0.9943437390577592, "learning_rate": 6.24965023611348e-07, "loss": 0.3129, "step": 6853 }, { "epoch": 0.44, "grad_norm": 1.057901605441251, "learning_rate": 6.248650245381088e-07, "loss": 0.0195, "step": 6854 }, { "epoch": 0.44, "grad_norm": 0.7806514200502783, "learning_rate": 6.247650201378769e-07, "loss": 0.2214, "step": 6855 }, { "epoch": 0.44, "grad_norm": 0.5671469742361622, "learning_rate": 6.246650104149184e-07, "loss": 0.2613, "step": 6856 }, { "epoch": 0.44, "grad_norm": 5.373765136142397, "learning_rate": 6.245649953734998e-07, "loss": 0.1473, "step": 6857 }, { "epoch": 0.44, "grad_norm": 0.4461208500038242, "learning_rate": 6.244649750178882e-07, "loss": 0.2095, "step": 6858 }, { "epoch": 0.44, "grad_norm": 0.724439562233869, "learning_rate": 6.243649493523505e-07, "loss": 0.127, "step": 6859 }, { "epoch": 0.44, "grad_norm": 12.835199059229101, "learning_rate": 6.242649183811541e-07, "loss": 0.0996, "step": 6860 }, { "epoch": 0.44, "grad_norm": 0.7136121296431818, "learning_rate": 6.241648821085665e-07, "loss": 0.1202, "step": 6861 }, { "epoch": 0.44, "grad_norm": 1.1481135542597556, "learning_rate": 6.240648405388555e-07, "loss": 0.1732, "step": 6862 }, { "epoch": 0.44, "grad_norm": 14.84286234223769, "learning_rate": 6.239647936762888e-07, "loss": 0.21, "step": 6863 }, { "epoch": 0.44, "grad_norm": 7.559212080866238, "learning_rate": 6.238647415251349e-07, "loss": 0.1487, "step": 6864 }, { "epoch": 0.44, "grad_norm": 0.8333486084871736, "learning_rate": 6.237646840896622e-07, "loss": 0.0493, "step": 6865 }, { "epoch": 0.44, "grad_norm": 0.9564629195000102, "learning_rate": 6.236646213741393e-07, "loss": 0.2068, "step": 6866 }, { "epoch": 0.44, "grad_norm": 0.6018376526384829, "learning_rate": 6.235645533828348e-07, "loss": 0.2356, "step": 6867 }, { "epoch": 0.44, "grad_norm": 1.6077695408432908, "learning_rate": 6.234644801200182e-07, "loss": 0.378, "step": 6868 }, { "epoch": 0.44, "grad_norm": 0.25863719910906324, "learning_rate": 6.233644015899587e-07, "loss": 0.1129, "step": 6869 }, { "epoch": 0.44, "grad_norm": 13.29980844732213, "learning_rate": 6.232643177969258e-07, "loss": 0.092, "step": 6870 }, { "epoch": 0.44, "grad_norm": 0.4036243777816879, "learning_rate": 6.231642287451894e-07, "loss": 0.137, "step": 6871 }, { "epoch": 0.44, "grad_norm": 1.531938766838832, "learning_rate": 6.230641344390193e-07, "loss": 0.1709, "step": 6872 }, { "epoch": 0.44, "grad_norm": 1.626332250713213, "learning_rate": 6.22964034882686e-07, "loss": 0.259, "step": 6873 }, { "epoch": 0.44, "grad_norm": 1.3131201119663751, "learning_rate": 6.228639300804597e-07, "loss": 0.1008, "step": 6874 }, { "epoch": 0.44, "grad_norm": 0.948858701528825, "learning_rate": 6.227638200366111e-07, "loss": 0.0435, "step": 6875 }, { "epoch": 0.44, "grad_norm": 0.34621796552462913, "learning_rate": 6.226637047554112e-07, "loss": 0.1809, "step": 6876 }, { "epoch": 0.44, "grad_norm": 0.6545483903692884, "learning_rate": 6.22563584241131e-07, "loss": 0.2285, "step": 6877 }, { "epoch": 0.44, "grad_norm": 1.4983342353946147, "learning_rate": 6.224634584980419e-07, "loss": 0.2559, "step": 6878 }, { "epoch": 0.44, "grad_norm": 0.6262553504638929, "learning_rate": 6.223633275304157e-07, "loss": 0.2534, "step": 6879 }, { "epoch": 0.44, "grad_norm": 3.0476521647089876, "learning_rate": 6.222631913425237e-07, "loss": 0.1286, "step": 6880 }, { "epoch": 0.44, "grad_norm": 1.7532988373024432, "learning_rate": 6.221630499386383e-07, "loss": 0.1562, "step": 6881 }, { "epoch": 0.44, "grad_norm": 0.2812288963283702, "learning_rate": 6.220629033230317e-07, "loss": 0.0915, "step": 6882 }, { "epoch": 0.44, "grad_norm": 0.5943660575348206, "learning_rate": 6.219627514999761e-07, "loss": 0.1219, "step": 6883 }, { "epoch": 0.44, "grad_norm": 3.5110541196172584, "learning_rate": 6.218625944737444e-07, "loss": 0.0744, "step": 6884 }, { "epoch": 0.44, "grad_norm": 1.1145811824476093, "learning_rate": 6.217624322486094e-07, "loss": 0.361, "step": 6885 }, { "epoch": 0.44, "grad_norm": 1.0813975123631303, "learning_rate": 6.216622648288443e-07, "loss": 0.1015, "step": 6886 }, { "epoch": 0.44, "grad_norm": 0.5614615087155922, "learning_rate": 6.215620922187226e-07, "loss": 0.2179, "step": 6887 }, { "epoch": 0.44, "grad_norm": 0.666469936246563, "learning_rate": 6.214619144225175e-07, "loss": 0.1969, "step": 6888 }, { "epoch": 0.44, "grad_norm": 1.8435504813603971, "learning_rate": 6.21361731444503e-07, "loss": 0.1713, "step": 6889 }, { "epoch": 0.44, "grad_norm": 9.12098429969003, "learning_rate": 6.212615432889529e-07, "loss": 0.1041, "step": 6890 }, { "epoch": 0.44, "grad_norm": 0.6345391001430074, "learning_rate": 6.211613499601418e-07, "loss": 0.327, "step": 6891 }, { "epoch": 0.44, "grad_norm": 1.1628990424915013, "learning_rate": 6.210611514623439e-07, "loss": 0.1649, "step": 6892 }, { "epoch": 0.44, "grad_norm": 0.8765090102836677, "learning_rate": 6.209609477998338e-07, "loss": 0.2759, "step": 6893 }, { "epoch": 0.44, "grad_norm": 1.2374649525278218, "learning_rate": 6.208607389768866e-07, "loss": 0.3338, "step": 6894 }, { "epoch": 0.44, "grad_norm": 5.199880028380958, "learning_rate": 6.207605249977773e-07, "loss": 0.3526, "step": 6895 }, { "epoch": 0.44, "grad_norm": 2.5493822708776075, "learning_rate": 6.206603058667814e-07, "loss": 0.3173, "step": 6896 }, { "epoch": 0.44, "grad_norm": 1.2408383608057978, "learning_rate": 6.20560081588174e-07, "loss": 0.2257, "step": 6897 }, { "epoch": 0.44, "grad_norm": 0.28977942430802, "learning_rate": 6.204598521662315e-07, "loss": 0.0809, "step": 6898 }, { "epoch": 0.44, "grad_norm": 0.5550949076424141, "learning_rate": 6.203596176052293e-07, "loss": 0.0761, "step": 6899 }, { "epoch": 0.44, "grad_norm": 0.2417739785051891, "learning_rate": 6.20259377909444e-07, "loss": 0.0807, "step": 6900 }, { "epoch": 0.44, "grad_norm": 0.39630038415248925, "learning_rate": 6.201591330831517e-07, "loss": 0.0536, "step": 6901 }, { "epoch": 0.44, "grad_norm": 0.9842101392684679, "learning_rate": 6.200588831306293e-07, "loss": 0.3377, "step": 6902 }, { "epoch": 0.44, "grad_norm": 0.5111251463979756, "learning_rate": 6.199586280561538e-07, "loss": 0.1592, "step": 6903 }, { "epoch": 0.44, "grad_norm": 0.3409038537179123, "learning_rate": 6.198583678640019e-07, "loss": 0.1111, "step": 6904 }, { "epoch": 0.44, "grad_norm": 0.3043629387571401, "learning_rate": 6.197581025584511e-07, "loss": 0.0168, "step": 6905 }, { "epoch": 0.44, "grad_norm": 0.7300653946375809, "learning_rate": 6.196578321437789e-07, "loss": 0.0399, "step": 6906 }, { "epoch": 0.44, "grad_norm": 0.8141902535050116, "learning_rate": 6.19557556624263e-07, "loss": 0.1366, "step": 6907 }, { "epoch": 0.44, "grad_norm": 0.07619674392620761, "learning_rate": 6.194572760041815e-07, "loss": 0.0012, "step": 6908 }, { "epoch": 0.44, "grad_norm": 1.8270895485880623, "learning_rate": 6.193569902878124e-07, "loss": 0.2732, "step": 6909 }, { "epoch": 0.44, "grad_norm": 0.8055134513779834, "learning_rate": 6.192566994794342e-07, "loss": 0.219, "step": 6910 }, { "epoch": 0.44, "grad_norm": 1.3787247546125327, "learning_rate": 6.191564035833253e-07, "loss": 0.4333, "step": 6911 }, { "epoch": 0.44, "grad_norm": 0.9173091222520319, "learning_rate": 6.190561026037648e-07, "loss": 0.2145, "step": 6912 }, { "epoch": 0.44, "grad_norm": 0.8747517670691473, "learning_rate": 6.189557965450316e-07, "loss": 0.1481, "step": 6913 }, { "epoch": 0.44, "grad_norm": 0.5975429868199008, "learning_rate": 6.188554854114052e-07, "loss": 0.137, "step": 6914 }, { "epoch": 0.44, "grad_norm": 0.7264176118036235, "learning_rate": 6.187551692071648e-07, "loss": 0.1171, "step": 6915 }, { "epoch": 0.44, "grad_norm": 1.098857129560535, "learning_rate": 6.1865484793659e-07, "loss": 0.487, "step": 6916 }, { "epoch": 0.44, "grad_norm": 0.5252791025312935, "learning_rate": 6.185545216039609e-07, "loss": 0.2031, "step": 6917 }, { "epoch": 0.44, "grad_norm": 0.6138002838529008, "learning_rate": 6.184541902135576e-07, "loss": 0.1736, "step": 6918 }, { "epoch": 0.44, "grad_norm": 0.5698825485798151, "learning_rate": 6.183538537696604e-07, "loss": 0.2127, "step": 6919 }, { "epoch": 0.44, "grad_norm": 2.5815916642846686, "learning_rate": 6.182535122765498e-07, "loss": 0.1407, "step": 6920 }, { "epoch": 0.44, "grad_norm": 3.9493237980265676, "learning_rate": 6.181531657385068e-07, "loss": 0.1887, "step": 6921 }, { "epoch": 0.44, "grad_norm": 1.7150951635851865, "learning_rate": 6.180528141598121e-07, "loss": 0.1532, "step": 6922 }, { "epoch": 0.44, "grad_norm": 0.39603092612381635, "learning_rate": 6.179524575447471e-07, "loss": 0.0771, "step": 6923 }, { "epoch": 0.44, "grad_norm": 1.0938817803981258, "learning_rate": 6.178520958975932e-07, "loss": 0.2842, "step": 6924 }, { "epoch": 0.44, "grad_norm": 0.8009649953234502, "learning_rate": 6.17751729222632e-07, "loss": 0.0132, "step": 6925 }, { "epoch": 0.44, "grad_norm": 0.33108503279882795, "learning_rate": 6.176513575241452e-07, "loss": 0.0082, "step": 6926 }, { "epoch": 0.44, "grad_norm": 0.31729671294644823, "learning_rate": 6.175509808064149e-07, "loss": 0.1053, "step": 6927 }, { "epoch": 0.44, "grad_norm": 0.5492720653003984, "learning_rate": 6.174505990737238e-07, "loss": 0.1087, "step": 6928 }, { "epoch": 0.44, "grad_norm": 0.41360439446294106, "learning_rate": 6.173502123303538e-07, "loss": 0.2309, "step": 6929 }, { "epoch": 0.44, "grad_norm": 0.5401488246862701, "learning_rate": 6.172498205805878e-07, "loss": 0.2762, "step": 6930 }, { "epoch": 0.44, "grad_norm": 1.1357691343779526, "learning_rate": 6.171494238287088e-07, "loss": 0.26, "step": 6931 }, { "epoch": 0.44, "grad_norm": 0.34405876677592406, "learning_rate": 6.170490220789999e-07, "loss": 0.2515, "step": 6932 }, { "epoch": 0.44, "grad_norm": 1.0478404241618138, "learning_rate": 6.169486153357444e-07, "loss": 0.2009, "step": 6933 }, { "epoch": 0.44, "grad_norm": 0.40073446946851377, "learning_rate": 6.168482036032259e-07, "loss": 0.0367, "step": 6934 }, { "epoch": 0.44, "grad_norm": 0.6573471808166629, "learning_rate": 6.167477868857281e-07, "loss": 0.2296, "step": 6935 }, { "epoch": 0.44, "grad_norm": 0.43918478126204147, "learning_rate": 6.166473651875351e-07, "loss": 0.0706, "step": 6936 }, { "epoch": 0.44, "grad_norm": 1.002656753065787, "learning_rate": 6.165469385129309e-07, "loss": 0.1135, "step": 6937 }, { "epoch": 0.44, "grad_norm": 1.494979637287609, "learning_rate": 6.164465068662e-07, "loss": 0.4066, "step": 6938 }, { "epoch": 0.44, "grad_norm": 0.47883964075679697, "learning_rate": 6.163460702516271e-07, "loss": 0.1891, "step": 6939 }, { "epoch": 0.44, "grad_norm": 1.1409255996678267, "learning_rate": 6.162456286734969e-07, "loss": 0.3263, "step": 6940 }, { "epoch": 0.44, "grad_norm": 0.3621007983991172, "learning_rate": 6.161451821360947e-07, "loss": 0.1045, "step": 6941 }, { "epoch": 0.44, "grad_norm": 1.0862254194163052, "learning_rate": 6.160447306437054e-07, "loss": 0.2759, "step": 6942 }, { "epoch": 0.44, "grad_norm": 0.5946015766783351, "learning_rate": 6.159442742006147e-07, "loss": 0.0527, "step": 6943 }, { "epoch": 0.44, "grad_norm": 0.5582335389228525, "learning_rate": 6.158438128111081e-07, "loss": 0.0515, "step": 6944 }, { "epoch": 0.44, "grad_norm": 0.6744063246468665, "learning_rate": 6.157433464794716e-07, "loss": 0.0112, "step": 6945 }, { "epoch": 0.44, "grad_norm": 0.7471634695884373, "learning_rate": 6.156428752099912e-07, "loss": 0.1808, "step": 6946 }, { "epoch": 0.44, "grad_norm": 0.383769180786806, "learning_rate": 6.155423990069532e-07, "loss": 0.1272, "step": 6947 }, { "epoch": 0.44, "grad_norm": 1.464658661457626, "learning_rate": 6.154419178746443e-07, "loss": 0.2879, "step": 6948 }, { "epoch": 0.44, "grad_norm": 4.586826095802796, "learning_rate": 6.153414318173511e-07, "loss": 0.1035, "step": 6949 }, { "epoch": 0.44, "grad_norm": 0.42072790409513783, "learning_rate": 6.152409408393606e-07, "loss": 0.0064, "step": 6950 }, { "epoch": 0.44, "grad_norm": 0.3812237351074588, "learning_rate": 6.151404449449599e-07, "loss": 0.1688, "step": 6951 }, { "epoch": 0.44, "grad_norm": 1.0321606069137594, "learning_rate": 6.150399441384363e-07, "loss": 0.1237, "step": 6952 }, { "epoch": 0.44, "grad_norm": 1.606937099338738, "learning_rate": 6.149394384240775e-07, "loss": 0.205, "step": 6953 }, { "epoch": 0.44, "grad_norm": 5.982863396210204, "learning_rate": 6.148389278061711e-07, "loss": 0.2688, "step": 6954 }, { "epoch": 0.44, "grad_norm": 0.5434452744793399, "learning_rate": 6.147384122890052e-07, "loss": 0.281, "step": 6955 }, { "epoch": 0.44, "grad_norm": 2.0847305758437145, "learning_rate": 6.146378918768681e-07, "loss": 0.2153, "step": 6956 }, { "epoch": 0.44, "grad_norm": 0.8470400376818763, "learning_rate": 6.14537366574048e-07, "loss": 0.3103, "step": 6957 }, { "epoch": 0.44, "grad_norm": 0.7175605445892274, "learning_rate": 6.144368363848335e-07, "loss": 0.2604, "step": 6958 }, { "epoch": 0.44, "grad_norm": 0.565787560280558, "learning_rate": 6.143363013135136e-07, "loss": 0.2501, "step": 6959 }, { "epoch": 0.44, "grad_norm": 0.3774440405473363, "learning_rate": 6.142357613643773e-07, "loss": 0.2021, "step": 6960 }, { "epoch": 0.44, "grad_norm": 1.2299796206338582, "learning_rate": 6.141352165417137e-07, "loss": 0.1713, "step": 6961 }, { "epoch": 0.44, "grad_norm": 0.7901467738433083, "learning_rate": 6.140346668498124e-07, "loss": 0.1122, "step": 6962 }, { "epoch": 0.44, "grad_norm": 0.6819889165777708, "learning_rate": 6.139341122929629e-07, "loss": 0.1528, "step": 6963 }, { "epoch": 0.44, "grad_norm": 0.8651701321719657, "learning_rate": 6.13833552875455e-07, "loss": 0.2012, "step": 6964 }, { "epoch": 0.44, "grad_norm": 0.17737635653643177, "learning_rate": 6.137329886015791e-07, "loss": 0.019, "step": 6965 }, { "epoch": 0.44, "grad_norm": 1.2778729695799618, "learning_rate": 6.136324194756252e-07, "loss": 0.3287, "step": 6966 }, { "epoch": 0.44, "grad_norm": 0.5028356491136653, "learning_rate": 6.135318455018839e-07, "loss": 0.185, "step": 6967 }, { "epoch": 0.44, "grad_norm": 0.49877890473115016, "learning_rate": 6.134312666846459e-07, "loss": 0.1714, "step": 6968 }, { "epoch": 0.44, "grad_norm": 0.7981573156656027, "learning_rate": 6.13330683028202e-07, "loss": 0.0811, "step": 6969 }, { "epoch": 0.44, "grad_norm": 0.6005895831721726, "learning_rate": 6.132300945368433e-07, "loss": 0.3987, "step": 6970 }, { "epoch": 0.44, "grad_norm": 0.31192909204643715, "learning_rate": 6.131295012148612e-07, "loss": 0.2181, "step": 6971 }, { "epoch": 0.44, "grad_norm": 0.3545310906924588, "learning_rate": 6.130289030665468e-07, "loss": 0.12, "step": 6972 }, { "epoch": 0.44, "grad_norm": 1.025570772503253, "learning_rate": 6.129283000961926e-07, "loss": 0.3388, "step": 6973 }, { "epoch": 0.44, "grad_norm": 0.5429665518394462, "learning_rate": 6.1282769230809e-07, "loss": 0.2221, "step": 6974 }, { "epoch": 0.44, "grad_norm": 1.0744699445752983, "learning_rate": 6.127270797065312e-07, "loss": 0.0902, "step": 6975 }, { "epoch": 0.44, "grad_norm": 1.2929872500332653, "learning_rate": 6.126264622958086e-07, "loss": 0.038, "step": 6976 }, { "epoch": 0.44, "grad_norm": 1.7455266878558893, "learning_rate": 6.125258400802146e-07, "loss": 0.3655, "step": 6977 }, { "epoch": 0.44, "grad_norm": 2.627917352512731, "learning_rate": 6.124252130640423e-07, "loss": 0.0604, "step": 6978 }, { "epoch": 0.45, "grad_norm": 0.8291769579982922, "learning_rate": 6.123245812515843e-07, "loss": 0.2673, "step": 6979 }, { "epoch": 0.45, "grad_norm": 0.4631818470052978, "learning_rate": 6.122239446471338e-07, "loss": 0.1917, "step": 6980 }, { "epoch": 0.45, "grad_norm": 1.0010906980464314, "learning_rate": 6.121233032549842e-07, "loss": 0.0943, "step": 6981 }, { "epoch": 0.45, "grad_norm": 1.113879147230086, "learning_rate": 6.120226570794291e-07, "loss": 0.2011, "step": 6982 }, { "epoch": 0.45, "grad_norm": 0.1995621808630295, "learning_rate": 6.119220061247622e-07, "loss": 0.0945, "step": 6983 }, { "epoch": 0.45, "grad_norm": 0.8629359079521771, "learning_rate": 6.118213503952778e-07, "loss": 0.1741, "step": 6984 }, { "epoch": 0.45, "grad_norm": 0.7448026705895965, "learning_rate": 6.117206898952696e-07, "loss": 0.3602, "step": 6985 }, { "epoch": 0.45, "grad_norm": 1.197204476150808, "learning_rate": 6.116200246290322e-07, "loss": 0.3732, "step": 6986 }, { "epoch": 0.45, "grad_norm": 0.7836219455196711, "learning_rate": 6.115193546008601e-07, "loss": 0.2002, "step": 6987 }, { "epoch": 0.45, "grad_norm": 0.5205671280907647, "learning_rate": 6.114186798150482e-07, "loss": 0.2355, "step": 6988 }, { "epoch": 0.45, "grad_norm": 2.7519950190590827, "learning_rate": 6.113180002758915e-07, "loss": 0.2139, "step": 6989 }, { "epoch": 0.45, "grad_norm": 0.3892107135968623, "learning_rate": 6.112173159876851e-07, "loss": 0.0879, "step": 6990 }, { "epoch": 0.45, "grad_norm": 0.31456452934603335, "learning_rate": 6.111166269547243e-07, "loss": 0.0036, "step": 6991 }, { "epoch": 0.45, "grad_norm": 0.795319291289239, "learning_rate": 6.110159331813049e-07, "loss": 0.1771, "step": 6992 }, { "epoch": 0.45, "grad_norm": 3.76803851220041, "learning_rate": 6.109152346717228e-07, "loss": 0.2949, "step": 6993 }, { "epoch": 0.45, "grad_norm": 1.126107545010468, "learning_rate": 6.108145314302736e-07, "loss": 0.3448, "step": 6994 }, { "epoch": 0.45, "grad_norm": 0.7010751213890889, "learning_rate": 6.107138234612539e-07, "loss": 0.342, "step": 6995 }, { "epoch": 0.45, "grad_norm": 2.1487274465706263, "learning_rate": 6.106131107689598e-07, "loss": 0.1234, "step": 6996 }, { "epoch": 0.45, "grad_norm": 1.0047599593340455, "learning_rate": 6.105123933576881e-07, "loss": 0.2634, "step": 6997 }, { "epoch": 0.45, "grad_norm": 0.6502696156537707, "learning_rate": 6.104116712317355e-07, "loss": 0.468, "step": 6998 }, { "epoch": 0.45, "grad_norm": 0.27978610515785746, "learning_rate": 6.10310944395399e-07, "loss": 0.1076, "step": 6999 }, { "epoch": 0.45, "grad_norm": 0.5266859976788861, "learning_rate": 6.102102128529759e-07, "loss": 0.0514, "step": 7000 }, { "epoch": 0.45, "grad_norm": 0.49816105419925955, "learning_rate": 6.101094766087637e-07, "loss": 0.1085, "step": 7001 }, { "epoch": 0.45, "grad_norm": 0.8781912021438211, "learning_rate": 6.100087356670596e-07, "loss": 0.302, "step": 7002 }, { "epoch": 0.45, "grad_norm": 4.143626452489102, "learning_rate": 6.09907990032162e-07, "loss": 0.2635, "step": 7003 }, { "epoch": 0.45, "grad_norm": 0.9082295169453426, "learning_rate": 6.098072397083684e-07, "loss": 0.1063, "step": 7004 }, { "epoch": 0.45, "grad_norm": 0.3790899514979397, "learning_rate": 6.097064846999773e-07, "loss": 0.1614, "step": 7005 }, { "epoch": 0.45, "grad_norm": 0.5109056251055746, "learning_rate": 6.096057250112869e-07, "loss": 0.1268, "step": 7006 }, { "epoch": 0.45, "grad_norm": 0.3445931844183288, "learning_rate": 6.095049606465962e-07, "loss": 0.1876, "step": 7007 }, { "epoch": 0.45, "grad_norm": 1.6897933713277216, "learning_rate": 6.094041916102035e-07, "loss": 0.0207, "step": 7008 }, { "epoch": 0.45, "grad_norm": 0.701035327618761, "learning_rate": 6.093034179064081e-07, "loss": 0.1681, "step": 7009 }, { "epoch": 0.45, "grad_norm": 0.5862399618767468, "learning_rate": 6.092026395395091e-07, "loss": 0.282, "step": 7010 }, { "epoch": 0.45, "grad_norm": 0.9519031633352236, "learning_rate": 6.091018565138061e-07, "loss": 0.0982, "step": 7011 }, { "epoch": 0.45, "grad_norm": 7.167690752353747, "learning_rate": 6.090010688335987e-07, "loss": 0.1385, "step": 7012 }, { "epoch": 0.45, "grad_norm": 0.6973553591500395, "learning_rate": 6.089002765031864e-07, "loss": 0.22, "step": 7013 }, { "epoch": 0.45, "grad_norm": 0.19367020577117855, "learning_rate": 6.087994795268695e-07, "loss": 0.0833, "step": 7014 }, { "epoch": 0.45, "grad_norm": 1.5990229607046382, "learning_rate": 6.08698677908948e-07, "loss": 0.2395, "step": 7015 }, { "epoch": 0.45, "grad_norm": 0.8620603415141842, "learning_rate": 6.085978716537223e-07, "loss": 0.3999, "step": 7016 }, { "epoch": 0.45, "grad_norm": 1.049386554922625, "learning_rate": 6.084970607654931e-07, "loss": 0.1278, "step": 7017 }, { "epoch": 0.45, "grad_norm": 1.6789723515233934, "learning_rate": 6.083962452485614e-07, "loss": 0.0992, "step": 7018 }, { "epoch": 0.45, "grad_norm": 0.6013876531051031, "learning_rate": 6.082954251072278e-07, "loss": 0.068, "step": 7019 }, { "epoch": 0.45, "grad_norm": 0.7665096500380996, "learning_rate": 6.081946003457936e-07, "loss": 0.1732, "step": 7020 }, { "epoch": 0.45, "grad_norm": 0.6786080414216922, "learning_rate": 6.080937709685604e-07, "loss": 0.1515, "step": 7021 }, { "epoch": 0.45, "grad_norm": 0.6232705392964524, "learning_rate": 6.079929369798297e-07, "loss": 0.3126, "step": 7022 }, { "epoch": 0.45, "grad_norm": 0.9174738349237397, "learning_rate": 6.078920983839031e-07, "loss": 0.0919, "step": 7023 }, { "epoch": 0.45, "grad_norm": 0.3351450552038322, "learning_rate": 6.077912551850828e-07, "loss": 0.1825, "step": 7024 }, { "epoch": 0.45, "grad_norm": 0.23047285273482546, "learning_rate": 6.076904073876706e-07, "loss": 0.1486, "step": 7025 }, { "epoch": 0.45, "grad_norm": 9.8596867267407, "learning_rate": 6.075895549959693e-07, "loss": 0.0656, "step": 7026 }, { "epoch": 0.45, "grad_norm": 0.5862264008202591, "learning_rate": 6.074886980142813e-07, "loss": 0.2388, "step": 7027 }, { "epoch": 0.45, "grad_norm": 0.6502582586265484, "learning_rate": 6.073878364469094e-07, "loss": 0.3046, "step": 7028 }, { "epoch": 0.45, "grad_norm": 0.7072089060085425, "learning_rate": 6.072869702981565e-07, "loss": 0.016, "step": 7029 }, { "epoch": 0.45, "grad_norm": 1.9694642254886912, "learning_rate": 6.071860995723257e-07, "loss": 0.6971, "step": 7030 }, { "epoch": 0.45, "grad_norm": 0.6990240558170092, "learning_rate": 6.070852242737206e-07, "loss": 0.16, "step": 7031 }, { "epoch": 0.45, "grad_norm": 0.936223825646583, "learning_rate": 6.069843444066444e-07, "loss": 0.295, "step": 7032 }, { "epoch": 0.45, "grad_norm": 0.40226374749511673, "learning_rate": 6.06883459975401e-07, "loss": 0.1209, "step": 7033 }, { "epoch": 0.45, "grad_norm": 1.935690253055944, "learning_rate": 6.067825709842945e-07, "loss": 0.1124, "step": 7034 }, { "epoch": 0.45, "grad_norm": 1.2207454136067766, "learning_rate": 6.066816774376287e-07, "loss": 0.1075, "step": 7035 }, { "epoch": 0.45, "grad_norm": 0.5618052578226107, "learning_rate": 6.06580779339708e-07, "loss": 0.0529, "step": 7036 }, { "epoch": 0.45, "grad_norm": 0.7092968831146726, "learning_rate": 6.064798766948371e-07, "loss": 0.2881, "step": 7037 }, { "epoch": 0.45, "grad_norm": 4.862719814670639, "learning_rate": 6.063789695073208e-07, "loss": 0.1436, "step": 7038 }, { "epoch": 0.45, "grad_norm": 0.6966503669595614, "learning_rate": 6.062780577814636e-07, "loss": 0.2753, "step": 7039 }, { "epoch": 0.45, "grad_norm": 0.15349847192339625, "learning_rate": 6.061771415215708e-07, "loss": 0.0889, "step": 7040 }, { "epoch": 0.45, "grad_norm": 0.8643961482115704, "learning_rate": 6.060762207319479e-07, "loss": 0.1374, "step": 7041 }, { "epoch": 0.45, "grad_norm": 0.2582579644147097, "learning_rate": 6.059752954168999e-07, "loss": 0.0937, "step": 7042 }, { "epoch": 0.45, "grad_norm": 0.8432267169130003, "learning_rate": 6.058743655807331e-07, "loss": 0.2547, "step": 7043 }, { "epoch": 0.45, "grad_norm": 0.2976526130842025, "learning_rate": 6.057734312277526e-07, "loss": 0.0898, "step": 7044 }, { "epoch": 0.45, "grad_norm": 3.102839488835296, "learning_rate": 6.056724923622651e-07, "loss": 0.1107, "step": 7045 }, { "epoch": 0.45, "grad_norm": 0.550269931757731, "learning_rate": 6.055715489885768e-07, "loss": 0.0329, "step": 7046 }, { "epoch": 0.45, "grad_norm": 0.4707632456794082, "learning_rate": 6.054706011109938e-07, "loss": 0.0565, "step": 7047 }, { "epoch": 0.45, "grad_norm": 0.46532627867999804, "learning_rate": 6.05369648733823e-07, "loss": 0.0121, "step": 7048 }, { "epoch": 0.45, "grad_norm": 5.047816250916057, "learning_rate": 6.052686918613712e-07, "loss": 0.4253, "step": 7049 }, { "epoch": 0.45, "grad_norm": 1.0216356209529873, "learning_rate": 6.051677304979453e-07, "loss": 0.1661, "step": 7050 }, { "epoch": 0.45, "grad_norm": 0.7515596358866237, "learning_rate": 6.050667646478527e-07, "loss": 0.3816, "step": 7051 }, { "epoch": 0.45, "grad_norm": 0.5555317867772822, "learning_rate": 6.049657943154006e-07, "loss": 0.2741, "step": 7052 }, { "epoch": 0.45, "grad_norm": 0.45035019037585133, "learning_rate": 6.048648195048968e-07, "loss": 0.0255, "step": 7053 }, { "epoch": 0.45, "grad_norm": 1.3586966690743851, "learning_rate": 6.047638402206489e-07, "loss": 0.1486, "step": 7054 }, { "epoch": 0.45, "grad_norm": 0.7127962571969929, "learning_rate": 6.046628564669651e-07, "loss": 0.0519, "step": 7055 }, { "epoch": 0.45, "grad_norm": 1.1781048738549795, "learning_rate": 6.045618682481535e-07, "loss": 0.3443, "step": 7056 }, { "epoch": 0.45, "grad_norm": 2.4974924732471555, "learning_rate": 6.044608755685222e-07, "loss": 0.1785, "step": 7057 }, { "epoch": 0.45, "grad_norm": 0.9627407303530922, "learning_rate": 6.043598784323802e-07, "loss": 0.1179, "step": 7058 }, { "epoch": 0.45, "grad_norm": 0.8824047630303993, "learning_rate": 6.042588768440357e-07, "loss": 0.4079, "step": 7059 }, { "epoch": 0.45, "grad_norm": 0.7897017631778379, "learning_rate": 6.041578708077981e-07, "loss": 0.2406, "step": 7060 }, { "epoch": 0.45, "grad_norm": 0.4281677492674576, "learning_rate": 6.040568603279763e-07, "loss": 0.1699, "step": 7061 }, { "epoch": 0.45, "grad_norm": 0.3898733033420859, "learning_rate": 6.039558454088795e-07, "loss": 0.1781, "step": 7062 }, { "epoch": 0.45, "grad_norm": 0.5578960512714476, "learning_rate": 6.038548260548176e-07, "loss": 0.2769, "step": 7063 }, { "epoch": 0.45, "grad_norm": 1.7492669585363871, "learning_rate": 6.037538022700999e-07, "loss": 0.1844, "step": 7064 }, { "epoch": 0.45, "grad_norm": 1.6116049102028214, "learning_rate": 6.036527740590365e-07, "loss": 0.2459, "step": 7065 }, { "epoch": 0.45, "grad_norm": 1.2831282294351607, "learning_rate": 6.035517414259377e-07, "loss": 0.1085, "step": 7066 }, { "epoch": 0.45, "grad_norm": 1.0692156727939737, "learning_rate": 6.03450704375113e-07, "loss": 0.4134, "step": 7067 }, { "epoch": 0.45, "grad_norm": 0.9839753343140111, "learning_rate": 6.033496629108736e-07, "loss": 0.2069, "step": 7068 }, { "epoch": 0.45, "grad_norm": 0.9041922192505167, "learning_rate": 6.032486170375296e-07, "loss": 0.0116, "step": 7069 }, { "epoch": 0.45, "grad_norm": 0.4681185734971004, "learning_rate": 6.031475667593919e-07, "loss": 0.2982, "step": 7070 }, { "epoch": 0.45, "grad_norm": 0.7860744215186456, "learning_rate": 6.030465120807719e-07, "loss": 0.0097, "step": 7071 }, { "epoch": 0.45, "grad_norm": 0.9383937659706048, "learning_rate": 6.029454530059806e-07, "loss": 0.1902, "step": 7072 }, { "epoch": 0.45, "grad_norm": 1.718398069911204, "learning_rate": 6.028443895393291e-07, "loss": 0.3911, "step": 7073 }, { "epoch": 0.45, "grad_norm": 0.06919293034300165, "learning_rate": 6.027433216851294e-07, "loss": 0.0005, "step": 7074 }, { "epoch": 0.45, "grad_norm": 1.0320483799463922, "learning_rate": 6.026422494476929e-07, "loss": 0.1796, "step": 7075 }, { "epoch": 0.45, "grad_norm": 0.9449293296619714, "learning_rate": 6.025411728313317e-07, "loss": 0.1266, "step": 7076 }, { "epoch": 0.45, "grad_norm": 0.2993277087210431, "learning_rate": 6.02440091840358e-07, "loss": 0.1716, "step": 7077 }, { "epoch": 0.45, "grad_norm": 0.945913960885128, "learning_rate": 6.023390064790841e-07, "loss": 0.298, "step": 7078 }, { "epoch": 0.45, "grad_norm": 0.8939332327962871, "learning_rate": 6.022379167518225e-07, "loss": 0.0611, "step": 7079 }, { "epoch": 0.45, "grad_norm": 0.9663232671040488, "learning_rate": 6.021368226628857e-07, "loss": 0.4879, "step": 7080 }, { "epoch": 0.45, "grad_norm": 0.3242663789682058, "learning_rate": 6.020357242165868e-07, "loss": 0.1704, "step": 7081 }, { "epoch": 0.45, "grad_norm": 0.9455796030817295, "learning_rate": 6.019346214172388e-07, "loss": 0.1807, "step": 7082 }, { "epoch": 0.45, "grad_norm": 1.4402759090689443, "learning_rate": 6.018335142691548e-07, "loss": 0.0593, "step": 7083 }, { "epoch": 0.45, "grad_norm": 1.2060208908726957, "learning_rate": 6.017324027766486e-07, "loss": 0.2904, "step": 7084 }, { "epoch": 0.45, "grad_norm": 0.7129259771847446, "learning_rate": 6.016312869440334e-07, "loss": 0.4456, "step": 7085 }, { "epoch": 0.45, "grad_norm": 0.7854997394334815, "learning_rate": 6.015301667756233e-07, "loss": 0.1428, "step": 7086 }, { "epoch": 0.45, "grad_norm": 0.48994875507244606, "learning_rate": 6.014290422757322e-07, "loss": 0.2895, "step": 7087 }, { "epoch": 0.45, "grad_norm": 0.7294957042734611, "learning_rate": 6.013279134486742e-07, "loss": 0.0426, "step": 7088 }, { "epoch": 0.45, "grad_norm": 0.7434832182245137, "learning_rate": 6.012267802987636e-07, "loss": 0.1383, "step": 7089 }, { "epoch": 0.45, "grad_norm": 0.4636452503714737, "learning_rate": 6.011256428303152e-07, "loss": 0.1793, "step": 7090 }, { "epoch": 0.45, "grad_norm": 0.7988272816439137, "learning_rate": 6.010245010476436e-07, "loss": 0.3583, "step": 7091 }, { "epoch": 0.45, "grad_norm": 0.9552552379750733, "learning_rate": 6.009233549550636e-07, "loss": 0.3605, "step": 7092 }, { "epoch": 0.45, "grad_norm": 9.43371975585381, "learning_rate": 6.008222045568907e-07, "loss": 0.1882, "step": 7093 }, { "epoch": 0.45, "grad_norm": 1.3571109562706452, "learning_rate": 6.007210498574395e-07, "loss": 0.2746, "step": 7094 }, { "epoch": 0.45, "grad_norm": 0.4418232209930374, "learning_rate": 6.00619890861026e-07, "loss": 0.1772, "step": 7095 }, { "epoch": 0.45, "grad_norm": 5.088758666672563, "learning_rate": 6.005187275719657e-07, "loss": 0.0755, "step": 7096 }, { "epoch": 0.45, "grad_norm": 0.5687223310030136, "learning_rate": 6.004175599945743e-07, "loss": 0.1623, "step": 7097 }, { "epoch": 0.45, "grad_norm": 1.01592613710664, "learning_rate": 6.003163881331681e-07, "loss": 0.2278, "step": 7098 }, { "epoch": 0.45, "grad_norm": 0.3370256316854185, "learning_rate": 6.002152119920629e-07, "loss": 0.1968, "step": 7099 }, { "epoch": 0.45, "grad_norm": 0.4808284341035867, "learning_rate": 6.001140315755755e-07, "loss": 0.2689, "step": 7100 }, { "epoch": 0.45, "grad_norm": 8.011765000959112, "learning_rate": 6.000128468880222e-07, "loss": 0.3323, "step": 7101 }, { "epoch": 0.45, "grad_norm": 0.8513305845378742, "learning_rate": 5.999116579337198e-07, "loss": 0.2085, "step": 7102 }, { "epoch": 0.45, "grad_norm": 0.6186146638521367, "learning_rate": 5.998104647169852e-07, "loss": 0.0298, "step": 7103 }, { "epoch": 0.45, "grad_norm": 0.7770787457397188, "learning_rate": 5.997092672421356e-07, "loss": 0.2867, "step": 7104 }, { "epoch": 0.45, "grad_norm": 0.6759549912096738, "learning_rate": 5.996080655134881e-07, "loss": 0.3032, "step": 7105 }, { "epoch": 0.45, "grad_norm": 1.375055395363622, "learning_rate": 5.995068595353604e-07, "loss": 0.0204, "step": 7106 }, { "epoch": 0.45, "grad_norm": 0.8979471716423942, "learning_rate": 5.994056493120699e-07, "loss": 0.2618, "step": 7107 }, { "epoch": 0.45, "grad_norm": 0.5738377713029338, "learning_rate": 5.993044348479347e-07, "loss": 0.1037, "step": 7108 }, { "epoch": 0.45, "grad_norm": 0.5764327562948564, "learning_rate": 5.992032161472726e-07, "loss": 0.1666, "step": 7109 }, { "epoch": 0.45, "grad_norm": 1.5461270918707353, "learning_rate": 5.99101993214402e-07, "loss": 0.3417, "step": 7110 }, { "epoch": 0.45, "grad_norm": 1.4657147448614762, "learning_rate": 5.99000766053641e-07, "loss": 0.1103, "step": 7111 }, { "epoch": 0.45, "grad_norm": 0.8157136799194386, "learning_rate": 5.988995346693084e-07, "loss": 0.1337, "step": 7112 }, { "epoch": 0.45, "grad_norm": 0.46844818672747407, "learning_rate": 5.987982990657228e-07, "loss": 0.1807, "step": 7113 }, { "epoch": 0.45, "grad_norm": 1.8863593073604794, "learning_rate": 5.986970592472033e-07, "loss": 0.3098, "step": 7114 }, { "epoch": 0.45, "grad_norm": 0.5580882911711244, "learning_rate": 5.985958152180686e-07, "loss": 0.2203, "step": 7115 }, { "epoch": 0.45, "grad_norm": 0.9407780401334193, "learning_rate": 5.984945669826382e-07, "loss": 0.2465, "step": 7116 }, { "epoch": 0.45, "grad_norm": 0.699661212881519, "learning_rate": 5.983933145452318e-07, "loss": 0.2772, "step": 7117 }, { "epoch": 0.45, "grad_norm": 0.24115919400100697, "learning_rate": 5.982920579101687e-07, "loss": 0.0113, "step": 7118 }, { "epoch": 0.45, "grad_norm": 0.5354065445651078, "learning_rate": 5.981907970817688e-07, "loss": 0.1612, "step": 7119 }, { "epoch": 0.45, "grad_norm": 0.6437441543164536, "learning_rate": 5.980895320643521e-07, "loss": 0.2031, "step": 7120 }, { "epoch": 0.45, "grad_norm": 0.7059838005150271, "learning_rate": 5.979882628622389e-07, "loss": 0.4494, "step": 7121 }, { "epoch": 0.45, "grad_norm": 0.31758025093563497, "learning_rate": 5.978869894797493e-07, "loss": 0.1973, "step": 7122 }, { "epoch": 0.45, "grad_norm": 0.6120633224019312, "learning_rate": 5.97785711921204e-07, "loss": 0.3558, "step": 7123 }, { "epoch": 0.45, "grad_norm": 1.312367372031094, "learning_rate": 5.976844301909236e-07, "loss": 0.2304, "step": 7124 }, { "epoch": 0.45, "grad_norm": 0.3313741831082385, "learning_rate": 5.975831442932291e-07, "loss": 0.1006, "step": 7125 }, { "epoch": 0.45, "grad_norm": 0.5129312842561959, "learning_rate": 5.974818542324414e-07, "loss": 0.22, "step": 7126 }, { "epoch": 0.45, "grad_norm": 0.32401724292769385, "learning_rate": 5.97380560012882e-07, "loss": 0.2122, "step": 7127 }, { "epoch": 0.45, "grad_norm": 0.4625502269175985, "learning_rate": 5.972792616388721e-07, "loss": 0.055, "step": 7128 }, { "epoch": 0.45, "grad_norm": 0.9942322616535594, "learning_rate": 5.971779591147332e-07, "loss": 0.1874, "step": 7129 }, { "epoch": 0.45, "grad_norm": 0.4563264253733514, "learning_rate": 5.970766524447875e-07, "loss": 0.087, "step": 7130 }, { "epoch": 0.45, "grad_norm": 1.2157699901808448, "learning_rate": 5.969753416333564e-07, "loss": 0.0866, "step": 7131 }, { "epoch": 0.45, "grad_norm": 0.8444079709273857, "learning_rate": 5.968740266847623e-07, "loss": 0.1228, "step": 7132 }, { "epoch": 0.45, "grad_norm": 0.34704186053735525, "learning_rate": 5.967727076033274e-07, "loss": 0.0757, "step": 7133 }, { "epoch": 0.45, "grad_norm": 1.3237010936906624, "learning_rate": 5.966713843933746e-07, "loss": 0.0512, "step": 7134 }, { "epoch": 0.46, "grad_norm": 5.932547110155241, "learning_rate": 5.965700570592261e-07, "loss": 0.245, "step": 7135 }, { "epoch": 0.46, "grad_norm": 0.7496287466544687, "learning_rate": 5.964687256052046e-07, "loss": 0.2027, "step": 7136 }, { "epoch": 0.46, "grad_norm": 0.30193107065623703, "learning_rate": 5.963673900356335e-07, "loss": 0.092, "step": 7137 }, { "epoch": 0.46, "grad_norm": 2.332263676956981, "learning_rate": 5.962660503548358e-07, "loss": 0.1935, "step": 7138 }, { "epoch": 0.46, "grad_norm": 0.6921697636905982, "learning_rate": 5.961647065671349e-07, "loss": 0.2038, "step": 7139 }, { "epoch": 0.46, "grad_norm": 1.7691815576656338, "learning_rate": 5.960633586768542e-07, "loss": 0.1293, "step": 7140 }, { "epoch": 0.46, "grad_norm": 0.5950655910490383, "learning_rate": 5.959620066883175e-07, "loss": 0.163, "step": 7141 }, { "epoch": 0.46, "grad_norm": 0.6732396003329623, "learning_rate": 5.958606506058488e-07, "loss": 0.1545, "step": 7142 }, { "epoch": 0.46, "grad_norm": 0.5869733926350481, "learning_rate": 5.95759290433772e-07, "loss": 0.0886, "step": 7143 }, { "epoch": 0.46, "grad_norm": 0.8168037275744525, "learning_rate": 5.956579261764115e-07, "loss": 0.3139, "step": 7144 }, { "epoch": 0.46, "grad_norm": 0.32485987063036015, "learning_rate": 5.955565578380914e-07, "loss": 0.1552, "step": 7145 }, { "epoch": 0.46, "grad_norm": 0.31954013098482065, "learning_rate": 5.954551854231365e-07, "loss": 0.086, "step": 7146 }, { "epoch": 0.46, "grad_norm": 0.2954219369681322, "learning_rate": 5.953538089358713e-07, "loss": 0.0162, "step": 7147 }, { "epoch": 0.46, "grad_norm": 0.66192513564281, "learning_rate": 5.952524283806214e-07, "loss": 0.2002, "step": 7148 }, { "epoch": 0.46, "grad_norm": 0.6250885992829237, "learning_rate": 5.95151043761711e-07, "loss": 0.2374, "step": 7149 }, { "epoch": 0.46, "grad_norm": 0.30212655217801865, "learning_rate": 5.950496550834659e-07, "loss": 0.1873, "step": 7150 }, { "epoch": 0.46, "grad_norm": 1.091021040170508, "learning_rate": 5.949482623502116e-07, "loss": 0.3885, "step": 7151 }, { "epoch": 0.46, "grad_norm": 0.6016881690483314, "learning_rate": 5.948468655662734e-07, "loss": 0.311, "step": 7152 }, { "epoch": 0.46, "grad_norm": 0.8520165271748031, "learning_rate": 5.947454647359774e-07, "loss": 0.4149, "step": 7153 }, { "epoch": 0.46, "grad_norm": 0.6822457209307743, "learning_rate": 5.946440598636492e-07, "loss": 0.1621, "step": 7154 }, { "epoch": 0.46, "grad_norm": 0.9286637757220354, "learning_rate": 5.945426509536152e-07, "loss": 0.1993, "step": 7155 }, { "epoch": 0.46, "grad_norm": 0.4551563553112895, "learning_rate": 5.944412380102017e-07, "loss": 0.1555, "step": 7156 }, { "epoch": 0.46, "grad_norm": 1.549513819768842, "learning_rate": 5.943398210377352e-07, "loss": 0.317, "step": 7157 }, { "epoch": 0.46, "grad_norm": 0.8022045638220008, "learning_rate": 5.942384000405423e-07, "loss": 0.2718, "step": 7158 }, { "epoch": 0.46, "grad_norm": 0.5936247476974245, "learning_rate": 5.941369750229497e-07, "loss": 0.2676, "step": 7159 }, { "epoch": 0.46, "grad_norm": 0.43999162216149557, "learning_rate": 5.940355459892844e-07, "loss": 0.3624, "step": 7160 }, { "epoch": 0.46, "grad_norm": 2.844207234831347, "learning_rate": 5.939341129438738e-07, "loss": 0.054, "step": 7161 }, { "epoch": 0.46, "grad_norm": 0.5082391569111769, "learning_rate": 5.938326758910453e-07, "loss": 0.2208, "step": 7162 }, { "epoch": 0.46, "grad_norm": 1.172219731786886, "learning_rate": 5.93731234835126e-07, "loss": 0.3387, "step": 7163 }, { "epoch": 0.46, "grad_norm": 0.4864323174163239, "learning_rate": 5.936297897804439e-07, "loss": 0.2884, "step": 7164 }, { "epoch": 0.46, "grad_norm": 0.4171003748458937, "learning_rate": 5.935283407313268e-07, "loss": 0.1463, "step": 7165 }, { "epoch": 0.46, "grad_norm": 1.2970815946203582, "learning_rate": 5.934268876921025e-07, "loss": 0.3739, "step": 7166 }, { "epoch": 0.46, "grad_norm": 0.5236303770620295, "learning_rate": 5.933254306670994e-07, "loss": 0.1736, "step": 7167 }, { "epoch": 0.46, "grad_norm": 0.5700171082654275, "learning_rate": 5.932239696606457e-07, "loss": 0.2731, "step": 7168 }, { "epoch": 0.46, "grad_norm": 0.43685907066280566, "learning_rate": 5.931225046770703e-07, "loss": 0.2653, "step": 7169 }, { "epoch": 0.46, "grad_norm": 2.8688550234332038, "learning_rate": 5.930210357207015e-07, "loss": 0.2695, "step": 7170 }, { "epoch": 0.46, "grad_norm": 1.0139315492143588, "learning_rate": 5.929195627958683e-07, "loss": 0.1514, "step": 7171 }, { "epoch": 0.46, "grad_norm": 1.563002708663235, "learning_rate": 5.928180859068999e-07, "loss": 0.1547, "step": 7172 }, { "epoch": 0.46, "grad_norm": 1.5001619767205903, "learning_rate": 5.927166050581252e-07, "loss": 0.033, "step": 7173 }, { "epoch": 0.46, "grad_norm": 0.6917706009406938, "learning_rate": 5.926151202538739e-07, "loss": 0.1704, "step": 7174 }, { "epoch": 0.46, "grad_norm": 0.9353287183536301, "learning_rate": 5.925136314984753e-07, "loss": 0.2011, "step": 7175 }, { "epoch": 0.46, "grad_norm": 0.6112538646341721, "learning_rate": 5.924121387962593e-07, "loss": 0.0681, "step": 7176 }, { "epoch": 0.46, "grad_norm": 0.858619915778985, "learning_rate": 5.923106421515556e-07, "loss": 0.3036, "step": 7177 }, { "epoch": 0.46, "grad_norm": 5.354676634271242, "learning_rate": 5.922091415686944e-07, "loss": 0.0096, "step": 7178 }, { "epoch": 0.46, "grad_norm": 1.2571455296787604, "learning_rate": 5.921076370520057e-07, "loss": 0.1193, "step": 7179 }, { "epoch": 0.46, "grad_norm": 0.736715975373125, "learning_rate": 5.920061286058202e-07, "loss": 0.1809, "step": 7180 }, { "epoch": 0.46, "grad_norm": 7.068782573052699, "learning_rate": 5.919046162344683e-07, "loss": 0.0239, "step": 7181 }, { "epoch": 0.46, "grad_norm": 1.3297467235529592, "learning_rate": 5.918030999422808e-07, "loss": 0.0886, "step": 7182 }, { "epoch": 0.46, "grad_norm": 0.7417717017004508, "learning_rate": 5.917015797335882e-07, "loss": 0.1744, "step": 7183 }, { "epoch": 0.46, "grad_norm": 1.0933639592682791, "learning_rate": 5.916000556127221e-07, "loss": 0.1144, "step": 7184 }, { "epoch": 0.46, "grad_norm": 0.17164216430584833, "learning_rate": 5.914985275840135e-07, "loss": 0.0051, "step": 7185 }, { "epoch": 0.46, "grad_norm": 0.6620853225118942, "learning_rate": 5.913969956517936e-07, "loss": 0.1639, "step": 7186 }, { "epoch": 0.46, "grad_norm": 1.0864099238793041, "learning_rate": 5.912954598203943e-07, "loss": 0.3266, "step": 7187 }, { "epoch": 0.46, "grad_norm": 0.6563817289774813, "learning_rate": 5.91193920094147e-07, "loss": 0.3287, "step": 7188 }, { "epoch": 0.46, "grad_norm": 0.7480545118849424, "learning_rate": 5.910923764773841e-07, "loss": 0.0214, "step": 7189 }, { "epoch": 0.46, "grad_norm": 0.7998208232146227, "learning_rate": 5.90990828974437e-07, "loss": 0.3755, "step": 7190 }, { "epoch": 0.46, "grad_norm": 0.85748334831464, "learning_rate": 5.908892775896383e-07, "loss": 0.0507, "step": 7191 }, { "epoch": 0.46, "grad_norm": 1.1970218213760373, "learning_rate": 5.907877223273202e-07, "loss": 0.2678, "step": 7192 }, { "epoch": 0.46, "grad_norm": 1.2924320625986172, "learning_rate": 5.906861631918155e-07, "loss": 0.2574, "step": 7193 }, { "epoch": 0.46, "grad_norm": 0.7987424260681844, "learning_rate": 5.905846001874566e-07, "loss": 0.1193, "step": 7194 }, { "epoch": 0.46, "grad_norm": 0.6319274213415333, "learning_rate": 5.904830333185768e-07, "loss": 0.1224, "step": 7195 }, { "epoch": 0.46, "grad_norm": 0.3711639795844755, "learning_rate": 5.903814625895088e-07, "loss": 0.0397, "step": 7196 }, { "epoch": 0.46, "grad_norm": 0.3745274574843632, "learning_rate": 5.902798880045858e-07, "loss": 0.0753, "step": 7197 }, { "epoch": 0.46, "grad_norm": 4.56204350914102, "learning_rate": 5.901783095681414e-07, "loss": 0.0599, "step": 7198 }, { "epoch": 0.46, "grad_norm": 1.1506934707785388, "learning_rate": 5.900767272845091e-07, "loss": 0.212, "step": 7199 }, { "epoch": 0.46, "grad_norm": 0.5970370856193812, "learning_rate": 5.899751411580224e-07, "loss": 0.2976, "step": 7200 }, { "epoch": 0.46, "grad_norm": 0.9259827639952576, "learning_rate": 5.898735511930155e-07, "loss": 0.2, "step": 7201 }, { "epoch": 0.46, "grad_norm": 0.46751645919753676, "learning_rate": 5.89771957393822e-07, "loss": 0.2628, "step": 7202 }, { "epoch": 0.46, "grad_norm": 0.8725741363156909, "learning_rate": 5.896703597647764e-07, "loss": 0.2463, "step": 7203 }, { "epoch": 0.46, "grad_norm": 1.4982264543897335, "learning_rate": 5.89568758310213e-07, "loss": 0.1472, "step": 7204 }, { "epoch": 0.46, "grad_norm": 1.1028605339442303, "learning_rate": 5.894671530344664e-07, "loss": 0.193, "step": 7205 }, { "epoch": 0.46, "grad_norm": 0.5001892897512012, "learning_rate": 5.893655439418711e-07, "loss": 0.088, "step": 7206 }, { "epoch": 0.46, "grad_norm": 9.022140920096918, "learning_rate": 5.892639310367622e-07, "loss": 0.1458, "step": 7207 }, { "epoch": 0.46, "grad_norm": 14.497112180372172, "learning_rate": 5.891623143234744e-07, "loss": 0.189, "step": 7208 }, { "epoch": 0.46, "grad_norm": 0.30387165766925095, "learning_rate": 5.89060693806343e-07, "loss": 0.0739, "step": 7209 }, { "epoch": 0.46, "grad_norm": 1.0157909781768688, "learning_rate": 5.889590694897035e-07, "loss": 0.5586, "step": 7210 }, { "epoch": 0.46, "grad_norm": 1.1583784876385135, "learning_rate": 5.888574413778913e-07, "loss": 0.3272, "step": 7211 }, { "epoch": 0.46, "grad_norm": 0.7133214714640085, "learning_rate": 5.88755809475242e-07, "loss": 0.1663, "step": 7212 }, { "epoch": 0.46, "grad_norm": 0.5133567316724253, "learning_rate": 5.886541737860912e-07, "loss": 0.1052, "step": 7213 }, { "epoch": 0.46, "grad_norm": 17.285625438249102, "learning_rate": 5.885525343147754e-07, "loss": 0.2791, "step": 7214 }, { "epoch": 0.46, "grad_norm": 3.1601010633523465, "learning_rate": 5.884508910656302e-07, "loss": 0.1034, "step": 7215 }, { "epoch": 0.46, "grad_norm": 1.7205234624779004, "learning_rate": 5.883492440429925e-07, "loss": 0.4425, "step": 7216 }, { "epoch": 0.46, "grad_norm": 0.9092036765999852, "learning_rate": 5.882475932511984e-07, "loss": 0.2444, "step": 7217 }, { "epoch": 0.46, "grad_norm": 1.966331431955261, "learning_rate": 5.881459386945845e-07, "loss": 0.1008, "step": 7218 }, { "epoch": 0.46, "grad_norm": 0.7241359638013197, "learning_rate": 5.880442803774877e-07, "loss": 0.0397, "step": 7219 }, { "epoch": 0.46, "grad_norm": 0.7865950024944246, "learning_rate": 5.879426183042448e-07, "loss": 0.3198, "step": 7220 }, { "epoch": 0.46, "grad_norm": 3.2668959117213308, "learning_rate": 5.878409524791929e-07, "loss": 0.3804, "step": 7221 }, { "epoch": 0.46, "grad_norm": 0.6940116579350895, "learning_rate": 5.877392829066697e-07, "loss": 0.1352, "step": 7222 }, { "epoch": 0.46, "grad_norm": 0.7889425403151112, "learning_rate": 5.876376095910122e-07, "loss": 0.1604, "step": 7223 }, { "epoch": 0.46, "grad_norm": 0.13743757225120593, "learning_rate": 5.87535932536558e-07, "loss": 0.0081, "step": 7224 }, { "epoch": 0.46, "grad_norm": 0.3539409891951253, "learning_rate": 5.874342517476451e-07, "loss": 0.1702, "step": 7225 }, { "epoch": 0.46, "grad_norm": 1.3253394447168811, "learning_rate": 5.873325672286112e-07, "loss": 0.1449, "step": 7226 }, { "epoch": 0.46, "grad_norm": 0.1508727438993049, "learning_rate": 5.872308789837943e-07, "loss": 0.0957, "step": 7227 }, { "epoch": 0.46, "grad_norm": 0.37757411481511055, "learning_rate": 5.871291870175328e-07, "loss": 0.1251, "step": 7228 }, { "epoch": 0.46, "grad_norm": 0.16487198167342196, "learning_rate": 5.87027491334165e-07, "loss": 0.0046, "step": 7229 }, { "epoch": 0.46, "grad_norm": 0.6371992172879106, "learning_rate": 5.869257919380297e-07, "loss": 0.124, "step": 7230 }, { "epoch": 0.46, "grad_norm": 0.9435011963184733, "learning_rate": 5.868240888334652e-07, "loss": 0.2358, "step": 7231 }, { "epoch": 0.46, "grad_norm": 0.6291321543008145, "learning_rate": 5.867223820248105e-07, "loss": 0.2749, "step": 7232 }, { "epoch": 0.46, "grad_norm": 0.6028154079885149, "learning_rate": 5.866206715164047e-07, "loss": 0.2447, "step": 7233 }, { "epoch": 0.46, "grad_norm": 0.243144110217354, "learning_rate": 5.86518957312587e-07, "loss": 0.0919, "step": 7234 }, { "epoch": 0.46, "grad_norm": 1.7366644013030765, "learning_rate": 5.864172394176965e-07, "loss": 0.1285, "step": 7235 }, { "epoch": 0.46, "grad_norm": 1.3381590499033993, "learning_rate": 5.86315517836073e-07, "loss": 0.1884, "step": 7236 }, { "epoch": 0.46, "grad_norm": 0.9461233417057059, "learning_rate": 5.862137925720559e-07, "loss": 0.1757, "step": 7237 }, { "epoch": 0.46, "grad_norm": 0.3710231970881075, "learning_rate": 5.861120636299851e-07, "loss": 0.2281, "step": 7238 }, { "epoch": 0.46, "grad_norm": 0.6179070684772522, "learning_rate": 5.860103310142005e-07, "loss": 0.1681, "step": 7239 }, { "epoch": 0.46, "grad_norm": 1.0201386018154825, "learning_rate": 5.859085947290423e-07, "loss": 0.3028, "step": 7240 }, { "epoch": 0.46, "grad_norm": 0.8091902911533021, "learning_rate": 5.858068547788509e-07, "loss": 0.0124, "step": 7241 }, { "epoch": 0.46, "grad_norm": 1.3980271825191142, "learning_rate": 5.857051111679664e-07, "loss": 0.3732, "step": 7242 }, { "epoch": 0.46, "grad_norm": 0.8013402975837172, "learning_rate": 5.856033639007297e-07, "loss": 0.2799, "step": 7243 }, { "epoch": 0.46, "grad_norm": 0.1687425807823621, "learning_rate": 5.855016129814815e-07, "loss": 0.0311, "step": 7244 }, { "epoch": 0.46, "grad_norm": 1.3294547064513318, "learning_rate": 5.853998584145624e-07, "loss": 0.2819, "step": 7245 }, { "epoch": 0.46, "grad_norm": 1.3755901930805092, "learning_rate": 5.852981002043138e-07, "loss": 0.3118, "step": 7246 }, { "epoch": 0.46, "grad_norm": 1.289201689858387, "learning_rate": 5.851963383550766e-07, "loss": 0.106, "step": 7247 }, { "epoch": 0.46, "grad_norm": 0.5300436654021281, "learning_rate": 5.850945728711925e-07, "loss": 0.1291, "step": 7248 }, { "epoch": 0.46, "grad_norm": 0.6562759244648393, "learning_rate": 5.849928037570028e-07, "loss": 0.1758, "step": 7249 }, { "epoch": 0.46, "grad_norm": 4.098462470269909, "learning_rate": 5.848910310168493e-07, "loss": 0.1566, "step": 7250 }, { "epoch": 0.46, "grad_norm": 0.8302393051115002, "learning_rate": 5.847892546550737e-07, "loss": 0.128, "step": 7251 }, { "epoch": 0.46, "grad_norm": 0.8359880903191275, "learning_rate": 5.84687474676018e-07, "loss": 0.3108, "step": 7252 }, { "epoch": 0.46, "grad_norm": 1.4437891175800763, "learning_rate": 5.845856910840245e-07, "loss": 0.2676, "step": 7253 }, { "epoch": 0.46, "grad_norm": 0.9646235215281507, "learning_rate": 5.844839038834353e-07, "loss": 0.0868, "step": 7254 }, { "epoch": 0.46, "grad_norm": 0.6620742954436339, "learning_rate": 5.84382113078593e-07, "loss": 0.2169, "step": 7255 }, { "epoch": 0.46, "grad_norm": 0.4860674138148537, "learning_rate": 5.8428031867384e-07, "loss": 0.1432, "step": 7256 }, { "epoch": 0.46, "grad_norm": 0.5606129692529753, "learning_rate": 5.841785206735192e-07, "loss": 0.1234, "step": 7257 }, { "epoch": 0.46, "grad_norm": 1.094353071114604, "learning_rate": 5.840767190819736e-07, "loss": 0.2795, "step": 7258 }, { "epoch": 0.46, "grad_norm": 0.9150744126339434, "learning_rate": 5.839749139035461e-07, "loss": 0.191, "step": 7259 }, { "epoch": 0.46, "grad_norm": 1.3703948391791727, "learning_rate": 5.8387310514258e-07, "loss": 0.0749, "step": 7260 }, { "epoch": 0.46, "grad_norm": 0.5517259013873832, "learning_rate": 5.837712928034187e-07, "loss": 0.2893, "step": 7261 }, { "epoch": 0.46, "grad_norm": 1.0500803493405015, "learning_rate": 5.836694768904054e-07, "loss": 0.3375, "step": 7262 }, { "epoch": 0.46, "grad_norm": 2.9423254492124644, "learning_rate": 5.835676574078842e-07, "loss": 0.075, "step": 7263 }, { "epoch": 0.46, "grad_norm": 0.7692318263987405, "learning_rate": 5.834658343601987e-07, "loss": 0.4155, "step": 7264 }, { "epoch": 0.46, "grad_norm": 0.9579505383096613, "learning_rate": 5.833640077516929e-07, "loss": 0.1745, "step": 7265 }, { "epoch": 0.46, "grad_norm": 1.2862268054084534, "learning_rate": 5.832621775867109e-07, "loss": 0.1967, "step": 7266 }, { "epoch": 0.46, "grad_norm": 0.6713444781147704, "learning_rate": 5.831603438695971e-07, "loss": 0.3673, "step": 7267 }, { "epoch": 0.46, "grad_norm": 1.3249779682100575, "learning_rate": 5.830585066046958e-07, "loss": 0.4114, "step": 7268 }, { "epoch": 0.46, "grad_norm": 0.7909573625344419, "learning_rate": 5.829566657963517e-07, "loss": 0.0779, "step": 7269 }, { "epoch": 0.46, "grad_norm": 1.3061708633493678, "learning_rate": 5.828548214489095e-07, "loss": 0.1772, "step": 7270 }, { "epoch": 0.46, "grad_norm": 3.2316863525602373, "learning_rate": 5.82752973566714e-07, "loss": 0.1857, "step": 7271 }, { "epoch": 0.46, "grad_norm": 0.3739109090293889, "learning_rate": 5.826511221541104e-07, "loss": 0.0953, "step": 7272 }, { "epoch": 0.46, "grad_norm": 2.657124797840211, "learning_rate": 5.825492672154437e-07, "loss": 0.2445, "step": 7273 }, { "epoch": 0.46, "grad_norm": 0.568751310419939, "learning_rate": 5.824474087550593e-07, "loss": 0.1064, "step": 7274 }, { "epoch": 0.46, "grad_norm": 0.5527706509889663, "learning_rate": 5.823455467773026e-07, "loss": 0.3305, "step": 7275 }, { "epoch": 0.46, "grad_norm": 0.7951595167062507, "learning_rate": 5.822436812865194e-07, "loss": 0.0214, "step": 7276 }, { "epoch": 0.46, "grad_norm": 4.654831699863176, "learning_rate": 5.821418122870556e-07, "loss": 0.0929, "step": 7277 }, { "epoch": 0.46, "grad_norm": 0.8192238158820631, "learning_rate": 5.820399397832568e-07, "loss": 0.399, "step": 7278 }, { "epoch": 0.46, "grad_norm": 0.6710596774381883, "learning_rate": 5.819380637794693e-07, "loss": 0.2271, "step": 7279 }, { "epoch": 0.46, "grad_norm": 1.0295387864219177, "learning_rate": 5.818361842800392e-07, "loss": 0.2058, "step": 7280 }, { "epoch": 0.46, "grad_norm": 0.4595757345400026, "learning_rate": 5.817343012893131e-07, "loss": 0.0978, "step": 7281 }, { "epoch": 0.46, "grad_norm": 1.1208460280131833, "learning_rate": 5.816324148116374e-07, "loss": 0.2242, "step": 7282 }, { "epoch": 0.46, "grad_norm": 0.7370933209663031, "learning_rate": 5.815305248513587e-07, "loss": 0.209, "step": 7283 }, { "epoch": 0.46, "grad_norm": 1.1254594372749078, "learning_rate": 5.814286314128238e-07, "loss": 0.1657, "step": 7284 }, { "epoch": 0.46, "grad_norm": 1.5784525671144114, "learning_rate": 5.8132673450038e-07, "loss": 0.0118, "step": 7285 }, { "epoch": 0.46, "grad_norm": 1.0887737723803113, "learning_rate": 5.812248341183741e-07, "loss": 0.1122, "step": 7286 }, { "epoch": 0.46, "grad_norm": 0.3398571182371665, "learning_rate": 5.811229302711536e-07, "loss": 0.0988, "step": 7287 }, { "epoch": 0.46, "grad_norm": 0.29411231775771174, "learning_rate": 5.810210229630657e-07, "loss": 0.029, "step": 7288 }, { "epoch": 0.46, "grad_norm": 0.6714221210862924, "learning_rate": 5.809191121984582e-07, "loss": 0.0151, "step": 7289 }, { "epoch": 0.46, "grad_norm": 0.7738093846839272, "learning_rate": 5.808171979816786e-07, "loss": 0.4841, "step": 7290 }, { "epoch": 0.46, "grad_norm": 1.2247291459538128, "learning_rate": 5.80715280317075e-07, "loss": 0.1818, "step": 7291 }, { "epoch": 0.47, "grad_norm": 0.17260494263047638, "learning_rate": 5.80613359208995e-07, "loss": 0.1391, "step": 7292 }, { "epoch": 0.47, "grad_norm": 14.147421161271533, "learning_rate": 5.805114346617873e-07, "loss": 0.0911, "step": 7293 }, { "epoch": 0.47, "grad_norm": 0.350897262005696, "learning_rate": 5.804095066797999e-07, "loss": 0.1607, "step": 7294 }, { "epoch": 0.47, "grad_norm": 3.4318251929458587, "learning_rate": 5.803075752673812e-07, "loss": 0.025, "step": 7295 }, { "epoch": 0.47, "grad_norm": 0.6625797293507669, "learning_rate": 5.802056404288801e-07, "loss": 0.009, "step": 7296 }, { "epoch": 0.47, "grad_norm": 1.2150155533447429, "learning_rate": 5.80103702168645e-07, "loss": 0.348, "step": 7297 }, { "epoch": 0.47, "grad_norm": 0.7820606372820846, "learning_rate": 5.80001760491025e-07, "loss": 0.3305, "step": 7298 }, { "epoch": 0.47, "grad_norm": 0.3395156167890347, "learning_rate": 5.798998154003691e-07, "loss": 0.1866, "step": 7299 }, { "epoch": 0.47, "grad_norm": 1.583684948082188, "learning_rate": 5.797978669010264e-07, "loss": 0.1602, "step": 7300 }, { "epoch": 0.47, "grad_norm": 0.7808833631802821, "learning_rate": 5.796959149973463e-07, "loss": 0.2628, "step": 7301 }, { "epoch": 0.47, "grad_norm": 0.4931297365788696, "learning_rate": 5.795939596936782e-07, "loss": 0.0382, "step": 7302 }, { "epoch": 0.47, "grad_norm": 0.46770785079811705, "learning_rate": 5.794920009943719e-07, "loss": 0.1639, "step": 7303 }, { "epoch": 0.47, "grad_norm": 0.5700771055707824, "learning_rate": 5.793900389037769e-07, "loss": 0.2143, "step": 7304 }, { "epoch": 0.47, "grad_norm": 0.563425993375374, "learning_rate": 5.792880734262433e-07, "loss": 0.2519, "step": 7305 }, { "epoch": 0.47, "grad_norm": 4.458020840421295, "learning_rate": 5.791861045661211e-07, "loss": 0.3534, "step": 7306 }, { "epoch": 0.47, "grad_norm": 0.7477225624836562, "learning_rate": 5.790841323277606e-07, "loss": 0.1822, "step": 7307 }, { "epoch": 0.47, "grad_norm": 1.1290463820403616, "learning_rate": 5.789821567155119e-07, "loss": 0.2205, "step": 7308 }, { "epoch": 0.47, "grad_norm": 0.7053548395714695, "learning_rate": 5.788801777337256e-07, "loss": 0.1834, "step": 7309 }, { "epoch": 0.47, "grad_norm": 3.024969451501184, "learning_rate": 5.787781953867523e-07, "loss": 0.247, "step": 7310 }, { "epoch": 0.47, "grad_norm": 1.7774324720546493, "learning_rate": 5.78676209678943e-07, "loss": 0.2281, "step": 7311 }, { "epoch": 0.47, "grad_norm": 0.5249247474929776, "learning_rate": 5.785742206146483e-07, "loss": 0.2471, "step": 7312 }, { "epoch": 0.47, "grad_norm": 0.47158116919704907, "learning_rate": 5.784722281982196e-07, "loss": 0.1751, "step": 7313 }, { "epoch": 0.47, "grad_norm": 1.7358734719182072, "learning_rate": 5.783702324340078e-07, "loss": 0.1933, "step": 7314 }, { "epoch": 0.47, "grad_norm": 0.3645172137192145, "learning_rate": 5.782682333263643e-07, "loss": 0.1133, "step": 7315 }, { "epoch": 0.47, "grad_norm": 10.867536241972235, "learning_rate": 5.781662308796406e-07, "loss": 0.0963, "step": 7316 }, { "epoch": 0.47, "grad_norm": 0.6811480977103188, "learning_rate": 5.780642250981884e-07, "loss": 0.4057, "step": 7317 }, { "epoch": 0.47, "grad_norm": 0.681771414756409, "learning_rate": 5.779622159863593e-07, "loss": 0.0947, "step": 7318 }, { "epoch": 0.47, "grad_norm": 0.5311426383320308, "learning_rate": 5.778602035485054e-07, "loss": 0.0439, "step": 7319 }, { "epoch": 0.47, "grad_norm": 1.0074352489639262, "learning_rate": 5.777581877889787e-07, "loss": 0.3747, "step": 7320 }, { "epoch": 0.47, "grad_norm": 0.7402048501721525, "learning_rate": 5.776561687121315e-07, "loss": 0.3483, "step": 7321 }, { "epoch": 0.47, "grad_norm": 0.7648789389158936, "learning_rate": 5.77554146322316e-07, "loss": 0.331, "step": 7322 }, { "epoch": 0.47, "grad_norm": 2.650351293264006, "learning_rate": 5.774521206238847e-07, "loss": 0.1978, "step": 7323 }, { "epoch": 0.47, "grad_norm": 8.773258554706342, "learning_rate": 5.773500916211902e-07, "loss": 0.2602, "step": 7324 }, { "epoch": 0.47, "grad_norm": 1.148778741358844, "learning_rate": 5.772480593185853e-07, "loss": 0.0301, "step": 7325 }, { "epoch": 0.47, "grad_norm": 1.766184658234635, "learning_rate": 5.771460237204229e-07, "loss": 0.2557, "step": 7326 }, { "epoch": 0.47, "grad_norm": 0.39052958858279657, "learning_rate": 5.770439848310562e-07, "loss": 0.1391, "step": 7327 }, { "epoch": 0.47, "grad_norm": 1.5991400704799148, "learning_rate": 5.769419426548381e-07, "loss": 0.1637, "step": 7328 }, { "epoch": 0.47, "grad_norm": 0.5392510406057364, "learning_rate": 5.76839897196122e-07, "loss": 0.1154, "step": 7329 }, { "epoch": 0.47, "grad_norm": 0.5869935013530925, "learning_rate": 5.767378484592616e-07, "loss": 0.2897, "step": 7330 }, { "epoch": 0.47, "grad_norm": 0.40047283983111004, "learning_rate": 5.766357964486102e-07, "loss": 0.2059, "step": 7331 }, { "epoch": 0.47, "grad_norm": 0.2511239699126518, "learning_rate": 5.765337411685216e-07, "loss": 0.1408, "step": 7332 }, { "epoch": 0.47, "grad_norm": 0.6251798292657806, "learning_rate": 5.764316826233498e-07, "loss": 0.2621, "step": 7333 }, { "epoch": 0.47, "grad_norm": 0.5509022837755441, "learning_rate": 5.763296208174488e-07, "loss": 0.2851, "step": 7334 }, { "epoch": 0.47, "grad_norm": 0.8798314347754198, "learning_rate": 5.762275557551726e-07, "loss": 0.3619, "step": 7335 }, { "epoch": 0.47, "grad_norm": 0.1918609967717406, "learning_rate": 5.761254874408759e-07, "loss": 0.0782, "step": 7336 }, { "epoch": 0.47, "grad_norm": 0.9119931217971832, "learning_rate": 5.760234158789126e-07, "loss": 0.4166, "step": 7337 }, { "epoch": 0.47, "grad_norm": 0.8541241720669797, "learning_rate": 5.759213410736376e-07, "loss": 0.0546, "step": 7338 }, { "epoch": 0.47, "grad_norm": 0.6403395795517418, "learning_rate": 5.758192630294058e-07, "loss": 0.2085, "step": 7339 }, { "epoch": 0.47, "grad_norm": 0.6724951175395882, "learning_rate": 5.757171817505716e-07, "loss": 0.2122, "step": 7340 }, { "epoch": 0.47, "grad_norm": 1.7958559112389705, "learning_rate": 5.756150972414903e-07, "loss": 0.0467, "step": 7341 }, { "epoch": 0.47, "grad_norm": 0.7387496108044898, "learning_rate": 5.75513009506517e-07, "loss": 0.1968, "step": 7342 }, { "epoch": 0.47, "grad_norm": 0.9963018733725529, "learning_rate": 5.754109185500069e-07, "loss": 0.3962, "step": 7343 }, { "epoch": 0.47, "grad_norm": 1.5125945984411493, "learning_rate": 5.753088243763153e-07, "loss": 0.061, "step": 7344 }, { "epoch": 0.47, "grad_norm": 7.129892852925207, "learning_rate": 5.752067269897979e-07, "loss": 0.0721, "step": 7345 }, { "epoch": 0.47, "grad_norm": 0.7931316184757881, "learning_rate": 5.751046263948104e-07, "loss": 0.0084, "step": 7346 }, { "epoch": 0.47, "grad_norm": 0.9499235591276186, "learning_rate": 5.750025225957085e-07, "loss": 0.1203, "step": 7347 }, { "epoch": 0.47, "grad_norm": 0.563436624583075, "learning_rate": 5.749004155968482e-07, "loss": 0.1158, "step": 7348 }, { "epoch": 0.47, "grad_norm": 0.8241276446340091, "learning_rate": 5.747983054025856e-07, "loss": 0.2044, "step": 7349 }, { "epoch": 0.47, "grad_norm": 3.101218515866801, "learning_rate": 5.746961920172771e-07, "loss": 0.1827, "step": 7350 }, { "epoch": 0.47, "grad_norm": 1.3685416761720732, "learning_rate": 5.745940754452787e-07, "loss": 0.0704, "step": 7351 }, { "epoch": 0.47, "grad_norm": 1.328030911984748, "learning_rate": 5.744919556909472e-07, "loss": 0.2337, "step": 7352 }, { "epoch": 0.47, "grad_norm": 2.5497695070730573, "learning_rate": 5.74389832758639e-07, "loss": 0.1309, "step": 7353 }, { "epoch": 0.47, "grad_norm": 0.7591838272308836, "learning_rate": 5.742877066527112e-07, "loss": 0.0285, "step": 7354 }, { "epoch": 0.47, "grad_norm": 0.8302343064697865, "learning_rate": 5.741855773775204e-07, "loss": 0.2295, "step": 7355 }, { "epoch": 0.47, "grad_norm": 0.5090856157851922, "learning_rate": 5.740834449374237e-07, "loss": 0.2969, "step": 7356 }, { "epoch": 0.47, "grad_norm": 0.2033450691961722, "learning_rate": 5.739813093367783e-07, "loss": 0.0484, "step": 7357 }, { "epoch": 0.47, "grad_norm": 0.584509287221558, "learning_rate": 5.738791705799415e-07, "loss": 0.1798, "step": 7358 }, { "epoch": 0.47, "grad_norm": 0.4247587619689443, "learning_rate": 5.737770286712708e-07, "loss": 0.285, "step": 7359 }, { "epoch": 0.47, "grad_norm": 3.6729317035023294, "learning_rate": 5.736748836151237e-07, "loss": 0.1208, "step": 7360 }, { "epoch": 0.47, "grad_norm": 0.6018869856424918, "learning_rate": 5.73572735415858e-07, "loss": 0.1735, "step": 7361 }, { "epoch": 0.47, "grad_norm": 1.0309512263073584, "learning_rate": 5.734705840778315e-07, "loss": 0.5402, "step": 7362 }, { "epoch": 0.47, "grad_norm": 0.3507469001297476, "learning_rate": 5.733684296054022e-07, "loss": 0.1199, "step": 7363 }, { "epoch": 0.47, "grad_norm": 3.592307700239587, "learning_rate": 5.732662720029282e-07, "loss": 0.1362, "step": 7364 }, { "epoch": 0.47, "grad_norm": 2.3931602978053745, "learning_rate": 5.731641112747679e-07, "loss": 0.1187, "step": 7365 }, { "epoch": 0.47, "grad_norm": 1.1124017828310615, "learning_rate": 5.730619474252792e-07, "loss": 0.1946, "step": 7366 }, { "epoch": 0.47, "grad_norm": 0.5105519902196064, "learning_rate": 5.729597804588212e-07, "loss": 0.2302, "step": 7367 }, { "epoch": 0.47, "grad_norm": 1.2018239662291923, "learning_rate": 5.728576103797524e-07, "loss": 0.4881, "step": 7368 }, { "epoch": 0.47, "grad_norm": 1.1376805720728558, "learning_rate": 5.727554371924313e-07, "loss": 0.3844, "step": 7369 }, { "epoch": 0.47, "grad_norm": 0.6324332393685804, "learning_rate": 5.72653260901217e-07, "loss": 0.4303, "step": 7370 }, { "epoch": 0.47, "grad_norm": 0.65881572345634, "learning_rate": 5.725510815104685e-07, "loss": 0.0819, "step": 7371 }, { "epoch": 0.47, "grad_norm": 1.7395975743305332, "learning_rate": 5.724488990245451e-07, "loss": 0.1954, "step": 7372 }, { "epoch": 0.47, "grad_norm": 0.9277443571557993, "learning_rate": 5.723467134478059e-07, "loss": 0.2163, "step": 7373 }, { "epoch": 0.47, "grad_norm": 0.9069014862888083, "learning_rate": 5.722445247846106e-07, "loss": 0.2253, "step": 7374 }, { "epoch": 0.47, "grad_norm": 0.11844499542924179, "learning_rate": 5.721423330393187e-07, "loss": 0.0032, "step": 7375 }, { "epoch": 0.47, "grad_norm": 0.8113056982316724, "learning_rate": 5.720401382162898e-07, "loss": 0.1503, "step": 7376 }, { "epoch": 0.47, "grad_norm": 0.9212818922283537, "learning_rate": 5.719379403198837e-07, "loss": 0.1709, "step": 7377 }, { "epoch": 0.47, "grad_norm": 0.9404735582256118, "learning_rate": 5.718357393544605e-07, "loss": 0.1065, "step": 7378 }, { "epoch": 0.47, "grad_norm": 0.4801437221205867, "learning_rate": 5.717335353243802e-07, "loss": 0.1478, "step": 7379 }, { "epoch": 0.47, "grad_norm": 1.4510494955548074, "learning_rate": 5.716313282340032e-07, "loss": 0.159, "step": 7380 }, { "epoch": 0.47, "grad_norm": 1.5956475286412855, "learning_rate": 5.715291180876896e-07, "loss": 0.1442, "step": 7381 }, { "epoch": 0.47, "grad_norm": 5.172564143546517, "learning_rate": 5.714269048898002e-07, "loss": 0.0202, "step": 7382 }, { "epoch": 0.47, "grad_norm": 1.574979745638005, "learning_rate": 5.713246886446953e-07, "loss": 0.0843, "step": 7383 }, { "epoch": 0.47, "grad_norm": 0.9970643512039582, "learning_rate": 5.712224693567358e-07, "loss": 0.1385, "step": 7384 }, { "epoch": 0.47, "grad_norm": 0.6911268598909803, "learning_rate": 5.711202470302827e-07, "loss": 0.2269, "step": 7385 }, { "epoch": 0.47, "grad_norm": 4.80939766757664, "learning_rate": 5.710180216696968e-07, "loss": 0.3549, "step": 7386 }, { "epoch": 0.47, "grad_norm": 6.610968088004343, "learning_rate": 5.709157932793394e-07, "loss": 0.0185, "step": 7387 }, { "epoch": 0.47, "grad_norm": 0.7329459952435532, "learning_rate": 5.708135618635717e-07, "loss": 0.1785, "step": 7388 }, { "epoch": 0.47, "grad_norm": 1.8214124310681195, "learning_rate": 5.70711327426755e-07, "loss": 0.2332, "step": 7389 }, { "epoch": 0.47, "grad_norm": 3.653476522592435, "learning_rate": 5.706090899732508e-07, "loss": 0.1851, "step": 7390 }, { "epoch": 0.47, "grad_norm": 1.8452045293615673, "learning_rate": 5.705068495074211e-07, "loss": 0.1723, "step": 7391 }, { "epoch": 0.47, "grad_norm": 0.8629686321621239, "learning_rate": 5.704046060336275e-07, "loss": 0.2495, "step": 7392 }, { "epoch": 0.47, "grad_norm": 6.380436473286159, "learning_rate": 5.703023595562318e-07, "loss": 0.2289, "step": 7393 }, { "epoch": 0.47, "grad_norm": 3.4993895908190598, "learning_rate": 5.702001100795961e-07, "loss": 0.2069, "step": 7394 }, { "epoch": 0.47, "grad_norm": 1.507355922001305, "learning_rate": 5.700978576080826e-07, "loss": 0.2583, "step": 7395 }, { "epoch": 0.47, "grad_norm": 0.514544843370766, "learning_rate": 5.699956021460537e-07, "loss": 0.0457, "step": 7396 }, { "epoch": 0.47, "grad_norm": 1.0435372857076322, "learning_rate": 5.698933436978715e-07, "loss": 0.4678, "step": 7397 }, { "epoch": 0.47, "grad_norm": 1.2240897383019091, "learning_rate": 5.697910822678988e-07, "loss": 0.2138, "step": 7398 }, { "epoch": 0.47, "grad_norm": 0.75916899659083, "learning_rate": 5.696888178604982e-07, "loss": 0.2188, "step": 7399 }, { "epoch": 0.47, "grad_norm": 0.7958821348665706, "learning_rate": 5.695865504800327e-07, "loss": 0.2863, "step": 7400 }, { "epoch": 0.47, "grad_norm": 0.9350666028729014, "learning_rate": 5.694842801308651e-07, "loss": 0.1918, "step": 7401 }, { "epoch": 0.47, "grad_norm": 1.0548114986016022, "learning_rate": 5.693820068173583e-07, "loss": 0.2893, "step": 7402 }, { "epoch": 0.47, "grad_norm": 0.5524496773427505, "learning_rate": 5.692797305438756e-07, "loss": 0.3367, "step": 7403 }, { "epoch": 0.47, "grad_norm": 0.6890498424681148, "learning_rate": 5.691774513147802e-07, "loss": 0.2766, "step": 7404 }, { "epoch": 0.47, "grad_norm": 1.2570451277405057, "learning_rate": 5.690751691344359e-07, "loss": 0.3281, "step": 7405 }, { "epoch": 0.47, "grad_norm": 0.7342886843729234, "learning_rate": 5.689728840072059e-07, "loss": 0.1861, "step": 7406 }, { "epoch": 0.47, "grad_norm": 2.2393823786284757, "learning_rate": 5.688705959374542e-07, "loss": 0.1758, "step": 7407 }, { "epoch": 0.47, "grad_norm": 0.6368756048863157, "learning_rate": 5.687683049295441e-07, "loss": 0.1287, "step": 7408 }, { "epoch": 0.47, "grad_norm": 1.3698230996427294, "learning_rate": 5.686660109878401e-07, "loss": 0.1459, "step": 7409 }, { "epoch": 0.47, "grad_norm": 1.0428440778245631, "learning_rate": 5.68563714116706e-07, "loss": 0.1315, "step": 7410 }, { "epoch": 0.47, "grad_norm": 0.4462005025358093, "learning_rate": 5.68461414320506e-07, "loss": 0.2594, "step": 7411 }, { "epoch": 0.47, "grad_norm": 0.4810305048733831, "learning_rate": 5.683591116036045e-07, "loss": 0.2054, "step": 7412 }, { "epoch": 0.47, "grad_norm": 1.091110397138657, "learning_rate": 5.682568059703659e-07, "loss": 0.1844, "step": 7413 }, { "epoch": 0.47, "grad_norm": 1.111523344208612, "learning_rate": 5.681544974251547e-07, "loss": 0.1186, "step": 7414 }, { "epoch": 0.47, "grad_norm": 0.5531265967104229, "learning_rate": 5.680521859723355e-07, "loss": 0.201, "step": 7415 }, { "epoch": 0.47, "grad_norm": 0.36396224093760593, "learning_rate": 5.679498716162733e-07, "loss": 0.1059, "step": 7416 }, { "epoch": 0.47, "grad_norm": 9.232194012764126, "learning_rate": 5.67847554361333e-07, "loss": 0.2394, "step": 7417 }, { "epoch": 0.47, "grad_norm": 0.33948903903857913, "learning_rate": 5.677452342118797e-07, "loss": 0.0658, "step": 7418 }, { "epoch": 0.47, "grad_norm": 0.9176724911364541, "learning_rate": 5.676429111722784e-07, "loss": 0.0953, "step": 7419 }, { "epoch": 0.47, "grad_norm": 1.0441066460826038, "learning_rate": 5.675405852468948e-07, "loss": 0.1832, "step": 7420 }, { "epoch": 0.47, "grad_norm": 0.7920029983208265, "learning_rate": 5.674382564400938e-07, "loss": 0.2583, "step": 7421 }, { "epoch": 0.47, "grad_norm": 3.048901779663038, "learning_rate": 5.673359247562412e-07, "loss": 0.257, "step": 7422 }, { "epoch": 0.47, "grad_norm": 0.615553286073353, "learning_rate": 5.67233590199703e-07, "loss": 0.0901, "step": 7423 }, { "epoch": 0.47, "grad_norm": 0.7062176481074847, "learning_rate": 5.671312527748444e-07, "loss": 0.0728, "step": 7424 }, { "epoch": 0.47, "grad_norm": 5.232755665082155, "learning_rate": 5.670289124860317e-07, "loss": 0.2293, "step": 7425 }, { "epoch": 0.47, "grad_norm": 2.405574665183514, "learning_rate": 5.669265693376309e-07, "loss": 0.0083, "step": 7426 }, { "epoch": 0.47, "grad_norm": 1.1098301197267593, "learning_rate": 5.66824223334008e-07, "loss": 0.3068, "step": 7427 }, { "epoch": 0.47, "grad_norm": 0.9236546297958498, "learning_rate": 5.667218744795293e-07, "loss": 0.2577, "step": 7428 }, { "epoch": 0.47, "grad_norm": 0.8847137481375129, "learning_rate": 5.666195227785615e-07, "loss": 0.1334, "step": 7429 }, { "epoch": 0.47, "grad_norm": 0.5823681045348512, "learning_rate": 5.665171682354709e-07, "loss": 0.1977, "step": 7430 }, { "epoch": 0.47, "grad_norm": 2.3705110138490335, "learning_rate": 5.664148108546242e-07, "loss": 0.0945, "step": 7431 }, { "epoch": 0.47, "grad_norm": 1.1590817549719374, "learning_rate": 5.663124506403881e-07, "loss": 0.3621, "step": 7432 }, { "epoch": 0.47, "grad_norm": 8.905325163363612, "learning_rate": 5.662100875971297e-07, "loss": 0.1864, "step": 7433 }, { "epoch": 0.47, "grad_norm": 1.0797349867376271, "learning_rate": 5.661077217292155e-07, "loss": 0.2406, "step": 7434 }, { "epoch": 0.47, "grad_norm": 7.195393183468775, "learning_rate": 5.660053530410132e-07, "loss": 0.2057, "step": 7435 }, { "epoch": 0.47, "grad_norm": 0.40924590451085185, "learning_rate": 5.6590298153689e-07, "loss": 0.2426, "step": 7436 }, { "epoch": 0.47, "grad_norm": 0.6132333443663023, "learning_rate": 5.658006072212132e-07, "loss": 0.1041, "step": 7437 }, { "epoch": 0.47, "grad_norm": 1.378726814524991, "learning_rate": 5.656982300983499e-07, "loss": 0.2694, "step": 7438 }, { "epoch": 0.47, "grad_norm": 5.546295418977469, "learning_rate": 5.655958501726682e-07, "loss": 0.2877, "step": 7439 }, { "epoch": 0.47, "grad_norm": 0.9890241239705159, "learning_rate": 5.654934674485356e-07, "loss": 0.1202, "step": 7440 }, { "epoch": 0.47, "grad_norm": 0.5518453900124864, "learning_rate": 5.653910819303202e-07, "loss": 0.1582, "step": 7441 }, { "epoch": 0.47, "grad_norm": 0.31820669561369536, "learning_rate": 5.652886936223896e-07, "loss": 0.1387, "step": 7442 }, { "epoch": 0.47, "grad_norm": 1.194544907467919, "learning_rate": 5.65186302529112e-07, "loss": 0.5461, "step": 7443 }, { "epoch": 0.47, "grad_norm": 0.3864319810046312, "learning_rate": 5.650839086548559e-07, "loss": 0.2458, "step": 7444 }, { "epoch": 0.47, "grad_norm": 1.2756607149434038, "learning_rate": 5.649815120039894e-07, "loss": 0.0146, "step": 7445 }, { "epoch": 0.47, "grad_norm": 1.374367894987499, "learning_rate": 5.648791125808808e-07, "loss": 0.1254, "step": 7446 }, { "epoch": 0.47, "grad_norm": 0.6856197673962292, "learning_rate": 5.647767103898989e-07, "loss": 0.093, "step": 7447 }, { "epoch": 0.47, "grad_norm": 0.9346600309424645, "learning_rate": 5.646743054354123e-07, "loss": 0.2221, "step": 7448 }, { "epoch": 0.48, "grad_norm": 1.312934572514989, "learning_rate": 5.6457189772179e-07, "loss": 0.1119, "step": 7449 }, { "epoch": 0.48, "grad_norm": 0.8698171801457183, "learning_rate": 5.644694872534007e-07, "loss": 0.3173, "step": 7450 }, { "epoch": 0.48, "grad_norm": 0.5585476449319329, "learning_rate": 5.643670740346134e-07, "loss": 0.3569, "step": 7451 }, { "epoch": 0.48, "grad_norm": 0.1521584245809629, "learning_rate": 5.642646580697973e-07, "loss": 0.0535, "step": 7452 }, { "epoch": 0.48, "grad_norm": 0.824701361709204, "learning_rate": 5.641622393633217e-07, "loss": 0.251, "step": 7453 }, { "epoch": 0.48, "grad_norm": 0.4205844762570796, "learning_rate": 5.640598179195561e-07, "loss": 0.1754, "step": 7454 }, { "epoch": 0.48, "grad_norm": 0.8573693617300268, "learning_rate": 5.639573937428698e-07, "loss": 0.1083, "step": 7455 }, { "epoch": 0.48, "grad_norm": 1.1005043004659185, "learning_rate": 5.638549668376325e-07, "loss": 0.1672, "step": 7456 }, { "epoch": 0.48, "grad_norm": 0.6114767191442775, "learning_rate": 5.637525372082139e-07, "loss": 0.1807, "step": 7457 }, { "epoch": 0.48, "grad_norm": 0.8702120397028751, "learning_rate": 5.63650104858984e-07, "loss": 0.1615, "step": 7458 }, { "epoch": 0.48, "grad_norm": 1.750906079323879, "learning_rate": 5.635476697943127e-07, "loss": 0.1844, "step": 7459 }, { "epoch": 0.48, "grad_norm": 0.4672155006839803, "learning_rate": 5.6344523201857e-07, "loss": 0.0849, "step": 7460 }, { "epoch": 0.48, "grad_norm": 0.31078634128353105, "learning_rate": 5.633427915361261e-07, "loss": 0.115, "step": 7461 }, { "epoch": 0.48, "grad_norm": 6.36883160702436, "learning_rate": 5.632403483513514e-07, "loss": 0.0598, "step": 7462 }, { "epoch": 0.48, "grad_norm": 0.7199565022113346, "learning_rate": 5.631379024686163e-07, "loss": 0.0862, "step": 7463 }, { "epoch": 0.48, "grad_norm": 0.9325619587341416, "learning_rate": 5.630354538922914e-07, "loss": 0.3025, "step": 7464 }, { "epoch": 0.48, "grad_norm": 0.7220012421406382, "learning_rate": 5.629330026267474e-07, "loss": 0.1827, "step": 7465 }, { "epoch": 0.48, "grad_norm": 0.4894598119503782, "learning_rate": 5.628305486763551e-07, "loss": 0.1976, "step": 7466 }, { "epoch": 0.48, "grad_norm": 0.740669590038806, "learning_rate": 5.627280920454851e-07, "loss": 0.2961, "step": 7467 }, { "epoch": 0.48, "grad_norm": 1.1176548194685672, "learning_rate": 5.626256327385086e-07, "loss": 0.128, "step": 7468 }, { "epoch": 0.48, "grad_norm": 0.31588475411744266, "learning_rate": 5.625231707597966e-07, "loss": 0.0828, "step": 7469 }, { "epoch": 0.48, "grad_norm": 0.7131728843382178, "learning_rate": 5.624207061137205e-07, "loss": 0.158, "step": 7470 }, { "epoch": 0.48, "grad_norm": 4.46312935719627, "learning_rate": 5.623182388046516e-07, "loss": 0.1385, "step": 7471 }, { "epoch": 0.48, "grad_norm": 1.1777465574904347, "learning_rate": 5.622157688369615e-07, "loss": 0.2871, "step": 7472 }, { "epoch": 0.48, "grad_norm": 2.688964848925245, "learning_rate": 5.621132962150216e-07, "loss": 0.0781, "step": 7473 }, { "epoch": 0.48, "grad_norm": 0.5968462726565935, "learning_rate": 5.620108209432036e-07, "loss": 0.1112, "step": 7474 }, { "epoch": 0.48, "grad_norm": 1.3971323826497248, "learning_rate": 5.619083430258793e-07, "loss": 0.1836, "step": 7475 }, { "epoch": 0.48, "grad_norm": 1.0694918118281551, "learning_rate": 5.618058624674207e-07, "loss": 0.1519, "step": 7476 }, { "epoch": 0.48, "grad_norm": 1.0335790561850045, "learning_rate": 5.617033792721997e-07, "loss": 0.2359, "step": 7477 }, { "epoch": 0.48, "grad_norm": 0.4831809868341667, "learning_rate": 5.616008934445883e-07, "loss": 0.2011, "step": 7478 }, { "epoch": 0.48, "grad_norm": 0.4898245992729707, "learning_rate": 5.614984049889593e-07, "loss": 0.1118, "step": 7479 }, { "epoch": 0.48, "grad_norm": 4.1437450886365275, "learning_rate": 5.613959139096845e-07, "loss": 0.3033, "step": 7480 }, { "epoch": 0.48, "grad_norm": 0.7385254767518705, "learning_rate": 5.612934202111367e-07, "loss": 0.2655, "step": 7481 }, { "epoch": 0.48, "grad_norm": 0.3418980351521246, "learning_rate": 5.611909238976884e-07, "loss": 0.2096, "step": 7482 }, { "epoch": 0.48, "grad_norm": 0.2870817067320607, "learning_rate": 5.610884249737121e-07, "loss": 0.1744, "step": 7483 }, { "epoch": 0.48, "grad_norm": 2.0469729269311743, "learning_rate": 5.60985923443581e-07, "loss": 0.1054, "step": 7484 }, { "epoch": 0.48, "grad_norm": 0.2029980528324711, "learning_rate": 5.608834193116677e-07, "loss": 0.1731, "step": 7485 }, { "epoch": 0.48, "grad_norm": 0.7293884920226757, "learning_rate": 5.607809125823453e-07, "loss": 0.0062, "step": 7486 }, { "epoch": 0.48, "grad_norm": 20.011926147180226, "learning_rate": 5.606784032599869e-07, "loss": 0.1207, "step": 7487 }, { "epoch": 0.48, "grad_norm": 0.6523455597712159, "learning_rate": 5.60575891348966e-07, "loss": 0.2639, "step": 7488 }, { "epoch": 0.48, "grad_norm": 0.5988232100390637, "learning_rate": 5.604733768536559e-07, "loss": 0.2087, "step": 7489 }, { "epoch": 0.48, "grad_norm": 1.1281720086035099, "learning_rate": 5.603708597784298e-07, "loss": 0.0091, "step": 7490 }, { "epoch": 0.48, "grad_norm": 0.5289742693658647, "learning_rate": 5.602683401276614e-07, "loss": 0.1171, "step": 7491 }, { "epoch": 0.48, "grad_norm": 1.1895943799066078, "learning_rate": 5.601658179057247e-07, "loss": 0.1158, "step": 7492 }, { "epoch": 0.48, "grad_norm": 0.8388780051991538, "learning_rate": 5.600632931169932e-07, "loss": 0.3189, "step": 7493 }, { "epoch": 0.48, "grad_norm": 4.686891110765236, "learning_rate": 5.599607657658408e-07, "loss": 0.0356, "step": 7494 }, { "epoch": 0.48, "grad_norm": 0.6788510211169609, "learning_rate": 5.598582358566415e-07, "loss": 0.2257, "step": 7495 }, { "epoch": 0.48, "grad_norm": 9.364153192213852, "learning_rate": 5.597557033937697e-07, "loss": 0.2356, "step": 7496 }, { "epoch": 0.48, "grad_norm": 0.37181014590039935, "learning_rate": 5.596531683815992e-07, "loss": 0.1165, "step": 7497 }, { "epoch": 0.48, "grad_norm": 0.7648144265397897, "learning_rate": 5.595506308245049e-07, "loss": 0.3077, "step": 7498 }, { "epoch": 0.48, "grad_norm": 2.4327140745052023, "learning_rate": 5.594480907268609e-07, "loss": 0.2935, "step": 7499 }, { "epoch": 0.48, "grad_norm": 3.2451480230515064, "learning_rate": 5.593455480930418e-07, "loss": 0.3365, "step": 7500 }, { "epoch": 0.48, "grad_norm": 5.915083767056461, "learning_rate": 5.592430029274224e-07, "loss": 0.0809, "step": 7501 }, { "epoch": 0.48, "grad_norm": 0.9222138475209727, "learning_rate": 5.591404552343774e-07, "loss": 0.3594, "step": 7502 }, { "epoch": 0.48, "grad_norm": 0.32992894311582377, "learning_rate": 5.590379050182817e-07, "loss": 0.0914, "step": 7503 }, { "epoch": 0.48, "grad_norm": 5.206815532507948, "learning_rate": 5.589353522835102e-07, "loss": 0.112, "step": 7504 }, { "epoch": 0.48, "grad_norm": 0.8363646515703453, "learning_rate": 5.58832797034438e-07, "loss": 0.2132, "step": 7505 }, { "epoch": 0.48, "grad_norm": 0.8144334905459782, "learning_rate": 5.587302392754407e-07, "loss": 0.3436, "step": 7506 }, { "epoch": 0.48, "grad_norm": 0.8848522820728025, "learning_rate": 5.586276790108931e-07, "loss": 0.1702, "step": 7507 }, { "epoch": 0.48, "grad_norm": 0.25257579505733574, "learning_rate": 5.585251162451709e-07, "loss": 0.0853, "step": 7508 }, { "epoch": 0.48, "grad_norm": 1.3327463292421773, "learning_rate": 5.584225509826497e-07, "loss": 0.0967, "step": 7509 }, { "epoch": 0.48, "grad_norm": 1.3346992988258828, "learning_rate": 5.583199832277049e-07, "loss": 0.1898, "step": 7510 }, { "epoch": 0.48, "grad_norm": 1.4112276192280022, "learning_rate": 5.582174129847125e-07, "loss": 0.1245, "step": 7511 }, { "epoch": 0.48, "grad_norm": 0.8455868299535237, "learning_rate": 5.581148402580481e-07, "loss": 0.0945, "step": 7512 }, { "epoch": 0.48, "grad_norm": 0.7961139849131209, "learning_rate": 5.580122650520879e-07, "loss": 0.2118, "step": 7513 }, { "epoch": 0.48, "grad_norm": 0.9728331830756659, "learning_rate": 5.579096873712077e-07, "loss": 0.2882, "step": 7514 }, { "epoch": 0.48, "grad_norm": 3.145700284257323, "learning_rate": 5.578071072197839e-07, "loss": 0.1685, "step": 7515 }, { "epoch": 0.48, "grad_norm": 0.4719600960107473, "learning_rate": 5.577045246021928e-07, "loss": 0.1886, "step": 7516 }, { "epoch": 0.48, "grad_norm": 0.9118244024819626, "learning_rate": 5.576019395228106e-07, "loss": 0.0788, "step": 7517 }, { "epoch": 0.48, "grad_norm": 0.9239190246386999, "learning_rate": 5.574993519860138e-07, "loss": 0.3041, "step": 7518 }, { "epoch": 0.48, "grad_norm": 0.6822214387634311, "learning_rate": 5.573967619961791e-07, "loss": 0.1335, "step": 7519 }, { "epoch": 0.48, "grad_norm": 1.0513784452022978, "learning_rate": 5.572941695576834e-07, "loss": 0.0111, "step": 7520 }, { "epoch": 0.48, "grad_norm": 0.7666833761008804, "learning_rate": 5.57191574674903e-07, "loss": 0.1138, "step": 7521 }, { "epoch": 0.48, "grad_norm": 0.6404449871594167, "learning_rate": 5.570889773522149e-07, "loss": 0.1827, "step": 7522 }, { "epoch": 0.48, "grad_norm": 0.8274110168994253, "learning_rate": 5.569863775939965e-07, "loss": 0.2472, "step": 7523 }, { "epoch": 0.48, "grad_norm": 1.0695101580292428, "learning_rate": 5.568837754046246e-07, "loss": 0.2303, "step": 7524 }, { "epoch": 0.48, "grad_norm": 1.1625372819514135, "learning_rate": 5.567811707884765e-07, "loss": 0.2299, "step": 7525 }, { "epoch": 0.48, "grad_norm": 2.239994514135691, "learning_rate": 5.566785637499296e-07, "loss": 0.3032, "step": 7526 }, { "epoch": 0.48, "grad_norm": 1.845062834876523, "learning_rate": 5.565759542933611e-07, "loss": 0.2958, "step": 7527 }, { "epoch": 0.48, "grad_norm": 0.90402531981841, "learning_rate": 5.564733424231487e-07, "loss": 0.4111, "step": 7528 }, { "epoch": 0.48, "grad_norm": 2.1355457669657243, "learning_rate": 5.5637072814367e-07, "loss": 0.379, "step": 7529 }, { "epoch": 0.48, "grad_norm": 1.1256920054797275, "learning_rate": 5.562681114593028e-07, "loss": 0.1953, "step": 7530 }, { "epoch": 0.48, "grad_norm": 0.9255408113191933, "learning_rate": 5.561654923744248e-07, "loss": 0.306, "step": 7531 }, { "epoch": 0.48, "grad_norm": 0.6651820663004722, "learning_rate": 5.560628708934138e-07, "loss": 0.2565, "step": 7532 }, { "epoch": 0.48, "grad_norm": 1.2996596651136072, "learning_rate": 5.559602470206483e-07, "loss": 0.4174, "step": 7533 }, { "epoch": 0.48, "grad_norm": 0.48131214773171593, "learning_rate": 5.55857620760506e-07, "loss": 0.2003, "step": 7534 }, { "epoch": 0.48, "grad_norm": 2.0320814499631323, "learning_rate": 5.557549921173655e-07, "loss": 0.5093, "step": 7535 }, { "epoch": 0.48, "grad_norm": 1.0990309741661553, "learning_rate": 5.556523610956047e-07, "loss": 0.2306, "step": 7536 }, { "epoch": 0.48, "grad_norm": 1.8260995074990696, "learning_rate": 5.555497276996024e-07, "loss": 0.1701, "step": 7537 }, { "epoch": 0.48, "grad_norm": 1.1976736039695113, "learning_rate": 5.554470919337372e-07, "loss": 0.2351, "step": 7538 }, { "epoch": 0.48, "grad_norm": 0.40345372153506504, "learning_rate": 5.553444538023873e-07, "loss": 0.0877, "step": 7539 }, { "epoch": 0.48, "grad_norm": 0.5977917822238236, "learning_rate": 5.55241813309932e-07, "loss": 0.0766, "step": 7540 }, { "epoch": 0.48, "grad_norm": 0.7745418848440369, "learning_rate": 5.551391704607497e-07, "loss": 0.2889, "step": 7541 }, { "epoch": 0.48, "grad_norm": 0.42312695308428366, "learning_rate": 5.550365252592196e-07, "loss": 0.1046, "step": 7542 }, { "epoch": 0.48, "grad_norm": 0.6641934247142979, "learning_rate": 5.549338777097208e-07, "loss": 0.3636, "step": 7543 }, { "epoch": 0.48, "grad_norm": 0.952245733703652, "learning_rate": 5.548312278166322e-07, "loss": 0.2157, "step": 7544 }, { "epoch": 0.48, "grad_norm": 0.7019529118628163, "learning_rate": 5.547285755843334e-07, "loss": 0.2253, "step": 7545 }, { "epoch": 0.48, "grad_norm": 0.9408301507018305, "learning_rate": 5.546259210172034e-07, "loss": 0.2198, "step": 7546 }, { "epoch": 0.48, "grad_norm": 1.9697559515545209, "learning_rate": 5.54523264119622e-07, "loss": 0.1452, "step": 7547 }, { "epoch": 0.48, "grad_norm": 4.314768569281151, "learning_rate": 5.544206048959682e-07, "loss": 0.2431, "step": 7548 }, { "epoch": 0.48, "grad_norm": 0.7991822471163085, "learning_rate": 5.543179433506222e-07, "loss": 0.0965, "step": 7549 }, { "epoch": 0.48, "grad_norm": 0.7061729152233741, "learning_rate": 5.542152794879636e-07, "loss": 0.2957, "step": 7550 }, { "epoch": 0.48, "grad_norm": 0.5756950985548269, "learning_rate": 5.54112613312372e-07, "loss": 0.256, "step": 7551 }, { "epoch": 0.48, "grad_norm": 0.2867756764185683, "learning_rate": 5.540099448282276e-07, "loss": 0.0989, "step": 7552 }, { "epoch": 0.48, "grad_norm": 0.6086105040269305, "learning_rate": 5.539072740399104e-07, "loss": 0.112, "step": 7553 }, { "epoch": 0.48, "grad_norm": 1.138626460754883, "learning_rate": 5.538046009518006e-07, "loss": 0.059, "step": 7554 }, { "epoch": 0.48, "grad_norm": 0.6693955455920618, "learning_rate": 5.537019255682783e-07, "loss": 0.3693, "step": 7555 }, { "epoch": 0.48, "grad_norm": 6.216398338530876, "learning_rate": 5.535992478937239e-07, "loss": 0.2422, "step": 7556 }, { "epoch": 0.48, "grad_norm": 0.6205476139449395, "learning_rate": 5.534965679325179e-07, "loss": 0.1238, "step": 7557 }, { "epoch": 0.48, "grad_norm": 0.2505535639024559, "learning_rate": 5.533938856890407e-07, "loss": 0.0778, "step": 7558 }, { "epoch": 0.48, "grad_norm": 0.7440686785079905, "learning_rate": 5.532912011676729e-07, "loss": 0.0899, "step": 7559 }, { "epoch": 0.48, "grad_norm": 5.782080365904402, "learning_rate": 5.531885143727955e-07, "loss": 0.09, "step": 7560 }, { "epoch": 0.48, "grad_norm": 1.9205299052777522, "learning_rate": 5.530858253087891e-07, "loss": 0.1384, "step": 7561 }, { "epoch": 0.48, "grad_norm": 2.0070649464553347, "learning_rate": 5.529831339800348e-07, "loss": 0.2301, "step": 7562 }, { "epoch": 0.48, "grad_norm": 4.642698626302945, "learning_rate": 5.528804403909133e-07, "loss": 0.1444, "step": 7563 }, { "epoch": 0.48, "grad_norm": 3.5385271444594335, "learning_rate": 5.52777744545806e-07, "loss": 0.1689, "step": 7564 }, { "epoch": 0.48, "grad_norm": 0.6910624278217642, "learning_rate": 5.52675046449094e-07, "loss": 0.1171, "step": 7565 }, { "epoch": 0.48, "grad_norm": 0.5188507620744114, "learning_rate": 5.525723461051587e-07, "loss": 0.118, "step": 7566 }, { "epoch": 0.48, "grad_norm": 0.7215030641675393, "learning_rate": 5.524696435183812e-07, "loss": 0.1521, "step": 7567 }, { "epoch": 0.48, "grad_norm": 0.27550035434511455, "learning_rate": 5.523669386931433e-07, "loss": 0.0337, "step": 7568 }, { "epoch": 0.48, "grad_norm": 0.45155692176302054, "learning_rate": 5.522642316338268e-07, "loss": 0.0892, "step": 7569 }, { "epoch": 0.48, "grad_norm": 0.5821565862132517, "learning_rate": 5.521615223448129e-07, "loss": 0.2217, "step": 7570 }, { "epoch": 0.48, "grad_norm": 4.732416575891742, "learning_rate": 5.520588108304836e-07, "loss": 0.2106, "step": 7571 }, { "epoch": 0.48, "grad_norm": 2.594161838262372, "learning_rate": 5.519560970952207e-07, "loss": 0.0865, "step": 7572 }, { "epoch": 0.48, "grad_norm": 0.4948336791965397, "learning_rate": 5.518533811434064e-07, "loss": 0.2018, "step": 7573 }, { "epoch": 0.48, "grad_norm": 1.0229297133671198, "learning_rate": 5.517506629794226e-07, "loss": 0.2161, "step": 7574 }, { "epoch": 0.48, "grad_norm": 0.6807350704565484, "learning_rate": 5.516479426076515e-07, "loss": 0.1555, "step": 7575 }, { "epoch": 0.48, "grad_norm": 5.0254761901459135, "learning_rate": 5.515452200324753e-07, "loss": 0.1612, "step": 7576 }, { "epoch": 0.48, "grad_norm": 13.57679777406315, "learning_rate": 5.514424952582765e-07, "loss": 0.2426, "step": 7577 }, { "epoch": 0.48, "grad_norm": 0.8858005241840776, "learning_rate": 5.513397682894373e-07, "loss": 0.2008, "step": 7578 }, { "epoch": 0.48, "grad_norm": 0.6883412880363127, "learning_rate": 5.512370391303404e-07, "loss": 0.2956, "step": 7579 }, { "epoch": 0.48, "grad_norm": 1.642081543611526, "learning_rate": 5.511343077853684e-07, "loss": 0.1615, "step": 7580 }, { "epoch": 0.48, "grad_norm": 0.7327974006760869, "learning_rate": 5.510315742589042e-07, "loss": 0.3597, "step": 7581 }, { "epoch": 0.48, "grad_norm": 0.5961600903985111, "learning_rate": 5.509288385553302e-07, "loss": 0.1655, "step": 7582 }, { "epoch": 0.48, "grad_norm": 0.5370388498415422, "learning_rate": 5.508261006790298e-07, "loss": 0.3869, "step": 7583 }, { "epoch": 0.48, "grad_norm": 1.8474899756471022, "learning_rate": 5.507233606343857e-07, "loss": 0.3243, "step": 7584 }, { "epoch": 0.48, "grad_norm": 1.2173954578916704, "learning_rate": 5.506206184257809e-07, "loss": 0.3129, "step": 7585 }, { "epoch": 0.48, "grad_norm": 0.5548149607465687, "learning_rate": 5.505178740575989e-07, "loss": 0.2358, "step": 7586 }, { "epoch": 0.48, "grad_norm": 2.2944959997760943, "learning_rate": 5.504151275342228e-07, "loss": 0.1349, "step": 7587 }, { "epoch": 0.48, "grad_norm": 24.841566155442354, "learning_rate": 5.503123788600361e-07, "loss": 0.0623, "step": 7588 }, { "epoch": 0.48, "grad_norm": 0.8311751932502293, "learning_rate": 5.502096280394222e-07, "loss": 0.1832, "step": 7589 }, { "epoch": 0.48, "grad_norm": 0.7656308631754906, "learning_rate": 5.501068750767646e-07, "loss": 0.2056, "step": 7590 }, { "epoch": 0.48, "grad_norm": 0.8605056054119362, "learning_rate": 5.500041199764469e-07, "loss": 0.1156, "step": 7591 }, { "epoch": 0.48, "grad_norm": 0.5339431055225383, "learning_rate": 5.49901362742853e-07, "loss": 0.0966, "step": 7592 }, { "epoch": 0.48, "grad_norm": 0.7953356023979216, "learning_rate": 5.497986033803664e-07, "loss": 0.2662, "step": 7593 }, { "epoch": 0.48, "grad_norm": 0.5622668629224475, "learning_rate": 5.496958418933715e-07, "loss": 0.0963, "step": 7594 }, { "epoch": 0.48, "grad_norm": 0.6164005777732087, "learning_rate": 5.495930782862521e-07, "loss": 0.3443, "step": 7595 }, { "epoch": 0.48, "grad_norm": 0.9408372696437483, "learning_rate": 5.494903125633923e-07, "loss": 0.2415, "step": 7596 }, { "epoch": 0.48, "grad_norm": 0.6880557385671987, "learning_rate": 5.493875447291762e-07, "loss": 0.1342, "step": 7597 }, { "epoch": 0.48, "grad_norm": 0.44870256035805056, "learning_rate": 5.492847747879882e-07, "loss": 0.1061, "step": 7598 }, { "epoch": 0.48, "grad_norm": 0.8199473668297109, "learning_rate": 5.491820027442126e-07, "loss": 0.3366, "step": 7599 }, { "epoch": 0.48, "grad_norm": 0.7325136084105575, "learning_rate": 5.490792286022339e-07, "loss": 0.2507, "step": 7600 }, { "epoch": 0.48, "grad_norm": 0.728868102794531, "learning_rate": 5.489764523664366e-07, "loss": 0.2105, "step": 7601 }, { "epoch": 0.48, "grad_norm": 3.3552811345763844, "learning_rate": 5.488736740412056e-07, "loss": 0.3901, "step": 7602 }, { "epoch": 0.48, "grad_norm": 0.9693907483553831, "learning_rate": 5.487708936309252e-07, "loss": 0.2329, "step": 7603 }, { "epoch": 0.48, "grad_norm": 0.6543900507727194, "learning_rate": 5.486681111399804e-07, "loss": 0.3631, "step": 7604 }, { "epoch": 0.48, "grad_norm": 0.3797306778892442, "learning_rate": 5.485653265727563e-07, "loss": 0.0862, "step": 7605 }, { "epoch": 0.49, "grad_norm": 1.347410830998166, "learning_rate": 5.484625399336378e-07, "loss": 0.1362, "step": 7606 }, { "epoch": 0.49, "grad_norm": 1.1322417870599837, "learning_rate": 5.483597512270097e-07, "loss": 0.3191, "step": 7607 }, { "epoch": 0.49, "grad_norm": 2.3830461419814357, "learning_rate": 5.482569604572576e-07, "loss": 0.1132, "step": 7608 }, { "epoch": 0.49, "grad_norm": 0.4995163021122291, "learning_rate": 5.481541676287664e-07, "loss": 0.3372, "step": 7609 }, { "epoch": 0.49, "grad_norm": 0.6449794252728354, "learning_rate": 5.480513727459218e-07, "loss": 0.2552, "step": 7610 }, { "epoch": 0.49, "grad_norm": 0.3200613387108409, "learning_rate": 5.479485758131089e-07, "loss": 0.1286, "step": 7611 }, { "epoch": 0.49, "grad_norm": 0.4558450421203875, "learning_rate": 5.478457768347132e-07, "loss": 0.1366, "step": 7612 }, { "epoch": 0.49, "grad_norm": 8.501176775713478, "learning_rate": 5.477429758151208e-07, "loss": 0.2014, "step": 7613 }, { "epoch": 0.49, "grad_norm": 1.168999490477346, "learning_rate": 5.476401727587168e-07, "loss": 0.2244, "step": 7614 }, { "epoch": 0.49, "grad_norm": 0.5360430642494545, "learning_rate": 5.475373676698874e-07, "loss": 0.2527, "step": 7615 }, { "epoch": 0.49, "grad_norm": 1.3431825819123024, "learning_rate": 5.474345605530185e-07, "loss": 0.2389, "step": 7616 }, { "epoch": 0.49, "grad_norm": 0.755240154250283, "learning_rate": 5.473317514124957e-07, "loss": 0.2358, "step": 7617 }, { "epoch": 0.49, "grad_norm": 0.5671909924975638, "learning_rate": 5.472289402527053e-07, "loss": 0.2044, "step": 7618 }, { "epoch": 0.49, "grad_norm": 1.0461198665266376, "learning_rate": 5.471261270780333e-07, "loss": 0.3341, "step": 7619 }, { "epoch": 0.49, "grad_norm": 2.7318666519389283, "learning_rate": 5.470233118928659e-07, "loss": 0.098, "step": 7620 }, { "epoch": 0.49, "grad_norm": 6.196377638093798, "learning_rate": 5.469204947015897e-07, "loss": 0.1907, "step": 7621 }, { "epoch": 0.49, "grad_norm": 8.052346199280715, "learning_rate": 5.468176755085907e-07, "loss": 0.3102, "step": 7622 }, { "epoch": 0.49, "grad_norm": 0.4156714251904758, "learning_rate": 5.467148543182556e-07, "loss": 0.0306, "step": 7623 }, { "epoch": 0.49, "grad_norm": 1.2150847494755244, "learning_rate": 5.466120311349709e-07, "loss": 0.0505, "step": 7624 }, { "epoch": 0.49, "grad_norm": 0.09483021303767877, "learning_rate": 5.465092059631234e-07, "loss": 0.0652, "step": 7625 }, { "epoch": 0.49, "grad_norm": 0.5774180036422483, "learning_rate": 5.464063788070995e-07, "loss": 0.0294, "step": 7626 }, { "epoch": 0.49, "grad_norm": 1.1717246689561696, "learning_rate": 5.463035496712862e-07, "loss": 0.2189, "step": 7627 }, { "epoch": 0.49, "grad_norm": 0.15920007005961984, "learning_rate": 5.462007185600705e-07, "loss": 0.0866, "step": 7628 }, { "epoch": 0.49, "grad_norm": 5.562923039175392, "learning_rate": 5.460978854778392e-07, "loss": 0.1522, "step": 7629 }, { "epoch": 0.49, "grad_norm": 3.784044452622361, "learning_rate": 5.459950504289794e-07, "loss": 0.1205, "step": 7630 }, { "epoch": 0.49, "grad_norm": 0.5308654101621021, "learning_rate": 5.458922134178784e-07, "loss": 0.0208, "step": 7631 }, { "epoch": 0.49, "grad_norm": 7.900879184419944, "learning_rate": 5.457893744489233e-07, "loss": 0.1425, "step": 7632 }, { "epoch": 0.49, "grad_norm": 0.22269806232356187, "learning_rate": 5.456865335265013e-07, "loss": 0.007, "step": 7633 }, { "epoch": 0.49, "grad_norm": 0.9320836456837528, "learning_rate": 5.45583690655e-07, "loss": 0.2754, "step": 7634 }, { "epoch": 0.49, "grad_norm": 0.8461662886483006, "learning_rate": 5.454808458388069e-07, "loss": 0.1706, "step": 7635 }, { "epoch": 0.49, "grad_norm": 0.7396967270574977, "learning_rate": 5.453779990823094e-07, "loss": 0.1853, "step": 7636 }, { "epoch": 0.49, "grad_norm": 1.1345758146153235, "learning_rate": 5.452751503898953e-07, "loss": 0.2223, "step": 7637 }, { "epoch": 0.49, "grad_norm": 0.09067743686668761, "learning_rate": 5.451722997659522e-07, "loss": 0.0006, "step": 7638 }, { "epoch": 0.49, "grad_norm": 0.4105353757140416, "learning_rate": 5.450694472148679e-07, "loss": 0.3065, "step": 7639 }, { "epoch": 0.49, "grad_norm": 3.2832544663749155, "learning_rate": 5.449665927410305e-07, "loss": 0.0066, "step": 7640 }, { "epoch": 0.49, "grad_norm": 0.6266049999684713, "learning_rate": 5.44863736348828e-07, "loss": 0.1949, "step": 7641 }, { "epoch": 0.49, "grad_norm": 1.7392414920433288, "learning_rate": 5.447608780426481e-07, "loss": 0.0945, "step": 7642 }, { "epoch": 0.49, "grad_norm": 1.4139969306125162, "learning_rate": 5.446580178268794e-07, "loss": 0.1347, "step": 7643 }, { "epoch": 0.49, "grad_norm": 5.381408684843561, "learning_rate": 5.445551557059097e-07, "loss": 0.1924, "step": 7644 }, { "epoch": 0.49, "grad_norm": 0.5822479549167303, "learning_rate": 5.444522916841275e-07, "loss": 0.4079, "step": 7645 }, { "epoch": 0.49, "grad_norm": 5.772179732324928, "learning_rate": 5.443494257659211e-07, "loss": 0.1675, "step": 7646 }, { "epoch": 0.49, "grad_norm": 0.6034245235777258, "learning_rate": 5.442465579556792e-07, "loss": 0.1451, "step": 7647 }, { "epoch": 0.49, "grad_norm": 1.0490176186344715, "learning_rate": 5.441436882577901e-07, "loss": 0.4069, "step": 7648 }, { "epoch": 0.49, "grad_norm": 4.994696639605889, "learning_rate": 5.440408166766426e-07, "loss": 0.2387, "step": 7649 }, { "epoch": 0.49, "grad_norm": 0.6424124901515734, "learning_rate": 5.439379432166254e-07, "loss": 0.3026, "step": 7650 }, { "epoch": 0.49, "grad_norm": 12.852007642684793, "learning_rate": 5.438350678821269e-07, "loss": 0.2627, "step": 7651 }, { "epoch": 0.49, "grad_norm": 2.395700268164772, "learning_rate": 5.437321906775366e-07, "loss": 0.2675, "step": 7652 }, { "epoch": 0.49, "grad_norm": 0.27957112006074175, "learning_rate": 5.43629311607243e-07, "loss": 0.1985, "step": 7653 }, { "epoch": 0.49, "grad_norm": 0.7109060020565691, "learning_rate": 5.435264306756353e-07, "loss": 0.2381, "step": 7654 }, { "epoch": 0.49, "grad_norm": 0.7992833436780332, "learning_rate": 5.434235478871025e-07, "loss": 0.267, "step": 7655 }, { "epoch": 0.49, "grad_norm": 0.8420379115632857, "learning_rate": 5.433206632460339e-07, "loss": 0.1712, "step": 7656 }, { "epoch": 0.49, "grad_norm": 1.0141722606768333, "learning_rate": 5.432177767568188e-07, "loss": 0.2647, "step": 7657 }, { "epoch": 0.49, "grad_norm": 0.9163221936286211, "learning_rate": 5.431148884238463e-07, "loss": 0.2378, "step": 7658 }, { "epoch": 0.49, "grad_norm": 1.11536793358381, "learning_rate": 5.430119982515061e-07, "loss": 0.3102, "step": 7659 }, { "epoch": 0.49, "grad_norm": 0.6877843730171554, "learning_rate": 5.429091062441877e-07, "loss": 0.1391, "step": 7660 }, { "epoch": 0.49, "grad_norm": 0.8357158283203828, "learning_rate": 5.428062124062803e-07, "loss": 0.1612, "step": 7661 }, { "epoch": 0.49, "grad_norm": 0.3087892038315234, "learning_rate": 5.427033167421739e-07, "loss": 0.1355, "step": 7662 }, { "epoch": 0.49, "grad_norm": 0.6497803644720554, "learning_rate": 5.426004192562583e-07, "loss": 0.1091, "step": 7663 }, { "epoch": 0.49, "grad_norm": 1.1518661309368448, "learning_rate": 5.424975199529231e-07, "loss": 0.0588, "step": 7664 }, { "epoch": 0.49, "grad_norm": 0.7817363220829404, "learning_rate": 5.423946188365583e-07, "loss": 0.2561, "step": 7665 }, { "epoch": 0.49, "grad_norm": 5.0568310816484825, "learning_rate": 5.422917159115538e-07, "loss": 0.0106, "step": 7666 }, { "epoch": 0.49, "grad_norm": 1.0509192402800769, "learning_rate": 5.421888111822996e-07, "loss": 0.1834, "step": 7667 }, { "epoch": 0.49, "grad_norm": 2.638077809428933, "learning_rate": 5.42085904653186e-07, "loss": 0.0882, "step": 7668 }, { "epoch": 0.49, "grad_norm": 1.1955041141489864, "learning_rate": 5.419829963286032e-07, "loss": 0.0549, "step": 7669 }, { "epoch": 0.49, "grad_norm": 0.5011330821533714, "learning_rate": 5.418800862129411e-07, "loss": 0.129, "step": 7670 }, { "epoch": 0.49, "grad_norm": 0.6610469479758729, "learning_rate": 5.417771743105907e-07, "loss": 0.2157, "step": 7671 }, { "epoch": 0.49, "grad_norm": 0.32319978083462303, "learning_rate": 5.416742606259418e-07, "loss": 0.0035, "step": 7672 }, { "epoch": 0.49, "grad_norm": 0.25474548508449196, "learning_rate": 5.415713451633852e-07, "loss": 0.1364, "step": 7673 }, { "epoch": 0.49, "grad_norm": 0.5000377958750638, "learning_rate": 5.414684279273115e-07, "loss": 0.1953, "step": 7674 }, { "epoch": 0.49, "grad_norm": 0.9522464291738733, "learning_rate": 5.413655089221113e-07, "loss": 0.3496, "step": 7675 }, { "epoch": 0.49, "grad_norm": 0.3830960507033039, "learning_rate": 5.412625881521753e-07, "loss": 0.1597, "step": 7676 }, { "epoch": 0.49, "grad_norm": 4.8450301876350155, "learning_rate": 5.411596656218945e-07, "loss": 0.1851, "step": 7677 }, { "epoch": 0.49, "grad_norm": 0.6179371119718056, "learning_rate": 5.410567413356593e-07, "loss": 0.1354, "step": 7678 }, { "epoch": 0.49, "grad_norm": 0.8340167567328702, "learning_rate": 5.409538152978612e-07, "loss": 0.2141, "step": 7679 }, { "epoch": 0.49, "grad_norm": 2.8976651429360185, "learning_rate": 5.40850887512891e-07, "loss": 0.2676, "step": 7680 }, { "epoch": 0.49, "grad_norm": 1.1350004580546227, "learning_rate": 5.407479579851398e-07, "loss": 0.3251, "step": 7681 }, { "epoch": 0.49, "grad_norm": 0.8125019697000947, "learning_rate": 5.406450267189989e-07, "loss": 0.1211, "step": 7682 }, { "epoch": 0.49, "grad_norm": 0.87036871741752, "learning_rate": 5.405420937188591e-07, "loss": 0.0286, "step": 7683 }, { "epoch": 0.49, "grad_norm": 1.2302845580684012, "learning_rate": 5.404391589891125e-07, "loss": 0.3608, "step": 7684 }, { "epoch": 0.49, "grad_norm": 2.849230930705265, "learning_rate": 5.403362225341499e-07, "loss": 0.205, "step": 7685 }, { "epoch": 0.49, "grad_norm": 1.8302906844553808, "learning_rate": 5.40233284358363e-07, "loss": 0.221, "step": 7686 }, { "epoch": 0.49, "grad_norm": 0.663861848975396, "learning_rate": 5.401303444661433e-07, "loss": 0.2397, "step": 7687 }, { "epoch": 0.49, "grad_norm": 1.9918275103532312, "learning_rate": 5.400274028618824e-07, "loss": 0.1558, "step": 7688 }, { "epoch": 0.49, "grad_norm": 0.7139400232361313, "learning_rate": 5.39924459549972e-07, "loss": 0.3042, "step": 7689 }, { "epoch": 0.49, "grad_norm": 1.6811553655827545, "learning_rate": 5.398215145348039e-07, "loss": 0.2626, "step": 7690 }, { "epoch": 0.49, "grad_norm": 0.6472167918658924, "learning_rate": 5.3971856782077e-07, "loss": 0.2076, "step": 7691 }, { "epoch": 0.49, "grad_norm": 0.7166800549540868, "learning_rate": 5.396156194122621e-07, "loss": 0.2499, "step": 7692 }, { "epoch": 0.49, "grad_norm": 0.7760320156479528, "learning_rate": 5.395126693136723e-07, "loss": 0.4403, "step": 7693 }, { "epoch": 0.49, "grad_norm": 0.44126949815312816, "learning_rate": 5.394097175293926e-07, "loss": 0.2611, "step": 7694 }, { "epoch": 0.49, "grad_norm": 1.2981542533191037, "learning_rate": 5.393067640638151e-07, "loss": 0.1411, "step": 7695 }, { "epoch": 0.49, "grad_norm": 3.3773437959150394, "learning_rate": 5.39203808921332e-07, "loss": 0.2844, "step": 7696 }, { "epoch": 0.49, "grad_norm": 1.9578332391244648, "learning_rate": 5.391008521063356e-07, "loss": 0.3256, "step": 7697 }, { "epoch": 0.49, "grad_norm": 0.6184362630267441, "learning_rate": 5.389978936232185e-07, "loss": 0.0678, "step": 7698 }, { "epoch": 0.49, "grad_norm": 5.015152660420145, "learning_rate": 5.388949334763724e-07, "loss": 0.3061, "step": 7699 }, { "epoch": 0.49, "grad_norm": 0.14821951159348457, "learning_rate": 5.387919716701905e-07, "loss": 0.077, "step": 7700 }, { "epoch": 0.49, "grad_norm": 0.8196056775202845, "learning_rate": 5.386890082090652e-07, "loss": 0.0755, "step": 7701 }, { "epoch": 0.49, "grad_norm": 0.27443719106242326, "learning_rate": 5.385860430973889e-07, "loss": 0.168, "step": 7702 }, { "epoch": 0.49, "grad_norm": 0.39796304383492087, "learning_rate": 5.384830763395544e-07, "loss": 0.2335, "step": 7703 }, { "epoch": 0.49, "grad_norm": 0.2585751688268829, "learning_rate": 5.383801079399546e-07, "loss": 0.1074, "step": 7704 }, { "epoch": 0.49, "grad_norm": 0.2769851886238829, "learning_rate": 5.382771379029822e-07, "loss": 0.1108, "step": 7705 }, { "epoch": 0.49, "grad_norm": 0.638441502383699, "learning_rate": 5.381741662330302e-07, "loss": 0.1724, "step": 7706 }, { "epoch": 0.49, "grad_norm": 1.681914166537708, "learning_rate": 5.380711929344914e-07, "loss": 0.2414, "step": 7707 }, { "epoch": 0.49, "grad_norm": 0.42036962401488187, "learning_rate": 5.37968218011759e-07, "loss": 0.1828, "step": 7708 }, { "epoch": 0.49, "grad_norm": 0.6681332202550524, "learning_rate": 5.378652414692262e-07, "loss": 0.1142, "step": 7709 }, { "epoch": 0.49, "grad_norm": 0.6668070210396156, "learning_rate": 5.37762263311286e-07, "loss": 0.2297, "step": 7710 }, { "epoch": 0.49, "grad_norm": 0.17273657349610713, "learning_rate": 5.376592835423319e-07, "loss": 0.0043, "step": 7711 }, { "epoch": 0.49, "grad_norm": 0.6394614784215694, "learning_rate": 5.37556302166757e-07, "loss": 0.2194, "step": 7712 }, { "epoch": 0.49, "grad_norm": 0.9990973771703578, "learning_rate": 5.374533191889546e-07, "loss": 0.3003, "step": 7713 }, { "epoch": 0.49, "grad_norm": 2.852651630229182, "learning_rate": 5.373503346133183e-07, "loss": 0.0856, "step": 7714 }, { "epoch": 0.49, "grad_norm": 0.7313290835938698, "learning_rate": 5.372473484442418e-07, "loss": 0.0975, "step": 7715 }, { "epoch": 0.49, "grad_norm": 0.7619842201842432, "learning_rate": 5.371443606861186e-07, "loss": 0.1442, "step": 7716 }, { "epoch": 0.49, "grad_norm": 0.48601837270091935, "learning_rate": 5.370413713433419e-07, "loss": 0.2027, "step": 7717 }, { "epoch": 0.49, "grad_norm": 0.9943991060640079, "learning_rate": 5.369383804203062e-07, "loss": 0.1661, "step": 7718 }, { "epoch": 0.49, "grad_norm": 0.7764327557555127, "learning_rate": 5.368353879214048e-07, "loss": 0.1986, "step": 7719 }, { "epoch": 0.49, "grad_norm": 0.9571235265037613, "learning_rate": 5.367323938510319e-07, "loss": 0.0151, "step": 7720 }, { "epoch": 0.49, "grad_norm": 0.1331415179899828, "learning_rate": 5.36629398213581e-07, "loss": 0.0049, "step": 7721 }, { "epoch": 0.49, "grad_norm": 0.9564062075025476, "learning_rate": 5.365264010134465e-07, "loss": 0.2393, "step": 7722 }, { "epoch": 0.49, "grad_norm": 3.9988487704387783, "learning_rate": 5.364234022550222e-07, "loss": 0.0996, "step": 7723 }, { "epoch": 0.49, "grad_norm": 0.6578332170961195, "learning_rate": 5.363204019427023e-07, "loss": 0.4422, "step": 7724 }, { "epoch": 0.49, "grad_norm": 1.0935508995397545, "learning_rate": 5.362174000808812e-07, "loss": 0.3207, "step": 7725 }, { "epoch": 0.49, "grad_norm": 0.6625098167845284, "learning_rate": 5.361143966739528e-07, "loss": 0.2386, "step": 7726 }, { "epoch": 0.49, "grad_norm": 0.6384002449405118, "learning_rate": 5.360113917263119e-07, "loss": 0.2626, "step": 7727 }, { "epoch": 0.49, "grad_norm": 0.86185255036157, "learning_rate": 5.359083852423525e-07, "loss": 0.5264, "step": 7728 }, { "epoch": 0.49, "grad_norm": 0.5290037514723575, "learning_rate": 5.358053772264691e-07, "loss": 0.1684, "step": 7729 }, { "epoch": 0.49, "grad_norm": 0.22971659840906652, "learning_rate": 5.357023676830565e-07, "loss": 0.0068, "step": 7730 }, { "epoch": 0.49, "grad_norm": 1.0084694477299414, "learning_rate": 5.35599356616509e-07, "loss": 0.3455, "step": 7731 }, { "epoch": 0.49, "grad_norm": 1.5719038292544176, "learning_rate": 5.354963440312215e-07, "loss": 0.1823, "step": 7732 }, { "epoch": 0.49, "grad_norm": 0.8107698385400323, "learning_rate": 5.353933299315885e-07, "loss": 0.0623, "step": 7733 }, { "epoch": 0.49, "grad_norm": 1.5501289802182807, "learning_rate": 5.35290314322005e-07, "loss": 0.3329, "step": 7734 }, { "epoch": 0.49, "grad_norm": 1.3193147033692225, "learning_rate": 5.351872972068656e-07, "loss": 0.214, "step": 7735 }, { "epoch": 0.49, "grad_norm": 0.8932772123066981, "learning_rate": 5.350842785905654e-07, "loss": 0.5223, "step": 7736 }, { "epoch": 0.49, "grad_norm": 1.3815366076253965, "learning_rate": 5.349812584774994e-07, "loss": 0.1413, "step": 7737 }, { "epoch": 0.49, "grad_norm": 1.0877090864882917, "learning_rate": 5.348782368720625e-07, "loss": 0.3399, "step": 7738 }, { "epoch": 0.49, "grad_norm": 0.649411908566595, "learning_rate": 5.347752137786501e-07, "loss": 0.0373, "step": 7739 }, { "epoch": 0.49, "grad_norm": 1.168303644676484, "learning_rate": 5.346721892016571e-07, "loss": 0.2095, "step": 7740 }, { "epoch": 0.49, "grad_norm": 1.6210928324996892, "learning_rate": 5.345691631454788e-07, "loss": 0.1635, "step": 7741 }, { "epoch": 0.49, "grad_norm": 1.4875449275210424, "learning_rate": 5.344661356145104e-07, "loss": 0.1373, "step": 7742 }, { "epoch": 0.49, "grad_norm": 0.35111158500776496, "learning_rate": 5.343631066131476e-07, "loss": 0.2429, "step": 7743 }, { "epoch": 0.49, "grad_norm": 5.130496377326105, "learning_rate": 5.342600761457853e-07, "loss": 0.286, "step": 7744 }, { "epoch": 0.49, "grad_norm": 0.6032419223302801, "learning_rate": 5.341570442168194e-07, "loss": 0.0359, "step": 7745 }, { "epoch": 0.49, "grad_norm": 1.0336253550312409, "learning_rate": 5.340540108306454e-07, "loss": 0.1477, "step": 7746 }, { "epoch": 0.49, "grad_norm": 0.4896560824713781, "learning_rate": 5.339509759916589e-07, "loss": 0.1968, "step": 7747 }, { "epoch": 0.49, "grad_norm": 0.624759526829821, "learning_rate": 5.338479397042553e-07, "loss": 0.3045, "step": 7748 }, { "epoch": 0.49, "grad_norm": 0.2795796516751618, "learning_rate": 5.337449019728306e-07, "loss": 0.1251, "step": 7749 }, { "epoch": 0.49, "grad_norm": 0.8266157280587519, "learning_rate": 5.336418628017807e-07, "loss": 0.3139, "step": 7750 }, { "epoch": 0.49, "grad_norm": 0.6785908836524304, "learning_rate": 5.335388221955012e-07, "loss": 0.2336, "step": 7751 }, { "epoch": 0.49, "grad_norm": 2.3237713765769747, "learning_rate": 5.334357801583881e-07, "loss": 0.2462, "step": 7752 }, { "epoch": 0.49, "grad_norm": 0.5192653336984875, "learning_rate": 5.333327366948374e-07, "loss": 0.1865, "step": 7753 }, { "epoch": 0.49, "grad_norm": 0.24939429534706634, "learning_rate": 5.332296918092453e-07, "loss": 0.2587, "step": 7754 }, { "epoch": 0.49, "grad_norm": 0.38511437198997533, "learning_rate": 5.331266455060077e-07, "loss": 0.011, "step": 7755 }, { "epoch": 0.49, "grad_norm": 0.2836501620836957, "learning_rate": 5.330235977895205e-07, "loss": 0.1769, "step": 7756 }, { "epoch": 0.49, "grad_norm": 0.995630535414285, "learning_rate": 5.329205486641806e-07, "loss": 0.385, "step": 7757 }, { "epoch": 0.49, "grad_norm": 0.7148093968339438, "learning_rate": 5.328174981343838e-07, "loss": 0.3027, "step": 7758 }, { "epoch": 0.49, "grad_norm": 0.8158328911905972, "learning_rate": 5.327144462045266e-07, "loss": 0.3766, "step": 7759 }, { "epoch": 0.49, "grad_norm": 0.58806648258431, "learning_rate": 5.326113928790053e-07, "loss": 0.2653, "step": 7760 }, { "epoch": 0.49, "grad_norm": 0.9748352938096041, "learning_rate": 5.325083381622164e-07, "loss": 0.178, "step": 7761 }, { "epoch": 0.49, "grad_norm": 0.9685051298089195, "learning_rate": 5.324052820585563e-07, "loss": 0.2109, "step": 7762 }, { "epoch": 0.5, "grad_norm": 1.344605254927458, "learning_rate": 5.323022245724219e-07, "loss": 0.1343, "step": 7763 }, { "epoch": 0.5, "grad_norm": 2.313227171798812, "learning_rate": 5.321991657082096e-07, "loss": 0.0295, "step": 7764 }, { "epoch": 0.5, "grad_norm": 0.5575780887903956, "learning_rate": 5.320961054703163e-07, "loss": 0.305, "step": 7765 }, { "epoch": 0.5, "grad_norm": 0.36333360036625995, "learning_rate": 5.319930438631386e-07, "loss": 0.002, "step": 7766 }, { "epoch": 0.5, "grad_norm": 0.4898434219005641, "learning_rate": 5.318899808910733e-07, "loss": 0.1105, "step": 7767 }, { "epoch": 0.5, "grad_norm": 0.24221747318964093, "learning_rate": 5.317869165585173e-07, "loss": 0.0364, "step": 7768 }, { "epoch": 0.5, "grad_norm": 1.3506909444437065, "learning_rate": 5.316838508698676e-07, "loss": 0.2376, "step": 7769 }, { "epoch": 0.5, "grad_norm": 1.7355945954046756, "learning_rate": 5.315807838295208e-07, "loss": 0.2195, "step": 7770 }, { "epoch": 0.5, "grad_norm": 0.6566585438574053, "learning_rate": 5.314777154418746e-07, "loss": 0.1648, "step": 7771 }, { "epoch": 0.5, "grad_norm": 0.41189169152852995, "learning_rate": 5.313746457113257e-07, "loss": 0.1249, "step": 7772 }, { "epoch": 0.5, "grad_norm": 0.6918205197923989, "learning_rate": 5.312715746422713e-07, "loss": 0.3339, "step": 7773 }, { "epoch": 0.5, "grad_norm": 2.2473419047145127, "learning_rate": 5.311685022391088e-07, "loss": 0.1643, "step": 7774 }, { "epoch": 0.5, "grad_norm": 0.5809377724992406, "learning_rate": 5.310654285062352e-07, "loss": 0.2165, "step": 7775 }, { "epoch": 0.5, "grad_norm": 2.015666218305165, "learning_rate": 5.30962353448048e-07, "loss": 0.1307, "step": 7776 }, { "epoch": 0.5, "grad_norm": 0.38949374229009076, "learning_rate": 5.308592770689447e-07, "loss": 0.1978, "step": 7777 }, { "epoch": 0.5, "grad_norm": 5.157184007758185, "learning_rate": 5.307561993733225e-07, "loss": 0.2324, "step": 7778 }, { "epoch": 0.5, "grad_norm": 0.22596655073587732, "learning_rate": 5.306531203655789e-07, "loss": 0.0781, "step": 7779 }, { "epoch": 0.5, "grad_norm": 0.7766956549889035, "learning_rate": 5.305500400501116e-07, "loss": 0.2524, "step": 7780 }, { "epoch": 0.5, "grad_norm": 0.533611785353364, "learning_rate": 5.304469584313184e-07, "loss": 0.1424, "step": 7781 }, { "epoch": 0.5, "grad_norm": 0.6913264970543666, "learning_rate": 5.303438755135966e-07, "loss": 0.4107, "step": 7782 }, { "epoch": 0.5, "grad_norm": 1.9672104309768748, "learning_rate": 5.30240791301344e-07, "loss": 0.1589, "step": 7783 }, { "epoch": 0.5, "grad_norm": 0.6988466547244304, "learning_rate": 5.301377057989585e-07, "loss": 0.1725, "step": 7784 }, { "epoch": 0.5, "grad_norm": 0.2910088842559605, "learning_rate": 5.30034619010838e-07, "loss": 0.1654, "step": 7785 }, { "epoch": 0.5, "grad_norm": 4.355324140429315, "learning_rate": 5.299315309413801e-07, "loss": 0.3068, "step": 7786 }, { "epoch": 0.5, "grad_norm": 0.8952307282853894, "learning_rate": 5.298284415949831e-07, "loss": 0.2814, "step": 7787 }, { "epoch": 0.5, "grad_norm": 0.9455958195092917, "learning_rate": 5.297253509760447e-07, "loss": 0.1514, "step": 7788 }, { "epoch": 0.5, "grad_norm": 1.0228700995109932, "learning_rate": 5.29622259088963e-07, "loss": 0.0032, "step": 7789 }, { "epoch": 0.5, "grad_norm": 0.914851414661287, "learning_rate": 5.295191659381361e-07, "loss": 0.1251, "step": 7790 }, { "epoch": 0.5, "grad_norm": 1.1990458833847757, "learning_rate": 5.294160715279625e-07, "loss": 0.3261, "step": 7791 }, { "epoch": 0.5, "grad_norm": 1.189237769919022, "learning_rate": 5.293129758628401e-07, "loss": 0.1638, "step": 7792 }, { "epoch": 0.5, "grad_norm": 1.4307245719668564, "learning_rate": 5.292098789471672e-07, "loss": 0.1495, "step": 7793 }, { "epoch": 0.5, "grad_norm": 0.9181822888819925, "learning_rate": 5.291067807853421e-07, "loss": 0.0756, "step": 7794 }, { "epoch": 0.5, "grad_norm": 1.0261945761045828, "learning_rate": 5.290036813817633e-07, "loss": 0.1123, "step": 7795 }, { "epoch": 0.5, "grad_norm": 1.8237465208574568, "learning_rate": 5.289005807408291e-07, "loss": 0.1983, "step": 7796 }, { "epoch": 0.5, "grad_norm": 0.9719494841336705, "learning_rate": 5.287974788669379e-07, "loss": 0.5802, "step": 7797 }, { "epoch": 0.5, "grad_norm": 0.7977473228851141, "learning_rate": 5.286943757644885e-07, "loss": 0.3637, "step": 7798 }, { "epoch": 0.5, "grad_norm": 0.5200201574343315, "learning_rate": 5.285912714378794e-07, "loss": 0.1093, "step": 7799 }, { "epoch": 0.5, "grad_norm": 0.46932306713004374, "learning_rate": 5.28488165891509e-07, "loss": 0.0479, "step": 7800 }, { "epoch": 0.5, "grad_norm": 1.44605212307224, "learning_rate": 5.283850591297764e-07, "loss": 0.2126, "step": 7801 }, { "epoch": 0.5, "grad_norm": 9.7413993988323, "learning_rate": 5.2828195115708e-07, "loss": 0.3058, "step": 7802 }, { "epoch": 0.5, "grad_norm": 2.4379561362648015, "learning_rate": 5.281788419778187e-07, "loss": 0.0603, "step": 7803 }, { "epoch": 0.5, "grad_norm": 0.8758741891476285, "learning_rate": 5.280757315963914e-07, "loss": 0.1908, "step": 7804 }, { "epoch": 0.5, "grad_norm": 0.4412617542815155, "learning_rate": 5.279726200171968e-07, "loss": 0.2986, "step": 7805 }, { "epoch": 0.5, "grad_norm": 1.5032658183060224, "learning_rate": 5.278695072446342e-07, "loss": 0.0798, "step": 7806 }, { "epoch": 0.5, "grad_norm": 0.4376415470066588, "learning_rate": 5.277663932831022e-07, "loss": 0.0424, "step": 7807 }, { "epoch": 0.5, "grad_norm": 0.9769449621105338, "learning_rate": 5.276632781370003e-07, "loss": 0.1485, "step": 7808 }, { "epoch": 0.5, "grad_norm": 0.7515751597096842, "learning_rate": 5.275601618107272e-07, "loss": 0.431, "step": 7809 }, { "epoch": 0.5, "grad_norm": 0.7794421637018186, "learning_rate": 5.274570443086822e-07, "loss": 0.2394, "step": 7810 }, { "epoch": 0.5, "grad_norm": 0.5447644869777392, "learning_rate": 5.273539256352645e-07, "loss": 0.168, "step": 7811 }, { "epoch": 0.5, "grad_norm": 0.8876176330913443, "learning_rate": 5.272508057948734e-07, "loss": 0.1879, "step": 7812 }, { "epoch": 0.5, "grad_norm": 0.9865574689471549, "learning_rate": 5.27147684791908e-07, "loss": 0.2503, "step": 7813 }, { "epoch": 0.5, "grad_norm": 0.9871320823600839, "learning_rate": 5.270445626307679e-07, "loss": 0.3219, "step": 7814 }, { "epoch": 0.5, "grad_norm": 0.7105275225394994, "learning_rate": 5.269414393158522e-07, "loss": 0.1235, "step": 7815 }, { "epoch": 0.5, "grad_norm": 1.1802947614800248, "learning_rate": 5.268383148515607e-07, "loss": 0.0591, "step": 7816 }, { "epoch": 0.5, "grad_norm": 6.3279453807796, "learning_rate": 5.267351892422928e-07, "loss": 0.1419, "step": 7817 }, { "epoch": 0.5, "grad_norm": 1.4359935887534894, "learning_rate": 5.266320624924479e-07, "loss": 0.4444, "step": 7818 }, { "epoch": 0.5, "grad_norm": 1.335113781847231, "learning_rate": 5.265289346064258e-07, "loss": 0.2569, "step": 7819 }, { "epoch": 0.5, "grad_norm": 5.056877467538794, "learning_rate": 5.264258055886258e-07, "loss": 0.2682, "step": 7820 }, { "epoch": 0.5, "grad_norm": 0.6868373893259, "learning_rate": 5.26322675443448e-07, "loss": 0.1618, "step": 7821 }, { "epoch": 0.5, "grad_norm": 0.5173004207683524, "learning_rate": 5.26219544175292e-07, "loss": 0.2116, "step": 7822 }, { "epoch": 0.5, "grad_norm": 0.5744844435517102, "learning_rate": 5.261164117885572e-07, "loss": 0.2363, "step": 7823 }, { "epoch": 0.5, "grad_norm": 3.0058124276324367, "learning_rate": 5.26013278287644e-07, "loss": 0.2604, "step": 7824 }, { "epoch": 0.5, "grad_norm": 0.5659951737869565, "learning_rate": 5.259101436769522e-07, "loss": 0.1254, "step": 7825 }, { "epoch": 0.5, "grad_norm": 1.8246655049506346, "learning_rate": 5.258070079608814e-07, "loss": 0.0466, "step": 7826 }, { "epoch": 0.5, "grad_norm": 2.354953605724272, "learning_rate": 5.257038711438318e-07, "loss": 0.1545, "step": 7827 }, { "epoch": 0.5, "grad_norm": 1.035775037723114, "learning_rate": 5.256007332302033e-07, "loss": 0.2671, "step": 7828 }, { "epoch": 0.5, "grad_norm": 4.089958319818489, "learning_rate": 5.254975942243962e-07, "loss": 0.3936, "step": 7829 }, { "epoch": 0.5, "grad_norm": 9.762537594903154, "learning_rate": 5.253944541308105e-07, "loss": 0.1401, "step": 7830 }, { "epoch": 0.5, "grad_norm": 2.746543211746795, "learning_rate": 5.252913129538462e-07, "loss": 0.2737, "step": 7831 }, { "epoch": 0.5, "grad_norm": 0.28309018431784894, "learning_rate": 5.251881706979036e-07, "loss": 0.12, "step": 7832 }, { "epoch": 0.5, "grad_norm": 0.8489990124044013, "learning_rate": 5.25085027367383e-07, "loss": 0.2752, "step": 7833 }, { "epoch": 0.5, "grad_norm": 0.3177622837747427, "learning_rate": 5.249818829666849e-07, "loss": 0.1412, "step": 7834 }, { "epoch": 0.5, "grad_norm": 0.8600800060063669, "learning_rate": 5.248787375002093e-07, "loss": 0.3269, "step": 7835 }, { "epoch": 0.5, "grad_norm": 2.9871049459997794, "learning_rate": 5.247755909723569e-07, "loss": 0.0822, "step": 7836 }, { "epoch": 0.5, "grad_norm": 0.824574174471444, "learning_rate": 5.24672443387528e-07, "loss": 0.2035, "step": 7837 }, { "epoch": 0.5, "grad_norm": 0.7726990955671356, "learning_rate": 5.245692947501229e-07, "loss": 0.0721, "step": 7838 }, { "epoch": 0.5, "grad_norm": 0.7629832334784106, "learning_rate": 5.244661450645424e-07, "loss": 0.0428, "step": 7839 }, { "epoch": 0.5, "grad_norm": 1.025522337254423, "learning_rate": 5.243629943351868e-07, "loss": 0.4119, "step": 7840 }, { "epoch": 0.5, "grad_norm": 1.0978245388548087, "learning_rate": 5.242598425664569e-07, "loss": 0.3479, "step": 7841 }, { "epoch": 0.5, "grad_norm": 0.4144828682749771, "learning_rate": 5.241566897627535e-07, "loss": 0.35, "step": 7842 }, { "epoch": 0.5, "grad_norm": 0.348071760526536, "learning_rate": 5.240535359284771e-07, "loss": 0.0055, "step": 7843 }, { "epoch": 0.5, "grad_norm": 14.121454223171519, "learning_rate": 5.239503810680285e-07, "loss": 0.1295, "step": 7844 }, { "epoch": 0.5, "grad_norm": 10.88673366259795, "learning_rate": 5.238472251858085e-07, "loss": 0.02, "step": 7845 }, { "epoch": 0.5, "grad_norm": 0.562742651472133, "learning_rate": 5.23744068286218e-07, "loss": 0.2023, "step": 7846 }, { "epoch": 0.5, "grad_norm": 0.845274488769913, "learning_rate": 5.236409103736578e-07, "loss": 0.0736, "step": 7847 }, { "epoch": 0.5, "grad_norm": 0.6935526470572801, "learning_rate": 5.235377514525287e-07, "loss": 0.2186, "step": 7848 }, { "epoch": 0.5, "grad_norm": 1.491441170439203, "learning_rate": 5.234345915272319e-07, "loss": 0.1227, "step": 7849 }, { "epoch": 0.5, "grad_norm": 1.0239583355372224, "learning_rate": 5.233314306021683e-07, "loss": 0.1545, "step": 7850 }, { "epoch": 0.5, "grad_norm": 2.8937449633580066, "learning_rate": 5.232282686817391e-07, "loss": 0.2691, "step": 7851 }, { "epoch": 0.5, "grad_norm": 0.48963595647131725, "learning_rate": 5.23125105770345e-07, "loss": 0.0804, "step": 7852 }, { "epoch": 0.5, "grad_norm": 0.800896218318243, "learning_rate": 5.230219418723877e-07, "loss": 0.1848, "step": 7853 }, { "epoch": 0.5, "grad_norm": 1.6642178462979964, "learning_rate": 5.229187769922678e-07, "loss": 0.3277, "step": 7854 }, { "epoch": 0.5, "grad_norm": 0.6979156812551884, "learning_rate": 5.228156111343869e-07, "loss": 0.0602, "step": 7855 }, { "epoch": 0.5, "grad_norm": 0.7067320927297459, "learning_rate": 5.227124443031463e-07, "loss": 0.1629, "step": 7856 }, { "epoch": 0.5, "grad_norm": 1.7787555692149855, "learning_rate": 5.226092765029471e-07, "loss": 0.1465, "step": 7857 }, { "epoch": 0.5, "grad_norm": 0.9791138546302344, "learning_rate": 5.225061077381906e-07, "loss": 0.1425, "step": 7858 }, { "epoch": 0.5, "grad_norm": 1.396157475943847, "learning_rate": 5.224029380132784e-07, "loss": 0.0152, "step": 7859 }, { "epoch": 0.5, "grad_norm": 1.5747056921221125, "learning_rate": 5.222997673326117e-07, "loss": 0.0786, "step": 7860 }, { "epoch": 0.5, "grad_norm": 1.4217302165875072, "learning_rate": 5.221965957005923e-07, "loss": 0.0959, "step": 7861 }, { "epoch": 0.5, "grad_norm": 6.778564964888106, "learning_rate": 5.220934231216213e-07, "loss": 0.1542, "step": 7862 }, { "epoch": 0.5, "grad_norm": 0.471767682061118, "learning_rate": 5.219902496001007e-07, "loss": 0.4055, "step": 7863 }, { "epoch": 0.5, "grad_norm": 0.5294869636570287, "learning_rate": 5.218870751404318e-07, "loss": 0.0591, "step": 7864 }, { "epoch": 0.5, "grad_norm": 0.7789689219152222, "learning_rate": 5.217838997470161e-07, "loss": 0.2538, "step": 7865 }, { "epoch": 0.5, "grad_norm": 7.266033380340139, "learning_rate": 5.216807234242556e-07, "loss": 0.1506, "step": 7866 }, { "epoch": 0.5, "grad_norm": 0.6533613723205102, "learning_rate": 5.215775461765518e-07, "loss": 0.1102, "step": 7867 }, { "epoch": 0.5, "grad_norm": 0.7032095492239564, "learning_rate": 5.214743680083063e-07, "loss": 0.325, "step": 7868 }, { "epoch": 0.5, "grad_norm": 3.2152376585789004, "learning_rate": 5.213711889239213e-07, "loss": 0.2158, "step": 7869 }, { "epoch": 0.5, "grad_norm": 2.031521999729483, "learning_rate": 5.212680089277985e-07, "loss": 0.023, "step": 7870 }, { "epoch": 0.5, "grad_norm": 1.4208887784432154, "learning_rate": 5.211648280243395e-07, "loss": 0.0118, "step": 7871 }, { "epoch": 0.5, "grad_norm": 0.47270387623683613, "learning_rate": 5.210616462179464e-07, "loss": 0.2021, "step": 7872 }, { "epoch": 0.5, "grad_norm": 4.21269858601264, "learning_rate": 5.209584635130213e-07, "loss": 0.2684, "step": 7873 }, { "epoch": 0.5, "grad_norm": 0.7791515887512775, "learning_rate": 5.20855279913966e-07, "loss": 0.2264, "step": 7874 }, { "epoch": 0.5, "grad_norm": 2.987061945698786, "learning_rate": 5.207520954251824e-07, "loss": 0.2461, "step": 7875 }, { "epoch": 0.5, "grad_norm": 1.1747753057153916, "learning_rate": 5.206489100510728e-07, "loss": 0.2769, "step": 7876 }, { "epoch": 0.5, "grad_norm": 0.6665906890843033, "learning_rate": 5.205457237960391e-07, "loss": 0.2738, "step": 7877 }, { "epoch": 0.5, "grad_norm": 0.369531591363996, "learning_rate": 5.204425366644835e-07, "loss": 0.1376, "step": 7878 }, { "epoch": 0.5, "grad_norm": 1.8279046066898146, "learning_rate": 5.203393486608083e-07, "loss": 0.1184, "step": 7879 }, { "epoch": 0.5, "grad_norm": 0.3569836166745123, "learning_rate": 5.202361597894156e-07, "loss": 0.0574, "step": 7880 }, { "epoch": 0.5, "grad_norm": 0.7107200054599689, "learning_rate": 5.201329700547076e-07, "loss": 0.2351, "step": 7881 }, { "epoch": 0.5, "grad_norm": 0.19613903012530348, "learning_rate": 5.200297794610866e-07, "loss": 0.0029, "step": 7882 }, { "epoch": 0.5, "grad_norm": 1.3353815282918122, "learning_rate": 5.199265880129549e-07, "loss": 0.2413, "step": 7883 }, { "epoch": 0.5, "grad_norm": 0.24590316881461446, "learning_rate": 5.19823395714715e-07, "loss": 0.0928, "step": 7884 }, { "epoch": 0.5, "grad_norm": 0.6912026268662488, "learning_rate": 5.197202025707692e-07, "loss": 0.0113, "step": 7885 }, { "epoch": 0.5, "grad_norm": 1.3384877353144549, "learning_rate": 5.196170085855197e-07, "loss": 0.2684, "step": 7886 }, { "epoch": 0.5, "grad_norm": 1.6400111663334485, "learning_rate": 5.195138137633695e-07, "loss": 0.2284, "step": 7887 }, { "epoch": 0.5, "grad_norm": 0.7398213844163112, "learning_rate": 5.194106181087205e-07, "loss": 0.2143, "step": 7888 }, { "epoch": 0.5, "grad_norm": 2.2830114388100147, "learning_rate": 5.193074216259756e-07, "loss": 0.2561, "step": 7889 }, { "epoch": 0.5, "grad_norm": 0.9472489104447508, "learning_rate": 5.192042243195374e-07, "loss": 0.2109, "step": 7890 }, { "epoch": 0.5, "grad_norm": 5.367648599858341, "learning_rate": 5.191010261938084e-07, "loss": 0.4713, "step": 7891 }, { "epoch": 0.5, "grad_norm": 1.155433735597053, "learning_rate": 5.18997827253191e-07, "loss": 0.1569, "step": 7892 }, { "epoch": 0.5, "grad_norm": 0.406164477273476, "learning_rate": 5.188946275020883e-07, "loss": 0.0912, "step": 7893 }, { "epoch": 0.5, "grad_norm": 0.9541769863246171, "learning_rate": 5.187914269449027e-07, "loss": 0.1282, "step": 7894 }, { "epoch": 0.5, "grad_norm": 1.12016431612114, "learning_rate": 5.186882255860371e-07, "loss": 0.2475, "step": 7895 }, { "epoch": 0.5, "grad_norm": 1.077277458584877, "learning_rate": 5.185850234298942e-07, "loss": 0.0833, "step": 7896 }, { "epoch": 0.5, "grad_norm": 1.1287624437146637, "learning_rate": 5.184818204808768e-07, "loss": 0.4477, "step": 7897 }, { "epoch": 0.5, "grad_norm": 4.067688187277838, "learning_rate": 5.183786167433879e-07, "loss": 0.1426, "step": 7898 }, { "epoch": 0.5, "grad_norm": 0.7476661301573576, "learning_rate": 5.182754122218301e-07, "loss": 0.0813, "step": 7899 }, { "epoch": 0.5, "grad_norm": 0.2548690992889109, "learning_rate": 5.181722069206067e-07, "loss": 0.1678, "step": 7900 }, { "epoch": 0.5, "grad_norm": 2.4858376999577905, "learning_rate": 5.180690008441202e-07, "loss": 0.0102, "step": 7901 }, { "epoch": 0.5, "grad_norm": 1.0979010552464186, "learning_rate": 5.179657939967739e-07, "loss": 0.2151, "step": 7902 }, { "epoch": 0.5, "grad_norm": 3.7467724528418, "learning_rate": 5.178625863829708e-07, "loss": 0.1008, "step": 7903 }, { "epoch": 0.5, "grad_norm": 1.0258098759250416, "learning_rate": 5.177593780071138e-07, "loss": 0.216, "step": 7904 }, { "epoch": 0.5, "grad_norm": 0.8528197600824754, "learning_rate": 5.176561688736059e-07, "loss": 0.2002, "step": 7905 }, { "epoch": 0.5, "grad_norm": 3.4726365861437474, "learning_rate": 5.175529589868505e-07, "loss": 0.1345, "step": 7906 }, { "epoch": 0.5, "grad_norm": 6.778146508546821, "learning_rate": 5.174497483512505e-07, "loss": 0.1732, "step": 7907 }, { "epoch": 0.5, "grad_norm": 2.175122001822574, "learning_rate": 5.173465369712092e-07, "loss": 0.0348, "step": 7908 }, { "epoch": 0.5, "grad_norm": 1.3902211936326443, "learning_rate": 5.172433248511298e-07, "loss": 0.2418, "step": 7909 }, { "epoch": 0.5, "grad_norm": 0.6413738610015453, "learning_rate": 5.171401119954155e-07, "loss": 0.2419, "step": 7910 }, { "epoch": 0.5, "grad_norm": 0.9434512738195908, "learning_rate": 5.170368984084695e-07, "loss": 0.0773, "step": 7911 }, { "epoch": 0.5, "grad_norm": 0.49654956072595885, "learning_rate": 5.169336840946951e-07, "loss": 0.2155, "step": 7912 }, { "epoch": 0.5, "grad_norm": 0.6266402889062209, "learning_rate": 5.168304690584957e-07, "loss": 0.2152, "step": 7913 }, { "epoch": 0.5, "grad_norm": 1.0088775659474432, "learning_rate": 5.167272533042747e-07, "loss": 0.1774, "step": 7914 }, { "epoch": 0.5, "grad_norm": 1.413291250801521, "learning_rate": 5.166240368364355e-07, "loss": 0.3786, "step": 7915 }, { "epoch": 0.5, "grad_norm": 1.4757878077723017, "learning_rate": 5.165208196593815e-07, "loss": 0.1337, "step": 7916 }, { "epoch": 0.5, "grad_norm": 0.5831083215292089, "learning_rate": 5.16417601777516e-07, "loss": 0.2393, "step": 7917 }, { "epoch": 0.5, "grad_norm": 0.2954726822787284, "learning_rate": 5.163143831952428e-07, "loss": 0.015, "step": 7918 }, { "epoch": 0.51, "grad_norm": 3.0815712859292015, "learning_rate": 5.16211163916965e-07, "loss": 0.2246, "step": 7919 }, { "epoch": 0.51, "grad_norm": 0.8915610110045376, "learning_rate": 5.161079439470865e-07, "loss": 0.3047, "step": 7920 }, { "epoch": 0.51, "grad_norm": 2.616067334512679, "learning_rate": 5.160047232900105e-07, "loss": 0.0679, "step": 7921 }, { "epoch": 0.51, "grad_norm": 2.6201283893735887, "learning_rate": 5.159015019501412e-07, "loss": 0.1053, "step": 7922 }, { "epoch": 0.51, "grad_norm": 0.5590432125548226, "learning_rate": 5.157982799318816e-07, "loss": 0.0049, "step": 7923 }, { "epoch": 0.51, "grad_norm": 1.5662754624081587, "learning_rate": 5.156950572396357e-07, "loss": 0.002, "step": 7924 }, { "epoch": 0.51, "grad_norm": 0.3489688627740372, "learning_rate": 5.15591833877807e-07, "loss": 0.1944, "step": 7925 }, { "epoch": 0.51, "grad_norm": 0.9277712734648788, "learning_rate": 5.154886098507994e-07, "loss": 0.2823, "step": 7926 }, { "epoch": 0.51, "grad_norm": 0.3126595205279847, "learning_rate": 5.153853851630167e-07, "loss": 0.2276, "step": 7927 }, { "epoch": 0.51, "grad_norm": 0.5437867303477449, "learning_rate": 5.152821598188624e-07, "loss": 0.2645, "step": 7928 }, { "epoch": 0.51, "grad_norm": 9.298265152104033, "learning_rate": 5.151789338227404e-07, "loss": 0.2065, "step": 7929 }, { "epoch": 0.51, "grad_norm": 1.3523082298627538, "learning_rate": 5.150757071790546e-07, "loss": 0.2012, "step": 7930 }, { "epoch": 0.51, "grad_norm": 0.6762209296542762, "learning_rate": 5.14972479892209e-07, "loss": 0.1619, "step": 7931 }, { "epoch": 0.51, "grad_norm": 0.8012859942811726, "learning_rate": 5.148692519666071e-07, "loss": 0.1933, "step": 7932 }, { "epoch": 0.51, "grad_norm": 1.3174116071937148, "learning_rate": 5.147660234066532e-07, "loss": 0.1909, "step": 7933 }, { "epoch": 0.51, "grad_norm": 0.46627943344389033, "learning_rate": 5.146627942167509e-07, "loss": 0.0972, "step": 7934 }, { "epoch": 0.51, "grad_norm": 0.42565532044733345, "learning_rate": 5.145595644013044e-07, "loss": 0.1496, "step": 7935 }, { "epoch": 0.51, "grad_norm": 5.854961639675671, "learning_rate": 5.144563339647177e-07, "loss": 0.1301, "step": 7936 }, { "epoch": 0.51, "grad_norm": 1.828730714431461, "learning_rate": 5.143531029113946e-07, "loss": 0.3431, "step": 7937 }, { "epoch": 0.51, "grad_norm": 2.8837225539057707, "learning_rate": 5.142498712457392e-07, "loss": 0.1427, "step": 7938 }, { "epoch": 0.51, "grad_norm": 0.3271662302507762, "learning_rate": 5.141466389721557e-07, "loss": 0.1194, "step": 7939 }, { "epoch": 0.51, "grad_norm": 0.29422800625131323, "learning_rate": 5.140434060950482e-07, "loss": 0.1945, "step": 7940 }, { "epoch": 0.51, "grad_norm": 1.8082749986629059, "learning_rate": 5.139401726188208e-07, "loss": 0.2746, "step": 7941 }, { "epoch": 0.51, "grad_norm": 4.66322914498958, "learning_rate": 5.138369385478774e-07, "loss": 0.3334, "step": 7942 }, { "epoch": 0.51, "grad_norm": 0.5024296011165815, "learning_rate": 5.137337038866227e-07, "loss": 0.0649, "step": 7943 }, { "epoch": 0.51, "grad_norm": 0.5067471540265036, "learning_rate": 5.136304686394604e-07, "loss": 0.0759, "step": 7944 }, { "epoch": 0.51, "grad_norm": 0.6354339781473719, "learning_rate": 5.135272328107949e-07, "loss": 0.1112, "step": 7945 }, { "epoch": 0.51, "grad_norm": 1.129014749509955, "learning_rate": 5.134239964050307e-07, "loss": 0.1003, "step": 7946 }, { "epoch": 0.51, "grad_norm": 0.5008406881251112, "learning_rate": 5.133207594265715e-07, "loss": 0.1838, "step": 7947 }, { "epoch": 0.51, "grad_norm": 1.1048411268653404, "learning_rate": 5.132175218798221e-07, "loss": 0.1741, "step": 7948 }, { "epoch": 0.51, "grad_norm": 1.0134837594097874, "learning_rate": 5.131142837691865e-07, "loss": 0.1174, "step": 7949 }, { "epoch": 0.51, "grad_norm": 11.01198196378554, "learning_rate": 5.130110450990693e-07, "loss": 0.1294, "step": 7950 }, { "epoch": 0.51, "grad_norm": 3.56648443745084, "learning_rate": 5.129078058738747e-07, "loss": 0.0392, "step": 7951 }, { "epoch": 0.51, "grad_norm": 1.2297248998717523, "learning_rate": 5.128045660980072e-07, "loss": 0.0891, "step": 7952 }, { "epoch": 0.51, "grad_norm": 1.4581243116909357, "learning_rate": 5.127013257758712e-07, "loss": 0.1297, "step": 7953 }, { "epoch": 0.51, "grad_norm": 0.9345380450389321, "learning_rate": 5.125980849118712e-07, "loss": 0.189, "step": 7954 }, { "epoch": 0.51, "grad_norm": 0.7370901183156554, "learning_rate": 5.124948435104114e-07, "loss": 0.1185, "step": 7955 }, { "epoch": 0.51, "grad_norm": 0.7193480222061377, "learning_rate": 5.123916015758964e-07, "loss": 0.2652, "step": 7956 }, { "epoch": 0.51, "grad_norm": 0.6466032299607144, "learning_rate": 5.122883591127309e-07, "loss": 0.1024, "step": 7957 }, { "epoch": 0.51, "grad_norm": 0.7860832056788873, "learning_rate": 5.121851161253192e-07, "loss": 0.226, "step": 7958 }, { "epoch": 0.51, "grad_norm": 0.6013005123668494, "learning_rate": 5.120818726180661e-07, "loss": 0.0711, "step": 7959 }, { "epoch": 0.51, "grad_norm": 6.064753175786098, "learning_rate": 5.11978628595376e-07, "loss": 0.1304, "step": 7960 }, { "epoch": 0.51, "grad_norm": 0.9750951139813014, "learning_rate": 5.118753840616535e-07, "loss": 0.0555, "step": 7961 }, { "epoch": 0.51, "grad_norm": 0.9487612432620487, "learning_rate": 5.117721390213033e-07, "loss": 0.4431, "step": 7962 }, { "epoch": 0.51, "grad_norm": 1.3780561210469224, "learning_rate": 5.116688934787299e-07, "loss": 0.227, "step": 7963 }, { "epoch": 0.51, "grad_norm": 0.9622208866925224, "learning_rate": 5.11565647438338e-07, "loss": 0.2718, "step": 7964 }, { "epoch": 0.51, "grad_norm": 1.3846857939877564, "learning_rate": 5.114624009045324e-07, "loss": 0.4056, "step": 7965 }, { "epoch": 0.51, "grad_norm": 0.6896302473879041, "learning_rate": 5.113591538817176e-07, "loss": 0.0749, "step": 7966 }, { "epoch": 0.51, "grad_norm": 4.1611748449305255, "learning_rate": 5.112559063742986e-07, "loss": 0.3602, "step": 7967 }, { "epoch": 0.51, "grad_norm": 0.9255598049382037, "learning_rate": 5.111526583866799e-07, "loss": 0.2475, "step": 7968 }, { "epoch": 0.51, "grad_norm": 0.5252074153445964, "learning_rate": 5.110494099232665e-07, "loss": 0.1219, "step": 7969 }, { "epoch": 0.51, "grad_norm": 11.55610246662506, "learning_rate": 5.109461609884631e-07, "loss": 0.193, "step": 7970 }, { "epoch": 0.51, "grad_norm": 0.7640356996064016, "learning_rate": 5.108429115866744e-07, "loss": 0.2268, "step": 7971 }, { "epoch": 0.51, "grad_norm": 0.4296766191799689, "learning_rate": 5.107396617223052e-07, "loss": 0.2183, "step": 7972 }, { "epoch": 0.51, "grad_norm": 6.612663793624046, "learning_rate": 5.106364113997607e-07, "loss": 0.3109, "step": 7973 }, { "epoch": 0.51, "grad_norm": 1.2406635949629878, "learning_rate": 5.105331606234452e-07, "loss": 0.2663, "step": 7974 }, { "epoch": 0.51, "grad_norm": 0.3077453661861215, "learning_rate": 5.10429909397764e-07, "loss": 0.0636, "step": 7975 }, { "epoch": 0.51, "grad_norm": 0.7591910911749146, "learning_rate": 5.103266577271219e-07, "loss": 0.0717, "step": 7976 }, { "epoch": 0.51, "grad_norm": 1.3426273054482245, "learning_rate": 5.102234056159239e-07, "loss": 0.3577, "step": 7977 }, { "epoch": 0.51, "grad_norm": 0.4971718407893129, "learning_rate": 5.101201530685748e-07, "loss": 0.127, "step": 7978 }, { "epoch": 0.51, "grad_norm": 1.0861819445275214, "learning_rate": 5.100169000894796e-07, "loss": 0.1992, "step": 7979 }, { "epoch": 0.51, "grad_norm": 1.9030532265442714, "learning_rate": 5.099136466830434e-07, "loss": 0.226, "step": 7980 }, { "epoch": 0.51, "grad_norm": 1.1638092337308699, "learning_rate": 5.09810392853671e-07, "loss": 0.2711, "step": 7981 }, { "epoch": 0.51, "grad_norm": 0.5815643900116362, "learning_rate": 5.097071386057676e-07, "loss": 0.2693, "step": 7982 }, { "epoch": 0.51, "grad_norm": 0.3478150074781302, "learning_rate": 5.096038839437381e-07, "loss": 0.2425, "step": 7983 }, { "epoch": 0.51, "grad_norm": 0.34556668354772546, "learning_rate": 5.095006288719875e-07, "loss": 0.1279, "step": 7984 }, { "epoch": 0.51, "grad_norm": 0.696040851455979, "learning_rate": 5.093973733949212e-07, "loss": 0.2189, "step": 7985 }, { "epoch": 0.51, "grad_norm": 0.8684467332166635, "learning_rate": 5.09294117516944e-07, "loss": 0.4641, "step": 7986 }, { "epoch": 0.51, "grad_norm": 2.106608551218204, "learning_rate": 5.09190861242461e-07, "loss": 0.283, "step": 7987 }, { "epoch": 0.51, "grad_norm": 0.6784539215400707, "learning_rate": 5.090876045758774e-07, "loss": 0.3893, "step": 7988 }, { "epoch": 0.51, "grad_norm": 0.562374800966257, "learning_rate": 5.089843475215983e-07, "loss": 0.1535, "step": 7989 }, { "epoch": 0.51, "grad_norm": 3.6739943047222257, "learning_rate": 5.08881090084029e-07, "loss": 0.2647, "step": 7990 }, { "epoch": 0.51, "grad_norm": 0.4446705559212927, "learning_rate": 5.087778322675744e-07, "loss": 0.3003, "step": 7991 }, { "epoch": 0.51, "grad_norm": 1.1347843340063282, "learning_rate": 5.086745740766398e-07, "loss": 0.3486, "step": 7992 }, { "epoch": 0.51, "grad_norm": 3.6966441014613145, "learning_rate": 5.085713155156305e-07, "loss": 0.0802, "step": 7993 }, { "epoch": 0.51, "grad_norm": 0.6872586725807186, "learning_rate": 5.084680565889517e-07, "loss": 0.209, "step": 7994 }, { "epoch": 0.51, "grad_norm": 0.4954523590391869, "learning_rate": 5.083647973010085e-07, "loss": 0.0734, "step": 7995 }, { "epoch": 0.51, "grad_norm": 0.630303025252287, "learning_rate": 5.082615376562063e-07, "loss": 0.1912, "step": 7996 }, { "epoch": 0.51, "grad_norm": 0.5332288454112831, "learning_rate": 5.081582776589502e-07, "loss": 0.2678, "step": 7997 }, { "epoch": 0.51, "grad_norm": 0.9638121437248867, "learning_rate": 5.080550173136456e-07, "loss": 0.1524, "step": 7998 }, { "epoch": 0.51, "grad_norm": 0.5817877016507166, "learning_rate": 5.079517566246979e-07, "loss": 0.2753, "step": 7999 }, { "epoch": 0.51, "grad_norm": 0.7366627544026684, "learning_rate": 5.078484955965121e-07, "loss": 0.2207, "step": 8000 }, { "epoch": 0.51, "grad_norm": 0.6617489013541173, "learning_rate": 5.077452342334938e-07, "loss": 0.1174, "step": 8001 }, { "epoch": 0.51, "grad_norm": 4.537758947180974, "learning_rate": 5.076419725400482e-07, "loss": 0.1951, "step": 8002 }, { "epoch": 0.51, "grad_norm": 0.894337567628483, "learning_rate": 5.075387105205809e-07, "loss": 0.0404, "step": 8003 }, { "epoch": 0.51, "grad_norm": 0.408447184632947, "learning_rate": 5.074354481794968e-07, "loss": 0.0025, "step": 8004 }, { "epoch": 0.51, "grad_norm": 1.2473253288384847, "learning_rate": 5.073321855212016e-07, "loss": 0.0523, "step": 8005 }, { "epoch": 0.51, "grad_norm": 0.38759903538036455, "learning_rate": 5.072289225501007e-07, "loss": 0.0103, "step": 8006 }, { "epoch": 0.51, "grad_norm": 1.7517533132809295, "learning_rate": 5.071256592705993e-07, "loss": 0.082, "step": 8007 }, { "epoch": 0.51, "grad_norm": 0.21340932328299736, "learning_rate": 5.07022395687103e-07, "loss": 0.0664, "step": 8008 }, { "epoch": 0.51, "grad_norm": 0.6888549592613122, "learning_rate": 5.069191318040171e-07, "loss": 0.1864, "step": 8009 }, { "epoch": 0.51, "grad_norm": 0.6952146654491679, "learning_rate": 5.068158676257471e-07, "loss": 0.2177, "step": 8010 }, { "epoch": 0.51, "grad_norm": 0.21022522516618827, "learning_rate": 5.067126031566987e-07, "loss": 0.009, "step": 8011 }, { "epoch": 0.51, "grad_norm": 0.3295249984265424, "learning_rate": 5.06609338401277e-07, "loss": 0.2187, "step": 8012 }, { "epoch": 0.51, "grad_norm": 0.3381397979980587, "learning_rate": 5.065060733638877e-07, "loss": 0.2721, "step": 8013 }, { "epoch": 0.51, "grad_norm": 0.7951994549328867, "learning_rate": 5.064028080489363e-07, "loss": 0.0808, "step": 8014 }, { "epoch": 0.51, "grad_norm": 1.208335598045493, "learning_rate": 5.062995424608283e-07, "loss": 0.3566, "step": 8015 }, { "epoch": 0.51, "grad_norm": 0.9711718417727587, "learning_rate": 5.061962766039691e-07, "loss": 0.1127, "step": 8016 }, { "epoch": 0.51, "grad_norm": 0.3681723380290377, "learning_rate": 5.060930104827641e-07, "loss": 0.0907, "step": 8017 }, { "epoch": 0.51, "grad_norm": 0.7362546339027762, "learning_rate": 5.05989744101619e-07, "loss": 0.1672, "step": 8018 }, { "epoch": 0.51, "grad_norm": 0.690255265327017, "learning_rate": 5.058864774649395e-07, "loss": 0.1658, "step": 8019 }, { "epoch": 0.51, "grad_norm": 0.873125487517271, "learning_rate": 5.05783210577131e-07, "loss": 0.2203, "step": 8020 }, { "epoch": 0.51, "grad_norm": 0.5923813617712705, "learning_rate": 5.056799434425992e-07, "loss": 0.2328, "step": 8021 }, { "epoch": 0.51, "grad_norm": 1.8319052137680274, "learning_rate": 5.055766760657496e-07, "loss": 0.1973, "step": 8022 }, { "epoch": 0.51, "grad_norm": 1.2575692636570914, "learning_rate": 5.054734084509877e-07, "loss": 0.1985, "step": 8023 }, { "epoch": 0.51, "grad_norm": 1.0313095356288016, "learning_rate": 5.053701406027192e-07, "loss": 0.1867, "step": 8024 }, { "epoch": 0.51, "grad_norm": 0.44176030278131667, "learning_rate": 5.052668725253498e-07, "loss": 0.109, "step": 8025 }, { "epoch": 0.51, "grad_norm": 0.6980213760967009, "learning_rate": 5.051636042232849e-07, "loss": 0.1877, "step": 8026 }, { "epoch": 0.51, "grad_norm": 0.5090893499613691, "learning_rate": 5.050603357009304e-07, "loss": 0.1255, "step": 8027 }, { "epoch": 0.51, "grad_norm": 2.744190893066737, "learning_rate": 5.049570669626917e-07, "loss": 0.1226, "step": 8028 }, { "epoch": 0.51, "grad_norm": 1.417775319956826, "learning_rate": 5.048537980129747e-07, "loss": 0.0854, "step": 8029 }, { "epoch": 0.51, "grad_norm": 0.750635235017525, "learning_rate": 5.047505288561847e-07, "loss": 0.2062, "step": 8030 }, { "epoch": 0.51, "grad_norm": 7.683348780517243, "learning_rate": 5.046472594967278e-07, "loss": 0.1323, "step": 8031 }, { "epoch": 0.51, "grad_norm": 1.108180759399494, "learning_rate": 5.045439899390094e-07, "loss": 0.0988, "step": 8032 }, { "epoch": 0.51, "grad_norm": 0.4179128445023675, "learning_rate": 5.044407201874353e-07, "loss": 0.0593, "step": 8033 }, { "epoch": 0.51, "grad_norm": 3.550250873218855, "learning_rate": 5.04337450246411e-07, "loss": 0.1507, "step": 8034 }, { "epoch": 0.51, "grad_norm": 0.48309367073510967, "learning_rate": 5.042341801203424e-07, "loss": 0.0078, "step": 8035 }, { "epoch": 0.51, "grad_norm": 0.8426078244778038, "learning_rate": 5.041309098136351e-07, "loss": 0.1387, "step": 8036 }, { "epoch": 0.51, "grad_norm": 0.48805343814886276, "learning_rate": 5.04027639330695e-07, "loss": 0.2846, "step": 8037 }, { "epoch": 0.51, "grad_norm": 14.76702358007144, "learning_rate": 5.039243686759277e-07, "loss": 0.1401, "step": 8038 }, { "epoch": 0.51, "grad_norm": 1.3855713237176372, "learning_rate": 5.038210978537388e-07, "loss": 0.0997, "step": 8039 }, { "epoch": 0.51, "grad_norm": 0.4823045750901206, "learning_rate": 5.037178268685344e-07, "loss": 0.1423, "step": 8040 }, { "epoch": 0.51, "grad_norm": 1.6589906452343282, "learning_rate": 5.036145557247199e-07, "loss": 0.0878, "step": 8041 }, { "epoch": 0.51, "grad_norm": 2.1415394052155747, "learning_rate": 5.035112844267014e-07, "loss": 0.0197, "step": 8042 }, { "epoch": 0.51, "grad_norm": 0.6135851235172674, "learning_rate": 5.034080129788843e-07, "loss": 0.1854, "step": 8043 }, { "epoch": 0.51, "grad_norm": 0.6905787597383316, "learning_rate": 5.033047413856745e-07, "loss": 0.2306, "step": 8044 }, { "epoch": 0.51, "grad_norm": 0.7618880486185982, "learning_rate": 5.032014696514776e-07, "loss": 0.2863, "step": 8045 }, { "epoch": 0.51, "grad_norm": 1.1490263618140528, "learning_rate": 5.030981977806998e-07, "loss": 0.2538, "step": 8046 }, { "epoch": 0.51, "grad_norm": 0.41906248444879696, "learning_rate": 5.029949257777466e-07, "loss": 0.1505, "step": 8047 }, { "epoch": 0.51, "grad_norm": 0.6151812296385359, "learning_rate": 5.028916536470239e-07, "loss": 0.1525, "step": 8048 }, { "epoch": 0.51, "grad_norm": 1.321947098669094, "learning_rate": 5.027883813929373e-07, "loss": 0.255, "step": 8049 }, { "epoch": 0.51, "grad_norm": 1.0067335337780323, "learning_rate": 5.026851090198929e-07, "loss": 0.5148, "step": 8050 }, { "epoch": 0.51, "grad_norm": 1.2197256165289168, "learning_rate": 5.025818365322964e-07, "loss": 0.2783, "step": 8051 }, { "epoch": 0.51, "grad_norm": 0.4318952023099847, "learning_rate": 5.024785639345534e-07, "loss": 0.233, "step": 8052 }, { "epoch": 0.51, "grad_norm": 0.467107979145924, "learning_rate": 5.023752912310699e-07, "loss": 0.2264, "step": 8053 }, { "epoch": 0.51, "grad_norm": 0.5960508151237881, "learning_rate": 5.022720184262517e-07, "loss": 0.0043, "step": 8054 }, { "epoch": 0.51, "grad_norm": 0.7491023884257654, "learning_rate": 5.021687455245046e-07, "loss": 0.1087, "step": 8055 }, { "epoch": 0.51, "grad_norm": 0.8026981893353763, "learning_rate": 5.020654725302347e-07, "loss": 0.1684, "step": 8056 }, { "epoch": 0.51, "grad_norm": 1.6393626698686294, "learning_rate": 5.019621994478473e-07, "loss": 0.2286, "step": 8057 }, { "epoch": 0.51, "grad_norm": 0.5835826189171106, "learning_rate": 5.018589262817488e-07, "loss": 0.2519, "step": 8058 }, { "epoch": 0.51, "grad_norm": 7.71906783880137, "learning_rate": 5.017556530363445e-07, "loss": 0.1609, "step": 8059 }, { "epoch": 0.51, "grad_norm": 0.6192050745601048, "learning_rate": 5.016523797160406e-07, "loss": 0.0912, "step": 8060 }, { "epoch": 0.51, "grad_norm": 1.5135148904147495, "learning_rate": 5.015491063252429e-07, "loss": 0.1984, "step": 8061 }, { "epoch": 0.51, "grad_norm": 0.7676244659370985, "learning_rate": 5.014458328683572e-07, "loss": 0.0042, "step": 8062 }, { "epoch": 0.51, "grad_norm": 0.676370431133513, "learning_rate": 5.013425593497892e-07, "loss": 0.3381, "step": 8063 }, { "epoch": 0.51, "grad_norm": 0.5266436808843096, "learning_rate": 5.012392857739452e-07, "loss": 0.1857, "step": 8064 }, { "epoch": 0.51, "grad_norm": 0.9299334948981965, "learning_rate": 5.011360121452306e-07, "loss": 0.5212, "step": 8065 }, { "epoch": 0.51, "grad_norm": 1.4985207563224874, "learning_rate": 5.010327384680515e-07, "loss": 0.0652, "step": 8066 }, { "epoch": 0.51, "grad_norm": 0.2844133747117827, "learning_rate": 5.009294647468136e-07, "loss": 0.109, "step": 8067 }, { "epoch": 0.51, "grad_norm": 2.378840373785456, "learning_rate": 5.00826190985923e-07, "loss": 0.091, "step": 8068 }, { "epoch": 0.51, "grad_norm": 1.2423092356654564, "learning_rate": 5.007229171897854e-07, "loss": 0.262, "step": 8069 }, { "epoch": 0.51, "grad_norm": 0.6809100805891322, "learning_rate": 5.006196433628068e-07, "loss": 0.133, "step": 8070 }, { "epoch": 0.51, "grad_norm": 1.807786554075676, "learning_rate": 5.005163695093927e-07, "loss": 0.2683, "step": 8071 }, { "epoch": 0.51, "grad_norm": 0.6403892779241127, "learning_rate": 5.004130956339494e-07, "loss": 0.0859, "step": 8072 }, { "epoch": 0.51, "grad_norm": 0.7590827412452761, "learning_rate": 5.003098217408826e-07, "loss": 0.0805, "step": 8073 }, { "epoch": 0.51, "grad_norm": 6.672439477342041, "learning_rate": 5.002065478345982e-07, "loss": 0.2191, "step": 8074 }, { "epoch": 0.51, "grad_norm": 0.9034156078321618, "learning_rate": 5.00103273919502e-07, "loss": 0.0136, "step": 8075 }, { "epoch": 0.52, "grad_norm": 2.9520891091385693, "learning_rate": 5e-07, "loss": 0.1591, "step": 8076 }, { "epoch": 0.52, "grad_norm": 1.0735962217333608, "learning_rate": 4.998967260804982e-07, "loss": 0.363, "step": 8077 }, { "epoch": 0.52, "grad_norm": 1.0739018029784582, "learning_rate": 4.997934521654018e-07, "loss": 0.2883, "step": 8078 }, { "epoch": 0.52, "grad_norm": 0.7644867479130036, "learning_rate": 4.996901782591174e-07, "loss": 0.3885, "step": 8079 }, { "epoch": 0.52, "grad_norm": 0.7211748433681862, "learning_rate": 4.995869043660506e-07, "loss": 0.079, "step": 8080 }, { "epoch": 0.52, "grad_norm": 0.3702103745019236, "learning_rate": 4.994836304906073e-07, "loss": 0.0758, "step": 8081 }, { "epoch": 0.52, "grad_norm": 2.0644432870665215, "learning_rate": 4.993803566371933e-07, "loss": 0.0639, "step": 8082 }, { "epoch": 0.52, "grad_norm": 0.8060213093348012, "learning_rate": 4.992770828102147e-07, "loss": 0.3468, "step": 8083 }, { "epoch": 0.52, "grad_norm": 0.5310872523741449, "learning_rate": 4.991738090140769e-07, "loss": 0.3825, "step": 8084 }, { "epoch": 0.52, "grad_norm": 0.6983589634138904, "learning_rate": 4.990705352531864e-07, "loss": 0.1557, "step": 8085 }, { "epoch": 0.52, "grad_norm": 0.5453860264192577, "learning_rate": 4.989672615319485e-07, "loss": 0.2753, "step": 8086 }, { "epoch": 0.52, "grad_norm": 0.588703415308047, "learning_rate": 4.988639878547694e-07, "loss": 0.1344, "step": 8087 }, { "epoch": 0.52, "grad_norm": 0.8166596433992229, "learning_rate": 4.987607142260548e-07, "loss": 0.2103, "step": 8088 }, { "epoch": 0.52, "grad_norm": 1.1569791097214266, "learning_rate": 4.986574406502107e-07, "loss": 0.1128, "step": 8089 }, { "epoch": 0.52, "grad_norm": 4.8820478279134125, "learning_rate": 4.98554167131643e-07, "loss": 0.2281, "step": 8090 }, { "epoch": 0.52, "grad_norm": 4.1672236181499045, "learning_rate": 4.984508936747572e-07, "loss": 0.1638, "step": 8091 }, { "epoch": 0.52, "grad_norm": 1.7543266365961565, "learning_rate": 4.983476202839594e-07, "loss": 0.1106, "step": 8092 }, { "epoch": 0.52, "grad_norm": 1.8841857722847184, "learning_rate": 4.982443469636555e-07, "loss": 0.1923, "step": 8093 }, { "epoch": 0.52, "grad_norm": 3.9459934202711238, "learning_rate": 4.981410737182514e-07, "loss": 0.0412, "step": 8094 }, { "epoch": 0.52, "grad_norm": 1.3946510977912714, "learning_rate": 4.980378005521527e-07, "loss": 0.1793, "step": 8095 }, { "epoch": 0.52, "grad_norm": 1.1675751686893905, "learning_rate": 4.979345274697654e-07, "loss": 0.0289, "step": 8096 }, { "epoch": 0.52, "grad_norm": 0.47801455840366996, "learning_rate": 4.978312544754953e-07, "loss": 0.3539, "step": 8097 }, { "epoch": 0.52, "grad_norm": 0.5212935629372658, "learning_rate": 4.977279815737482e-07, "loss": 0.0023, "step": 8098 }, { "epoch": 0.52, "grad_norm": 0.8551706395574106, "learning_rate": 4.976247087689301e-07, "loss": 0.0983, "step": 8099 }, { "epoch": 0.52, "grad_norm": 2.0236467719199167, "learning_rate": 4.975214360654467e-07, "loss": 0.2156, "step": 8100 }, { "epoch": 0.52, "grad_norm": 0.770254602796096, "learning_rate": 4.974181634677036e-07, "loss": 0.1489, "step": 8101 }, { "epoch": 0.52, "grad_norm": 0.3393677786302405, "learning_rate": 4.973148909801071e-07, "loss": 0.1005, "step": 8102 }, { "epoch": 0.52, "grad_norm": 1.2674565230397858, "learning_rate": 4.972116186070625e-07, "loss": 0.2204, "step": 8103 }, { "epoch": 0.52, "grad_norm": 0.5216410523777196, "learning_rate": 4.971083463529762e-07, "loss": 0.2565, "step": 8104 }, { "epoch": 0.52, "grad_norm": 1.5289063925044168, "learning_rate": 4.970050742222536e-07, "loss": 0.2912, "step": 8105 }, { "epoch": 0.52, "grad_norm": 0.3860747416830018, "learning_rate": 4.969018022193003e-07, "loss": 0.1317, "step": 8106 }, { "epoch": 0.52, "grad_norm": 3.4942623652535483, "learning_rate": 4.967985303485224e-07, "loss": 0.1161, "step": 8107 }, { "epoch": 0.52, "grad_norm": 0.8004291099596728, "learning_rate": 4.966952586143256e-07, "loss": 0.2606, "step": 8108 }, { "epoch": 0.52, "grad_norm": 1.7968568147246406, "learning_rate": 4.965919870211159e-07, "loss": 0.2674, "step": 8109 }, { "epoch": 0.52, "grad_norm": 0.6414534403017824, "learning_rate": 4.964887155732987e-07, "loss": 0.1584, "step": 8110 }, { "epoch": 0.52, "grad_norm": 2.9856321369312853, "learning_rate": 4.9638544427528e-07, "loss": 0.2173, "step": 8111 }, { "epoch": 0.52, "grad_norm": 0.5929900285271503, "learning_rate": 4.962821731314655e-07, "loss": 0.3186, "step": 8112 }, { "epoch": 0.52, "grad_norm": 1.1246579842009279, "learning_rate": 4.961789021462611e-07, "loss": 0.1268, "step": 8113 }, { "epoch": 0.52, "grad_norm": 0.6573588297530798, "learning_rate": 4.960756313240723e-07, "loss": 0.1884, "step": 8114 }, { "epoch": 0.52, "grad_norm": 1.0487922556717102, "learning_rate": 4.959723606693051e-07, "loss": 0.1636, "step": 8115 }, { "epoch": 0.52, "grad_norm": 1.0779077508305832, "learning_rate": 4.958690901863648e-07, "loss": 0.1861, "step": 8116 }, { "epoch": 0.52, "grad_norm": 0.667903773938721, "learning_rate": 4.957658198796577e-07, "loss": 0.2523, "step": 8117 }, { "epoch": 0.52, "grad_norm": 0.47649954407524425, "learning_rate": 4.956625497535892e-07, "loss": 0.3787, "step": 8118 }, { "epoch": 0.52, "grad_norm": 0.8589198922433372, "learning_rate": 4.955592798125648e-07, "loss": 0.4041, "step": 8119 }, { "epoch": 0.52, "grad_norm": 0.6860354350736951, "learning_rate": 4.954560100609908e-07, "loss": 0.2068, "step": 8120 }, { "epoch": 0.52, "grad_norm": 11.332317558630018, "learning_rate": 4.953527405032723e-07, "loss": 0.3692, "step": 8121 }, { "epoch": 0.52, "grad_norm": 1.933375547824541, "learning_rate": 4.952494711438154e-07, "loss": 0.2179, "step": 8122 }, { "epoch": 0.52, "grad_norm": 1.0123585012208458, "learning_rate": 4.951462019870254e-07, "loss": 0.1651, "step": 8123 }, { "epoch": 0.52, "grad_norm": 1.6589280995115026, "learning_rate": 4.950429330373082e-07, "loss": 0.2217, "step": 8124 }, { "epoch": 0.52, "grad_norm": 0.8875536056595668, "learning_rate": 4.949396642990697e-07, "loss": 0.1683, "step": 8125 }, { "epoch": 0.52, "grad_norm": 1.0292126746650092, "learning_rate": 4.94836395776715e-07, "loss": 0.3058, "step": 8126 }, { "epoch": 0.52, "grad_norm": 0.6532360720076447, "learning_rate": 4.947331274746502e-07, "loss": 0.0899, "step": 8127 }, { "epoch": 0.52, "grad_norm": 0.44401196696336753, "learning_rate": 4.946298593972808e-07, "loss": 0.0918, "step": 8128 }, { "epoch": 0.52, "grad_norm": 0.8374962262196353, "learning_rate": 4.945265915490121e-07, "loss": 0.3585, "step": 8129 }, { "epoch": 0.52, "grad_norm": 11.34241429195748, "learning_rate": 4.944233239342504e-07, "loss": 0.0184, "step": 8130 }, { "epoch": 0.52, "grad_norm": 0.6473484301622712, "learning_rate": 4.943200565574007e-07, "loss": 0.1428, "step": 8131 }, { "epoch": 0.52, "grad_norm": 12.825841666555736, "learning_rate": 4.942167894228689e-07, "loss": 0.1661, "step": 8132 }, { "epoch": 0.52, "grad_norm": 0.4350198260722425, "learning_rate": 4.941135225350605e-07, "loss": 0.304, "step": 8133 }, { "epoch": 0.52, "grad_norm": 0.22369393361009066, "learning_rate": 4.94010255898381e-07, "loss": 0.0747, "step": 8134 }, { "epoch": 0.52, "grad_norm": 0.16485481347532424, "learning_rate": 4.93906989517236e-07, "loss": 0.089, "step": 8135 }, { "epoch": 0.52, "grad_norm": 1.6101082522220975, "learning_rate": 4.938037233960311e-07, "loss": 0.1237, "step": 8136 }, { "epoch": 0.52, "grad_norm": 0.3702393562442652, "learning_rate": 4.937004575391719e-07, "loss": 0.1818, "step": 8137 }, { "epoch": 0.52, "grad_norm": 7.872984812179066, "learning_rate": 4.935971919510636e-07, "loss": 0.0108, "step": 8138 }, { "epoch": 0.52, "grad_norm": 1.944398182559204, "learning_rate": 4.934939266361123e-07, "loss": 0.0112, "step": 8139 }, { "epoch": 0.52, "grad_norm": 1.0587341436362527, "learning_rate": 4.933906615987229e-07, "loss": 0.2331, "step": 8140 }, { "epoch": 0.52, "grad_norm": 1.0097210622684705, "learning_rate": 4.932873968433014e-07, "loss": 0.0713, "step": 8141 }, { "epoch": 0.52, "grad_norm": 2.9011634437214178, "learning_rate": 4.931841323742528e-07, "loss": 0.0172, "step": 8142 }, { "epoch": 0.52, "grad_norm": 1.0085254744974845, "learning_rate": 4.930808681959829e-07, "loss": 0.209, "step": 8143 }, { "epoch": 0.52, "grad_norm": 0.6101017080712451, "learning_rate": 4.92977604312897e-07, "loss": 0.0775, "step": 8144 }, { "epoch": 0.52, "grad_norm": 1.0447089952474162, "learning_rate": 4.928743407294008e-07, "loss": 0.1055, "step": 8145 }, { "epoch": 0.52, "grad_norm": 1.546086501221723, "learning_rate": 4.927710774498995e-07, "loss": 0.2003, "step": 8146 }, { "epoch": 0.52, "grad_norm": 1.4433914650160102, "learning_rate": 4.926678144787985e-07, "loss": 0.1689, "step": 8147 }, { "epoch": 0.52, "grad_norm": 0.7219197014496946, "learning_rate": 4.925645518205033e-07, "loss": 0.2977, "step": 8148 }, { "epoch": 0.52, "grad_norm": 0.9241352289020095, "learning_rate": 4.924612894794192e-07, "loss": 0.2784, "step": 8149 }, { "epoch": 0.52, "grad_norm": 0.6593509286864717, "learning_rate": 4.923580274599518e-07, "loss": 0.2903, "step": 8150 }, { "epoch": 0.52, "grad_norm": 0.9204912285373574, "learning_rate": 4.922547657665061e-07, "loss": 0.1956, "step": 8151 }, { "epoch": 0.52, "grad_norm": 1.264522924498254, "learning_rate": 4.921515044034878e-07, "loss": 0.3374, "step": 8152 }, { "epoch": 0.52, "grad_norm": 0.5032700619783951, "learning_rate": 4.920482433753021e-07, "loss": 0.2798, "step": 8153 }, { "epoch": 0.52, "grad_norm": 0.956876684470016, "learning_rate": 4.919449826863544e-07, "loss": 0.2016, "step": 8154 }, { "epoch": 0.52, "grad_norm": 3.7909706573922373, "learning_rate": 4.918417223410497e-07, "loss": 0.213, "step": 8155 }, { "epoch": 0.52, "grad_norm": 1.7344354376992372, "learning_rate": 4.917384623437937e-07, "loss": 0.2235, "step": 8156 }, { "epoch": 0.52, "grad_norm": 1.4713305981333957, "learning_rate": 4.916352026989914e-07, "loss": 0.0422, "step": 8157 }, { "epoch": 0.52, "grad_norm": 0.5609733558667588, "learning_rate": 4.915319434110484e-07, "loss": 0.1526, "step": 8158 }, { "epoch": 0.52, "grad_norm": 0.303771899028892, "learning_rate": 4.914286844843695e-07, "loss": 0.158, "step": 8159 }, { "epoch": 0.52, "grad_norm": 1.3765189718166573, "learning_rate": 4.913254259233602e-07, "loss": 0.3024, "step": 8160 }, { "epoch": 0.52, "grad_norm": 2.322163771166202, "learning_rate": 4.912221677324257e-07, "loss": 0.1188, "step": 8161 }, { "epoch": 0.52, "grad_norm": 0.397630194609182, "learning_rate": 4.911189099159711e-07, "loss": 0.0038, "step": 8162 }, { "epoch": 0.52, "grad_norm": 0.7918383108872507, "learning_rate": 4.910156524784017e-07, "loss": 0.1372, "step": 8163 }, { "epoch": 0.52, "grad_norm": 1.5311296124532012, "learning_rate": 4.909123954241225e-07, "loss": 0.1052, "step": 8164 }, { "epoch": 0.52, "grad_norm": 0.707926056032602, "learning_rate": 4.908091387575391e-07, "loss": 0.175, "step": 8165 }, { "epoch": 0.52, "grad_norm": 1.5344325190448418, "learning_rate": 4.90705882483056e-07, "loss": 0.1296, "step": 8166 }, { "epoch": 0.52, "grad_norm": 1.700488779754395, "learning_rate": 4.906026266050788e-07, "loss": 0.4026, "step": 8167 }, { "epoch": 0.52, "grad_norm": 0.32097578211959354, "learning_rate": 4.904993711280124e-07, "loss": 0.1006, "step": 8168 }, { "epoch": 0.52, "grad_norm": 0.8326151176037822, "learning_rate": 4.903961160562618e-07, "loss": 0.0026, "step": 8169 }, { "epoch": 0.52, "grad_norm": 0.3794740402403776, "learning_rate": 4.902928613942325e-07, "loss": 0.1105, "step": 8170 }, { "epoch": 0.52, "grad_norm": 0.33940217049646326, "learning_rate": 4.901896071463289e-07, "loss": 0.2032, "step": 8171 }, { "epoch": 0.52, "grad_norm": 1.3758703619299881, "learning_rate": 4.900863533169566e-07, "loss": 0.3885, "step": 8172 }, { "epoch": 0.52, "grad_norm": 0.611254928262428, "learning_rate": 4.899830999105204e-07, "loss": 0.1637, "step": 8173 }, { "epoch": 0.52, "grad_norm": 9.332136382986466, "learning_rate": 4.898798469314253e-07, "loss": 0.1619, "step": 8174 }, { "epoch": 0.52, "grad_norm": 1.6545439993081186, "learning_rate": 4.89776594384076e-07, "loss": 0.1125, "step": 8175 }, { "epoch": 0.52, "grad_norm": 0.6373618442771709, "learning_rate": 4.896733422728781e-07, "loss": 0.2989, "step": 8176 }, { "epoch": 0.52, "grad_norm": 1.0231394374024527, "learning_rate": 4.895700906022359e-07, "loss": 0.315, "step": 8177 }, { "epoch": 0.52, "grad_norm": 0.6125522670776106, "learning_rate": 4.894668393765548e-07, "loss": 0.2293, "step": 8178 }, { "epoch": 0.52, "grad_norm": 0.6645183435609646, "learning_rate": 4.893635886002394e-07, "loss": 0.2517, "step": 8179 }, { "epoch": 0.52, "grad_norm": 0.8889959015194648, "learning_rate": 4.892603382776947e-07, "loss": 0.165, "step": 8180 }, { "epoch": 0.52, "grad_norm": 0.960100897863591, "learning_rate": 4.891570884133255e-07, "loss": 0.1212, "step": 8181 }, { "epoch": 0.52, "grad_norm": 0.4169013265792246, "learning_rate": 4.89053839011537e-07, "loss": 0.1354, "step": 8182 }, { "epoch": 0.52, "grad_norm": 15.974883256278313, "learning_rate": 4.889505900767334e-07, "loss": 0.4403, "step": 8183 }, { "epoch": 0.52, "grad_norm": 0.4604167259060572, "learning_rate": 4.8884734161332e-07, "loss": 0.2273, "step": 8184 }, { "epoch": 0.52, "grad_norm": 9.206047113363974, "learning_rate": 4.887440936257013e-07, "loss": 0.2439, "step": 8185 }, { "epoch": 0.52, "grad_norm": 0.4879592225809976, "learning_rate": 4.886408461182824e-07, "loss": 0.1288, "step": 8186 }, { "epoch": 0.52, "grad_norm": 0.8666582105075445, "learning_rate": 4.885375990954677e-07, "loss": 0.0935, "step": 8187 }, { "epoch": 0.52, "grad_norm": 0.5234728481060329, "learning_rate": 4.88434352561662e-07, "loss": 0.1426, "step": 8188 }, { "epoch": 0.52, "grad_norm": 0.5205276731923292, "learning_rate": 4.883311065212703e-07, "loss": 0.1332, "step": 8189 }, { "epoch": 0.52, "grad_norm": 0.6284872415583754, "learning_rate": 4.882278609786968e-07, "loss": 0.1104, "step": 8190 }, { "epoch": 0.52, "grad_norm": 0.5106344655459321, "learning_rate": 4.881246159383466e-07, "loss": 0.0892, "step": 8191 }, { "epoch": 0.52, "grad_norm": 1.7396302828353256, "learning_rate": 4.88021371404624e-07, "loss": 0.2086, "step": 8192 }, { "epoch": 0.52, "grad_norm": 1.6956739818468076, "learning_rate": 4.87918127381934e-07, "loss": 0.3581, "step": 8193 }, { "epoch": 0.52, "grad_norm": 1.0877921362422047, "learning_rate": 4.878148838746806e-07, "loss": 0.0218, "step": 8194 }, { "epoch": 0.52, "grad_norm": 7.673432447629786, "learning_rate": 4.87711640887269e-07, "loss": 0.1537, "step": 8195 }, { "epoch": 0.52, "grad_norm": 0.948180460074467, "learning_rate": 4.876083984241035e-07, "loss": 0.0797, "step": 8196 }, { "epoch": 0.52, "grad_norm": 0.9047276678021269, "learning_rate": 4.875051564895886e-07, "loss": 0.2154, "step": 8197 }, { "epoch": 0.52, "grad_norm": 0.46054497364789626, "learning_rate": 4.874019150881287e-07, "loss": 0.0631, "step": 8198 }, { "epoch": 0.52, "grad_norm": 0.9628242709982492, "learning_rate": 4.872986742241288e-07, "loss": 0.4154, "step": 8199 }, { "epoch": 0.52, "grad_norm": 0.8853121666096614, "learning_rate": 4.871954339019928e-07, "loss": 0.0893, "step": 8200 }, { "epoch": 0.52, "grad_norm": 0.8700554086125147, "learning_rate": 4.870921941261252e-07, "loss": 0.007, "step": 8201 }, { "epoch": 0.52, "grad_norm": 0.710502323168251, "learning_rate": 4.869889549009308e-07, "loss": 0.3938, "step": 8202 }, { "epoch": 0.52, "grad_norm": 1.3829422967772256, "learning_rate": 4.868857162308135e-07, "loss": 0.1249, "step": 8203 }, { "epoch": 0.52, "grad_norm": 5.791836083047208, "learning_rate": 4.86782478120178e-07, "loss": 0.293, "step": 8204 }, { "epoch": 0.52, "grad_norm": 1.055559361123662, "learning_rate": 4.866792405734286e-07, "loss": 0.1588, "step": 8205 }, { "epoch": 0.52, "grad_norm": 0.12669878074895569, "learning_rate": 4.865760035949695e-07, "loss": 0.0149, "step": 8206 }, { "epoch": 0.52, "grad_norm": 1.0831513869789322, "learning_rate": 4.864727671892049e-07, "loss": 0.1228, "step": 8207 }, { "epoch": 0.52, "grad_norm": 2.674063684024899, "learning_rate": 4.863695313605397e-07, "loss": 0.1532, "step": 8208 }, { "epoch": 0.52, "grad_norm": 6.784761189667426, "learning_rate": 4.862662961133773e-07, "loss": 0.2814, "step": 8209 }, { "epoch": 0.52, "grad_norm": 1.3034063368760405, "learning_rate": 4.861630614521225e-07, "loss": 0.272, "step": 8210 }, { "epoch": 0.52, "grad_norm": 1.6401069113083235, "learning_rate": 4.860598273811792e-07, "loss": 0.1154, "step": 8211 }, { "epoch": 0.52, "grad_norm": 0.6507259723895588, "learning_rate": 4.859565939049519e-07, "loss": 0.3146, "step": 8212 }, { "epoch": 0.52, "grad_norm": 0.7603477601240486, "learning_rate": 4.858533610278443e-07, "loss": 0.2414, "step": 8213 }, { "epoch": 0.52, "grad_norm": 0.6837864369543493, "learning_rate": 4.857501287542608e-07, "loss": 0.1865, "step": 8214 }, { "epoch": 0.52, "grad_norm": 0.6469380514526999, "learning_rate": 4.856468970886056e-07, "loss": 0.0477, "step": 8215 }, { "epoch": 0.52, "grad_norm": 0.6632697507099637, "learning_rate": 4.855436660352824e-07, "loss": 0.1616, "step": 8216 }, { "epoch": 0.52, "grad_norm": 1.1212084672513034, "learning_rate": 4.854404355986958e-07, "loss": 0.0772, "step": 8217 }, { "epoch": 0.52, "grad_norm": 1.8867303347652737, "learning_rate": 4.853372057832491e-07, "loss": 0.1253, "step": 8218 }, { "epoch": 0.52, "grad_norm": 0.42147078461829746, "learning_rate": 4.852339765933469e-07, "loss": 0.2253, "step": 8219 }, { "epoch": 0.52, "grad_norm": 1.0304215517377768, "learning_rate": 4.851307480333929e-07, "loss": 0.2961, "step": 8220 }, { "epoch": 0.52, "grad_norm": 1.0333451791890018, "learning_rate": 4.850275201077911e-07, "loss": 0.232, "step": 8221 }, { "epoch": 0.52, "grad_norm": 0.4349674322535713, "learning_rate": 4.849242928209453e-07, "loss": 0.2093, "step": 8222 }, { "epoch": 0.52, "grad_norm": 2.2486247488705318, "learning_rate": 4.848210661772595e-07, "loss": 0.2181, "step": 8223 }, { "epoch": 0.52, "grad_norm": 2.5097889743442834, "learning_rate": 4.847178401811375e-07, "loss": 0.357, "step": 8224 }, { "epoch": 0.52, "grad_norm": 1.3235845555502364, "learning_rate": 4.846146148369834e-07, "loss": 0.075, "step": 8225 }, { "epoch": 0.52, "grad_norm": 0.9198231270911172, "learning_rate": 4.845113901492004e-07, "loss": 0.1831, "step": 8226 }, { "epoch": 0.52, "grad_norm": 1.8586642988102762, "learning_rate": 4.844081661221929e-07, "loss": 0.0167, "step": 8227 }, { "epoch": 0.52, "grad_norm": 0.6726872701153576, "learning_rate": 4.843049427603644e-07, "loss": 0.211, "step": 8228 }, { "epoch": 0.52, "grad_norm": 0.8395887871025307, "learning_rate": 4.842017200681184e-07, "loss": 0.0067, "step": 8229 }, { "epoch": 0.52, "grad_norm": 0.2945067975585641, "learning_rate": 4.840984980498589e-07, "loss": 0.0833, "step": 8230 }, { "epoch": 0.52, "grad_norm": 0.9712970287543637, "learning_rate": 4.839952767099894e-07, "loss": 0.1664, "step": 8231 }, { "epoch": 0.52, "grad_norm": 1.2145687671104668, "learning_rate": 4.838920560529136e-07, "loss": 0.3168, "step": 8232 }, { "epoch": 0.53, "grad_norm": 0.9919768412091605, "learning_rate": 4.837888360830349e-07, "loss": 0.1024, "step": 8233 }, { "epoch": 0.53, "grad_norm": 1.1152289836489446, "learning_rate": 4.836856168047574e-07, "loss": 0.2503, "step": 8234 }, { "epoch": 0.53, "grad_norm": 0.43487629793650373, "learning_rate": 4.835823982224839e-07, "loss": 0.1746, "step": 8235 }, { "epoch": 0.53, "grad_norm": 0.7776933543405603, "learning_rate": 4.834791803406186e-07, "loss": 0.4996, "step": 8236 }, { "epoch": 0.53, "grad_norm": 0.7138515491082422, "learning_rate": 4.833759631635644e-07, "loss": 0.1812, "step": 8237 }, { "epoch": 0.53, "grad_norm": 0.7899423270112802, "learning_rate": 4.832727466957254e-07, "loss": 0.2266, "step": 8238 }, { "epoch": 0.53, "grad_norm": 0.9439042794776742, "learning_rate": 4.831695309415042e-07, "loss": 0.1149, "step": 8239 }, { "epoch": 0.53, "grad_norm": 0.5698927914817434, "learning_rate": 4.83066315905305e-07, "loss": 0.354, "step": 8240 }, { "epoch": 0.53, "grad_norm": 1.0048873877797317, "learning_rate": 4.829631015915306e-07, "loss": 0.035, "step": 8241 }, { "epoch": 0.53, "grad_norm": 1.0012648658704233, "learning_rate": 4.828598880045846e-07, "loss": 0.134, "step": 8242 }, { "epoch": 0.53, "grad_norm": 4.793594013577755, "learning_rate": 4.827566751488704e-07, "loss": 0.1977, "step": 8243 }, { "epoch": 0.53, "grad_norm": 1.171597253648042, "learning_rate": 4.826534630287908e-07, "loss": 0.1717, "step": 8244 }, { "epoch": 0.53, "grad_norm": 1.2448125237617151, "learning_rate": 4.825502516487496e-07, "loss": 0.2479, "step": 8245 }, { "epoch": 0.53, "grad_norm": 0.377931561316245, "learning_rate": 4.824470410131495e-07, "loss": 0.1566, "step": 8246 }, { "epoch": 0.53, "grad_norm": 0.982739061442296, "learning_rate": 4.823438311263942e-07, "loss": 0.2143, "step": 8247 }, { "epoch": 0.53, "grad_norm": 0.45563477761264554, "learning_rate": 4.822406219928863e-07, "loss": 0.2273, "step": 8248 }, { "epoch": 0.53, "grad_norm": 3.088298522228207, "learning_rate": 4.821374136170293e-07, "loss": 0.1257, "step": 8249 }, { "epoch": 0.53, "grad_norm": 0.375275171763511, "learning_rate": 4.820342060032261e-07, "loss": 0.0603, "step": 8250 }, { "epoch": 0.53, "grad_norm": 1.949710767535515, "learning_rate": 4.819309991558798e-07, "loss": 0.359, "step": 8251 }, { "epoch": 0.53, "grad_norm": 6.590046422935963, "learning_rate": 4.818277930793933e-07, "loss": 0.4137, "step": 8252 }, { "epoch": 0.53, "grad_norm": 0.485660221582689, "learning_rate": 4.817245877781698e-07, "loss": 0.1023, "step": 8253 }, { "epoch": 0.53, "grad_norm": 1.2500216822562424, "learning_rate": 4.816213832566121e-07, "loss": 0.0481, "step": 8254 }, { "epoch": 0.53, "grad_norm": 1.501707110922901, "learning_rate": 4.815181795191233e-07, "loss": 0.1328, "step": 8255 }, { "epoch": 0.53, "grad_norm": 0.6027320587153924, "learning_rate": 4.814149765701059e-07, "loss": 0.1465, "step": 8256 }, { "epoch": 0.53, "grad_norm": 3.0408515438402133, "learning_rate": 4.81311774413963e-07, "loss": 0.1136, "step": 8257 }, { "epoch": 0.53, "grad_norm": 1.1432097162698354, "learning_rate": 4.812085730550973e-07, "loss": 0.1676, "step": 8258 }, { "epoch": 0.53, "grad_norm": 1.0042730486274967, "learning_rate": 4.811053724979116e-07, "loss": 0.1835, "step": 8259 }, { "epoch": 0.53, "grad_norm": 0.7901281894729675, "learning_rate": 4.81002172746809e-07, "loss": 0.1528, "step": 8260 }, { "epoch": 0.53, "grad_norm": 0.40423764933552675, "learning_rate": 4.808989738061916e-07, "loss": 0.0702, "step": 8261 }, { "epoch": 0.53, "grad_norm": 0.34243011998723405, "learning_rate": 4.807957756804627e-07, "loss": 0.053, "step": 8262 }, { "epoch": 0.53, "grad_norm": 0.4038619697821567, "learning_rate": 4.806925783740242e-07, "loss": 0.1311, "step": 8263 }, { "epoch": 0.53, "grad_norm": 1.6184916906991575, "learning_rate": 4.805893818912794e-07, "loss": 0.1294, "step": 8264 }, { "epoch": 0.53, "grad_norm": 12.62261932255158, "learning_rate": 4.804861862366305e-07, "loss": 0.1913, "step": 8265 }, { "epoch": 0.53, "grad_norm": 0.6886609271115808, "learning_rate": 4.803829914144802e-07, "loss": 0.4007, "step": 8266 }, { "epoch": 0.53, "grad_norm": 1.7613069744678602, "learning_rate": 4.802797974292308e-07, "loss": 0.1288, "step": 8267 }, { "epoch": 0.53, "grad_norm": 1.3144447465818885, "learning_rate": 4.80176604285285e-07, "loss": 0.2315, "step": 8268 }, { "epoch": 0.53, "grad_norm": 0.47126022938693496, "learning_rate": 4.800734119870451e-07, "loss": 0.1008, "step": 8269 }, { "epoch": 0.53, "grad_norm": 0.4993473224307254, "learning_rate": 4.799702205389134e-07, "loss": 0.0805, "step": 8270 }, { "epoch": 0.53, "grad_norm": 0.826096688904793, "learning_rate": 4.798670299452926e-07, "loss": 0.2461, "step": 8271 }, { "epoch": 0.53, "grad_norm": 0.1566726732922138, "learning_rate": 4.797638402105844e-07, "loss": 0.0678, "step": 8272 }, { "epoch": 0.53, "grad_norm": 17.1333157525917, "learning_rate": 4.796606513391917e-07, "loss": 0.1987, "step": 8273 }, { "epoch": 0.53, "grad_norm": 1.9522791582660433, "learning_rate": 4.795574633355164e-07, "loss": 0.0395, "step": 8274 }, { "epoch": 0.53, "grad_norm": 0.8561925291307796, "learning_rate": 4.794542762039608e-07, "loss": 0.2843, "step": 8275 }, { "epoch": 0.53, "grad_norm": 11.071059948220585, "learning_rate": 4.793510899489272e-07, "loss": 0.0931, "step": 8276 }, { "epoch": 0.53, "grad_norm": 4.473368344647713, "learning_rate": 4.792479045748177e-07, "loss": 0.2107, "step": 8277 }, { "epoch": 0.53, "grad_norm": 1.9678500358403068, "learning_rate": 4.79144720086034e-07, "loss": 0.2189, "step": 8278 }, { "epoch": 0.53, "grad_norm": 1.70270008822198, "learning_rate": 4.790415364869787e-07, "loss": 0.2395, "step": 8279 }, { "epoch": 0.53, "grad_norm": 0.7649746568862957, "learning_rate": 4.789383537820534e-07, "loss": 0.1648, "step": 8280 }, { "epoch": 0.53, "grad_norm": 0.9893349122689812, "learning_rate": 4.788351719756605e-07, "loss": 0.2831, "step": 8281 }, { "epoch": 0.53, "grad_norm": 0.6704188958555899, "learning_rate": 4.787319910722015e-07, "loss": 0.1827, "step": 8282 }, { "epoch": 0.53, "grad_norm": 0.7719067889338662, "learning_rate": 4.786288110760787e-07, "loss": 0.0498, "step": 8283 }, { "epoch": 0.53, "grad_norm": 0.9135942110973932, "learning_rate": 4.785256319916936e-07, "loss": 0.1649, "step": 8284 }, { "epoch": 0.53, "grad_norm": 0.5359750663694142, "learning_rate": 4.784224538234482e-07, "loss": 0.1537, "step": 8285 }, { "epoch": 0.53, "grad_norm": 1.112424574766902, "learning_rate": 4.783192765757446e-07, "loss": 0.2091, "step": 8286 }, { "epoch": 0.53, "grad_norm": 0.5248860382975025, "learning_rate": 4.782161002529839e-07, "loss": 0.3016, "step": 8287 }, { "epoch": 0.53, "grad_norm": 1.3642197574348445, "learning_rate": 4.781129248595684e-07, "loss": 0.3299, "step": 8288 }, { "epoch": 0.53, "grad_norm": 0.9654919145150089, "learning_rate": 4.780097503998993e-07, "loss": 0.3039, "step": 8289 }, { "epoch": 0.53, "grad_norm": 0.7129230089890654, "learning_rate": 4.779065768783786e-07, "loss": 0.2663, "step": 8290 }, { "epoch": 0.53, "grad_norm": 3.492018950625692, "learning_rate": 4.778034042994077e-07, "loss": 0.0489, "step": 8291 }, { "epoch": 0.53, "grad_norm": 1.016346917646898, "learning_rate": 4.777002326673883e-07, "loss": 0.2143, "step": 8292 }, { "epoch": 0.53, "grad_norm": 1.7027076661134657, "learning_rate": 4.775970619867215e-07, "loss": 0.1086, "step": 8293 }, { "epoch": 0.53, "grad_norm": 0.7552783355285612, "learning_rate": 4.774938922618094e-07, "loss": 0.1274, "step": 8294 }, { "epoch": 0.53, "grad_norm": 17.187041279410437, "learning_rate": 4.773907234970528e-07, "loss": 0.3003, "step": 8295 }, { "epoch": 0.53, "grad_norm": 0.561537872363602, "learning_rate": 4.772875556968537e-07, "loss": 0.2272, "step": 8296 }, { "epoch": 0.53, "grad_norm": 0.8076328473818086, "learning_rate": 4.771843888656131e-07, "loss": 0.2778, "step": 8297 }, { "epoch": 0.53, "grad_norm": 0.30795960532586475, "learning_rate": 4.770812230077322e-07, "loss": 0.0026, "step": 8298 }, { "epoch": 0.53, "grad_norm": 0.3285228663365966, "learning_rate": 4.769780581276125e-07, "loss": 0.1642, "step": 8299 }, { "epoch": 0.53, "grad_norm": 0.649480294227551, "learning_rate": 4.768748942296549e-07, "loss": 0.1039, "step": 8300 }, { "epoch": 0.53, "grad_norm": 0.5681854671972938, "learning_rate": 4.7677173131826096e-07, "loss": 0.1768, "step": 8301 }, { "epoch": 0.53, "grad_norm": 0.8524605023286805, "learning_rate": 4.7666856939783167e-07, "loss": 0.163, "step": 8302 }, { "epoch": 0.53, "grad_norm": 0.9369709721737559, "learning_rate": 4.765654084727681e-07, "loss": 0.1572, "step": 8303 }, { "epoch": 0.53, "grad_norm": 5.7409964722526485, "learning_rate": 4.764622485474712e-07, "loss": 0.3333, "step": 8304 }, { "epoch": 0.53, "grad_norm": 0.7505081269417608, "learning_rate": 4.763590896263423e-07, "loss": 0.2993, "step": 8305 }, { "epoch": 0.53, "grad_norm": 0.7082784780927405, "learning_rate": 4.762559317137819e-07, "loss": 0.4248, "step": 8306 }, { "epoch": 0.53, "grad_norm": 0.7729870754292155, "learning_rate": 4.7615277481419146e-07, "loss": 0.1719, "step": 8307 }, { "epoch": 0.53, "grad_norm": 0.8689005251881637, "learning_rate": 4.7604961893197137e-07, "loss": 0.3522, "step": 8308 }, { "epoch": 0.53, "grad_norm": 2.343606459450366, "learning_rate": 4.759464640715229e-07, "loss": 0.114, "step": 8309 }, { "epoch": 0.53, "grad_norm": 5.34035926840061, "learning_rate": 4.758433102372465e-07, "loss": 0.2122, "step": 8310 }, { "epoch": 0.53, "grad_norm": 1.7962579966207999, "learning_rate": 4.7574015743354305e-07, "loss": 0.1081, "step": 8311 }, { "epoch": 0.53, "grad_norm": 1.7334214270279522, "learning_rate": 4.756370056648133e-07, "loss": 0.0126, "step": 8312 }, { "epoch": 0.53, "grad_norm": 0.4473313375010857, "learning_rate": 4.7553385493545766e-07, "loss": 0.2959, "step": 8313 }, { "epoch": 0.53, "grad_norm": 0.6052795437915548, "learning_rate": 4.754307052498773e-07, "loss": 0.2558, "step": 8314 }, { "epoch": 0.53, "grad_norm": 0.52268626158944, "learning_rate": 4.753275566124721e-07, "loss": 0.2504, "step": 8315 }, { "epoch": 0.53, "grad_norm": 0.6730348211603852, "learning_rate": 4.7522440902764323e-07, "loss": 0.0809, "step": 8316 }, { "epoch": 0.53, "grad_norm": 0.8276657829322512, "learning_rate": 4.751212624997906e-07, "loss": 0.3571, "step": 8317 }, { "epoch": 0.53, "grad_norm": 1.0171656654102184, "learning_rate": 4.7501811703331516e-07, "loss": 0.3255, "step": 8318 }, { "epoch": 0.53, "grad_norm": 1.3961201876532447, "learning_rate": 4.7491497263261687e-07, "loss": 0.314, "step": 8319 }, { "epoch": 0.53, "grad_norm": 0.6164111459767151, "learning_rate": 4.7481182930209633e-07, "loss": 0.3303, "step": 8320 }, { "epoch": 0.53, "grad_norm": 3.327110020906333, "learning_rate": 4.7470868704615387e-07, "loss": 0.0943, "step": 8321 }, { "epoch": 0.53, "grad_norm": 0.27534488706768706, "learning_rate": 4.746055458691896e-07, "loss": 0.0504, "step": 8322 }, { "epoch": 0.53, "grad_norm": 0.7571062987207461, "learning_rate": 4.745024057756037e-07, "loss": 0.1668, "step": 8323 }, { "epoch": 0.53, "grad_norm": 0.7103832911043411, "learning_rate": 4.743992667697966e-07, "loss": 0.2673, "step": 8324 }, { "epoch": 0.53, "grad_norm": 0.40543707517658106, "learning_rate": 4.742961288561684e-07, "loss": 0.067, "step": 8325 }, { "epoch": 0.53, "grad_norm": 13.155291203497054, "learning_rate": 4.7419299203911867e-07, "loss": 0.1394, "step": 8326 }, { "epoch": 0.53, "grad_norm": 5.633389690731605, "learning_rate": 4.74089856323048e-07, "loss": 0.3341, "step": 8327 }, { "epoch": 0.53, "grad_norm": 0.7601287600239229, "learning_rate": 4.739867217123559e-07, "loss": 0.1004, "step": 8328 }, { "epoch": 0.53, "grad_norm": 0.659000570859291, "learning_rate": 4.738835882114427e-07, "loss": 0.0744, "step": 8329 }, { "epoch": 0.53, "grad_norm": 5.727445611828362, "learning_rate": 4.73780455824708e-07, "loss": 0.2002, "step": 8330 }, { "epoch": 0.53, "grad_norm": 0.7044838640912626, "learning_rate": 4.73677324556552e-07, "loss": 0.0962, "step": 8331 }, { "epoch": 0.53, "grad_norm": 1.6210387091077625, "learning_rate": 4.7357419441137404e-07, "loss": 0.2306, "step": 8332 }, { "epoch": 0.53, "grad_norm": 3.1456649589567296, "learning_rate": 4.734710653935743e-07, "loss": 0.0845, "step": 8333 }, { "epoch": 0.53, "grad_norm": 0.8166196626153188, "learning_rate": 4.73367937507552e-07, "loss": 0.3285, "step": 8334 }, { "epoch": 0.53, "grad_norm": 0.6904261910706524, "learning_rate": 4.732648107577072e-07, "loss": 0.1853, "step": 8335 }, { "epoch": 0.53, "grad_norm": 0.4770212767922006, "learning_rate": 4.7316168514843914e-07, "loss": 0.1259, "step": 8336 }, { "epoch": 0.53, "grad_norm": 0.6304307296852425, "learning_rate": 4.7305856068414776e-07, "loss": 0.0056, "step": 8337 }, { "epoch": 0.53, "grad_norm": 0.526324203251784, "learning_rate": 4.729554373692323e-07, "loss": 0.1671, "step": 8338 }, { "epoch": 0.53, "grad_norm": 1.1042738785690747, "learning_rate": 4.72852315208092e-07, "loss": 0.124, "step": 8339 }, { "epoch": 0.53, "grad_norm": 0.6621693609820087, "learning_rate": 4.727491942051268e-07, "loss": 0.1009, "step": 8340 }, { "epoch": 0.53, "grad_norm": 0.6882980695951783, "learning_rate": 4.7264607436473555e-07, "loss": 0.2576, "step": 8341 }, { "epoch": 0.53, "grad_norm": 15.985279478692943, "learning_rate": 4.725429556913179e-07, "loss": 0.173, "step": 8342 }, { "epoch": 0.53, "grad_norm": 1.9648885535488356, "learning_rate": 4.724398381892728e-07, "loss": 0.0932, "step": 8343 }, { "epoch": 0.53, "grad_norm": 0.839036168880391, "learning_rate": 4.723367218629998e-07, "loss": 0.0475, "step": 8344 }, { "epoch": 0.53, "grad_norm": 1.0665152967812122, "learning_rate": 4.722336067168977e-07, "loss": 0.1767, "step": 8345 }, { "epoch": 0.53, "grad_norm": 0.8784662023791162, "learning_rate": 4.7213049275536584e-07, "loss": 0.337, "step": 8346 }, { "epoch": 0.53, "grad_norm": 0.988119990288558, "learning_rate": 4.720273799828031e-07, "loss": 0.0906, "step": 8347 }, { "epoch": 0.53, "grad_norm": 0.33164467170179124, "learning_rate": 4.7192426840360864e-07, "loss": 0.2812, "step": 8348 }, { "epoch": 0.53, "grad_norm": 0.5021048271459208, "learning_rate": 4.718211580221812e-07, "loss": 0.1779, "step": 8349 }, { "epoch": 0.53, "grad_norm": 1.1088301088217718, "learning_rate": 4.7171804884292e-07, "loss": 0.2481, "step": 8350 }, { "epoch": 0.53, "grad_norm": 1.5711181883692436, "learning_rate": 4.7161494087022354e-07, "loss": 0.2516, "step": 8351 }, { "epoch": 0.53, "grad_norm": 5.866843021786571, "learning_rate": 4.7151183410849094e-07, "loss": 0.3975, "step": 8352 }, { "epoch": 0.53, "grad_norm": 0.32527701729108605, "learning_rate": 4.7140872856212077e-07, "loss": 0.1093, "step": 8353 }, { "epoch": 0.53, "grad_norm": 0.6621450184134362, "learning_rate": 4.713056242355115e-07, "loss": 0.0823, "step": 8354 }, { "epoch": 0.53, "grad_norm": 0.3107572932525853, "learning_rate": 4.7120252113306207e-07, "loss": 0.1065, "step": 8355 }, { "epoch": 0.53, "grad_norm": 0.05815418833054292, "learning_rate": 4.710994192591709e-07, "loss": 0.0006, "step": 8356 }, { "epoch": 0.53, "grad_norm": 0.3691203708536689, "learning_rate": 4.709963186182368e-07, "loss": 0.0926, "step": 8357 }, { "epoch": 0.53, "grad_norm": 1.0321675826769945, "learning_rate": 4.708932192146579e-07, "loss": 0.3407, "step": 8358 }, { "epoch": 0.53, "grad_norm": 1.2388699622096775, "learning_rate": 4.707901210528329e-07, "loss": 0.3973, "step": 8359 }, { "epoch": 0.53, "grad_norm": 0.6445735646207422, "learning_rate": 4.706870241371598e-07, "loss": 0.1492, "step": 8360 }, { "epoch": 0.53, "grad_norm": 0.6405927916498171, "learning_rate": 4.705839284720375e-07, "loss": 0.546, "step": 8361 }, { "epoch": 0.53, "grad_norm": 1.217280316421169, "learning_rate": 4.704808340618637e-07, "loss": 0.3646, "step": 8362 }, { "epoch": 0.53, "grad_norm": 0.32295731170163944, "learning_rate": 4.703777409110371e-07, "loss": 0.2454, "step": 8363 }, { "epoch": 0.53, "grad_norm": 1.2450631581739244, "learning_rate": 4.702746490239554e-07, "loss": 0.0479, "step": 8364 }, { "epoch": 0.53, "grad_norm": 1.6994199215400096, "learning_rate": 4.70171558405017e-07, "loss": 0.2139, "step": 8365 }, { "epoch": 0.53, "grad_norm": 0.9752995049467115, "learning_rate": 4.7006846905861995e-07, "loss": 0.3142, "step": 8366 }, { "epoch": 0.53, "grad_norm": 0.627713157969607, "learning_rate": 4.6996538098916205e-07, "loss": 0.294, "step": 8367 }, { "epoch": 0.53, "grad_norm": 0.41530161662137777, "learning_rate": 4.698622942010416e-07, "loss": 0.0899, "step": 8368 }, { "epoch": 0.53, "grad_norm": 0.5522485878580587, "learning_rate": 4.69759208698656e-07, "loss": 0.1956, "step": 8369 }, { "epoch": 0.53, "grad_norm": 1.5530429781071238, "learning_rate": 4.6965612448640354e-07, "loss": 0.1169, "step": 8370 }, { "epoch": 0.53, "grad_norm": 0.636913940183296, "learning_rate": 4.6955304156868165e-07, "loss": 0.0016, "step": 8371 }, { "epoch": 0.53, "grad_norm": 1.949578232853312, "learning_rate": 4.694499599498884e-07, "loss": 0.1755, "step": 8372 }, { "epoch": 0.53, "grad_norm": 0.6750312131157368, "learning_rate": 4.69346879634421e-07, "loss": 0.2282, "step": 8373 }, { "epoch": 0.53, "grad_norm": 4.723795459386764, "learning_rate": 4.692438006266775e-07, "loss": 0.0811, "step": 8374 }, { "epoch": 0.53, "grad_norm": 0.39597401424435147, "learning_rate": 4.6914072293105523e-07, "loss": 0.0521, "step": 8375 }, { "epoch": 0.53, "grad_norm": 0.5125918304620144, "learning_rate": 4.690376465519519e-07, "loss": 0.1816, "step": 8376 }, { "epoch": 0.53, "grad_norm": 0.15254802116096794, "learning_rate": 4.6893457149376464e-07, "loss": 0.0027, "step": 8377 }, { "epoch": 0.53, "grad_norm": 1.1792409719099959, "learning_rate": 4.688314977608912e-07, "loss": 0.2804, "step": 8378 }, { "epoch": 0.53, "grad_norm": 1.00172599190676, "learning_rate": 4.6872842535772874e-07, "loss": 0.2656, "step": 8379 }, { "epoch": 0.53, "grad_norm": 2.8731598878152553, "learning_rate": 4.686253542886743e-07, "loss": 0.132, "step": 8380 }, { "epoch": 0.53, "grad_norm": 1.0209030059932318, "learning_rate": 4.6852228455812535e-07, "loss": 0.0621, "step": 8381 }, { "epoch": 0.53, "grad_norm": 0.5910051250160278, "learning_rate": 4.684192161704791e-07, "loss": 0.3391, "step": 8382 }, { "epoch": 0.53, "grad_norm": 0.5875092722322819, "learning_rate": 4.683161491301326e-07, "loss": 0.3366, "step": 8383 }, { "epoch": 0.53, "grad_norm": 0.5832849105154081, "learning_rate": 4.6821308344148267e-07, "loss": 0.1486, "step": 8384 }, { "epoch": 0.53, "grad_norm": 0.2557154629724138, "learning_rate": 4.681100191089268e-07, "loss": 0.185, "step": 8385 }, { "epoch": 0.53, "grad_norm": 1.8808627732116225, "learning_rate": 4.6800695613686135e-07, "loss": 0.257, "step": 8386 }, { "epoch": 0.53, "grad_norm": 0.27462422795780117, "learning_rate": 4.679038945296837e-07, "loss": 0.1196, "step": 8387 }, { "epoch": 0.53, "grad_norm": 0.6125359784602092, "learning_rate": 4.6780083429179025e-07, "loss": 0.2423, "step": 8388 }, { "epoch": 0.53, "grad_norm": 0.38922478139869265, "learning_rate": 4.676977754275781e-07, "loss": 0.0041, "step": 8389 }, { "epoch": 0.54, "grad_norm": 0.8147472227342312, "learning_rate": 4.6759471794144354e-07, "loss": 0.2865, "step": 8390 }, { "epoch": 0.54, "grad_norm": 0.8615916354838382, "learning_rate": 4.6749166183778367e-07, "loss": 0.281, "step": 8391 }, { "epoch": 0.54, "grad_norm": 0.6019648112880962, "learning_rate": 4.673886071209948e-07, "loss": 0.0172, "step": 8392 }, { "epoch": 0.54, "grad_norm": 0.7827091748181172, "learning_rate": 4.6728555379547353e-07, "loss": 0.1966, "step": 8393 }, { "epoch": 0.54, "grad_norm": 0.40517344795530413, "learning_rate": 4.6718250186561637e-07, "loss": 0.1911, "step": 8394 }, { "epoch": 0.54, "grad_norm": 0.37609331752727443, "learning_rate": 4.6707945133581944e-07, "loss": 0.0497, "step": 8395 }, { "epoch": 0.54, "grad_norm": 0.8233073465419235, "learning_rate": 4.669764022104795e-07, "loss": 0.1986, "step": 8396 }, { "epoch": 0.54, "grad_norm": 0.7806807328458075, "learning_rate": 4.6687335449399246e-07, "loss": 0.223, "step": 8397 }, { "epoch": 0.54, "grad_norm": 0.9744470360724538, "learning_rate": 4.6677030819075494e-07, "loss": 0.1456, "step": 8398 }, { "epoch": 0.54, "grad_norm": 0.9128355771847734, "learning_rate": 4.666672633051625e-07, "loss": 0.3131, "step": 8399 }, { "epoch": 0.54, "grad_norm": 1.1988994555552688, "learning_rate": 4.6656421984161185e-07, "loss": 0.2688, "step": 8400 }, { "epoch": 0.54, "grad_norm": 0.7766678877976095, "learning_rate": 4.6646117780449875e-07, "loss": 0.2256, "step": 8401 }, { "epoch": 0.54, "grad_norm": 0.7061747127141389, "learning_rate": 4.663581371982194e-07, "loss": 0.1532, "step": 8402 }, { "epoch": 0.54, "grad_norm": 1.173840406685293, "learning_rate": 4.662550980271692e-07, "loss": 0.2169, "step": 8403 }, { "epoch": 0.54, "grad_norm": 0.6232501211624247, "learning_rate": 4.661520602957447e-07, "loss": 0.0222, "step": 8404 }, { "epoch": 0.54, "grad_norm": 1.51348777533686, "learning_rate": 4.6604902400834113e-07, "loss": 0.3304, "step": 8405 }, { "epoch": 0.54, "grad_norm": 0.5824220708737986, "learning_rate": 4.659459891693546e-07, "loss": 0.3077, "step": 8406 }, { "epoch": 0.54, "grad_norm": 0.5160647659160633, "learning_rate": 4.6584295578318055e-07, "loss": 0.1435, "step": 8407 }, { "epoch": 0.54, "grad_norm": 1.3249734204797299, "learning_rate": 4.657399238542147e-07, "loss": 0.1361, "step": 8408 }, { "epoch": 0.54, "grad_norm": 1.601539827843137, "learning_rate": 4.6563689338685253e-07, "loss": 0.2454, "step": 8409 }, { "epoch": 0.54, "grad_norm": 0.4928277164362475, "learning_rate": 4.6553386438548946e-07, "loss": 0.0144, "step": 8410 }, { "epoch": 0.54, "grad_norm": 1.1178534249758558, "learning_rate": 4.6543083685452127e-07, "loss": 0.4038, "step": 8411 }, { "epoch": 0.54, "grad_norm": 11.69181954222864, "learning_rate": 4.6532781079834284e-07, "loss": 0.1806, "step": 8412 }, { "epoch": 0.54, "grad_norm": 0.023451811323160213, "learning_rate": 4.652247862213499e-07, "loss": 0.0003, "step": 8413 }, { "epoch": 0.54, "grad_norm": 0.7853279716942542, "learning_rate": 4.6512176312793735e-07, "loss": 0.0061, "step": 8414 }, { "epoch": 0.54, "grad_norm": 0.6417682889599597, "learning_rate": 4.6501874152250064e-07, "loss": 0.1982, "step": 8415 }, { "epoch": 0.54, "grad_norm": 0.5193905532521556, "learning_rate": 4.6491572140943455e-07, "loss": 0.1116, "step": 8416 }, { "epoch": 0.54, "grad_norm": 2.7000523213032452, "learning_rate": 4.6481270279313433e-07, "loss": 0.304, "step": 8417 }, { "epoch": 0.54, "grad_norm": 1.8775827363867574, "learning_rate": 4.647096856779951e-07, "loss": 0.1525, "step": 8418 }, { "epoch": 0.54, "grad_norm": 1.257623169315283, "learning_rate": 4.646066700684115e-07, "loss": 0.0711, "step": 8419 }, { "epoch": 0.54, "grad_norm": 0.3113897164922417, "learning_rate": 4.6450365596877867e-07, "loss": 0.059, "step": 8420 }, { "epoch": 0.54, "grad_norm": 0.7721374090663957, "learning_rate": 4.6440064338349094e-07, "loss": 0.1587, "step": 8421 }, { "epoch": 0.54, "grad_norm": 1.2158504599664803, "learning_rate": 4.642976323169436e-07, "loss": 0.1179, "step": 8422 }, { "epoch": 0.54, "grad_norm": 1.3499208462900647, "learning_rate": 4.6419462277353083e-07, "loss": 0.2684, "step": 8423 }, { "epoch": 0.54, "grad_norm": 2.922887618861537, "learning_rate": 4.640916147576477e-07, "loss": 0.0658, "step": 8424 }, { "epoch": 0.54, "grad_norm": 0.5540538560114585, "learning_rate": 4.6398860827368814e-07, "loss": 0.1489, "step": 8425 }, { "epoch": 0.54, "grad_norm": 1.0657525259003775, "learning_rate": 4.638856033260471e-07, "loss": 0.2786, "step": 8426 }, { "epoch": 0.54, "grad_norm": 0.38324265983943734, "learning_rate": 4.6378259991911883e-07, "loss": 0.0047, "step": 8427 }, { "epoch": 0.54, "grad_norm": 2.9656010549995666, "learning_rate": 4.6367959805729765e-07, "loss": 0.226, "step": 8428 }, { "epoch": 0.54, "grad_norm": 9.998603730798484, "learning_rate": 4.635765977449777e-07, "loss": 0.094, "step": 8429 }, { "epoch": 0.54, "grad_norm": 0.548560762199273, "learning_rate": 4.634735989865536e-07, "loss": 0.0562, "step": 8430 }, { "epoch": 0.54, "grad_norm": 0.4566069762684165, "learning_rate": 4.633706017864189e-07, "loss": 0.114, "step": 8431 }, { "epoch": 0.54, "grad_norm": 0.8559033881437175, "learning_rate": 4.632676061489682e-07, "loss": 0.1003, "step": 8432 }, { "epoch": 0.54, "grad_norm": 0.8142044444980603, "learning_rate": 4.6316461207859505e-07, "loss": 0.1296, "step": 8433 }, { "epoch": 0.54, "grad_norm": 0.5067751983888892, "learning_rate": 4.630616195796938e-07, "loss": 0.0145, "step": 8434 }, { "epoch": 0.54, "grad_norm": 1.1498546862355845, "learning_rate": 4.62958628656658e-07, "loss": 0.3408, "step": 8435 }, { "epoch": 0.54, "grad_norm": 0.59984117843711, "learning_rate": 4.628556393138815e-07, "loss": 0.1739, "step": 8436 }, { "epoch": 0.54, "grad_norm": 1.4816842370205916, "learning_rate": 4.6275265155575824e-07, "loss": 0.0509, "step": 8437 }, { "epoch": 0.54, "grad_norm": 0.8461256816270033, "learning_rate": 4.626496653866816e-07, "loss": 0.3231, "step": 8438 }, { "epoch": 0.54, "grad_norm": 2.0689898407931318, "learning_rate": 4.6254668081104553e-07, "loss": 0.3156, "step": 8439 }, { "epoch": 0.54, "grad_norm": 1.154742449093417, "learning_rate": 4.624436978332431e-07, "loss": 0.2319, "step": 8440 }, { "epoch": 0.54, "grad_norm": 0.7100348437553234, "learning_rate": 4.6234071645766826e-07, "loss": 0.2873, "step": 8441 }, { "epoch": 0.54, "grad_norm": 0.5191890453423041, "learning_rate": 4.622377366887139e-07, "loss": 0.0466, "step": 8442 }, { "epoch": 0.54, "grad_norm": 0.8436845098887154, "learning_rate": 4.621347585307739e-07, "loss": 0.2187, "step": 8443 }, { "epoch": 0.54, "grad_norm": 0.4120815992148151, "learning_rate": 4.620317819882409e-07, "loss": 0.0053, "step": 8444 }, { "epoch": 0.54, "grad_norm": 0.656772350978441, "learning_rate": 4.619288070655086e-07, "loss": 0.1531, "step": 8445 }, { "epoch": 0.54, "grad_norm": 0.6586417685936933, "learning_rate": 4.618258337669698e-07, "loss": 0.239, "step": 8446 }, { "epoch": 0.54, "grad_norm": 1.5419891264394243, "learning_rate": 4.617228620970178e-07, "loss": 0.1489, "step": 8447 }, { "epoch": 0.54, "grad_norm": 0.8317930629719872, "learning_rate": 4.6161989206004554e-07, "loss": 0.2021, "step": 8448 }, { "epoch": 0.54, "grad_norm": 0.38792223643785884, "learning_rate": 4.615169236604456e-07, "loss": 0.0915, "step": 8449 }, { "epoch": 0.54, "grad_norm": 0.9317987377358307, "learning_rate": 4.614139569026113e-07, "loss": 0.1291, "step": 8450 }, { "epoch": 0.54, "grad_norm": 1.964238468234055, "learning_rate": 4.613109917909349e-07, "loss": 0.1983, "step": 8451 }, { "epoch": 0.54, "grad_norm": 10.504604358323638, "learning_rate": 4.6120802832980946e-07, "loss": 0.1325, "step": 8452 }, { "epoch": 0.54, "grad_norm": 1.193223609694503, "learning_rate": 4.611050665236276e-07, "loss": 0.5047, "step": 8453 }, { "epoch": 0.54, "grad_norm": 2.584229022981351, "learning_rate": 4.610021063767817e-07, "loss": 0.2107, "step": 8454 }, { "epoch": 0.54, "grad_norm": 0.7681234642862357, "learning_rate": 4.608991478936643e-07, "loss": 0.1975, "step": 8455 }, { "epoch": 0.54, "grad_norm": 0.9747994156063038, "learning_rate": 4.60796191078668e-07, "loss": 0.2907, "step": 8456 }, { "epoch": 0.54, "grad_norm": 0.6442174283300625, "learning_rate": 4.6069323593618487e-07, "loss": 0.1747, "step": 8457 }, { "epoch": 0.54, "grad_norm": 1.0394665216571475, "learning_rate": 4.605902824706074e-07, "loss": 0.2331, "step": 8458 }, { "epoch": 0.54, "grad_norm": 0.7345764613771602, "learning_rate": 4.6048733068632763e-07, "loss": 0.0262, "step": 8459 }, { "epoch": 0.54, "grad_norm": 1.364427066048286, "learning_rate": 4.6038438058773795e-07, "loss": 0.0444, "step": 8460 }, { "epoch": 0.54, "grad_norm": 0.7958588620663453, "learning_rate": 4.6028143217922994e-07, "loss": 0.1346, "step": 8461 }, { "epoch": 0.54, "grad_norm": 1.57771632222782, "learning_rate": 4.60178485465196e-07, "loss": 0.1572, "step": 8462 }, { "epoch": 0.54, "grad_norm": 0.4540274020577097, "learning_rate": 4.6007554045002807e-07, "loss": 0.2118, "step": 8463 }, { "epoch": 0.54, "grad_norm": 0.4574318972870368, "learning_rate": 4.599725971381176e-07, "loss": 0.0756, "step": 8464 }, { "epoch": 0.54, "grad_norm": 1.2494177439012157, "learning_rate": 4.598696555338569e-07, "loss": 0.0699, "step": 8465 }, { "epoch": 0.54, "grad_norm": 4.324517809174406, "learning_rate": 4.5976671564163703e-07, "loss": 0.1505, "step": 8466 }, { "epoch": 0.54, "grad_norm": 0.8563665290211504, "learning_rate": 4.5966377746585024e-07, "loss": 0.0799, "step": 8467 }, { "epoch": 0.54, "grad_norm": 1.307458669269294, "learning_rate": 4.5956084101088755e-07, "loss": 0.3482, "step": 8468 }, { "epoch": 0.54, "grad_norm": 1.8117454115871394, "learning_rate": 4.594579062811409e-07, "loss": 0.2052, "step": 8469 }, { "epoch": 0.54, "grad_norm": 22.86468592982254, "learning_rate": 4.593549732810012e-07, "loss": 0.2059, "step": 8470 }, { "epoch": 0.54, "grad_norm": 0.9233915420970702, "learning_rate": 4.592520420148602e-07, "loss": 0.1638, "step": 8471 }, { "epoch": 0.54, "grad_norm": 1.9841653192358117, "learning_rate": 4.591491124871089e-07, "loss": 0.2057, "step": 8472 }, { "epoch": 0.54, "grad_norm": 0.6514008796100588, "learning_rate": 4.590461847021388e-07, "loss": 0.1183, "step": 8473 }, { "epoch": 0.54, "grad_norm": 1.115142321159001, "learning_rate": 4.589432586643405e-07, "loss": 0.263, "step": 8474 }, { "epoch": 0.54, "grad_norm": 0.6404165934906061, "learning_rate": 4.5884033437810565e-07, "loss": 0.388, "step": 8475 }, { "epoch": 0.54, "grad_norm": 0.8905150331337415, "learning_rate": 4.5873741184782484e-07, "loss": 0.1492, "step": 8476 }, { "epoch": 0.54, "grad_norm": 0.3374940481431025, "learning_rate": 4.5863449107788877e-07, "loss": 0.0189, "step": 8477 }, { "epoch": 0.54, "grad_norm": 0.837287076567836, "learning_rate": 4.585315720726885e-07, "loss": 0.5004, "step": 8478 }, { "epoch": 0.54, "grad_norm": 4.45436173566265, "learning_rate": 4.584286548366148e-07, "loss": 0.0123, "step": 8479 }, { "epoch": 0.54, "grad_norm": 0.07608838150686013, "learning_rate": 4.583257393740583e-07, "loss": 0.0126, "step": 8480 }, { "epoch": 0.54, "grad_norm": 1.298241936440913, "learning_rate": 4.582228256894093e-07, "loss": 0.1864, "step": 8481 }, { "epoch": 0.54, "grad_norm": 1.9557106428726254, "learning_rate": 4.581199137870588e-07, "loss": 0.1747, "step": 8482 }, { "epoch": 0.54, "grad_norm": 0.4943259769523899, "learning_rate": 4.580170036713968e-07, "loss": 0.2299, "step": 8483 }, { "epoch": 0.54, "grad_norm": 8.991030265560546, "learning_rate": 4.57914095346814e-07, "loss": 0.1538, "step": 8484 }, { "epoch": 0.54, "grad_norm": 1.0107885142531865, "learning_rate": 4.578111888177003e-07, "loss": 0.3783, "step": 8485 }, { "epoch": 0.54, "grad_norm": 1.0740041697591793, "learning_rate": 4.577082840884463e-07, "loss": 0.211, "step": 8486 }, { "epoch": 0.54, "grad_norm": 0.6396555243855557, "learning_rate": 4.5760538116344174e-07, "loss": 0.2172, "step": 8487 }, { "epoch": 0.54, "grad_norm": 0.6631907456910964, "learning_rate": 4.57502480047077e-07, "loss": 0.264, "step": 8488 }, { "epoch": 0.54, "grad_norm": 1.1861297116872045, "learning_rate": 4.573995807437418e-07, "loss": 0.0859, "step": 8489 }, { "epoch": 0.54, "grad_norm": 12.233467100768586, "learning_rate": 4.57296683257826e-07, "loss": 0.2024, "step": 8490 }, { "epoch": 0.54, "grad_norm": 0.562863703698288, "learning_rate": 4.571937875937198e-07, "loss": 0.1103, "step": 8491 }, { "epoch": 0.54, "grad_norm": 1.3014767160948295, "learning_rate": 4.570908937558124e-07, "loss": 0.2501, "step": 8492 }, { "epoch": 0.54, "grad_norm": 1.3948229887777503, "learning_rate": 4.56988001748494e-07, "loss": 0.275, "step": 8493 }, { "epoch": 0.54, "grad_norm": 0.9564443585401297, "learning_rate": 4.568851115761537e-07, "loss": 0.368, "step": 8494 }, { "epoch": 0.54, "grad_norm": 0.29839740125089426, "learning_rate": 4.5678222324318137e-07, "loss": 0.1443, "step": 8495 }, { "epoch": 0.54, "grad_norm": 0.6743428581848108, "learning_rate": 4.5667933675396606e-07, "loss": 0.2141, "step": 8496 }, { "epoch": 0.54, "grad_norm": 0.7459111072513056, "learning_rate": 4.5657645211289744e-07, "loss": 0.1226, "step": 8497 }, { "epoch": 0.54, "grad_norm": 0.7824845827003406, "learning_rate": 4.564735693243647e-07, "loss": 0.1287, "step": 8498 }, { "epoch": 0.54, "grad_norm": 1.4834984827118423, "learning_rate": 4.56370688392757e-07, "loss": 0.1547, "step": 8499 }, { "epoch": 0.54, "grad_norm": 1.0938361165736732, "learning_rate": 4.562678093224633e-07, "loss": 0.3256, "step": 8500 }, { "epoch": 0.54, "grad_norm": 29.55789695253735, "learning_rate": 4.5616493211787297e-07, "loss": 0.2399, "step": 8501 }, { "epoch": 0.54, "grad_norm": 0.6838882822874129, "learning_rate": 4.560620567833746e-07, "loss": 0.076, "step": 8502 }, { "epoch": 0.54, "grad_norm": 0.24447589420637153, "learning_rate": 4.5595918332335744e-07, "loss": 0.069, "step": 8503 }, { "epoch": 0.54, "grad_norm": 0.7864297919528537, "learning_rate": 4.5585631174220994e-07, "loss": 0.2658, "step": 8504 }, { "epoch": 0.54, "grad_norm": 2.3257562561475966, "learning_rate": 4.557534420443208e-07, "loss": 0.0941, "step": 8505 }, { "epoch": 0.54, "grad_norm": 1.9571794965312268, "learning_rate": 4.556505742340789e-07, "loss": 0.0663, "step": 8506 }, { "epoch": 0.54, "grad_norm": 0.7530598028222795, "learning_rate": 4.555477083158725e-07, "loss": 0.3032, "step": 8507 }, { "epoch": 0.54, "grad_norm": 5.675894191460296, "learning_rate": 4.554448442940904e-07, "loss": 0.124, "step": 8508 }, { "epoch": 0.54, "grad_norm": 0.5352661522361948, "learning_rate": 4.5534198217312066e-07, "loss": 0.2388, "step": 8509 }, { "epoch": 0.54, "grad_norm": 1.1325929422909422, "learning_rate": 4.552391219573519e-07, "loss": 0.2196, "step": 8510 }, { "epoch": 0.54, "grad_norm": 1.1595632651429615, "learning_rate": 4.5513626365117196e-07, "loss": 0.1096, "step": 8511 }, { "epoch": 0.54, "grad_norm": 0.935187183481488, "learning_rate": 4.5503340725896946e-07, "loss": 0.2327, "step": 8512 }, { "epoch": 0.54, "grad_norm": 0.6270097308649183, "learning_rate": 4.5493055278513194e-07, "loss": 0.1127, "step": 8513 }, { "epoch": 0.54, "grad_norm": 0.2333554916025773, "learning_rate": 4.548277002340479e-07, "loss": 0.0802, "step": 8514 }, { "epoch": 0.54, "grad_norm": 0.3078384653044468, "learning_rate": 4.547248496101047e-07, "loss": 0.0836, "step": 8515 }, { "epoch": 0.54, "grad_norm": 0.8529922203258207, "learning_rate": 4.546220009176906e-07, "loss": 0.2505, "step": 8516 }, { "epoch": 0.54, "grad_norm": 0.5147821592586467, "learning_rate": 4.5451915416119325e-07, "loss": 0.1138, "step": 8517 }, { "epoch": 0.54, "grad_norm": 0.7878131179164628, "learning_rate": 4.54416309345e-07, "loss": 0.0269, "step": 8518 }, { "epoch": 0.54, "grad_norm": 14.107587381449914, "learning_rate": 4.5431346647349885e-07, "loss": 0.0914, "step": 8519 }, { "epoch": 0.54, "grad_norm": 3.3688615720946276, "learning_rate": 4.542106255510768e-07, "loss": 0.3, "step": 8520 }, { "epoch": 0.54, "grad_norm": 0.9529514095880248, "learning_rate": 4.541077865821218e-07, "loss": 0.3298, "step": 8521 }, { "epoch": 0.54, "grad_norm": 0.51656489930236, "learning_rate": 4.540049495710206e-07, "loss": 0.3581, "step": 8522 }, { "epoch": 0.54, "grad_norm": 1.0633316442621703, "learning_rate": 4.5390211452216087e-07, "loss": 0.2921, "step": 8523 }, { "epoch": 0.54, "grad_norm": 0.6073632241863104, "learning_rate": 4.537992814399296e-07, "loss": 0.0691, "step": 8524 }, { "epoch": 0.54, "grad_norm": 1.0659607195223204, "learning_rate": 4.5369645032871377e-07, "loss": 0.0703, "step": 8525 }, { "epoch": 0.54, "grad_norm": 0.9100748578867685, "learning_rate": 4.5359362119290046e-07, "loss": 0.2182, "step": 8526 }, { "epoch": 0.54, "grad_norm": 0.5451664741831638, "learning_rate": 4.534907940368767e-07, "loss": 0.1903, "step": 8527 }, { "epoch": 0.54, "grad_norm": 1.2983460906523192, "learning_rate": 4.5338796886502896e-07, "loss": 0.2098, "step": 8528 }, { "epoch": 0.54, "grad_norm": 0.9268620866095381, "learning_rate": 4.5328514568174444e-07, "loss": 0.1282, "step": 8529 }, { "epoch": 0.54, "grad_norm": 0.6468324519500694, "learning_rate": 4.531823244914094e-07, "loss": 0.2481, "step": 8530 }, { "epoch": 0.54, "grad_norm": 5.875258197524188, "learning_rate": 4.530795052984104e-07, "loss": 0.1138, "step": 8531 }, { "epoch": 0.54, "grad_norm": 0.6116136267177559, "learning_rate": 4.529766881071341e-07, "loss": 0.0849, "step": 8532 }, { "epoch": 0.54, "grad_norm": 0.8706325022627655, "learning_rate": 4.528738729219667e-07, "loss": 0.3316, "step": 8533 }, { "epoch": 0.54, "grad_norm": 0.477585535082431, "learning_rate": 4.5277105974729484e-07, "loss": 0.1506, "step": 8534 }, { "epoch": 0.54, "grad_norm": 1.119045564193954, "learning_rate": 4.526682485875043e-07, "loss": 0.469, "step": 8535 }, { "epoch": 0.54, "grad_norm": 2.147142349762046, "learning_rate": 4.525654394469816e-07, "loss": 0.135, "step": 8536 }, { "epoch": 0.54, "grad_norm": 0.8146832885122889, "learning_rate": 4.5246263233011244e-07, "loss": 0.1504, "step": 8537 }, { "epoch": 0.54, "grad_norm": 0.4546010503383642, "learning_rate": 4.523598272412832e-07, "loss": 0.1102, "step": 8538 }, { "epoch": 0.54, "grad_norm": 1.6218698456156957, "learning_rate": 4.522570241848792e-07, "loss": 0.1513, "step": 8539 }, { "epoch": 0.54, "grad_norm": 1.5445552367923363, "learning_rate": 4.521542231652868e-07, "loss": 0.2466, "step": 8540 }, { "epoch": 0.54, "grad_norm": 0.5778055370867833, "learning_rate": 4.520514241868912e-07, "loss": 0.1345, "step": 8541 }, { "epoch": 0.54, "grad_norm": 0.7511853910031592, "learning_rate": 4.519486272540783e-07, "loss": 0.1383, "step": 8542 }, { "epoch": 0.54, "grad_norm": 0.3388616325480788, "learning_rate": 4.5184583237123356e-07, "loss": 0.1855, "step": 8543 }, { "epoch": 0.54, "grad_norm": 0.7383229291815947, "learning_rate": 4.517430395427424e-07, "loss": 0.2364, "step": 8544 }, { "epoch": 0.54, "grad_norm": 0.8153944375237804, "learning_rate": 4.5164024877299034e-07, "loss": 0.2166, "step": 8545 }, { "epoch": 0.54, "grad_norm": 1.2759299075411263, "learning_rate": 4.515374600663623e-07, "loss": 0.3461, "step": 8546 }, { "epoch": 0.55, "grad_norm": 1.6106474779064373, "learning_rate": 4.514346734272437e-07, "loss": 0.2649, "step": 8547 }, { "epoch": 0.55, "grad_norm": 0.8904849809586077, "learning_rate": 4.5133188886001947e-07, "loss": 0.2382, "step": 8548 }, { "epoch": 0.55, "grad_norm": 1.6980647809570883, "learning_rate": 4.512291063690749e-07, "loss": 0.2302, "step": 8549 }, { "epoch": 0.55, "grad_norm": 1.1361973622331778, "learning_rate": 4.5112632595879444e-07, "loss": 0.2255, "step": 8550 }, { "epoch": 0.55, "grad_norm": 1.380638977705818, "learning_rate": 4.510235476335633e-07, "loss": 0.127, "step": 8551 }, { "epoch": 0.55, "grad_norm": 14.98530149233771, "learning_rate": 4.5092077139776594e-07, "loss": 0.1807, "step": 8552 }, { "epoch": 0.55, "grad_norm": 0.8875602497822686, "learning_rate": 4.508179972557874e-07, "loss": 0.3445, "step": 8553 }, { "epoch": 0.55, "grad_norm": 4.0839262924190445, "learning_rate": 4.507152252120117e-07, "loss": 0.1327, "step": 8554 }, { "epoch": 0.55, "grad_norm": 0.6907390655670341, "learning_rate": 4.506124552708238e-07, "loss": 0.1258, "step": 8555 }, { "epoch": 0.55, "grad_norm": 0.7974425763261175, "learning_rate": 4.505096874366077e-07, "loss": 0.0369, "step": 8556 }, { "epoch": 0.55, "grad_norm": 0.45529218864764853, "learning_rate": 4.5040692171374794e-07, "loss": 0.1797, "step": 8557 }, { "epoch": 0.55, "grad_norm": 0.794674730828558, "learning_rate": 4.503041581066285e-07, "loss": 0.1448, "step": 8558 }, { "epoch": 0.55, "grad_norm": 1.3580680448302769, "learning_rate": 4.502013966196336e-07, "loss": 0.1913, "step": 8559 }, { "epoch": 0.55, "grad_norm": 2.8710498541525413, "learning_rate": 4.500986372571472e-07, "loss": 0.3488, "step": 8560 }, { "epoch": 0.55, "grad_norm": 2.1796187287502056, "learning_rate": 4.4999588002355314e-07, "loss": 0.4305, "step": 8561 }, { "epoch": 0.55, "grad_norm": 0.5076527385948689, "learning_rate": 4.498931249232356e-07, "loss": 0.1952, "step": 8562 }, { "epoch": 0.55, "grad_norm": 1.1039826500375853, "learning_rate": 4.4979037196057785e-07, "loss": 0.2341, "step": 8563 }, { "epoch": 0.55, "grad_norm": 0.3838415985195842, "learning_rate": 4.49687621139964e-07, "loss": 0.1882, "step": 8564 }, { "epoch": 0.55, "grad_norm": 0.7075439609216445, "learning_rate": 4.4958487246577714e-07, "loss": 0.2253, "step": 8565 }, { "epoch": 0.55, "grad_norm": 1.8971257055841362, "learning_rate": 4.4948212594240113e-07, "loss": 0.0992, "step": 8566 }, { "epoch": 0.55, "grad_norm": 1.3838648711265849, "learning_rate": 4.493793815742191e-07, "loss": 0.0273, "step": 8567 }, { "epoch": 0.55, "grad_norm": 0.6225953300300522, "learning_rate": 4.492766393656144e-07, "loss": 0.3109, "step": 8568 }, { "epoch": 0.55, "grad_norm": 0.7097647397292058, "learning_rate": 4.4917389932097027e-07, "loss": 0.1853, "step": 8569 }, { "epoch": 0.55, "grad_norm": 0.5155582573603247, "learning_rate": 4.4907116144466976e-07, "loss": 0.0226, "step": 8570 }, { "epoch": 0.55, "grad_norm": 1.1948258985476277, "learning_rate": 4.489684257410958e-07, "loss": 0.0056, "step": 8571 }, { "epoch": 0.55, "grad_norm": 0.28535405179376255, "learning_rate": 4.4886569221463154e-07, "loss": 0.1475, "step": 8572 }, { "epoch": 0.55, "grad_norm": 5.021488884601238, "learning_rate": 4.487629608696597e-07, "loss": 0.273, "step": 8573 }, { "epoch": 0.55, "grad_norm": 0.9495205928322551, "learning_rate": 4.4866023171056274e-07, "loss": 0.1342, "step": 8574 }, { "epoch": 0.55, "grad_norm": 0.4904094764027969, "learning_rate": 4.485575047417237e-07, "loss": 0.1998, "step": 8575 }, { "epoch": 0.55, "grad_norm": 2.02893706203771, "learning_rate": 4.484547799675247e-07, "loss": 0.1373, "step": 8576 }, { "epoch": 0.55, "grad_norm": 0.26415931682304167, "learning_rate": 4.483520573923485e-07, "loss": 0.165, "step": 8577 }, { "epoch": 0.55, "grad_norm": 0.11699404437693192, "learning_rate": 4.4824933702057727e-07, "loss": 0.0034, "step": 8578 }, { "epoch": 0.55, "grad_norm": 0.9777991146970927, "learning_rate": 4.4814661885659353e-07, "loss": 0.2992, "step": 8579 }, { "epoch": 0.55, "grad_norm": 0.6497567332125923, "learning_rate": 4.480439029047791e-07, "loss": 0.3313, "step": 8580 }, { "epoch": 0.55, "grad_norm": 0.7355528914886902, "learning_rate": 4.4794118916951644e-07, "loss": 0.3799, "step": 8581 }, { "epoch": 0.55, "grad_norm": 0.8520051342515318, "learning_rate": 4.478384776551871e-07, "loss": 0.1371, "step": 8582 }, { "epoch": 0.55, "grad_norm": 1.2076017304051176, "learning_rate": 4.477357683661733e-07, "loss": 0.2908, "step": 8583 }, { "epoch": 0.55, "grad_norm": 0.7971270527375104, "learning_rate": 4.476330613068565e-07, "loss": 0.1446, "step": 8584 }, { "epoch": 0.55, "grad_norm": 0.8498952281979963, "learning_rate": 4.4753035648161886e-07, "loss": 0.3229, "step": 8585 }, { "epoch": 0.55, "grad_norm": 1.084276383353148, "learning_rate": 4.474276538948415e-07, "loss": 0.128, "step": 8586 }, { "epoch": 0.55, "grad_norm": 0.779160114721411, "learning_rate": 4.4732495355090597e-07, "loss": 0.228, "step": 8587 }, { "epoch": 0.55, "grad_norm": 0.4506411749383888, "learning_rate": 4.4722225545419413e-07, "loss": 0.0505, "step": 8588 }, { "epoch": 0.55, "grad_norm": 1.1525618354432954, "learning_rate": 4.471195596090867e-07, "loss": 0.2949, "step": 8589 }, { "epoch": 0.55, "grad_norm": 1.0802060735415184, "learning_rate": 4.470168660199654e-07, "loss": 0.1355, "step": 8590 }, { "epoch": 0.55, "grad_norm": 0.5992702188904998, "learning_rate": 4.4691417469121083e-07, "loss": 0.2294, "step": 8591 }, { "epoch": 0.55, "grad_norm": 0.5559914605425803, "learning_rate": 4.4681148562720455e-07, "loss": 0.3393, "step": 8592 }, { "epoch": 0.55, "grad_norm": 11.390465442380409, "learning_rate": 4.4670879883232696e-07, "loss": 0.2481, "step": 8593 }, { "epoch": 0.55, "grad_norm": 0.8133634865613211, "learning_rate": 4.466061143109593e-07, "loss": 0.174, "step": 8594 }, { "epoch": 0.55, "grad_norm": 1.3427987143616134, "learning_rate": 4.4650343206748215e-07, "loss": 0.1598, "step": 8595 }, { "epoch": 0.55, "grad_norm": 0.5185805257579341, "learning_rate": 4.464007521062761e-07, "loss": 0.0469, "step": 8596 }, { "epoch": 0.55, "grad_norm": 0.6905742917442251, "learning_rate": 4.462980744317216e-07, "loss": 0.1262, "step": 8597 }, { "epoch": 0.55, "grad_norm": 1.0559210717523206, "learning_rate": 4.461953990481994e-07, "loss": 0.2988, "step": 8598 }, { "epoch": 0.55, "grad_norm": 1.0591882872068195, "learning_rate": 4.460927259600897e-07, "loss": 0.3202, "step": 8599 }, { "epoch": 0.55, "grad_norm": 0.8848992748142672, "learning_rate": 4.459900551717723e-07, "loss": 0.4407, "step": 8600 }, { "epoch": 0.55, "grad_norm": 2.730913811847409, "learning_rate": 4.4588738668762815e-07, "loss": 0.1821, "step": 8601 }, { "epoch": 0.55, "grad_norm": 4.541553882484758, "learning_rate": 4.4578472051203653e-07, "loss": 0.2061, "step": 8602 }, { "epoch": 0.55, "grad_norm": 1.8020017672337625, "learning_rate": 4.4568205664937786e-07, "loss": 0.3005, "step": 8603 }, { "epoch": 0.55, "grad_norm": 1.2069163581798041, "learning_rate": 4.455793951040318e-07, "loss": 0.2515, "step": 8604 }, { "epoch": 0.55, "grad_norm": 13.947038719264075, "learning_rate": 4.454767358803782e-07, "loss": 0.1206, "step": 8605 }, { "epoch": 0.55, "grad_norm": 0.2260616838684422, "learning_rate": 4.453740789827965e-07, "loss": 0.1138, "step": 8606 }, { "epoch": 0.55, "grad_norm": 0.9046499230171707, "learning_rate": 4.452714244156667e-07, "loss": 0.097, "step": 8607 }, { "epoch": 0.55, "grad_norm": 4.356366811719842, "learning_rate": 4.451687721833676e-07, "loss": 0.2497, "step": 8608 }, { "epoch": 0.55, "grad_norm": 0.8806676060896952, "learning_rate": 4.450661222902792e-07, "loss": 0.2198, "step": 8609 }, { "epoch": 0.55, "grad_norm": 8.340212205460144, "learning_rate": 4.4496347474078027e-07, "loss": 0.2238, "step": 8610 }, { "epoch": 0.55, "grad_norm": 1.5907989412480348, "learning_rate": 4.448608295392503e-07, "loss": 0.3913, "step": 8611 }, { "epoch": 0.55, "grad_norm": 0.33591879836840965, "learning_rate": 4.4475818669006806e-07, "loss": 0.1539, "step": 8612 }, { "epoch": 0.55, "grad_norm": 1.481818672318781, "learning_rate": 4.4465554619761263e-07, "loss": 0.1277, "step": 8613 }, { "epoch": 0.55, "grad_norm": 3.0167631925411578, "learning_rate": 4.44552908066263e-07, "loss": 0.1213, "step": 8614 }, { "epoch": 0.55, "grad_norm": 0.6183022367609539, "learning_rate": 4.4445027230039755e-07, "loss": 0.1888, "step": 8615 }, { "epoch": 0.55, "grad_norm": 0.6780775402626844, "learning_rate": 4.4434763890439543e-07, "loss": 0.2843, "step": 8616 }, { "epoch": 0.55, "grad_norm": 16.540078901841717, "learning_rate": 4.442450078826346e-07, "loss": 0.1059, "step": 8617 }, { "epoch": 0.55, "grad_norm": 3.022785206750274, "learning_rate": 4.4414237923949407e-07, "loss": 0.3227, "step": 8618 }, { "epoch": 0.55, "grad_norm": 2.721530692035682, "learning_rate": 4.440397529793517e-07, "loss": 0.0032, "step": 8619 }, { "epoch": 0.55, "grad_norm": 0.7044104800798273, "learning_rate": 4.439371291065862e-07, "loss": 0.3409, "step": 8620 }, { "epoch": 0.55, "grad_norm": 0.5552772627363022, "learning_rate": 4.438345076255753e-07, "loss": 0.1829, "step": 8621 }, { "epoch": 0.55, "grad_norm": 0.17930898073871132, "learning_rate": 4.4373188854069727e-07, "loss": 0.0717, "step": 8622 }, { "epoch": 0.55, "grad_norm": 0.5092543984358266, "learning_rate": 4.436292718563299e-07, "loss": 0.0758, "step": 8623 }, { "epoch": 0.55, "grad_norm": 1.8913321612001055, "learning_rate": 4.4352665757685125e-07, "loss": 0.3083, "step": 8624 }, { "epoch": 0.55, "grad_norm": 1.119974980136262, "learning_rate": 4.4342404570663876e-07, "loss": 0.2503, "step": 8625 }, { "epoch": 0.55, "grad_norm": 0.439870508423816, "learning_rate": 4.4332143625007043e-07, "loss": 0.1707, "step": 8626 }, { "epoch": 0.55, "grad_norm": 11.48170564663966, "learning_rate": 4.432188292115236e-07, "loss": 0.1542, "step": 8627 }, { "epoch": 0.55, "grad_norm": 1.449310572381116, "learning_rate": 4.431162245953754e-07, "loss": 0.0982, "step": 8628 }, { "epoch": 0.55, "grad_norm": 1.2178805021997257, "learning_rate": 4.430136224060035e-07, "loss": 0.1398, "step": 8629 }, { "epoch": 0.55, "grad_norm": 1.7279167315476547, "learning_rate": 4.4291102264778506e-07, "loss": 0.157, "step": 8630 }, { "epoch": 0.55, "grad_norm": 1.093104599073745, "learning_rate": 4.4280842532509716e-07, "loss": 0.2506, "step": 8631 }, { "epoch": 0.55, "grad_norm": 0.7498838727386716, "learning_rate": 4.4270583044231667e-07, "loss": 0.1681, "step": 8632 }, { "epoch": 0.55, "grad_norm": 1.916312673192367, "learning_rate": 4.4260323800382086e-07, "loss": 0.0876, "step": 8633 }, { "epoch": 0.55, "grad_norm": 1.73006049574467, "learning_rate": 4.4250064801398607e-07, "loss": 0.0755, "step": 8634 }, { "epoch": 0.55, "grad_norm": 0.47755745843624, "learning_rate": 4.4239806047718945e-07, "loss": 0.1912, "step": 8635 }, { "epoch": 0.55, "grad_norm": 0.3343599388855688, "learning_rate": 4.4229547539780715e-07, "loss": 0.1178, "step": 8636 }, { "epoch": 0.55, "grad_norm": 0.40355819689314826, "learning_rate": 4.421928927802161e-07, "loss": 0.1157, "step": 8637 }, { "epoch": 0.55, "grad_norm": 0.17095910480109017, "learning_rate": 4.420903126287922e-07, "loss": 0.0202, "step": 8638 }, { "epoch": 0.55, "grad_norm": 1.5340901959281243, "learning_rate": 4.419877349479121e-07, "loss": 0.3271, "step": 8639 }, { "epoch": 0.55, "grad_norm": 1.0797816216778933, "learning_rate": 4.41885159741952e-07, "loss": 0.2782, "step": 8640 }, { "epoch": 0.55, "grad_norm": 1.5183601064212755, "learning_rate": 4.4178258701528756e-07, "loss": 0.1911, "step": 8641 }, { "epoch": 0.55, "grad_norm": 4.65818756265992, "learning_rate": 4.4168001677229523e-07, "loss": 0.1366, "step": 8642 }, { "epoch": 0.55, "grad_norm": 0.9687773782908738, "learning_rate": 4.4157744901735033e-07, "loss": 0.1931, "step": 8643 }, { "epoch": 0.55, "grad_norm": 0.6543510595751106, "learning_rate": 4.4147488375482915e-07, "loss": 0.1133, "step": 8644 }, { "epoch": 0.55, "grad_norm": 1.1118765521249256, "learning_rate": 4.4137232098910693e-07, "loss": 0.3299, "step": 8645 }, { "epoch": 0.55, "grad_norm": 0.24685852565012192, "learning_rate": 4.4126976072455946e-07, "loss": 0.1877, "step": 8646 }, { "epoch": 0.55, "grad_norm": 0.2573644076145768, "learning_rate": 4.411672029655619e-07, "loss": 0.1783, "step": 8647 }, { "epoch": 0.55, "grad_norm": 12.250444386789134, "learning_rate": 4.410646477164899e-07, "loss": 0.3069, "step": 8648 }, { "epoch": 0.55, "grad_norm": 1.1789709245803754, "learning_rate": 4.4096209498171825e-07, "loss": 0.4047, "step": 8649 }, { "epoch": 0.55, "grad_norm": 1.1773938457912942, "learning_rate": 4.4085954476562263e-07, "loss": 0.3411, "step": 8650 }, { "epoch": 0.55, "grad_norm": 15.609434498904355, "learning_rate": 4.4075699707257754e-07, "loss": 0.2038, "step": 8651 }, { "epoch": 0.55, "grad_norm": 0.7033537492918164, "learning_rate": 4.4065445190695816e-07, "loss": 0.2254, "step": 8652 }, { "epoch": 0.55, "grad_norm": 8.534189575717024, "learning_rate": 4.40551909273139e-07, "loss": 0.36, "step": 8653 }, { "epoch": 0.55, "grad_norm": 0.42367573297646766, "learning_rate": 4.4044936917549505e-07, "loss": 0.1442, "step": 8654 }, { "epoch": 0.55, "grad_norm": 0.1640803618953181, "learning_rate": 4.403468316184007e-07, "loss": 0.0206, "step": 8655 }, { "epoch": 0.55, "grad_norm": 0.35458751937588334, "learning_rate": 4.402442966062304e-07, "loss": 0.0076, "step": 8656 }, { "epoch": 0.55, "grad_norm": 1.5175948670631023, "learning_rate": 4.4014176414335854e-07, "loss": 0.1836, "step": 8657 }, { "epoch": 0.55, "grad_norm": 0.6565120566952919, "learning_rate": 4.4003923423415923e-07, "loss": 0.1064, "step": 8658 }, { "epoch": 0.55, "grad_norm": 1.5703494333846566, "learning_rate": 4.3993670688300694e-07, "loss": 0.3563, "step": 8659 }, { "epoch": 0.55, "grad_norm": 1.451116982674173, "learning_rate": 4.3983418209427526e-07, "loss": 0.1283, "step": 8660 }, { "epoch": 0.55, "grad_norm": 0.7066977888382018, "learning_rate": 4.397316598723385e-07, "loss": 0.1661, "step": 8661 }, { "epoch": 0.55, "grad_norm": 0.861787167085259, "learning_rate": 4.3962914022157014e-07, "loss": 0.222, "step": 8662 }, { "epoch": 0.55, "grad_norm": 0.5970506627413962, "learning_rate": 4.395266231463442e-07, "loss": 0.2229, "step": 8663 }, { "epoch": 0.55, "grad_norm": 3.1996669243248315, "learning_rate": 4.3942410865103384e-07, "loss": 0.1183, "step": 8664 }, { "epoch": 0.55, "grad_norm": 0.6991684169332326, "learning_rate": 4.3932159674001305e-07, "loss": 0.0926, "step": 8665 }, { "epoch": 0.55, "grad_norm": 0.8945252113205767, "learning_rate": 4.392190874176547e-07, "loss": 0.1876, "step": 8666 }, { "epoch": 0.55, "grad_norm": 12.472981795237423, "learning_rate": 4.3911658068833234e-07, "loss": 0.2331, "step": 8667 }, { "epoch": 0.55, "grad_norm": 0.5292525294377366, "learning_rate": 4.3901407655641915e-07, "loss": 0.3686, "step": 8668 }, { "epoch": 0.55, "grad_norm": 0.6746245898447345, "learning_rate": 4.389115750262878e-07, "loss": 0.0777, "step": 8669 }, { "epoch": 0.55, "grad_norm": 1.9496036799051835, "learning_rate": 4.3880907610231177e-07, "loss": 0.0932, "step": 8670 }, { "epoch": 0.55, "grad_norm": 1.1429990801985839, "learning_rate": 4.387065797888633e-07, "loss": 0.3681, "step": 8671 }, { "epoch": 0.55, "grad_norm": 0.5704570566856747, "learning_rate": 4.386040860903156e-07, "loss": 0.2151, "step": 8672 }, { "epoch": 0.55, "grad_norm": 40.21932685715568, "learning_rate": 4.3850159501104073e-07, "loss": 0.2563, "step": 8673 }, { "epoch": 0.55, "grad_norm": 8.817384533150433, "learning_rate": 4.3839910655541157e-07, "loss": 0.0577, "step": 8674 }, { "epoch": 0.55, "grad_norm": 0.46772570645501416, "learning_rate": 4.382966207278004e-07, "loss": 0.1736, "step": 8675 }, { "epoch": 0.55, "grad_norm": 0.24529039893710539, "learning_rate": 4.381941375325794e-07, "loss": 0.1944, "step": 8676 }, { "epoch": 0.55, "grad_norm": 0.8053000301213352, "learning_rate": 4.380916569741206e-07, "loss": 0.2553, "step": 8677 }, { "epoch": 0.55, "grad_norm": 1.2459487746508058, "learning_rate": 4.379891790567964e-07, "loss": 0.2528, "step": 8678 }, { "epoch": 0.55, "grad_norm": 0.5786673703935769, "learning_rate": 4.3788670378497827e-07, "loss": 0.0103, "step": 8679 }, { "epoch": 0.55, "grad_norm": 0.6131249040544321, "learning_rate": 4.377842311630385e-07, "loss": 0.1538, "step": 8680 }, { "epoch": 0.55, "grad_norm": 0.43975674968747863, "learning_rate": 4.3768176119534816e-07, "loss": 0.216, "step": 8681 }, { "epoch": 0.55, "grad_norm": 0.9238939584516679, "learning_rate": 4.3757929388627943e-07, "loss": 0.1618, "step": 8682 }, { "epoch": 0.55, "grad_norm": 0.5606365831651372, "learning_rate": 4.3747682924020346e-07, "loss": 0.275, "step": 8683 }, { "epoch": 0.55, "grad_norm": 0.2871099484383887, "learning_rate": 4.373743672614915e-07, "loss": 0.2205, "step": 8684 }, { "epoch": 0.55, "grad_norm": 1.2847660107718668, "learning_rate": 4.3727190795451506e-07, "loss": 0.2713, "step": 8685 }, { "epoch": 0.55, "grad_norm": 1.6266697475905256, "learning_rate": 4.3716945132364495e-07, "loss": 0.0137, "step": 8686 }, { "epoch": 0.55, "grad_norm": 0.09043999399511815, "learning_rate": 4.3706699737325263e-07, "loss": 0.0024, "step": 8687 }, { "epoch": 0.55, "grad_norm": 0.773539277771456, "learning_rate": 4.3696454610770843e-07, "loss": 0.1742, "step": 8688 }, { "epoch": 0.55, "grad_norm": 0.3623365393024069, "learning_rate": 4.368620975313837e-07, "loss": 0.0477, "step": 8689 }, { "epoch": 0.55, "grad_norm": 0.7092943270773006, "learning_rate": 4.367596516486485e-07, "loss": 0.2816, "step": 8690 }, { "epoch": 0.55, "grad_norm": 0.42632053701819134, "learning_rate": 4.3665720846387397e-07, "loss": 0.0572, "step": 8691 }, { "epoch": 0.55, "grad_norm": 1.2135248255203528, "learning_rate": 4.3655476798143003e-07, "loss": 0.419, "step": 8692 }, { "epoch": 0.55, "grad_norm": 0.8067930471012168, "learning_rate": 4.3645233020568734e-07, "loss": 0.1214, "step": 8693 }, { "epoch": 0.55, "grad_norm": 0.5990983382871302, "learning_rate": 4.363498951410159e-07, "loss": 0.1371, "step": 8694 }, { "epoch": 0.55, "grad_norm": 0.8936630579336159, "learning_rate": 4.3624746279178603e-07, "loss": 0.1692, "step": 8695 }, { "epoch": 0.55, "grad_norm": 0.5441647656795372, "learning_rate": 4.361450331623676e-07, "loss": 0.2733, "step": 8696 }, { "epoch": 0.55, "grad_norm": 0.6118546178471441, "learning_rate": 4.3604260625713025e-07, "loss": 0.3282, "step": 8697 }, { "epoch": 0.55, "grad_norm": 0.39873167931542874, "learning_rate": 4.359401820804441e-07, "loss": 0.3149, "step": 8698 }, { "epoch": 0.55, "grad_norm": 0.37182408010870244, "learning_rate": 4.358377606366783e-07, "loss": 0.0987, "step": 8699 }, { "epoch": 0.55, "grad_norm": 0.543440377640531, "learning_rate": 4.3573534193020274e-07, "loss": 0.0189, "step": 8700 }, { "epoch": 0.55, "grad_norm": 0.7683463471750335, "learning_rate": 4.3563292596538665e-07, "loss": 0.1435, "step": 8701 }, { "epoch": 0.55, "grad_norm": 1.063278332037879, "learning_rate": 4.3553051274659934e-07, "loss": 0.1295, "step": 8702 }, { "epoch": 0.56, "grad_norm": 1.254262210336867, "learning_rate": 4.3542810227820985e-07, "loss": 0.3357, "step": 8703 }, { "epoch": 0.56, "grad_norm": 0.481368459596217, "learning_rate": 4.3532569456458755e-07, "loss": 0.3109, "step": 8704 }, { "epoch": 0.56, "grad_norm": 1.2236216102368147, "learning_rate": 4.352232896101009e-07, "loss": 0.1365, "step": 8705 }, { "epoch": 0.56, "grad_norm": 14.940685031864735, "learning_rate": 4.351208874191192e-07, "loss": 0.2594, "step": 8706 }, { "epoch": 0.56, "grad_norm": 0.5902382366757902, "learning_rate": 4.350184879960106e-07, "loss": 0.0927, "step": 8707 }, { "epoch": 0.56, "grad_norm": 4.597275120931913, "learning_rate": 4.3491609134514416e-07, "loss": 0.0874, "step": 8708 }, { "epoch": 0.56, "grad_norm": 0.5006277786000783, "learning_rate": 4.3481369747088804e-07, "loss": 0.2512, "step": 8709 }, { "epoch": 0.56, "grad_norm": 1.914182112445155, "learning_rate": 4.3471130637761044e-07, "loss": 0.0853, "step": 8710 }, { "epoch": 0.56, "grad_norm": 0.2741050102783117, "learning_rate": 4.3460891806968004e-07, "loss": 0.2112, "step": 8711 }, { "epoch": 0.56, "grad_norm": 0.6388195877724325, "learning_rate": 4.3450653255146436e-07, "loss": 0.2596, "step": 8712 }, { "epoch": 0.56, "grad_norm": 0.3621628629849335, "learning_rate": 4.344041498273319e-07, "loss": 0.069, "step": 8713 }, { "epoch": 0.56, "grad_norm": 2.377470267897786, "learning_rate": 4.343017699016501e-07, "loss": 0.1486, "step": 8714 }, { "epoch": 0.56, "grad_norm": 0.27535344211405466, "learning_rate": 4.3419939277878705e-07, "loss": 0.0665, "step": 8715 }, { "epoch": 0.56, "grad_norm": 0.40089579548440857, "learning_rate": 4.340970184631099e-07, "loss": 0.3926, "step": 8716 }, { "epoch": 0.56, "grad_norm": 0.6938331545369534, "learning_rate": 4.339946469589867e-07, "loss": 0.1051, "step": 8717 }, { "epoch": 0.56, "grad_norm": 2.462107263236189, "learning_rate": 4.3389227827078434e-07, "loss": 0.1912, "step": 8718 }, { "epoch": 0.56, "grad_norm": 1.242599660882346, "learning_rate": 4.3378991240287044e-07, "loss": 0.2046, "step": 8719 }, { "epoch": 0.56, "grad_norm": 1.803931785433432, "learning_rate": 4.336875493596119e-07, "loss": 0.3035, "step": 8720 }, { "epoch": 0.56, "grad_norm": 0.34555717096346095, "learning_rate": 4.3358518914537587e-07, "loss": 0.0487, "step": 8721 }, { "epoch": 0.56, "grad_norm": 0.2831189267608031, "learning_rate": 4.33482831764529e-07, "loss": 0.1161, "step": 8722 }, { "epoch": 0.56, "grad_norm": 0.785665893365899, "learning_rate": 4.333804772214385e-07, "loss": 0.2001, "step": 8723 }, { "epoch": 0.56, "grad_norm": 1.1015902546268215, "learning_rate": 4.332781255204707e-07, "loss": 0.0564, "step": 8724 }, { "epoch": 0.56, "grad_norm": 0.897124187273214, "learning_rate": 4.331757766659921e-07, "loss": 0.2349, "step": 8725 }, { "epoch": 0.56, "grad_norm": 2.7622424133876797, "learning_rate": 4.3307343066236934e-07, "loss": 0.0969, "step": 8726 }, { "epoch": 0.56, "grad_norm": 1.0066861931099265, "learning_rate": 4.329710875139684e-07, "loss": 0.2512, "step": 8727 }, { "epoch": 0.56, "grad_norm": 1.171090718565011, "learning_rate": 4.3286874722515564e-07, "loss": 0.2247, "step": 8728 }, { "epoch": 0.56, "grad_norm": 0.2886861799314343, "learning_rate": 4.32766409800297e-07, "loss": 0.1011, "step": 8729 }, { "epoch": 0.56, "grad_norm": 1.7881433383426815, "learning_rate": 4.326640752437587e-07, "loss": 0.156, "step": 8730 }, { "epoch": 0.56, "grad_norm": 0.36662181002272237, "learning_rate": 4.325617435599061e-07, "loss": 0.1765, "step": 8731 }, { "epoch": 0.56, "grad_norm": 0.8300046605081782, "learning_rate": 4.3245941475310535e-07, "loss": 0.1485, "step": 8732 }, { "epoch": 0.56, "grad_norm": 2.08197081239682, "learning_rate": 4.323570888277214e-07, "loss": 0.1513, "step": 8733 }, { "epoch": 0.56, "grad_norm": 0.6685893549406288, "learning_rate": 4.322547657881203e-07, "loss": 0.1808, "step": 8734 }, { "epoch": 0.56, "grad_norm": 0.7791728144281073, "learning_rate": 4.321524456386669e-07, "loss": 0.2814, "step": 8735 }, { "epoch": 0.56, "grad_norm": 0.5645565736405153, "learning_rate": 4.320501283837267e-07, "loss": 0.2956, "step": 8736 }, { "epoch": 0.56, "grad_norm": 4.139665768483349, "learning_rate": 4.3194781402766464e-07, "loss": 0.0679, "step": 8737 }, { "epoch": 0.56, "grad_norm": 0.7585261044644834, "learning_rate": 4.318455025748454e-07, "loss": 0.4393, "step": 8738 }, { "epoch": 0.56, "grad_norm": 0.6420015229006227, "learning_rate": 4.3174319402963426e-07, "loss": 0.3076, "step": 8739 }, { "epoch": 0.56, "grad_norm": 8.053093193924328, "learning_rate": 4.316408883963955e-07, "loss": 0.0895, "step": 8740 }, { "epoch": 0.56, "grad_norm": 2.1766155095962807, "learning_rate": 4.315385856794941e-07, "loss": 0.3237, "step": 8741 }, { "epoch": 0.56, "grad_norm": 0.28378767122573545, "learning_rate": 4.31436285883294e-07, "loss": 0.0871, "step": 8742 }, { "epoch": 0.56, "grad_norm": 1.8100632778598453, "learning_rate": 4.3133398901216e-07, "loss": 0.0596, "step": 8743 }, { "epoch": 0.56, "grad_norm": 0.917157902373197, "learning_rate": 4.3123169507045587e-07, "loss": 0.2018, "step": 8744 }, { "epoch": 0.56, "grad_norm": 0.9164757278327148, "learning_rate": 4.3112940406254595e-07, "loss": 0.1257, "step": 8745 }, { "epoch": 0.56, "grad_norm": 0.3863658506835133, "learning_rate": 4.3102711599279406e-07, "loss": 0.1335, "step": 8746 }, { "epoch": 0.56, "grad_norm": 0.5603958703346787, "learning_rate": 4.309248308655641e-07, "loss": 0.2087, "step": 8747 }, { "epoch": 0.56, "grad_norm": 0.8364777548053638, "learning_rate": 4.308225486852196e-07, "loss": 0.1572, "step": 8748 }, { "epoch": 0.56, "grad_norm": 0.6659128864748425, "learning_rate": 4.307202694561245e-07, "loss": 0.2157, "step": 8749 }, { "epoch": 0.56, "grad_norm": 1.4506071015548738, "learning_rate": 4.306179931826419e-07, "loss": 0.2602, "step": 8750 }, { "epoch": 0.56, "grad_norm": 2.109939051352687, "learning_rate": 4.3051571986913506e-07, "loss": 0.316, "step": 8751 }, { "epoch": 0.56, "grad_norm": 0.43624671986174895, "learning_rate": 4.304134495199674e-07, "loss": 0.0085, "step": 8752 }, { "epoch": 0.56, "grad_norm": 2.181416783377148, "learning_rate": 4.3031118213950166e-07, "loss": 0.1573, "step": 8753 }, { "epoch": 0.56, "grad_norm": 19.120870356589716, "learning_rate": 4.302089177321012e-07, "loss": 0.2051, "step": 8754 }, { "epoch": 0.56, "grad_norm": 0.8544548483466076, "learning_rate": 4.3010665630212836e-07, "loss": 0.1524, "step": 8755 }, { "epoch": 0.56, "grad_norm": 0.7709835200098785, "learning_rate": 4.3000439785394646e-07, "loss": 0.2253, "step": 8756 }, { "epoch": 0.56, "grad_norm": 3.236281598019691, "learning_rate": 4.299021423919173e-07, "loss": 0.2094, "step": 8757 }, { "epoch": 0.56, "grad_norm": 0.2272571124262694, "learning_rate": 4.297998899204039e-07, "loss": 0.0809, "step": 8758 }, { "epoch": 0.56, "grad_norm": 0.7594296087296805, "learning_rate": 4.296976404437681e-07, "loss": 0.1135, "step": 8759 }, { "epoch": 0.56, "grad_norm": 1.0453715609503011, "learning_rate": 4.295953939663726e-07, "loss": 0.1734, "step": 8760 }, { "epoch": 0.56, "grad_norm": 0.8404093890406891, "learning_rate": 4.2949315049257873e-07, "loss": 0.3088, "step": 8761 }, { "epoch": 0.56, "grad_norm": 1.3165393104660033, "learning_rate": 4.293909100267491e-07, "loss": 0.2291, "step": 8762 }, { "epoch": 0.56, "grad_norm": 0.8868526873780667, "learning_rate": 4.2928867257324495e-07, "loss": 0.0805, "step": 8763 }, { "epoch": 0.56, "grad_norm": 0.5407469980793979, "learning_rate": 4.2918643813642836e-07, "loss": 0.2776, "step": 8764 }, { "epoch": 0.56, "grad_norm": 1.0047903624271113, "learning_rate": 4.2908420672066075e-07, "loss": 0.2345, "step": 8765 }, { "epoch": 0.56, "grad_norm": 0.4653994504140275, "learning_rate": 4.289819783303032e-07, "loss": 0.1555, "step": 8766 }, { "epoch": 0.56, "grad_norm": 1.46737109688055, "learning_rate": 4.288797529697175e-07, "loss": 0.3302, "step": 8767 }, { "epoch": 0.56, "grad_norm": 0.6238645780029942, "learning_rate": 4.287775306432642e-07, "loss": 0.1174, "step": 8768 }, { "epoch": 0.56, "grad_norm": 0.8079535255908654, "learning_rate": 4.2867531135530484e-07, "loss": 0.3148, "step": 8769 }, { "epoch": 0.56, "grad_norm": 0.43723374040357554, "learning_rate": 4.285730951101999e-07, "loss": 0.1074, "step": 8770 }, { "epoch": 0.56, "grad_norm": 0.38881263524925763, "learning_rate": 4.284708819123104e-07, "loss": 0.01, "step": 8771 }, { "epoch": 0.56, "grad_norm": 1.1943730728812147, "learning_rate": 4.2836867176599684e-07, "loss": 0.0127, "step": 8772 }, { "epoch": 0.56, "grad_norm": 0.48869727062370727, "learning_rate": 4.282664646756198e-07, "loss": 0.1828, "step": 8773 }, { "epoch": 0.56, "grad_norm": 1.0197104140215727, "learning_rate": 4.2816426064553946e-07, "loss": 0.1231, "step": 8774 }, { "epoch": 0.56, "grad_norm": 0.5393575259996369, "learning_rate": 4.280620596801163e-07, "loss": 0.0497, "step": 8775 }, { "epoch": 0.56, "grad_norm": 0.05371980771129873, "learning_rate": 4.279598617837102e-07, "loss": 0.0002, "step": 8776 }, { "epoch": 0.56, "grad_norm": 0.5745978682807066, "learning_rate": 4.278576669606814e-07, "loss": 0.1843, "step": 8777 }, { "epoch": 0.56, "grad_norm": 0.9278652412815943, "learning_rate": 4.277554752153895e-07, "loss": 0.0959, "step": 8778 }, { "epoch": 0.56, "grad_norm": 0.5250453545431085, "learning_rate": 4.27653286552194e-07, "loss": 0.2151, "step": 8779 }, { "epoch": 0.56, "grad_norm": 0.9632878342014657, "learning_rate": 4.2755110097545495e-07, "loss": 0.023, "step": 8780 }, { "epoch": 0.56, "grad_norm": 0.6455365855607985, "learning_rate": 4.274489184895315e-07, "loss": 0.1282, "step": 8781 }, { "epoch": 0.56, "grad_norm": 4.19205089455886, "learning_rate": 4.273467390987831e-07, "loss": 0.0743, "step": 8782 }, { "epoch": 0.56, "grad_norm": 0.854499994034134, "learning_rate": 4.272445628075687e-07, "loss": 0.216, "step": 8783 }, { "epoch": 0.56, "grad_norm": 0.6115082054764824, "learning_rate": 4.2714238962024775e-07, "loss": 0.1696, "step": 8784 }, { "epoch": 0.56, "grad_norm": 2.2685461559803914, "learning_rate": 4.270402195411787e-07, "loss": 0.1875, "step": 8785 }, { "epoch": 0.56, "grad_norm": 0.8428912491270012, "learning_rate": 4.269380525747207e-07, "loss": 0.1702, "step": 8786 }, { "epoch": 0.56, "grad_norm": 1.4900605117836085, "learning_rate": 4.2683588872523214e-07, "loss": 0.1229, "step": 8787 }, { "epoch": 0.56, "grad_norm": 0.9344322384855469, "learning_rate": 4.267337279970718e-07, "loss": 0.2983, "step": 8788 }, { "epoch": 0.56, "grad_norm": 1.3308066950851476, "learning_rate": 4.2663157039459774e-07, "loss": 0.1737, "step": 8789 }, { "epoch": 0.56, "grad_norm": 1.4536736858630486, "learning_rate": 4.265294159221684e-07, "loss": 0.1757, "step": 8790 }, { "epoch": 0.56, "grad_norm": 2.2401233737061514, "learning_rate": 4.264272645841419e-07, "loss": 0.2111, "step": 8791 }, { "epoch": 0.56, "grad_norm": 1.7235916896888939, "learning_rate": 4.263251163848762e-07, "loss": 0.3137, "step": 8792 }, { "epoch": 0.56, "grad_norm": 0.8752625380095372, "learning_rate": 4.262229713287293e-07, "loss": 0.5314, "step": 8793 }, { "epoch": 0.56, "grad_norm": 4.771569810967823, "learning_rate": 4.261208294200585e-07, "loss": 0.196, "step": 8794 }, { "epoch": 0.56, "grad_norm": 0.9800783063616554, "learning_rate": 4.260186906632219e-07, "loss": 0.2956, "step": 8795 }, { "epoch": 0.56, "grad_norm": 0.9444654094315114, "learning_rate": 4.259165550625764e-07, "loss": 0.2544, "step": 8796 }, { "epoch": 0.56, "grad_norm": 0.5934749999704413, "learning_rate": 4.258144226224798e-07, "loss": 0.1408, "step": 8797 }, { "epoch": 0.56, "grad_norm": 2.3570317708605626, "learning_rate": 4.257122933472889e-07, "loss": 0.0839, "step": 8798 }, { "epoch": 0.56, "grad_norm": 1.2242800026870266, "learning_rate": 4.256101672413609e-07, "loss": 0.1207, "step": 8799 }, { "epoch": 0.56, "grad_norm": 1.241545716452189, "learning_rate": 4.255080443090527e-07, "loss": 0.4852, "step": 8800 }, { "epoch": 0.56, "grad_norm": 0.35390348831937746, "learning_rate": 4.254059245547212e-07, "loss": 0.1465, "step": 8801 }, { "epoch": 0.56, "grad_norm": 0.3990140449314784, "learning_rate": 4.2530380798272283e-07, "loss": 0.0983, "step": 8802 }, { "epoch": 0.56, "grad_norm": 1.4236219228074876, "learning_rate": 4.2520169459741427e-07, "loss": 0.1796, "step": 8803 }, { "epoch": 0.56, "grad_norm": 5.57621134386628, "learning_rate": 4.250995844031516e-07, "loss": 0.1206, "step": 8804 }, { "epoch": 0.56, "grad_norm": 1.0245743971573507, "learning_rate": 4.249974774042915e-07, "loss": 0.3929, "step": 8805 }, { "epoch": 0.56, "grad_norm": 0.9511849609887367, "learning_rate": 4.2489537360518963e-07, "loss": 0.1818, "step": 8806 }, { "epoch": 0.56, "grad_norm": 0.36073045919090885, "learning_rate": 4.2479327301020214e-07, "loss": 0.275, "step": 8807 }, { "epoch": 0.56, "grad_norm": 0.7016519427161496, "learning_rate": 4.246911756236847e-07, "loss": 0.1796, "step": 8808 }, { "epoch": 0.56, "grad_norm": 5.261643534370843, "learning_rate": 4.245890814499931e-07, "loss": 0.2649, "step": 8809 }, { "epoch": 0.56, "grad_norm": 0.17211187922867002, "learning_rate": 4.244869904934831e-07, "loss": 0.0906, "step": 8810 }, { "epoch": 0.56, "grad_norm": 1.0630988415737792, "learning_rate": 4.243849027585096e-07, "loss": 0.0131, "step": 8811 }, { "epoch": 0.56, "grad_norm": 0.9457028326375878, "learning_rate": 4.2428281824942843e-07, "loss": 0.4614, "step": 8812 }, { "epoch": 0.56, "grad_norm": 1.3354028111491003, "learning_rate": 4.2418073697059414e-07, "loss": 0.1195, "step": 8813 }, { "epoch": 0.56, "grad_norm": 0.486547982467088, "learning_rate": 4.240786589263623e-07, "loss": 0.3201, "step": 8814 }, { "epoch": 0.56, "grad_norm": 0.608709704000889, "learning_rate": 4.239765841210873e-07, "loss": 0.0223, "step": 8815 }, { "epoch": 0.56, "grad_norm": 0.8671122060277289, "learning_rate": 4.2387451255912407e-07, "loss": 0.2541, "step": 8816 }, { "epoch": 0.56, "grad_norm": 1.11364371117671, "learning_rate": 4.2377244424482726e-07, "loss": 0.2379, "step": 8817 }, { "epoch": 0.56, "grad_norm": 0.5695881297848178, "learning_rate": 4.236703791825512e-07, "loss": 0.0693, "step": 8818 }, { "epoch": 0.56, "grad_norm": 1.2860772628905273, "learning_rate": 4.235683173766503e-07, "loss": 0.148, "step": 8819 }, { "epoch": 0.56, "grad_norm": 1.9321914860380835, "learning_rate": 4.234662588314784e-07, "loss": 0.2268, "step": 8820 }, { "epoch": 0.56, "grad_norm": 1.8848982594913803, "learning_rate": 4.2336420355139e-07, "loss": 0.0244, "step": 8821 }, { "epoch": 0.56, "grad_norm": 0.5598738834934773, "learning_rate": 4.232621515407385e-07, "loss": 0.2702, "step": 8822 }, { "epoch": 0.56, "grad_norm": 0.4768426556401251, "learning_rate": 4.2316010280387803e-07, "loss": 0.1238, "step": 8823 }, { "epoch": 0.56, "grad_norm": 1.5368905908160633, "learning_rate": 4.230580573451619e-07, "loss": 0.2323, "step": 8824 }, { "epoch": 0.56, "grad_norm": 3.4680823082719106, "learning_rate": 4.2295601516894387e-07, "loss": 0.2658, "step": 8825 }, { "epoch": 0.56, "grad_norm": 0.30093073521511227, "learning_rate": 4.2285397627957686e-07, "loss": 0.0098, "step": 8826 }, { "epoch": 0.56, "grad_norm": 0.6365285501257308, "learning_rate": 4.227519406814146e-07, "loss": 0.0815, "step": 8827 }, { "epoch": 0.56, "grad_norm": 0.6121023613489245, "learning_rate": 4.226499083788096e-07, "loss": 0.3337, "step": 8828 }, { "epoch": 0.56, "grad_norm": 0.7200664274228797, "learning_rate": 4.225478793761153e-07, "loss": 0.1372, "step": 8829 }, { "epoch": 0.56, "grad_norm": 2.995027730163176, "learning_rate": 4.2244585367768387e-07, "loss": 0.1968, "step": 8830 }, { "epoch": 0.56, "grad_norm": 1.1568868680182585, "learning_rate": 4.2234383128786847e-07, "loss": 0.2059, "step": 8831 }, { "epoch": 0.56, "grad_norm": 1.065654123080128, "learning_rate": 4.222418122110211e-07, "loss": 0.0939, "step": 8832 }, { "epoch": 0.56, "grad_norm": 1.760090562541034, "learning_rate": 4.2213979645149455e-07, "loss": 0.304, "step": 8833 }, { "epoch": 0.56, "grad_norm": 1.1222549533889943, "learning_rate": 4.220377840136407e-07, "loss": 0.2764, "step": 8834 }, { "epoch": 0.56, "grad_norm": 1.2180117061610305, "learning_rate": 4.2193577490181163e-07, "loss": 0.1748, "step": 8835 }, { "epoch": 0.56, "grad_norm": 0.4405662940994313, "learning_rate": 4.218337691203595e-07, "loss": 0.1627, "step": 8836 }, { "epoch": 0.56, "grad_norm": 0.9280427079148141, "learning_rate": 4.217317666736357e-07, "loss": 0.0307, "step": 8837 }, { "epoch": 0.56, "grad_norm": 8.040666841572136, "learning_rate": 4.2162976756599234e-07, "loss": 0.0824, "step": 8838 }, { "epoch": 0.56, "grad_norm": 0.408029469422152, "learning_rate": 4.215277718017804e-07, "loss": 0.3568, "step": 8839 }, { "epoch": 0.56, "grad_norm": 0.850396461252373, "learning_rate": 4.214257793853517e-07, "loss": 0.3457, "step": 8840 }, { "epoch": 0.56, "grad_norm": 0.130899098865247, "learning_rate": 4.2132379032105693e-07, "loss": 0.0031, "step": 8841 }, { "epoch": 0.56, "grad_norm": 0.9850970598885355, "learning_rate": 4.212218046132476e-07, "loss": 0.2646, "step": 8842 }, { "epoch": 0.56, "grad_norm": 0.7836241026560231, "learning_rate": 4.2111982226627433e-07, "loss": 0.0229, "step": 8843 }, { "epoch": 0.56, "grad_norm": 10.300270262942005, "learning_rate": 4.2101784328448814e-07, "loss": 0.1831, "step": 8844 }, { "epoch": 0.56, "grad_norm": 1.1229748622532114, "learning_rate": 4.2091586767223936e-07, "loss": 0.1617, "step": 8845 }, { "epoch": 0.56, "grad_norm": 0.6543210638658481, "learning_rate": 4.2081389543387893e-07, "loss": 0.1861, "step": 8846 }, { "epoch": 0.56, "grad_norm": 5.127326640082101, "learning_rate": 4.2071192657375684e-07, "loss": 0.056, "step": 8847 }, { "epoch": 0.56, "grad_norm": 1.479982180174863, "learning_rate": 4.206099610962231e-07, "loss": 0.2553, "step": 8848 }, { "epoch": 0.56, "grad_norm": 0.47943624358487036, "learning_rate": 4.2050799900562826e-07, "loss": 0.3061, "step": 8849 }, { "epoch": 0.56, "grad_norm": 1.1833321862723516, "learning_rate": 4.2040604030632176e-07, "loss": 0.1738, "step": 8850 }, { "epoch": 0.56, "grad_norm": 0.2990509725056782, "learning_rate": 4.203040850026537e-07, "loss": 0.0038, "step": 8851 }, { "epoch": 0.56, "grad_norm": 0.6951384879437232, "learning_rate": 4.202021330989736e-07, "loss": 0.1756, "step": 8852 }, { "epoch": 0.56, "grad_norm": 0.8608802564102809, "learning_rate": 4.201001845996309e-07, "loss": 0.1009, "step": 8853 }, { "epoch": 0.56, "grad_norm": 0.575110980996379, "learning_rate": 4.199982395089749e-07, "loss": 0.1647, "step": 8854 }, { "epoch": 0.56, "grad_norm": 8.192043742401415, "learning_rate": 4.19896297831355e-07, "loss": 0.1525, "step": 8855 }, { "epoch": 0.56, "grad_norm": 0.5647354508329827, "learning_rate": 4.197943595711198e-07, "loss": 0.0924, "step": 8856 }, { "epoch": 0.56, "grad_norm": 4.690594689503533, "learning_rate": 4.1969242473261873e-07, "loss": 0.1563, "step": 8857 }, { "epoch": 0.56, "grad_norm": 15.318151122942464, "learning_rate": 4.195904933202e-07, "loss": 0.1284, "step": 8858 }, { "epoch": 0.56, "grad_norm": 0.4100718526531126, "learning_rate": 4.194885653382127e-07, "loss": 0.1654, "step": 8859 }, { "epoch": 0.57, "grad_norm": 0.6682643198475973, "learning_rate": 4.1938664079100493e-07, "loss": 0.1893, "step": 8860 }, { "epoch": 0.57, "grad_norm": 4.702906979463967, "learning_rate": 4.192847196829251e-07, "loss": 0.3181, "step": 8861 }, { "epoch": 0.57, "grad_norm": 2.2794038402298713, "learning_rate": 4.1918280201832145e-07, "loss": 0.0579, "step": 8862 }, { "epoch": 0.57, "grad_norm": 0.43971315297669045, "learning_rate": 4.1908088780154183e-07, "loss": 0.1261, "step": 8863 }, { "epoch": 0.57, "grad_norm": 0.44545082915315964, "learning_rate": 4.1897897703693435e-07, "loss": 0.1466, "step": 8864 }, { "epoch": 0.57, "grad_norm": 0.2037949421885682, "learning_rate": 4.188770697288464e-07, "loss": 0.1159, "step": 8865 }, { "epoch": 0.57, "grad_norm": 1.2637857986268712, "learning_rate": 4.1877516588162596e-07, "loss": 0.4962, "step": 8866 }, { "epoch": 0.57, "grad_norm": 0.8129489178667497, "learning_rate": 4.1867326549962e-07, "loss": 0.2276, "step": 8867 }, { "epoch": 0.57, "grad_norm": 0.6055254303719817, "learning_rate": 4.185713685871762e-07, "loss": 0.2519, "step": 8868 }, { "epoch": 0.57, "grad_norm": 0.7323659725268498, "learning_rate": 4.184694751486414e-07, "loss": 0.1028, "step": 8869 }, { "epoch": 0.57, "grad_norm": 0.684166843548358, "learning_rate": 4.183675851883627e-07, "loss": 0.2773, "step": 8870 }, { "epoch": 0.57, "grad_norm": 0.1639504907915714, "learning_rate": 4.1826569871068686e-07, "loss": 0.0061, "step": 8871 }, { "epoch": 0.57, "grad_norm": 3.8127810049258692, "learning_rate": 4.1816381571996075e-07, "loss": 0.1356, "step": 8872 }, { "epoch": 0.57, "grad_norm": 0.9025274955719915, "learning_rate": 4.180619362205306e-07, "loss": 0.17, "step": 8873 }, { "epoch": 0.57, "grad_norm": 0.8218113799740292, "learning_rate": 4.1796006021674326e-07, "loss": 0.2427, "step": 8874 }, { "epoch": 0.57, "grad_norm": 0.22969596510198548, "learning_rate": 4.1785818771294456e-07, "loss": 0.0627, "step": 8875 }, { "epoch": 0.57, "grad_norm": 1.013127341362521, "learning_rate": 4.1775631871348056e-07, "loss": 0.2197, "step": 8876 }, { "epoch": 0.57, "grad_norm": 0.6531218599809984, "learning_rate": 4.176544532226974e-07, "loss": 0.1165, "step": 8877 }, { "epoch": 0.57, "grad_norm": 3.6029073170787513, "learning_rate": 4.175525912449408e-07, "loss": 0.1084, "step": 8878 }, { "epoch": 0.57, "grad_norm": 1.9247928800834706, "learning_rate": 4.174507327845564e-07, "loss": 0.0345, "step": 8879 }, { "epoch": 0.57, "grad_norm": 2.4511928593583288, "learning_rate": 4.173488778458896e-07, "loss": 0.3031, "step": 8880 }, { "epoch": 0.57, "grad_norm": 0.758286084876253, "learning_rate": 4.17247026433286e-07, "loss": 0.0033, "step": 8881 }, { "epoch": 0.57, "grad_norm": 0.6638340939757755, "learning_rate": 4.171451785510904e-07, "loss": 0.419, "step": 8882 }, { "epoch": 0.57, "grad_norm": 0.8397141346873115, "learning_rate": 4.1704333420364827e-07, "loss": 0.2656, "step": 8883 }, { "epoch": 0.57, "grad_norm": 0.48375765804415893, "learning_rate": 4.16941493395304e-07, "loss": 0.0618, "step": 8884 }, { "epoch": 0.57, "grad_norm": 2.4125366962194494, "learning_rate": 4.1683965613040293e-07, "loss": 0.3128, "step": 8885 }, { "epoch": 0.57, "grad_norm": 1.7514592078671956, "learning_rate": 4.16737822413289e-07, "loss": 0.2279, "step": 8886 }, { "epoch": 0.57, "grad_norm": 0.4256378297724906, "learning_rate": 4.166359922483071e-07, "loss": 0.2878, "step": 8887 }, { "epoch": 0.57, "grad_norm": 0.6441216191541835, "learning_rate": 4.165341656398014e-07, "loss": 0.1651, "step": 8888 }, { "epoch": 0.57, "grad_norm": 0.2115471476169245, "learning_rate": 4.164323425921158e-07, "loss": 0.1172, "step": 8889 }, { "epoch": 0.57, "grad_norm": 0.9850882113832111, "learning_rate": 4.163305231095947e-07, "loss": 0.003, "step": 8890 }, { "epoch": 0.57, "grad_norm": 0.8093672278466705, "learning_rate": 4.1622870719658145e-07, "loss": 0.2152, "step": 8891 }, { "epoch": 0.57, "grad_norm": 0.6736874441809302, "learning_rate": 4.1612689485742013e-07, "loss": 0.1791, "step": 8892 }, { "epoch": 0.57, "grad_norm": 9.01785573323617, "learning_rate": 4.160250860964539e-07, "loss": 0.1442, "step": 8893 }, { "epoch": 0.57, "grad_norm": 1.8646779079782378, "learning_rate": 4.1592328091802645e-07, "loss": 0.2035, "step": 8894 }, { "epoch": 0.57, "grad_norm": 0.3145613306400818, "learning_rate": 4.158214793264807e-07, "loss": 0.0532, "step": 8895 }, { "epoch": 0.57, "grad_norm": 2.1389023483351055, "learning_rate": 4.1571968132615996e-07, "loss": 0.2556, "step": 8896 }, { "epoch": 0.57, "grad_norm": 0.7143415719349784, "learning_rate": 4.1561788692140707e-07, "loss": 0.148, "step": 8897 }, { "epoch": 0.57, "grad_norm": 1.090866347674417, "learning_rate": 4.1551609611656473e-07, "loss": 0.1279, "step": 8898 }, { "epoch": 0.57, "grad_norm": 0.6558849385962897, "learning_rate": 4.1541430891597544e-07, "loss": 0.3018, "step": 8899 }, { "epoch": 0.57, "grad_norm": 2.387434021177685, "learning_rate": 4.15312525323982e-07, "loss": 0.3073, "step": 8900 }, { "epoch": 0.57, "grad_norm": 0.7054601023014728, "learning_rate": 4.152107453449263e-07, "loss": 0.0077, "step": 8901 }, { "epoch": 0.57, "grad_norm": 0.5885797868645957, "learning_rate": 4.151089689831508e-07, "loss": 0.2542, "step": 8902 }, { "epoch": 0.57, "grad_norm": 1.3560516496879205, "learning_rate": 4.1500719624299734e-07, "loss": 0.0821, "step": 8903 }, { "epoch": 0.57, "grad_norm": 3.3467056027890014, "learning_rate": 4.1490542712880754e-07, "loss": 0.2704, "step": 8904 }, { "epoch": 0.57, "grad_norm": 0.5524279259988052, "learning_rate": 4.148036616449234e-07, "loss": 0.112, "step": 8905 }, { "epoch": 0.57, "grad_norm": 1.740823832942431, "learning_rate": 4.147018997956862e-07, "loss": 0.2801, "step": 8906 }, { "epoch": 0.57, "grad_norm": 1.0735528649293915, "learning_rate": 4.146001415854377e-07, "loss": 0.2887, "step": 8907 }, { "epoch": 0.57, "grad_norm": 2.7113644386011595, "learning_rate": 4.144983870185185e-07, "loss": 0.051, "step": 8908 }, { "epoch": 0.57, "grad_norm": 0.9173098476106, "learning_rate": 4.1439663609927033e-07, "loss": 0.3603, "step": 8909 }, { "epoch": 0.57, "grad_norm": 1.0221068373924442, "learning_rate": 4.1429488883203346e-07, "loss": 0.5015, "step": 8910 }, { "epoch": 0.57, "grad_norm": 0.9748475047797515, "learning_rate": 4.141931452211492e-07, "loss": 0.0713, "step": 8911 }, { "epoch": 0.57, "grad_norm": 0.877476302434798, "learning_rate": 4.140914052709575e-07, "loss": 0.3888, "step": 8912 }, { "epoch": 0.57, "grad_norm": 2.035962076885889, "learning_rate": 4.139896689857995e-07, "loss": 0.0879, "step": 8913 }, { "epoch": 0.57, "grad_norm": 0.3054973153770853, "learning_rate": 4.138879363700149e-07, "loss": 0.1952, "step": 8914 }, { "epoch": 0.57, "grad_norm": 0.59316800489636, "learning_rate": 4.1378620742794413e-07, "loss": 0.2633, "step": 8915 }, { "epoch": 0.57, "grad_norm": 1.9554768278231138, "learning_rate": 4.136844821639272e-07, "loss": 0.3325, "step": 8916 }, { "epoch": 0.57, "grad_norm": 1.5777505092967516, "learning_rate": 4.135827605823035e-07, "loss": 0.2842, "step": 8917 }, { "epoch": 0.57, "grad_norm": 0.4933923077079456, "learning_rate": 4.134810426874131e-07, "loss": 0.1007, "step": 8918 }, { "epoch": 0.57, "grad_norm": 0.41674791695581065, "learning_rate": 4.1337932848359526e-07, "loss": 0.0506, "step": 8919 }, { "epoch": 0.57, "grad_norm": 0.6713354467851044, "learning_rate": 4.1327761797518955e-07, "loss": 0.2607, "step": 8920 }, { "epoch": 0.57, "grad_norm": 0.4323185746476104, "learning_rate": 4.131759111665348e-07, "loss": 0.0325, "step": 8921 }, { "epoch": 0.57, "grad_norm": 1.0622512988011819, "learning_rate": 4.1307420806197036e-07, "loss": 0.2112, "step": 8922 }, { "epoch": 0.57, "grad_norm": 1.4389750806798214, "learning_rate": 4.129725086658349e-07, "loss": 0.0492, "step": 8923 }, { "epoch": 0.57, "grad_norm": 0.22444360001990274, "learning_rate": 4.1287081298246716e-07, "loss": 0.0761, "step": 8924 }, { "epoch": 0.57, "grad_norm": 2.7183421669202206, "learning_rate": 4.1276912101620564e-07, "loss": 0.0647, "step": 8925 }, { "epoch": 0.57, "grad_norm": 0.7079498184301862, "learning_rate": 4.1266743277138894e-07, "loss": 0.2825, "step": 8926 }, { "epoch": 0.57, "grad_norm": 1.7627827987883815, "learning_rate": 4.125657482523549e-07, "loss": 0.297, "step": 8927 }, { "epoch": 0.57, "grad_norm": 0.9324609714137633, "learning_rate": 4.12464067463442e-07, "loss": 0.3738, "step": 8928 }, { "epoch": 0.57, "grad_norm": 0.4042411237926974, "learning_rate": 4.12362390408988e-07, "loss": 0.2373, "step": 8929 }, { "epoch": 0.57, "grad_norm": 0.09771824803843174, "learning_rate": 4.122607170933304e-07, "loss": 0.0008, "step": 8930 }, { "epoch": 0.57, "grad_norm": 0.6181588386577638, "learning_rate": 4.1215904752080704e-07, "loss": 0.0991, "step": 8931 }, { "epoch": 0.57, "grad_norm": 0.4188993403668053, "learning_rate": 4.120573816957552e-07, "loss": 0.0067, "step": 8932 }, { "epoch": 0.57, "grad_norm": 0.5478641896560902, "learning_rate": 4.119557196225125e-07, "loss": 0.278, "step": 8933 }, { "epoch": 0.57, "grad_norm": 0.8758992113226542, "learning_rate": 4.118540613054155e-07, "loss": 0.372, "step": 8934 }, { "epoch": 0.57, "grad_norm": 0.9157197980811437, "learning_rate": 4.117524067488017e-07, "loss": 0.3069, "step": 8935 }, { "epoch": 0.57, "grad_norm": 0.892030904545098, "learning_rate": 4.116507559570074e-07, "loss": 0.2113, "step": 8936 }, { "epoch": 0.57, "grad_norm": 0.4467649615776646, "learning_rate": 4.1154910893436966e-07, "loss": 0.0266, "step": 8937 }, { "epoch": 0.57, "grad_norm": 4.770026343102141, "learning_rate": 4.1144746568522457e-07, "loss": 0.1038, "step": 8938 }, { "epoch": 0.57, "grad_norm": 0.5433695625090664, "learning_rate": 4.113458262139088e-07, "loss": 0.2527, "step": 8939 }, { "epoch": 0.57, "grad_norm": 0.8449894277474345, "learning_rate": 4.112441905247581e-07, "loss": 0.2222, "step": 8940 }, { "epoch": 0.57, "grad_norm": 2.7217248814820465, "learning_rate": 4.111425586221087e-07, "loss": 0.3618, "step": 8941 }, { "epoch": 0.57, "grad_norm": 1.8637140262740672, "learning_rate": 4.1104093051029647e-07, "loss": 0.1332, "step": 8942 }, { "epoch": 0.57, "grad_norm": 0.5222302317550118, "learning_rate": 4.109393061936569e-07, "loss": 0.2407, "step": 8943 }, { "epoch": 0.57, "grad_norm": 0.46888449597675147, "learning_rate": 4.108376856765257e-07, "loss": 0.106, "step": 8944 }, { "epoch": 0.57, "grad_norm": 1.2237282015396125, "learning_rate": 4.107360689632379e-07, "loss": 0.2106, "step": 8945 }, { "epoch": 0.57, "grad_norm": 1.468060232104511, "learning_rate": 4.1063445605812894e-07, "loss": 0.2647, "step": 8946 }, { "epoch": 0.57, "grad_norm": 0.831866185765841, "learning_rate": 4.105328469655336e-07, "loss": 0.2963, "step": 8947 }, { "epoch": 0.57, "grad_norm": 0.1882829784434609, "learning_rate": 4.10431241689787e-07, "loss": 0.0757, "step": 8948 }, { "epoch": 0.57, "grad_norm": 0.9410353486862414, "learning_rate": 4.103296402352236e-07, "loss": 0.1798, "step": 8949 }, { "epoch": 0.57, "grad_norm": 0.8360667018701529, "learning_rate": 4.1022804260617805e-07, "loss": 0.1084, "step": 8950 }, { "epoch": 0.57, "grad_norm": 0.41446117427026946, "learning_rate": 4.1012644880698455e-07, "loss": 0.2458, "step": 8951 }, { "epoch": 0.57, "grad_norm": 0.6538221590023136, "learning_rate": 4.1002485884197765e-07, "loss": 0.2322, "step": 8952 }, { "epoch": 0.57, "grad_norm": 1.8797931882510255, "learning_rate": 4.0992327271549087e-07, "loss": 0.0174, "step": 8953 }, { "epoch": 0.57, "grad_norm": 0.6596385415035146, "learning_rate": 4.098216904318587e-07, "loss": 0.146, "step": 8954 }, { "epoch": 0.57, "grad_norm": 0.9258696029597476, "learning_rate": 4.0972011199541414e-07, "loss": 0.1677, "step": 8955 }, { "epoch": 0.57, "grad_norm": 0.9562414673037318, "learning_rate": 4.0961853741049135e-07, "loss": 0.214, "step": 8956 }, { "epoch": 0.57, "grad_norm": 0.6610291632584135, "learning_rate": 4.095169666814233e-07, "loss": 0.1226, "step": 8957 }, { "epoch": 0.57, "grad_norm": 0.8198860743687632, "learning_rate": 4.0941539981254336e-07, "loss": 0.204, "step": 8958 }, { "epoch": 0.57, "grad_norm": 3.905845755857973, "learning_rate": 4.093138368081847e-07, "loss": 0.1959, "step": 8959 }, { "epoch": 0.57, "grad_norm": 1.211781261974601, "learning_rate": 4.0921227767267974e-07, "loss": 0.2329, "step": 8960 }, { "epoch": 0.57, "grad_norm": 0.4863503643373439, "learning_rate": 4.0911072241036185e-07, "loss": 0.0428, "step": 8961 }, { "epoch": 0.57, "grad_norm": 0.7256574497194364, "learning_rate": 4.09009171025563e-07, "loss": 0.1728, "step": 8962 }, { "epoch": 0.57, "grad_norm": 18.89114656717362, "learning_rate": 4.089076235226161e-07, "loss": 0.4744, "step": 8963 }, { "epoch": 0.57, "grad_norm": 0.9527457468840271, "learning_rate": 4.0880607990585286e-07, "loss": 0.2439, "step": 8964 }, { "epoch": 0.57, "grad_norm": 0.748155632069046, "learning_rate": 4.0870454017960577e-07, "loss": 0.3099, "step": 8965 }, { "epoch": 0.57, "grad_norm": 1.5801864280752493, "learning_rate": 4.0860300434820633e-07, "loss": 0.2227, "step": 8966 }, { "epoch": 0.57, "grad_norm": 0.7190986270264014, "learning_rate": 4.0850147241598654e-07, "loss": 0.2481, "step": 8967 }, { "epoch": 0.57, "grad_norm": 0.5443676111902176, "learning_rate": 4.0839994438727785e-07, "loss": 0.0259, "step": 8968 }, { "epoch": 0.57, "grad_norm": 1.1092656664628424, "learning_rate": 4.082984202664118e-07, "loss": 0.2081, "step": 8969 }, { "epoch": 0.57, "grad_norm": 0.7488173781044702, "learning_rate": 4.0819690005771947e-07, "loss": 0.1968, "step": 8970 }, { "epoch": 0.57, "grad_norm": 1.4602423968032658, "learning_rate": 4.080953837655317e-07, "loss": 0.3081, "step": 8971 }, { "epoch": 0.57, "grad_norm": 6.613948406488765, "learning_rate": 4.079938713941799e-07, "loss": 0.1172, "step": 8972 }, { "epoch": 0.57, "grad_norm": 1.3240785831914836, "learning_rate": 4.078923629479942e-07, "loss": 0.1644, "step": 8973 }, { "epoch": 0.57, "grad_norm": 0.49065242535318954, "learning_rate": 4.077908584313058e-07, "loss": 0.3614, "step": 8974 }, { "epoch": 0.57, "grad_norm": 0.6036816709428935, "learning_rate": 4.0768935784844444e-07, "loss": 0.1832, "step": 8975 }, { "epoch": 0.57, "grad_norm": 8.656090000346207, "learning_rate": 4.0758786120374073e-07, "loss": 0.1065, "step": 8976 }, { "epoch": 0.57, "grad_norm": 2.1238109859775967, "learning_rate": 4.0748636850152457e-07, "loss": 0.4529, "step": 8977 }, { "epoch": 0.57, "grad_norm": 0.658608595335291, "learning_rate": 4.0738487974612613e-07, "loss": 0.3871, "step": 8978 }, { "epoch": 0.57, "grad_norm": 1.020767182452796, "learning_rate": 4.0728339494187465e-07, "loss": 0.2117, "step": 8979 }, { "epoch": 0.57, "grad_norm": 11.587761079292793, "learning_rate": 4.071819140931002e-07, "loss": 0.225, "step": 8980 }, { "epoch": 0.57, "grad_norm": 1.1746589604840403, "learning_rate": 4.0708043720413157e-07, "loss": 0.3331, "step": 8981 }, { "epoch": 0.57, "grad_norm": 0.6432716813705855, "learning_rate": 4.069789642792986e-07, "loss": 0.009, "step": 8982 }, { "epoch": 0.57, "grad_norm": 0.6342285927951091, "learning_rate": 4.068774953229297e-07, "loss": 0.0823, "step": 8983 }, { "epoch": 0.57, "grad_norm": 7.316941299192119, "learning_rate": 4.067760303393543e-07, "loss": 0.0673, "step": 8984 }, { "epoch": 0.57, "grad_norm": 0.7977403896202583, "learning_rate": 4.066745693329007e-07, "loss": 0.0088, "step": 8985 }, { "epoch": 0.57, "grad_norm": 0.6602867804753216, "learning_rate": 4.0657311230789757e-07, "loss": 0.3205, "step": 8986 }, { "epoch": 0.57, "grad_norm": 0.3875502905647615, "learning_rate": 4.0647165926867345e-07, "loss": 0.2616, "step": 8987 }, { "epoch": 0.57, "grad_norm": 1.1612784449805174, "learning_rate": 4.0637021021955615e-07, "loss": 0.3196, "step": 8988 }, { "epoch": 0.57, "grad_norm": 0.4985510654803668, "learning_rate": 4.062687651648741e-07, "loss": 0.0171, "step": 8989 }, { "epoch": 0.57, "grad_norm": 0.27865756818428206, "learning_rate": 4.061673241089547e-07, "loss": 0.0365, "step": 8990 }, { "epoch": 0.57, "grad_norm": 3.3307056343237686, "learning_rate": 4.060658870561262e-07, "loss": 0.0844, "step": 8991 }, { "epoch": 0.57, "grad_norm": 2.243988055057629, "learning_rate": 4.0596445401071547e-07, "loss": 0.073, "step": 8992 }, { "epoch": 0.57, "grad_norm": 1.2478361278616963, "learning_rate": 4.058630249770504e-07, "loss": 0.2899, "step": 8993 }, { "epoch": 0.57, "grad_norm": 1.3104727381547325, "learning_rate": 4.057615999594578e-07, "loss": 0.2499, "step": 8994 }, { "epoch": 0.57, "grad_norm": 0.32810876773928077, "learning_rate": 4.0566017896226485e-07, "loss": 0.1811, "step": 8995 }, { "epoch": 0.57, "grad_norm": 0.8333586188888399, "learning_rate": 4.0555876198979817e-07, "loss": 0.3105, "step": 8996 }, { "epoch": 0.57, "grad_norm": 0.2441767375955954, "learning_rate": 4.054573490463848e-07, "loss": 0.0094, "step": 8997 }, { "epoch": 0.57, "grad_norm": 1.0921772024803698, "learning_rate": 4.0535594013635093e-07, "loss": 0.058, "step": 8998 }, { "epoch": 0.57, "grad_norm": 1.2390422713542835, "learning_rate": 4.0525453526402276e-07, "loss": 0.1206, "step": 8999 }, { "epoch": 0.57, "grad_norm": 1.1778885430337904, "learning_rate": 4.0515313443372675e-07, "loss": 0.2504, "step": 9000 }, { "epoch": 0.57, "grad_norm": 0.08698806208004202, "learning_rate": 4.050517376497885e-07, "loss": 0.0028, "step": 9001 }, { "epoch": 0.57, "grad_norm": 0.46755935882190264, "learning_rate": 4.049503449165341e-07, "loss": 0.0989, "step": 9002 }, { "epoch": 0.57, "grad_norm": 0.7650949509322319, "learning_rate": 4.04848956238289e-07, "loss": 0.1436, "step": 9003 }, { "epoch": 0.57, "grad_norm": 0.2979398501256509, "learning_rate": 4.0474757161937876e-07, "loss": 0.141, "step": 9004 }, { "epoch": 0.57, "grad_norm": 1.0035517416848905, "learning_rate": 4.046461910641285e-07, "loss": 0.2083, "step": 9005 }, { "epoch": 0.57, "grad_norm": 24.001824481540783, "learning_rate": 4.045448145768636e-07, "loss": 0.3099, "step": 9006 }, { "epoch": 0.57, "grad_norm": 1.783926302428239, "learning_rate": 4.0444344216190863e-07, "loss": 0.2344, "step": 9007 }, { "epoch": 0.57, "grad_norm": 8.939785030272276, "learning_rate": 4.0434207382358865e-07, "loss": 0.1586, "step": 9008 }, { "epoch": 0.57, "grad_norm": 0.7242003816517104, "learning_rate": 4.042407095662279e-07, "loss": 0.129, "step": 9009 }, { "epoch": 0.57, "grad_norm": 0.8403834322257356, "learning_rate": 4.0413934939415126e-07, "loss": 0.3, "step": 9010 }, { "epoch": 0.57, "grad_norm": 1.4074080079376277, "learning_rate": 4.0403799331168243e-07, "loss": 0.2928, "step": 9011 }, { "epoch": 0.57, "grad_norm": 1.4771753974956194, "learning_rate": 4.0393664132314576e-07, "loss": 0.1581, "step": 9012 }, { "epoch": 0.57, "grad_norm": 0.6087305966614077, "learning_rate": 4.0383529343286525e-07, "loss": 0.0752, "step": 9013 }, { "epoch": 0.57, "grad_norm": 0.4078878006792705, "learning_rate": 4.037339496451642e-07, "loss": 0.0535, "step": 9014 }, { "epoch": 0.57, "grad_norm": 2.3292486987075627, "learning_rate": 4.0363260996436666e-07, "loss": 0.0814, "step": 9015 }, { "epoch": 0.57, "grad_norm": 0.6243070981600599, "learning_rate": 4.035312743947954e-07, "loss": 0.1725, "step": 9016 }, { "epoch": 0.58, "grad_norm": 1.4245762187153739, "learning_rate": 4.0342994294077414e-07, "loss": 0.1674, "step": 9017 }, { "epoch": 0.58, "grad_norm": 0.22093514648280238, "learning_rate": 4.0332861560662547e-07, "loss": 0.0796, "step": 9018 }, { "epoch": 0.58, "grad_norm": 0.7260767099304124, "learning_rate": 4.0322729239667253e-07, "loss": 0.1967, "step": 9019 }, { "epoch": 0.58, "grad_norm": 0.7598463566865133, "learning_rate": 4.031259733152377e-07, "loss": 0.2083, "step": 9020 }, { "epoch": 0.58, "grad_norm": 2.661778512892847, "learning_rate": 4.030246583666437e-07, "loss": 0.211, "step": 9021 }, { "epoch": 0.58, "grad_norm": 0.5116657482186662, "learning_rate": 4.029233475552125e-07, "loss": 0.2962, "step": 9022 }, { "epoch": 0.58, "grad_norm": 0.4688982985790497, "learning_rate": 4.0282204088526674e-07, "loss": 0.1241, "step": 9023 }, { "epoch": 0.58, "grad_norm": 0.45749741855101583, "learning_rate": 4.0272073836112786e-07, "loss": 0.4121, "step": 9024 }, { "epoch": 0.58, "grad_norm": 0.9434951366675195, "learning_rate": 4.0261943998711803e-07, "loss": 0.1673, "step": 9025 }, { "epoch": 0.58, "grad_norm": 0.20550577244305945, "learning_rate": 4.025181457675587e-07, "loss": 0.0588, "step": 9026 }, { "epoch": 0.58, "grad_norm": 2.428451287773308, "learning_rate": 4.024168557067709e-07, "loss": 0.1814, "step": 9027 }, { "epoch": 0.58, "grad_norm": 0.8381884433585038, "learning_rate": 4.023155698090764e-07, "loss": 0.2038, "step": 9028 }, { "epoch": 0.58, "grad_norm": 0.7965236383737759, "learning_rate": 4.02214288078796e-07, "loss": 0.1706, "step": 9029 }, { "epoch": 0.58, "grad_norm": 0.6777519886371323, "learning_rate": 4.021130105202507e-07, "loss": 0.0079, "step": 9030 }, { "epoch": 0.58, "grad_norm": 2.5632899213811604, "learning_rate": 4.0201173713776105e-07, "loss": 0.156, "step": 9031 }, { "epoch": 0.58, "grad_norm": 2.9247967735208444, "learning_rate": 4.0191046793564787e-07, "loss": 0.3693, "step": 9032 }, { "epoch": 0.58, "grad_norm": 0.5713296572055833, "learning_rate": 4.0180920291823114e-07, "loss": 0.3208, "step": 9033 }, { "epoch": 0.58, "grad_norm": 0.5066304749829127, "learning_rate": 4.017079420898314e-07, "loss": 0.1027, "step": 9034 }, { "epoch": 0.58, "grad_norm": 0.7147742170935585, "learning_rate": 4.016066854547682e-07, "loss": 0.1065, "step": 9035 }, { "epoch": 0.58, "grad_norm": 1.3069685824116117, "learning_rate": 4.015054330173618e-07, "loss": 0.3056, "step": 9036 }, { "epoch": 0.58, "grad_norm": 0.667330074309844, "learning_rate": 4.014041847819314e-07, "loss": 0.1823, "step": 9037 }, { "epoch": 0.58, "grad_norm": 0.9773945518802425, "learning_rate": 4.013029407527968e-07, "loss": 0.1436, "step": 9038 }, { "epoch": 0.58, "grad_norm": 1.2996877867987215, "learning_rate": 4.012017009342773e-07, "loss": 0.1427, "step": 9039 }, { "epoch": 0.58, "grad_norm": 1.4452488615195211, "learning_rate": 4.0110046533069157e-07, "loss": 0.3065, "step": 9040 }, { "epoch": 0.58, "grad_norm": 1.30645353995812, "learning_rate": 4.00999233946359e-07, "loss": 0.333, "step": 9041 }, { "epoch": 0.58, "grad_norm": 0.4956683821175165, "learning_rate": 4.0089800678559803e-07, "loss": 0.1727, "step": 9042 }, { "epoch": 0.58, "grad_norm": 5.739914002270914, "learning_rate": 4.007967838527274e-07, "loss": 0.0503, "step": 9043 }, { "epoch": 0.58, "grad_norm": 0.3603300123768585, "learning_rate": 4.0069556515206527e-07, "loss": 0.1889, "step": 9044 }, { "epoch": 0.58, "grad_norm": 0.9760889355895916, "learning_rate": 4.005943506879301e-07, "loss": 0.077, "step": 9045 }, { "epoch": 0.58, "grad_norm": 1.081222799612338, "learning_rate": 4.004931404646397e-07, "loss": 0.3447, "step": 9046 }, { "epoch": 0.58, "grad_norm": 1.4811212383192218, "learning_rate": 4.003919344865119e-07, "loss": 0.2423, "step": 9047 }, { "epoch": 0.58, "grad_norm": 2.2905747419769944, "learning_rate": 4.0029073275786436e-07, "loss": 0.0074, "step": 9048 }, { "epoch": 0.58, "grad_norm": 7.09817365571362, "learning_rate": 4.001895352830148e-07, "loss": 0.0707, "step": 9049 }, { "epoch": 0.58, "grad_norm": 0.9445935114472639, "learning_rate": 4.000883420662801e-07, "loss": 0.2193, "step": 9050 }, { "epoch": 0.58, "grad_norm": 1.4456565270365584, "learning_rate": 3.9998715311197783e-07, "loss": 0.2212, "step": 9051 }, { "epoch": 0.58, "grad_norm": 1.0246092475515236, "learning_rate": 3.998859684244244e-07, "loss": 0.2089, "step": 9052 }, { "epoch": 0.58, "grad_norm": 10.215196055215365, "learning_rate": 3.99784788007937e-07, "loss": 0.3612, "step": 9053 }, { "epoch": 0.58, "grad_norm": 0.7285277373881145, "learning_rate": 3.99683611866832e-07, "loss": 0.1118, "step": 9054 }, { "epoch": 0.58, "grad_norm": 7.310467250687471, "learning_rate": 3.9958244000542566e-07, "loss": 0.2895, "step": 9055 }, { "epoch": 0.58, "grad_norm": 1.721133612694442, "learning_rate": 3.9948127242803437e-07, "loss": 0.1636, "step": 9056 }, { "epoch": 0.58, "grad_norm": 3.025961345957209, "learning_rate": 3.993801091389739e-07, "loss": 0.1483, "step": 9057 }, { "epoch": 0.58, "grad_norm": 0.9314000741301101, "learning_rate": 3.9927895014256053e-07, "loss": 0.1196, "step": 9058 }, { "epoch": 0.58, "grad_norm": 16.420191387188993, "learning_rate": 3.9917779544310935e-07, "loss": 0.1613, "step": 9059 }, { "epoch": 0.58, "grad_norm": 0.4235913058231204, "learning_rate": 3.990766450449363e-07, "loss": 0.1023, "step": 9060 }, { "epoch": 0.58, "grad_norm": 1.2195491324790444, "learning_rate": 3.989754989523563e-07, "loss": 0.2947, "step": 9061 }, { "epoch": 0.58, "grad_norm": 3.7037524693812025, "learning_rate": 3.9887435716968484e-07, "loss": 0.1254, "step": 9062 }, { "epoch": 0.58, "grad_norm": 1.5073221242159822, "learning_rate": 3.987732197012363e-07, "loss": 0.2778, "step": 9063 }, { "epoch": 0.58, "grad_norm": 1.8665317055890085, "learning_rate": 3.9867208655132586e-07, "loss": 0.0605, "step": 9064 }, { "epoch": 0.58, "grad_norm": 4.762887116434521, "learning_rate": 3.9857095772426784e-07, "loss": 0.0755, "step": 9065 }, { "epoch": 0.58, "grad_norm": 0.9474658750577679, "learning_rate": 3.9846983322437667e-07, "loss": 0.1098, "step": 9066 }, { "epoch": 0.58, "grad_norm": 0.36958400160465527, "learning_rate": 3.983687130559666e-07, "loss": 0.1108, "step": 9067 }, { "epoch": 0.58, "grad_norm": 0.7325477158345699, "learning_rate": 3.982675972233514e-07, "loss": 0.2775, "step": 9068 }, { "epoch": 0.58, "grad_norm": 0.6345864761335269, "learning_rate": 3.981664857308452e-07, "loss": 0.07, "step": 9069 }, { "epoch": 0.58, "grad_norm": 0.5382803473694683, "learning_rate": 3.9806537858276124e-07, "loss": 0.2797, "step": 9070 }, { "epoch": 0.58, "grad_norm": 1.163239855383378, "learning_rate": 3.979642757834133e-07, "loss": 0.1866, "step": 9071 }, { "epoch": 0.58, "grad_norm": 0.5826955762141282, "learning_rate": 3.9786317733711427e-07, "loss": 0.1656, "step": 9072 }, { "epoch": 0.58, "grad_norm": 1.8460086516001164, "learning_rate": 3.9776208324817755e-07, "loss": 0.096, "step": 9073 }, { "epoch": 0.58, "grad_norm": 0.8705191724337407, "learning_rate": 3.9766099352091587e-07, "loss": 0.3196, "step": 9074 }, { "epoch": 0.58, "grad_norm": 0.5142661317455479, "learning_rate": 3.975599081596419e-07, "loss": 0.156, "step": 9075 }, { "epoch": 0.58, "grad_norm": 0.6104047618995742, "learning_rate": 3.9745882716866813e-07, "loss": 0.031, "step": 9076 }, { "epoch": 0.58, "grad_norm": 0.7345028381558942, "learning_rate": 3.9735775055230714e-07, "loss": 0.355, "step": 9077 }, { "epoch": 0.58, "grad_norm": 6.588846068939626, "learning_rate": 3.972566783148706e-07, "loss": 0.1142, "step": 9078 }, { "epoch": 0.58, "grad_norm": 7.341741735597102, "learning_rate": 3.9715561046067094e-07, "loss": 0.1682, "step": 9079 }, { "epoch": 0.58, "grad_norm": 0.1621309997240818, "learning_rate": 3.9705454699401963e-07, "loss": 0.0015, "step": 9080 }, { "epoch": 0.58, "grad_norm": 1.4465533222689928, "learning_rate": 3.969534879192281e-07, "loss": 0.2088, "step": 9081 }, { "epoch": 0.58, "grad_norm": 0.3472903473872619, "learning_rate": 3.9685243324060807e-07, "loss": 0.1967, "step": 9082 }, { "epoch": 0.58, "grad_norm": 0.89663315989137, "learning_rate": 3.9675138296247045e-07, "loss": 0.0303, "step": 9083 }, { "epoch": 0.58, "grad_norm": 0.6255986993763517, "learning_rate": 3.966503370891266e-07, "loss": 0.1166, "step": 9084 }, { "epoch": 0.58, "grad_norm": 2.3405934575042138, "learning_rate": 3.9654929562488693e-07, "loss": 0.2685, "step": 9085 }, { "epoch": 0.58, "grad_norm": 1.5722119128549745, "learning_rate": 3.964482585740625e-07, "loss": 0.1406, "step": 9086 }, { "epoch": 0.58, "grad_norm": 0.24404177857455117, "learning_rate": 3.9634722594096325e-07, "loss": 0.0997, "step": 9087 }, { "epoch": 0.58, "grad_norm": 4.681805078870619, "learning_rate": 3.962461977299e-07, "loss": 0.2331, "step": 9088 }, { "epoch": 0.58, "grad_norm": 0.8243376873980711, "learning_rate": 3.961451739451823e-07, "loss": 0.1161, "step": 9089 }, { "epoch": 0.58, "grad_norm": 0.7757202223089231, "learning_rate": 3.960441545911204e-07, "loss": 0.0637, "step": 9090 }, { "epoch": 0.58, "grad_norm": 2.7456104967572204, "learning_rate": 3.959431396720237e-07, "loss": 0.083, "step": 9091 }, { "epoch": 0.58, "grad_norm": 1.1350634707790515, "learning_rate": 3.9584212919220196e-07, "loss": 0.2344, "step": 9092 }, { "epoch": 0.58, "grad_norm": 0.878038497687454, "learning_rate": 3.957411231559642e-07, "loss": 0.1174, "step": 9093 }, { "epoch": 0.58, "grad_norm": 1.0787155370288095, "learning_rate": 3.9564012156761994e-07, "loss": 0.1609, "step": 9094 }, { "epoch": 0.58, "grad_norm": 0.7154063988683648, "learning_rate": 3.955391244314779e-07, "loss": 0.05, "step": 9095 }, { "epoch": 0.58, "grad_norm": 0.8266407603287446, "learning_rate": 3.954381317518466e-07, "loss": 0.168, "step": 9096 }, { "epoch": 0.58, "grad_norm": 2.122799645277665, "learning_rate": 3.95337143533035e-07, "loss": 0.3171, "step": 9097 }, { "epoch": 0.58, "grad_norm": 0.6984134699333916, "learning_rate": 3.9523615977935097e-07, "loss": 0.3383, "step": 9098 }, { "epoch": 0.58, "grad_norm": 1.1842718551636655, "learning_rate": 3.951351804951032e-07, "loss": 0.0621, "step": 9099 }, { "epoch": 0.58, "grad_norm": 1.1785945127202282, "learning_rate": 3.9503420568459936e-07, "loss": 0.3907, "step": 9100 }, { "epoch": 0.58, "grad_norm": 0.6463394683670811, "learning_rate": 3.9493323535214737e-07, "loss": 0.1419, "step": 9101 }, { "epoch": 0.58, "grad_norm": 1.0666113227064726, "learning_rate": 3.948322695020546e-07, "loss": 0.158, "step": 9102 }, { "epoch": 0.58, "grad_norm": 0.3029223594752769, "learning_rate": 3.947313081386289e-07, "loss": 0.0003, "step": 9103 }, { "epoch": 0.58, "grad_norm": 1.2458121378905718, "learning_rate": 3.9463035126617696e-07, "loss": 0.1343, "step": 9104 }, { "epoch": 0.58, "grad_norm": 1.5893834935333186, "learning_rate": 3.945293988890062e-07, "loss": 0.259, "step": 9105 }, { "epoch": 0.58, "grad_norm": 2.3004152307314003, "learning_rate": 3.944284510114232e-07, "loss": 0.3243, "step": 9106 }, { "epoch": 0.58, "grad_norm": 0.5497385037057883, "learning_rate": 3.943275076377349e-07, "loss": 0.1432, "step": 9107 }, { "epoch": 0.58, "grad_norm": 0.7587435307983558, "learning_rate": 3.942265687722474e-07, "loss": 0.3035, "step": 9108 }, { "epoch": 0.58, "grad_norm": 1.4121270975608575, "learning_rate": 3.9412563441926706e-07, "loss": 0.2223, "step": 9109 }, { "epoch": 0.58, "grad_norm": 1.3152190806198152, "learning_rate": 3.940247045831001e-07, "loss": 0.3556, "step": 9110 }, { "epoch": 0.58, "grad_norm": 1.077669034473468, "learning_rate": 3.939237792680522e-07, "loss": 0.3115, "step": 9111 }, { "epoch": 0.58, "grad_norm": 0.5996167287361195, "learning_rate": 3.938228584784292e-07, "loss": 0.3228, "step": 9112 }, { "epoch": 0.58, "grad_norm": 0.8498250818324076, "learning_rate": 3.9372194221853636e-07, "loss": 0.3961, "step": 9113 }, { "epoch": 0.58, "grad_norm": 1.9441137172312675, "learning_rate": 3.9362103049267934e-07, "loss": 0.0193, "step": 9114 }, { "epoch": 0.58, "grad_norm": 1.1079625665689703, "learning_rate": 3.9352012330516275e-07, "loss": 0.377, "step": 9115 }, { "epoch": 0.58, "grad_norm": 0.5648126864513543, "learning_rate": 3.9341922066029205e-07, "loss": 0.1762, "step": 9116 }, { "epoch": 0.58, "grad_norm": 0.2391696149516038, "learning_rate": 3.9331832256237136e-07, "loss": 0.0849, "step": 9117 }, { "epoch": 0.58, "grad_norm": 28.27204448115714, "learning_rate": 3.932174290157056e-07, "loss": 0.291, "step": 9118 }, { "epoch": 0.58, "grad_norm": 1.1459072452225505, "learning_rate": 3.9311654002459896e-07, "loss": 0.0047, "step": 9119 }, { "epoch": 0.58, "grad_norm": 14.449940850162786, "learning_rate": 3.9301565559335565e-07, "loss": 0.3272, "step": 9120 }, { "epoch": 0.58, "grad_norm": 0.907058481995523, "learning_rate": 3.929147757262794e-07, "loss": 0.099, "step": 9121 }, { "epoch": 0.58, "grad_norm": 6.601482362661088, "learning_rate": 3.9281390042767423e-07, "loss": 0.0294, "step": 9122 }, { "epoch": 0.58, "grad_norm": 0.695113172316583, "learning_rate": 3.927130297018436e-07, "loss": 0.2036, "step": 9123 }, { "epoch": 0.58, "grad_norm": 0.4512606804656528, "learning_rate": 3.9261216355309063e-07, "loss": 0.1826, "step": 9124 }, { "epoch": 0.58, "grad_norm": 0.40888229553450833, "learning_rate": 3.925113019857187e-07, "loss": 0.0128, "step": 9125 }, { "epoch": 0.58, "grad_norm": 8.352895839425, "learning_rate": 3.924104450040307e-07, "loss": 0.2253, "step": 9126 }, { "epoch": 0.58, "grad_norm": 0.8631112591496051, "learning_rate": 3.9230959261232943e-07, "loss": 0.1636, "step": 9127 }, { "epoch": 0.58, "grad_norm": 0.9239130250737256, "learning_rate": 3.9220874481491726e-07, "loss": 0.4013, "step": 9128 }, { "epoch": 0.58, "grad_norm": 0.8545318574040791, "learning_rate": 3.92107901616097e-07, "loss": 0.123, "step": 9129 }, { "epoch": 0.58, "grad_norm": 0.6277111029733823, "learning_rate": 3.920070630201703e-07, "loss": 0.1028, "step": 9130 }, { "epoch": 0.58, "grad_norm": 7.025476062430447, "learning_rate": 3.9190622903143953e-07, "loss": 0.0061, "step": 9131 }, { "epoch": 0.58, "grad_norm": 0.43370590315597163, "learning_rate": 3.918053996542062e-07, "loss": 0.0681, "step": 9132 }, { "epoch": 0.58, "grad_norm": 0.21568988514208015, "learning_rate": 3.9170457489277227e-07, "loss": 0.0934, "step": 9133 }, { "epoch": 0.58, "grad_norm": 1.2134802163903566, "learning_rate": 3.916037547514386e-07, "loss": 0.48, "step": 9134 }, { "epoch": 0.58, "grad_norm": 0.44384820722314655, "learning_rate": 3.9150293923450684e-07, "loss": 0.2024, "step": 9135 }, { "epoch": 0.58, "grad_norm": 0.5290360688562474, "learning_rate": 3.914021283462777e-07, "loss": 0.2915, "step": 9136 }, { "epoch": 0.58, "grad_norm": 4.199319608565975, "learning_rate": 3.9130132209105207e-07, "loss": 0.0644, "step": 9137 }, { "epoch": 0.58, "grad_norm": 0.8110329089991334, "learning_rate": 3.912005204731307e-07, "loss": 0.2226, "step": 9138 }, { "epoch": 0.58, "grad_norm": 1.6090097521712758, "learning_rate": 3.9109972349681357e-07, "loss": 0.1522, "step": 9139 }, { "epoch": 0.58, "grad_norm": 1.0628346686616439, "learning_rate": 3.909989311664014e-07, "loss": 0.1917, "step": 9140 }, { "epoch": 0.58, "grad_norm": 1.0460528081900293, "learning_rate": 3.9089814348619376e-07, "loss": 0.4451, "step": 9141 }, { "epoch": 0.58, "grad_norm": 0.8136537433216505, "learning_rate": 3.907973604604908e-07, "loss": 0.1841, "step": 9142 }, { "epoch": 0.58, "grad_norm": 1.2886978776955402, "learning_rate": 3.9069658209359186e-07, "loss": 0.266, "step": 9143 }, { "epoch": 0.58, "grad_norm": 1.202882225819883, "learning_rate": 3.905958083897965e-07, "loss": 0.2374, "step": 9144 }, { "epoch": 0.58, "grad_norm": 0.500113987439878, "learning_rate": 3.9049503935340386e-07, "loss": 0.2706, "step": 9145 }, { "epoch": 0.58, "grad_norm": 1.1938992054164177, "learning_rate": 3.90394274988713e-07, "loss": 0.143, "step": 9146 }, { "epoch": 0.58, "grad_norm": 3.7248861178693087, "learning_rate": 3.9029351530002264e-07, "loss": 0.0849, "step": 9147 }, { "epoch": 0.58, "grad_norm": 0.8164298406079303, "learning_rate": 3.901927602916316e-07, "loss": 0.0368, "step": 9148 }, { "epoch": 0.58, "grad_norm": 0.7645650435096253, "learning_rate": 3.900920099678382e-07, "loss": 0.1669, "step": 9149 }, { "epoch": 0.58, "grad_norm": 0.6462155620964484, "learning_rate": 3.899912643329403e-07, "loss": 0.0824, "step": 9150 }, { "epoch": 0.58, "grad_norm": 0.6100529014449996, "learning_rate": 3.898905233912365e-07, "loss": 0.3558, "step": 9151 }, { "epoch": 0.58, "grad_norm": 2.44330946916766, "learning_rate": 3.897897871470241e-07, "loss": 0.004, "step": 9152 }, { "epoch": 0.58, "grad_norm": 0.7378142353289706, "learning_rate": 3.8968905560460095e-07, "loss": 0.0732, "step": 9153 }, { "epoch": 0.58, "grad_norm": 0.7624836196576208, "learning_rate": 3.895883287682644e-07, "loss": 0.2406, "step": 9154 }, { "epoch": 0.58, "grad_norm": 1.1989838823723706, "learning_rate": 3.8948760664231194e-07, "loss": 0.2578, "step": 9155 }, { "epoch": 0.58, "grad_norm": 0.38854518237118085, "learning_rate": 3.893868892310401e-07, "loss": 0.0291, "step": 9156 }, { "epoch": 0.58, "grad_norm": 0.6847271574043243, "learning_rate": 3.8928617653874616e-07, "loss": 0.3524, "step": 9157 }, { "epoch": 0.58, "grad_norm": 0.4334590327411633, "learning_rate": 3.891854685697263e-07, "loss": 0.0711, "step": 9158 }, { "epoch": 0.58, "grad_norm": 0.411262064489176, "learning_rate": 3.8908476532827727e-07, "loss": 0.2449, "step": 9159 }, { "epoch": 0.58, "grad_norm": 0.3337348207732892, "learning_rate": 3.889840668186949e-07, "loss": 0.1965, "step": 9160 }, { "epoch": 0.58, "grad_norm": 1.129793996447344, "learning_rate": 3.8888337304527564e-07, "loss": 0.3591, "step": 9161 }, { "epoch": 0.58, "grad_norm": 1.4164599484664437, "learning_rate": 3.8878268401231487e-07, "loss": 0.1566, "step": 9162 }, { "epoch": 0.58, "grad_norm": 1.1135343748933653, "learning_rate": 3.8868199972410846e-07, "loss": 0.186, "step": 9163 }, { "epoch": 0.58, "grad_norm": 0.9535641093883805, "learning_rate": 3.8858132018495184e-07, "loss": 0.2839, "step": 9164 }, { "epoch": 0.58, "grad_norm": 0.4248583334239953, "learning_rate": 3.884806453991398e-07, "loss": 0.1362, "step": 9165 }, { "epoch": 0.58, "grad_norm": 0.49284576972669875, "learning_rate": 3.883799753709679e-07, "loss": 0.3284, "step": 9166 }, { "epoch": 0.58, "grad_norm": 1.028064848108264, "learning_rate": 3.882793101047304e-07, "loss": 0.2369, "step": 9167 }, { "epoch": 0.58, "grad_norm": 0.2460391072109868, "learning_rate": 3.8817864960472233e-07, "loss": 0.009, "step": 9168 }, { "epoch": 0.58, "grad_norm": 0.061349969952489876, "learning_rate": 3.8807799387523765e-07, "loss": 0.001, "step": 9169 }, { "epoch": 0.58, "grad_norm": 0.5311125442366161, "learning_rate": 3.8797734292057087e-07, "loss": 0.0622, "step": 9170 }, { "epoch": 0.58, "grad_norm": 0.8721603956515364, "learning_rate": 3.8787669674501575e-07, "loss": 0.1645, "step": 9171 }, { "epoch": 0.58, "grad_norm": 1.1911385618025718, "learning_rate": 3.877760553528663e-07, "loss": 0.2388, "step": 9172 }, { "epoch": 0.58, "grad_norm": 0.44380367715135, "learning_rate": 3.876754187484157e-07, "loss": 0.1058, "step": 9173 }, { "epoch": 0.59, "grad_norm": 1.0451666161687792, "learning_rate": 3.8757478693595777e-07, "loss": 0.3121, "step": 9174 }, { "epoch": 0.59, "grad_norm": 0.14264998394508208, "learning_rate": 3.874741599197852e-07, "loss": 0.0794, "step": 9175 }, { "epoch": 0.59, "grad_norm": 1.6246629504104158, "learning_rate": 3.873735377041914e-07, "loss": 0.085, "step": 9176 }, { "epoch": 0.59, "grad_norm": 0.5304588390550291, "learning_rate": 3.872729202934689e-07, "loss": 0.1211, "step": 9177 }, { "epoch": 0.59, "grad_norm": 1.2139916211796347, "learning_rate": 3.8717230769191003e-07, "loss": 0.1638, "step": 9178 }, { "epoch": 0.59, "grad_norm": 1.4945753870794083, "learning_rate": 3.870716999038074e-07, "loss": 0.4496, "step": 9179 }, { "epoch": 0.59, "grad_norm": 0.4775768718401756, "learning_rate": 3.869710969334531e-07, "loss": 0.1229, "step": 9180 }, { "epoch": 0.59, "grad_norm": 0.7109870809856619, "learning_rate": 3.86870498785139e-07, "loss": 0.2483, "step": 9181 }, { "epoch": 0.59, "grad_norm": 0.9287785944134873, "learning_rate": 3.8676990546315676e-07, "loss": 0.1111, "step": 9182 }, { "epoch": 0.59, "grad_norm": 0.7729595500918812, "learning_rate": 3.8666931697179816e-07, "loss": 0.2111, "step": 9183 }, { "epoch": 0.59, "grad_norm": 0.6794761483883275, "learning_rate": 3.8656873331535413e-07, "loss": 0.2492, "step": 9184 }, { "epoch": 0.59, "grad_norm": 0.4381350380622578, "learning_rate": 3.8646815449811617e-07, "loss": 0.0783, "step": 9185 }, { "epoch": 0.59, "grad_norm": 0.7720450624517092, "learning_rate": 3.863675805243747e-07, "loss": 0.1668, "step": 9186 }, { "epoch": 0.59, "grad_norm": 0.4921296405996789, "learning_rate": 3.8626701139842094e-07, "loss": 0.217, "step": 9187 }, { "epoch": 0.59, "grad_norm": 0.8177338474651337, "learning_rate": 3.8616644712454486e-07, "loss": 0.1114, "step": 9188 }, { "epoch": 0.59, "grad_norm": 5.166284876492476, "learning_rate": 3.860658877070371e-07, "loss": 0.1488, "step": 9189 }, { "epoch": 0.59, "grad_norm": 12.143827540947076, "learning_rate": 3.8596533315018775e-07, "loss": 0.0627, "step": 9190 }, { "epoch": 0.59, "grad_norm": 0.8860199853316247, "learning_rate": 3.858647834582863e-07, "loss": 0.2058, "step": 9191 }, { "epoch": 0.59, "grad_norm": 0.8097965018820006, "learning_rate": 3.857642386356228e-07, "loss": 0.3329, "step": 9192 }, { "epoch": 0.59, "grad_norm": 4.070774336932143, "learning_rate": 3.8566369868648637e-07, "loss": 0.1568, "step": 9193 }, { "epoch": 0.59, "grad_norm": 1.2699900825217016, "learning_rate": 3.855631636151666e-07, "loss": 0.2194, "step": 9194 }, { "epoch": 0.59, "grad_norm": 3.6469029270648288, "learning_rate": 3.854626334259521e-07, "loss": 0.2912, "step": 9195 }, { "epoch": 0.59, "grad_norm": 1.5244764993476563, "learning_rate": 3.853621081231321e-07, "loss": 0.1268, "step": 9196 }, { "epoch": 0.59, "grad_norm": 0.9962098492424587, "learning_rate": 3.852615877109948e-07, "loss": 0.2871, "step": 9197 }, { "epoch": 0.59, "grad_norm": 4.7032776124392495, "learning_rate": 3.851610721938289e-07, "loss": 0.315, "step": 9198 }, { "epoch": 0.59, "grad_norm": 0.8505050123015702, "learning_rate": 3.850605615759225e-07, "loss": 0.0576, "step": 9199 }, { "epoch": 0.59, "grad_norm": 2.707335169164966, "learning_rate": 3.849600558615637e-07, "loss": 0.1038, "step": 9200 }, { "epoch": 0.59, "grad_norm": 1.0816445579365792, "learning_rate": 3.8485955505504004e-07, "loss": 0.075, "step": 9201 }, { "epoch": 0.59, "grad_norm": 0.1298037125380591, "learning_rate": 3.8475905916063947e-07, "loss": 0.0017, "step": 9202 }, { "epoch": 0.59, "grad_norm": 1.2499967898855564, "learning_rate": 3.8465856818264873e-07, "loss": 0.0452, "step": 9203 }, { "epoch": 0.59, "grad_norm": 0.7829148141341262, "learning_rate": 3.8455808212535566e-07, "loss": 0.2444, "step": 9204 }, { "epoch": 0.59, "grad_norm": 0.45011960836309023, "learning_rate": 3.844576009930468e-07, "loss": 0.264, "step": 9205 }, { "epoch": 0.59, "grad_norm": 2.1994074093175575, "learning_rate": 3.843571247900089e-07, "loss": 0.0186, "step": 9206 }, { "epoch": 0.59, "grad_norm": 1.3517055045129158, "learning_rate": 3.8425665352052854e-07, "loss": 0.4352, "step": 9207 }, { "epoch": 0.59, "grad_norm": 0.7661274484486612, "learning_rate": 3.841561871888919e-07, "loss": 0.1375, "step": 9208 }, { "epoch": 0.59, "grad_norm": 0.7206736025970967, "learning_rate": 3.8405572579938545e-07, "loss": 0.1006, "step": 9209 }, { "epoch": 0.59, "grad_norm": 1.4045134212470072, "learning_rate": 3.839552693562945e-07, "loss": 0.1919, "step": 9210 }, { "epoch": 0.59, "grad_norm": 1.2670005349429003, "learning_rate": 3.838548178639054e-07, "loss": 0.191, "step": 9211 }, { "epoch": 0.59, "grad_norm": 1.3860446118169985, "learning_rate": 3.837543713265029e-07, "loss": 0.1544, "step": 9212 }, { "epoch": 0.59, "grad_norm": 0.6092347251192362, "learning_rate": 3.8365392974837286e-07, "loss": 0.0883, "step": 9213 }, { "epoch": 0.59, "grad_norm": 0.7268909521142994, "learning_rate": 3.8355349313379985e-07, "loss": 0.507, "step": 9214 }, { "epoch": 0.59, "grad_norm": 0.6860064248322543, "learning_rate": 3.8345306148706903e-07, "loss": 0.2763, "step": 9215 }, { "epoch": 0.59, "grad_norm": 1.5713052815692077, "learning_rate": 3.833526348124649e-07, "loss": 0.4124, "step": 9216 }, { "epoch": 0.59, "grad_norm": 0.7284454410432929, "learning_rate": 3.8325221311427187e-07, "loss": 0.3609, "step": 9217 }, { "epoch": 0.59, "grad_norm": 0.5045497683987311, "learning_rate": 3.831517963967742e-07, "loss": 0.1256, "step": 9218 }, { "epoch": 0.59, "grad_norm": 0.7560206150188047, "learning_rate": 3.830513846642556e-07, "loss": 0.3329, "step": 9219 }, { "epoch": 0.59, "grad_norm": 1.5132619562727003, "learning_rate": 3.829509779210002e-07, "loss": 0.2106, "step": 9220 }, { "epoch": 0.59, "grad_norm": 0.6174686597443946, "learning_rate": 3.828505761712912e-07, "loss": 0.2803, "step": 9221 }, { "epoch": 0.59, "grad_norm": 1.8508200040860678, "learning_rate": 3.827501794194123e-07, "loss": 0.3196, "step": 9222 }, { "epoch": 0.59, "grad_norm": 2.1476513717416568, "learning_rate": 3.8264978766964627e-07, "loss": 0.1843, "step": 9223 }, { "epoch": 0.59, "grad_norm": 0.49750688205076526, "learning_rate": 3.825494009262763e-07, "loss": 0.1874, "step": 9224 }, { "epoch": 0.59, "grad_norm": 0.6492510892939429, "learning_rate": 3.8244901919358486e-07, "loss": 0.354, "step": 9225 }, { "epoch": 0.59, "grad_norm": 0.6144163882567673, "learning_rate": 3.823486424758548e-07, "loss": 0.2499, "step": 9226 }, { "epoch": 0.59, "grad_norm": 1.003569649754277, "learning_rate": 3.82248270777368e-07, "loss": 0.2284, "step": 9227 }, { "epoch": 0.59, "grad_norm": 1.2232116338891916, "learning_rate": 3.8214790410240685e-07, "loss": 0.2695, "step": 9228 }, { "epoch": 0.59, "grad_norm": 0.5615557423366042, "learning_rate": 3.820475424552527e-07, "loss": 0.2816, "step": 9229 }, { "epoch": 0.59, "grad_norm": 0.5172325479600991, "learning_rate": 3.8194718584018787e-07, "loss": 0.2323, "step": 9230 }, { "epoch": 0.59, "grad_norm": 1.339617527615744, "learning_rate": 3.8184683426149315e-07, "loss": 0.2372, "step": 9231 }, { "epoch": 0.59, "grad_norm": 8.635007534459703, "learning_rate": 3.817464877234502e-07, "loss": 0.2388, "step": 9232 }, { "epoch": 0.59, "grad_norm": 1.1168657342703339, "learning_rate": 3.816461462303397e-07, "loss": 0.1081, "step": 9233 }, { "epoch": 0.59, "grad_norm": 0.8265448121999057, "learning_rate": 3.815458097864424e-07, "loss": 0.3288, "step": 9234 }, { "epoch": 0.59, "grad_norm": 1.4160038277515097, "learning_rate": 3.8144547839603923e-07, "loss": 0.0752, "step": 9235 }, { "epoch": 0.59, "grad_norm": 1.2909499803288882, "learning_rate": 3.8134515206341005e-07, "loss": 0.0294, "step": 9236 }, { "epoch": 0.59, "grad_norm": 0.9679006141827274, "learning_rate": 3.8124483079283543e-07, "loss": 0.2623, "step": 9237 }, { "epoch": 0.59, "grad_norm": 1.1028104956836575, "learning_rate": 3.8114451458859483e-07, "loss": 0.2275, "step": 9238 }, { "epoch": 0.59, "grad_norm": 0.22196268196621946, "learning_rate": 3.8104420345496837e-07, "loss": 0.1058, "step": 9239 }, { "epoch": 0.59, "grad_norm": 1.993825894678243, "learning_rate": 3.809438973962351e-07, "loss": 0.2151, "step": 9240 }, { "epoch": 0.59, "grad_norm": 1.0319915627413767, "learning_rate": 3.8084359641667476e-07, "loss": 0.076, "step": 9241 }, { "epoch": 0.59, "grad_norm": 0.07997144953621751, "learning_rate": 3.8074330052056587e-07, "loss": 0.0012, "step": 9242 }, { "epoch": 0.59, "grad_norm": 0.7991306537992611, "learning_rate": 3.806430097121876e-07, "loss": 0.2895, "step": 9243 }, { "epoch": 0.59, "grad_norm": 0.8036803252125937, "learning_rate": 3.8054272399581844e-07, "loss": 0.1936, "step": 9244 }, { "epoch": 0.59, "grad_norm": 1.8102653795342827, "learning_rate": 3.8044244337573694e-07, "loss": 0.2513, "step": 9245 }, { "epoch": 0.59, "grad_norm": 0.8675800393996959, "learning_rate": 3.8034216785622125e-07, "loss": 0.0464, "step": 9246 }, { "epoch": 0.59, "grad_norm": 1.2588834721619737, "learning_rate": 3.802418974415489e-07, "loss": 0.3701, "step": 9247 }, { "epoch": 0.59, "grad_norm": 1.0249739483080074, "learning_rate": 3.8014163213599824e-07, "loss": 0.0737, "step": 9248 }, { "epoch": 0.59, "grad_norm": 14.622398411848904, "learning_rate": 3.800413719438463e-07, "loss": 0.3292, "step": 9249 }, { "epoch": 0.59, "grad_norm": 0.40679487772653566, "learning_rate": 3.7994111686937066e-07, "loss": 0.2561, "step": 9250 }, { "epoch": 0.59, "grad_norm": 0.2984174953844867, "learning_rate": 3.798408669168483e-07, "loss": 0.0028, "step": 9251 }, { "epoch": 0.59, "grad_norm": 0.4858604040673141, "learning_rate": 3.7974062209055615e-07, "loss": 0.1167, "step": 9252 }, { "epoch": 0.59, "grad_norm": 1.563922569166749, "learning_rate": 3.7964038239477065e-07, "loss": 0.3193, "step": 9253 }, { "epoch": 0.59, "grad_norm": 0.650429162744675, "learning_rate": 3.7954014783376865e-07, "loss": 0.1074, "step": 9254 }, { "epoch": 0.59, "grad_norm": 9.61252308008292, "learning_rate": 3.794399184118258e-07, "loss": 0.1816, "step": 9255 }, { "epoch": 0.59, "grad_norm": 0.6142621497538089, "learning_rate": 3.793396941332187e-07, "loss": 0.0874, "step": 9256 }, { "epoch": 0.59, "grad_norm": 1.2884583985225477, "learning_rate": 3.7923947500222256e-07, "loss": 0.1975, "step": 9257 }, { "epoch": 0.59, "grad_norm": 11.110770340974891, "learning_rate": 3.7913926102311337e-07, "loss": 0.1255, "step": 9258 }, { "epoch": 0.59, "grad_norm": 0.3867503123187839, "learning_rate": 3.790390522001662e-07, "loss": 0.08, "step": 9259 }, { "epoch": 0.59, "grad_norm": 1.3152861298882226, "learning_rate": 3.789388485376561e-07, "loss": 0.1842, "step": 9260 }, { "epoch": 0.59, "grad_norm": 0.6016674065452318, "learning_rate": 3.7883865003985824e-07, "loss": 0.3036, "step": 9261 }, { "epoch": 0.59, "grad_norm": 1.0199353647149971, "learning_rate": 3.78738456711047e-07, "loss": 0.1298, "step": 9262 }, { "epoch": 0.59, "grad_norm": 1.5743923291555721, "learning_rate": 3.786382685554972e-07, "loss": 0.2609, "step": 9263 }, { "epoch": 0.59, "grad_norm": 1.1420267270789695, "learning_rate": 3.785380855774826e-07, "loss": 0.2284, "step": 9264 }, { "epoch": 0.59, "grad_norm": 0.3823484926973197, "learning_rate": 3.784379077812776e-07, "loss": 0.1094, "step": 9265 }, { "epoch": 0.59, "grad_norm": 0.6768785129797791, "learning_rate": 3.783377351711556e-07, "loss": 0.4861, "step": 9266 }, { "epoch": 0.59, "grad_norm": 0.493585718026856, "learning_rate": 3.7823756775139064e-07, "loss": 0.1025, "step": 9267 }, { "epoch": 0.59, "grad_norm": 0.5133365280139011, "learning_rate": 3.781374055262556e-07, "loss": 0.1072, "step": 9268 }, { "epoch": 0.59, "grad_norm": 0.6378779444642336, "learning_rate": 3.7803724850002386e-07, "loss": 0.3225, "step": 9269 }, { "epoch": 0.59, "grad_norm": 1.6305097473210857, "learning_rate": 3.779370966769683e-07, "loss": 0.0843, "step": 9270 }, { "epoch": 0.59, "grad_norm": 0.7640534442975933, "learning_rate": 3.7783695006136166e-07, "loss": 0.3095, "step": 9271 }, { "epoch": 0.59, "grad_norm": 0.7972338856128529, "learning_rate": 3.7773680865747614e-07, "loss": 0.0946, "step": 9272 }, { "epoch": 0.59, "grad_norm": 0.3971028010300925, "learning_rate": 3.776366724695844e-07, "loss": 0.3012, "step": 9273 }, { "epoch": 0.59, "grad_norm": 2.669258071981314, "learning_rate": 3.775365415019581e-07, "loss": 0.1948, "step": 9274 }, { "epoch": 0.59, "grad_norm": 0.8696804014722496, "learning_rate": 3.77436415758869e-07, "loss": 0.0936, "step": 9275 }, { "epoch": 0.59, "grad_norm": 2.9205455098351596, "learning_rate": 3.773362952445889e-07, "loss": 0.1226, "step": 9276 }, { "epoch": 0.59, "grad_norm": 0.7484490139593507, "learning_rate": 3.77236179963389e-07, "loss": 0.2307, "step": 9277 }, { "epoch": 0.59, "grad_norm": 0.7675191985092316, "learning_rate": 3.771360699195404e-07, "loss": 0.088, "step": 9278 }, { "epoch": 0.59, "grad_norm": 0.48411160950391235, "learning_rate": 3.770359651173139e-07, "loss": 0.1327, "step": 9279 }, { "epoch": 0.59, "grad_norm": 2.966301264369236, "learning_rate": 3.769358655609807e-07, "loss": 0.1546, "step": 9280 }, { "epoch": 0.59, "grad_norm": 0.6779362212885075, "learning_rate": 3.7683577125481053e-07, "loss": 0.2624, "step": 9281 }, { "epoch": 0.59, "grad_norm": 0.7683336809436724, "learning_rate": 3.7673568220307417e-07, "loss": 0.0865, "step": 9282 }, { "epoch": 0.59, "grad_norm": 0.36820261982633506, "learning_rate": 3.766355984100412e-07, "loss": 0.0808, "step": 9283 }, { "epoch": 0.59, "grad_norm": 0.6542238951265417, "learning_rate": 3.7653551987998185e-07, "loss": 0.0406, "step": 9284 }, { "epoch": 0.59, "grad_norm": 1.2466414346265247, "learning_rate": 3.7643544661716514e-07, "loss": 0.1965, "step": 9285 }, { "epoch": 0.59, "grad_norm": 0.9634669945912673, "learning_rate": 3.7633537862586085e-07, "loss": 0.5376, "step": 9286 }, { "epoch": 0.59, "grad_norm": 10.43694322912159, "learning_rate": 3.7623531591033795e-07, "loss": 0.1466, "step": 9287 }, { "epoch": 0.59, "grad_norm": 4.7176798163861555, "learning_rate": 3.76135258474865e-07, "loss": 0.1206, "step": 9288 }, { "epoch": 0.59, "grad_norm": 3.3216627653953292, "learning_rate": 3.7603520632371127e-07, "loss": 0.0452, "step": 9289 }, { "epoch": 0.59, "grad_norm": 20.910602243800536, "learning_rate": 3.7593515946114457e-07, "loss": 0.2689, "step": 9290 }, { "epoch": 0.59, "grad_norm": 1.3322446981226637, "learning_rate": 3.758351178914336e-07, "loss": 0.2586, "step": 9291 }, { "epoch": 0.59, "grad_norm": 1.564679148620845, "learning_rate": 3.7573508161884587e-07, "loss": 0.0327, "step": 9292 }, { "epoch": 0.59, "grad_norm": 0.7216668245555732, "learning_rate": 3.7563505064764953e-07, "loss": 0.2728, "step": 9293 }, { "epoch": 0.59, "grad_norm": 0.9364007422457261, "learning_rate": 3.755350249821118e-07, "loss": 0.2889, "step": 9294 }, { "epoch": 0.59, "grad_norm": 0.5974168987425299, "learning_rate": 3.754350046265002e-07, "loss": 0.1377, "step": 9295 }, { "epoch": 0.59, "grad_norm": 0.6638751878720911, "learning_rate": 3.753349895850817e-07, "loss": 0.2111, "step": 9296 }, { "epoch": 0.59, "grad_norm": 0.4677716160403327, "learning_rate": 3.7523497986212317e-07, "loss": 0.0553, "step": 9297 }, { "epoch": 0.59, "grad_norm": 1.1163588469681387, "learning_rate": 3.7513497546189103e-07, "loss": 0.1391, "step": 9298 }, { "epoch": 0.59, "grad_norm": 1.3841680899630626, "learning_rate": 3.750349763886521e-07, "loss": 0.4298, "step": 9299 }, { "epoch": 0.59, "grad_norm": 2.948411230163331, "learning_rate": 3.7493498264667234e-07, "loss": 0.0064, "step": 9300 }, { "epoch": 0.59, "grad_norm": 0.6125477209147805, "learning_rate": 3.7483499424021737e-07, "loss": 0.1465, "step": 9301 }, { "epoch": 0.59, "grad_norm": 0.9717487418475622, "learning_rate": 3.747350111735533e-07, "loss": 0.2071, "step": 9302 }, { "epoch": 0.59, "grad_norm": 0.6911711802058582, "learning_rate": 3.7463503345094537e-07, "loss": 0.246, "step": 9303 }, { "epoch": 0.59, "grad_norm": 0.6588841672053187, "learning_rate": 3.74535061076659e-07, "loss": 0.0713, "step": 9304 }, { "epoch": 0.59, "grad_norm": 1.1389535575238425, "learning_rate": 3.7443509405495905e-07, "loss": 0.1852, "step": 9305 }, { "epoch": 0.59, "grad_norm": 0.6035440069910316, "learning_rate": 3.7433513239011063e-07, "loss": 0.2771, "step": 9306 }, { "epoch": 0.59, "grad_norm": 0.8692971748620172, "learning_rate": 3.742351760863778e-07, "loss": 0.2165, "step": 9307 }, { "epoch": 0.59, "grad_norm": 2.8784110511688423, "learning_rate": 3.7413522514802544e-07, "loss": 0.1849, "step": 9308 }, { "epoch": 0.59, "grad_norm": 1.0342959203137772, "learning_rate": 3.7403527957931713e-07, "loss": 0.1187, "step": 9309 }, { "epoch": 0.59, "grad_norm": 0.35069950752885065, "learning_rate": 3.7393533938451733e-07, "loss": 0.0126, "step": 9310 }, { "epoch": 0.59, "grad_norm": 0.8453374061680935, "learning_rate": 3.738354045678891e-07, "loss": 0.2379, "step": 9311 }, { "epoch": 0.59, "grad_norm": 7.790934286897221, "learning_rate": 3.737354751336963e-07, "loss": 0.195, "step": 9312 }, { "epoch": 0.59, "grad_norm": 3.04097460610923, "learning_rate": 3.736355510862018e-07, "loss": 0.0642, "step": 9313 }, { "epoch": 0.59, "grad_norm": 1.9689038288395968, "learning_rate": 3.7353563242966877e-07, "loss": 0.1235, "step": 9314 }, { "epoch": 0.59, "grad_norm": 0.9070018954853412, "learning_rate": 3.7343571916836005e-07, "loss": 0.1238, "step": 9315 }, { "epoch": 0.59, "grad_norm": 0.5193318612328358, "learning_rate": 3.733358113065378e-07, "loss": 0.1348, "step": 9316 }, { "epoch": 0.59, "grad_norm": 6.589835579930599, "learning_rate": 3.7323590884846454e-07, "loss": 0.2745, "step": 9317 }, { "epoch": 0.59, "grad_norm": 0.8207545384687512, "learning_rate": 3.7313601179840214e-07, "loss": 0.1382, "step": 9318 }, { "epoch": 0.59, "grad_norm": 1.7543516932605283, "learning_rate": 3.730361201606127e-07, "loss": 0.2343, "step": 9319 }, { "epoch": 0.59, "grad_norm": 2.319509783700083, "learning_rate": 3.7293623393935736e-07, "loss": 0.2488, "step": 9320 }, { "epoch": 0.59, "grad_norm": 0.9649618460249436, "learning_rate": 3.7283635313889784e-07, "loss": 0.2595, "step": 9321 }, { "epoch": 0.59, "grad_norm": 1.8895959725612803, "learning_rate": 3.7273647776349514e-07, "loss": 0.1304, "step": 9322 }, { "epoch": 0.59, "grad_norm": 0.6724856053530631, "learning_rate": 3.726366078174101e-07, "loss": 0.2315, "step": 9323 }, { "epoch": 0.59, "grad_norm": 0.494764728635633, "learning_rate": 3.7253674330490324e-07, "loss": 0.1345, "step": 9324 }, { "epoch": 0.59, "grad_norm": 1.1241400815157059, "learning_rate": 3.7243688423023546e-07, "loss": 0.3665, "step": 9325 }, { "epoch": 0.59, "grad_norm": 0.7634027749162353, "learning_rate": 3.7233703059766643e-07, "loss": 0.1472, "step": 9326 }, { "epoch": 0.59, "grad_norm": 0.35743247768133973, "learning_rate": 3.722371824114564e-07, "loss": 0.0067, "step": 9327 }, { "epoch": 0.59, "grad_norm": 0.19256407345676066, "learning_rate": 3.7213733967586514e-07, "loss": 0.1221, "step": 9328 }, { "epoch": 0.59, "grad_norm": 1.8973184367805298, "learning_rate": 3.720375023951517e-07, "loss": 0.1271, "step": 9329 }, { "epoch": 0.59, "grad_norm": 2.8442009928931453, "learning_rate": 3.7193767057357577e-07, "loss": 0.1489, "step": 9330 }, { "epoch": 0.6, "grad_norm": 0.1138760101326204, "learning_rate": 3.718378442153962e-07, "loss": 0.0028, "step": 9331 }, { "epoch": 0.6, "grad_norm": 0.8201737035455592, "learning_rate": 3.7173802332487196e-07, "loss": 0.1708, "step": 9332 }, { "epoch": 0.6, "grad_norm": 0.4636916227535591, "learning_rate": 3.716382079062613e-07, "loss": 0.1483, "step": 9333 }, { "epoch": 0.6, "grad_norm": 0.8163774505063767, "learning_rate": 3.71538397963823e-07, "loss": 0.352, "step": 9334 }, { "epoch": 0.6, "grad_norm": 0.40018528842627643, "learning_rate": 3.7143859350181464e-07, "loss": 0.2831, "step": 9335 }, { "epoch": 0.6, "grad_norm": 1.785518886776523, "learning_rate": 3.7133879452449446e-07, "loss": 0.4279, "step": 9336 }, { "epoch": 0.6, "grad_norm": 0.7049248450393616, "learning_rate": 3.712390010361198e-07, "loss": 0.1501, "step": 9337 }, { "epoch": 0.6, "grad_norm": 0.7123666464227902, "learning_rate": 3.711392130409484e-07, "loss": 0.2971, "step": 9338 }, { "epoch": 0.6, "grad_norm": 2.368363434577095, "learning_rate": 3.710394305432371e-07, "loss": 0.2618, "step": 9339 }, { "epoch": 0.6, "grad_norm": 0.8423457662773516, "learning_rate": 3.7093965354724286e-07, "loss": 0.1768, "step": 9340 }, { "epoch": 0.6, "grad_norm": 1.1982493547049378, "learning_rate": 3.708398820572225e-07, "loss": 0.0443, "step": 9341 }, { "epoch": 0.6, "grad_norm": 0.8572009384772475, "learning_rate": 3.707401160774325e-07, "loss": 0.074, "step": 9342 }, { "epoch": 0.6, "grad_norm": 0.715077017393968, "learning_rate": 3.70640355612129e-07, "loss": 0.1425, "step": 9343 }, { "epoch": 0.6, "grad_norm": 0.8019019532902004, "learning_rate": 3.7054060066556793e-07, "loss": 0.2665, "step": 9344 }, { "epoch": 0.6, "grad_norm": 0.791625294891656, "learning_rate": 3.7044085124200514e-07, "loss": 0.087, "step": 9345 }, { "epoch": 0.6, "grad_norm": 8.747912087089999, "learning_rate": 3.703411073456959e-07, "loss": 0.3365, "step": 9346 }, { "epoch": 0.6, "grad_norm": 0.6407920231572916, "learning_rate": 3.7024136898089576e-07, "loss": 0.3517, "step": 9347 }, { "epoch": 0.6, "grad_norm": 15.630045302440479, "learning_rate": 3.701416361518597e-07, "loss": 0.0223, "step": 9348 }, { "epoch": 0.6, "grad_norm": 1.3720280989672002, "learning_rate": 3.700419088628425e-07, "loss": 0.3173, "step": 9349 }, { "epoch": 0.6, "grad_norm": 0.2971003551245055, "learning_rate": 3.6994218711809856e-07, "loss": 0.097, "step": 9350 }, { "epoch": 0.6, "grad_norm": 0.48855604220375287, "learning_rate": 3.698424709218826e-07, "loss": 0.2794, "step": 9351 }, { "epoch": 0.6, "grad_norm": 1.434706071754007, "learning_rate": 3.697427602784483e-07, "loss": 0.2833, "step": 9352 }, { "epoch": 0.6, "grad_norm": 0.7130226436187235, "learning_rate": 3.696430551920499e-07, "loss": 0.1784, "step": 9353 }, { "epoch": 0.6, "grad_norm": 0.6419149544861212, "learning_rate": 3.6954335566694057e-07, "loss": 0.2665, "step": 9354 }, { "epoch": 0.6, "grad_norm": 1.7860592936408222, "learning_rate": 3.6944366170737415e-07, "loss": 0.1419, "step": 9355 }, { "epoch": 0.6, "grad_norm": 0.3870030044112588, "learning_rate": 3.693439733176035e-07, "loss": 0.1992, "step": 9356 }, { "epoch": 0.6, "grad_norm": 0.4643972104686778, "learning_rate": 3.6924429050188157e-07, "loss": 0.1586, "step": 9357 }, { "epoch": 0.6, "grad_norm": 3.778282250295543, "learning_rate": 3.6914461326446104e-07, "loss": 0.1099, "step": 9358 }, { "epoch": 0.6, "grad_norm": 1.0465978772358504, "learning_rate": 3.690449416095943e-07, "loss": 0.1571, "step": 9359 }, { "epoch": 0.6, "grad_norm": 1.5731659928726092, "learning_rate": 3.6894527554153383e-07, "loss": 0.3195, "step": 9360 }, { "epoch": 0.6, "grad_norm": 2.2875317520675376, "learning_rate": 3.688456150645311e-07, "loss": 0.2305, "step": 9361 }, { "epoch": 0.6, "grad_norm": 1.029337424805925, "learning_rate": 3.6874596018283833e-07, "loss": 0.088, "step": 9362 }, { "epoch": 0.6, "grad_norm": 0.8809815467086525, "learning_rate": 3.6864631090070653e-07, "loss": 0.3054, "step": 9363 }, { "epoch": 0.6, "grad_norm": 0.47611715679060396, "learning_rate": 3.6854666722238737e-07, "loss": 0.1209, "step": 9364 }, { "epoch": 0.6, "grad_norm": 0.6557302681228317, "learning_rate": 3.684470291521314e-07, "loss": 0.0543, "step": 9365 }, { "epoch": 0.6, "grad_norm": 0.31106441265079243, "learning_rate": 3.683473966941898e-07, "loss": 0.0051, "step": 9366 }, { "epoch": 0.6, "grad_norm": 2.020456985395643, "learning_rate": 3.682477698528128e-07, "loss": 0.1233, "step": 9367 }, { "epoch": 0.6, "grad_norm": 0.6984927839984826, "learning_rate": 3.6814814863225083e-07, "loss": 0.0894, "step": 9368 }, { "epoch": 0.6, "grad_norm": 1.0236622478125843, "learning_rate": 3.6804853303675407e-07, "loss": 0.0866, "step": 9369 }, { "epoch": 0.6, "grad_norm": 2.9520519063215946, "learning_rate": 3.679489230705719e-07, "loss": 0.1956, "step": 9370 }, { "epoch": 0.6, "grad_norm": 0.9785352268345708, "learning_rate": 3.6784931873795434e-07, "loss": 0.2661, "step": 9371 }, { "epoch": 0.6, "grad_norm": 0.5489439806777325, "learning_rate": 3.677497200431503e-07, "loss": 0.2252, "step": 9372 }, { "epoch": 0.6, "grad_norm": 1.1660682395226845, "learning_rate": 3.676501269904093e-07, "loss": 0.2097, "step": 9373 }, { "epoch": 0.6, "grad_norm": 0.9117422336753886, "learning_rate": 3.6755053958397964e-07, "loss": 0.1649, "step": 9374 }, { "epoch": 0.6, "grad_norm": 1.3174871712267413, "learning_rate": 3.6745095782811034e-07, "loss": 0.1165, "step": 9375 }, { "epoch": 0.6, "grad_norm": 1.1737490207513126, "learning_rate": 3.673513817270496e-07, "loss": 0.3228, "step": 9376 }, { "epoch": 0.6, "grad_norm": 0.3779584403245971, "learning_rate": 3.6725181128504566e-07, "loss": 0.0662, "step": 9377 }, { "epoch": 0.6, "grad_norm": 0.9040176754693658, "learning_rate": 3.6715224650634623e-07, "loss": 0.3983, "step": 9378 }, { "epoch": 0.6, "grad_norm": 2.0949680380558715, "learning_rate": 3.6705268739519916e-07, "loss": 0.2969, "step": 9379 }, { "epoch": 0.6, "grad_norm": 0.5585126973068211, "learning_rate": 3.669531339558515e-07, "loss": 0.0869, "step": 9380 }, { "epoch": 0.6, "grad_norm": 3.8409218650921324, "learning_rate": 3.668535861925509e-07, "loss": 0.2927, "step": 9381 }, { "epoch": 0.6, "grad_norm": 8.401990657290293, "learning_rate": 3.667540441095436e-07, "loss": 0.0338, "step": 9382 }, { "epoch": 0.6, "grad_norm": 0.27809309699568374, "learning_rate": 3.6665450771107697e-07, "loss": 0.1003, "step": 9383 }, { "epoch": 0.6, "grad_norm": 0.7875044896588642, "learning_rate": 3.6655497700139693e-07, "loss": 0.1111, "step": 9384 }, { "epoch": 0.6, "grad_norm": 1.3888928888215062, "learning_rate": 3.6645545198474973e-07, "loss": 0.1958, "step": 9385 }, { "epoch": 0.6, "grad_norm": 0.46544895086304044, "learning_rate": 3.6635593266538167e-07, "loss": 0.2682, "step": 9386 }, { "epoch": 0.6, "grad_norm": 0.5980123759044917, "learning_rate": 3.662564190475379e-07, "loss": 0.1744, "step": 9387 }, { "epoch": 0.6, "grad_norm": 1.738477076975794, "learning_rate": 3.6615691113546445e-07, "loss": 0.0572, "step": 9388 }, { "epoch": 0.6, "grad_norm": 0.4997082116318408, "learning_rate": 3.6605740893340596e-07, "loss": 0.2915, "step": 9389 }, { "epoch": 0.6, "grad_norm": 1.0518297815423197, "learning_rate": 3.659579124456079e-07, "loss": 0.2468, "step": 9390 }, { "epoch": 0.6, "grad_norm": 0.8886369436167223, "learning_rate": 3.6585842167631453e-07, "loss": 0.3167, "step": 9391 }, { "epoch": 0.6, "grad_norm": 0.7653665718881084, "learning_rate": 3.6575893662977066e-07, "loss": 0.171, "step": 9392 }, { "epoch": 0.6, "grad_norm": 1.3841488873652306, "learning_rate": 3.6565945731022036e-07, "loss": 0.1029, "step": 9393 }, { "epoch": 0.6, "grad_norm": 0.8641759029425111, "learning_rate": 3.655599837219077e-07, "loss": 0.2416, "step": 9394 }, { "epoch": 0.6, "grad_norm": 0.9428812489335693, "learning_rate": 3.6546051586907635e-07, "loss": 0.1067, "step": 9395 }, { "epoch": 0.6, "grad_norm": 1.3502908802967608, "learning_rate": 3.6536105375596996e-07, "loss": 0.0645, "step": 9396 }, { "epoch": 0.6, "grad_norm": 0.8358373455391138, "learning_rate": 3.652615973868317e-07, "loss": 0.0722, "step": 9397 }, { "epoch": 0.6, "grad_norm": 0.6973503012289795, "learning_rate": 3.651621467659044e-07, "loss": 0.1672, "step": 9398 }, { "epoch": 0.6, "grad_norm": 0.926952821654789, "learning_rate": 3.6506270189743116e-07, "loss": 0.3482, "step": 9399 }, { "epoch": 0.6, "grad_norm": 1.0162155844066454, "learning_rate": 3.649632627856541e-07, "loss": 0.3422, "step": 9400 }, { "epoch": 0.6, "grad_norm": 1.170829378895211, "learning_rate": 3.648638294348158e-07, "loss": 0.0836, "step": 9401 }, { "epoch": 0.6, "grad_norm": 0.33666530053854105, "learning_rate": 3.647644018491581e-07, "loss": 0.1849, "step": 9402 }, { "epoch": 0.6, "grad_norm": 1.195738706653307, "learning_rate": 3.646649800329231e-07, "loss": 0.2266, "step": 9403 }, { "epoch": 0.6, "grad_norm": 2.7221616589187154, "learning_rate": 3.6456556399035186e-07, "loss": 0.0995, "step": 9404 }, { "epoch": 0.6, "grad_norm": 1.5300694572259874, "learning_rate": 3.6446615372568616e-07, "loss": 0.181, "step": 9405 }, { "epoch": 0.6, "grad_norm": 14.89805046405319, "learning_rate": 3.643667492431666e-07, "loss": 0.0775, "step": 9406 }, { "epoch": 0.6, "grad_norm": 0.38542962638776146, "learning_rate": 3.642673505470344e-07, "loss": 0.0659, "step": 9407 }, { "epoch": 0.6, "grad_norm": 0.6983717939905597, "learning_rate": 3.6416795764152964e-07, "loss": 0.2694, "step": 9408 }, { "epoch": 0.6, "grad_norm": 0.47082656016469765, "learning_rate": 3.64068570530893e-07, "loss": 0.1048, "step": 9409 }, { "epoch": 0.6, "grad_norm": 0.9109312430517122, "learning_rate": 3.639691892193644e-07, "loss": 0.1162, "step": 9410 }, { "epoch": 0.6, "grad_norm": 1.6899362271898326, "learning_rate": 3.638698137111835e-07, "loss": 0.1932, "step": 9411 }, { "epoch": 0.6, "grad_norm": 0.8027858842048811, "learning_rate": 3.637704440105902e-07, "loss": 0.1846, "step": 9412 }, { "epoch": 0.6, "grad_norm": 12.739760075312104, "learning_rate": 3.636710801218235e-07, "loss": 0.1182, "step": 9413 }, { "epoch": 0.6, "grad_norm": 0.5382520406612367, "learning_rate": 3.635717220491227e-07, "loss": 0.2819, "step": 9414 }, { "epoch": 0.6, "grad_norm": 1.0858281035483512, "learning_rate": 3.6347236979672626e-07, "loss": 0.3465, "step": 9415 }, { "epoch": 0.6, "grad_norm": 1.9059460894166045, "learning_rate": 3.633730233688732e-07, "loss": 0.1408, "step": 9416 }, { "epoch": 0.6, "grad_norm": 0.7336907181010045, "learning_rate": 3.6327368276980145e-07, "loss": 0.0576, "step": 9417 }, { "epoch": 0.6, "grad_norm": 1.1336513371755648, "learning_rate": 3.631743480037495e-07, "loss": 0.2492, "step": 9418 }, { "epoch": 0.6, "grad_norm": 0.4045115547659766, "learning_rate": 3.630750190749546e-07, "loss": 0.2621, "step": 9419 }, { "epoch": 0.6, "grad_norm": 0.44199810016468766, "learning_rate": 3.6297569598765475e-07, "loss": 0.0086, "step": 9420 }, { "epoch": 0.6, "grad_norm": 1.2029416627921536, "learning_rate": 3.6287637874608713e-07, "loss": 0.3133, "step": 9421 }, { "epoch": 0.6, "grad_norm": 1.1686022841441073, "learning_rate": 3.6277706735448903e-07, "loss": 0.2998, "step": 9422 }, { "epoch": 0.6, "grad_norm": 1.519242917613396, "learning_rate": 3.6267776181709684e-07, "loss": 0.3648, "step": 9423 }, { "epoch": 0.6, "grad_norm": 0.8015422825334999, "learning_rate": 3.6257846213814767e-07, "loss": 0.0705, "step": 9424 }, { "epoch": 0.6, "grad_norm": 1.5074650534128662, "learning_rate": 3.6247916832187755e-07, "loss": 0.0888, "step": 9425 }, { "epoch": 0.6, "grad_norm": 1.4409350840386246, "learning_rate": 3.6237988037252227e-07, "loss": 0.3621, "step": 9426 }, { "epoch": 0.6, "grad_norm": 8.588334078053489, "learning_rate": 3.622805982943181e-07, "loss": 0.0433, "step": 9427 }, { "epoch": 0.6, "grad_norm": 1.690879103346311, "learning_rate": 3.621813220915004e-07, "loss": 0.2864, "step": 9428 }, { "epoch": 0.6, "grad_norm": 3.641539238707806, "learning_rate": 3.620820517683046e-07, "loss": 0.094, "step": 9429 }, { "epoch": 0.6, "grad_norm": 1.49798299721738, "learning_rate": 3.619827873289657e-07, "loss": 0.3174, "step": 9430 }, { "epoch": 0.6, "grad_norm": 0.6268779811381258, "learning_rate": 3.6188352877771865e-07, "loss": 0.1791, "step": 9431 }, { "epoch": 0.6, "grad_norm": 0.4414730057712469, "learning_rate": 3.617842761187977e-07, "loss": 0.3484, "step": 9432 }, { "epoch": 0.6, "grad_norm": 5.086231279791825, "learning_rate": 3.6168502935643763e-07, "loss": 0.019, "step": 9433 }, { "epoch": 0.6, "grad_norm": 0.4089373888040658, "learning_rate": 3.61585788494872e-07, "loss": 0.0678, "step": 9434 }, { "epoch": 0.6, "grad_norm": 1.4732372969873673, "learning_rate": 3.614865535383351e-07, "loss": 0.1703, "step": 9435 }, { "epoch": 0.6, "grad_norm": 1.5186388228339889, "learning_rate": 3.613873244910601e-07, "loss": 0.2791, "step": 9436 }, { "epoch": 0.6, "grad_norm": 0.9842137879472515, "learning_rate": 3.612881013572806e-07, "loss": 0.1501, "step": 9437 }, { "epoch": 0.6, "grad_norm": 0.5935332181125628, "learning_rate": 3.611888841412296e-07, "loss": 0.2552, "step": 9438 }, { "epoch": 0.6, "grad_norm": 0.787286726230883, "learning_rate": 3.6108967284713966e-07, "loss": 0.193, "step": 9439 }, { "epoch": 0.6, "grad_norm": 1.4755215820683798, "learning_rate": 3.6099046747924373e-07, "loss": 0.1659, "step": 9440 }, { "epoch": 0.6, "grad_norm": 0.6750708548489052, "learning_rate": 3.6089126804177364e-07, "loss": 0.1033, "step": 9441 }, { "epoch": 0.6, "grad_norm": 1.7863623723317774, "learning_rate": 3.60792074538962e-07, "loss": 0.1735, "step": 9442 }, { "epoch": 0.6, "grad_norm": 16.245197149121086, "learning_rate": 3.606928869750401e-07, "loss": 0.2154, "step": 9443 }, { "epoch": 0.6, "grad_norm": 0.6445252825729044, "learning_rate": 3.605937053542398e-07, "loss": 0.3043, "step": 9444 }, { "epoch": 0.6, "grad_norm": 0.9737129284554178, "learning_rate": 3.604945296807921e-07, "loss": 0.3247, "step": 9445 }, { "epoch": 0.6, "grad_norm": 0.3940290771510294, "learning_rate": 3.6039535995892835e-07, "loss": 0.0489, "step": 9446 }, { "epoch": 0.6, "grad_norm": 0.610151977789657, "learning_rate": 3.6029619619287897e-07, "loss": 0.0484, "step": 9447 }, { "epoch": 0.6, "grad_norm": 1.3523942494620698, "learning_rate": 3.60197038386875e-07, "loss": 0.2164, "step": 9448 }, { "epoch": 0.6, "grad_norm": 0.4857908866675192, "learning_rate": 3.600978865451462e-07, "loss": 0.0688, "step": 9449 }, { "epoch": 0.6, "grad_norm": 0.7683698264502922, "learning_rate": 3.59998740671923e-07, "loss": 0.1496, "step": 9450 }, { "epoch": 0.6, "grad_norm": 0.6713109763358255, "learning_rate": 3.5989960077143475e-07, "loss": 0.141, "step": 9451 }, { "epoch": 0.6, "grad_norm": 0.39428184273653033, "learning_rate": 3.598004668479113e-07, "loss": 0.1295, "step": 9452 }, { "epoch": 0.6, "grad_norm": 0.6878095952961367, "learning_rate": 3.597013389055818e-07, "loss": 0.0199, "step": 9453 }, { "epoch": 0.6, "grad_norm": 0.8524581325660707, "learning_rate": 3.5960221694867516e-07, "loss": 0.3448, "step": 9454 }, { "epoch": 0.6, "grad_norm": 1.5628398563066372, "learning_rate": 3.5950310098142016e-07, "loss": 0.2538, "step": 9455 }, { "epoch": 0.6, "grad_norm": 1.1186249288055055, "learning_rate": 3.594039910080452e-07, "loss": 0.0979, "step": 9456 }, { "epoch": 0.6, "grad_norm": 0.5744758314234945, "learning_rate": 3.5930488703277884e-07, "loss": 0.3416, "step": 9457 }, { "epoch": 0.6, "grad_norm": 0.990597953243976, "learning_rate": 3.5920578905984867e-07, "loss": 0.284, "step": 9458 }, { "epoch": 0.6, "grad_norm": 1.5819561244282796, "learning_rate": 3.591066970934827e-07, "loss": 0.1504, "step": 9459 }, { "epoch": 0.6, "grad_norm": 1.3169886141667242, "learning_rate": 3.5900761113790813e-07, "loss": 0.255, "step": 9460 }, { "epoch": 0.6, "grad_norm": 1.165269825801838, "learning_rate": 3.589085311973524e-07, "loss": 0.3182, "step": 9461 }, { "epoch": 0.6, "grad_norm": 12.824506802886921, "learning_rate": 3.588094572760423e-07, "loss": 0.2083, "step": 9462 }, { "epoch": 0.6, "grad_norm": 0.6393293552065233, "learning_rate": 3.587103893782046e-07, "loss": 0.0955, "step": 9463 }, { "epoch": 0.6, "grad_norm": 1.1996395451813144, "learning_rate": 3.5861132750806566e-07, "loss": 0.4389, "step": 9464 }, { "epoch": 0.6, "grad_norm": 2.61484900781638, "learning_rate": 3.5851227166985184e-07, "loss": 0.2825, "step": 9465 }, { "epoch": 0.6, "grad_norm": 6.172490924953922, "learning_rate": 3.5841322186778894e-07, "loss": 0.1773, "step": 9466 }, { "epoch": 0.6, "grad_norm": 5.638159471201789, "learning_rate": 3.583141781061024e-07, "loss": 0.1132, "step": 9467 }, { "epoch": 0.6, "grad_norm": 0.9351782254085814, "learning_rate": 3.582151403890182e-07, "loss": 0.2241, "step": 9468 }, { "epoch": 0.6, "grad_norm": 0.9256261683344589, "learning_rate": 3.581161087207608e-07, "loss": 0.2268, "step": 9469 }, { "epoch": 0.6, "grad_norm": 1.3051866291544902, "learning_rate": 3.580170831055557e-07, "loss": 0.2202, "step": 9470 }, { "epoch": 0.6, "grad_norm": 1.2190511644389843, "learning_rate": 3.57918063547627e-07, "loss": 0.0569, "step": 9471 }, { "epoch": 0.6, "grad_norm": 0.442025476340937, "learning_rate": 3.578190500511994e-07, "loss": 0.1541, "step": 9472 }, { "epoch": 0.6, "grad_norm": 0.8236563443916848, "learning_rate": 3.5772004262049705e-07, "loss": 0.3391, "step": 9473 }, { "epoch": 0.6, "grad_norm": 1.006509833273857, "learning_rate": 3.5762104125974357e-07, "loss": 0.1443, "step": 9474 }, { "epoch": 0.6, "grad_norm": 5.159114976755673, "learning_rate": 3.575220459731627e-07, "loss": 0.1633, "step": 9475 }, { "epoch": 0.6, "grad_norm": 0.4073080062571323, "learning_rate": 3.5742305676497785e-07, "loss": 0.0046, "step": 9476 }, { "epoch": 0.6, "grad_norm": 0.9576682490917284, "learning_rate": 3.573240736394119e-07, "loss": 0.3445, "step": 9477 }, { "epoch": 0.6, "grad_norm": 0.39686532945091335, "learning_rate": 3.57225096600688e-07, "loss": 0.0589, "step": 9478 }, { "epoch": 0.6, "grad_norm": 0.8499172182669122, "learning_rate": 3.5712612565302846e-07, "loss": 0.183, "step": 9479 }, { "epoch": 0.6, "grad_norm": 1.0256610774157449, "learning_rate": 3.5702716080065544e-07, "loss": 0.4106, "step": 9480 }, { "epoch": 0.6, "grad_norm": 0.6932690415855755, "learning_rate": 3.569282020477912e-07, "loss": 0.115, "step": 9481 }, { "epoch": 0.6, "grad_norm": 0.6391868470132124, "learning_rate": 3.568292493986574e-07, "loss": 0.2923, "step": 9482 }, { "epoch": 0.6, "grad_norm": 2.0553878712678806, "learning_rate": 3.5673030285747583e-07, "loss": 0.126, "step": 9483 }, { "epoch": 0.6, "grad_norm": 0.21637608116406298, "learning_rate": 3.566313624284674e-07, "loss": 0.1065, "step": 9484 }, { "epoch": 0.6, "grad_norm": 1.875493412863062, "learning_rate": 3.565324281158534e-07, "loss": 0.158, "step": 9485 }, { "epoch": 0.6, "grad_norm": 0.2088440849044081, "learning_rate": 3.564334999238542e-07, "loss": 0.0567, "step": 9486 }, { "epoch": 0.6, "grad_norm": 0.6064417453901161, "learning_rate": 3.563345778566907e-07, "loss": 0.0139, "step": 9487 }, { "epoch": 0.61, "grad_norm": 0.6574714732114035, "learning_rate": 3.562356619185827e-07, "loss": 0.2009, "step": 9488 }, { "epoch": 0.61, "grad_norm": 1.155984125520488, "learning_rate": 3.5613675211375063e-07, "loss": 0.1546, "step": 9489 }, { "epoch": 0.61, "grad_norm": 0.5766695840302901, "learning_rate": 3.560378484464137e-07, "loss": 0.3639, "step": 9490 }, { "epoch": 0.61, "grad_norm": 0.48979977542295144, "learning_rate": 3.559389509207916e-07, "loss": 0.1652, "step": 9491 }, { "epoch": 0.61, "grad_norm": 0.493927423119219, "learning_rate": 3.5584005954110343e-07, "loss": 0.1472, "step": 9492 }, { "epoch": 0.61, "grad_norm": 0.7184802566700499, "learning_rate": 3.5574117431156826e-07, "loss": 0.3289, "step": 9493 }, { "epoch": 0.61, "grad_norm": 0.5233670046628419, "learning_rate": 3.5564229523640466e-07, "loss": 0.1573, "step": 9494 }, { "epoch": 0.61, "grad_norm": 1.5534037970393029, "learning_rate": 3.5554342231983063e-07, "loss": 0.3675, "step": 9495 }, { "epoch": 0.61, "grad_norm": 2.866844066999668, "learning_rate": 3.554445555660649e-07, "loss": 0.0769, "step": 9496 }, { "epoch": 0.61, "grad_norm": 0.7776888752887263, "learning_rate": 3.5534569497932474e-07, "loss": 0.2311, "step": 9497 }, { "epoch": 0.61, "grad_norm": 1.7046551376374321, "learning_rate": 3.552468405638282e-07, "loss": 0.2205, "step": 9498 }, { "epoch": 0.61, "grad_norm": 0.6692309773361224, "learning_rate": 3.551479923237924e-07, "loss": 0.1334, "step": 9499 }, { "epoch": 0.61, "grad_norm": 2.104945431067929, "learning_rate": 3.5504915026343443e-07, "loss": 0.071, "step": 9500 }, { "epoch": 0.61, "grad_norm": 0.5064598508166142, "learning_rate": 3.54950314386971e-07, "loss": 0.3589, "step": 9501 }, { "epoch": 0.61, "grad_norm": 0.8520594832192026, "learning_rate": 3.548514846986189e-07, "loss": 0.1262, "step": 9502 }, { "epoch": 0.61, "grad_norm": 15.362113891260808, "learning_rate": 3.547526612025942e-07, "loss": 0.3452, "step": 9503 }, { "epoch": 0.61, "grad_norm": 0.9419430460431464, "learning_rate": 3.5465384390311297e-07, "loss": 0.447, "step": 9504 }, { "epoch": 0.61, "grad_norm": 1.2961654803612481, "learning_rate": 3.545550328043908e-07, "loss": 0.1917, "step": 9505 }, { "epoch": 0.61, "grad_norm": 0.5315887724165193, "learning_rate": 3.5445622791064357e-07, "loss": 0.2115, "step": 9506 }, { "epoch": 0.61, "grad_norm": 0.4770144668206306, "learning_rate": 3.543574292260861e-07, "loss": 0.0086, "step": 9507 }, { "epoch": 0.61, "grad_norm": 1.2355838871005962, "learning_rate": 3.542586367549334e-07, "loss": 0.1475, "step": 9508 }, { "epoch": 0.61, "grad_norm": 1.2483751771922211, "learning_rate": 3.541598505014004e-07, "loss": 0.256, "step": 9509 }, { "epoch": 0.61, "grad_norm": 0.4285539010868225, "learning_rate": 3.540610704697011e-07, "loss": 0.2119, "step": 9510 }, { "epoch": 0.61, "grad_norm": 0.734726231846914, "learning_rate": 3.5396229666405026e-07, "loss": 0.2798, "step": 9511 }, { "epoch": 0.61, "grad_norm": 0.482766810153079, "learning_rate": 3.538635290886611e-07, "loss": 0.2323, "step": 9512 }, { "epoch": 0.61, "grad_norm": 2.0432071428183933, "learning_rate": 3.5376476774774776e-07, "loss": 0.3549, "step": 9513 }, { "epoch": 0.61, "grad_norm": 0.7664898071153935, "learning_rate": 3.536660126455233e-07, "loss": 0.0092, "step": 9514 }, { "epoch": 0.61, "grad_norm": 0.6304981059941067, "learning_rate": 3.5356726378620103e-07, "loss": 0.005, "step": 9515 }, { "epoch": 0.61, "grad_norm": 0.38579326887007837, "learning_rate": 3.5346852117399347e-07, "loss": 0.0814, "step": 9516 }, { "epoch": 0.61, "grad_norm": 1.3293810669383335, "learning_rate": 3.533697848131134e-07, "loss": 0.1155, "step": 9517 }, { "epoch": 0.61, "grad_norm": 14.950871791178397, "learning_rate": 3.5327105470777305e-07, "loss": 0.1445, "step": 9518 }, { "epoch": 0.61, "grad_norm": 1.7977325969398998, "learning_rate": 3.531723308621847e-07, "loss": 0.1063, "step": 9519 }, { "epoch": 0.61, "grad_norm": 2.3291692439433445, "learning_rate": 3.5307361328055976e-07, "loss": 0.1668, "step": 9520 }, { "epoch": 0.61, "grad_norm": 12.029977035286095, "learning_rate": 3.529749019671097e-07, "loss": 0.1566, "step": 9521 }, { "epoch": 0.61, "grad_norm": 0.759593639531423, "learning_rate": 3.5287619692604607e-07, "loss": 0.1452, "step": 9522 }, { "epoch": 0.61, "grad_norm": 5.215541996792754, "learning_rate": 3.527774981615794e-07, "loss": 0.1516, "step": 9523 }, { "epoch": 0.61, "grad_norm": 0.9919919160786849, "learning_rate": 3.526788056779208e-07, "loss": 0.1444, "step": 9524 }, { "epoch": 0.61, "grad_norm": 0.6100328855461785, "learning_rate": 3.5258011947928047e-07, "loss": 0.1438, "step": 9525 }, { "epoch": 0.61, "grad_norm": 0.6662172389324932, "learning_rate": 3.524814395698686e-07, "loss": 0.1544, "step": 9526 }, { "epoch": 0.61, "grad_norm": 1.8541939750322884, "learning_rate": 3.5238276595389495e-07, "loss": 0.2717, "step": 9527 }, { "epoch": 0.61, "grad_norm": 0.9632005762172113, "learning_rate": 3.522840986355694e-07, "loss": 0.2894, "step": 9528 }, { "epoch": 0.61, "grad_norm": 1.787435734934215, "learning_rate": 3.5218543761910104e-07, "loss": 0.0897, "step": 9529 }, { "epoch": 0.61, "grad_norm": 0.33808649369160954, "learning_rate": 3.520867829086992e-07, "loss": 0.1316, "step": 9530 }, { "epoch": 0.61, "grad_norm": 0.5051858975240087, "learning_rate": 3.519881345085723e-07, "loss": 0.0644, "step": 9531 }, { "epoch": 0.61, "grad_norm": 0.5099164061138237, "learning_rate": 3.5188949242292945e-07, "loss": 0.2252, "step": 9532 }, { "epoch": 0.61, "grad_norm": 1.8753734344434103, "learning_rate": 3.517908566559783e-07, "loss": 0.1211, "step": 9533 }, { "epoch": 0.61, "grad_norm": 0.7426447517918439, "learning_rate": 3.5169222721192735e-07, "loss": 0.4824, "step": 9534 }, { "epoch": 0.61, "grad_norm": 1.0894601162545856, "learning_rate": 3.515936040949841e-07, "loss": 0.0962, "step": 9535 }, { "epoch": 0.61, "grad_norm": 1.8256599029554128, "learning_rate": 3.514949873093559e-07, "loss": 0.0358, "step": 9536 }, { "epoch": 0.61, "grad_norm": 8.405568848617357, "learning_rate": 3.513963768592502e-07, "loss": 0.1053, "step": 9537 }, { "epoch": 0.61, "grad_norm": 0.7117838901206692, "learning_rate": 3.5129777274887363e-07, "loss": 0.0553, "step": 9538 }, { "epoch": 0.61, "grad_norm": 0.8556497796650047, "learning_rate": 3.511991749824332e-07, "loss": 0.1805, "step": 9539 }, { "epoch": 0.61, "grad_norm": 0.9670599418896123, "learning_rate": 3.5110058356413497e-07, "loss": 0.3328, "step": 9540 }, { "epoch": 0.61, "grad_norm": 1.2557616840936097, "learning_rate": 3.510019984981853e-07, "loss": 0.016, "step": 9541 }, { "epoch": 0.61, "grad_norm": 1.192287509787701, "learning_rate": 3.509034197887897e-07, "loss": 0.3793, "step": 9542 }, { "epoch": 0.61, "grad_norm": 0.5618701902969143, "learning_rate": 3.5080484744015405e-07, "loss": 0.1652, "step": 9543 }, { "epoch": 0.61, "grad_norm": 0.4294697667042685, "learning_rate": 3.5070628145648353e-07, "loss": 0.1548, "step": 9544 }, { "epoch": 0.61, "grad_norm": 1.2650894762362481, "learning_rate": 3.5060772184198313e-07, "loss": 0.0619, "step": 9545 }, { "epoch": 0.61, "grad_norm": 0.2826780412617805, "learning_rate": 3.5050916860085757e-07, "loss": 0.1783, "step": 9546 }, { "epoch": 0.61, "grad_norm": 0.48055308768303967, "learning_rate": 3.5041062173731153e-07, "loss": 0.0374, "step": 9547 }, { "epoch": 0.61, "grad_norm": 0.26875555863780737, "learning_rate": 3.503120812555491e-07, "loss": 0.1087, "step": 9548 }, { "epoch": 0.61, "grad_norm": 0.7210446372994427, "learning_rate": 3.5021354715977404e-07, "loss": 0.2469, "step": 9549 }, { "epoch": 0.61, "grad_norm": 1.9612550213102113, "learning_rate": 3.5011501945419034e-07, "loss": 0.2909, "step": 9550 }, { "epoch": 0.61, "grad_norm": 1.3144814339545225, "learning_rate": 3.5001649814300103e-07, "loss": 0.3016, "step": 9551 }, { "epoch": 0.61, "grad_norm": 0.5657734398150366, "learning_rate": 3.4991798323040957e-07, "loss": 0.0682, "step": 9552 }, { "epoch": 0.61, "grad_norm": 0.7252407911586491, "learning_rate": 3.4981947472061846e-07, "loss": 0.2639, "step": 9553 }, { "epoch": 0.61, "grad_norm": 1.6242112103120891, "learning_rate": 3.497209726178306e-07, "loss": 0.0052, "step": 9554 }, { "epoch": 0.61, "grad_norm": 0.2664016430341551, "learning_rate": 3.4962247692624806e-07, "loss": 0.1158, "step": 9555 }, { "epoch": 0.61, "grad_norm": 1.3797610985119093, "learning_rate": 3.495239876500732e-07, "loss": 0.1901, "step": 9556 }, { "epoch": 0.61, "grad_norm": 1.0646627807982256, "learning_rate": 3.494255047935072e-07, "loss": 0.264, "step": 9557 }, { "epoch": 0.61, "grad_norm": 0.8334236528510394, "learning_rate": 3.493270283607521e-07, "loss": 0.3143, "step": 9558 }, { "epoch": 0.61, "grad_norm": 0.6066992204145405, "learning_rate": 3.492285583560086e-07, "loss": 0.2633, "step": 9559 }, { "epoch": 0.61, "grad_norm": 0.5321456602640795, "learning_rate": 3.4913009478347824e-07, "loss": 0.1904, "step": 9560 }, { "epoch": 0.61, "grad_norm": 0.7261091688805966, "learning_rate": 3.49031637647361e-07, "loss": 0.4801, "step": 9561 }, { "epoch": 0.61, "grad_norm": 1.033724474706224, "learning_rate": 3.4893318695185767e-07, "loss": 0.4179, "step": 9562 }, { "epoch": 0.61, "grad_norm": 3.075835070526746, "learning_rate": 3.488347427011684e-07, "loss": 0.3301, "step": 9563 }, { "epoch": 0.61, "grad_norm": 0.502068478136284, "learning_rate": 3.4873630489949274e-07, "loss": 0.0192, "step": 9564 }, { "epoch": 0.61, "grad_norm": 0.395113070811715, "learning_rate": 3.486378735510306e-07, "loss": 0.0063, "step": 9565 }, { "epoch": 0.61, "grad_norm": 1.7232580462657048, "learning_rate": 3.4853944865998073e-07, "loss": 0.2881, "step": 9566 }, { "epoch": 0.61, "grad_norm": 0.9599165312685054, "learning_rate": 3.484410302305427e-07, "loss": 0.3233, "step": 9567 }, { "epoch": 0.61, "grad_norm": 1.4703181695866034, "learning_rate": 3.483426182669149e-07, "loss": 0.1889, "step": 9568 }, { "epoch": 0.61, "grad_norm": 1.146495240531375, "learning_rate": 3.4824421277329585e-07, "loss": 0.4046, "step": 9569 }, { "epoch": 0.61, "grad_norm": 0.4314771979829906, "learning_rate": 3.481458137538838e-07, "loss": 0.1848, "step": 9570 }, { "epoch": 0.61, "grad_norm": 0.9250065699509874, "learning_rate": 3.4804742121287654e-07, "loss": 0.4214, "step": 9571 }, { "epoch": 0.61, "grad_norm": 0.3293318691339513, "learning_rate": 3.4794903515447174e-07, "loss": 0.188, "step": 9572 }, { "epoch": 0.61, "grad_norm": 0.4910563428076659, "learning_rate": 3.4785065558286696e-07, "loss": 0.1863, "step": 9573 }, { "epoch": 0.61, "grad_norm": 1.5969956253129487, "learning_rate": 3.477522825022588e-07, "loss": 0.0602, "step": 9574 }, { "epoch": 0.61, "grad_norm": 0.3744545662743759, "learning_rate": 3.476539159168446e-07, "loss": 0.0669, "step": 9575 }, { "epoch": 0.61, "grad_norm": 7.734299639347295, "learning_rate": 3.475555558308205e-07, "loss": 0.4074, "step": 9576 }, { "epoch": 0.61, "grad_norm": 1.3504725250091854, "learning_rate": 3.4745720224838275e-07, "loss": 0.424, "step": 9577 }, { "epoch": 0.61, "grad_norm": 0.4331875068739966, "learning_rate": 3.4735885517372745e-07, "loss": 0.0128, "step": 9578 }, { "epoch": 0.61, "grad_norm": 0.42824816220074896, "learning_rate": 3.472605146110501e-07, "loss": 0.153, "step": 9579 }, { "epoch": 0.61, "grad_norm": 0.4351001430071234, "learning_rate": 3.4716218056454647e-07, "loss": 0.1793, "step": 9580 }, { "epoch": 0.61, "grad_norm": 1.4280842403548806, "learning_rate": 3.4706385303841134e-07, "loss": 0.1541, "step": 9581 }, { "epoch": 0.61, "grad_norm": 0.5926766013134903, "learning_rate": 3.4696553203683983e-07, "loss": 0.1084, "step": 9582 }, { "epoch": 0.61, "grad_norm": 1.0985680596498748, "learning_rate": 3.4686721756402616e-07, "loss": 0.2146, "step": 9583 }, { "epoch": 0.61, "grad_norm": 0.8880529549801257, "learning_rate": 3.4676890962416496e-07, "loss": 0.017, "step": 9584 }, { "epoch": 0.61, "grad_norm": 0.6677483589894967, "learning_rate": 3.4667060822145e-07, "loss": 0.2069, "step": 9585 }, { "epoch": 0.61, "grad_norm": 1.0592907579725246, "learning_rate": 3.4657231336007523e-07, "loss": 0.13, "step": 9586 }, { "epoch": 0.61, "grad_norm": 3.6592791785561625, "learning_rate": 3.4647402504423385e-07, "loss": 0.1032, "step": 9587 }, { "epoch": 0.61, "grad_norm": 0.5331203369768155, "learning_rate": 3.463757432781193e-07, "loss": 0.2472, "step": 9588 }, { "epoch": 0.61, "grad_norm": 7.921419468819464, "learning_rate": 3.4627746806592444e-07, "loss": 0.3503, "step": 9589 }, { "epoch": 0.61, "grad_norm": 0.5934001506174543, "learning_rate": 3.4617919941184166e-07, "loss": 0.4153, "step": 9590 }, { "epoch": 0.61, "grad_norm": 0.9025315697185343, "learning_rate": 3.460809373200636e-07, "loss": 0.051, "step": 9591 }, { "epoch": 0.61, "grad_norm": 0.9931361896209535, "learning_rate": 3.4598268179478194e-07, "loss": 0.279, "step": 9592 }, { "epoch": 0.61, "grad_norm": 6.088044584822752, "learning_rate": 3.45884432840189e-07, "loss": 0.059, "step": 9593 }, { "epoch": 0.61, "grad_norm": 1.2314237640656722, "learning_rate": 3.4578619046047567e-07, "loss": 0.2023, "step": 9594 }, { "epoch": 0.61, "grad_norm": 0.5092112323077743, "learning_rate": 3.456879546598337e-07, "loss": 0.093, "step": 9595 }, { "epoch": 0.61, "grad_norm": 4.128925925754039, "learning_rate": 3.4558972544245356e-07, "loss": 0.1333, "step": 9596 }, { "epoch": 0.61, "grad_norm": 1.8043015206803905, "learning_rate": 3.454915028125263e-07, "loss": 0.2976, "step": 9597 }, { "epoch": 0.61, "grad_norm": 1.3068952477633422, "learning_rate": 3.45393286774242e-07, "loss": 0.2758, "step": 9598 }, { "epoch": 0.61, "grad_norm": 0.9730817044959121, "learning_rate": 3.452950773317911e-07, "loss": 0.3222, "step": 9599 }, { "epoch": 0.61, "grad_norm": 4.272755291092515, "learning_rate": 3.4519687448936295e-07, "loss": 0.2205, "step": 9600 }, { "epoch": 0.61, "grad_norm": 1.5868444706628881, "learning_rate": 3.4509867825114755e-07, "loss": 0.1644, "step": 9601 }, { "epoch": 0.61, "grad_norm": 1.8330587807219971, "learning_rate": 3.450004886213337e-07, "loss": 0.0324, "step": 9602 }, { "epoch": 0.61, "grad_norm": 3.0068560904339283, "learning_rate": 3.4490230560411085e-07, "loss": 0.1586, "step": 9603 }, { "epoch": 0.61, "grad_norm": 1.0001005538099705, "learning_rate": 3.448041292036673e-07, "loss": 0.2305, "step": 9604 }, { "epoch": 0.61, "grad_norm": 0.4916345275421797, "learning_rate": 3.447059594241916e-07, "loss": 0.0538, "step": 9605 }, { "epoch": 0.61, "grad_norm": 2.618687922918267, "learning_rate": 3.446077962698718e-07, "loss": 0.2956, "step": 9606 }, { "epoch": 0.61, "grad_norm": 0.5696702585567011, "learning_rate": 3.445096397448958e-07, "loss": 0.2709, "step": 9607 }, { "epoch": 0.61, "grad_norm": 0.1884662622061173, "learning_rate": 3.4441148985345123e-07, "loss": 0.0745, "step": 9608 }, { "epoch": 0.61, "grad_norm": 1.1563562383245019, "learning_rate": 3.443133465997251e-07, "loss": 0.3654, "step": 9609 }, { "epoch": 0.61, "grad_norm": 0.2414528424868526, "learning_rate": 3.442152099879048e-07, "loss": 0.0804, "step": 9610 }, { "epoch": 0.61, "grad_norm": 0.9512809743923842, "learning_rate": 3.441170800221765e-07, "loss": 0.1162, "step": 9611 }, { "epoch": 0.61, "grad_norm": 1.0785385980380031, "learning_rate": 3.440189567067273e-07, "loss": 0.0659, "step": 9612 }, { "epoch": 0.61, "grad_norm": 1.5797862836251841, "learning_rate": 3.4392084004574275e-07, "loss": 0.3305, "step": 9613 }, { "epoch": 0.61, "grad_norm": 0.6063870396351476, "learning_rate": 3.4382273004340887e-07, "loss": 0.2354, "step": 9614 }, { "epoch": 0.61, "grad_norm": 2.5974500860148275, "learning_rate": 3.4372462670391144e-07, "loss": 0.1743, "step": 9615 }, { "epoch": 0.61, "grad_norm": 1.3077113945833219, "learning_rate": 3.436265300314355e-07, "loss": 0.1657, "step": 9616 }, { "epoch": 0.61, "grad_norm": 0.603166167192824, "learning_rate": 3.4352844003016624e-07, "loss": 0.2456, "step": 9617 }, { "epoch": 0.61, "grad_norm": 0.2856105969290681, "learning_rate": 3.434303567042881e-07, "loss": 0.2376, "step": 9618 }, { "epoch": 0.61, "grad_norm": 0.9429974027176391, "learning_rate": 3.433322800579859e-07, "loss": 0.2456, "step": 9619 }, { "epoch": 0.61, "grad_norm": 0.880197302547717, "learning_rate": 3.432342100954434e-07, "loss": 0.1117, "step": 9620 }, { "epoch": 0.61, "grad_norm": 0.2472669673392983, "learning_rate": 3.431361468208448e-07, "loss": 0.0912, "step": 9621 }, { "epoch": 0.61, "grad_norm": 0.4222150174216388, "learning_rate": 3.4303809023837327e-07, "loss": 0.2073, "step": 9622 }, { "epoch": 0.61, "grad_norm": 0.1100839309970587, "learning_rate": 3.4294004035221246e-07, "loss": 0.0011, "step": 9623 }, { "epoch": 0.61, "grad_norm": 0.43061421143584283, "learning_rate": 3.428419971665452e-07, "loss": 0.1153, "step": 9624 }, { "epoch": 0.61, "grad_norm": 1.1921919757851094, "learning_rate": 3.4274396068555446e-07, "loss": 0.3812, "step": 9625 }, { "epoch": 0.61, "grad_norm": 0.4931510013296186, "learning_rate": 3.4264593091342225e-07, "loss": 0.144, "step": 9626 }, { "epoch": 0.61, "grad_norm": 4.841924744830834, "learning_rate": 3.425479078543312e-07, "loss": 0.0298, "step": 9627 }, { "epoch": 0.61, "grad_norm": 3.2763868997235948, "learning_rate": 3.424498915124627e-07, "loss": 0.2633, "step": 9628 }, { "epoch": 0.61, "grad_norm": 0.4627046317141414, "learning_rate": 3.423518818919987e-07, "loss": 0.0877, "step": 9629 }, { "epoch": 0.61, "grad_norm": 1.1415060188542492, "learning_rate": 3.4225387899712017e-07, "loss": 0.2889, "step": 9630 }, { "epoch": 0.61, "grad_norm": 2.8697188070980397, "learning_rate": 3.4215588283200847e-07, "loss": 0.4644, "step": 9631 }, { "epoch": 0.61, "grad_norm": 0.7868394702210959, "learning_rate": 3.42057893400844e-07, "loss": 0.1948, "step": 9632 }, { "epoch": 0.61, "grad_norm": 0.7210343365033178, "learning_rate": 3.419599107078072e-07, "loss": 0.0843, "step": 9633 }, { "epoch": 0.61, "grad_norm": 0.9195153597489307, "learning_rate": 3.418619347570785e-07, "loss": 0.0828, "step": 9634 }, { "epoch": 0.61, "grad_norm": 0.31388945391280904, "learning_rate": 3.4176396555283744e-07, "loss": 0.2229, "step": 9635 }, { "epoch": 0.61, "grad_norm": 16.59159076340234, "learning_rate": 3.4166600309926387e-07, "loss": 0.2786, "step": 9636 }, { "epoch": 0.61, "grad_norm": 0.5840827296823353, "learning_rate": 3.4156804740053665e-07, "loss": 0.1857, "step": 9637 }, { "epoch": 0.61, "grad_norm": 1.304655110458904, "learning_rate": 3.414700984608352e-07, "loss": 0.0982, "step": 9638 }, { "epoch": 0.61, "grad_norm": 0.23187899125055195, "learning_rate": 3.4137215628433793e-07, "loss": 0.0079, "step": 9639 }, { "epoch": 0.61, "grad_norm": 1.549274691583505, "learning_rate": 3.412742208752234e-07, "loss": 0.2203, "step": 9640 }, { "epoch": 0.61, "grad_norm": 0.5426514275423491, "learning_rate": 3.4117629223766965e-07, "loss": 0.1101, "step": 9641 }, { "epoch": 0.61, "grad_norm": 0.8701990932823046, "learning_rate": 3.4107837037585456e-07, "loss": 0.2096, "step": 9642 }, { "epoch": 0.61, "grad_norm": 0.31101715403328684, "learning_rate": 3.4098045529395557e-07, "loss": 0.09, "step": 9643 }, { "epoch": 0.62, "grad_norm": 0.49925692395483745, "learning_rate": 3.408825469961503e-07, "loss": 0.009, "step": 9644 }, { "epoch": 0.62, "grad_norm": 1.0480465009927071, "learning_rate": 3.407846454866153e-07, "loss": 0.2926, "step": 9645 }, { "epoch": 0.62, "grad_norm": 1.1996004083321903, "learning_rate": 3.4068675076952735e-07, "loss": 0.3273, "step": 9646 }, { "epoch": 0.62, "grad_norm": 0.6171935971223315, "learning_rate": 3.40588862849063e-07, "loss": 0.1699, "step": 9647 }, { "epoch": 0.62, "grad_norm": 0.4455311050832932, "learning_rate": 3.40490981729398e-07, "loss": 0.1556, "step": 9648 }, { "epoch": 0.62, "grad_norm": 15.928084012225668, "learning_rate": 3.403931074147085e-07, "loss": 0.1599, "step": 9649 }, { "epoch": 0.62, "grad_norm": 0.4388893883048158, "learning_rate": 3.4029523990916984e-07, "loss": 0.0156, "step": 9650 }, { "epoch": 0.62, "grad_norm": 0.8493084512380781, "learning_rate": 3.4019737921695734e-07, "loss": 0.0307, "step": 9651 }, { "epoch": 0.62, "grad_norm": 2.2978810644829313, "learning_rate": 3.4009952534224573e-07, "loss": 0.025, "step": 9652 }, { "epoch": 0.62, "grad_norm": 1.0884515437922089, "learning_rate": 3.400016782892101e-07, "loss": 0.0912, "step": 9653 }, { "epoch": 0.62, "grad_norm": 1.1045847972889378, "learning_rate": 3.3990383806202427e-07, "loss": 0.218, "step": 9654 }, { "epoch": 0.62, "grad_norm": 1.3820573701876342, "learning_rate": 3.398060046648627e-07, "loss": 0.1362, "step": 9655 }, { "epoch": 0.62, "grad_norm": 1.2204567299780504, "learning_rate": 3.3970817810189883e-07, "loss": 0.2979, "step": 9656 }, { "epoch": 0.62, "grad_norm": 0.3358705612120792, "learning_rate": 3.396103583773066e-07, "loss": 0.0846, "step": 9657 }, { "epoch": 0.62, "grad_norm": 0.7443790564141608, "learning_rate": 3.3951254549525865e-07, "loss": 0.2865, "step": 9658 }, { "epoch": 0.62, "grad_norm": 1.864869465123004, "learning_rate": 3.394147394599281e-07, "loss": 0.1399, "step": 9659 }, { "epoch": 0.62, "grad_norm": 1.0788401377321628, "learning_rate": 3.3931694027548774e-07, "loss": 0.1065, "step": 9660 }, { "epoch": 0.62, "grad_norm": 0.8862960168875686, "learning_rate": 3.392191479461096e-07, "loss": 0.1619, "step": 9661 }, { "epoch": 0.62, "grad_norm": 0.45105233462744826, "learning_rate": 3.39121362475966e-07, "loss": 0.011, "step": 9662 }, { "epoch": 0.62, "grad_norm": 0.40562950885699883, "learning_rate": 3.3902358386922823e-07, "loss": 0.1351, "step": 9663 }, { "epoch": 0.62, "grad_norm": 1.0528420955859363, "learning_rate": 3.389258121300682e-07, "loss": 0.142, "step": 9664 }, { "epoch": 0.62, "grad_norm": 0.322898913995691, "learning_rate": 3.388280472626567e-07, "loss": 0.0512, "step": 9665 }, { "epoch": 0.62, "grad_norm": 3.9924111999275667, "learning_rate": 3.3873028927116474e-07, "loss": 0.1011, "step": 9666 }, { "epoch": 0.62, "grad_norm": 1.6810908359051329, "learning_rate": 3.386325381597628e-07, "loss": 0.1435, "step": 9667 }, { "epoch": 0.62, "grad_norm": 0.973501401654767, "learning_rate": 3.385347939326212e-07, "loss": 0.161, "step": 9668 }, { "epoch": 0.62, "grad_norm": 0.4225090616841344, "learning_rate": 3.3843705659390977e-07, "loss": 0.0649, "step": 9669 }, { "epoch": 0.62, "grad_norm": 0.9910227797615438, "learning_rate": 3.383393261477985e-07, "loss": 0.1963, "step": 9670 }, { "epoch": 0.62, "grad_norm": 0.8026710605913076, "learning_rate": 3.382416025984563e-07, "loss": 0.3476, "step": 9671 }, { "epoch": 0.62, "grad_norm": 0.43782863834267566, "learning_rate": 3.3814388595005274e-07, "loss": 0.0489, "step": 9672 }, { "epoch": 0.62, "grad_norm": 0.7834382778568777, "learning_rate": 3.380461762067564e-07, "loss": 0.3936, "step": 9673 }, { "epoch": 0.62, "grad_norm": 1.140878511252451, "learning_rate": 3.379484733727356e-07, "loss": 0.2009, "step": 9674 }, { "epoch": 0.62, "grad_norm": 2.145944887695228, "learning_rate": 3.3785077745215867e-07, "loss": 0.1061, "step": 9675 }, { "epoch": 0.62, "grad_norm": 0.7828966360437236, "learning_rate": 3.377530884491936e-07, "loss": 0.133, "step": 9676 }, { "epoch": 0.62, "grad_norm": 0.9790514628359507, "learning_rate": 3.3765540636800795e-07, "loss": 0.2241, "step": 9677 }, { "epoch": 0.62, "grad_norm": 1.6244034945981407, "learning_rate": 3.375577312127689e-07, "loss": 0.0923, "step": 9678 }, { "epoch": 0.62, "grad_norm": 2.1176109283196176, "learning_rate": 3.374600629876437e-07, "loss": 0.2063, "step": 9679 }, { "epoch": 0.62, "grad_norm": 1.1400215488775414, "learning_rate": 3.373624016967989e-07, "loss": 0.339, "step": 9680 }, { "epoch": 0.62, "grad_norm": 0.7266091394368728, "learning_rate": 3.372647473444011e-07, "loss": 0.1516, "step": 9681 }, { "epoch": 0.62, "grad_norm": 0.998266862212202, "learning_rate": 3.371670999346162e-07, "loss": 0.226, "step": 9682 }, { "epoch": 0.62, "grad_norm": 1.1303660680185235, "learning_rate": 3.370694594716103e-07, "loss": 0.3154, "step": 9683 }, { "epoch": 0.62, "grad_norm": 0.5290700509410663, "learning_rate": 3.369718259595486e-07, "loss": 0.1477, "step": 9684 }, { "epoch": 0.62, "grad_norm": 3.0769691318048444, "learning_rate": 3.368741994025966e-07, "loss": 0.1802, "step": 9685 }, { "epoch": 0.62, "grad_norm": 1.1874478464550087, "learning_rate": 3.367765798049193e-07, "loss": 0.2394, "step": 9686 }, { "epoch": 0.62, "grad_norm": 0.9981652510796589, "learning_rate": 3.36678967170681e-07, "loss": 0.2965, "step": 9687 }, { "epoch": 0.62, "grad_norm": 0.38936891349843017, "learning_rate": 3.365813615040465e-07, "loss": 0.0568, "step": 9688 }, { "epoch": 0.62, "grad_norm": 0.7970928639037027, "learning_rate": 3.3648376280917946e-07, "loss": 0.0278, "step": 9689 }, { "epoch": 0.62, "grad_norm": 0.44544585074375437, "learning_rate": 3.3638617109024405e-07, "loss": 0.0273, "step": 9690 }, { "epoch": 0.62, "grad_norm": 2.623778611811336, "learning_rate": 3.3628858635140317e-07, "loss": 0.3168, "step": 9691 }, { "epoch": 0.62, "grad_norm": 0.838098790065471, "learning_rate": 3.361910085968205e-07, "loss": 0.1211, "step": 9692 }, { "epoch": 0.62, "grad_norm": 0.5313964576854305, "learning_rate": 3.360934378306586e-07, "loss": 0.1963, "step": 9693 }, { "epoch": 0.62, "grad_norm": 0.5870650851985146, "learning_rate": 3.359958740570802e-07, "loss": 0.2638, "step": 9694 }, { "epoch": 0.62, "grad_norm": 0.7881511462464323, "learning_rate": 3.3589831728024733e-07, "loss": 0.2737, "step": 9695 }, { "epoch": 0.62, "grad_norm": 1.0385252321720932, "learning_rate": 3.358007675043224e-07, "loss": 0.3148, "step": 9696 }, { "epoch": 0.62, "grad_norm": 5.494414612317763, "learning_rate": 3.357032247334666e-07, "loss": 0.0761, "step": 9697 }, { "epoch": 0.62, "grad_norm": 1.4388717112905187, "learning_rate": 3.356056889718417e-07, "loss": 0.2136, "step": 9698 }, { "epoch": 0.62, "grad_norm": 1.1491435647868617, "learning_rate": 3.355081602236086e-07, "loss": 0.2962, "step": 9699 }, { "epoch": 0.62, "grad_norm": 1.0983749076551765, "learning_rate": 3.3541063849292785e-07, "loss": 0.1626, "step": 9700 }, { "epoch": 0.62, "grad_norm": 19.719457677317227, "learning_rate": 3.3531312378396023e-07, "loss": 0.1695, "step": 9701 }, { "epoch": 0.62, "grad_norm": 0.5293311824003611, "learning_rate": 3.352156161008658e-07, "loss": 0.1906, "step": 9702 }, { "epoch": 0.62, "grad_norm": 0.7800234034844945, "learning_rate": 3.3511811544780445e-07, "loss": 0.0977, "step": 9703 }, { "epoch": 0.62, "grad_norm": 1.2912943653128102, "learning_rate": 3.3502062182893563e-07, "loss": 0.0991, "step": 9704 }, { "epoch": 0.62, "grad_norm": 0.9972025373690413, "learning_rate": 3.3492313524841896e-07, "loss": 0.4256, "step": 9705 }, { "epoch": 0.62, "grad_norm": 0.394740195822023, "learning_rate": 3.34825655710413e-07, "loss": 0.0541, "step": 9706 }, { "epoch": 0.62, "grad_norm": 0.8023099451970512, "learning_rate": 3.3472818321907677e-07, "loss": 0.1039, "step": 9707 }, { "epoch": 0.62, "grad_norm": 0.5450040172144064, "learning_rate": 3.3463071777856826e-07, "loss": 0.0747, "step": 9708 }, { "epoch": 0.62, "grad_norm": 0.6629103130157872, "learning_rate": 3.34533259393046e-07, "loss": 0.2211, "step": 9709 }, { "epoch": 0.62, "grad_norm": 1.0004173541174128, "learning_rate": 3.344358080666674e-07, "loss": 0.2864, "step": 9710 }, { "epoch": 0.62, "grad_norm": 0.5784660571422798, "learning_rate": 3.3433836380359017e-07, "loss": 0.1308, "step": 9711 }, { "epoch": 0.62, "grad_norm": 0.429189676289002, "learning_rate": 3.342409266079711e-07, "loss": 0.202, "step": 9712 }, { "epoch": 0.62, "grad_norm": 5.455297020664025, "learning_rate": 3.3414349648396755e-07, "loss": 0.1935, "step": 9713 }, { "epoch": 0.62, "grad_norm": 1.3197767456611609, "learning_rate": 3.340460734357359e-07, "loss": 0.015, "step": 9714 }, { "epoch": 0.62, "grad_norm": 1.6251486683180227, "learning_rate": 3.339486574674321e-07, "loss": 0.105, "step": 9715 }, { "epoch": 0.62, "grad_norm": 0.5383432674816208, "learning_rate": 3.338512485832127e-07, "loss": 0.2392, "step": 9716 }, { "epoch": 0.62, "grad_norm": 0.5975307515061975, "learning_rate": 3.3375384678723275e-07, "loss": 0.2254, "step": 9717 }, { "epoch": 0.62, "grad_norm": 1.2249868304715692, "learning_rate": 3.3365645208364814e-07, "loss": 0.206, "step": 9718 }, { "epoch": 0.62, "grad_norm": 0.5727470827042784, "learning_rate": 3.335590644766134e-07, "loss": 0.1096, "step": 9719 }, { "epoch": 0.62, "grad_norm": 0.26983518233568515, "learning_rate": 3.3346168397028375e-07, "loss": 0.1458, "step": 9720 }, { "epoch": 0.62, "grad_norm": 0.7253371494377086, "learning_rate": 3.333643105688134e-07, "loss": 0.3655, "step": 9721 }, { "epoch": 0.62, "grad_norm": 0.25924859035933084, "learning_rate": 3.3326694427635657e-07, "loss": 0.0089, "step": 9722 }, { "epoch": 0.62, "grad_norm": 1.4638056171243157, "learning_rate": 3.3316958509706695e-07, "loss": 0.1153, "step": 9723 }, { "epoch": 0.62, "grad_norm": 0.7050846533927227, "learning_rate": 3.3307223303509835e-07, "loss": 0.1181, "step": 9724 }, { "epoch": 0.62, "grad_norm": 0.51324220020139, "learning_rate": 3.329748880946037e-07, "loss": 0.3162, "step": 9725 }, { "epoch": 0.62, "grad_norm": 0.7059254358554043, "learning_rate": 3.328775502797363e-07, "loss": 0.1879, "step": 9726 }, { "epoch": 0.62, "grad_norm": 1.454404948998227, "learning_rate": 3.3278021959464857e-07, "loss": 0.1415, "step": 9727 }, { "epoch": 0.62, "grad_norm": 2.855437271924765, "learning_rate": 3.3268289604349266e-07, "loss": 0.2297, "step": 9728 }, { "epoch": 0.62, "grad_norm": 0.8428411441037661, "learning_rate": 3.325855796304208e-07, "loss": 0.1808, "step": 9729 }, { "epoch": 0.62, "grad_norm": 0.9022351116652456, "learning_rate": 3.324882703595845e-07, "loss": 0.3482, "step": 9730 }, { "epoch": 0.62, "grad_norm": 0.5492724358666593, "learning_rate": 3.3239096823513565e-07, "loss": 0.1431, "step": 9731 }, { "epoch": 0.62, "grad_norm": 4.918600336029918, "learning_rate": 3.322936732612247e-07, "loss": 0.1724, "step": 9732 }, { "epoch": 0.62, "grad_norm": 0.7260838766547156, "learning_rate": 3.321963854420031e-07, "loss": 0.0123, "step": 9733 }, { "epoch": 0.62, "grad_norm": 2.844162753335101, "learning_rate": 3.3209910478162077e-07, "loss": 0.2646, "step": 9734 }, { "epoch": 0.62, "grad_norm": 1.2387889557100995, "learning_rate": 3.320018312842284e-07, "loss": 0.1458, "step": 9735 }, { "epoch": 0.62, "grad_norm": 1.224096149135845, "learning_rate": 3.3190456495397534e-07, "loss": 0.2795, "step": 9736 }, { "epoch": 0.62, "grad_norm": 0.9895912364231522, "learning_rate": 3.318073057950117e-07, "loss": 0.089, "step": 9737 }, { "epoch": 0.62, "grad_norm": 9.013396254623336, "learning_rate": 3.317100538114863e-07, "loss": 0.1287, "step": 9738 }, { "epoch": 0.62, "grad_norm": 4.944640453005538, "learning_rate": 3.3161280900754845e-07, "loss": 0.0451, "step": 9739 }, { "epoch": 0.62, "grad_norm": 1.4478979865196622, "learning_rate": 3.3151557138734655e-07, "loss": 0.3024, "step": 9740 }, { "epoch": 0.62, "grad_norm": 2.140879600682289, "learning_rate": 3.314183409550292e-07, "loss": 0.2722, "step": 9741 }, { "epoch": 0.62, "grad_norm": 1.2735408774536647, "learning_rate": 3.313211177147444e-07, "loss": 0.0224, "step": 9742 }, { "epoch": 0.62, "grad_norm": 1.2281358995379472, "learning_rate": 3.3122390167063965e-07, "loss": 0.2272, "step": 9743 }, { "epoch": 0.62, "grad_norm": 0.4739552998196937, "learning_rate": 3.311266928268626e-07, "loss": 0.1578, "step": 9744 }, { "epoch": 0.62, "grad_norm": 0.8496568020715792, "learning_rate": 3.3102949118756017e-07, "loss": 0.1799, "step": 9745 }, { "epoch": 0.62, "grad_norm": 2.1675489398324537, "learning_rate": 3.3093229675687945e-07, "loss": 0.0326, "step": 9746 }, { "epoch": 0.62, "grad_norm": 1.9339951663613293, "learning_rate": 3.3083510953896676e-07, "loss": 0.2022, "step": 9747 }, { "epoch": 0.62, "grad_norm": 0.6903485604455774, "learning_rate": 3.307379295379684e-07, "loss": 0.2035, "step": 9748 }, { "epoch": 0.62, "grad_norm": 1.0305568072894793, "learning_rate": 3.3064075675803016e-07, "loss": 0.2289, "step": 9749 }, { "epoch": 0.62, "grad_norm": 3.941810575292261, "learning_rate": 3.3054359120329786e-07, "loss": 0.1156, "step": 9750 }, { "epoch": 0.62, "grad_norm": 0.5583045424424432, "learning_rate": 3.3044643287791643e-07, "loss": 0.1339, "step": 9751 }, { "epoch": 0.62, "grad_norm": 0.9404728184231548, "learning_rate": 3.3034928178603115e-07, "loss": 0.0288, "step": 9752 }, { "epoch": 0.62, "grad_norm": 1.8899062681751952, "learning_rate": 3.3025213793178643e-07, "loss": 0.1342, "step": 9753 }, { "epoch": 0.62, "grad_norm": 0.3018027126828369, "learning_rate": 3.301550013193268e-07, "loss": 0.2209, "step": 9754 }, { "epoch": 0.62, "grad_norm": 0.4847358677401563, "learning_rate": 3.300578719527963e-07, "loss": 0.0411, "step": 9755 }, { "epoch": 0.62, "grad_norm": 0.7310701196391066, "learning_rate": 3.2996074983633846e-07, "loss": 0.3191, "step": 9756 }, { "epoch": 0.62, "grad_norm": 0.5122777123842382, "learning_rate": 3.2986363497409707e-07, "loss": 0.1452, "step": 9757 }, { "epoch": 0.62, "grad_norm": 4.29930004610316, "learning_rate": 3.2976652737021483e-07, "loss": 0.1408, "step": 9758 }, { "epoch": 0.62, "grad_norm": 0.7674445115922284, "learning_rate": 3.2966942702883485e-07, "loss": 0.1664, "step": 9759 }, { "epoch": 0.62, "grad_norm": 0.2529105157325676, "learning_rate": 3.2957233395409945e-07, "loss": 0.0529, "step": 9760 }, { "epoch": 0.62, "grad_norm": 0.7890271333132229, "learning_rate": 3.29475248150151e-07, "loss": 0.1605, "step": 9761 }, { "epoch": 0.62, "grad_norm": 0.7592650520725693, "learning_rate": 3.293781696211311e-07, "loss": 0.2552, "step": 9762 }, { "epoch": 0.62, "grad_norm": 1.1271683139864541, "learning_rate": 3.2928109837118165e-07, "loss": 0.1316, "step": 9763 }, { "epoch": 0.62, "grad_norm": 12.793001244147618, "learning_rate": 3.291840344044435e-07, "loss": 0.0628, "step": 9764 }, { "epoch": 0.62, "grad_norm": 0.2535944043000978, "learning_rate": 3.2908697772505807e-07, "loss": 0.0689, "step": 9765 }, { "epoch": 0.62, "grad_norm": 5.561272604758733, "learning_rate": 3.2898992833716563e-07, "loss": 0.289, "step": 9766 }, { "epoch": 0.62, "grad_norm": 0.3481681415578158, "learning_rate": 3.288928862449066e-07, "loss": 0.193, "step": 9767 }, { "epoch": 0.62, "grad_norm": 2.1000027488492163, "learning_rate": 3.2879585145242117e-07, "loss": 0.1165, "step": 9768 }, { "epoch": 0.62, "grad_norm": 0.7352536233546072, "learning_rate": 3.2869882396384873e-07, "loss": 0.1636, "step": 9769 }, { "epoch": 0.62, "grad_norm": 2.1403764319347593, "learning_rate": 3.286018037833289e-07, "loss": 0.414, "step": 9770 }, { "epoch": 0.62, "grad_norm": 0.6444888092476602, "learning_rate": 3.2850479091500053e-07, "loss": 0.0698, "step": 9771 }, { "epoch": 0.62, "grad_norm": 0.8511599200217639, "learning_rate": 3.284077853630027e-07, "loss": 0.246, "step": 9772 }, { "epoch": 0.62, "grad_norm": 0.27623584953854535, "learning_rate": 3.2831078713147354e-07, "loss": 0.0084, "step": 9773 }, { "epoch": 0.62, "grad_norm": 1.2594911388306558, "learning_rate": 3.2821379622455146e-07, "loss": 0.2543, "step": 9774 }, { "epoch": 0.62, "grad_norm": 18.190853615986125, "learning_rate": 3.2811681264637405e-07, "loss": 0.3407, "step": 9775 }, { "epoch": 0.62, "grad_norm": 4.4178107490553025, "learning_rate": 3.280198364010791e-07, "loss": 0.3291, "step": 9776 }, { "epoch": 0.62, "grad_norm": 2.980847201905259, "learning_rate": 3.2792286749280346e-07, "loss": 0.1604, "step": 9777 }, { "epoch": 0.62, "grad_norm": 1.1848945969660434, "learning_rate": 3.2782590592568436e-07, "loss": 0.1019, "step": 9778 }, { "epoch": 0.62, "grad_norm": 0.7675199347965304, "learning_rate": 3.2772895170385816e-07, "loss": 0.1948, "step": 9779 }, { "epoch": 0.62, "grad_norm": 0.6438653739147412, "learning_rate": 3.2763200483146125e-07, "loss": 0.2464, "step": 9780 }, { "epoch": 0.62, "grad_norm": 0.5752937854883382, "learning_rate": 3.275350653126294e-07, "loss": 0.2327, "step": 9781 }, { "epoch": 0.62, "grad_norm": 0.3260922813825861, "learning_rate": 3.274381331514986e-07, "loss": 0.216, "step": 9782 }, { "epoch": 0.62, "grad_norm": 0.6309515879234886, "learning_rate": 3.2734120835220377e-07, "loss": 0.1001, "step": 9783 }, { "epoch": 0.62, "grad_norm": 0.9280792828751381, "learning_rate": 3.2724429091887993e-07, "loss": 0.2191, "step": 9784 }, { "epoch": 0.62, "grad_norm": 1.1782041291181589, "learning_rate": 3.271473808556622e-07, "loss": 0.4337, "step": 9785 }, { "epoch": 0.62, "grad_norm": 0.3325734765309915, "learning_rate": 3.270504781666845e-07, "loss": 0.1293, "step": 9786 }, { "epoch": 0.62, "grad_norm": 0.11298133267691497, "learning_rate": 3.269535828560812e-07, "loss": 0.0005, "step": 9787 }, { "epoch": 0.62, "grad_norm": 1.6464759162177716, "learning_rate": 3.268566949279857e-07, "loss": 0.1142, "step": 9788 }, { "epoch": 0.62, "grad_norm": 11.900505378403551, "learning_rate": 3.26759814386532e-07, "loss": 0.1764, "step": 9789 }, { "epoch": 0.62, "grad_norm": 1.108662414175651, "learning_rate": 3.2666294123585253e-07, "loss": 0.1668, "step": 9790 }, { "epoch": 0.62, "grad_norm": 1.205937580649672, "learning_rate": 3.2656607548008064e-07, "loss": 0.2206, "step": 9791 }, { "epoch": 0.62, "grad_norm": 1.1276834088401069, "learning_rate": 3.2646921712334854e-07, "loss": 0.1915, "step": 9792 }, { "epoch": 0.62, "grad_norm": 1.041790434933491, "learning_rate": 3.263723661697885e-07, "loss": 0.0513, "step": 9793 }, { "epoch": 0.62, "grad_norm": 0.45748085143230005, "learning_rate": 3.2627552262353223e-07, "loss": 0.1839, "step": 9794 }, { "epoch": 0.62, "grad_norm": 0.37755251386908534, "learning_rate": 3.2617868648871167e-07, "loss": 0.0076, "step": 9795 }, { "epoch": 0.62, "grad_norm": 0.9007939493937487, "learning_rate": 3.2608185776945773e-07, "loss": 0.0692, "step": 9796 }, { "epoch": 0.62, "grad_norm": 0.41751782266332393, "learning_rate": 3.259850364699012e-07, "loss": 0.0901, "step": 9797 }, { "epoch": 0.62, "grad_norm": 0.7244555180524256, "learning_rate": 3.2588822259417294e-07, "loss": 0.1607, "step": 9798 }, { "epoch": 0.62, "grad_norm": 0.639742382083237, "learning_rate": 3.2579141614640293e-07, "loss": 0.0075, "step": 9799 }, { "epoch": 0.62, "grad_norm": 0.83033785241535, "learning_rate": 3.2569461713072145e-07, "loss": 0.3367, "step": 9800 }, { "epoch": 0.63, "grad_norm": 0.8677793501382356, "learning_rate": 3.255978255512579e-07, "loss": 0.2052, "step": 9801 }, { "epoch": 0.63, "grad_norm": 0.8380638557553156, "learning_rate": 3.2550104141214184e-07, "loss": 0.251, "step": 9802 }, { "epoch": 0.63, "grad_norm": 1.065585588617662, "learning_rate": 3.2540426471750197e-07, "loss": 0.0808, "step": 9803 }, { "epoch": 0.63, "grad_norm": 1.1518702118880153, "learning_rate": 3.253074954714674e-07, "loss": 0.2295, "step": 9804 }, { "epoch": 0.63, "grad_norm": 5.898787117839744, "learning_rate": 3.2521073367816597e-07, "loss": 0.0493, "step": 9805 }, { "epoch": 0.63, "grad_norm": 0.6507183401082992, "learning_rate": 3.251139793417263e-07, "loss": 0.013, "step": 9806 }, { "epoch": 0.63, "grad_norm": 1.3547308846636823, "learning_rate": 3.250172324662756e-07, "loss": 0.1052, "step": 9807 }, { "epoch": 0.63, "grad_norm": 20.268166422279045, "learning_rate": 3.249204930559417e-07, "loss": 0.1381, "step": 9808 }, { "epoch": 0.63, "grad_norm": 2.5414980160997245, "learning_rate": 3.248237611148514e-07, "loss": 0.2751, "step": 9809 }, { "epoch": 0.63, "grad_norm": 1.27660086068766, "learning_rate": 3.247270366471316e-07, "loss": 0.1243, "step": 9810 }, { "epoch": 0.63, "grad_norm": 1.5296173960649535, "learning_rate": 3.246303196569089e-07, "loss": 0.1863, "step": 9811 }, { "epoch": 0.63, "grad_norm": 1.590121154826465, "learning_rate": 3.2453361014830904e-07, "loss": 0.023, "step": 9812 }, { "epoch": 0.63, "grad_norm": 1.8378317787950598, "learning_rate": 3.244369081254584e-07, "loss": 0.1815, "step": 9813 }, { "epoch": 0.63, "grad_norm": 1.678106466930295, "learning_rate": 3.24340213592482e-07, "loss": 0.2562, "step": 9814 }, { "epoch": 0.63, "grad_norm": 8.061081552019047, "learning_rate": 3.242435265535053e-07, "loss": 0.2101, "step": 9815 }, { "epoch": 0.63, "grad_norm": 1.4553365836982701, "learning_rate": 3.241468470126529e-07, "loss": 0.3213, "step": 9816 }, { "epoch": 0.63, "grad_norm": 0.6583729938209066, "learning_rate": 3.240501749740496e-07, "loss": 0.1729, "step": 9817 }, { "epoch": 0.63, "grad_norm": 0.5780146377092252, "learning_rate": 3.239535104418196e-07, "loss": 0.3041, "step": 9818 }, { "epoch": 0.63, "grad_norm": 0.501996112418068, "learning_rate": 3.2385685342008674e-07, "loss": 0.2809, "step": 9819 }, { "epoch": 0.63, "grad_norm": 0.6947544197855776, "learning_rate": 3.2376020391297455e-07, "loss": 0.1819, "step": 9820 }, { "epoch": 0.63, "grad_norm": 0.5802086760688637, "learning_rate": 3.236635619246064e-07, "loss": 0.1085, "step": 9821 }, { "epoch": 0.63, "grad_norm": 0.6207744953822706, "learning_rate": 3.235669274591051e-07, "loss": 0.074, "step": 9822 }, { "epoch": 0.63, "grad_norm": 0.6310128693542639, "learning_rate": 3.2347030052059354e-07, "loss": 0.0043, "step": 9823 }, { "epoch": 0.63, "grad_norm": 0.5035220755686219, "learning_rate": 3.233736811131938e-07, "loss": 0.3291, "step": 9824 }, { "epoch": 0.63, "grad_norm": 1.3402357618229086, "learning_rate": 3.232770692410277e-07, "loss": 0.1443, "step": 9825 }, { "epoch": 0.63, "grad_norm": 0.7722676121731585, "learning_rate": 3.2318046490821716e-07, "loss": 0.2044, "step": 9826 }, { "epoch": 0.63, "grad_norm": 0.5447117817685545, "learning_rate": 3.2308386811888346e-07, "loss": 0.0425, "step": 9827 }, { "epoch": 0.63, "grad_norm": 0.8505989702153116, "learning_rate": 3.229872788771476e-07, "loss": 0.4057, "step": 9828 }, { "epoch": 0.63, "grad_norm": 0.6823708881589299, "learning_rate": 3.2289069718713016e-07, "loss": 0.1218, "step": 9829 }, { "epoch": 0.63, "grad_norm": 1.0516683346630333, "learning_rate": 3.227941230529517e-07, "loss": 0.1493, "step": 9830 }, { "epoch": 0.63, "grad_norm": 1.5065633695756446, "learning_rate": 3.2269755647873214e-07, "loss": 0.2123, "step": 9831 }, { "epoch": 0.63, "grad_norm": 14.212562342131438, "learning_rate": 3.226009974685914e-07, "loss": 0.0154, "step": 9832 }, { "epoch": 0.63, "grad_norm": 0.42259660204897154, "learning_rate": 3.225044460266485e-07, "loss": 0.1057, "step": 9833 }, { "epoch": 0.63, "grad_norm": 0.89121376067872, "learning_rate": 3.2240790215702297e-07, "loss": 0.4477, "step": 9834 }, { "epoch": 0.63, "grad_norm": 1.016530243754151, "learning_rate": 3.223113658638331e-07, "loss": 0.0705, "step": 9835 }, { "epoch": 0.63, "grad_norm": 0.19237502739439066, "learning_rate": 3.222148371511977e-07, "loss": 0.1574, "step": 9836 }, { "epoch": 0.63, "grad_norm": 0.9736830476847157, "learning_rate": 3.221183160232348e-07, "loss": 0.2513, "step": 9837 }, { "epoch": 0.63, "grad_norm": 0.5915501474496268, "learning_rate": 3.2202180248406196e-07, "loss": 0.3128, "step": 9838 }, { "epoch": 0.63, "grad_norm": 0.4582903970709344, "learning_rate": 3.21925296537797e-07, "loss": 0.0935, "step": 9839 }, { "epoch": 0.63, "grad_norm": 0.6102361646494264, "learning_rate": 3.218287981885567e-07, "loss": 0.2526, "step": 9840 }, { "epoch": 0.63, "grad_norm": 0.70409353789978, "learning_rate": 3.2173230744045815e-07, "loss": 0.3165, "step": 9841 }, { "epoch": 0.63, "grad_norm": 0.3060056987874527, "learning_rate": 3.216358242976176e-07, "loss": 0.0452, "step": 9842 }, { "epoch": 0.63, "grad_norm": 0.359948758444199, "learning_rate": 3.2153934876415143e-07, "loss": 0.1265, "step": 9843 }, { "epoch": 0.63, "grad_norm": 1.0029068397163248, "learning_rate": 3.214428808441754e-07, "loss": 0.2236, "step": 9844 }, { "epoch": 0.63, "grad_norm": 0.6554839420953738, "learning_rate": 3.2134642054180493e-07, "loss": 0.1272, "step": 9845 }, { "epoch": 0.63, "grad_norm": 1.1511883519595218, "learning_rate": 3.2124996786115524e-07, "loss": 0.1783, "step": 9846 }, { "epoch": 0.63, "grad_norm": 1.3290639556624124, "learning_rate": 3.211535228063415e-07, "loss": 0.3526, "step": 9847 }, { "epoch": 0.63, "grad_norm": 6.436707333221691, "learning_rate": 3.210570853814778e-07, "loss": 0.0636, "step": 9848 }, { "epoch": 0.63, "grad_norm": 0.9722318766730552, "learning_rate": 3.209606555906788e-07, "loss": 0.2851, "step": 9849 }, { "epoch": 0.63, "grad_norm": 0.6854531809469963, "learning_rate": 3.208642334380578e-07, "loss": 0.2056, "step": 9850 }, { "epoch": 0.63, "grad_norm": 2.1512496298708337, "learning_rate": 3.2076781892772904e-07, "loss": 0.1194, "step": 9851 }, { "epoch": 0.63, "grad_norm": 0.9946664661922305, "learning_rate": 3.2067141206380526e-07, "loss": 0.0877, "step": 9852 }, { "epoch": 0.63, "grad_norm": 0.7769647861589055, "learning_rate": 3.2057501285039957e-07, "loss": 0.2582, "step": 9853 }, { "epoch": 0.63, "grad_norm": 0.6634267476529329, "learning_rate": 3.204786212916245e-07, "loss": 0.2364, "step": 9854 }, { "epoch": 0.63, "grad_norm": 0.6357232327354747, "learning_rate": 3.2038223739159225e-07, "loss": 0.1074, "step": 9855 }, { "epoch": 0.63, "grad_norm": 0.6956484779394868, "learning_rate": 3.2028586115441504e-07, "loss": 0.3608, "step": 9856 }, { "epoch": 0.63, "grad_norm": 1.4483626126431408, "learning_rate": 3.2018949258420404e-07, "loss": 0.2857, "step": 9857 }, { "epoch": 0.63, "grad_norm": 3.892691276179684, "learning_rate": 3.2009313168507096e-07, "loss": 0.1045, "step": 9858 }, { "epoch": 0.63, "grad_norm": 5.604541284668023, "learning_rate": 3.1999677846112634e-07, "loss": 0.3529, "step": 9859 }, { "epoch": 0.63, "grad_norm": 0.8117811118884894, "learning_rate": 3.1990043291648116e-07, "loss": 0.2828, "step": 9860 }, { "epoch": 0.63, "grad_norm": 3.8544328729224833, "learning_rate": 3.198040950552454e-07, "loss": 0.2184, "step": 9861 }, { "epoch": 0.63, "grad_norm": 1.107901832492373, "learning_rate": 3.197077648815293e-07, "loss": 0.211, "step": 9862 }, { "epoch": 0.63, "grad_norm": 1.2094327134884784, "learning_rate": 3.196114423994424e-07, "loss": 0.1163, "step": 9863 }, { "epoch": 0.63, "grad_norm": 1.2872753391107936, "learning_rate": 3.19515127613094e-07, "loss": 0.1766, "step": 9864 }, { "epoch": 0.63, "grad_norm": 2.380807332028745, "learning_rate": 3.1941882052659307e-07, "loss": 0.2257, "step": 9865 }, { "epoch": 0.63, "grad_norm": 1.1074461411061158, "learning_rate": 3.1932252114404813e-07, "loss": 0.3668, "step": 9866 }, { "epoch": 0.63, "grad_norm": 0.38756982710411836, "learning_rate": 3.1922622946956787e-07, "loss": 0.1879, "step": 9867 }, { "epoch": 0.63, "grad_norm": 1.1471559271998129, "learning_rate": 3.191299455072598e-07, "loss": 0.1824, "step": 9868 }, { "epoch": 0.63, "grad_norm": 1.467852571762619, "learning_rate": 3.190336692612321e-07, "loss": 0.497, "step": 9869 }, { "epoch": 0.63, "grad_norm": 0.8289119305925776, "learning_rate": 3.1893740073559164e-07, "loss": 0.0471, "step": 9870 }, { "epoch": 0.63, "grad_norm": 0.2894254871999359, "learning_rate": 3.1884113993444576e-07, "loss": 0.1185, "step": 9871 }, { "epoch": 0.63, "grad_norm": 1.2244586613989046, "learning_rate": 3.18744886861901e-07, "loss": 0.3413, "step": 9872 }, { "epoch": 0.63, "grad_norm": 0.7645184562699384, "learning_rate": 3.186486415220638e-07, "loss": 0.3322, "step": 9873 }, { "epoch": 0.63, "grad_norm": 0.7867369180073533, "learning_rate": 3.1855240391903994e-07, "loss": 0.2233, "step": 9874 }, { "epoch": 0.63, "grad_norm": 0.21979088857614315, "learning_rate": 3.1845617405693554e-07, "loss": 0.0017, "step": 9875 }, { "epoch": 0.63, "grad_norm": 4.412127161499392, "learning_rate": 3.1835995193985546e-07, "loss": 0.1026, "step": 9876 }, { "epoch": 0.63, "grad_norm": 1.150974680950936, "learning_rate": 3.1826373757190515e-07, "loss": 0.0228, "step": 9877 }, { "epoch": 0.63, "grad_norm": 0.5825104628199708, "learning_rate": 3.1816753095718914e-07, "loss": 0.2368, "step": 9878 }, { "epoch": 0.63, "grad_norm": 4.99293123207642, "learning_rate": 3.1807133209981167e-07, "loss": 0.1077, "step": 9879 }, { "epoch": 0.63, "grad_norm": 0.9097358517491003, "learning_rate": 3.17975141003877e-07, "loss": 0.3015, "step": 9880 }, { "epoch": 0.63, "grad_norm": 1.226154462304313, "learning_rate": 3.1787895767348856e-07, "loss": 0.1915, "step": 9881 }, { "epoch": 0.63, "grad_norm": 2.0056129689625313, "learning_rate": 3.177827821127501e-07, "loss": 0.1646, "step": 9882 }, { "epoch": 0.63, "grad_norm": 1.5020985357185783, "learning_rate": 3.1768661432576425e-07, "loss": 0.4339, "step": 9883 }, { "epoch": 0.63, "grad_norm": 10.327293405068815, "learning_rate": 3.175904543166342e-07, "loss": 0.238, "step": 9884 }, { "epoch": 0.63, "grad_norm": 0.45995849192979227, "learning_rate": 3.174943020894618e-07, "loss": 0.2684, "step": 9885 }, { "epoch": 0.63, "grad_norm": 0.683662106369194, "learning_rate": 3.173981576483495e-07, "loss": 0.3656, "step": 9886 }, { "epoch": 0.63, "grad_norm": 3.639972937803985, "learning_rate": 3.173020209973988e-07, "loss": 0.0426, "step": 9887 }, { "epoch": 0.63, "grad_norm": 0.6160806936059422, "learning_rate": 3.172058921407112e-07, "loss": 0.1846, "step": 9888 }, { "epoch": 0.63, "grad_norm": 0.7642852608321751, "learning_rate": 3.1710977108238764e-07, "loss": 0.2798, "step": 9889 }, { "epoch": 0.63, "grad_norm": 10.56581276208378, "learning_rate": 3.170136578265289e-07, "loss": 0.1302, "step": 9890 }, { "epoch": 0.63, "grad_norm": 15.594260556517076, "learning_rate": 3.1691755237723536e-07, "loss": 0.1422, "step": 9891 }, { "epoch": 0.63, "grad_norm": 1.893153347593312, "learning_rate": 3.168214547386072e-07, "loss": 0.2696, "step": 9892 }, { "epoch": 0.63, "grad_norm": 1.4365150509337516, "learning_rate": 3.1672536491474404e-07, "loss": 0.1797, "step": 9893 }, { "epoch": 0.63, "grad_norm": 5.122466333845425, "learning_rate": 3.1662928290974514e-07, "loss": 0.0621, "step": 9894 }, { "epoch": 0.63, "grad_norm": 15.115066480424131, "learning_rate": 3.165332087277097e-07, "loss": 0.1453, "step": 9895 }, { "epoch": 0.63, "grad_norm": 4.808008933131503, "learning_rate": 3.164371423727362e-07, "loss": 0.2404, "step": 9896 }, { "epoch": 0.63, "grad_norm": 0.4273001064281147, "learning_rate": 3.163410838489234e-07, "loss": 0.1283, "step": 9897 }, { "epoch": 0.63, "grad_norm": 1.0126728978340207, "learning_rate": 3.162450331603691e-07, "loss": 0.2811, "step": 9898 }, { "epoch": 0.63, "grad_norm": 0.6151956085625986, "learning_rate": 3.1614899031117113e-07, "loss": 0.2404, "step": 9899 }, { "epoch": 0.63, "grad_norm": 1.918530267726266, "learning_rate": 3.160529553054267e-07, "loss": 0.1715, "step": 9900 }, { "epoch": 0.63, "grad_norm": 0.7940771940834439, "learning_rate": 3.159569281472332e-07, "loss": 0.0646, "step": 9901 }, { "epoch": 0.63, "grad_norm": 8.132041643094976, "learning_rate": 3.1586090884068685e-07, "loss": 0.1998, "step": 9902 }, { "epoch": 0.63, "grad_norm": 0.09350245611442531, "learning_rate": 3.1576489738988455e-07, "loss": 0.0017, "step": 9903 }, { "epoch": 0.63, "grad_norm": 2.7649371838661816, "learning_rate": 3.1566889379892193e-07, "loss": 0.0868, "step": 9904 }, { "epoch": 0.63, "grad_norm": 1.9467610536431719, "learning_rate": 3.15572898071895e-07, "loss": 0.2727, "step": 9905 }, { "epoch": 0.63, "grad_norm": 1.8475691244522232, "learning_rate": 3.1547691021289885e-07, "loss": 0.1743, "step": 9906 }, { "epoch": 0.63, "grad_norm": 1.3443031278498072, "learning_rate": 3.1538093022602857e-07, "loss": 0.0531, "step": 9907 }, { "epoch": 0.63, "grad_norm": 0.4824038418934789, "learning_rate": 3.1528495811537916e-07, "loss": 0.251, "step": 9908 }, { "epoch": 0.63, "grad_norm": 0.9748168955001419, "learning_rate": 3.151889938850445e-07, "loss": 0.074, "step": 9909 }, { "epoch": 0.63, "grad_norm": 1.7990677806453492, "learning_rate": 3.1509303753911916e-07, "loss": 0.1646, "step": 9910 }, { "epoch": 0.63, "grad_norm": 2.059348594169725, "learning_rate": 3.149970890816963e-07, "loss": 0.2524, "step": 9911 }, { "epoch": 0.63, "grad_norm": 1.3788194423194713, "learning_rate": 3.1490114851686975e-07, "loss": 0.2684, "step": 9912 }, { "epoch": 0.63, "grad_norm": 1.7774822863336062, "learning_rate": 3.148052158487321e-07, "loss": 0.0269, "step": 9913 }, { "epoch": 0.63, "grad_norm": 1.3721846056736207, "learning_rate": 3.147092910813764e-07, "loss": 0.0962, "step": 9914 }, { "epoch": 0.63, "grad_norm": 1.9218045254917826, "learning_rate": 3.146133742188946e-07, "loss": 0.1785, "step": 9915 }, { "epoch": 0.63, "grad_norm": 0.2376069877986604, "learning_rate": 3.145174652653791e-07, "loss": 0.0841, "step": 9916 }, { "epoch": 0.63, "grad_norm": 1.5314508960948032, "learning_rate": 3.144215642249213e-07, "loss": 0.129, "step": 9917 }, { "epoch": 0.63, "grad_norm": 0.6824153588707195, "learning_rate": 3.143256711016128e-07, "loss": 0.2261, "step": 9918 }, { "epoch": 0.63, "grad_norm": 1.1063518275775739, "learning_rate": 3.1422978589954443e-07, "loss": 0.1102, "step": 9919 }, { "epoch": 0.63, "grad_norm": 0.5319474605010603, "learning_rate": 3.1413390862280655e-07, "loss": 0.2272, "step": 9920 }, { "epoch": 0.63, "grad_norm": 1.5168683468987119, "learning_rate": 3.1403803927549006e-07, "loss": 0.595, "step": 9921 }, { "epoch": 0.63, "grad_norm": 1.0728593540967328, "learning_rate": 3.139421778616844e-07, "loss": 0.1412, "step": 9922 }, { "epoch": 0.63, "grad_norm": 1.1104532322306133, "learning_rate": 3.1384632438547964e-07, "loss": 0.2981, "step": 9923 }, { "epoch": 0.63, "grad_norm": 0.8048437881054166, "learning_rate": 3.137504788509648e-07, "loss": 0.2572, "step": 9924 }, { "epoch": 0.63, "grad_norm": 0.525706281740437, "learning_rate": 3.1365464126222897e-07, "loss": 0.1974, "step": 9925 }, { "epoch": 0.63, "grad_norm": 0.3086253821196317, "learning_rate": 3.135588116233607e-07, "loss": 0.0059, "step": 9926 }, { "epoch": 0.63, "grad_norm": 0.3348227131092952, "learning_rate": 3.1346298993844844e-07, "loss": 0.2634, "step": 9927 }, { "epoch": 0.63, "grad_norm": 1.4285186408888528, "learning_rate": 3.1336717621157986e-07, "loss": 0.0997, "step": 9928 }, { "epoch": 0.63, "grad_norm": 0.7934943600101988, "learning_rate": 3.132713704468429e-07, "loss": 0.2026, "step": 9929 }, { "epoch": 0.63, "grad_norm": 4.100488292872332, "learning_rate": 3.131755726483245e-07, "loss": 0.1602, "step": 9930 }, { "epoch": 0.63, "grad_norm": 0.5042898575656839, "learning_rate": 3.1307978282011186e-07, "loss": 0.089, "step": 9931 }, { "epoch": 0.63, "grad_norm": 1.345181178870036, "learning_rate": 3.129840009662913e-07, "loss": 0.1918, "step": 9932 }, { "epoch": 0.63, "grad_norm": 1.9917433650929446, "learning_rate": 3.1288822709094933e-07, "loss": 0.1131, "step": 9933 }, { "epoch": 0.63, "grad_norm": 0.45394084261043055, "learning_rate": 3.1279246119817174e-07, "loss": 0.2909, "step": 9934 }, { "epoch": 0.63, "grad_norm": 0.5376000378997712, "learning_rate": 3.1269670329204393e-07, "loss": 0.1752, "step": 9935 }, { "epoch": 0.63, "grad_norm": 1.782579958651778, "learning_rate": 3.126009533766515e-07, "loss": 0.174, "step": 9936 }, { "epoch": 0.63, "grad_norm": 3.1027129316539956, "learning_rate": 3.1250521145607894e-07, "loss": 0.1202, "step": 9937 }, { "epoch": 0.63, "grad_norm": 1.6921228097307655, "learning_rate": 3.124094775344112e-07, "loss": 0.216, "step": 9938 }, { "epoch": 0.63, "grad_norm": 0.8306429099856422, "learning_rate": 3.1231375161573194e-07, "loss": 0.1582, "step": 9939 }, { "epoch": 0.63, "grad_norm": 1.0218445145187087, "learning_rate": 3.122180337041256e-07, "loss": 0.0916, "step": 9940 }, { "epoch": 0.63, "grad_norm": 1.3578911721646219, "learning_rate": 3.121223238036752e-07, "loss": 0.3942, "step": 9941 }, { "epoch": 0.63, "grad_norm": 6.109048368111545, "learning_rate": 3.120266219184643e-07, "loss": 0.1368, "step": 9942 }, { "epoch": 0.63, "grad_norm": 0.5985847161275715, "learning_rate": 3.1193092805257554e-07, "loss": 0.22, "step": 9943 }, { "epoch": 0.63, "grad_norm": 0.30522493991220756, "learning_rate": 3.118352422100915e-07, "loss": 0.0889, "step": 9944 }, { "epoch": 0.63, "grad_norm": 0.5032988524392258, "learning_rate": 3.117395643950941e-07, "loss": 0.1759, "step": 9945 }, { "epoch": 0.63, "grad_norm": 4.1292612129831685, "learning_rate": 3.116438946116656e-07, "loss": 0.294, "step": 9946 }, { "epoch": 0.63, "grad_norm": 0.48900880580396, "learning_rate": 3.115482328638872e-07, "loss": 0.0879, "step": 9947 }, { "epoch": 0.63, "grad_norm": 8.261090140424646, "learning_rate": 3.1145257915583975e-07, "loss": 0.1679, "step": 9948 }, { "epoch": 0.63, "grad_norm": 1.0631672282215923, "learning_rate": 3.1135693349160463e-07, "loss": 0.1693, "step": 9949 }, { "epoch": 0.63, "grad_norm": 0.1328929962914109, "learning_rate": 3.112612958752617e-07, "loss": 0.0741, "step": 9950 }, { "epoch": 0.63, "grad_norm": 0.47584586979102117, "learning_rate": 3.111656663108914e-07, "loss": 0.0907, "step": 9951 }, { "epoch": 0.63, "grad_norm": 1.8814139796048461, "learning_rate": 3.110700448025732e-07, "loss": 0.1669, "step": 9952 }, { "epoch": 0.63, "grad_norm": 4.525102249279778, "learning_rate": 3.1097443135438696e-07, "loss": 0.143, "step": 9953 }, { "epoch": 0.63, "grad_norm": 3.360034123319944, "learning_rate": 3.1087882597041125e-07, "loss": 0.2441, "step": 9954 }, { "epoch": 0.63, "grad_norm": 0.8884149877409633, "learning_rate": 3.1078322865472517e-07, "loss": 0.2257, "step": 9955 }, { "epoch": 0.63, "grad_norm": 1.4119783766819232, "learning_rate": 3.1068763941140676e-07, "loss": 0.456, "step": 9956 }, { "epoch": 0.63, "grad_norm": 3.5584195161214445, "learning_rate": 3.105920582445344e-07, "loss": 0.2614, "step": 9957 }, { "epoch": 0.64, "grad_norm": 1.6627540683760709, "learning_rate": 3.104964851581855e-07, "loss": 0.2102, "step": 9958 }, { "epoch": 0.64, "grad_norm": 0.623198031090522, "learning_rate": 3.104009201564376e-07, "loss": 0.301, "step": 9959 }, { "epoch": 0.64, "grad_norm": 5.718368521814724, "learning_rate": 3.103053632433674e-07, "loss": 0.0962, "step": 9960 }, { "epoch": 0.64, "grad_norm": 0.4246775664980575, "learning_rate": 3.102098144230518e-07, "loss": 0.1491, "step": 9961 }, { "epoch": 0.64, "grad_norm": 0.3173459440798687, "learning_rate": 3.101142736995672e-07, "loss": 0.0318, "step": 9962 }, { "epoch": 0.64, "grad_norm": 4.97633954457249, "learning_rate": 3.1001874107698916e-07, "loss": 0.0127, "step": 9963 }, { "epoch": 0.64, "grad_norm": 1.2916660812561684, "learning_rate": 3.0992321655939377e-07, "loss": 0.2368, "step": 9964 }, { "epoch": 0.64, "grad_norm": 0.851010525009808, "learning_rate": 3.0982770015085586e-07, "loss": 0.112, "step": 9965 }, { "epoch": 0.64, "grad_norm": 11.920113350536923, "learning_rate": 3.097321918554507e-07, "loss": 0.2247, "step": 9966 }, { "epoch": 0.64, "grad_norm": 0.6563612146386266, "learning_rate": 3.096366916772526e-07, "loss": 0.1539, "step": 9967 }, { "epoch": 0.64, "grad_norm": 0.80807293742905, "learning_rate": 3.09541199620336e-07, "loss": 0.0922, "step": 9968 }, { "epoch": 0.64, "grad_norm": 0.29216820415518563, "learning_rate": 3.0944571568877466e-07, "loss": 0.2328, "step": 9969 }, { "epoch": 0.64, "grad_norm": 0.7729377265353073, "learning_rate": 3.093502398866422e-07, "loss": 0.0259, "step": 9970 }, { "epoch": 0.64, "grad_norm": 1.0381622148450527, "learning_rate": 3.0925477221801156e-07, "loss": 0.4978, "step": 9971 }, { "epoch": 0.64, "grad_norm": 0.22838959937936235, "learning_rate": 3.0915931268695604e-07, "loss": 0.0997, "step": 9972 }, { "epoch": 0.64, "grad_norm": 4.900846762164515, "learning_rate": 3.090638612975477e-07, "loss": 0.1945, "step": 9973 }, { "epoch": 0.64, "grad_norm": 0.5443537803913264, "learning_rate": 3.089684180538591e-07, "loss": 0.1917, "step": 9974 }, { "epoch": 0.64, "grad_norm": 1.2006464030075767, "learning_rate": 3.0887298295996177e-07, "loss": 0.2563, "step": 9975 }, { "epoch": 0.64, "grad_norm": 0.7391239363071468, "learning_rate": 3.0877755601992694e-07, "loss": 0.1639, "step": 9976 }, { "epoch": 0.64, "grad_norm": 1.6390049803932814, "learning_rate": 3.0868213723782616e-07, "loss": 0.0482, "step": 9977 }, { "epoch": 0.64, "grad_norm": 0.4805558010039064, "learning_rate": 3.0858672661772987e-07, "loss": 0.3875, "step": 9978 }, { "epoch": 0.64, "grad_norm": 0.9976928758079943, "learning_rate": 3.0849132416370883e-07, "loss": 0.1156, "step": 9979 }, { "epoch": 0.64, "grad_norm": 0.45979684673637516, "learning_rate": 3.0839592987983264e-07, "loss": 0.1015, "step": 9980 }, { "epoch": 0.64, "grad_norm": 0.4793416610666731, "learning_rate": 3.0830054377017147e-07, "loss": 0.1903, "step": 9981 }, { "epoch": 0.64, "grad_norm": 0.5359966058214101, "learning_rate": 3.082051658387943e-07, "loss": 0.087, "step": 9982 }, { "epoch": 0.64, "grad_norm": 3.385229362388537, "learning_rate": 3.081097960897705e-07, "loss": 0.27, "step": 9983 }, { "epoch": 0.64, "grad_norm": 0.6884951815819341, "learning_rate": 3.0801443452716827e-07, "loss": 0.0878, "step": 9984 }, { "epoch": 0.64, "grad_norm": 1.112456931004475, "learning_rate": 3.079190811550565e-07, "loss": 0.4376, "step": 9985 }, { "epoch": 0.64, "grad_norm": 3.4666678772114836, "learning_rate": 3.078237359775026e-07, "loss": 0.2145, "step": 9986 }, { "epoch": 0.64, "grad_norm": 0.46319190130088983, "learning_rate": 3.0772839899857463e-07, "loss": 0.1443, "step": 9987 }, { "epoch": 0.64, "grad_norm": 0.9449454412277086, "learning_rate": 3.0763307022233967e-07, "loss": 0.2502, "step": 9988 }, { "epoch": 0.64, "grad_norm": 0.5370357256102969, "learning_rate": 3.075377496528645e-07, "loss": 0.2487, "step": 9989 }, { "epoch": 0.64, "grad_norm": 1.35345407023433, "learning_rate": 3.07442437294216e-07, "loss": 0.0602, "step": 9990 }, { "epoch": 0.64, "grad_norm": 0.7936235863106439, "learning_rate": 3.0734713315046004e-07, "loss": 0.3272, "step": 9991 }, { "epoch": 0.64, "grad_norm": 0.4594258122530349, "learning_rate": 3.0725183722566286e-07, "loss": 0.0157, "step": 9992 }, { "epoch": 0.64, "grad_norm": 0.9367334480193928, "learning_rate": 3.0715654952388954e-07, "loss": 0.242, "step": 9993 }, { "epoch": 0.64, "grad_norm": 9.361592574750528, "learning_rate": 3.0706127004920557e-07, "loss": 0.2584, "step": 9994 }, { "epoch": 0.64, "grad_norm": 0.7880141354801808, "learning_rate": 3.0696599880567576e-07, "loss": 0.2367, "step": 9995 }, { "epoch": 0.64, "grad_norm": 0.7058275558300987, "learning_rate": 3.0687073579736443e-07, "loss": 0.4972, "step": 9996 }, { "epoch": 0.64, "grad_norm": 0.6251495941489278, "learning_rate": 3.067754810283356e-07, "loss": 0.2803, "step": 9997 }, { "epoch": 0.64, "grad_norm": 0.5206111780187221, "learning_rate": 3.0668023450265343e-07, "loss": 0.075, "step": 9998 }, { "epoch": 0.64, "grad_norm": 0.4054752988384419, "learning_rate": 3.0658499622438093e-07, "loss": 0.137, "step": 9999 }, { "epoch": 0.64, "grad_norm": 0.7106065029504237, "learning_rate": 3.0648976619758143e-07, "loss": 0.1458, "step": 10000 }, { "epoch": 0.64, "grad_norm": 3.6995757523269095, "learning_rate": 3.0639454442631735e-07, "loss": 0.0273, "step": 10001 }, { "epoch": 0.64, "grad_norm": 0.5310302256045529, "learning_rate": 3.0629933091465136e-07, "loss": 0.1255, "step": 10002 }, { "epoch": 0.64, "grad_norm": 0.745436216301801, "learning_rate": 3.062041256666452e-07, "loss": 0.2902, "step": 10003 }, { "epoch": 0.64, "grad_norm": 1.6331744260702172, "learning_rate": 3.0610892868636076e-07, "loss": 0.3064, "step": 10004 }, { "epoch": 0.64, "grad_norm": 2.9345158770750337, "learning_rate": 3.06013739977859e-07, "loss": 0.1613, "step": 10005 }, { "epoch": 0.64, "grad_norm": 1.3799141040742937, "learning_rate": 3.0591855954520106e-07, "loss": 0.2232, "step": 10006 }, { "epoch": 0.64, "grad_norm": 1.4388784706078745, "learning_rate": 3.0582338739244765e-07, "loss": 0.0198, "step": 10007 }, { "epoch": 0.64, "grad_norm": 1.048807729296082, "learning_rate": 3.0572822352365873e-07, "loss": 0.4561, "step": 10008 }, { "epoch": 0.64, "grad_norm": 0.7215751437856766, "learning_rate": 3.0563306794289455e-07, "loss": 0.2908, "step": 10009 }, { "epoch": 0.64, "grad_norm": 0.5288074995535735, "learning_rate": 3.055379206542142e-07, "loss": 0.0895, "step": 10010 }, { "epoch": 0.64, "grad_norm": 0.420119204931513, "learning_rate": 3.0544278166167725e-07, "loss": 0.2203, "step": 10011 }, { "epoch": 0.64, "grad_norm": 0.9192937025417116, "learning_rate": 3.0534765096934214e-07, "loss": 0.324, "step": 10012 }, { "epoch": 0.64, "grad_norm": 0.3216767944270263, "learning_rate": 3.0525252858126765e-07, "loss": 0.1034, "step": 10013 }, { "epoch": 0.64, "grad_norm": 0.8025912749416866, "learning_rate": 3.051574145015118e-07, "loss": 0.419, "step": 10014 }, { "epoch": 0.64, "grad_norm": 0.38860547799048384, "learning_rate": 3.050623087341323e-07, "loss": 0.2668, "step": 10015 }, { "epoch": 0.64, "grad_norm": 0.30647173013463946, "learning_rate": 3.049672112831867e-07, "loss": 0.2673, "step": 10016 }, { "epoch": 0.64, "grad_norm": 1.1634362172846398, "learning_rate": 3.048721221527317e-07, "loss": 0.1805, "step": 10017 }, { "epoch": 0.64, "grad_norm": 1.2710136400778729, "learning_rate": 3.047770413468245e-07, "loss": 0.1133, "step": 10018 }, { "epoch": 0.64, "grad_norm": 0.8712449299084689, "learning_rate": 3.046819688695209e-07, "loss": 0.3285, "step": 10019 }, { "epoch": 0.64, "grad_norm": 2.6709309625179394, "learning_rate": 3.0458690472487735e-07, "loss": 0.1468, "step": 10020 }, { "epoch": 0.64, "grad_norm": 2.8985259676730797, "learning_rate": 3.0449184891694914e-07, "loss": 0.1437, "step": 10021 }, { "epoch": 0.64, "grad_norm": 2.161120386247777, "learning_rate": 3.0439680144979174e-07, "loss": 0.4697, "step": 10022 }, { "epoch": 0.64, "grad_norm": 1.5839040079934381, "learning_rate": 3.0430176232745984e-07, "loss": 0.1546, "step": 10023 }, { "epoch": 0.64, "grad_norm": 0.46234028993005594, "learning_rate": 3.0420673155400846e-07, "loss": 0.0834, "step": 10024 }, { "epoch": 0.64, "grad_norm": 0.57650839058358, "learning_rate": 3.041117091334913e-07, "loss": 0.3336, "step": 10025 }, { "epoch": 0.64, "grad_norm": 1.0261905910225066, "learning_rate": 3.040166950699625e-07, "loss": 0.3451, "step": 10026 }, { "epoch": 0.64, "grad_norm": 0.9864322855566573, "learning_rate": 3.039216893674753e-07, "loss": 0.2225, "step": 10027 }, { "epoch": 0.64, "grad_norm": 1.4240512723345102, "learning_rate": 3.038266920300833e-07, "loss": 0.2028, "step": 10028 }, { "epoch": 0.64, "grad_norm": 0.4419891423604961, "learning_rate": 3.037317030618388e-07, "loss": 0.0258, "step": 10029 }, { "epoch": 0.64, "grad_norm": 1.1329131507572303, "learning_rate": 3.036367224667944e-07, "loss": 0.3439, "step": 10030 }, { "epoch": 0.64, "grad_norm": 1.5758442686255685, "learning_rate": 3.0354175024900214e-07, "loss": 0.1377, "step": 10031 }, { "epoch": 0.64, "grad_norm": 1.2849863684374576, "learning_rate": 3.0344678641251364e-07, "loss": 0.4759, "step": 10032 }, { "epoch": 0.64, "grad_norm": 1.2759617670586145, "learning_rate": 3.0335183096138064e-07, "loss": 0.351, "step": 10033 }, { "epoch": 0.64, "grad_norm": 0.7502190509325263, "learning_rate": 3.0325688389965355e-07, "loss": 0.3014, "step": 10034 }, { "epoch": 0.64, "grad_norm": 1.2910158057038357, "learning_rate": 3.0316194523138355e-07, "loss": 0.2676, "step": 10035 }, { "epoch": 0.64, "grad_norm": 0.9492687373755502, "learning_rate": 3.030670149606205e-07, "loss": 0.2831, "step": 10036 }, { "epoch": 0.64, "grad_norm": 0.22668594642335435, "learning_rate": 3.029720930914146e-07, "loss": 0.0998, "step": 10037 }, { "epoch": 0.64, "grad_norm": 0.43713275265812845, "learning_rate": 3.0287717962781506e-07, "loss": 0.3113, "step": 10038 }, { "epoch": 0.64, "grad_norm": 1.089456274733409, "learning_rate": 3.0278227457387144e-07, "loss": 0.1938, "step": 10039 }, { "epoch": 0.64, "grad_norm": 0.5208155172137892, "learning_rate": 3.0268737793363246e-07, "loss": 0.1167, "step": 10040 }, { "epoch": 0.64, "grad_norm": 0.9715374167494162, "learning_rate": 3.025924897111466e-07, "loss": 0.2882, "step": 10041 }, { "epoch": 0.64, "grad_norm": 0.9099912635361547, "learning_rate": 3.024976099104618e-07, "loss": 0.583, "step": 10042 }, { "epoch": 0.64, "grad_norm": 0.437331688200397, "learning_rate": 3.0240273853562625e-07, "loss": 0.1859, "step": 10043 }, { "epoch": 0.64, "grad_norm": 1.2434999435163299, "learning_rate": 3.023078755906871e-07, "loss": 0.2225, "step": 10044 }, { "epoch": 0.64, "grad_norm": 1.2345209468975693, "learning_rate": 3.0221302107969114e-07, "loss": 0.39, "step": 10045 }, { "epoch": 0.64, "grad_norm": 0.37678068180372903, "learning_rate": 3.021181750066856e-07, "loss": 0.3625, "step": 10046 }, { "epoch": 0.64, "grad_norm": 0.7321066389547666, "learning_rate": 3.0202333737571616e-07, "loss": 0.1433, "step": 10047 }, { "epoch": 0.64, "grad_norm": 0.3389152957541529, "learning_rate": 3.0192850819082937e-07, "loss": 0.2074, "step": 10048 }, { "epoch": 0.64, "grad_norm": 0.9342221122330095, "learning_rate": 3.018336874560705e-07, "loss": 0.2716, "step": 10049 }, { "epoch": 0.64, "grad_norm": 2.2871994912978746, "learning_rate": 3.017388751754849e-07, "loss": 0.4026, "step": 10050 }, { "epoch": 0.64, "grad_norm": 0.8573237685064583, "learning_rate": 3.016440713531174e-07, "loss": 0.3745, "step": 10051 }, { "epoch": 0.64, "grad_norm": 0.9915866499862802, "learning_rate": 3.0154927599301274e-07, "loss": 0.5495, "step": 10052 }, { "epoch": 0.64, "grad_norm": 0.1618492345434936, "learning_rate": 3.014544890992147e-07, "loss": 0.0037, "step": 10053 }, { "epoch": 0.64, "grad_norm": 0.23930629093503197, "learning_rate": 3.013597106757674e-07, "loss": 0.0881, "step": 10054 }, { "epoch": 0.64, "grad_norm": 2.7039139070001506, "learning_rate": 3.0126494072671405e-07, "loss": 0.0285, "step": 10055 }, { "epoch": 0.64, "grad_norm": 2.682928743757131, "learning_rate": 3.01170179256098e-07, "loss": 0.1217, "step": 10056 }, { "epoch": 0.64, "grad_norm": 1.1586214481233266, "learning_rate": 3.0107542626796165e-07, "loss": 0.2061, "step": 10057 }, { "epoch": 0.64, "grad_norm": 0.33420648078880516, "learning_rate": 3.009806817663475e-07, "loss": 0.0271, "step": 10058 }, { "epoch": 0.64, "grad_norm": 0.3803203391794372, "learning_rate": 3.0088594575529774e-07, "loss": 0.0549, "step": 10059 }, { "epoch": 0.64, "grad_norm": 0.44753180417879, "learning_rate": 3.0079121823885356e-07, "loss": 0.0903, "step": 10060 }, { "epoch": 0.64, "grad_norm": 0.38166879068954435, "learning_rate": 3.006964992210567e-07, "loss": 0.0024, "step": 10061 }, { "epoch": 0.64, "grad_norm": 0.17810504323650683, "learning_rate": 3.006017887059476e-07, "loss": 0.1087, "step": 10062 }, { "epoch": 0.64, "grad_norm": 1.1141266485442696, "learning_rate": 3.005070866975673e-07, "loss": 0.3222, "step": 10063 }, { "epoch": 0.64, "grad_norm": 0.9520269321580211, "learning_rate": 3.0041239319995544e-07, "loss": 0.2189, "step": 10064 }, { "epoch": 0.64, "grad_norm": 0.976190605386346, "learning_rate": 3.003177082171523e-07, "loss": 0.3072, "step": 10065 }, { "epoch": 0.64, "grad_norm": 1.053057840162801, "learning_rate": 3.0022303175319695e-07, "loss": 0.2027, "step": 10066 }, { "epoch": 0.64, "grad_norm": 0.5750296983297821, "learning_rate": 3.001283638121288e-07, "loss": 0.0482, "step": 10067 }, { "epoch": 0.64, "grad_norm": 2.2917914081263246, "learning_rate": 3.000337043979864e-07, "loss": 0.2143, "step": 10068 }, { "epoch": 0.64, "grad_norm": 2.191854693995644, "learning_rate": 2.9993905351480823e-07, "loss": 0.1763, "step": 10069 }, { "epoch": 0.64, "grad_norm": 3.7433518076973855, "learning_rate": 2.9984441116663206e-07, "loss": 0.2507, "step": 10070 }, { "epoch": 0.64, "grad_norm": 1.4834550371552282, "learning_rate": 2.997497773574959e-07, "loss": 0.3234, "step": 10071 }, { "epoch": 0.64, "grad_norm": 11.049990209930852, "learning_rate": 2.9965515209143674e-07, "loss": 0.2025, "step": 10072 }, { "epoch": 0.64, "grad_norm": 1.6302614591140923, "learning_rate": 2.9956053537249137e-07, "loss": 0.3305, "step": 10073 }, { "epoch": 0.64, "grad_norm": 0.8719787278468222, "learning_rate": 2.994659272046966e-07, "loss": 0.1301, "step": 10074 }, { "epoch": 0.64, "grad_norm": 1.6982705460270011, "learning_rate": 2.993713275920885e-07, "loss": 0.1801, "step": 10075 }, { "epoch": 0.64, "grad_norm": 2.6275087308085543, "learning_rate": 2.992767365387029e-07, "loss": 0.0436, "step": 10076 }, { "epoch": 0.64, "grad_norm": 0.2712916019561169, "learning_rate": 2.9918215404857505e-07, "loss": 0.0128, "step": 10077 }, { "epoch": 0.64, "grad_norm": 1.1491099588932046, "learning_rate": 2.990875801257404e-07, "loss": 0.1069, "step": 10078 }, { "epoch": 0.64, "grad_norm": 0.7451719885124801, "learning_rate": 2.9899301477423336e-07, "loss": 0.2882, "step": 10079 }, { "epoch": 0.64, "grad_norm": 1.4185935208862142, "learning_rate": 2.9889845799808854e-07, "loss": 0.1663, "step": 10080 }, { "epoch": 0.64, "grad_norm": 0.9512447606296824, "learning_rate": 2.9880390980133954e-07, "loss": 0.22, "step": 10081 }, { "epoch": 0.64, "grad_norm": 0.7203886033678134, "learning_rate": 2.9870937018802045e-07, "loss": 0.1875, "step": 10082 }, { "epoch": 0.64, "grad_norm": 1.80985039946878, "learning_rate": 2.98614839162164e-07, "loss": 0.3262, "step": 10083 }, { "epoch": 0.64, "grad_norm": 0.3829490125097748, "learning_rate": 2.985203167278035e-07, "loss": 0.1044, "step": 10084 }, { "epoch": 0.64, "grad_norm": 2.6240450139913647, "learning_rate": 2.984258028889715e-07, "loss": 0.0988, "step": 10085 }, { "epoch": 0.64, "grad_norm": 7.005710200407289, "learning_rate": 2.983312976496996e-07, "loss": 0.3275, "step": 10086 }, { "epoch": 0.64, "grad_norm": 0.9079486894107951, "learning_rate": 2.982368010140203e-07, "loss": 0.0682, "step": 10087 }, { "epoch": 0.64, "grad_norm": 0.9466190574800312, "learning_rate": 2.981423129859643e-07, "loss": 0.385, "step": 10088 }, { "epoch": 0.64, "grad_norm": 0.7014769328021367, "learning_rate": 2.980478335695633e-07, "loss": 0.1394, "step": 10089 }, { "epoch": 0.64, "grad_norm": 1.0209469875880932, "learning_rate": 2.9795336276884753e-07, "loss": 0.2723, "step": 10090 }, { "epoch": 0.64, "grad_norm": 0.6572692611290462, "learning_rate": 2.9785890058784756e-07, "loss": 0.1291, "step": 10091 }, { "epoch": 0.64, "grad_norm": 0.7677575212627107, "learning_rate": 2.977644470305931e-07, "loss": 0.1856, "step": 10092 }, { "epoch": 0.64, "grad_norm": 4.451153090437871, "learning_rate": 2.9767000210111403e-07, "loss": 0.2387, "step": 10093 }, { "epoch": 0.64, "grad_norm": 0.898565856327848, "learning_rate": 2.9757556580343923e-07, "loss": 0.3278, "step": 10094 }, { "epoch": 0.64, "grad_norm": 0.8480122659879772, "learning_rate": 2.9748113814159795e-07, "loss": 0.2639, "step": 10095 }, { "epoch": 0.64, "grad_norm": 0.3145698739956969, "learning_rate": 2.9738671911961826e-07, "loss": 0.0033, "step": 10096 }, { "epoch": 0.64, "grad_norm": 1.0598471603856254, "learning_rate": 2.9729230874152863e-07, "loss": 0.0895, "step": 10097 }, { "epoch": 0.64, "grad_norm": 3.3706323597084116, "learning_rate": 2.9719790701135656e-07, "loss": 0.0065, "step": 10098 }, { "epoch": 0.64, "grad_norm": 14.908254250704342, "learning_rate": 2.9710351393312926e-07, "loss": 0.2405, "step": 10099 }, { "epoch": 0.64, "grad_norm": 0.7518823938104033, "learning_rate": 2.970091295108741e-07, "loss": 0.2358, "step": 10100 }, { "epoch": 0.64, "grad_norm": 0.7617448569859814, "learning_rate": 2.9691475374861747e-07, "loss": 0.4019, "step": 10101 }, { "epoch": 0.64, "grad_norm": 0.40944923641609, "learning_rate": 2.968203866503857e-07, "loss": 0.1179, "step": 10102 }, { "epoch": 0.64, "grad_norm": 0.5376033138473534, "learning_rate": 2.967260282202046e-07, "loss": 0.1083, "step": 10103 }, { "epoch": 0.64, "grad_norm": 0.6607908824701466, "learning_rate": 2.9663167846209996e-07, "loss": 0.1846, "step": 10104 }, { "epoch": 0.64, "grad_norm": 0.5919673760987662, "learning_rate": 2.9653733738009657e-07, "loss": 0.1148, "step": 10105 }, { "epoch": 0.64, "grad_norm": 1.1449331951659232, "learning_rate": 2.964430049782195e-07, "loss": 0.2059, "step": 10106 }, { "epoch": 0.64, "grad_norm": 9.113220674165706, "learning_rate": 2.9634868126049287e-07, "loss": 0.2069, "step": 10107 }, { "epoch": 0.64, "grad_norm": 8.432933815515144, "learning_rate": 2.9625436623094113e-07, "loss": 0.0945, "step": 10108 }, { "epoch": 0.64, "grad_norm": 1.412151247822407, "learning_rate": 2.961600598935875e-07, "loss": 0.1954, "step": 10109 }, { "epoch": 0.64, "grad_norm": 1.692059780788752, "learning_rate": 2.960657622524556e-07, "loss": 0.2773, "step": 10110 }, { "epoch": 0.64, "grad_norm": 1.4465622939612175, "learning_rate": 2.959714733115681e-07, "loss": 0.1709, "step": 10111 }, { "epoch": 0.64, "grad_norm": 1.035136004114922, "learning_rate": 2.9587719307494787e-07, "loss": 0.2606, "step": 10112 }, { "epoch": 0.64, "grad_norm": 0.8765561261070781, "learning_rate": 2.9578292154661696e-07, "loss": 0.2269, "step": 10113 }, { "epoch": 0.64, "grad_norm": 0.4644450559399976, "learning_rate": 2.956886587305969e-07, "loss": 0.1751, "step": 10114 }, { "epoch": 0.65, "grad_norm": 0.8967843536064332, "learning_rate": 2.9559440463090963e-07, "loss": 0.149, "step": 10115 }, { "epoch": 0.65, "grad_norm": 2.7431122814227695, "learning_rate": 2.955001592515758e-07, "loss": 0.1275, "step": 10116 }, { "epoch": 0.65, "grad_norm": 2.1763049705796447, "learning_rate": 2.9540592259661634e-07, "loss": 0.1584, "step": 10117 }, { "epoch": 0.65, "grad_norm": 0.8440220796152866, "learning_rate": 2.9531169467005147e-07, "loss": 0.109, "step": 10118 }, { "epoch": 0.65, "grad_norm": 4.238583783068823, "learning_rate": 2.9521747547590114e-07, "loss": 0.3639, "step": 10119 }, { "epoch": 0.65, "grad_norm": 0.4512614817524705, "learning_rate": 2.9512326501818505e-07, "loss": 0.116, "step": 10120 }, { "epoch": 0.65, "grad_norm": 0.8378187597679194, "learning_rate": 2.9502906330092233e-07, "loss": 0.0733, "step": 10121 }, { "epoch": 0.65, "grad_norm": 3.2417661979508154, "learning_rate": 2.949348703281317e-07, "loss": 0.2177, "step": 10122 }, { "epoch": 0.65, "grad_norm": 0.989954464847215, "learning_rate": 2.948406861038319e-07, "loss": 0.3178, "step": 10123 }, { "epoch": 0.65, "grad_norm": 0.924334248056165, "learning_rate": 2.947465106320407e-07, "loss": 0.197, "step": 10124 }, { "epoch": 0.65, "grad_norm": 1.9169590362837634, "learning_rate": 2.9465234391677614e-07, "loss": 0.2571, "step": 10125 }, { "epoch": 0.65, "grad_norm": 0.612481670372759, "learning_rate": 2.945581859620554e-07, "loss": 0.1421, "step": 10126 }, { "epoch": 0.65, "grad_norm": 0.7364090504313137, "learning_rate": 2.9446403677189523e-07, "loss": 0.1013, "step": 10127 }, { "epoch": 0.65, "grad_norm": 0.755564132742967, "learning_rate": 2.943698963503125e-07, "loss": 0.1658, "step": 10128 }, { "epoch": 0.65, "grad_norm": 1.1002501104171318, "learning_rate": 2.942757647013233e-07, "loss": 0.2312, "step": 10129 }, { "epoch": 0.65, "grad_norm": 3.8455882466354714, "learning_rate": 2.941816418289438e-07, "loss": 0.0073, "step": 10130 }, { "epoch": 0.65, "grad_norm": 0.6175502859939063, "learning_rate": 2.940875277371889e-07, "loss": 0.0127, "step": 10131 }, { "epoch": 0.65, "grad_norm": 1.3002325125406036, "learning_rate": 2.9399342243007423e-07, "loss": 0.2075, "step": 10132 }, { "epoch": 0.65, "grad_norm": 0.6189822446095793, "learning_rate": 2.938993259116141e-07, "loss": 0.1336, "step": 10133 }, { "epoch": 0.65, "grad_norm": 0.7771168105077464, "learning_rate": 2.9380523818582325e-07, "loss": 0.204, "step": 10134 }, { "epoch": 0.65, "grad_norm": 4.246312885201733, "learning_rate": 2.9371115925671517e-07, "loss": 0.2103, "step": 10135 }, { "epoch": 0.65, "grad_norm": 0.9666412012084128, "learning_rate": 2.9361708912830403e-07, "loss": 0.0553, "step": 10136 }, { "epoch": 0.65, "grad_norm": 1.0650677233362091, "learning_rate": 2.935230278046025e-07, "loss": 0.4619, "step": 10137 }, { "epoch": 0.65, "grad_norm": 0.4880920623331828, "learning_rate": 2.934289752896238e-07, "loss": 0.1457, "step": 10138 }, { "epoch": 0.65, "grad_norm": 0.7050596125092834, "learning_rate": 2.9333493158738033e-07, "loss": 0.2861, "step": 10139 }, { "epoch": 0.65, "grad_norm": 0.598019207815051, "learning_rate": 2.9324089670188397e-07, "loss": 0.2154, "step": 10140 }, { "epoch": 0.65, "grad_norm": 1.3899083727109987, "learning_rate": 2.931468706371468e-07, "loss": 0.1716, "step": 10141 }, { "epoch": 0.65, "grad_norm": 0.7443308974448324, "learning_rate": 2.9305285339717964e-07, "loss": 0.2573, "step": 10142 }, { "epoch": 0.65, "grad_norm": 0.6111182814040098, "learning_rate": 2.929588449859941e-07, "loss": 0.0775, "step": 10143 }, { "epoch": 0.65, "grad_norm": 0.6031787105155585, "learning_rate": 2.9286484540760024e-07, "loss": 0.174, "step": 10144 }, { "epoch": 0.65, "grad_norm": 5.059288171325278, "learning_rate": 2.927708546660085e-07, "loss": 0.156, "step": 10145 }, { "epoch": 0.65, "grad_norm": 0.6155850848380845, "learning_rate": 2.926768727652287e-07, "loss": 0.2203, "step": 10146 }, { "epoch": 0.65, "grad_norm": 17.661522486178317, "learning_rate": 2.925828997092703e-07, "loss": 0.2288, "step": 10147 }, { "epoch": 0.65, "grad_norm": 1.5909122067725574, "learning_rate": 2.9248893550214225e-07, "loss": 0.3839, "step": 10148 }, { "epoch": 0.65, "grad_norm": 3.663937159106112, "learning_rate": 2.9239498014785357e-07, "loss": 0.1143, "step": 10149 }, { "epoch": 0.65, "grad_norm": 0.6793887715076052, "learning_rate": 2.923010336504121e-07, "loss": 0.3644, "step": 10150 }, { "epoch": 0.65, "grad_norm": 0.7528713435368135, "learning_rate": 2.9220709601382643e-07, "loss": 0.1757, "step": 10151 }, { "epoch": 0.65, "grad_norm": 4.125473558541575, "learning_rate": 2.9211316724210344e-07, "loss": 0.0956, "step": 10152 }, { "epoch": 0.65, "grad_norm": 0.3127818765202288, "learning_rate": 2.920192473392509e-07, "loss": 0.1067, "step": 10153 }, { "epoch": 0.65, "grad_norm": 0.837624615422415, "learning_rate": 2.919253363092753e-07, "loss": 0.231, "step": 10154 }, { "epoch": 0.65, "grad_norm": 1.2875846712266474, "learning_rate": 2.918314341561829e-07, "loss": 0.1165, "step": 10155 }, { "epoch": 0.65, "grad_norm": 2.4005460474096294, "learning_rate": 2.9173754088398027e-07, "loss": 0.1164, "step": 10156 }, { "epoch": 0.65, "grad_norm": 1.84564787952284, "learning_rate": 2.9164365649667255e-07, "loss": 0.3742, "step": 10157 }, { "epoch": 0.65, "grad_norm": 0.7321126275754902, "learning_rate": 2.915497809982653e-07, "loss": 0.0675, "step": 10158 }, { "epoch": 0.65, "grad_norm": 0.6524438418410831, "learning_rate": 2.914559143927637e-07, "loss": 0.1916, "step": 10159 }, { "epoch": 0.65, "grad_norm": 0.7163088981075492, "learning_rate": 2.913620566841718e-07, "loss": 0.3322, "step": 10160 }, { "epoch": 0.65, "grad_norm": 0.5007082039915902, "learning_rate": 2.9126820787649397e-07, "loss": 0.199, "step": 10161 }, { "epoch": 0.65, "grad_norm": 0.6585978086439676, "learning_rate": 2.911743679737342e-07, "loss": 0.0193, "step": 10162 }, { "epoch": 0.65, "grad_norm": 1.1463803896353832, "learning_rate": 2.9108053697989543e-07, "loss": 0.2195, "step": 10163 }, { "epoch": 0.65, "grad_norm": 1.0451761528139054, "learning_rate": 2.9098671489898114e-07, "loss": 0.0989, "step": 10164 }, { "epoch": 0.65, "grad_norm": 0.89734534298001, "learning_rate": 2.908929017349936e-07, "loss": 0.4223, "step": 10165 }, { "epoch": 0.65, "grad_norm": 1.8270052705829065, "learning_rate": 2.9079909749193544e-07, "loss": 0.2895, "step": 10166 }, { "epoch": 0.65, "grad_norm": 0.364870594769816, "learning_rate": 2.907053021738083e-07, "loss": 0.1091, "step": 10167 }, { "epoch": 0.65, "grad_norm": 0.423504499918296, "learning_rate": 2.906115157846135e-07, "loss": 0.0939, "step": 10168 }, { "epoch": 0.65, "grad_norm": 0.9835994607264935, "learning_rate": 2.9051773832835257e-07, "loss": 0.1069, "step": 10169 }, { "epoch": 0.65, "grad_norm": 0.6078985931798069, "learning_rate": 2.904239698090258e-07, "loss": 0.2006, "step": 10170 }, { "epoch": 0.65, "grad_norm": 1.8585419408299761, "learning_rate": 2.9033021023063405e-07, "loss": 0.0775, "step": 10171 }, { "epoch": 0.65, "grad_norm": 0.9431968041486721, "learning_rate": 2.9023645959717676e-07, "loss": 0.1079, "step": 10172 }, { "epoch": 0.65, "grad_norm": 0.2828754239310904, "learning_rate": 2.90142717912654e-07, "loss": 0.0871, "step": 10173 }, { "epoch": 0.65, "grad_norm": 0.38468142708540887, "learning_rate": 2.9004898518106457e-07, "loss": 0.1511, "step": 10174 }, { "epoch": 0.65, "grad_norm": 0.8713158391269873, "learning_rate": 2.899552614064077e-07, "loss": 0.115, "step": 10175 }, { "epoch": 0.65, "grad_norm": 6.176435073014773, "learning_rate": 2.8986154659268137e-07, "loss": 0.1554, "step": 10176 }, { "epoch": 0.65, "grad_norm": 2.0177193142856265, "learning_rate": 2.8976784074388395e-07, "loss": 0.4084, "step": 10177 }, { "epoch": 0.65, "grad_norm": 7.369229236259131, "learning_rate": 2.896741438640132e-07, "loss": 0.2152, "step": 10178 }, { "epoch": 0.65, "grad_norm": 1.4581195734418044, "learning_rate": 2.8958045595706617e-07, "loss": 0.1189, "step": 10179 }, { "epoch": 0.65, "grad_norm": 3.9939602725598458, "learning_rate": 2.894867770270398e-07, "loss": 0.111, "step": 10180 }, { "epoch": 0.65, "grad_norm": 0.5609244693542784, "learning_rate": 2.8939310707793097e-07, "loss": 0.1326, "step": 10181 }, { "epoch": 0.65, "grad_norm": 1.1259466010058046, "learning_rate": 2.8929944611373554e-07, "loss": 0.2838, "step": 10182 }, { "epoch": 0.65, "grad_norm": 0.833719703385876, "learning_rate": 2.8920579413844904e-07, "loss": 0.0472, "step": 10183 }, { "epoch": 0.65, "grad_norm": 0.6790088170753138, "learning_rate": 2.891121511560674e-07, "loss": 0.1817, "step": 10184 }, { "epoch": 0.65, "grad_norm": 1.0421877559803852, "learning_rate": 2.8901851717058513e-07, "loss": 0.1087, "step": 10185 }, { "epoch": 0.65, "grad_norm": 0.5242384686078717, "learning_rate": 2.889248921859972e-07, "loss": 0.3666, "step": 10186 }, { "epoch": 0.65, "grad_norm": 0.4096621419743181, "learning_rate": 2.888312762062974e-07, "loss": 0.0943, "step": 10187 }, { "epoch": 0.65, "grad_norm": 1.1403917586988235, "learning_rate": 2.887376692354803e-07, "loss": 0.4638, "step": 10188 }, { "epoch": 0.65, "grad_norm": 1.1251275356077421, "learning_rate": 2.886440712775385e-07, "loss": 0.2976, "step": 10189 }, { "epoch": 0.65, "grad_norm": 0.40686203098349166, "learning_rate": 2.8855048233646576e-07, "loss": 0.3208, "step": 10190 }, { "epoch": 0.65, "grad_norm": 0.81965736341543, "learning_rate": 2.884569024162543e-07, "loss": 0.3933, "step": 10191 }, { "epoch": 0.65, "grad_norm": 0.6045867870038998, "learning_rate": 2.8836333152089687e-07, "loss": 0.1915, "step": 10192 }, { "epoch": 0.65, "grad_norm": 0.6880109823058511, "learning_rate": 2.88269769654385e-07, "loss": 0.3868, "step": 10193 }, { "epoch": 0.65, "grad_norm": 1.0855868710341854, "learning_rate": 2.881762168207105e-07, "loss": 0.1405, "step": 10194 }, { "epoch": 0.65, "grad_norm": 1.7032886920100117, "learning_rate": 2.8808267302386423e-07, "loss": 0.2338, "step": 10195 }, { "epoch": 0.65, "grad_norm": 0.7592264969634548, "learning_rate": 2.8798913826783724e-07, "loss": 0.2576, "step": 10196 }, { "epoch": 0.65, "grad_norm": 1.3478853546872511, "learning_rate": 2.8789561255661986e-07, "loss": 0.1196, "step": 10197 }, { "epoch": 0.65, "grad_norm": 1.6906784802526151, "learning_rate": 2.878020958942019e-07, "loss": 0.2686, "step": 10198 }, { "epoch": 0.65, "grad_norm": 0.7904765996454155, "learning_rate": 2.8770858828457336e-07, "loss": 0.1888, "step": 10199 }, { "epoch": 0.65, "grad_norm": 0.5154334002839001, "learning_rate": 2.8761508973172286e-07, "loss": 0.1782, "step": 10200 }, { "epoch": 0.65, "grad_norm": 0.3354066006994746, "learning_rate": 2.875216002396399e-07, "loss": 0.1028, "step": 10201 }, { "epoch": 0.65, "grad_norm": 3.444505493427869, "learning_rate": 2.8742811981231236e-07, "loss": 0.0521, "step": 10202 }, { "epoch": 0.65, "grad_norm": 1.47817889508201, "learning_rate": 2.873346484537288e-07, "loss": 0.298, "step": 10203 }, { "epoch": 0.65, "grad_norm": 0.6336947599145452, "learning_rate": 2.8724118616787636e-07, "loss": 0.5221, "step": 10204 }, { "epoch": 0.65, "grad_norm": 0.928730037937413, "learning_rate": 2.8714773295874283e-07, "loss": 0.3397, "step": 10205 }, { "epoch": 0.65, "grad_norm": 6.858571369987186, "learning_rate": 2.870542888303148e-07, "loss": 0.0332, "step": 10206 }, { "epoch": 0.65, "grad_norm": 3.078718939124812, "learning_rate": 2.86960853786579e-07, "loss": 0.286, "step": 10207 }, { "epoch": 0.65, "grad_norm": 1.1468493477250459, "learning_rate": 2.868674278315214e-07, "loss": 0.139, "step": 10208 }, { "epoch": 0.65, "grad_norm": 1.4781322692514693, "learning_rate": 2.8677401096912767e-07, "loss": 0.268, "step": 10209 }, { "epoch": 0.65, "grad_norm": 0.9173659262253786, "learning_rate": 2.8668060320338345e-07, "loss": 0.2243, "step": 10210 }, { "epoch": 0.65, "grad_norm": 1.2222157176630717, "learning_rate": 2.865872045382733e-07, "loss": 0.3454, "step": 10211 }, { "epoch": 0.65, "grad_norm": 0.3029370794229205, "learning_rate": 2.86493814977782e-07, "loss": 0.0399, "step": 10212 }, { "epoch": 0.65, "grad_norm": 1.468774882671728, "learning_rate": 2.864004345258938e-07, "loss": 0.294, "step": 10213 }, { "epoch": 0.65, "grad_norm": 1.6157720857306135, "learning_rate": 2.863070631865926e-07, "loss": 0.3866, "step": 10214 }, { "epoch": 0.65, "grad_norm": 0.782503562281586, "learning_rate": 2.8621370096386155e-07, "loss": 0.3746, "step": 10215 }, { "epoch": 0.65, "grad_norm": 1.248339048644885, "learning_rate": 2.861203478616839e-07, "loss": 0.2226, "step": 10216 }, { "epoch": 0.65, "grad_norm": 0.3511410845734303, "learning_rate": 2.86027003884042e-07, "loss": 0.2554, "step": 10217 }, { "epoch": 0.65, "grad_norm": 1.1966562512631873, "learning_rate": 2.8593366903491845e-07, "loss": 0.2706, "step": 10218 }, { "epoch": 0.65, "grad_norm": 0.47867191486930216, "learning_rate": 2.8584034331829465e-07, "loss": 0.066, "step": 10219 }, { "epoch": 0.65, "grad_norm": 0.5339949316047048, "learning_rate": 2.8574702673815257e-07, "loss": 0.12, "step": 10220 }, { "epoch": 0.65, "grad_norm": 0.5611385857681584, "learning_rate": 2.856537192984728e-07, "loss": 0.1945, "step": 10221 }, { "epoch": 0.65, "grad_norm": 0.7098057171282469, "learning_rate": 2.8556042100323653e-07, "loss": 0.2918, "step": 10222 }, { "epoch": 0.65, "grad_norm": 3.142105614411486, "learning_rate": 2.854671318564237e-07, "loss": 0.1315, "step": 10223 }, { "epoch": 0.65, "grad_norm": 0.35340259488248843, "learning_rate": 2.853738518620141e-07, "loss": 0.0505, "step": 10224 }, { "epoch": 0.65, "grad_norm": 1.6201995876999788, "learning_rate": 2.8528058102398767e-07, "loss": 0.2407, "step": 10225 }, { "epoch": 0.65, "grad_norm": 1.2426319278333497, "learning_rate": 2.851873193463231e-07, "loss": 0.3811, "step": 10226 }, { "epoch": 0.65, "grad_norm": 2.9231925775715792, "learning_rate": 2.850940668329995e-07, "loss": 0.1911, "step": 10227 }, { "epoch": 0.65, "grad_norm": 7.683992341126847, "learning_rate": 2.8500082348799484e-07, "loss": 0.1405, "step": 10228 }, { "epoch": 0.65, "grad_norm": 3.030114905525364, "learning_rate": 2.8490758931528733e-07, "loss": 0.3582, "step": 10229 }, { "epoch": 0.65, "grad_norm": 0.5730900085900599, "learning_rate": 2.848143643188544e-07, "loss": 0.2923, "step": 10230 }, { "epoch": 0.65, "grad_norm": 1.3130173591499996, "learning_rate": 2.847211485026732e-07, "loss": 0.1366, "step": 10231 }, { "epoch": 0.65, "grad_norm": 1.6563485023553393, "learning_rate": 2.8462794187072056e-07, "loss": 0.1694, "step": 10232 }, { "epoch": 0.65, "grad_norm": 5.513843404179402, "learning_rate": 2.8453474442697313e-07, "loss": 0.0761, "step": 10233 }, { "epoch": 0.65, "grad_norm": 1.4821740985558005, "learning_rate": 2.8444155617540645e-07, "loss": 0.3699, "step": 10234 }, { "epoch": 0.65, "grad_norm": 3.937843973931371, "learning_rate": 2.843483771199964e-07, "loss": 0.2367, "step": 10235 }, { "epoch": 0.65, "grad_norm": 0.6188196116057207, "learning_rate": 2.842552072647182e-07, "loss": 0.3673, "step": 10236 }, { "epoch": 0.65, "grad_norm": 3.668748716083185, "learning_rate": 2.8416204661354634e-07, "loss": 0.1505, "step": 10237 }, { "epoch": 0.65, "grad_norm": 1.6595810651174245, "learning_rate": 2.8406889517045563e-07, "loss": 0.3319, "step": 10238 }, { "epoch": 0.65, "grad_norm": 0.41065654226966897, "learning_rate": 2.839757529394197e-07, "loss": 0.1962, "step": 10239 }, { "epoch": 0.65, "grad_norm": 1.3955031550423198, "learning_rate": 2.8388261992441263e-07, "loss": 0.1096, "step": 10240 }, { "epoch": 0.65, "grad_norm": 0.6508463178007073, "learning_rate": 2.8378949612940726e-07, "loss": 0.3858, "step": 10241 }, { "epoch": 0.65, "grad_norm": 0.48423807947557324, "learning_rate": 2.836963815583769e-07, "loss": 0.1474, "step": 10242 }, { "epoch": 0.65, "grad_norm": 2.8928769138359334, "learning_rate": 2.8360327621529343e-07, "loss": 0.0216, "step": 10243 }, { "epoch": 0.65, "grad_norm": 0.42743112036302267, "learning_rate": 2.835101801041294e-07, "loss": 0.142, "step": 10244 }, { "epoch": 0.65, "grad_norm": 1.0621584090643938, "learning_rate": 2.834170932288562e-07, "loss": 0.1203, "step": 10245 }, { "epoch": 0.65, "grad_norm": 0.4533056235972029, "learning_rate": 2.8332401559344534e-07, "loss": 0.0911, "step": 10246 }, { "epoch": 0.65, "grad_norm": 1.3373391990105874, "learning_rate": 2.8323094720186724e-07, "loss": 0.2234, "step": 10247 }, { "epoch": 0.65, "grad_norm": 0.23324687665964133, "learning_rate": 2.831378880580928e-07, "loss": 0.0763, "step": 10248 }, { "epoch": 0.65, "grad_norm": 0.6111975212027193, "learning_rate": 2.8304483816609215e-07, "loss": 0.1486, "step": 10249 }, { "epoch": 0.65, "grad_norm": 1.4004848741687492, "learning_rate": 2.8295179752983466e-07, "loss": 0.1131, "step": 10250 }, { "epoch": 0.65, "grad_norm": 0.6956776664200824, "learning_rate": 2.828587661532901e-07, "loss": 0.1478, "step": 10251 }, { "epoch": 0.65, "grad_norm": 0.8217026929838352, "learning_rate": 2.8276574404042677e-07, "loss": 0.1402, "step": 10252 }, { "epoch": 0.65, "grad_norm": 0.6513064563950473, "learning_rate": 2.8267273119521365e-07, "loss": 0.2293, "step": 10253 }, { "epoch": 0.65, "grad_norm": 0.6472530386355813, "learning_rate": 2.8257972762161863e-07, "loss": 0.3021, "step": 10254 }, { "epoch": 0.65, "grad_norm": 0.7732910536516439, "learning_rate": 2.824867333236096e-07, "loss": 0.1998, "step": 10255 }, { "epoch": 0.65, "grad_norm": 0.6853220452332898, "learning_rate": 2.823937483051536e-07, "loss": 0.1586, "step": 10256 }, { "epoch": 0.65, "grad_norm": 2.6189092250336454, "learning_rate": 2.82300772570218e-07, "loss": 0.3721, "step": 10257 }, { "epoch": 0.65, "grad_norm": 1.3897431660961344, "learning_rate": 2.8220780612276887e-07, "loss": 0.2036, "step": 10258 }, { "epoch": 0.65, "grad_norm": 4.048638907755914, "learning_rate": 2.8211484896677274e-07, "loss": 0.1098, "step": 10259 }, { "epoch": 0.65, "grad_norm": 1.022173529418032, "learning_rate": 2.820219011061949e-07, "loss": 0.2273, "step": 10260 }, { "epoch": 0.65, "grad_norm": 0.48019927111320554, "learning_rate": 2.819289625450012e-07, "loss": 0.3013, "step": 10261 }, { "epoch": 0.65, "grad_norm": 0.3915675359680851, "learning_rate": 2.8183603328715613e-07, "loss": 0.248, "step": 10262 }, { "epoch": 0.65, "grad_norm": 1.3131145352365399, "learning_rate": 2.817431133366246e-07, "loss": 0.1545, "step": 10263 }, { "epoch": 0.65, "grad_norm": 0.41835883686822495, "learning_rate": 2.8165020269737046e-07, "loss": 0.0843, "step": 10264 }, { "epoch": 0.65, "grad_norm": 5.489341998654538, "learning_rate": 2.8155730137335777e-07, "loss": 0.0821, "step": 10265 }, { "epoch": 0.65, "grad_norm": 0.31110884249060594, "learning_rate": 2.8146440936854953e-07, "loss": 0.1006, "step": 10266 }, { "epoch": 0.65, "grad_norm": 0.2558649006924264, "learning_rate": 2.813715266869089e-07, "loss": 0.0826, "step": 10267 }, { "epoch": 0.65, "grad_norm": 1.138848363680206, "learning_rate": 2.812786533323987e-07, "loss": 0.1607, "step": 10268 }, { "epoch": 0.65, "grad_norm": 0.3580398835841955, "learning_rate": 2.811857893089806e-07, "loss": 0.0585, "step": 10269 }, { "epoch": 0.65, "grad_norm": 3.5828412543228145, "learning_rate": 2.810929346206168e-07, "loss": 0.1878, "step": 10270 }, { "epoch": 0.65, "grad_norm": 5.606115308601487, "learning_rate": 2.810000892712682e-07, "loss": 0.1429, "step": 10271 }, { "epoch": 0.66, "grad_norm": 0.22420079108152827, "learning_rate": 2.809072532648963e-07, "loss": 0.1768, "step": 10272 }, { "epoch": 0.66, "grad_norm": 0.04813094441474049, "learning_rate": 2.808144266054612e-07, "loss": 0.0009, "step": 10273 }, { "epoch": 0.66, "grad_norm": 0.47288197730564774, "learning_rate": 2.8072160929692354e-07, "loss": 0.1842, "step": 10274 }, { "epoch": 0.66, "grad_norm": 0.738235164841927, "learning_rate": 2.806288013432425e-07, "loss": 0.1385, "step": 10275 }, { "epoch": 0.66, "grad_norm": 0.7262208566464031, "learning_rate": 2.8053600274837807e-07, "loss": 0.1382, "step": 10276 }, { "epoch": 0.66, "grad_norm": 21.905346040021723, "learning_rate": 2.8044321351628897e-07, "loss": 0.2486, "step": 10277 }, { "epoch": 0.66, "grad_norm": 0.7732430553059354, "learning_rate": 2.8035043365093347e-07, "loss": 0.2504, "step": 10278 }, { "epoch": 0.66, "grad_norm": 2.705032161380198, "learning_rate": 2.802576631562703e-07, "loss": 0.0047, "step": 10279 }, { "epoch": 0.66, "grad_norm": 1.6248934088142746, "learning_rate": 2.801649020362567e-07, "loss": 0.0321, "step": 10280 }, { "epoch": 0.66, "grad_norm": 1.5827335660059887, "learning_rate": 2.8007215029485054e-07, "loss": 0.0608, "step": 10281 }, { "epoch": 0.66, "grad_norm": 0.8611950012380255, "learning_rate": 2.799794079360083e-07, "loss": 0.18, "step": 10282 }, { "epoch": 0.66, "grad_norm": 1.7082351348512694, "learning_rate": 2.798866749636869e-07, "loss": 0.076, "step": 10283 }, { "epoch": 0.66, "grad_norm": 0.9008111053865964, "learning_rate": 2.797939513818426e-07, "loss": 0.2403, "step": 10284 }, { "epoch": 0.66, "grad_norm": 0.8943813175106697, "learning_rate": 2.7970123719443073e-07, "loss": 0.1797, "step": 10285 }, { "epoch": 0.66, "grad_norm": 0.8584149610242897, "learning_rate": 2.79608532405407e-07, "loss": 0.0888, "step": 10286 }, { "epoch": 0.66, "grad_norm": 15.404306424246116, "learning_rate": 2.795158370187265e-07, "loss": 0.1976, "step": 10287 }, { "epoch": 0.66, "grad_norm": 0.7313523866606231, "learning_rate": 2.794231510383435e-07, "loss": 0.2239, "step": 10288 }, { "epoch": 0.66, "grad_norm": 29.744310653057806, "learning_rate": 2.7933047446821236e-07, "loss": 0.1484, "step": 10289 }, { "epoch": 0.66, "grad_norm": 0.4091252632914925, "learning_rate": 2.7923780731228664e-07, "loss": 0.3602, "step": 10290 }, { "epoch": 0.66, "grad_norm": 0.17125294876779928, "learning_rate": 2.7914514957452004e-07, "loss": 0.0044, "step": 10291 }, { "epoch": 0.66, "grad_norm": 0.8727869656872513, "learning_rate": 2.7905250125886536e-07, "loss": 0.33, "step": 10292 }, { "epoch": 0.66, "grad_norm": 0.40791427587481266, "learning_rate": 2.7895986236927495e-07, "loss": 0.205, "step": 10293 }, { "epoch": 0.66, "grad_norm": 0.808736533405036, "learning_rate": 2.7886723290970134e-07, "loss": 0.3945, "step": 10294 }, { "epoch": 0.66, "grad_norm": 0.6901055506409601, "learning_rate": 2.787746128840959e-07, "loss": 0.1194, "step": 10295 }, { "epoch": 0.66, "grad_norm": 0.5673610143798768, "learning_rate": 2.7868200229641035e-07, "loss": 0.2125, "step": 10296 }, { "epoch": 0.66, "grad_norm": 25.81391849887281, "learning_rate": 2.7858940115059537e-07, "loss": 0.375, "step": 10297 }, { "epoch": 0.66, "grad_norm": 0.9861928424107715, "learning_rate": 2.7849680945060176e-07, "loss": 0.0967, "step": 10298 }, { "epoch": 0.66, "grad_norm": 0.45214295832004353, "learning_rate": 2.784042272003794e-07, "loss": 0.1678, "step": 10299 }, { "epoch": 0.66, "grad_norm": 8.084999990659764, "learning_rate": 2.7831165440387825e-07, "loss": 0.3066, "step": 10300 }, { "epoch": 0.66, "grad_norm": 1.2193244138626602, "learning_rate": 2.7821909106504747e-07, "loss": 0.3322, "step": 10301 }, { "epoch": 0.66, "grad_norm": 5.810876479458508, "learning_rate": 2.781265371878361e-07, "loss": 0.1077, "step": 10302 }, { "epoch": 0.66, "grad_norm": 3.3503260080641253, "learning_rate": 2.7803399277619265e-07, "loss": 0.3605, "step": 10303 }, { "epoch": 0.66, "grad_norm": 1.420953137141911, "learning_rate": 2.779414578340655e-07, "loss": 0.1638, "step": 10304 }, { "epoch": 0.66, "grad_norm": 0.8445873763051406, "learning_rate": 2.778489323654022e-07, "loss": 0.2076, "step": 10305 }, { "epoch": 0.66, "grad_norm": 0.8129850797536065, "learning_rate": 2.777564163741497e-07, "loss": 0.1513, "step": 10306 }, { "epoch": 0.66, "grad_norm": 1.4345052749212925, "learning_rate": 2.7766390986425557e-07, "loss": 0.2743, "step": 10307 }, { "epoch": 0.66, "grad_norm": 0.608516525244101, "learning_rate": 2.775714128396657e-07, "loss": 0.3439, "step": 10308 }, { "epoch": 0.66, "grad_norm": 1.3602859943341306, "learning_rate": 2.7747892530432675e-07, "loss": 0.1154, "step": 10309 }, { "epoch": 0.66, "grad_norm": 0.8412252413073988, "learning_rate": 2.7738644726218396e-07, "loss": 0.0925, "step": 10310 }, { "epoch": 0.66, "grad_norm": 0.4230985042881405, "learning_rate": 2.77293978717183e-07, "loss": 0.0543, "step": 10311 }, { "epoch": 0.66, "grad_norm": 1.730359179745662, "learning_rate": 2.7720151967326845e-07, "loss": 0.083, "step": 10312 }, { "epoch": 0.66, "grad_norm": 1.1155228647285207, "learning_rate": 2.771090701343851e-07, "loss": 0.4335, "step": 10313 }, { "epoch": 0.66, "grad_norm": 0.6157355401303554, "learning_rate": 2.7701663010447673e-07, "loss": 0.099, "step": 10314 }, { "epoch": 0.66, "grad_norm": 0.607567660844606, "learning_rate": 2.7692419958748734e-07, "loss": 0.0942, "step": 10315 }, { "epoch": 0.66, "grad_norm": 1.7522680907313777, "learning_rate": 2.7683177858736e-07, "loss": 0.1124, "step": 10316 }, { "epoch": 0.66, "grad_norm": 3.149502614901415, "learning_rate": 2.767393671080376e-07, "loss": 0.1974, "step": 10317 }, { "epoch": 0.66, "grad_norm": 0.7498093110361993, "learning_rate": 2.766469651534624e-07, "loss": 0.2381, "step": 10318 }, { "epoch": 0.66, "grad_norm": 0.6138349791888598, "learning_rate": 2.765545727275768e-07, "loss": 0.1838, "step": 10319 }, { "epoch": 0.66, "grad_norm": 0.8325056592051004, "learning_rate": 2.7646218983432245e-07, "loss": 0.2273, "step": 10320 }, { "epoch": 0.66, "grad_norm": 0.5471731317832644, "learning_rate": 2.7636981647764024e-07, "loss": 0.1571, "step": 10321 }, { "epoch": 0.66, "grad_norm": 0.39690254731037833, "learning_rate": 2.762774526614714e-07, "loss": 0.0376, "step": 10322 }, { "epoch": 0.66, "grad_norm": 0.24004478182410177, "learning_rate": 2.76185098389756e-07, "loss": 0.1198, "step": 10323 }, { "epoch": 0.66, "grad_norm": 5.3566657921106415, "learning_rate": 2.760927536664344e-07, "loss": 0.2131, "step": 10324 }, { "epoch": 0.66, "grad_norm": 0.9512974098221918, "learning_rate": 2.7600041849544584e-07, "loss": 0.1373, "step": 10325 }, { "epoch": 0.66, "grad_norm": 0.7863187991648417, "learning_rate": 2.7590809288073e-07, "loss": 0.1968, "step": 10326 }, { "epoch": 0.66, "grad_norm": 0.788712920122992, "learning_rate": 2.7581577682622513e-07, "loss": 0.2357, "step": 10327 }, { "epoch": 0.66, "grad_norm": 1.284394747997247, "learning_rate": 2.757234703358701e-07, "loss": 0.1383, "step": 10328 }, { "epoch": 0.66, "grad_norm": 6.363789118705277, "learning_rate": 2.7563117341360244e-07, "loss": 0.0422, "step": 10329 }, { "epoch": 0.66, "grad_norm": 0.31012208836261174, "learning_rate": 2.7553888606336016e-07, "loss": 0.0392, "step": 10330 }, { "epoch": 0.66, "grad_norm": 0.7872579617347853, "learning_rate": 2.754466082890801e-07, "loss": 0.0694, "step": 10331 }, { "epoch": 0.66, "grad_norm": 0.14947528566196186, "learning_rate": 2.7535434009469924e-07, "loss": 0.0021, "step": 10332 }, { "epoch": 0.66, "grad_norm": 4.8144802725901945, "learning_rate": 2.7526208148415394e-07, "loss": 0.1461, "step": 10333 }, { "epoch": 0.66, "grad_norm": 1.3009842356808723, "learning_rate": 2.7516983246137977e-07, "loss": 0.3036, "step": 10334 }, { "epoch": 0.66, "grad_norm": 0.9552782663324134, "learning_rate": 2.750775930303125e-07, "loss": 0.3527, "step": 10335 }, { "epoch": 0.66, "grad_norm": 2.1074123808822676, "learning_rate": 2.749853631948875e-07, "loss": 0.1009, "step": 10336 }, { "epoch": 0.66, "grad_norm": 1.7717845227744466, "learning_rate": 2.74893142959039e-07, "loss": 0.1866, "step": 10337 }, { "epoch": 0.66, "grad_norm": 3.847477736795236, "learning_rate": 2.7480093232670155e-07, "loss": 0.0834, "step": 10338 }, { "epoch": 0.66, "grad_norm": 0.8115874522348948, "learning_rate": 2.747087313018092e-07, "loss": 0.29, "step": 10339 }, { "epoch": 0.66, "grad_norm": 1.3672825170076108, "learning_rate": 2.7461653988829515e-07, "loss": 0.1825, "step": 10340 }, { "epoch": 0.66, "grad_norm": 0.8866651351700062, "learning_rate": 2.745243580900927e-07, "loss": 0.2698, "step": 10341 }, { "epoch": 0.66, "grad_norm": 0.5706974465252797, "learning_rate": 2.7443218591113427e-07, "loss": 0.2585, "step": 10342 }, { "epoch": 0.66, "grad_norm": 1.6401080035984237, "learning_rate": 2.7434002335535233e-07, "loss": 0.5122, "step": 10343 }, { "epoch": 0.66, "grad_norm": 1.0598099448529625, "learning_rate": 2.742478704266785e-07, "loss": 0.152, "step": 10344 }, { "epoch": 0.66, "grad_norm": 1.573205755417549, "learning_rate": 2.7415572712904454e-07, "loss": 0.0744, "step": 10345 }, { "epoch": 0.66, "grad_norm": 0.9795329057276252, "learning_rate": 2.740635934663813e-07, "loss": 0.1945, "step": 10346 }, { "epoch": 0.66, "grad_norm": 4.036694928135541, "learning_rate": 2.739714694426191e-07, "loss": 0.1459, "step": 10347 }, { "epoch": 0.66, "grad_norm": 1.1962920417360572, "learning_rate": 2.7387935506168857e-07, "loss": 0.1081, "step": 10348 }, { "epoch": 0.66, "grad_norm": 1.047942062306148, "learning_rate": 2.7378725032751915e-07, "loss": 0.1798, "step": 10349 }, { "epoch": 0.66, "grad_norm": 0.8520817851554787, "learning_rate": 2.7369515524404064e-07, "loss": 0.4585, "step": 10350 }, { "epoch": 0.66, "grad_norm": 1.03077299174891, "learning_rate": 2.7360306981518147e-07, "loss": 0.3425, "step": 10351 }, { "epoch": 0.66, "grad_norm": 0.4387898763168881, "learning_rate": 2.735109940448706e-07, "loss": 0.0154, "step": 10352 }, { "epoch": 0.66, "grad_norm": 0.6529751277320938, "learning_rate": 2.734189279370359e-07, "loss": 0.1431, "step": 10353 }, { "epoch": 0.66, "grad_norm": 1.8642363768247914, "learning_rate": 2.733268714956052e-07, "loss": 0.1578, "step": 10354 }, { "epoch": 0.66, "grad_norm": 0.5916966740097632, "learning_rate": 2.7323482472450597e-07, "loss": 0.3505, "step": 10355 }, { "epoch": 0.66, "grad_norm": 0.9445762314627908, "learning_rate": 2.7314278762766483e-07, "loss": 0.3951, "step": 10356 }, { "epoch": 0.66, "grad_norm": 0.7871275307321242, "learning_rate": 2.7305076020900843e-07, "loss": 0.2309, "step": 10357 }, { "epoch": 0.66, "grad_norm": 2.1404180942625537, "learning_rate": 2.7295874247246303e-07, "loss": 0.1901, "step": 10358 }, { "epoch": 0.66, "grad_norm": 1.4365060851096445, "learning_rate": 2.7286673442195406e-07, "loss": 0.1272, "step": 10359 }, { "epoch": 0.66, "grad_norm": 0.6029931644642945, "learning_rate": 2.727747360614066e-07, "loss": 0.1351, "step": 10360 }, { "epoch": 0.66, "grad_norm": 0.47347890544347404, "learning_rate": 2.726827473947458e-07, "loss": 0.3076, "step": 10361 }, { "epoch": 0.66, "grad_norm": 0.47592070419616117, "learning_rate": 2.725907684258959e-07, "loss": 0.0055, "step": 10362 }, { "epoch": 0.66, "grad_norm": 8.371716287788669, "learning_rate": 2.7249879915878117e-07, "loss": 0.0995, "step": 10363 }, { "epoch": 0.66, "grad_norm": 12.834889662434467, "learning_rate": 2.724068395973247e-07, "loss": 0.2137, "step": 10364 }, { "epoch": 0.66, "grad_norm": 0.8000913131057464, "learning_rate": 2.7231488974545017e-07, "loss": 0.2315, "step": 10365 }, { "epoch": 0.66, "grad_norm": 0.564255357468439, "learning_rate": 2.7222294960708004e-07, "loss": 0.0632, "step": 10366 }, { "epoch": 0.66, "grad_norm": 0.7205279558390081, "learning_rate": 2.721310191861369e-07, "loss": 0.1315, "step": 10367 }, { "epoch": 0.66, "grad_norm": 1.7295300427974798, "learning_rate": 2.720390984865424e-07, "loss": 0.2025, "step": 10368 }, { "epoch": 0.66, "grad_norm": 1.3406120790995233, "learning_rate": 2.7194718751221836e-07, "loss": 0.3642, "step": 10369 }, { "epoch": 0.66, "grad_norm": 1.258689744960688, "learning_rate": 2.7185528626708553e-07, "loss": 0.1006, "step": 10370 }, { "epoch": 0.66, "grad_norm": 0.43996053820520636, "learning_rate": 2.717633947550651e-07, "loss": 0.1191, "step": 10371 }, { "epoch": 0.66, "grad_norm": 0.4180444976354502, "learning_rate": 2.7167151298007686e-07, "loss": 0.0946, "step": 10372 }, { "epoch": 0.66, "grad_norm": 0.3772174713897463, "learning_rate": 2.715796409460408e-07, "loss": 0.1083, "step": 10373 }, { "epoch": 0.66, "grad_norm": 0.7817672051214644, "learning_rate": 2.714877786568767e-07, "loss": 0.395, "step": 10374 }, { "epoch": 0.66, "grad_norm": 5.325419730471386, "learning_rate": 2.713959261165031e-07, "loss": 0.2452, "step": 10375 }, { "epoch": 0.66, "grad_norm": 0.5344302760002922, "learning_rate": 2.7130408332883904e-07, "loss": 0.1106, "step": 10376 }, { "epoch": 0.66, "grad_norm": 0.9452148141559257, "learning_rate": 2.7121225029780235e-07, "loss": 0.2956, "step": 10377 }, { "epoch": 0.66, "grad_norm": 1.8962247123977751, "learning_rate": 2.7112042702731117e-07, "loss": 0.1983, "step": 10378 }, { "epoch": 0.66, "grad_norm": 1.8331461827843374, "learning_rate": 2.7102861352128246e-07, "loss": 0.4281, "step": 10379 }, { "epoch": 0.66, "grad_norm": 0.9961725351184338, "learning_rate": 2.709368097836336e-07, "loss": 0.4122, "step": 10380 }, { "epoch": 0.66, "grad_norm": 3.1622811223499467, "learning_rate": 2.708450158182807e-07, "loss": 0.0962, "step": 10381 }, { "epoch": 0.66, "grad_norm": 0.5044438682947123, "learning_rate": 2.707532316291403e-07, "loss": 0.1427, "step": 10382 }, { "epoch": 0.66, "grad_norm": 1.0408060350720334, "learning_rate": 2.7066145722012767e-07, "loss": 0.1227, "step": 10383 }, { "epoch": 0.66, "grad_norm": 0.9981343484997779, "learning_rate": 2.7056969259515856e-07, "loss": 0.3258, "step": 10384 }, { "epoch": 0.66, "grad_norm": 11.124303491774027, "learning_rate": 2.704779377581473e-07, "loss": 0.1624, "step": 10385 }, { "epoch": 0.66, "grad_norm": 0.8375855156700935, "learning_rate": 2.7038619271300886e-07, "loss": 0.5048, "step": 10386 }, { "epoch": 0.66, "grad_norm": 0.3768231972231354, "learning_rate": 2.70294457463657e-07, "loss": 0.1078, "step": 10387 }, { "epoch": 0.66, "grad_norm": 0.6236407902616248, "learning_rate": 2.7020273201400514e-07, "loss": 0.282, "step": 10388 }, { "epoch": 0.66, "grad_norm": 0.8953272419992317, "learning_rate": 2.7011101636796674e-07, "loss": 0.3133, "step": 10389 }, { "epoch": 0.66, "grad_norm": 2.074712334373866, "learning_rate": 2.700193105294545e-07, "loss": 0.2425, "step": 10390 }, { "epoch": 0.66, "grad_norm": 0.7647295096150315, "learning_rate": 2.6992761450238086e-07, "loss": 0.1282, "step": 10391 }, { "epoch": 0.66, "grad_norm": 0.6549380344787635, "learning_rate": 2.6983592829065765e-07, "loss": 0.1558, "step": 10392 }, { "epoch": 0.66, "grad_norm": 2.145107928758458, "learning_rate": 2.697442518981966e-07, "loss": 0.2234, "step": 10393 }, { "epoch": 0.66, "grad_norm": 0.47280586014705855, "learning_rate": 2.696525853289084e-07, "loss": 0.057, "step": 10394 }, { "epoch": 0.66, "grad_norm": 0.5057971735520573, "learning_rate": 2.6956092858670416e-07, "loss": 0.0897, "step": 10395 }, { "epoch": 0.66, "grad_norm": 3.2933726887911243, "learning_rate": 2.6946928167549377e-07, "loss": 0.1144, "step": 10396 }, { "epoch": 0.66, "grad_norm": 0.4718624220367011, "learning_rate": 2.693776445991874e-07, "loss": 0.0291, "step": 10397 }, { "epoch": 0.66, "grad_norm": 1.0816261079841452, "learning_rate": 2.692860173616942e-07, "loss": 0.1048, "step": 10398 }, { "epoch": 0.66, "grad_norm": 0.4821952474066926, "learning_rate": 2.6919439996692343e-07, "loss": 0.1043, "step": 10399 }, { "epoch": 0.66, "grad_norm": 1.3739327583129264, "learning_rate": 2.6910279241878337e-07, "loss": 0.2589, "step": 10400 }, { "epoch": 0.66, "grad_norm": 0.6883030170882999, "learning_rate": 2.690111947211825e-07, "loss": 0.173, "step": 10401 }, { "epoch": 0.66, "grad_norm": 0.5572504594788301, "learning_rate": 2.689196068780285e-07, "loss": 0.2161, "step": 10402 }, { "epoch": 0.66, "grad_norm": 0.8375524659754772, "learning_rate": 2.688280288932283e-07, "loss": 0.044, "step": 10403 }, { "epoch": 0.66, "grad_norm": 1.1238918985541626, "learning_rate": 2.687364607706893e-07, "loss": 0.2673, "step": 10404 }, { "epoch": 0.66, "grad_norm": 0.9614332658496733, "learning_rate": 2.686449025143177e-07, "loss": 0.2368, "step": 10405 }, { "epoch": 0.66, "grad_norm": 5.139127536266855, "learning_rate": 2.685533541280197e-07, "loss": 0.2419, "step": 10406 }, { "epoch": 0.66, "grad_norm": 0.9242720014787659, "learning_rate": 2.684618156157008e-07, "loss": 0.3948, "step": 10407 }, { "epoch": 0.66, "grad_norm": 8.721545218688634, "learning_rate": 2.683702869812662e-07, "loss": 0.0945, "step": 10408 }, { "epoch": 0.66, "grad_norm": 15.050138743858733, "learning_rate": 2.682787682286209e-07, "loss": 0.1971, "step": 10409 }, { "epoch": 0.66, "grad_norm": 0.5187178695507517, "learning_rate": 2.681872593616693e-07, "loss": 0.2237, "step": 10410 }, { "epoch": 0.66, "grad_norm": 0.28910169011590614, "learning_rate": 2.6809576038431504e-07, "loss": 0.0723, "step": 10411 }, { "epoch": 0.66, "grad_norm": 0.7739970350627858, "learning_rate": 2.68004271300462e-07, "loss": 0.3179, "step": 10412 }, { "epoch": 0.66, "grad_norm": 0.8477935669559543, "learning_rate": 2.6791279211401284e-07, "loss": 0.3444, "step": 10413 }, { "epoch": 0.66, "grad_norm": 0.3818642354491543, "learning_rate": 2.678213228288708e-07, "loss": 0.0815, "step": 10414 }, { "epoch": 0.66, "grad_norm": 0.528649656653528, "learning_rate": 2.677298634489379e-07, "loss": 0.3162, "step": 10415 }, { "epoch": 0.66, "grad_norm": 0.9286483054980392, "learning_rate": 2.676384139781157e-07, "loss": 0.0572, "step": 10416 }, { "epoch": 0.66, "grad_norm": 0.7625097904677913, "learning_rate": 2.675469744203061e-07, "loss": 0.3784, "step": 10417 }, { "epoch": 0.66, "grad_norm": 0.2864610662133286, "learning_rate": 2.6745554477940967e-07, "loss": 0.0046, "step": 10418 }, { "epoch": 0.66, "grad_norm": 0.6962038626822891, "learning_rate": 2.6736412505932737e-07, "loss": 0.2113, "step": 10419 }, { "epoch": 0.66, "grad_norm": 1.0503986618584422, "learning_rate": 2.672727152639589e-07, "loss": 0.1527, "step": 10420 }, { "epoch": 0.66, "grad_norm": 0.8225215419075138, "learning_rate": 2.6718131539720445e-07, "loss": 0.0783, "step": 10421 }, { "epoch": 0.66, "grad_norm": 0.5404830518009177, "learning_rate": 2.6708992546296294e-07, "loss": 0.1638, "step": 10422 }, { "epoch": 0.66, "grad_norm": 0.9482240485776333, "learning_rate": 2.6699854546513354e-07, "loss": 0.2334, "step": 10423 }, { "epoch": 0.66, "grad_norm": 2.5889709783066235, "learning_rate": 2.6690717540761446e-07, "loss": 0.293, "step": 10424 }, { "epoch": 0.66, "grad_norm": 0.558957254022895, "learning_rate": 2.6681581529430384e-07, "loss": 0.1498, "step": 10425 }, { "epoch": 0.66, "grad_norm": 1.316452324124796, "learning_rate": 2.667244651290995e-07, "loss": 0.0172, "step": 10426 }, { "epoch": 0.66, "grad_norm": 1.0084936543747833, "learning_rate": 2.666331249158982e-07, "loss": 0.0781, "step": 10427 }, { "epoch": 0.67, "grad_norm": 1.1415900491388742, "learning_rate": 2.665417946585972e-07, "loss": 0.3482, "step": 10428 }, { "epoch": 0.67, "grad_norm": 1.230059651396525, "learning_rate": 2.6645047436109224e-07, "loss": 0.1471, "step": 10429 }, { "epoch": 0.67, "grad_norm": 0.6010759257443067, "learning_rate": 2.6635916402727975e-07, "loss": 0.1955, "step": 10430 }, { "epoch": 0.67, "grad_norm": 1.5991172116467856, "learning_rate": 2.662678636610549e-07, "loss": 0.4037, "step": 10431 }, { "epoch": 0.67, "grad_norm": 1.3643282391243785, "learning_rate": 2.6617657326631296e-07, "loss": 0.1668, "step": 10432 }, { "epoch": 0.67, "grad_norm": 0.5806538082505955, "learning_rate": 2.6608529284694823e-07, "loss": 0.1467, "step": 10433 }, { "epoch": 0.67, "grad_norm": 0.6803427975121148, "learning_rate": 2.6599402240685543e-07, "loss": 0.0592, "step": 10434 }, { "epoch": 0.67, "grad_norm": 1.2480218245656194, "learning_rate": 2.659027619499278e-07, "loss": 0.1108, "step": 10435 }, { "epoch": 0.67, "grad_norm": 0.7961858815220818, "learning_rate": 2.658115114800591e-07, "loss": 0.2516, "step": 10436 }, { "epoch": 0.67, "grad_norm": 0.9066160613403413, "learning_rate": 2.657202710011419e-07, "loss": 0.2253, "step": 10437 }, { "epoch": 0.67, "grad_norm": 2.6353668729752076, "learning_rate": 2.6562904051706913e-07, "loss": 0.1116, "step": 10438 }, { "epoch": 0.67, "grad_norm": 0.8542947566658707, "learning_rate": 2.655378200317324e-07, "loss": 0.1967, "step": 10439 }, { "epoch": 0.67, "grad_norm": 0.6447993647207726, "learning_rate": 2.6544660954902385e-07, "loss": 0.2617, "step": 10440 }, { "epoch": 0.67, "grad_norm": 0.8252586569785209, "learning_rate": 2.653554090728342e-07, "loss": 0.2064, "step": 10441 }, { "epoch": 0.67, "grad_norm": 1.551786203138509, "learning_rate": 2.6526421860705473e-07, "loss": 0.1988, "step": 10442 }, { "epoch": 0.67, "grad_norm": 0.888358863306191, "learning_rate": 2.6517303815557533e-07, "loss": 0.168, "step": 10443 }, { "epoch": 0.67, "grad_norm": 0.39348950210940503, "learning_rate": 2.6508186772228626e-07, "loss": 0.2157, "step": 10444 }, { "epoch": 0.67, "grad_norm": 0.9599256798437353, "learning_rate": 2.649907073110771e-07, "loss": 0.2491, "step": 10445 }, { "epoch": 0.67, "grad_norm": 0.7617142381150503, "learning_rate": 2.648995569258366e-07, "loss": 0.4391, "step": 10446 }, { "epoch": 0.67, "grad_norm": 3.397491569318164, "learning_rate": 2.6480841657045384e-07, "loss": 0.0671, "step": 10447 }, { "epoch": 0.67, "grad_norm": 1.3137672202187631, "learning_rate": 2.6471728624881657e-07, "loss": 0.2771, "step": 10448 }, { "epoch": 0.67, "grad_norm": 0.9264509751527862, "learning_rate": 2.6462616596481306e-07, "loss": 0.1368, "step": 10449 }, { "epoch": 0.67, "grad_norm": 1.1262196550745276, "learning_rate": 2.645350557223303e-07, "loss": 0.0926, "step": 10450 }, { "epoch": 0.67, "grad_norm": 0.6099039134737922, "learning_rate": 2.6444395552525556e-07, "loss": 0.1469, "step": 10451 }, { "epoch": 0.67, "grad_norm": 0.7466957962216927, "learning_rate": 2.6435286537747507e-07, "loss": 0.1395, "step": 10452 }, { "epoch": 0.67, "grad_norm": 0.9883365727329495, "learning_rate": 2.6426178528287526e-07, "loss": 0.1304, "step": 10453 }, { "epoch": 0.67, "grad_norm": 0.7112658868253172, "learning_rate": 2.641707152453413e-07, "loss": 0.1583, "step": 10454 }, { "epoch": 0.67, "grad_norm": 3.708473714266294, "learning_rate": 2.64079655268759e-07, "loss": 0.0373, "step": 10455 }, { "epoch": 0.67, "grad_norm": 4.028738953398234, "learning_rate": 2.6398860535701273e-07, "loss": 0.167, "step": 10456 }, { "epoch": 0.67, "grad_norm": 2.0053975009335385, "learning_rate": 2.638975655139869e-07, "loss": 0.0174, "step": 10457 }, { "epoch": 0.67, "grad_norm": 0.2956561433469352, "learning_rate": 2.6380653574356576e-07, "loss": 0.0927, "step": 10458 }, { "epoch": 0.67, "grad_norm": 0.7033163724987198, "learning_rate": 2.637155160496324e-07, "loss": 0.3044, "step": 10459 }, { "epoch": 0.67, "grad_norm": 0.7629600639173423, "learning_rate": 2.636245064360701e-07, "loss": 0.3183, "step": 10460 }, { "epoch": 0.67, "grad_norm": 0.30311969857539905, "learning_rate": 2.635335069067617e-07, "loss": 0.2666, "step": 10461 }, { "epoch": 0.67, "grad_norm": 3.8735044339443045, "learning_rate": 2.634425174655891e-07, "loss": 0.1904, "step": 10462 }, { "epoch": 0.67, "grad_norm": 0.4713183463796078, "learning_rate": 2.633515381164342e-07, "loss": 0.0081, "step": 10463 }, { "epoch": 0.67, "grad_norm": 2.2370586944612736, "learning_rate": 2.632605688631787e-07, "loss": 0.2827, "step": 10464 }, { "epoch": 0.67, "grad_norm": 0.6483202060911514, "learning_rate": 2.631696097097029e-07, "loss": 0.1949, "step": 10465 }, { "epoch": 0.67, "grad_norm": 0.606500670229192, "learning_rate": 2.630786606598879e-07, "loss": 0.2757, "step": 10466 }, { "epoch": 0.67, "grad_norm": 0.8034149397610436, "learning_rate": 2.629877217176134e-07, "loss": 0.3208, "step": 10467 }, { "epoch": 0.67, "grad_norm": 1.0127645399924585, "learning_rate": 2.6289679288675923e-07, "loss": 0.0089, "step": 10468 }, { "epoch": 0.67, "grad_norm": 1.1079613960310797, "learning_rate": 2.628058741712046e-07, "loss": 0.0142, "step": 10469 }, { "epoch": 0.67, "grad_norm": 1.124947011569078, "learning_rate": 2.627149655748279e-07, "loss": 0.1319, "step": 10470 }, { "epoch": 0.67, "grad_norm": 1.8200726489442691, "learning_rate": 2.626240671015081e-07, "loss": 0.0614, "step": 10471 }, { "epoch": 0.67, "grad_norm": 11.110121682498992, "learning_rate": 2.625331787551225e-07, "loss": 0.2593, "step": 10472 }, { "epoch": 0.67, "grad_norm": 0.5619631755302068, "learning_rate": 2.62442300539549e-07, "loss": 0.0547, "step": 10473 }, { "epoch": 0.67, "grad_norm": 0.5592244554126755, "learning_rate": 2.6235143245866445e-07, "loss": 0.176, "step": 10474 }, { "epoch": 0.67, "grad_norm": 0.31486389004552046, "learning_rate": 2.622605745163456e-07, "loss": 0.2191, "step": 10475 }, { "epoch": 0.67, "grad_norm": 0.6316466594089674, "learning_rate": 2.621697267164684e-07, "loss": 0.1195, "step": 10476 }, { "epoch": 0.67, "grad_norm": 0.5717075827821232, "learning_rate": 2.62078889062909e-07, "loss": 0.4002, "step": 10477 }, { "epoch": 0.67, "grad_norm": 0.8042376967286633, "learning_rate": 2.619880615595422e-07, "loss": 0.2113, "step": 10478 }, { "epoch": 0.67, "grad_norm": 0.4112285756893933, "learning_rate": 2.6189724421024315e-07, "loss": 0.158, "step": 10479 }, { "epoch": 0.67, "grad_norm": 2.8920847339347455, "learning_rate": 2.618064370188864e-07, "loss": 0.3246, "step": 10480 }, { "epoch": 0.67, "grad_norm": 1.4096801217003756, "learning_rate": 2.61715639989346e-07, "loss": 0.0097, "step": 10481 }, { "epoch": 0.67, "grad_norm": 0.45938981760695313, "learning_rate": 2.616248531254953e-07, "loss": 0.2131, "step": 10482 }, { "epoch": 0.67, "grad_norm": 0.7588298537674147, "learning_rate": 2.615340764312077e-07, "loss": 0.2523, "step": 10483 }, { "epoch": 0.67, "grad_norm": 1.1579948270642288, "learning_rate": 2.614433099103558e-07, "loss": 0.0244, "step": 10484 }, { "epoch": 0.67, "grad_norm": 0.4989591486028849, "learning_rate": 2.613525535668116e-07, "loss": 0.0517, "step": 10485 }, { "epoch": 0.67, "grad_norm": 1.6484474008402006, "learning_rate": 2.612618074044475e-07, "loss": 0.1227, "step": 10486 }, { "epoch": 0.67, "grad_norm": 1.5772858373783285, "learning_rate": 2.611710714271343e-07, "loss": 0.0385, "step": 10487 }, { "epoch": 0.67, "grad_norm": 0.9127399629002068, "learning_rate": 2.610803456387436e-07, "loss": 0.2728, "step": 10488 }, { "epoch": 0.67, "grad_norm": 1.1753583013101303, "learning_rate": 2.6098963004314536e-07, "loss": 0.0091, "step": 10489 }, { "epoch": 0.67, "grad_norm": 0.6615157221653731, "learning_rate": 2.6089892464421025e-07, "loss": 0.1522, "step": 10490 }, { "epoch": 0.67, "grad_norm": 0.3786927597294344, "learning_rate": 2.6080822944580736e-07, "loss": 0.0326, "step": 10491 }, { "epoch": 0.67, "grad_norm": 1.8774515974536057, "learning_rate": 2.607175444518066e-07, "loss": 0.1332, "step": 10492 }, { "epoch": 0.67, "grad_norm": 0.7859571173425213, "learning_rate": 2.606268696660761e-07, "loss": 0.0165, "step": 10493 }, { "epoch": 0.67, "grad_norm": 1.4774352893560765, "learning_rate": 2.6053620509248475e-07, "loss": 0.2128, "step": 10494 }, { "epoch": 0.67, "grad_norm": 0.8849807851339067, "learning_rate": 2.604455507349001e-07, "loss": 0.2165, "step": 10495 }, { "epoch": 0.67, "grad_norm": 0.3053876242631579, "learning_rate": 2.603549065971898e-07, "loss": 0.1507, "step": 10496 }, { "epoch": 0.67, "grad_norm": 1.236602850657951, "learning_rate": 2.6026427268322113e-07, "loss": 0.2823, "step": 10497 }, { "epoch": 0.67, "grad_norm": 0.9731953198793252, "learning_rate": 2.6017364899686035e-07, "loss": 0.353, "step": 10498 }, { "epoch": 0.67, "grad_norm": 12.849028799996507, "learning_rate": 2.6008303554197396e-07, "loss": 0.2629, "step": 10499 }, { "epoch": 0.67, "grad_norm": 1.609483280714231, "learning_rate": 2.5999243232242745e-07, "loss": 0.019, "step": 10500 }, { "epoch": 0.67, "grad_norm": 1.3888003858011602, "learning_rate": 2.599018393420864e-07, "loss": 0.1603, "step": 10501 }, { "epoch": 0.67, "grad_norm": 2.2085305407941447, "learning_rate": 2.5981125660481535e-07, "loss": 0.1441, "step": 10502 }, { "epoch": 0.67, "grad_norm": 10.44957664609367, "learning_rate": 2.5972068411447914e-07, "loss": 0.0748, "step": 10503 }, { "epoch": 0.67, "grad_norm": 1.6362945088703087, "learning_rate": 2.596301218749414e-07, "loss": 0.0895, "step": 10504 }, { "epoch": 0.67, "grad_norm": 0.7594616526315587, "learning_rate": 2.5953956989006596e-07, "loss": 0.1739, "step": 10505 }, { "epoch": 0.67, "grad_norm": 1.0883217998113466, "learning_rate": 2.594490281637157e-07, "loss": 0.0775, "step": 10506 }, { "epoch": 0.67, "grad_norm": 12.490342342348187, "learning_rate": 2.5935849669975373e-07, "loss": 0.2393, "step": 10507 }, { "epoch": 0.67, "grad_norm": 1.0084608531775603, "learning_rate": 2.5926797550204175e-07, "loss": 0.2081, "step": 10508 }, { "epoch": 0.67, "grad_norm": 0.577018754148711, "learning_rate": 2.591774645744421e-07, "loss": 0.2774, "step": 10509 }, { "epoch": 0.67, "grad_norm": 0.6290297424700538, "learning_rate": 2.5908696392081573e-07, "loss": 0.2795, "step": 10510 }, { "epoch": 0.67, "grad_norm": 0.6718054667773171, "learning_rate": 2.5899647354502403e-07, "loss": 0.1014, "step": 10511 }, { "epoch": 0.67, "grad_norm": 0.45768586251432564, "learning_rate": 2.58905993450927e-07, "loss": 0.1617, "step": 10512 }, { "epoch": 0.67, "grad_norm": 12.646220834732189, "learning_rate": 2.588155236423851e-07, "loss": 0.2365, "step": 10513 }, { "epoch": 0.67, "grad_norm": 0.7789869069369135, "learning_rate": 2.5872506412325767e-07, "loss": 0.3558, "step": 10514 }, { "epoch": 0.67, "grad_norm": 1.151739340656819, "learning_rate": 2.5863461489740403e-07, "loss": 0.3409, "step": 10515 }, { "epoch": 0.67, "grad_norm": 0.5370364545434133, "learning_rate": 2.585441759686831e-07, "loss": 0.2612, "step": 10516 }, { "epoch": 0.67, "grad_norm": 0.5427067013972633, "learning_rate": 2.584537473409528e-07, "loss": 0.1106, "step": 10517 }, { "epoch": 0.67, "grad_norm": 1.617124445554166, "learning_rate": 2.5836332901807145e-07, "loss": 0.0732, "step": 10518 }, { "epoch": 0.67, "grad_norm": 1.7087900502440856, "learning_rate": 2.58272921003896e-07, "loss": 0.1356, "step": 10519 }, { "epoch": 0.67, "grad_norm": 2.951110245258846, "learning_rate": 2.5818252330228387e-07, "loss": 0.0826, "step": 10520 }, { "epoch": 0.67, "grad_norm": 1.1491656278638571, "learning_rate": 2.580921359170912e-07, "loss": 0.076, "step": 10521 }, { "epoch": 0.67, "grad_norm": 1.1568027759612993, "learning_rate": 2.580017588521746e-07, "loss": 0.2779, "step": 10522 }, { "epoch": 0.67, "grad_norm": 3.8358940530762977, "learning_rate": 2.579113921113891e-07, "loss": 0.5154, "step": 10523 }, { "epoch": 0.67, "grad_norm": 1.2170483844360773, "learning_rate": 2.5782103569859054e-07, "loss": 0.0438, "step": 10524 }, { "epoch": 0.67, "grad_norm": 0.7479360058026202, "learning_rate": 2.5773068961763343e-07, "loss": 0.2036, "step": 10525 }, { "epoch": 0.67, "grad_norm": 0.617781493911869, "learning_rate": 2.576403538723719e-07, "loss": 0.1514, "step": 10526 }, { "epoch": 0.67, "grad_norm": 0.7265854935703562, "learning_rate": 2.5755002846666033e-07, "loss": 0.2367, "step": 10527 }, { "epoch": 0.67, "grad_norm": 0.737124315075192, "learning_rate": 2.5745971340435177e-07, "loss": 0.293, "step": 10528 }, { "epoch": 0.67, "grad_norm": 1.6024153526531448, "learning_rate": 2.5736940868929953e-07, "loss": 0.333, "step": 10529 }, { "epoch": 0.67, "grad_norm": 7.599186344179326, "learning_rate": 2.572791143253559e-07, "loss": 0.1295, "step": 10530 }, { "epoch": 0.67, "grad_norm": 0.7085186447716115, "learning_rate": 2.5718883031637314e-07, "loss": 0.2426, "step": 10531 }, { "epoch": 0.67, "grad_norm": 2.8266295058925617, "learning_rate": 2.5709855666620316e-07, "loss": 0.1897, "step": 10532 }, { "epoch": 0.67, "grad_norm": 0.7798735228341888, "learning_rate": 2.5700829337869694e-07, "loss": 0.215, "step": 10533 }, { "epoch": 0.67, "grad_norm": 2.002867064307344, "learning_rate": 2.5691804045770535e-07, "loss": 0.038, "step": 10534 }, { "epoch": 0.67, "grad_norm": 0.8655546053823612, "learning_rate": 2.56827797907079e-07, "loss": 0.11, "step": 10535 }, { "epoch": 0.67, "grad_norm": 1.872437944181974, "learning_rate": 2.5673756573066747e-07, "loss": 0.1736, "step": 10536 }, { "epoch": 0.67, "grad_norm": 1.3986849148475318, "learning_rate": 2.5664734393232054e-07, "loss": 0.2444, "step": 10537 }, { "epoch": 0.67, "grad_norm": 0.5756746969524624, "learning_rate": 2.5655713251588715e-07, "loss": 0.1042, "step": 10538 }, { "epoch": 0.67, "grad_norm": 2.2697659380365542, "learning_rate": 2.564669314852157e-07, "loss": 0.0931, "step": 10539 }, { "epoch": 0.67, "grad_norm": 1.3407578119448709, "learning_rate": 2.5637674084415473e-07, "loss": 0.2629, "step": 10540 }, { "epoch": 0.67, "grad_norm": 0.39097716840919244, "learning_rate": 2.5628656059655153e-07, "loss": 0.2376, "step": 10541 }, { "epoch": 0.67, "grad_norm": 0.6718139343989389, "learning_rate": 2.561963907462537e-07, "loss": 0.2885, "step": 10542 }, { "epoch": 0.67, "grad_norm": 1.8800900704244332, "learning_rate": 2.5610623129710784e-07, "loss": 0.1508, "step": 10543 }, { "epoch": 0.67, "grad_norm": 1.0095528779448446, "learning_rate": 2.560160822529606e-07, "loss": 0.499, "step": 10544 }, { "epoch": 0.67, "grad_norm": 0.7794417974665724, "learning_rate": 2.5592594361765753e-07, "loss": 0.1611, "step": 10545 }, { "epoch": 0.67, "grad_norm": 0.18154471465380143, "learning_rate": 2.5583581539504463e-07, "loss": 0.1021, "step": 10546 }, { "epoch": 0.67, "grad_norm": 0.7474633808438751, "learning_rate": 2.557456975889664e-07, "loss": 0.0125, "step": 10547 }, { "epoch": 0.67, "grad_norm": 4.031072803768003, "learning_rate": 2.55655590203268e-07, "loss": 0.0846, "step": 10548 }, { "epoch": 0.67, "grad_norm": 0.7108431360941682, "learning_rate": 2.55565493241793e-07, "loss": 0.3651, "step": 10549 }, { "epoch": 0.67, "grad_norm": 0.6395022418878527, "learning_rate": 2.5547540670838553e-07, "loss": 0.3176, "step": 10550 }, { "epoch": 0.67, "grad_norm": 1.2489121349943053, "learning_rate": 2.553853306068888e-07, "loss": 0.3709, "step": 10551 }, { "epoch": 0.67, "grad_norm": 4.126379091094442, "learning_rate": 2.552952649411457e-07, "loss": 0.0413, "step": 10552 }, { "epoch": 0.67, "grad_norm": 0.8528101579995798, "learning_rate": 2.5520520971499857e-07, "loss": 0.1108, "step": 10553 }, { "epoch": 0.67, "grad_norm": 0.5092444165228933, "learning_rate": 2.551151649322891e-07, "loss": 0.0573, "step": 10554 }, { "epoch": 0.67, "grad_norm": 1.123056991807261, "learning_rate": 2.5502513059685916e-07, "loss": 0.0976, "step": 10555 }, { "epoch": 0.67, "grad_norm": 0.21528667818707273, "learning_rate": 2.549351067125494e-07, "loss": 0.004, "step": 10556 }, { "epoch": 0.67, "grad_norm": 1.1022126568579682, "learning_rate": 2.5484509328320083e-07, "loss": 0.2352, "step": 10557 }, { "epoch": 0.67, "grad_norm": 4.963214973942761, "learning_rate": 2.5475509031265316e-07, "loss": 0.1623, "step": 10558 }, { "epoch": 0.67, "grad_norm": 0.33563736633622543, "learning_rate": 2.5466509780474667e-07, "loss": 0.0764, "step": 10559 }, { "epoch": 0.67, "grad_norm": 0.9615137551229664, "learning_rate": 2.5457511576332e-07, "loss": 0.2759, "step": 10560 }, { "epoch": 0.67, "grad_norm": 0.8480390854122233, "learning_rate": 2.5448514419221243e-07, "loss": 0.2355, "step": 10561 }, { "epoch": 0.67, "grad_norm": 2.800579296234656, "learning_rate": 2.5439518309526203e-07, "loss": 0.1037, "step": 10562 }, { "epoch": 0.67, "grad_norm": 0.8229825352930443, "learning_rate": 2.5430523247630703e-07, "loss": 0.1112, "step": 10563 }, { "epoch": 0.67, "grad_norm": 0.4302533019832532, "learning_rate": 2.542152923391845e-07, "loss": 0.0144, "step": 10564 }, { "epoch": 0.67, "grad_norm": 0.36507556899829297, "learning_rate": 2.5412536268773186e-07, "loss": 0.1387, "step": 10565 }, { "epoch": 0.67, "grad_norm": 2.479189561026578, "learning_rate": 2.5403544352578544e-07, "loss": 0.3136, "step": 10566 }, { "epoch": 0.67, "grad_norm": 0.4284166623846381, "learning_rate": 2.539455348571813e-07, "loss": 0.022, "step": 10567 }, { "epoch": 0.67, "grad_norm": 2.206928190817757, "learning_rate": 2.538556366857556e-07, "loss": 0.2967, "step": 10568 }, { "epoch": 0.67, "grad_norm": 0.25602016189714777, "learning_rate": 2.5376574901534296e-07, "loss": 0.0336, "step": 10569 }, { "epoch": 0.67, "grad_norm": 0.667943144170874, "learning_rate": 2.536758718497787e-07, "loss": 0.0874, "step": 10570 }, { "epoch": 0.67, "grad_norm": 1.1676065572146814, "learning_rate": 2.535860051928967e-07, "loss": 0.1725, "step": 10571 }, { "epoch": 0.67, "grad_norm": 0.06383168117739876, "learning_rate": 2.534961490485313e-07, "loss": 0.0017, "step": 10572 }, { "epoch": 0.67, "grad_norm": 0.612009773288431, "learning_rate": 2.534063034205155e-07, "loss": 0.3274, "step": 10573 }, { "epoch": 0.67, "grad_norm": 0.5469969393598104, "learning_rate": 2.5331646831268274e-07, "loss": 0.156, "step": 10574 }, { "epoch": 0.67, "grad_norm": 11.353782065378933, "learning_rate": 2.53226643728865e-07, "loss": 0.3062, "step": 10575 }, { "epoch": 0.67, "grad_norm": 0.6720243725775021, "learning_rate": 2.531368296728951e-07, "loss": 0.1248, "step": 10576 }, { "epoch": 0.67, "grad_norm": 0.4131062860017984, "learning_rate": 2.53047026148604e-07, "loss": 0.2135, "step": 10577 }, { "epoch": 0.67, "grad_norm": 0.6516688749793922, "learning_rate": 2.5295723315982345e-07, "loss": 0.1384, "step": 10578 }, { "epoch": 0.67, "grad_norm": 0.46519008824416797, "learning_rate": 2.5286745071038396e-07, "loss": 0.1715, "step": 10579 }, { "epoch": 0.67, "grad_norm": 0.4264081869372595, "learning_rate": 2.527776788041156e-07, "loss": 0.1294, "step": 10580 }, { "epoch": 0.67, "grad_norm": 1.1450129377251033, "learning_rate": 2.526879174448486e-07, "loss": 0.4197, "step": 10581 }, { "epoch": 0.67, "grad_norm": 0.9483996792354955, "learning_rate": 2.5259816663641205e-07, "loss": 0.1096, "step": 10582 }, { "epoch": 0.67, "grad_norm": 0.5704949117557274, "learning_rate": 2.5250842638263526e-07, "loss": 0.0159, "step": 10583 }, { "epoch": 0.67, "grad_norm": 1.0299182900440016, "learning_rate": 2.524186966873463e-07, "loss": 0.3862, "step": 10584 }, { "epoch": 0.68, "grad_norm": 4.038388330939207, "learning_rate": 2.5232897755437346e-07, "loss": 0.3125, "step": 10585 }, { "epoch": 0.68, "grad_norm": 1.8743035505804668, "learning_rate": 2.522392689875442e-07, "loss": 0.1385, "step": 10586 }, { "epoch": 0.68, "grad_norm": 0.1298631708065149, "learning_rate": 2.521495709906861e-07, "loss": 0.0042, "step": 10587 }, { "epoch": 0.68, "grad_norm": 1.5824709882596042, "learning_rate": 2.5205988356762536e-07, "loss": 0.1166, "step": 10588 }, { "epoch": 0.68, "grad_norm": 5.152329124092868, "learning_rate": 2.519702067221886e-07, "loss": 0.1667, "step": 10589 }, { "epoch": 0.68, "grad_norm": 0.8882923684298127, "learning_rate": 2.5188054045820115e-07, "loss": 0.1068, "step": 10590 }, { "epoch": 0.68, "grad_norm": 0.43831150842493927, "learning_rate": 2.517908847794889e-07, "loss": 0.4529, "step": 10591 }, { "epoch": 0.68, "grad_norm": 1.1712969157414352, "learning_rate": 2.517012396898762e-07, "loss": 0.006, "step": 10592 }, { "epoch": 0.68, "grad_norm": 0.5727387135701465, "learning_rate": 2.51611605193188e-07, "loss": 0.0746, "step": 10593 }, { "epoch": 0.68, "grad_norm": 0.9156049890281388, "learning_rate": 2.515219812932481e-07, "loss": 0.3767, "step": 10594 }, { "epoch": 0.68, "grad_norm": 1.43717320243928, "learning_rate": 2.5143236799387975e-07, "loss": 0.1063, "step": 10595 }, { "epoch": 0.68, "grad_norm": 1.3269259054658187, "learning_rate": 2.5134276529890644e-07, "loss": 0.0593, "step": 10596 }, { "epoch": 0.68, "grad_norm": 1.3760722979262, "learning_rate": 2.5125317321215046e-07, "loss": 0.2649, "step": 10597 }, { "epoch": 0.68, "grad_norm": 0.9689287146562792, "learning_rate": 2.511635917374343e-07, "loss": 0.1306, "step": 10598 }, { "epoch": 0.68, "grad_norm": 0.7995139365785888, "learning_rate": 2.510740208785793e-07, "loss": 0.1817, "step": 10599 }, { "epoch": 0.68, "grad_norm": 1.1968501978495556, "learning_rate": 2.5098446063940725e-07, "loss": 0.1242, "step": 10600 }, { "epoch": 0.68, "grad_norm": 0.47176432883077934, "learning_rate": 2.508949110237385e-07, "loss": 0.0039, "step": 10601 }, { "epoch": 0.68, "grad_norm": 0.8108725101723075, "learning_rate": 2.5080537203539364e-07, "loss": 0.2779, "step": 10602 }, { "epoch": 0.68, "grad_norm": 0.8583643549900759, "learning_rate": 2.5071584367819267e-07, "loss": 0.4885, "step": 10603 }, { "epoch": 0.68, "grad_norm": 0.26309118018076055, "learning_rate": 2.506263259559548e-07, "loss": 0.0486, "step": 10604 }, { "epoch": 0.68, "grad_norm": 0.8450317211287066, "learning_rate": 2.505368188724991e-07, "loss": 0.2276, "step": 10605 }, { "epoch": 0.68, "grad_norm": 0.9268324661988067, "learning_rate": 2.5044732243164444e-07, "loss": 0.0802, "step": 10606 }, { "epoch": 0.68, "grad_norm": 0.5004466655700627, "learning_rate": 2.5035783663720866e-07, "loss": 0.2071, "step": 10607 }, { "epoch": 0.68, "grad_norm": 0.34058912591187507, "learning_rate": 2.5026836149300924e-07, "loss": 0.198, "step": 10608 }, { "epoch": 0.68, "grad_norm": 1.3687588008158, "learning_rate": 2.5017889700286366e-07, "loss": 0.0463, "step": 10609 }, { "epoch": 0.68, "grad_norm": 1.3371554072172445, "learning_rate": 2.5008944317058844e-07, "loss": 0.1614, "step": 10610 }, { "epoch": 0.68, "grad_norm": 0.7631054936767125, "learning_rate": 2.500000000000001e-07, "loss": 0.2467, "step": 10611 }, { "epoch": 0.68, "grad_norm": 12.570913992693844, "learning_rate": 2.4991056749491414e-07, "loss": 0.0508, "step": 10612 }, { "epoch": 0.68, "grad_norm": 0.5580753188516984, "learning_rate": 2.498211456591463e-07, "loss": 0.1841, "step": 10613 }, { "epoch": 0.68, "grad_norm": 3.311823748840983, "learning_rate": 2.4973173449651106e-07, "loss": 0.2012, "step": 10614 }, { "epoch": 0.68, "grad_norm": 0.3992919977999924, "learning_rate": 2.496423340108234e-07, "loss": 0.0016, "step": 10615 }, { "epoch": 0.68, "grad_norm": 0.7304015725613393, "learning_rate": 2.4955294420589687e-07, "loss": 0.2443, "step": 10616 }, { "epoch": 0.68, "grad_norm": 0.7688464051008319, "learning_rate": 2.494635650855453e-07, "loss": 0.1986, "step": 10617 }, { "epoch": 0.68, "grad_norm": 0.3838141955276136, "learning_rate": 2.493741966535816e-07, "loss": 0.0887, "step": 10618 }, { "epoch": 0.68, "grad_norm": 1.1487147416039272, "learning_rate": 2.4928483891381863e-07, "loss": 0.1145, "step": 10619 }, { "epoch": 0.68, "grad_norm": 1.5174528699626204, "learning_rate": 2.4919549187006826e-07, "loss": 0.2569, "step": 10620 }, { "epoch": 0.68, "grad_norm": 0.8653498544965659, "learning_rate": 2.491061555261424e-07, "loss": 0.2112, "step": 10621 }, { "epoch": 0.68, "grad_norm": 0.8580172433265398, "learning_rate": 2.4901682988585244e-07, "loss": 0.1548, "step": 10622 }, { "epoch": 0.68, "grad_norm": 0.7678532818603045, "learning_rate": 2.489275149530089e-07, "loss": 0.2632, "step": 10623 }, { "epoch": 0.68, "grad_norm": 0.852388175325931, "learning_rate": 2.488382107314225e-07, "loss": 0.2685, "step": 10624 }, { "epoch": 0.68, "grad_norm": 1.4027321008603786, "learning_rate": 2.487489172249027e-07, "loss": 0.2474, "step": 10625 }, { "epoch": 0.68, "grad_norm": 1.049118659300945, "learning_rate": 2.486596344372594e-07, "loss": 0.1608, "step": 10626 }, { "epoch": 0.68, "grad_norm": 0.8708467854010007, "learning_rate": 2.485703623723011e-07, "loss": 0.214, "step": 10627 }, { "epoch": 0.68, "grad_norm": 0.4338989155347018, "learning_rate": 2.4848110103383683e-07, "loss": 0.1835, "step": 10628 }, { "epoch": 0.68, "grad_norm": 1.0908190163622082, "learning_rate": 2.4839185042567424e-07, "loss": 0.2569, "step": 10629 }, { "epoch": 0.68, "grad_norm": 0.6196024690562174, "learning_rate": 2.483026105516212e-07, "loss": 0.111, "step": 10630 }, { "epoch": 0.68, "grad_norm": 0.7471693799119881, "learning_rate": 2.4821338141548465e-07, "loss": 0.1557, "step": 10631 }, { "epoch": 0.68, "grad_norm": 5.471578282580022, "learning_rate": 2.481241630210716e-07, "loss": 0.2435, "step": 10632 }, { "epoch": 0.68, "grad_norm": 0.48844098333069136, "learning_rate": 2.4803495537218787e-07, "loss": 0.0122, "step": 10633 }, { "epoch": 0.68, "grad_norm": 0.9526328234776237, "learning_rate": 2.4794575847263963e-07, "loss": 0.2759, "step": 10634 }, { "epoch": 0.68, "grad_norm": 1.0131510610706103, "learning_rate": 2.478565723262321e-07, "loss": 0.4307, "step": 10635 }, { "epoch": 0.68, "grad_norm": 0.9358636516171898, "learning_rate": 2.4776739693676977e-07, "loss": 0.2347, "step": 10636 }, { "epoch": 0.68, "grad_norm": 2.165739279034136, "learning_rate": 2.476782323080574e-07, "loss": 0.2701, "step": 10637 }, { "epoch": 0.68, "grad_norm": 0.5396514702615218, "learning_rate": 2.4758907844389905e-07, "loss": 0.1629, "step": 10638 }, { "epoch": 0.68, "grad_norm": 8.320681171798618, "learning_rate": 2.474999353480978e-07, "loss": 0.3476, "step": 10639 }, { "epoch": 0.68, "grad_norm": 1.0111865266702131, "learning_rate": 2.47410803024457e-07, "loss": 0.2517, "step": 10640 }, { "epoch": 0.68, "grad_norm": 1.0803292059452372, "learning_rate": 2.473216814767792e-07, "loss": 0.1364, "step": 10641 }, { "epoch": 0.68, "grad_norm": 0.501200864153896, "learning_rate": 2.472325707088663e-07, "loss": 0.1743, "step": 10642 }, { "epoch": 0.68, "grad_norm": 0.47594962004949454, "learning_rate": 2.471434707245202e-07, "loss": 0.2192, "step": 10643 }, { "epoch": 0.68, "grad_norm": 0.720951558466603, "learning_rate": 2.4705438152754174e-07, "loss": 0.2886, "step": 10644 }, { "epoch": 0.68, "grad_norm": 0.49906162606421556, "learning_rate": 2.469653031217321e-07, "loss": 0.0609, "step": 10645 }, { "epoch": 0.68, "grad_norm": 0.5942776477775826, "learning_rate": 2.4687623551089104e-07, "loss": 0.1346, "step": 10646 }, { "epoch": 0.68, "grad_norm": 1.1049866050200776, "learning_rate": 2.4678717869881884e-07, "loss": 0.214, "step": 10647 }, { "epoch": 0.68, "grad_norm": 1.0522141300105585, "learning_rate": 2.466981326893146e-07, "loss": 0.0121, "step": 10648 }, { "epoch": 0.68, "grad_norm": 9.595667859330842, "learning_rate": 2.46609097486177e-07, "loss": 0.1439, "step": 10649 }, { "epoch": 0.68, "grad_norm": 0.6189656097202366, "learning_rate": 2.465200730932049e-07, "loss": 0.4181, "step": 10650 }, { "epoch": 0.68, "grad_norm": 0.4350202001291532, "learning_rate": 2.464310595141959e-07, "loss": 0.3342, "step": 10651 }, { "epoch": 0.68, "grad_norm": 1.9072968235831722, "learning_rate": 2.4634205675294777e-07, "loss": 0.5667, "step": 10652 }, { "epoch": 0.68, "grad_norm": 2.433996284579106, "learning_rate": 2.4625306481325727e-07, "loss": 0.0617, "step": 10653 }, { "epoch": 0.68, "grad_norm": 0.8957853909505935, "learning_rate": 2.4616408369892126e-07, "loss": 0.2836, "step": 10654 }, { "epoch": 0.68, "grad_norm": 1.8956053562597286, "learning_rate": 2.4607511341373557e-07, "loss": 0.1735, "step": 10655 }, { "epoch": 0.68, "grad_norm": 0.9634464743769201, "learning_rate": 2.4598615396149597e-07, "loss": 0.4158, "step": 10656 }, { "epoch": 0.68, "grad_norm": 3.2599236002697625, "learning_rate": 2.458972053459977e-07, "loss": 0.1456, "step": 10657 }, { "epoch": 0.68, "grad_norm": 0.4171246802869317, "learning_rate": 2.4580826757103564e-07, "loss": 0.162, "step": 10658 }, { "epoch": 0.68, "grad_norm": 4.632731797597872, "learning_rate": 2.457193406404036e-07, "loss": 0.1327, "step": 10659 }, { "epoch": 0.68, "grad_norm": 1.463570315065967, "learning_rate": 2.4563042455789593e-07, "loss": 0.2327, "step": 10660 }, { "epoch": 0.68, "grad_norm": 0.7475601521375592, "learning_rate": 2.455415193273055e-07, "loss": 0.3889, "step": 10661 }, { "epoch": 0.68, "grad_norm": 0.9511196635599971, "learning_rate": 2.4545262495242556e-07, "loss": 0.2351, "step": 10662 }, { "epoch": 0.68, "grad_norm": 7.354419344303781, "learning_rate": 2.4536374143704834e-07, "loss": 0.1062, "step": 10663 }, { "epoch": 0.68, "grad_norm": 2.7422148530732846, "learning_rate": 2.452748687849656e-07, "loss": 0.239, "step": 10664 }, { "epoch": 0.68, "grad_norm": 0.9694439957071486, "learning_rate": 2.4518600699996925e-07, "loss": 0.2528, "step": 10665 }, { "epoch": 0.68, "grad_norm": 0.38941484134385324, "learning_rate": 2.4509715608584986e-07, "loss": 0.0716, "step": 10666 }, { "epoch": 0.68, "grad_norm": 0.53835049220775, "learning_rate": 2.4500831604639843e-07, "loss": 0.2671, "step": 10667 }, { "epoch": 0.68, "grad_norm": 1.4995410768084054, "learning_rate": 2.449194868854046e-07, "loss": 0.0492, "step": 10668 }, { "epoch": 0.68, "grad_norm": 0.688961576178236, "learning_rate": 2.448306686066585e-07, "loss": 0.3869, "step": 10669 }, { "epoch": 0.68, "grad_norm": 0.7985312278403478, "learning_rate": 2.4474186121394875e-07, "loss": 0.3686, "step": 10670 }, { "epoch": 0.68, "grad_norm": 1.116955759566959, "learning_rate": 2.446530647110646e-07, "loss": 0.3511, "step": 10671 }, { "epoch": 0.68, "grad_norm": 1.3933397101762621, "learning_rate": 2.4456427910179374e-07, "loss": 0.1386, "step": 10672 }, { "epoch": 0.68, "grad_norm": 0.24738398884795554, "learning_rate": 2.444755043899243e-07, "loss": 0.1014, "step": 10673 }, { "epoch": 0.68, "grad_norm": 0.5006446506991662, "learning_rate": 2.4438674057924365e-07, "loss": 0.2221, "step": 10674 }, { "epoch": 0.68, "grad_norm": 0.689609795630209, "learning_rate": 2.442979876735383e-07, "loss": 0.2426, "step": 10675 }, { "epoch": 0.68, "grad_norm": 1.3975373833477966, "learning_rate": 2.4420924567659504e-07, "loss": 0.2743, "step": 10676 }, { "epoch": 0.68, "grad_norm": 0.592271887308199, "learning_rate": 2.441205145921994e-07, "loss": 0.1441, "step": 10677 }, { "epoch": 0.68, "grad_norm": 0.36363260404378417, "learning_rate": 2.440317944241372e-07, "loss": 0.0879, "step": 10678 }, { "epoch": 0.68, "grad_norm": 0.8731244608591947, "learning_rate": 2.4394308517619293e-07, "loss": 0.2039, "step": 10679 }, { "epoch": 0.68, "grad_norm": 3.1850744507161584, "learning_rate": 2.4385438685215165e-07, "loss": 0.0939, "step": 10680 }, { "epoch": 0.68, "grad_norm": 2.922497198506668, "learning_rate": 2.437656994557969e-07, "loss": 0.2345, "step": 10681 }, { "epoch": 0.68, "grad_norm": 0.9969140916400155, "learning_rate": 2.4367702299091274e-07, "loss": 0.5095, "step": 10682 }, { "epoch": 0.68, "grad_norm": 0.6505041781957592, "learning_rate": 2.4358835746128186e-07, "loss": 0.2, "step": 10683 }, { "epoch": 0.68, "grad_norm": 0.5692669734993312, "learning_rate": 2.4349970287068725e-07, "loss": 0.1239, "step": 10684 }, { "epoch": 0.68, "grad_norm": 2.4186398298078355, "learning_rate": 2.4341105922291076e-07, "loss": 0.1111, "step": 10685 }, { "epoch": 0.68, "grad_norm": 0.5287317822196954, "learning_rate": 2.4332242652173455e-07, "loss": 0.1459, "step": 10686 }, { "epoch": 0.68, "grad_norm": 1.2826770114363877, "learning_rate": 2.4323380477093934e-07, "loss": 0.5146, "step": 10687 }, { "epoch": 0.68, "grad_norm": 0.4946248138867597, "learning_rate": 2.4314519397430646e-07, "loss": 0.0826, "step": 10688 }, { "epoch": 0.68, "grad_norm": 1.4667660044199442, "learning_rate": 2.430565941356157e-07, "loss": 0.3147, "step": 10689 }, { "epoch": 0.68, "grad_norm": 1.3180618724213369, "learning_rate": 2.4296800525864736e-07, "loss": 0.2997, "step": 10690 }, { "epoch": 0.68, "grad_norm": 7.671803698899248, "learning_rate": 2.4287942734718044e-07, "loss": 0.2124, "step": 10691 }, { "epoch": 0.68, "grad_norm": 0.36901619813841613, "learning_rate": 2.4279086040499394e-07, "loss": 0.1636, "step": 10692 }, { "epoch": 0.68, "grad_norm": 8.125111527344528, "learning_rate": 2.427023044358667e-07, "loss": 0.195, "step": 10693 }, { "epoch": 0.68, "grad_norm": 0.5461668614907347, "learning_rate": 2.4261375944357607e-07, "loss": 0.3006, "step": 10694 }, { "epoch": 0.68, "grad_norm": 1.36798634203906, "learning_rate": 2.4252522543190017e-07, "loss": 0.1184, "step": 10695 }, { "epoch": 0.68, "grad_norm": 0.9025822251560608, "learning_rate": 2.4243670240461556e-07, "loss": 0.2122, "step": 10696 }, { "epoch": 0.68, "grad_norm": 1.2385314046509113, "learning_rate": 2.4234819036549916e-07, "loss": 0.3901, "step": 10697 }, { "epoch": 0.68, "grad_norm": 0.509657875785536, "learning_rate": 2.422596893183266e-07, "loss": 0.2317, "step": 10698 }, { "epoch": 0.68, "grad_norm": 1.2800454280133113, "learning_rate": 2.421711992668741e-07, "loss": 0.3699, "step": 10699 }, { "epoch": 0.68, "grad_norm": 0.9437888866957841, "learning_rate": 2.420827202149164e-07, "loss": 0.1995, "step": 10700 }, { "epoch": 0.68, "grad_norm": 4.7303262059632845, "learning_rate": 2.419942521662285e-07, "loss": 0.1273, "step": 10701 }, { "epoch": 0.68, "grad_norm": 3.096351887508121, "learning_rate": 2.419057951245842e-07, "loss": 0.1125, "step": 10702 }, { "epoch": 0.68, "grad_norm": 7.0985929843028215, "learning_rate": 2.418173490937578e-07, "loss": 0.0228, "step": 10703 }, { "epoch": 0.68, "grad_norm": 0.3791057710799882, "learning_rate": 2.417289140775222e-07, "loss": 0.13, "step": 10704 }, { "epoch": 0.68, "grad_norm": 0.587168419506907, "learning_rate": 2.4164049007965023e-07, "loss": 0.2207, "step": 10705 }, { "epoch": 0.68, "grad_norm": 0.6911265857887287, "learning_rate": 2.415520771039144e-07, "loss": 0.4728, "step": 10706 }, { "epoch": 0.68, "grad_norm": 1.4532718760205119, "learning_rate": 2.414636751540864e-07, "loss": 0.1215, "step": 10707 }, { "epoch": 0.68, "grad_norm": 1.0060944484966456, "learning_rate": 2.413752842339377e-07, "loss": 0.2105, "step": 10708 }, { "epoch": 0.68, "grad_norm": 0.9268099357017542, "learning_rate": 2.4128690434723947e-07, "loss": 0.2565, "step": 10709 }, { "epoch": 0.68, "grad_norm": 0.2520195283701715, "learning_rate": 2.411985354977618e-07, "loss": 0.2335, "step": 10710 }, { "epoch": 0.68, "grad_norm": 3.0977907578748356, "learning_rate": 2.411101776892749e-07, "loss": 0.0649, "step": 10711 }, { "epoch": 0.68, "grad_norm": 6.7218064755685125, "learning_rate": 2.410218309255484e-07, "loss": 0.0321, "step": 10712 }, { "epoch": 0.68, "grad_norm": 0.7625233089280851, "learning_rate": 2.40933495210351e-07, "loss": 0.1131, "step": 10713 }, { "epoch": 0.68, "grad_norm": 1.0855095888509445, "learning_rate": 2.408451705474517e-07, "loss": 0.114, "step": 10714 }, { "epoch": 0.68, "grad_norm": 2.1842261955563496, "learning_rate": 2.407568569406182e-07, "loss": 0.0506, "step": 10715 }, { "epoch": 0.68, "grad_norm": 0.38620217678899577, "learning_rate": 2.406685543936185e-07, "loss": 0.0084, "step": 10716 }, { "epoch": 0.68, "grad_norm": 0.5924790759334992, "learning_rate": 2.405802629102196e-07, "loss": 0.0855, "step": 10717 }, { "epoch": 0.68, "grad_norm": 0.5337281680297228, "learning_rate": 2.4049198249418803e-07, "loss": 0.2453, "step": 10718 }, { "epoch": 0.68, "grad_norm": 0.5770910264205996, "learning_rate": 2.404037131492903e-07, "loss": 0.1786, "step": 10719 }, { "epoch": 0.68, "grad_norm": 0.8558103821620981, "learning_rate": 2.403154548792918e-07, "loss": 0.172, "step": 10720 }, { "epoch": 0.68, "grad_norm": 0.8557960799844515, "learning_rate": 2.402272076879583e-07, "loss": 0.1622, "step": 10721 }, { "epoch": 0.68, "grad_norm": 2.80220731283959, "learning_rate": 2.401389715790541e-07, "loss": 0.2381, "step": 10722 }, { "epoch": 0.68, "grad_norm": 1.076657403162898, "learning_rate": 2.4005074655634393e-07, "loss": 0.3262, "step": 10723 }, { "epoch": 0.68, "grad_norm": 0.3533423049794706, "learning_rate": 2.3996253262359133e-07, "loss": 0.2099, "step": 10724 }, { "epoch": 0.68, "grad_norm": 0.5668099182172478, "learning_rate": 2.3987432978455995e-07, "loss": 0.1811, "step": 10725 }, { "epoch": 0.68, "grad_norm": 0.5371098300354171, "learning_rate": 2.3978613804301246e-07, "loss": 0.4094, "step": 10726 }, { "epoch": 0.68, "grad_norm": 1.838505960343988, "learning_rate": 2.3969795740271145e-07, "loss": 0.2311, "step": 10727 }, { "epoch": 0.68, "grad_norm": 1.667570296835776, "learning_rate": 2.3960978786741874e-07, "loss": 0.1847, "step": 10728 }, { "epoch": 0.68, "grad_norm": 0.5828788587723586, "learning_rate": 2.3952162944089616e-07, "loss": 0.1691, "step": 10729 }, { "epoch": 0.68, "grad_norm": 0.9476949999625318, "learning_rate": 2.3943348212690433e-07, "loss": 0.3167, "step": 10730 }, { "epoch": 0.68, "grad_norm": 0.8783290069773235, "learning_rate": 2.3934534592920413e-07, "loss": 0.1312, "step": 10731 }, { "epoch": 0.68, "grad_norm": 2.081775542810553, "learning_rate": 2.392572208515555e-07, "loss": 0.397, "step": 10732 }, { "epoch": 0.68, "grad_norm": 0.5762764388574344, "learning_rate": 2.3916910689771773e-07, "loss": 0.0017, "step": 10733 }, { "epoch": 0.68, "grad_norm": 0.7355044397297507, "learning_rate": 2.390810040714504e-07, "loss": 0.2677, "step": 10734 }, { "epoch": 0.68, "grad_norm": 0.5964938536065715, "learning_rate": 2.389929123765118e-07, "loss": 0.0749, "step": 10735 }, { "epoch": 0.68, "grad_norm": 0.7905799500317098, "learning_rate": 2.3890483181666036e-07, "loss": 0.1636, "step": 10736 }, { "epoch": 0.68, "grad_norm": 0.36713854956127034, "learning_rate": 2.3881676239565356e-07, "loss": 0.0206, "step": 10737 }, { "epoch": 0.68, "grad_norm": 7.347921022010893, "learning_rate": 2.3872870411724887e-07, "loss": 0.1419, "step": 10738 }, { "epoch": 0.68, "grad_norm": 0.6525773147906483, "learning_rate": 2.386406569852027e-07, "loss": 0.4326, "step": 10739 }, { "epoch": 0.68, "grad_norm": 0.5300134958343342, "learning_rate": 2.3855262100327167e-07, "loss": 0.1283, "step": 10740 }, { "epoch": 0.68, "grad_norm": 2.8747068130142694, "learning_rate": 2.3846459617521123e-07, "loss": 0.0996, "step": 10741 }, { "epoch": 0.69, "grad_norm": 0.8111566195875461, "learning_rate": 2.3837658250477704e-07, "loss": 0.2887, "step": 10742 }, { "epoch": 0.69, "grad_norm": 0.6136206640377274, "learning_rate": 2.382885799957236e-07, "loss": 0.2516, "step": 10743 }, { "epoch": 0.69, "grad_norm": 0.35564433647555616, "learning_rate": 2.3820058865180548e-07, "loss": 0.1259, "step": 10744 }, { "epoch": 0.69, "grad_norm": 5.30969938791369, "learning_rate": 2.3811260847677668e-07, "loss": 0.0474, "step": 10745 }, { "epoch": 0.69, "grad_norm": 0.982035371250566, "learning_rate": 2.3802463947439028e-07, "loss": 0.2785, "step": 10746 }, { "epoch": 0.69, "grad_norm": 0.5894925748322754, "learning_rate": 2.3793668164839954e-07, "loss": 0.3994, "step": 10747 }, { "epoch": 0.69, "grad_norm": 0.8366667867151247, "learning_rate": 2.3784873500255666e-07, "loss": 0.0728, "step": 10748 }, { "epoch": 0.69, "grad_norm": 0.4545383376816283, "learning_rate": 2.3776079954061385e-07, "loss": 0.2984, "step": 10749 }, { "epoch": 0.69, "grad_norm": 1.0425947107482705, "learning_rate": 2.3767287526632234e-07, "loss": 0.4034, "step": 10750 }, { "epoch": 0.69, "grad_norm": 1.0457992927079485, "learning_rate": 2.3758496218343355e-07, "loss": 0.0778, "step": 10751 }, { "epoch": 0.69, "grad_norm": 0.5266467923509143, "learning_rate": 2.3749706029569754e-07, "loss": 0.2213, "step": 10752 }, { "epoch": 0.69, "grad_norm": 0.39514589901137304, "learning_rate": 2.3740916960686486e-07, "loss": 0.0478, "step": 10753 }, { "epoch": 0.69, "grad_norm": 0.7346820868868575, "learning_rate": 2.373212901206847e-07, "loss": 0.1854, "step": 10754 }, { "epoch": 0.69, "grad_norm": 1.701917774586411, "learning_rate": 2.372334218409065e-07, "loss": 0.0562, "step": 10755 }, { "epoch": 0.69, "grad_norm": 2.841902311073271, "learning_rate": 2.3714556477127861e-07, "loss": 0.3454, "step": 10756 }, { "epoch": 0.69, "grad_norm": 0.9812225346183727, "learning_rate": 2.3705771891554948e-07, "loss": 0.2398, "step": 10757 }, { "epoch": 0.69, "grad_norm": 0.5400856289738735, "learning_rate": 2.3696988427746667e-07, "loss": 0.0455, "step": 10758 }, { "epoch": 0.69, "grad_norm": 1.0962111199743132, "learning_rate": 2.3688206086077716e-07, "loss": 0.2525, "step": 10759 }, { "epoch": 0.69, "grad_norm": 0.5658892060409247, "learning_rate": 2.3679424866922805e-07, "loss": 0.2609, "step": 10760 }, { "epoch": 0.69, "grad_norm": 0.8507019580896611, "learning_rate": 2.3670644770656517e-07, "loss": 0.417, "step": 10761 }, { "epoch": 0.69, "grad_norm": 6.051728849599386, "learning_rate": 2.3661865797653458e-07, "loss": 0.2295, "step": 10762 }, { "epoch": 0.69, "grad_norm": 0.3231275283644623, "learning_rate": 2.3653087948288147e-07, "loss": 0.1155, "step": 10763 }, { "epoch": 0.69, "grad_norm": 0.17446752350544095, "learning_rate": 2.3644311222935088e-07, "loss": 0.0846, "step": 10764 }, { "epoch": 0.69, "grad_norm": 0.2839051739428037, "learning_rate": 2.3635535621968678e-07, "loss": 0.1953, "step": 10765 }, { "epoch": 0.69, "grad_norm": 0.34255789800130965, "learning_rate": 2.362676114576333e-07, "loss": 0.1236, "step": 10766 }, { "epoch": 0.69, "grad_norm": 0.9428923488254283, "learning_rate": 2.3617987794693357e-07, "loss": 0.0381, "step": 10767 }, { "epoch": 0.69, "grad_norm": 1.0815575565411712, "learning_rate": 2.3609215569133074e-07, "loss": 0.2537, "step": 10768 }, { "epoch": 0.69, "grad_norm": 1.2725050593944605, "learning_rate": 2.3600444469456688e-07, "loss": 0.0969, "step": 10769 }, { "epoch": 0.69, "grad_norm": 4.320496287192785, "learning_rate": 2.3591674496038428e-07, "loss": 0.0124, "step": 10770 }, { "epoch": 0.69, "grad_norm": 0.3960283998846838, "learning_rate": 2.3582905649252404e-07, "loss": 0.3139, "step": 10771 }, { "epoch": 0.69, "grad_norm": 0.9080930515887877, "learning_rate": 2.357413792947275e-07, "loss": 0.3573, "step": 10772 }, { "epoch": 0.69, "grad_norm": 1.022924753631725, "learning_rate": 2.3565371337073493e-07, "loss": 0.2909, "step": 10773 }, { "epoch": 0.69, "grad_norm": 8.192332589148641, "learning_rate": 2.355660587242862e-07, "loss": 0.3369, "step": 10774 }, { "epoch": 0.69, "grad_norm": 1.5991252494796782, "learning_rate": 2.3547841535912112e-07, "loss": 0.3739, "step": 10775 }, { "epoch": 0.69, "grad_norm": 1.3910590544628323, "learning_rate": 2.3539078327897843e-07, "loss": 0.1886, "step": 10776 }, { "epoch": 0.69, "grad_norm": 0.5203742713285756, "learning_rate": 2.35303162487597e-07, "loss": 0.1422, "step": 10777 }, { "epoch": 0.69, "grad_norm": 10.009354655764911, "learning_rate": 2.3521555298871466e-07, "loss": 0.0497, "step": 10778 }, { "epoch": 0.69, "grad_norm": 0.4855409972449557, "learning_rate": 2.3512795478606905e-07, "loss": 0.3071, "step": 10779 }, { "epoch": 0.69, "grad_norm": 0.6035946861246638, "learning_rate": 2.350403678833976e-07, "loss": 0.2212, "step": 10780 }, { "epoch": 0.69, "grad_norm": 0.43697590959260857, "learning_rate": 2.3495279228443643e-07, "loss": 0.0041, "step": 10781 }, { "epoch": 0.69, "grad_norm": 0.761721033718258, "learning_rate": 2.3486522799292202e-07, "loss": 0.3627, "step": 10782 }, { "epoch": 0.69, "grad_norm": 0.8386367542748692, "learning_rate": 2.347776750125901e-07, "loss": 0.2445, "step": 10783 }, { "epoch": 0.69, "grad_norm": 0.670499795764103, "learning_rate": 2.346901333471756e-07, "loss": 0.187, "step": 10784 }, { "epoch": 0.69, "grad_norm": 0.6148808205684445, "learning_rate": 2.346026030004135e-07, "loss": 0.0973, "step": 10785 }, { "epoch": 0.69, "grad_norm": 0.4434147233251777, "learning_rate": 2.3451508397603785e-07, "loss": 0.1887, "step": 10786 }, { "epoch": 0.69, "grad_norm": 1.134159405329597, "learning_rate": 2.3442757627778227e-07, "loss": 0.2923, "step": 10787 }, { "epoch": 0.69, "grad_norm": 0.9278861527308889, "learning_rate": 2.343400799093803e-07, "loss": 0.1494, "step": 10788 }, { "epoch": 0.69, "grad_norm": 0.6902228714397468, "learning_rate": 2.3425259487456439e-07, "loss": 0.1855, "step": 10789 }, { "epoch": 0.69, "grad_norm": 0.7393512624553055, "learning_rate": 2.341651211770672e-07, "loss": 0.0571, "step": 10790 }, { "epoch": 0.69, "grad_norm": 1.2855576620004634, "learning_rate": 2.340776588206202e-07, "loss": 0.3084, "step": 10791 }, { "epoch": 0.69, "grad_norm": 1.208128575753836, "learning_rate": 2.3399020780895495e-07, "loss": 0.2589, "step": 10792 }, { "epoch": 0.69, "grad_norm": 1.068699523543247, "learning_rate": 2.33902768145802e-07, "loss": 0.1964, "step": 10793 }, { "epoch": 0.69, "grad_norm": 1.622485600677907, "learning_rate": 2.338153398348921e-07, "loss": 0.1287, "step": 10794 }, { "epoch": 0.69, "grad_norm": 0.8692939431777988, "learning_rate": 2.3372792287995473e-07, "loss": 0.2133, "step": 10795 }, { "epoch": 0.69, "grad_norm": 1.51608069027204, "learning_rate": 2.336405172847196e-07, "loss": 0.4074, "step": 10796 }, { "epoch": 0.69, "grad_norm": 1.036266952579445, "learning_rate": 2.3355312305291526e-07, "loss": 0.2128, "step": 10797 }, { "epoch": 0.69, "grad_norm": 0.727967646911707, "learning_rate": 2.3346574018827037e-07, "loss": 0.3428, "step": 10798 }, { "epoch": 0.69, "grad_norm": 1.3616547796227862, "learning_rate": 2.3337836869451294e-07, "loss": 0.2615, "step": 10799 }, { "epoch": 0.69, "grad_norm": 1.447780603213033, "learning_rate": 2.332910085753701e-07, "loss": 0.1512, "step": 10800 }, { "epoch": 0.69, "grad_norm": 0.9427087413710671, "learning_rate": 2.3320365983456918e-07, "loss": 0.267, "step": 10801 }, { "epoch": 0.69, "grad_norm": 9.508688698731216, "learning_rate": 2.3311632247583623e-07, "loss": 0.3934, "step": 10802 }, { "epoch": 0.69, "grad_norm": 0.9400835084229531, "learning_rate": 2.330289965028977e-07, "loss": 0.3936, "step": 10803 }, { "epoch": 0.69, "grad_norm": 0.14746324843519315, "learning_rate": 2.3294168191947867e-07, "loss": 0.0015, "step": 10804 }, { "epoch": 0.69, "grad_norm": 1.1419398616777385, "learning_rate": 2.3285437872930452e-07, "loss": 0.1238, "step": 10805 }, { "epoch": 0.69, "grad_norm": 0.7147358235020845, "learning_rate": 2.3276708693609942e-07, "loss": 0.3919, "step": 10806 }, { "epoch": 0.69, "grad_norm": 0.5466781663745297, "learning_rate": 2.3267980654358782e-07, "loss": 0.1859, "step": 10807 }, { "epoch": 0.69, "grad_norm": 2.135015677342683, "learning_rate": 2.325925375554928e-07, "loss": 0.2184, "step": 10808 }, { "epoch": 0.69, "grad_norm": 0.826877852968951, "learning_rate": 2.3250527997553794e-07, "loss": 0.2951, "step": 10809 }, { "epoch": 0.69, "grad_norm": 0.5517360802098473, "learning_rate": 2.324180338074453e-07, "loss": 0.0864, "step": 10810 }, { "epoch": 0.69, "grad_norm": 1.1360274930485523, "learning_rate": 2.3233079905493747e-07, "loss": 0.2908, "step": 10811 }, { "epoch": 0.69, "grad_norm": 0.7841358997135572, "learning_rate": 2.322435757217357e-07, "loss": 0.0843, "step": 10812 }, { "epoch": 0.69, "grad_norm": 0.419906399602515, "learning_rate": 2.3215636381156135e-07, "loss": 0.0172, "step": 10813 }, { "epoch": 0.69, "grad_norm": 1.0568777606934483, "learning_rate": 2.3206916332813481e-07, "loss": 0.2334, "step": 10814 }, { "epoch": 0.69, "grad_norm": 0.2047534685914261, "learning_rate": 2.3198197427517658e-07, "loss": 0.0967, "step": 10815 }, { "epoch": 0.69, "grad_norm": 0.7356720773304193, "learning_rate": 2.3189479665640588e-07, "loss": 0.1231, "step": 10816 }, { "epoch": 0.69, "grad_norm": 0.9034519010183022, "learning_rate": 2.3180763047554213e-07, "loss": 0.1016, "step": 10817 }, { "epoch": 0.69, "grad_norm": 6.186849603714974, "learning_rate": 2.3172047573630416e-07, "loss": 0.1949, "step": 10818 }, { "epoch": 0.69, "grad_norm": 0.9214053839934992, "learning_rate": 2.3163333244240984e-07, "loss": 0.1373, "step": 10819 }, { "epoch": 0.69, "grad_norm": 0.678931933551839, "learning_rate": 2.3154620059757723e-07, "loss": 0.0885, "step": 10820 }, { "epoch": 0.69, "grad_norm": 1.9878662707130919, "learning_rate": 2.3145908020552318e-07, "loss": 0.3005, "step": 10821 }, { "epoch": 0.69, "grad_norm": 0.47761683792383697, "learning_rate": 2.3137197126996482e-07, "loss": 0.2039, "step": 10822 }, { "epoch": 0.69, "grad_norm": 0.3679451424814524, "learning_rate": 2.3128487379461798e-07, "loss": 0.1906, "step": 10823 }, { "epoch": 0.69, "grad_norm": 1.0166061374711701, "learning_rate": 2.3119778778319877e-07, "loss": 0.2979, "step": 10824 }, { "epoch": 0.69, "grad_norm": 1.216325736730003, "learning_rate": 2.311107132394221e-07, "loss": 0.29, "step": 10825 }, { "epoch": 0.69, "grad_norm": 2.482344311760232, "learning_rate": 2.3102365016700315e-07, "loss": 0.1998, "step": 10826 }, { "epoch": 0.69, "grad_norm": 0.8380530062664374, "learning_rate": 2.3093659856965596e-07, "loss": 0.0783, "step": 10827 }, { "epoch": 0.69, "grad_norm": 0.6588364842038741, "learning_rate": 2.3084955845109416e-07, "loss": 0.1647, "step": 10828 }, { "epoch": 0.69, "grad_norm": 2.2935703272833226, "learning_rate": 2.3076252981503154e-07, "loss": 0.0233, "step": 10829 }, { "epoch": 0.69, "grad_norm": 0.47247374926229946, "learning_rate": 2.3067551266518037e-07, "loss": 0.1596, "step": 10830 }, { "epoch": 0.69, "grad_norm": 0.9566299353721107, "learning_rate": 2.305885070052534e-07, "loss": 0.532, "step": 10831 }, { "epoch": 0.69, "grad_norm": 0.8751567444118277, "learning_rate": 2.305015128389622e-07, "loss": 0.1801, "step": 10832 }, { "epoch": 0.69, "grad_norm": 0.5742905626408723, "learning_rate": 2.3041453017001815e-07, "loss": 0.0756, "step": 10833 }, { "epoch": 0.69, "grad_norm": 0.38439957698341465, "learning_rate": 2.3032755900213223e-07, "loss": 0.2279, "step": 10834 }, { "epoch": 0.69, "grad_norm": 0.18531328291539825, "learning_rate": 2.302405993390149e-07, "loss": 0.1327, "step": 10835 }, { "epoch": 0.69, "grad_norm": 12.60385236376719, "learning_rate": 2.3015365118437574e-07, "loss": 0.2272, "step": 10836 }, { "epoch": 0.69, "grad_norm": 0.6486143980048088, "learning_rate": 2.3006671454192444e-07, "loss": 0.2544, "step": 10837 }, { "epoch": 0.69, "grad_norm": 1.6628286454833892, "learning_rate": 2.2997978941536955e-07, "loss": 0.4387, "step": 10838 }, { "epoch": 0.69, "grad_norm": 0.450387559009505, "learning_rate": 2.2989287580841981e-07, "loss": 0.1608, "step": 10839 }, { "epoch": 0.69, "grad_norm": 2.560444908888854, "learning_rate": 2.2980597372478282e-07, "loss": 0.1453, "step": 10840 }, { "epoch": 0.69, "grad_norm": 0.7652646336107973, "learning_rate": 2.2971908316816635e-07, "loss": 0.4461, "step": 10841 }, { "epoch": 0.69, "grad_norm": 9.641306707553857, "learning_rate": 2.2963220414227708e-07, "loss": 0.2179, "step": 10842 }, { "epoch": 0.69, "grad_norm": 1.1646865114753244, "learning_rate": 2.2954533665082132e-07, "loss": 0.1115, "step": 10843 }, { "epoch": 0.69, "grad_norm": 0.5558736048088738, "learning_rate": 2.2945848069750538e-07, "loss": 0.0706, "step": 10844 }, { "epoch": 0.69, "grad_norm": 1.0829731712122816, "learning_rate": 2.2937163628603434e-07, "loss": 0.1866, "step": 10845 }, { "epoch": 0.69, "grad_norm": 0.7158053352819196, "learning_rate": 2.2928480342011347e-07, "loss": 0.4227, "step": 10846 }, { "epoch": 0.69, "grad_norm": 0.5870431288921848, "learning_rate": 2.2919798210344694e-07, "loss": 0.1103, "step": 10847 }, { "epoch": 0.69, "grad_norm": 0.518641319599222, "learning_rate": 2.2911117233973905e-07, "loss": 0.1914, "step": 10848 }, { "epoch": 0.69, "grad_norm": 23.7094736476411, "learning_rate": 2.2902437413269293e-07, "loss": 0.0784, "step": 10849 }, { "epoch": 0.69, "grad_norm": 1.037859043760364, "learning_rate": 2.2893758748601172e-07, "loss": 0.2207, "step": 10850 }, { "epoch": 0.69, "grad_norm": 1.5423624933920133, "learning_rate": 2.2885081240339809e-07, "loss": 0.3081, "step": 10851 }, { "epoch": 0.69, "grad_norm": 0.5829197775705188, "learning_rate": 2.2876404888855372e-07, "loss": 0.1074, "step": 10852 }, { "epoch": 0.69, "grad_norm": 11.54594707465134, "learning_rate": 2.286772969451803e-07, "loss": 0.1657, "step": 10853 }, { "epoch": 0.69, "grad_norm": 0.7077938283153192, "learning_rate": 2.28590556576979e-07, "loss": 0.2317, "step": 10854 }, { "epoch": 0.69, "grad_norm": 0.8975389192393, "learning_rate": 2.2850382778765014e-07, "loss": 0.227, "step": 10855 }, { "epoch": 0.69, "grad_norm": 0.35601280740426633, "learning_rate": 2.2841711058089358e-07, "loss": 0.2147, "step": 10856 }, { "epoch": 0.69, "grad_norm": 0.9712915419698354, "learning_rate": 2.2833040496040922e-07, "loss": 0.1006, "step": 10857 }, { "epoch": 0.69, "grad_norm": 0.9216084415325392, "learning_rate": 2.2824371092989574e-07, "loss": 0.1229, "step": 10858 }, { "epoch": 0.69, "grad_norm": 0.6577588361517682, "learning_rate": 2.2815702849305202e-07, "loss": 0.2169, "step": 10859 }, { "epoch": 0.69, "grad_norm": 0.28307925170456116, "learning_rate": 2.2807035765357573e-07, "loss": 0.105, "step": 10860 }, { "epoch": 0.69, "grad_norm": 4.071701063090803, "learning_rate": 2.2798369841516484e-07, "loss": 0.0923, "step": 10861 }, { "epoch": 0.69, "grad_norm": 1.607538185220634, "learning_rate": 2.27897050781516e-07, "loss": 0.4002, "step": 10862 }, { "epoch": 0.69, "grad_norm": 2.5820780420207257, "learning_rate": 2.2781041475632617e-07, "loss": 0.1472, "step": 10863 }, { "epoch": 0.69, "grad_norm": 0.6536469069476663, "learning_rate": 2.2772379034329103e-07, "loss": 0.1771, "step": 10864 }, { "epoch": 0.69, "grad_norm": 0.8923911434698747, "learning_rate": 2.2763717754610652e-07, "loss": 0.2508, "step": 10865 }, { "epoch": 0.69, "grad_norm": 6.736036544110356, "learning_rate": 2.2755057636846737e-07, "loss": 0.0673, "step": 10866 }, { "epoch": 0.69, "grad_norm": 5.13211128099521, "learning_rate": 2.2746398681406847e-07, "loss": 0.2356, "step": 10867 }, { "epoch": 0.69, "grad_norm": 0.47241730232045226, "learning_rate": 2.2737740888660356e-07, "loss": 0.1102, "step": 10868 }, { "epoch": 0.69, "grad_norm": 1.4254458946121775, "learning_rate": 2.272908425897665e-07, "loss": 0.2835, "step": 10869 }, { "epoch": 0.69, "grad_norm": 0.7895794471402303, "learning_rate": 2.2720428792725044e-07, "loss": 0.2342, "step": 10870 }, { "epoch": 0.69, "grad_norm": 0.6911187701869503, "learning_rate": 2.2711774490274766e-07, "loss": 0.0379, "step": 10871 }, { "epoch": 0.69, "grad_norm": 0.949568196928449, "learning_rate": 2.2703121351995064e-07, "loss": 0.3727, "step": 10872 }, { "epoch": 0.69, "grad_norm": 0.6162927476970472, "learning_rate": 2.269446937825506e-07, "loss": 0.218, "step": 10873 }, { "epoch": 0.69, "grad_norm": 0.7294745669729295, "learning_rate": 2.2685818569423904e-07, "loss": 0.1375, "step": 10874 }, { "epoch": 0.69, "grad_norm": 0.7484061186990685, "learning_rate": 2.2677168925870615e-07, "loss": 0.5, "step": 10875 }, { "epoch": 0.69, "grad_norm": 1.86320809631737, "learning_rate": 2.2668520447964246e-07, "loss": 0.1564, "step": 10876 }, { "epoch": 0.69, "grad_norm": 0.3315311656569516, "learning_rate": 2.2659873136073715e-07, "loss": 0.0126, "step": 10877 }, { "epoch": 0.69, "grad_norm": 0.46869695401621636, "learning_rate": 2.265122699056798e-07, "loss": 0.1943, "step": 10878 }, { "epoch": 0.69, "grad_norm": 0.8318506761180977, "learning_rate": 2.2642582011815858e-07, "loss": 0.312, "step": 10879 }, { "epoch": 0.69, "grad_norm": 0.7972055452495757, "learning_rate": 2.2633938200186203e-07, "loss": 0.1897, "step": 10880 }, { "epoch": 0.69, "grad_norm": 0.8801000207391041, "learning_rate": 2.2625295556047736e-07, "loss": 0.2444, "step": 10881 }, { "epoch": 0.69, "grad_norm": 0.4887721157384821, "learning_rate": 2.261665407976921e-07, "loss": 0.1556, "step": 10882 }, { "epoch": 0.69, "grad_norm": 2.0530104310733335, "learning_rate": 2.260801377171927e-07, "loss": 0.1596, "step": 10883 }, { "epoch": 0.69, "grad_norm": 0.3159872398057218, "learning_rate": 2.2599374632266511e-07, "loss": 0.1374, "step": 10884 }, { "epoch": 0.69, "grad_norm": 1.611180244577775, "learning_rate": 2.259073666177951e-07, "loss": 0.2115, "step": 10885 }, { "epoch": 0.69, "grad_norm": 2.0627240304031664, "learning_rate": 2.2582099860626797e-07, "loss": 0.0233, "step": 10886 }, { "epoch": 0.69, "grad_norm": 0.6653248825304307, "learning_rate": 2.257346422917681e-07, "loss": 0.1105, "step": 10887 }, { "epoch": 0.69, "grad_norm": 0.9950886138225368, "learning_rate": 2.2564829767797965e-07, "loss": 0.1554, "step": 10888 }, { "epoch": 0.69, "grad_norm": 0.46491460431634946, "learning_rate": 2.2556196476858657e-07, "loss": 0.0984, "step": 10889 }, { "epoch": 0.69, "grad_norm": 3.293669330584775, "learning_rate": 2.2547564356727155e-07, "loss": 0.2248, "step": 10890 }, { "epoch": 0.69, "grad_norm": 5.088819784265053, "learning_rate": 2.2538933407771766e-07, "loss": 0.0916, "step": 10891 }, { "epoch": 0.69, "grad_norm": 0.110023347891709, "learning_rate": 2.2530303630360664e-07, "loss": 0.0023, "step": 10892 }, { "epoch": 0.69, "grad_norm": 0.9093858880895302, "learning_rate": 2.2521675024862047e-07, "loss": 0.3635, "step": 10893 }, { "epoch": 0.69, "grad_norm": 0.7695666115831168, "learning_rate": 2.2513047591643992e-07, "loss": 0.13, "step": 10894 }, { "epoch": 0.69, "grad_norm": 1.32660121975726, "learning_rate": 2.2504421331074602e-07, "loss": 0.1307, "step": 10895 }, { "epoch": 0.69, "grad_norm": 0.5935487063161089, "learning_rate": 2.249579624352187e-07, "loss": 0.181, "step": 10896 }, { "epoch": 0.69, "grad_norm": 0.7901801674782892, "learning_rate": 2.2487172329353742e-07, "loss": 0.239, "step": 10897 }, { "epoch": 0.69, "grad_norm": 0.7021555975048261, "learning_rate": 2.2478549588938166e-07, "loss": 0.233, "step": 10898 }, { "epoch": 0.7, "grad_norm": 0.5346683612322068, "learning_rate": 2.2469928022642975e-07, "loss": 0.1757, "step": 10899 }, { "epoch": 0.7, "grad_norm": 0.5942130167894475, "learning_rate": 2.2461307630836019e-07, "loss": 0.4709, "step": 10900 }, { "epoch": 0.7, "grad_norm": 0.7249753989402374, "learning_rate": 2.2452688413885013e-07, "loss": 0.0288, "step": 10901 }, { "epoch": 0.7, "grad_norm": 4.289366193772501, "learning_rate": 2.244407037215772e-07, "loss": 0.1315, "step": 10902 }, { "epoch": 0.7, "grad_norm": 10.954304440433352, "learning_rate": 2.2435453506021756e-07, "loss": 0.1926, "step": 10903 }, { "epoch": 0.7, "grad_norm": 1.2178358002772534, "learning_rate": 2.2426837815844763e-07, "loss": 0.1126, "step": 10904 }, { "epoch": 0.7, "grad_norm": 0.44983159921230526, "learning_rate": 2.2418223301994293e-07, "loss": 0.1848, "step": 10905 }, { "epoch": 0.7, "grad_norm": 1.1048548497985862, "learning_rate": 2.2409609964837883e-07, "loss": 0.1964, "step": 10906 }, { "epoch": 0.7, "grad_norm": 3.967727743154967, "learning_rate": 2.2400997804742961e-07, "loss": 0.1552, "step": 10907 }, { "epoch": 0.7, "grad_norm": 0.13480417299554695, "learning_rate": 2.239238682207697e-07, "loss": 0.0921, "step": 10908 }, { "epoch": 0.7, "grad_norm": 0.4985602003788864, "learning_rate": 2.238377701720725e-07, "loss": 0.1493, "step": 10909 }, { "epoch": 0.7, "grad_norm": 1.815357305600492, "learning_rate": 2.237516839050111e-07, "loss": 0.3343, "step": 10910 }, { "epoch": 0.7, "grad_norm": 0.9960945528187201, "learning_rate": 2.2366560942325828e-07, "loss": 0.2111, "step": 10911 }, { "epoch": 0.7, "grad_norm": 1.4115713506311849, "learning_rate": 2.2357954673048591e-07, "loss": 0.0647, "step": 10912 }, { "epoch": 0.7, "grad_norm": 1.0007216294650185, "learning_rate": 2.2349349583036598e-07, "loss": 0.1253, "step": 10913 }, { "epoch": 0.7, "grad_norm": 0.821385428051082, "learning_rate": 2.2340745672656914e-07, "loss": 0.0354, "step": 10914 }, { "epoch": 0.7, "grad_norm": 0.586873288271951, "learning_rate": 2.2332142942276639e-07, "loss": 0.0837, "step": 10915 }, { "epoch": 0.7, "grad_norm": 0.7014841527889852, "learning_rate": 2.2323541392262745e-07, "loss": 0.196, "step": 10916 }, { "epoch": 0.7, "grad_norm": 3.967562145076491, "learning_rate": 2.2314941022982237e-07, "loss": 0.3864, "step": 10917 }, { "epoch": 0.7, "grad_norm": 0.9130360703446458, "learning_rate": 2.230634183480198e-07, "loss": 0.2439, "step": 10918 }, { "epoch": 0.7, "grad_norm": 0.5969503230851588, "learning_rate": 2.2297743828088867e-07, "loss": 0.0972, "step": 10919 }, { "epoch": 0.7, "grad_norm": 0.496305039123906, "learning_rate": 2.228914700320967e-07, "loss": 0.1178, "step": 10920 }, { "epoch": 0.7, "grad_norm": 1.2448276710422292, "learning_rate": 2.2280551360531168e-07, "loss": 0.0335, "step": 10921 }, { "epoch": 0.7, "grad_norm": 1.0203334600594705, "learning_rate": 2.227195690042009e-07, "loss": 0.195, "step": 10922 }, { "epoch": 0.7, "grad_norm": 0.7703721701369108, "learning_rate": 2.2263363623243054e-07, "loss": 0.2266, "step": 10923 }, { "epoch": 0.7, "grad_norm": 2.049210719399762, "learning_rate": 2.22547715293667e-07, "loss": 0.0976, "step": 10924 }, { "epoch": 0.7, "grad_norm": 2.135960356496401, "learning_rate": 2.2246180619157546e-07, "loss": 0.0766, "step": 10925 }, { "epoch": 0.7, "grad_norm": 0.31557956273237464, "learning_rate": 2.2237590892982138e-07, "loss": 0.2627, "step": 10926 }, { "epoch": 0.7, "grad_norm": 2.608551494496266, "learning_rate": 2.22290023512069e-07, "loss": 0.1915, "step": 10927 }, { "epoch": 0.7, "grad_norm": 0.35726329297766696, "learning_rate": 2.2220414994198262e-07, "loss": 0.1034, "step": 10928 }, { "epoch": 0.7, "grad_norm": 1.886101922517762, "learning_rate": 2.2211828822322544e-07, "loss": 0.0925, "step": 10929 }, { "epoch": 0.7, "grad_norm": 0.1361921865049236, "learning_rate": 2.220324383594609e-07, "loss": 0.104, "step": 10930 }, { "epoch": 0.7, "grad_norm": 0.17799373833339124, "learning_rate": 2.2194660035435115e-07, "loss": 0.021, "step": 10931 }, { "epoch": 0.7, "grad_norm": 14.42703875159493, "learning_rate": 2.2186077421155853e-07, "loss": 0.0421, "step": 10932 }, { "epoch": 0.7, "grad_norm": 0.6544839328708031, "learning_rate": 2.2177495993474426e-07, "loss": 0.0781, "step": 10933 }, { "epoch": 0.7, "grad_norm": 0.5053969140401248, "learning_rate": 2.2168915752756966e-07, "loss": 0.1136, "step": 10934 }, { "epoch": 0.7, "grad_norm": 0.4204335796883389, "learning_rate": 2.2160336699369496e-07, "loss": 0.2632, "step": 10935 }, { "epoch": 0.7, "grad_norm": 1.1065380945059267, "learning_rate": 2.215175883367804e-07, "loss": 0.2669, "step": 10936 }, { "epoch": 0.7, "grad_norm": 1.061545908147558, "learning_rate": 2.2143182156048533e-07, "loss": 0.0885, "step": 10937 }, { "epoch": 0.7, "grad_norm": 0.7378857275320583, "learning_rate": 2.213460666684686e-07, "loss": 0.2206, "step": 10938 }, { "epoch": 0.7, "grad_norm": 1.050980108589889, "learning_rate": 2.2126032366438884e-07, "loss": 0.18, "step": 10939 }, { "epoch": 0.7, "grad_norm": 2.3873979101593528, "learning_rate": 2.2117459255190395e-07, "loss": 0.2091, "step": 10940 }, { "epoch": 0.7, "grad_norm": 0.81192411700409, "learning_rate": 2.2108887333467168e-07, "loss": 0.2052, "step": 10941 }, { "epoch": 0.7, "grad_norm": 1.1025429172170629, "learning_rate": 2.2100316601634856e-07, "loss": 0.2563, "step": 10942 }, { "epoch": 0.7, "grad_norm": 0.6293557518228154, "learning_rate": 2.2091747060059141e-07, "loss": 0.3961, "step": 10943 }, { "epoch": 0.7, "grad_norm": 0.5601243607070677, "learning_rate": 2.2083178709105583e-07, "loss": 0.1867, "step": 10944 }, { "epoch": 0.7, "grad_norm": 4.018311267316071, "learning_rate": 2.2074611549139754e-07, "loss": 0.2061, "step": 10945 }, { "epoch": 0.7, "grad_norm": 0.7922089652905071, "learning_rate": 2.206604558052712e-07, "loss": 0.1672, "step": 10946 }, { "epoch": 0.7, "grad_norm": 1.7070052739808517, "learning_rate": 2.2057480803633154e-07, "loss": 0.2281, "step": 10947 }, { "epoch": 0.7, "grad_norm": 2.7016793010266547, "learning_rate": 2.204891721882321e-07, "loss": 0.2014, "step": 10948 }, { "epoch": 0.7, "grad_norm": 0.8402525364923409, "learning_rate": 2.2040354826462664e-07, "loss": 0.2437, "step": 10949 }, { "epoch": 0.7, "grad_norm": 0.5643340185649347, "learning_rate": 2.2031793626916768e-07, "loss": 0.1824, "step": 10950 }, { "epoch": 0.7, "grad_norm": 0.8265373224511069, "learning_rate": 2.2023233620550797e-07, "loss": 0.1035, "step": 10951 }, { "epoch": 0.7, "grad_norm": 0.348838993143465, "learning_rate": 2.2014674807729923e-07, "loss": 0.0987, "step": 10952 }, { "epoch": 0.7, "grad_norm": 1.7134912311271417, "learning_rate": 2.2006117188819257e-07, "loss": 0.2111, "step": 10953 }, { "epoch": 0.7, "grad_norm": 4.039798570227404, "learning_rate": 2.1997560764183926e-07, "loss": 0.1505, "step": 10954 }, { "epoch": 0.7, "grad_norm": 1.2631960550321324, "learning_rate": 2.1989005534188927e-07, "loss": 0.1331, "step": 10955 }, { "epoch": 0.7, "grad_norm": 0.41869817689472827, "learning_rate": 2.198045149919926e-07, "loss": 0.1506, "step": 10956 }, { "epoch": 0.7, "grad_norm": 1.6820577255079567, "learning_rate": 2.197189865957988e-07, "loss": 0.0211, "step": 10957 }, { "epoch": 0.7, "grad_norm": 0.8595514898710638, "learning_rate": 2.1963347015695627e-07, "loss": 0.1329, "step": 10958 }, { "epoch": 0.7, "grad_norm": 0.4475700125823077, "learning_rate": 2.195479656791135e-07, "loss": 0.2221, "step": 10959 }, { "epoch": 0.7, "grad_norm": 3.7665736691231024, "learning_rate": 2.1946247316591843e-07, "loss": 0.0957, "step": 10960 }, { "epoch": 0.7, "grad_norm": 1.5043782022654189, "learning_rate": 2.193769926210181e-07, "loss": 0.0801, "step": 10961 }, { "epoch": 0.7, "grad_norm": 0.1340649885550248, "learning_rate": 2.1929152404805956e-07, "loss": 0.0045, "step": 10962 }, { "epoch": 0.7, "grad_norm": 0.9196294150621533, "learning_rate": 2.1920606745068864e-07, "loss": 0.2139, "step": 10963 }, { "epoch": 0.7, "grad_norm": 3.5396771173298256, "learning_rate": 2.1912062283255162e-07, "loss": 0.0046, "step": 10964 }, { "epoch": 0.7, "grad_norm": 1.6335613203423425, "learning_rate": 2.1903519019729343e-07, "loss": 0.1939, "step": 10965 }, { "epoch": 0.7, "grad_norm": 0.7146708155614051, "learning_rate": 2.1894976954855865e-07, "loss": 0.3615, "step": 10966 }, { "epoch": 0.7, "grad_norm": 0.39300506891577236, "learning_rate": 2.188643608899919e-07, "loss": 0.2438, "step": 10967 }, { "epoch": 0.7, "grad_norm": 0.23469147985959943, "learning_rate": 2.1877896422523644e-07, "loss": 0.0026, "step": 10968 }, { "epoch": 0.7, "grad_norm": 0.659546256345295, "learning_rate": 2.1869357955793594e-07, "loss": 0.1957, "step": 10969 }, { "epoch": 0.7, "grad_norm": 1.1810606116773537, "learning_rate": 2.186082068917326e-07, "loss": 0.3302, "step": 10970 }, { "epoch": 0.7, "grad_norm": 1.2789898034190668, "learning_rate": 2.18522846230269e-07, "loss": 0.1007, "step": 10971 }, { "epoch": 0.7, "grad_norm": 1.8152696515338478, "learning_rate": 2.1843749757718642e-07, "loss": 0.2352, "step": 10972 }, { "epoch": 0.7, "grad_norm": 0.506016568426306, "learning_rate": 2.1835216093612646e-07, "loss": 0.098, "step": 10973 }, { "epoch": 0.7, "grad_norm": 1.3521813685806305, "learning_rate": 2.182668363107293e-07, "loss": 0.0706, "step": 10974 }, { "epoch": 0.7, "grad_norm": 7.784555173968003, "learning_rate": 2.1818152370463523e-07, "loss": 0.0722, "step": 10975 }, { "epoch": 0.7, "grad_norm": 1.952886471051704, "learning_rate": 2.1809622312148402e-07, "loss": 0.2057, "step": 10976 }, { "epoch": 0.7, "grad_norm": 0.772372475430498, "learning_rate": 2.180109345649145e-07, "loss": 0.1928, "step": 10977 }, { "epoch": 0.7, "grad_norm": 0.3889697536058163, "learning_rate": 2.1792565803856556e-07, "loss": 0.0522, "step": 10978 }, { "epoch": 0.7, "grad_norm": 0.8443163771593503, "learning_rate": 2.178403935460749e-07, "loss": 0.1573, "step": 10979 }, { "epoch": 0.7, "grad_norm": 0.549392684872409, "learning_rate": 2.1775514109108046e-07, "loss": 0.1038, "step": 10980 }, { "epoch": 0.7, "grad_norm": 1.2069482018807078, "learning_rate": 2.176699006772189e-07, "loss": 0.2588, "step": 10981 }, { "epoch": 0.7, "grad_norm": 7.342735477229369, "learning_rate": 2.175846723081271e-07, "loss": 0.3343, "step": 10982 }, { "epoch": 0.7, "grad_norm": 0.6949236444913175, "learning_rate": 2.1749945598744075e-07, "loss": 0.0976, "step": 10983 }, { "epoch": 0.7, "grad_norm": 3.4281699533945873, "learning_rate": 2.1741425171879563e-07, "loss": 0.2621, "step": 10984 }, { "epoch": 0.7, "grad_norm": 1.0020370195970825, "learning_rate": 2.1732905950582648e-07, "loss": 0.1802, "step": 10985 }, { "epoch": 0.7, "grad_norm": 2.0932002911407386, "learning_rate": 2.1724387935216802e-07, "loss": 0.1798, "step": 10986 }, { "epoch": 0.7, "grad_norm": 1.2957926083056028, "learning_rate": 2.171587112614539e-07, "loss": 0.2829, "step": 10987 }, { "epoch": 0.7, "grad_norm": 7.253034759683275, "learning_rate": 2.1707355523731796e-07, "loss": 0.1463, "step": 10988 }, { "epoch": 0.7, "grad_norm": 0.34662369243637214, "learning_rate": 2.1698841128339273e-07, "loss": 0.0779, "step": 10989 }, { "epoch": 0.7, "grad_norm": 0.8748200788607298, "learning_rate": 2.1690327940331093e-07, "loss": 0.163, "step": 10990 }, { "epoch": 0.7, "grad_norm": 0.9269464993348918, "learning_rate": 2.168181596007042e-07, "loss": 0.118, "step": 10991 }, { "epoch": 0.7, "grad_norm": 0.5689704725961378, "learning_rate": 2.167330518792042e-07, "loss": 0.1489, "step": 10992 }, { "epoch": 0.7, "grad_norm": 0.1384217527504339, "learning_rate": 2.1664795624244147e-07, "loss": 0.0393, "step": 10993 }, { "epoch": 0.7, "grad_norm": 1.7500700286501576, "learning_rate": 2.1656287269404656e-07, "loss": 0.1911, "step": 10994 }, { "epoch": 0.7, "grad_norm": 1.1563941203100896, "learning_rate": 2.1647780123764942e-07, "loss": 0.1127, "step": 10995 }, { "epoch": 0.7, "grad_norm": 0.7440228102081966, "learning_rate": 2.1639274187687906e-07, "loss": 0.0976, "step": 10996 }, { "epoch": 0.7, "grad_norm": 0.3423135838816568, "learning_rate": 2.1630769461536463e-07, "loss": 0.0711, "step": 10997 }, { "epoch": 0.7, "grad_norm": 0.6920210219335216, "learning_rate": 2.1622265945673406e-07, "loss": 0.2363, "step": 10998 }, { "epoch": 0.7, "grad_norm": 1.477111108642288, "learning_rate": 2.1613763640461552e-07, "loss": 0.1506, "step": 10999 }, { "epoch": 0.7, "grad_norm": 0.42656076104498575, "learning_rate": 2.1605262546263587e-07, "loss": 0.047, "step": 11000 }, { "epoch": 0.7, "grad_norm": 0.8438581654792345, "learning_rate": 2.1596762663442213e-07, "loss": 0.0957, "step": 11001 }, { "epoch": 0.7, "grad_norm": 7.809254864804685, "learning_rate": 2.158826399236003e-07, "loss": 0.1341, "step": 11002 }, { "epoch": 0.7, "grad_norm": 0.8724682548909874, "learning_rate": 2.1579766533379635e-07, "loss": 0.3811, "step": 11003 }, { "epoch": 0.7, "grad_norm": 0.2784828289476305, "learning_rate": 2.1571270286863514e-07, "loss": 0.2726, "step": 11004 }, { "epoch": 0.7, "grad_norm": 1.325186343899838, "learning_rate": 2.156277525317417e-07, "loss": 0.2104, "step": 11005 }, { "epoch": 0.7, "grad_norm": 0.7809667381097108, "learning_rate": 2.1554281432674e-07, "loss": 0.203, "step": 11006 }, { "epoch": 0.7, "grad_norm": 0.5387177855910273, "learning_rate": 2.1545788825725348e-07, "loss": 0.2158, "step": 11007 }, { "epoch": 0.7, "grad_norm": 1.7752514065628473, "learning_rate": 2.1537297432690566e-07, "loss": 0.0828, "step": 11008 }, { "epoch": 0.7, "grad_norm": 0.6494091713904622, "learning_rate": 2.152880725393187e-07, "loss": 0.1067, "step": 11009 }, { "epoch": 0.7, "grad_norm": 0.8775734501206596, "learning_rate": 2.152031828981149e-07, "loss": 0.4599, "step": 11010 }, { "epoch": 0.7, "grad_norm": 0.4376476542465838, "learning_rate": 2.151183054069159e-07, "loss": 0.1312, "step": 11011 }, { "epoch": 0.7, "grad_norm": 1.1787140873996134, "learning_rate": 2.1503344006934283e-07, "loss": 0.1617, "step": 11012 }, { "epoch": 0.7, "grad_norm": 0.9153597476126069, "learning_rate": 2.1494858688901585e-07, "loss": 0.2475, "step": 11013 }, { "epoch": 0.7, "grad_norm": 0.7634750254238873, "learning_rate": 2.1486374586955535e-07, "loss": 0.1348, "step": 11014 }, { "epoch": 0.7, "grad_norm": 0.2705824767832749, "learning_rate": 2.1477891701458052e-07, "loss": 0.098, "step": 11015 }, { "epoch": 0.7, "grad_norm": 0.5972037289624058, "learning_rate": 2.1469410032771056e-07, "loss": 0.0561, "step": 11016 }, { "epoch": 0.7, "grad_norm": 9.543764948171805, "learning_rate": 2.146092958125637e-07, "loss": 0.1716, "step": 11017 }, { "epoch": 0.7, "grad_norm": 0.20522270832625228, "learning_rate": 2.145245034727582e-07, "loss": 0.0167, "step": 11018 }, { "epoch": 0.7, "grad_norm": 1.2037803633222162, "learning_rate": 2.1443972331191118e-07, "loss": 0.0692, "step": 11019 }, { "epoch": 0.7, "grad_norm": 0.4281920119549205, "learning_rate": 2.1435495533363946e-07, "loss": 0.1621, "step": 11020 }, { "epoch": 0.7, "grad_norm": 2.1511074131109007, "learning_rate": 2.1427019954155979e-07, "loss": 0.3641, "step": 11021 }, { "epoch": 0.7, "grad_norm": 1.4625029465831232, "learning_rate": 2.1418545593928756e-07, "loss": 0.3489, "step": 11022 }, { "epoch": 0.7, "grad_norm": 1.3047263123238109, "learning_rate": 2.1410072453043853e-07, "loss": 0.1995, "step": 11023 }, { "epoch": 0.7, "grad_norm": 1.369664270063948, "learning_rate": 2.1401600531862713e-07, "loss": 0.1859, "step": 11024 }, { "epoch": 0.7, "grad_norm": 4.689149430334241, "learning_rate": 2.1393129830746804e-07, "loss": 0.1718, "step": 11025 }, { "epoch": 0.7, "grad_norm": 1.6200892886489244, "learning_rate": 2.1384660350057465e-07, "loss": 0.1687, "step": 11026 }, { "epoch": 0.7, "grad_norm": 0.706330514924541, "learning_rate": 2.1376192090156043e-07, "loss": 0.1297, "step": 11027 }, { "epoch": 0.7, "grad_norm": 1.0425024830100322, "learning_rate": 2.1367725051403817e-07, "loss": 0.3272, "step": 11028 }, { "epoch": 0.7, "grad_norm": 0.946566279496054, "learning_rate": 2.1359259234161985e-07, "loss": 0.1678, "step": 11029 }, { "epoch": 0.7, "grad_norm": 0.48638936657029846, "learning_rate": 2.135079463879173e-07, "loss": 0.0274, "step": 11030 }, { "epoch": 0.7, "grad_norm": 1.5038371680377733, "learning_rate": 2.134233126565419e-07, "loss": 0.1905, "step": 11031 }, { "epoch": 0.7, "grad_norm": 0.9346990238157602, "learning_rate": 2.1333869115110382e-07, "loss": 0.5061, "step": 11032 }, { "epoch": 0.7, "grad_norm": 3.0362277480262794, "learning_rate": 2.1325408187521364e-07, "loss": 0.167, "step": 11033 }, { "epoch": 0.7, "grad_norm": 4.861370803338594, "learning_rate": 2.1316948483248082e-07, "loss": 0.2061, "step": 11034 }, { "epoch": 0.7, "grad_norm": 0.7567563176852425, "learning_rate": 2.1308490002651413e-07, "loss": 0.2294, "step": 11035 }, { "epoch": 0.7, "grad_norm": 0.30553706264457836, "learning_rate": 2.1300032746092257e-07, "loss": 0.0625, "step": 11036 }, { "epoch": 0.7, "grad_norm": 3.7933743165339, "learning_rate": 2.129157671393138e-07, "loss": 0.1537, "step": 11037 }, { "epoch": 0.7, "grad_norm": 1.335773703985514, "learning_rate": 2.1283121906529571e-07, "loss": 0.3533, "step": 11038 }, { "epoch": 0.7, "grad_norm": 0.4964445480855572, "learning_rate": 2.1274668324247491e-07, "loss": 0.2921, "step": 11039 }, { "epoch": 0.7, "grad_norm": 2.930152862201858, "learning_rate": 2.126621596744582e-07, "loss": 0.1646, "step": 11040 }, { "epoch": 0.7, "grad_norm": 4.248724251053854, "learning_rate": 2.1257764836485127e-07, "loss": 0.2651, "step": 11041 }, { "epoch": 0.7, "grad_norm": 0.3113870893272026, "learning_rate": 2.1249314931725974e-07, "loss": 0.2208, "step": 11042 }, { "epoch": 0.7, "grad_norm": 0.4539614691084134, "learning_rate": 2.1240866253528832e-07, "loss": 0.2331, "step": 11043 }, { "epoch": 0.7, "grad_norm": 0.506758943529494, "learning_rate": 2.1232418802254165e-07, "loss": 0.3359, "step": 11044 }, { "epoch": 0.7, "grad_norm": 0.48030673453778105, "learning_rate": 2.1223972578262322e-07, "loss": 0.0153, "step": 11045 }, { "epoch": 0.7, "grad_norm": 0.8853162385891539, "learning_rate": 2.1215527581913655e-07, "loss": 0.1202, "step": 11046 }, { "epoch": 0.7, "grad_norm": 0.7863355873264025, "learning_rate": 2.1207083813568466e-07, "loss": 0.2481, "step": 11047 }, { "epoch": 0.7, "grad_norm": 1.0899537164999757, "learning_rate": 2.1198641273586947e-07, "loss": 0.2548, "step": 11048 }, { "epoch": 0.7, "grad_norm": 7.11510099776902, "learning_rate": 2.1190199962329302e-07, "loss": 0.026, "step": 11049 }, { "epoch": 0.7, "grad_norm": 0.6448255195451303, "learning_rate": 2.1181759880155625e-07, "loss": 0.2012, "step": 11050 }, { "epoch": 0.7, "grad_norm": 1.3651832298687614, "learning_rate": 2.1173321027426022e-07, "loss": 0.0762, "step": 11051 }, { "epoch": 0.7, "grad_norm": 0.7536512939188647, "learning_rate": 2.1164883404500476e-07, "loss": 0.1388, "step": 11052 }, { "epoch": 0.7, "grad_norm": 0.9228609114338816, "learning_rate": 2.1156447011738986e-07, "loss": 0.064, "step": 11053 }, { "epoch": 0.7, "grad_norm": 1.2032538379935707, "learning_rate": 2.1148011849501436e-07, "loss": 0.1504, "step": 11054 }, { "epoch": 0.7, "grad_norm": 0.7894577062338723, "learning_rate": 2.1139577918147711e-07, "loss": 0.3238, "step": 11055 }, { "epoch": 0.71, "grad_norm": 0.2614407387535882, "learning_rate": 2.1131145218037593e-07, "loss": 0.0887, "step": 11056 }, { "epoch": 0.71, "grad_norm": 1.045079877596098, "learning_rate": 2.1122713749530875e-07, "loss": 0.2197, "step": 11057 }, { "epoch": 0.71, "grad_norm": 0.5737296747467094, "learning_rate": 2.1114283512987218e-07, "loss": 0.223, "step": 11058 }, { "epoch": 0.71, "grad_norm": 3.231003810412058, "learning_rate": 2.1105854508766312e-07, "loss": 0.2532, "step": 11059 }, { "epoch": 0.71, "grad_norm": 16.87974904872534, "learning_rate": 2.1097426737227724e-07, "loss": 0.1487, "step": 11060 }, { "epoch": 0.71, "grad_norm": 0.6033239733857554, "learning_rate": 2.1089000198731027e-07, "loss": 0.0204, "step": 11061 }, { "epoch": 0.71, "grad_norm": 0.7984236334952097, "learning_rate": 2.1080574893635683e-07, "loss": 0.1942, "step": 11062 }, { "epoch": 0.71, "grad_norm": 1.9847381829793511, "learning_rate": 2.1072150822301167e-07, "loss": 0.0893, "step": 11063 }, { "epoch": 0.71, "grad_norm": 0.7396772250488312, "learning_rate": 2.1063727985086827e-07, "loss": 0.1299, "step": 11064 }, { "epoch": 0.71, "grad_norm": 0.5982581324806512, "learning_rate": 2.1055306382352022e-07, "loss": 0.2079, "step": 11065 }, { "epoch": 0.71, "grad_norm": 2.247004817230564, "learning_rate": 2.1046886014456054e-07, "loss": 0.2978, "step": 11066 }, { "epoch": 0.71, "grad_norm": 1.2188244200151201, "learning_rate": 2.1038466881758116e-07, "loss": 0.2978, "step": 11067 }, { "epoch": 0.71, "grad_norm": 1.0301709760929476, "learning_rate": 2.1030048984617416e-07, "loss": 0.2754, "step": 11068 }, { "epoch": 0.71, "grad_norm": 5.343686261228263, "learning_rate": 2.102163232339304e-07, "loss": 0.2055, "step": 11069 }, { "epoch": 0.71, "grad_norm": 1.0485427604293511, "learning_rate": 2.1013216898444109e-07, "loss": 0.3121, "step": 11070 }, { "epoch": 0.71, "grad_norm": 1.1570134171494262, "learning_rate": 2.1004802710129592e-07, "loss": 0.1822, "step": 11071 }, { "epoch": 0.71, "grad_norm": 1.5084985592317486, "learning_rate": 2.0996389758808498e-07, "loss": 0.0521, "step": 11072 }, { "epoch": 0.71, "grad_norm": 0.4328263965227327, "learning_rate": 2.0987978044839705e-07, "loss": 0.0071, "step": 11073 }, { "epoch": 0.71, "grad_norm": 0.42636946733427256, "learning_rate": 2.0979567568582108e-07, "loss": 0.1459, "step": 11074 }, { "epoch": 0.71, "grad_norm": 0.8818847458483785, "learning_rate": 2.0971158330394495e-07, "loss": 0.4688, "step": 11075 }, { "epoch": 0.71, "grad_norm": 0.6775631918795026, "learning_rate": 2.096275033063561e-07, "loss": 0.019, "step": 11076 }, { "epoch": 0.71, "grad_norm": 2.3100042645637124, "learning_rate": 2.0954343569664185e-07, "loss": 0.1081, "step": 11077 }, { "epoch": 0.71, "grad_norm": 1.3202207840361624, "learning_rate": 2.094593804783883e-07, "loss": 0.2358, "step": 11078 }, { "epoch": 0.71, "grad_norm": 2.1424883672675796, "learning_rate": 2.0937533765518185e-07, "loss": 0.0199, "step": 11079 }, { "epoch": 0.71, "grad_norm": 10.759146111867244, "learning_rate": 2.0929130723060752e-07, "loss": 0.257, "step": 11080 }, { "epoch": 0.71, "grad_norm": 2.8304879441262054, "learning_rate": 2.0920728920825043e-07, "loss": 0.1505, "step": 11081 }, { "epoch": 0.71, "grad_norm": 0.29801680293990124, "learning_rate": 2.0912328359169495e-07, "loss": 0.0428, "step": 11082 }, { "epoch": 0.71, "grad_norm": 0.3461982760630799, "learning_rate": 2.090392903845251e-07, "loss": 0.0044, "step": 11083 }, { "epoch": 0.71, "grad_norm": 2.0081329296310395, "learning_rate": 2.0895530959032388e-07, "loss": 0.3769, "step": 11084 }, { "epoch": 0.71, "grad_norm": 0.8026771160791514, "learning_rate": 2.0887134121267437e-07, "loss": 0.2868, "step": 11085 }, { "epoch": 0.71, "grad_norm": 0.45385461846890257, "learning_rate": 2.0878738525515853e-07, "loss": 0.2828, "step": 11086 }, { "epoch": 0.71, "grad_norm": 0.41979306423280627, "learning_rate": 2.087034417213584e-07, "loss": 0.2649, "step": 11087 }, { "epoch": 0.71, "grad_norm": 4.705398689810544, "learning_rate": 2.0861951061485504e-07, "loss": 0.1281, "step": 11088 }, { "epoch": 0.71, "grad_norm": 1.0492582653181994, "learning_rate": 2.0853559193922892e-07, "loss": 0.2019, "step": 11089 }, { "epoch": 0.71, "grad_norm": 2.050281563306293, "learning_rate": 2.084516856980606e-07, "loss": 0.2285, "step": 11090 }, { "epoch": 0.71, "grad_norm": 0.6808190367480089, "learning_rate": 2.083677918949292e-07, "loss": 0.011, "step": 11091 }, { "epoch": 0.71, "grad_norm": 2.999061353887269, "learning_rate": 2.0828391053341427e-07, "loss": 0.1825, "step": 11092 }, { "epoch": 0.71, "grad_norm": 3.2763628861309195, "learning_rate": 2.0820004161709393e-07, "loss": 0.2497, "step": 11093 }, { "epoch": 0.71, "grad_norm": 3.360521841048718, "learning_rate": 2.0811618514954665e-07, "loss": 0.158, "step": 11094 }, { "epoch": 0.71, "grad_norm": 0.9218195814754128, "learning_rate": 2.0803234113434942e-07, "loss": 0.1443, "step": 11095 }, { "epoch": 0.71, "grad_norm": 0.6991316452549218, "learning_rate": 2.0794850957507965e-07, "loss": 0.1132, "step": 11096 }, { "epoch": 0.71, "grad_norm": 0.744254439839101, "learning_rate": 2.0786469047531341e-07, "loss": 0.2021, "step": 11097 }, { "epoch": 0.71, "grad_norm": 0.8041557797444049, "learning_rate": 2.0778088383862686e-07, "loss": 0.3431, "step": 11098 }, { "epoch": 0.71, "grad_norm": 0.6138279733706382, "learning_rate": 2.0769708966859512e-07, "loss": 0.1224, "step": 11099 }, { "epoch": 0.71, "grad_norm": 0.6659031859543398, "learning_rate": 2.0761330796879306e-07, "loss": 0.1869, "step": 11100 }, { "epoch": 0.71, "grad_norm": 0.576919203865755, "learning_rate": 2.075295387427951e-07, "loss": 0.3169, "step": 11101 }, { "epoch": 0.71, "grad_norm": 1.0270864257226362, "learning_rate": 2.0744578199417519e-07, "loss": 0.1338, "step": 11102 }, { "epoch": 0.71, "grad_norm": 0.9645019146817351, "learning_rate": 2.0736203772650622e-07, "loss": 0.2736, "step": 11103 }, { "epoch": 0.71, "grad_norm": 1.1034516439933453, "learning_rate": 2.0727830594336088e-07, "loss": 0.3325, "step": 11104 }, { "epoch": 0.71, "grad_norm": 0.6279409222422017, "learning_rate": 2.0719458664831164e-07, "loss": 0.1725, "step": 11105 }, { "epoch": 0.71, "grad_norm": 1.0000270743273245, "learning_rate": 2.0711087984492976e-07, "loss": 0.3225, "step": 11106 }, { "epoch": 0.71, "grad_norm": 2.489929596811043, "learning_rate": 2.0702718553678672e-07, "loss": 0.1607, "step": 11107 }, { "epoch": 0.71, "grad_norm": 0.5464027832140982, "learning_rate": 2.0694350372745267e-07, "loss": 0.0064, "step": 11108 }, { "epoch": 0.71, "grad_norm": 0.4753609948604213, "learning_rate": 2.068598344204981e-07, "loss": 0.1583, "step": 11109 }, { "epoch": 0.71, "grad_norm": 1.0716566064726825, "learning_rate": 2.0677617761949206e-07, "loss": 0.2261, "step": 11110 }, { "epoch": 0.71, "grad_norm": 0.31560982199835835, "learning_rate": 2.066925333280039e-07, "loss": 0.0317, "step": 11111 }, { "epoch": 0.71, "grad_norm": 1.8557832334185258, "learning_rate": 2.0660890154960175e-07, "loss": 0.2705, "step": 11112 }, { "epoch": 0.71, "grad_norm": 0.16970641697493014, "learning_rate": 2.0652528228785382e-07, "loss": 0.1145, "step": 11113 }, { "epoch": 0.71, "grad_norm": 0.7323255733169622, "learning_rate": 2.0644167554632712e-07, "loss": 0.1083, "step": 11114 }, { "epoch": 0.71, "grad_norm": 0.423845452669845, "learning_rate": 2.0635808132858883e-07, "loss": 0.2142, "step": 11115 }, { "epoch": 0.71, "grad_norm": 0.7703557994381832, "learning_rate": 2.0627449963820494e-07, "loss": 0.0975, "step": 11116 }, { "epoch": 0.71, "grad_norm": 1.397097604885472, "learning_rate": 2.0619093047874136e-07, "loss": 0.3368, "step": 11117 }, { "epoch": 0.71, "grad_norm": 5.648458511106753, "learning_rate": 2.0610737385376348e-07, "loss": 0.0169, "step": 11118 }, { "epoch": 0.71, "grad_norm": 1.4159630617314296, "learning_rate": 2.0602382976683564e-07, "loss": 0.3397, "step": 11119 }, { "epoch": 0.71, "grad_norm": 0.40372083127383873, "learning_rate": 2.0594029822152238e-07, "loss": 0.1969, "step": 11120 }, { "epoch": 0.71, "grad_norm": 1.3412550776805925, "learning_rate": 2.0585677922138694e-07, "loss": 0.3395, "step": 11121 }, { "epoch": 0.71, "grad_norm": 0.8002680589122609, "learning_rate": 2.057732727699928e-07, "loss": 0.3023, "step": 11122 }, { "epoch": 0.71, "grad_norm": 9.79165682852856, "learning_rate": 2.056897788709021e-07, "loss": 0.2777, "step": 11123 }, { "epoch": 0.71, "grad_norm": 0.6588782726645641, "learning_rate": 2.0560629752767727e-07, "loss": 0.2617, "step": 11124 }, { "epoch": 0.71, "grad_norm": 2.9772970872969657, "learning_rate": 2.0552282874387944e-07, "loss": 0.1517, "step": 11125 }, { "epoch": 0.71, "grad_norm": 8.680089317186349, "learning_rate": 2.0543937252306986e-07, "loss": 0.1008, "step": 11126 }, { "epoch": 0.71, "grad_norm": 0.16421178474496412, "learning_rate": 2.0535592886880858e-07, "loss": 0.0016, "step": 11127 }, { "epoch": 0.71, "grad_norm": 1.0169762939722602, "learning_rate": 2.0527249778465595e-07, "loss": 0.3602, "step": 11128 }, { "epoch": 0.71, "grad_norm": 0.4693658530747357, "learning_rate": 2.051890792741708e-07, "loss": 0.1672, "step": 11129 }, { "epoch": 0.71, "grad_norm": 4.377894875031313, "learning_rate": 2.0510567334091233e-07, "loss": 0.3077, "step": 11130 }, { "epoch": 0.71, "grad_norm": 0.9291163569811182, "learning_rate": 2.050222799884387e-07, "loss": 0.1542, "step": 11131 }, { "epoch": 0.71, "grad_norm": 0.8849080450106809, "learning_rate": 2.0493889922030738e-07, "loss": 0.2231, "step": 11132 }, { "epoch": 0.71, "grad_norm": 0.9583189714214655, "learning_rate": 2.048555310400758e-07, "loss": 0.3332, "step": 11133 }, { "epoch": 0.71, "grad_norm": 0.33436524652268007, "learning_rate": 2.0477217545130072e-07, "loss": 0.0046, "step": 11134 }, { "epoch": 0.71, "grad_norm": 0.3954086713344425, "learning_rate": 2.0468883245753794e-07, "loss": 0.0819, "step": 11135 }, { "epoch": 0.71, "grad_norm": 0.39441446712178774, "learning_rate": 2.0460550206234323e-07, "loss": 0.2405, "step": 11136 }, { "epoch": 0.71, "grad_norm": 0.7869768509866532, "learning_rate": 2.0452218426927182e-07, "loss": 0.1657, "step": 11137 }, { "epoch": 0.71, "grad_norm": 0.31599890181855755, "learning_rate": 2.0443887908187778e-07, "loss": 0.1003, "step": 11138 }, { "epoch": 0.71, "grad_norm": 0.9200400092405193, "learning_rate": 2.0435558650371553e-07, "loss": 0.2184, "step": 11139 }, { "epoch": 0.71, "grad_norm": 0.5725922337780283, "learning_rate": 2.042723065383381e-07, "loss": 0.2143, "step": 11140 }, { "epoch": 0.71, "grad_norm": 1.8036918149895718, "learning_rate": 2.0418903918929875e-07, "loss": 0.1671, "step": 11141 }, { "epoch": 0.71, "grad_norm": 1.0754672093221846, "learning_rate": 2.0410578446014943e-07, "loss": 0.1978, "step": 11142 }, { "epoch": 0.71, "grad_norm": 4.009860347668434, "learning_rate": 2.0402254235444237e-07, "loss": 0.263, "step": 11143 }, { "epoch": 0.71, "grad_norm": 2.0077324420036544, "learning_rate": 2.0393931287572863e-07, "loss": 0.4108, "step": 11144 }, { "epoch": 0.71, "grad_norm": 0.7975073861023216, "learning_rate": 2.0385609602755877e-07, "loss": 0.2923, "step": 11145 }, { "epoch": 0.71, "grad_norm": 0.7617043569621263, "learning_rate": 2.0377289181348338e-07, "loss": 0.2321, "step": 11146 }, { "epoch": 0.71, "grad_norm": 2.8149967886397724, "learning_rate": 2.0368970023705174e-07, "loss": 0.1248, "step": 11147 }, { "epoch": 0.71, "grad_norm": 0.3126689947528291, "learning_rate": 2.0360652130181329e-07, "loss": 0.0073, "step": 11148 }, { "epoch": 0.71, "grad_norm": 0.5347447803184486, "learning_rate": 2.0352335501131634e-07, "loss": 0.0089, "step": 11149 }, { "epoch": 0.71, "grad_norm": 1.1174960807730312, "learning_rate": 2.0344020136910915e-07, "loss": 0.1651, "step": 11150 }, { "epoch": 0.71, "grad_norm": 0.3891793644539086, "learning_rate": 2.0335706037873907e-07, "loss": 0.1932, "step": 11151 }, { "epoch": 0.71, "grad_norm": 3.4109825230115582, "learning_rate": 2.0327393204375303e-07, "loss": 0.0103, "step": 11152 }, { "epoch": 0.71, "grad_norm": 0.44008456427840037, "learning_rate": 2.0319081636769775e-07, "loss": 0.0624, "step": 11153 }, { "epoch": 0.71, "grad_norm": 0.6804454130050315, "learning_rate": 2.0310771335411876e-07, "loss": 0.1748, "step": 11154 }, { "epoch": 0.71, "grad_norm": 0.6616182441697905, "learning_rate": 2.0302462300656148e-07, "loss": 0.1779, "step": 11155 }, { "epoch": 0.71, "grad_norm": 1.194886232200801, "learning_rate": 2.02941545328571e-07, "loss": 0.0157, "step": 11156 }, { "epoch": 0.71, "grad_norm": 1.0210193060768187, "learning_rate": 2.0285848032369136e-07, "loss": 0.1991, "step": 11157 }, { "epoch": 0.71, "grad_norm": 0.42220279880617184, "learning_rate": 2.027754279954661e-07, "loss": 0.0531, "step": 11158 }, { "epoch": 0.71, "grad_norm": 1.3413695279247972, "learning_rate": 2.0269238834743873e-07, "loss": 0.0783, "step": 11159 }, { "epoch": 0.71, "grad_norm": 0.1783040252237527, "learning_rate": 2.026093613831516e-07, "loss": 0.0881, "step": 11160 }, { "epoch": 0.71, "grad_norm": 1.352888585774589, "learning_rate": 2.0252634710614708e-07, "loss": 0.1885, "step": 11161 }, { "epoch": 0.71, "grad_norm": 0.7125407256154703, "learning_rate": 2.0244334551996644e-07, "loss": 0.207, "step": 11162 }, { "epoch": 0.71, "grad_norm": 1.0810572743366624, "learning_rate": 2.02360356628151e-07, "loss": 0.1821, "step": 11163 }, { "epoch": 0.71, "grad_norm": 0.9800343033106649, "learning_rate": 2.0227738043424092e-07, "loss": 0.0958, "step": 11164 }, { "epoch": 0.71, "grad_norm": 1.1925215461899459, "learning_rate": 2.0219441694177646e-07, "loss": 0.13, "step": 11165 }, { "epoch": 0.71, "grad_norm": 2.488269609454193, "learning_rate": 2.0211146615429663e-07, "loss": 0.0184, "step": 11166 }, { "epoch": 0.71, "grad_norm": 0.5923949634576661, "learning_rate": 2.0202852807534072e-07, "loss": 0.1575, "step": 11167 }, { "epoch": 0.71, "grad_norm": 1.0325760284176058, "learning_rate": 2.0194560270844656e-07, "loss": 0.2335, "step": 11168 }, { "epoch": 0.71, "grad_norm": 1.616832799167069, "learning_rate": 2.0186269005715238e-07, "loss": 0.0999, "step": 11169 }, { "epoch": 0.71, "grad_norm": 0.24812581154032837, "learning_rate": 2.0177979012499496e-07, "loss": 0.0109, "step": 11170 }, { "epoch": 0.71, "grad_norm": 0.7395334250891737, "learning_rate": 2.0169690291551122e-07, "loss": 0.1651, "step": 11171 }, { "epoch": 0.71, "grad_norm": 4.678953458118523, "learning_rate": 2.0161402843223746e-07, "loss": 0.2679, "step": 11172 }, { "epoch": 0.71, "grad_norm": 1.2235245198901974, "learning_rate": 2.0153116667870884e-07, "loss": 0.1278, "step": 11173 }, { "epoch": 0.71, "grad_norm": 7.413046949698867, "learning_rate": 2.0144831765846086e-07, "loss": 0.0327, "step": 11174 }, { "epoch": 0.71, "grad_norm": 4.374152723337225, "learning_rate": 2.0136548137502762e-07, "loss": 0.0914, "step": 11175 }, { "epoch": 0.71, "grad_norm": 0.5635293048822592, "learning_rate": 2.0128265783194347e-07, "loss": 0.2037, "step": 11176 }, { "epoch": 0.71, "grad_norm": 0.696824857039066, "learning_rate": 2.0119984703274145e-07, "loss": 0.2893, "step": 11177 }, { "epoch": 0.71, "grad_norm": 1.4448810036308517, "learning_rate": 2.0111704898095484e-07, "loss": 0.299, "step": 11178 }, { "epoch": 0.71, "grad_norm": 1.4842321762944772, "learning_rate": 2.010342636801155e-07, "loss": 0.2445, "step": 11179 }, { "epoch": 0.71, "grad_norm": 1.2315378854292929, "learning_rate": 2.009514911337557e-07, "loss": 0.1501, "step": 11180 }, { "epoch": 0.71, "grad_norm": 0.9680214184034847, "learning_rate": 2.0086873134540622e-07, "loss": 0.1119, "step": 11181 }, { "epoch": 0.71, "grad_norm": 0.6481634616597933, "learning_rate": 2.007859843185982e-07, "loss": 0.3295, "step": 11182 }, { "epoch": 0.71, "grad_norm": 0.8337402788030434, "learning_rate": 2.0070325005686146e-07, "loss": 0.4265, "step": 11183 }, { "epoch": 0.71, "grad_norm": 1.1277671168274772, "learning_rate": 2.006205285637258e-07, "loss": 0.1004, "step": 11184 }, { "epoch": 0.71, "grad_norm": 1.1033904171117666, "learning_rate": 2.0053781984272028e-07, "loss": 0.2123, "step": 11185 }, { "epoch": 0.71, "grad_norm": 2.831684019145874, "learning_rate": 2.0045512389737317e-07, "loss": 0.1505, "step": 11186 }, { "epoch": 0.71, "grad_norm": 0.9398543804586794, "learning_rate": 2.0037244073121268e-07, "loss": 0.0956, "step": 11187 }, { "epoch": 0.71, "grad_norm": 0.33626345490035, "learning_rate": 2.0028977034776617e-07, "loss": 0.1765, "step": 11188 }, { "epoch": 0.71, "grad_norm": 0.7330600531312977, "learning_rate": 2.0020711275056068e-07, "loss": 0.2654, "step": 11189 }, { "epoch": 0.71, "grad_norm": 5.071430163760048, "learning_rate": 2.0012446794312232e-07, "loss": 0.1128, "step": 11190 }, { "epoch": 0.71, "grad_norm": 0.4393179279597202, "learning_rate": 2.000418359289771e-07, "loss": 0.0701, "step": 11191 }, { "epoch": 0.71, "grad_norm": 0.7261389888422688, "learning_rate": 1.9995921671165e-07, "loss": 0.2169, "step": 11192 }, { "epoch": 0.71, "grad_norm": 3.6970623023922435, "learning_rate": 1.9987661029466606e-07, "loss": 0.4113, "step": 11193 }, { "epoch": 0.71, "grad_norm": 0.2783452929849227, "learning_rate": 1.9979401668154905e-07, "loss": 0.1111, "step": 11194 }, { "epoch": 0.71, "grad_norm": 0.7138841297634648, "learning_rate": 1.9971143587582296e-07, "loss": 0.1301, "step": 11195 }, { "epoch": 0.71, "grad_norm": 0.7634042944827867, "learning_rate": 1.9962886788101047e-07, "loss": 0.3775, "step": 11196 }, { "epoch": 0.71, "grad_norm": 0.8514063734546307, "learning_rate": 1.9954631270063455e-07, "loss": 0.3355, "step": 11197 }, { "epoch": 0.71, "grad_norm": 0.36701638430003297, "learning_rate": 1.9946377033821682e-07, "loss": 0.1232, "step": 11198 }, { "epoch": 0.71, "grad_norm": 0.8138968763301555, "learning_rate": 1.9938124079727868e-07, "loss": 0.2306, "step": 11199 }, { "epoch": 0.71, "grad_norm": 4.957162666514625, "learning_rate": 1.9929872408134128e-07, "loss": 0.01, "step": 11200 }, { "epoch": 0.71, "grad_norm": 1.663272276955558, "learning_rate": 1.9921622019392465e-07, "loss": 0.3103, "step": 11201 }, { "epoch": 0.71, "grad_norm": 0.6185255139594323, "learning_rate": 1.9913372913854887e-07, "loss": 0.1556, "step": 11202 }, { "epoch": 0.71, "grad_norm": 2.517211480643752, "learning_rate": 1.9905125091873286e-07, "loss": 0.1336, "step": 11203 }, { "epoch": 0.71, "grad_norm": 4.136301256768579, "learning_rate": 1.9896878553799552e-07, "loss": 0.0989, "step": 11204 }, { "epoch": 0.71, "grad_norm": 0.5318233109332234, "learning_rate": 1.9888633299985501e-07, "loss": 0.0444, "step": 11205 }, { "epoch": 0.71, "grad_norm": 0.6815359606972862, "learning_rate": 1.988038933078287e-07, "loss": 0.1588, "step": 11206 }, { "epoch": 0.71, "grad_norm": 1.064440810281863, "learning_rate": 1.9872146646543385e-07, "loss": 0.1855, "step": 11207 }, { "epoch": 0.71, "grad_norm": 0.9657875998630698, "learning_rate": 1.98639052476187e-07, "loss": 0.1648, "step": 11208 }, { "epoch": 0.71, "grad_norm": 0.6574026522270735, "learning_rate": 1.9855665134360384e-07, "loss": 0.2409, "step": 11209 }, { "epoch": 0.71, "grad_norm": 1.4096503991301412, "learning_rate": 1.984742630712001e-07, "loss": 0.1128, "step": 11210 }, { "epoch": 0.71, "grad_norm": 1.5949327327874128, "learning_rate": 1.983918876624902e-07, "loss": 0.1528, "step": 11211 }, { "epoch": 0.72, "grad_norm": 2.2938873804657227, "learning_rate": 1.9830952512098887e-07, "loss": 0.3501, "step": 11212 }, { "epoch": 0.72, "grad_norm": 0.7971925638047858, "learning_rate": 1.9822717545020968e-07, "loss": 0.1957, "step": 11213 }, { "epoch": 0.72, "grad_norm": 0.7505609000325931, "learning_rate": 1.9814483865366565e-07, "loss": 0.452, "step": 11214 }, { "epoch": 0.72, "grad_norm": 1.1554099347542885, "learning_rate": 1.9806251473486985e-07, "loss": 0.4074, "step": 11215 }, { "epoch": 0.72, "grad_norm": 1.612928200475958, "learning_rate": 1.9798020369733387e-07, "loss": 0.2485, "step": 11216 }, { "epoch": 0.72, "grad_norm": 1.1516989032552714, "learning_rate": 1.9789790554456975e-07, "loss": 0.2485, "step": 11217 }, { "epoch": 0.72, "grad_norm": 0.7193849460650937, "learning_rate": 1.9781562028008815e-07, "loss": 0.2538, "step": 11218 }, { "epoch": 0.72, "grad_norm": 2.1335841146218795, "learning_rate": 1.9773334790739977e-07, "loss": 0.3156, "step": 11219 }, { "epoch": 0.72, "grad_norm": 0.9071096834049142, "learning_rate": 1.9765108843001422e-07, "loss": 0.2181, "step": 11220 }, { "epoch": 0.72, "grad_norm": 0.4201123633870905, "learning_rate": 1.9756884185144124e-07, "loss": 0.126, "step": 11221 }, { "epoch": 0.72, "grad_norm": 8.74766202741582, "learning_rate": 1.9748660817518924e-07, "loss": 0.1011, "step": 11222 }, { "epoch": 0.72, "grad_norm": 1.5162123783421584, "learning_rate": 1.9740438740476667e-07, "loss": 0.0893, "step": 11223 }, { "epoch": 0.72, "grad_norm": 0.8705645587691554, "learning_rate": 1.9732217954368142e-07, "loss": 0.2566, "step": 11224 }, { "epoch": 0.72, "grad_norm": 0.7949028927690419, "learning_rate": 1.9723998459544027e-07, "loss": 0.2387, "step": 11225 }, { "epoch": 0.72, "grad_norm": 7.079413507175972, "learning_rate": 1.9715780256355013e-07, "loss": 0.1022, "step": 11226 }, { "epoch": 0.72, "grad_norm": 1.1397131273361476, "learning_rate": 1.970756334515168e-07, "loss": 0.0882, "step": 11227 }, { "epoch": 0.72, "grad_norm": 0.6633143255294004, "learning_rate": 1.969934772628461e-07, "loss": 0.0782, "step": 11228 }, { "epoch": 0.72, "grad_norm": 0.7519215211373192, "learning_rate": 1.9691133400104254e-07, "loss": 0.145, "step": 11229 }, { "epoch": 0.72, "grad_norm": 0.8070537655189025, "learning_rate": 1.9682920366961098e-07, "loss": 0.2521, "step": 11230 }, { "epoch": 0.72, "grad_norm": 1.1612635234765778, "learning_rate": 1.9674708627205484e-07, "loss": 0.4225, "step": 11231 }, { "epoch": 0.72, "grad_norm": 0.6298576287659362, "learning_rate": 1.9666498181187775e-07, "loss": 0.4885, "step": 11232 }, { "epoch": 0.72, "grad_norm": 0.6332193205907097, "learning_rate": 1.9658289029258218e-07, "loss": 0.1921, "step": 11233 }, { "epoch": 0.72, "grad_norm": 0.9558343194950308, "learning_rate": 1.9650081171767063e-07, "loss": 0.2455, "step": 11234 }, { "epoch": 0.72, "grad_norm": 8.19114937827609, "learning_rate": 1.964187460906444e-07, "loss": 0.1037, "step": 11235 }, { "epoch": 0.72, "grad_norm": 3.9883718376886304, "learning_rate": 1.9633669341500492e-07, "loss": 0.2626, "step": 11236 }, { "epoch": 0.72, "grad_norm": 1.5499604013139279, "learning_rate": 1.962546536942523e-07, "loss": 0.3044, "step": 11237 }, { "epoch": 0.72, "grad_norm": 0.9565536379392627, "learning_rate": 1.9617262693188703e-07, "loss": 0.0567, "step": 11238 }, { "epoch": 0.72, "grad_norm": 0.45903994597422537, "learning_rate": 1.96090613131408e-07, "loss": 0.0997, "step": 11239 }, { "epoch": 0.72, "grad_norm": 0.9446472463424355, "learning_rate": 1.9600861229631456e-07, "loss": 0.2746, "step": 11240 }, { "epoch": 0.72, "grad_norm": 1.0899076842399467, "learning_rate": 1.9592662443010466e-07, "loss": 0.1573, "step": 11241 }, { "epoch": 0.72, "grad_norm": 0.718774540486392, "learning_rate": 1.9584464953627621e-07, "loss": 0.135, "step": 11242 }, { "epoch": 0.72, "grad_norm": 0.6652962312181652, "learning_rate": 1.957626876183266e-07, "loss": 0.1435, "step": 11243 }, { "epoch": 0.72, "grad_norm": 0.600922276086795, "learning_rate": 1.9568073867975217e-07, "loss": 0.2011, "step": 11244 }, { "epoch": 0.72, "grad_norm": 0.7627624830365518, "learning_rate": 1.9559880272404937e-07, "loss": 0.1666, "step": 11245 }, { "epoch": 0.72, "grad_norm": 0.34654085612832575, "learning_rate": 1.9551687975471337e-07, "loss": 0.0986, "step": 11246 }, { "epoch": 0.72, "grad_norm": 0.44899840641322625, "learning_rate": 1.9543496977523954e-07, "loss": 0.2565, "step": 11247 }, { "epoch": 0.72, "grad_norm": 4.913897145425443, "learning_rate": 1.95353072789122e-07, "loss": 0.0238, "step": 11248 }, { "epoch": 0.72, "grad_norm": 0.5162736989873675, "learning_rate": 1.9527118879985498e-07, "loss": 0.0036, "step": 11249 }, { "epoch": 0.72, "grad_norm": 1.4226692172058273, "learning_rate": 1.9518931781093145e-07, "loss": 0.2843, "step": 11250 }, { "epoch": 0.72, "grad_norm": 0.7849040542404795, "learning_rate": 1.9510745982584452e-07, "loss": 0.1482, "step": 11251 }, { "epoch": 0.72, "grad_norm": 0.9902365652357928, "learning_rate": 1.9502561484808612e-07, "loss": 0.0985, "step": 11252 }, { "epoch": 0.72, "grad_norm": 1.4831877618226033, "learning_rate": 1.9494378288114816e-07, "loss": 0.2056, "step": 11253 }, { "epoch": 0.72, "grad_norm": 2.2272332823805097, "learning_rate": 1.948619639285217e-07, "loss": 0.257, "step": 11254 }, { "epoch": 0.72, "grad_norm": 1.3251243194056916, "learning_rate": 1.9478015799369711e-07, "loss": 0.2012, "step": 11255 }, { "epoch": 0.72, "grad_norm": 1.0066410634636844, "learning_rate": 1.9469836508016475e-07, "loss": 0.242, "step": 11256 }, { "epoch": 0.72, "grad_norm": 0.44470279731483114, "learning_rate": 1.9461658519141368e-07, "loss": 0.1918, "step": 11257 }, { "epoch": 0.72, "grad_norm": 0.9107649630220954, "learning_rate": 1.9453481833093298e-07, "loss": 0.2812, "step": 11258 }, { "epoch": 0.72, "grad_norm": 1.8946927979470785, "learning_rate": 1.94453064502211e-07, "loss": 0.1912, "step": 11259 }, { "epoch": 0.72, "grad_norm": 0.46426163624716443, "learning_rate": 1.9437132370873565e-07, "loss": 0.0628, "step": 11260 }, { "epoch": 0.72, "grad_norm": 0.6507929639380663, "learning_rate": 1.9428959595399385e-07, "loss": 0.2525, "step": 11261 }, { "epoch": 0.72, "grad_norm": 0.8699678919806615, "learning_rate": 1.9420788124147264e-07, "loss": 0.1519, "step": 11262 }, { "epoch": 0.72, "grad_norm": 0.8073902475761016, "learning_rate": 1.9412617957465777e-07, "loss": 0.0252, "step": 11263 }, { "epoch": 0.72, "grad_norm": 0.34797149864818405, "learning_rate": 1.9404449095703512e-07, "loss": 0.1325, "step": 11264 }, { "epoch": 0.72, "grad_norm": 0.8124671326892062, "learning_rate": 1.9396281539208937e-07, "loss": 0.2661, "step": 11265 }, { "epoch": 0.72, "grad_norm": 4.250268017497989, "learning_rate": 1.9388115288330526e-07, "loss": 0.0244, "step": 11266 }, { "epoch": 0.72, "grad_norm": 0.6363677745593981, "learning_rate": 1.9379950343416656e-07, "loss": 0.1499, "step": 11267 }, { "epoch": 0.72, "grad_norm": 1.158412443101855, "learning_rate": 1.9371786704815645e-07, "loss": 0.3666, "step": 11268 }, { "epoch": 0.72, "grad_norm": 1.3719723092275262, "learning_rate": 1.93636243728758e-07, "loss": 0.2845, "step": 11269 }, { "epoch": 0.72, "grad_norm": 1.8101719245667807, "learning_rate": 1.9355463347945305e-07, "loss": 0.2269, "step": 11270 }, { "epoch": 0.72, "grad_norm": 1.3636657007926116, "learning_rate": 1.934730363037237e-07, "loss": 0.2541, "step": 11271 }, { "epoch": 0.72, "grad_norm": 0.5334193361930671, "learning_rate": 1.933914522050506e-07, "loss": 0.1261, "step": 11272 }, { "epoch": 0.72, "grad_norm": 1.9624972864915775, "learning_rate": 1.933098811869147e-07, "loss": 0.2005, "step": 11273 }, { "epoch": 0.72, "grad_norm": 0.4614518387255773, "learning_rate": 1.9322832325279558e-07, "loss": 0.1322, "step": 11274 }, { "epoch": 0.72, "grad_norm": 0.35987495361670174, "learning_rate": 1.931467784061731e-07, "loss": 0.0917, "step": 11275 }, { "epoch": 0.72, "grad_norm": 0.3751024634717288, "learning_rate": 1.930652466505257e-07, "loss": 0.2347, "step": 11276 }, { "epoch": 0.72, "grad_norm": 13.791205175829184, "learning_rate": 1.9298372798933193e-07, "loss": 0.1651, "step": 11277 }, { "epoch": 0.72, "grad_norm": 0.5227542350670976, "learning_rate": 1.9290222242606946e-07, "loss": 0.0922, "step": 11278 }, { "epoch": 0.72, "grad_norm": 0.39238810146444875, "learning_rate": 1.9282072996421577e-07, "loss": 0.1329, "step": 11279 }, { "epoch": 0.72, "grad_norm": 1.8366118709173087, "learning_rate": 1.92739250607247e-07, "loss": 0.1964, "step": 11280 }, { "epoch": 0.72, "grad_norm": 0.8545381031191396, "learning_rate": 1.9265778435863967e-07, "loss": 0.2031, "step": 11281 }, { "epoch": 0.72, "grad_norm": 0.5449701366996338, "learning_rate": 1.925763312218691e-07, "loss": 0.1661, "step": 11282 }, { "epoch": 0.72, "grad_norm": 0.3373603343141682, "learning_rate": 1.9249489120041007e-07, "loss": 0.4064, "step": 11283 }, { "epoch": 0.72, "grad_norm": 1.7810633429509313, "learning_rate": 1.924134642977373e-07, "loss": 0.2826, "step": 11284 }, { "epoch": 0.72, "grad_norm": 0.5539469380350881, "learning_rate": 1.9233205051732431e-07, "loss": 0.3932, "step": 11285 }, { "epoch": 0.72, "grad_norm": 1.0544716698876138, "learning_rate": 1.9225064986264473e-07, "loss": 0.2043, "step": 11286 }, { "epoch": 0.72, "grad_norm": 1.4614833600306727, "learning_rate": 1.9216926233717084e-07, "loss": 0.0585, "step": 11287 }, { "epoch": 0.72, "grad_norm": 1.2431582573504985, "learning_rate": 1.920878879443753e-07, "loss": 0.3204, "step": 11288 }, { "epoch": 0.72, "grad_norm": 0.7717165785479521, "learning_rate": 1.9200652668772922e-07, "loss": 0.4098, "step": 11289 }, { "epoch": 0.72, "grad_norm": 1.27975978791258, "learning_rate": 1.9192517857070402e-07, "loss": 0.3203, "step": 11290 }, { "epoch": 0.72, "grad_norm": 0.5398482857016399, "learning_rate": 1.9184384359676986e-07, "loss": 0.1996, "step": 11291 }, { "epoch": 0.72, "grad_norm": 0.8720094451731067, "learning_rate": 1.9176252176939696e-07, "loss": 0.3041, "step": 11292 }, { "epoch": 0.72, "grad_norm": 1.40526003036035, "learning_rate": 1.916812130920543e-07, "loss": 0.2855, "step": 11293 }, { "epoch": 0.72, "grad_norm": 0.7492185619361499, "learning_rate": 1.9159991756821097e-07, "loss": 0.1638, "step": 11294 }, { "epoch": 0.72, "grad_norm": 3.8863006868943675, "learning_rate": 1.9151863520133527e-07, "loss": 0.0419, "step": 11295 }, { "epoch": 0.72, "grad_norm": 1.2509224150024811, "learning_rate": 1.914373659948945e-07, "loss": 0.0916, "step": 11296 }, { "epoch": 0.72, "grad_norm": 1.734187662025367, "learning_rate": 1.9135610995235618e-07, "loss": 0.2635, "step": 11297 }, { "epoch": 0.72, "grad_norm": 0.18010752757270784, "learning_rate": 1.9127486707718648e-07, "loss": 0.0547, "step": 11298 }, { "epoch": 0.72, "grad_norm": 0.3652400883110703, "learning_rate": 1.9119363737285177e-07, "loss": 0.0909, "step": 11299 }, { "epoch": 0.72, "grad_norm": 0.8036609510321918, "learning_rate": 1.911124208428171e-07, "loss": 0.1154, "step": 11300 }, { "epoch": 0.72, "grad_norm": 0.9157834260756635, "learning_rate": 1.9103121749054767e-07, "loss": 0.1498, "step": 11301 }, { "epoch": 0.72, "grad_norm": 0.44321398656095967, "learning_rate": 1.9095002731950738e-07, "loss": 0.1158, "step": 11302 }, { "epoch": 0.72, "grad_norm": 1.6170540323583107, "learning_rate": 1.9086885033316042e-07, "loss": 0.2886, "step": 11303 }, { "epoch": 0.72, "grad_norm": 1.156481405504817, "learning_rate": 1.9078768653496957e-07, "loss": 0.3127, "step": 11304 }, { "epoch": 0.72, "grad_norm": 1.371487411194727, "learning_rate": 1.9070653592839774e-07, "loss": 0.0578, "step": 11305 }, { "epoch": 0.72, "grad_norm": 1.4820827101920844, "learning_rate": 1.906253985169067e-07, "loss": 0.2871, "step": 11306 }, { "epoch": 0.72, "grad_norm": 8.743525703499758, "learning_rate": 1.9054427430395825e-07, "loss": 0.2672, "step": 11307 }, { "epoch": 0.72, "grad_norm": 1.3482562172310517, "learning_rate": 1.904631632930131e-07, "loss": 0.0837, "step": 11308 }, { "epoch": 0.72, "grad_norm": 0.7090816574899531, "learning_rate": 1.9038206548753156e-07, "loss": 0.3691, "step": 11309 }, { "epoch": 0.72, "grad_norm": 0.6656375800217357, "learning_rate": 1.9030098089097345e-07, "loss": 0.1905, "step": 11310 }, { "epoch": 0.72, "grad_norm": 0.9876900600288355, "learning_rate": 1.9021990950679821e-07, "loss": 0.0927, "step": 11311 }, { "epoch": 0.72, "grad_norm": 0.6965926457653281, "learning_rate": 1.901388513384643e-07, "loss": 0.1765, "step": 11312 }, { "epoch": 0.72, "grad_norm": 0.423903362526783, "learning_rate": 1.900578063894298e-07, "loss": 0.0063, "step": 11313 }, { "epoch": 0.72, "grad_norm": 1.227600379231426, "learning_rate": 1.8997677466315253e-07, "loss": 0.2589, "step": 11314 }, { "epoch": 0.72, "grad_norm": 1.3112065002380018, "learning_rate": 1.8989575616308916e-07, "loss": 0.1935, "step": 11315 }, { "epoch": 0.72, "grad_norm": 1.5071865985285988, "learning_rate": 1.898147508926964e-07, "loss": 0.5045, "step": 11316 }, { "epoch": 0.72, "grad_norm": 0.6361604339442415, "learning_rate": 1.8973375885542963e-07, "loss": 0.2456, "step": 11317 }, { "epoch": 0.72, "grad_norm": 0.49806507043030224, "learning_rate": 1.8965278005474473e-07, "loss": 0.2286, "step": 11318 }, { "epoch": 0.72, "grad_norm": 0.5761747681338725, "learning_rate": 1.8957181449409582e-07, "loss": 0.0065, "step": 11319 }, { "epoch": 0.72, "grad_norm": 0.7775147011624235, "learning_rate": 1.894908621769376e-07, "loss": 0.0907, "step": 11320 }, { "epoch": 0.72, "grad_norm": 0.3591939137242455, "learning_rate": 1.8940992310672315e-07, "loss": 0.2522, "step": 11321 }, { "epoch": 0.72, "grad_norm": 0.975254351357844, "learning_rate": 1.89328997286906e-07, "loss": 0.2037, "step": 11322 }, { "epoch": 0.72, "grad_norm": 1.0850047961635327, "learning_rate": 1.892480847209383e-07, "loss": 0.183, "step": 11323 }, { "epoch": 0.72, "grad_norm": 0.9839293972390819, "learning_rate": 1.8916718541227185e-07, "loss": 0.3225, "step": 11324 }, { "epoch": 0.72, "grad_norm": 0.7497703514236261, "learning_rate": 1.8908629936435827e-07, "loss": 0.1687, "step": 11325 }, { "epoch": 0.72, "grad_norm": 0.2926129797374673, "learning_rate": 1.8900542658064805e-07, "loss": 0.1619, "step": 11326 }, { "epoch": 0.72, "grad_norm": 1.8220806442208703, "learning_rate": 1.8892456706459163e-07, "loss": 0.133, "step": 11327 }, { "epoch": 0.72, "grad_norm": 1.0574883016902499, "learning_rate": 1.8884372081963835e-07, "loss": 0.0633, "step": 11328 }, { "epoch": 0.72, "grad_norm": 0.775531667402893, "learning_rate": 1.8876288784923745e-07, "loss": 0.0403, "step": 11329 }, { "epoch": 0.72, "grad_norm": 3.709690766797677, "learning_rate": 1.8868206815683763e-07, "loss": 0.0914, "step": 11330 }, { "epoch": 0.72, "grad_norm": 0.9142820871214081, "learning_rate": 1.8860126174588636e-07, "loss": 0.2878, "step": 11331 }, { "epoch": 0.72, "grad_norm": 1.953645651570006, "learning_rate": 1.8852046861983134e-07, "loss": 0.0113, "step": 11332 }, { "epoch": 0.72, "grad_norm": 0.5171814683896292, "learning_rate": 1.884396887821194e-07, "loss": 0.1535, "step": 11333 }, { "epoch": 0.72, "grad_norm": 0.9780494805901289, "learning_rate": 1.883589222361965e-07, "loss": 0.3118, "step": 11334 }, { "epoch": 0.72, "grad_norm": 0.4719540877982585, "learning_rate": 1.8827816898550863e-07, "loss": 0.1827, "step": 11335 }, { "epoch": 0.72, "grad_norm": 0.5489758011674478, "learning_rate": 1.8819742903350068e-07, "loss": 0.1459, "step": 11336 }, { "epoch": 0.72, "grad_norm": 1.3944660254622898, "learning_rate": 1.8811670238361703e-07, "loss": 0.1734, "step": 11337 }, { "epoch": 0.72, "grad_norm": 0.577398856012256, "learning_rate": 1.8803598903930205e-07, "loss": 0.0038, "step": 11338 }, { "epoch": 0.72, "grad_norm": 0.821320998314224, "learning_rate": 1.8795528900399872e-07, "loss": 0.2236, "step": 11339 }, { "epoch": 0.72, "grad_norm": 0.7722991064448721, "learning_rate": 1.878746022811502e-07, "loss": 0.1255, "step": 11340 }, { "epoch": 0.72, "grad_norm": 0.36684766591212525, "learning_rate": 1.8779392887419843e-07, "loss": 0.1759, "step": 11341 }, { "epoch": 0.72, "grad_norm": 2.3443206899330606, "learning_rate": 1.8771326878658545e-07, "loss": 0.374, "step": 11342 }, { "epoch": 0.72, "grad_norm": 0.8821809580335769, "learning_rate": 1.8763262202175202e-07, "loss": 0.2174, "step": 11343 }, { "epoch": 0.72, "grad_norm": 18.54331626413916, "learning_rate": 1.8755198858313903e-07, "loss": 0.1073, "step": 11344 }, { "epoch": 0.72, "grad_norm": 1.3406909347627889, "learning_rate": 1.874713684741861e-07, "loss": 0.1718, "step": 11345 }, { "epoch": 0.72, "grad_norm": 1.7917407665217508, "learning_rate": 1.8739076169833308e-07, "loss": 0.1452, "step": 11346 }, { "epoch": 0.72, "grad_norm": 0.7331813446289264, "learning_rate": 1.8731016825901842e-07, "loss": 0.2365, "step": 11347 }, { "epoch": 0.72, "grad_norm": 1.423763078498533, "learning_rate": 1.8722958815968054e-07, "loss": 0.1211, "step": 11348 }, { "epoch": 0.72, "grad_norm": 9.238856469415873, "learning_rate": 1.871490214037572e-07, "loss": 0.0339, "step": 11349 }, { "epoch": 0.72, "grad_norm": 0.38855925532029184, "learning_rate": 1.8706846799468568e-07, "loss": 0.0896, "step": 11350 }, { "epoch": 0.72, "grad_norm": 1.112503105090476, "learning_rate": 1.8698792793590235e-07, "loss": 0.1634, "step": 11351 }, { "epoch": 0.72, "grad_norm": 0.6381406907226377, "learning_rate": 1.8690740123084315e-07, "loss": 0.0893, "step": 11352 }, { "epoch": 0.72, "grad_norm": 0.9092041439344964, "learning_rate": 1.868268878829437e-07, "loss": 0.2248, "step": 11353 }, { "epoch": 0.72, "grad_norm": 5.6039574825094425, "learning_rate": 1.8674638789563869e-07, "loss": 0.1151, "step": 11354 }, { "epoch": 0.72, "grad_norm": 0.7360579684385566, "learning_rate": 1.866659012723626e-07, "loss": 0.2566, "step": 11355 }, { "epoch": 0.72, "grad_norm": 0.8850195778522192, "learning_rate": 1.8658542801654887e-07, "loss": 0.1602, "step": 11356 }, { "epoch": 0.72, "grad_norm": 0.7711100564924428, "learning_rate": 1.8650496813163096e-07, "loss": 0.0459, "step": 11357 }, { "epoch": 0.72, "grad_norm": 4.383151805890166, "learning_rate": 1.864245216210412e-07, "loss": 0.0569, "step": 11358 }, { "epoch": 0.72, "grad_norm": 1.6917029957197063, "learning_rate": 1.8634408848821186e-07, "loss": 0.0785, "step": 11359 }, { "epoch": 0.72, "grad_norm": 0.8159387198461046, "learning_rate": 1.8626366873657413e-07, "loss": 0.1036, "step": 11360 }, { "epoch": 0.72, "grad_norm": 2.240459356947686, "learning_rate": 1.8618326236955906e-07, "loss": 0.1267, "step": 11361 }, { "epoch": 0.72, "grad_norm": 7.495084438111064, "learning_rate": 1.8610286939059676e-07, "loss": 0.1815, "step": 11362 }, { "epoch": 0.72, "grad_norm": 2.692074811260936, "learning_rate": 1.860224898031172e-07, "loss": 0.1091, "step": 11363 }, { "epoch": 0.72, "grad_norm": 3.4801473968529875, "learning_rate": 1.8594212361054922e-07, "loss": 0.0339, "step": 11364 }, { "epoch": 0.72, "grad_norm": 1.0618883813365898, "learning_rate": 1.8586177081632158e-07, "loss": 0.3165, "step": 11365 }, { "epoch": 0.72, "grad_norm": 21.182772692848125, "learning_rate": 1.8578143142386248e-07, "loss": 0.2846, "step": 11366 }, { "epoch": 0.72, "grad_norm": 1.5496385039487668, "learning_rate": 1.8570110543659907e-07, "loss": 0.4394, "step": 11367 }, { "epoch": 0.72, "grad_norm": 0.8117317447610425, "learning_rate": 1.856207928579584e-07, "loss": 0.1669, "step": 11368 }, { "epoch": 0.73, "grad_norm": 0.6162402209721237, "learning_rate": 1.8554049369136655e-07, "loss": 0.0026, "step": 11369 }, { "epoch": 0.73, "grad_norm": 0.7646794922934054, "learning_rate": 1.8546020794024954e-07, "loss": 0.1577, "step": 11370 }, { "epoch": 0.73, "grad_norm": 0.79595816006261, "learning_rate": 1.853799356080322e-07, "loss": 0.3337, "step": 11371 }, { "epoch": 0.73, "grad_norm": 1.1560967344600477, "learning_rate": 1.8529967669813945e-07, "loss": 0.2162, "step": 11372 }, { "epoch": 0.73, "grad_norm": 1.9609528498824436, "learning_rate": 1.8521943121399497e-07, "loss": 0.3063, "step": 11373 }, { "epoch": 0.73, "grad_norm": 0.7273985933698603, "learning_rate": 1.8513919915902248e-07, "loss": 0.2677, "step": 11374 }, { "epoch": 0.73, "grad_norm": 0.8282398453406957, "learning_rate": 1.8505898053664455e-07, "loss": 0.28, "step": 11375 }, { "epoch": 0.73, "grad_norm": 10.672463712150755, "learning_rate": 1.849787753502838e-07, "loss": 0.1784, "step": 11376 }, { "epoch": 0.73, "grad_norm": 0.5570384879018646, "learning_rate": 1.848985836033617e-07, "loss": 0.1562, "step": 11377 }, { "epoch": 0.73, "grad_norm": 5.666323930178118, "learning_rate": 1.8481840529929938e-07, "loss": 0.223, "step": 11378 }, { "epoch": 0.73, "grad_norm": 1.0651202670771267, "learning_rate": 1.8473824044151758e-07, "loss": 0.0999, "step": 11379 }, { "epoch": 0.73, "grad_norm": 10.857961344166354, "learning_rate": 1.8465808903343606e-07, "loss": 0.1848, "step": 11380 }, { "epoch": 0.73, "grad_norm": 0.3676116604325516, "learning_rate": 1.8457795107847435e-07, "loss": 0.0898, "step": 11381 }, { "epoch": 0.73, "grad_norm": 10.171026555137782, "learning_rate": 1.8449782658005152e-07, "loss": 0.3671, "step": 11382 }, { "epoch": 0.73, "grad_norm": 0.5106940084247752, "learning_rate": 1.8441771554158554e-07, "loss": 0.1456, "step": 11383 }, { "epoch": 0.73, "grad_norm": 0.7994769509285218, "learning_rate": 1.843376179664941e-07, "loss": 0.2311, "step": 11384 }, { "epoch": 0.73, "grad_norm": 1.5945871463968089, "learning_rate": 1.842575338581947e-07, "loss": 0.2475, "step": 11385 }, { "epoch": 0.73, "grad_norm": 1.3367533168219845, "learning_rate": 1.8417746322010342e-07, "loss": 0.2572, "step": 11386 }, { "epoch": 0.73, "grad_norm": 0.212180939838843, "learning_rate": 1.8409740605563662e-07, "loss": 0.0048, "step": 11387 }, { "epoch": 0.73, "grad_norm": 3.529879221363101, "learning_rate": 1.8401736236820931e-07, "loss": 0.0993, "step": 11388 }, { "epoch": 0.73, "grad_norm": 0.5938335393745138, "learning_rate": 1.8393733216123675e-07, "loss": 0.2476, "step": 11389 }, { "epoch": 0.73, "grad_norm": 0.2951656965333097, "learning_rate": 1.8385731543813278e-07, "loss": 0.0676, "step": 11390 }, { "epoch": 0.73, "grad_norm": 1.1143514561504615, "learning_rate": 1.837773122023114e-07, "loss": 0.1534, "step": 11391 }, { "epoch": 0.73, "grad_norm": 0.3308697602123853, "learning_rate": 1.8369732245718562e-07, "loss": 0.1973, "step": 11392 }, { "epoch": 0.73, "grad_norm": 0.354457923485755, "learning_rate": 1.836173462061677e-07, "loss": 0.0941, "step": 11393 }, { "epoch": 0.73, "grad_norm": 0.6274520460016499, "learning_rate": 1.8353738345267e-07, "loss": 0.1106, "step": 11394 }, { "epoch": 0.73, "grad_norm": 0.5221636998211159, "learning_rate": 1.834574342001035e-07, "loss": 0.3306, "step": 11395 }, { "epoch": 0.73, "grad_norm": 7.324097496094751, "learning_rate": 1.8337749845187934e-07, "loss": 0.0962, "step": 11396 }, { "epoch": 0.73, "grad_norm": 0.675354180486095, "learning_rate": 1.8329757621140746e-07, "loss": 0.0131, "step": 11397 }, { "epoch": 0.73, "grad_norm": 1.5524860551788853, "learning_rate": 1.8321766748209778e-07, "loss": 0.1393, "step": 11398 }, { "epoch": 0.73, "grad_norm": 0.5590398221356457, "learning_rate": 1.8313777226735904e-07, "loss": 0.1419, "step": 11399 }, { "epoch": 0.73, "grad_norm": 0.5652153307301052, "learning_rate": 1.8305789057059995e-07, "loss": 0.1761, "step": 11400 }, { "epoch": 0.73, "grad_norm": 1.1758393193850527, "learning_rate": 1.8297802239522847e-07, "loss": 0.133, "step": 11401 }, { "epoch": 0.73, "grad_norm": 0.7437972372207307, "learning_rate": 1.8289816774465178e-07, "loss": 0.1527, "step": 11402 }, { "epoch": 0.73, "grad_norm": 1.3463829145471895, "learning_rate": 1.8281832662227665e-07, "loss": 0.6341, "step": 11403 }, { "epoch": 0.73, "grad_norm": 1.2889171271947935, "learning_rate": 1.8273849903150946e-07, "loss": 0.0184, "step": 11404 }, { "epoch": 0.73, "grad_norm": 1.3106307738070322, "learning_rate": 1.8265868497575576e-07, "loss": 0.0877, "step": 11405 }, { "epoch": 0.73, "grad_norm": 0.5275145363797861, "learning_rate": 1.8257888445842023e-07, "loss": 0.2441, "step": 11406 }, { "epoch": 0.73, "grad_norm": 0.11567962623004249, "learning_rate": 1.824990974829078e-07, "loss": 0.0019, "step": 11407 }, { "epoch": 0.73, "grad_norm": 0.3797639917060166, "learning_rate": 1.82419324052622e-07, "loss": 0.1215, "step": 11408 }, { "epoch": 0.73, "grad_norm": 6.989969077827051, "learning_rate": 1.823395641709664e-07, "loss": 0.3756, "step": 11409 }, { "epoch": 0.73, "grad_norm": 0.8942893852458406, "learning_rate": 1.8225981784134336e-07, "loss": 0.0271, "step": 11410 }, { "epoch": 0.73, "grad_norm": 1.9581676941039796, "learning_rate": 1.8218008506715544e-07, "loss": 0.0194, "step": 11411 }, { "epoch": 0.73, "grad_norm": 0.600710384600168, "learning_rate": 1.8210036585180383e-07, "loss": 0.0841, "step": 11412 }, { "epoch": 0.73, "grad_norm": 2.112113519964818, "learning_rate": 1.8202066019868979e-07, "loss": 0.4596, "step": 11413 }, { "epoch": 0.73, "grad_norm": 1.0098642959598396, "learning_rate": 1.8194096811121346e-07, "loss": 0.1137, "step": 11414 }, { "epoch": 0.73, "grad_norm": 0.250568542399658, "learning_rate": 1.8186128959277497e-07, "loss": 0.0919, "step": 11415 }, { "epoch": 0.73, "grad_norm": 0.9247788958931612, "learning_rate": 1.8178162464677328e-07, "loss": 0.2528, "step": 11416 }, { "epoch": 0.73, "grad_norm": 0.4988442802089735, "learning_rate": 1.817019732766073e-07, "loss": 0.0702, "step": 11417 }, { "epoch": 0.73, "grad_norm": 1.0537970175279456, "learning_rate": 1.8162233548567486e-07, "loss": 0.3904, "step": 11418 }, { "epoch": 0.73, "grad_norm": 1.4204321740646035, "learning_rate": 1.8154271127737357e-07, "loss": 0.0892, "step": 11419 }, { "epoch": 0.73, "grad_norm": 0.7301438334478592, "learning_rate": 1.814631006551006e-07, "loss": 0.1586, "step": 11420 }, { "epoch": 0.73, "grad_norm": 4.607254441125836, "learning_rate": 1.813835036222519e-07, "loss": 0.2037, "step": 11421 }, { "epoch": 0.73, "grad_norm": 1.4555645094974445, "learning_rate": 1.8130392018222362e-07, "loss": 0.1012, "step": 11422 }, { "epoch": 0.73, "grad_norm": 0.5832270929414122, "learning_rate": 1.812243503384106e-07, "loss": 0.2479, "step": 11423 }, { "epoch": 0.73, "grad_norm": 0.9202720334256785, "learning_rate": 1.811447940942078e-07, "loss": 0.2745, "step": 11424 }, { "epoch": 0.73, "grad_norm": 0.8422475057212134, "learning_rate": 1.810652514530089e-07, "loss": 0.1688, "step": 11425 }, { "epoch": 0.73, "grad_norm": 1.267528395624066, "learning_rate": 1.8098572241820764e-07, "loss": 0.1757, "step": 11426 }, { "epoch": 0.73, "grad_norm": 0.42829894387744827, "learning_rate": 1.8090620699319658e-07, "loss": 0.1465, "step": 11427 }, { "epoch": 0.73, "grad_norm": 1.3229782021811989, "learning_rate": 1.8082670518136839e-07, "loss": 0.1382, "step": 11428 }, { "epoch": 0.73, "grad_norm": 13.010708927575749, "learning_rate": 1.807472169861144e-07, "loss": 0.4057, "step": 11429 }, { "epoch": 0.73, "grad_norm": 0.46407104850424413, "learning_rate": 1.806677424108261e-07, "loss": 0.1411, "step": 11430 }, { "epoch": 0.73, "grad_norm": 2.3753019193240372, "learning_rate": 1.8058828145889367e-07, "loss": 0.2436, "step": 11431 }, { "epoch": 0.73, "grad_norm": 2.601650084403245, "learning_rate": 1.8050883413370738e-07, "loss": 0.0615, "step": 11432 }, { "epoch": 0.73, "grad_norm": 0.10839587137874689, "learning_rate": 1.8042940043865655e-07, "loss": 0.0044, "step": 11433 }, { "epoch": 0.73, "grad_norm": 1.94888379640534, "learning_rate": 1.8034998037712967e-07, "loss": 0.2202, "step": 11434 }, { "epoch": 0.73, "grad_norm": 1.0849138426443623, "learning_rate": 1.8027057395251528e-07, "loss": 0.4132, "step": 11435 }, { "epoch": 0.73, "grad_norm": 1.4985725447357996, "learning_rate": 1.8019118116820091e-07, "loss": 0.1153, "step": 11436 }, { "epoch": 0.73, "grad_norm": 1.0127380697798904, "learning_rate": 1.8011180202757382e-07, "loss": 0.0446, "step": 11437 }, { "epoch": 0.73, "grad_norm": 0.21527906885469447, "learning_rate": 1.8003243653402013e-07, "loss": 0.1046, "step": 11438 }, { "epoch": 0.73, "grad_norm": 0.5323627967733878, "learning_rate": 1.7995308469092608e-07, "loss": 0.0247, "step": 11439 }, { "epoch": 0.73, "grad_norm": 1.1324239374290164, "learning_rate": 1.7987374650167664e-07, "loss": 0.1062, "step": 11440 }, { "epoch": 0.73, "grad_norm": 1.2843530420456393, "learning_rate": 1.797944219696569e-07, "loss": 0.0342, "step": 11441 }, { "epoch": 0.73, "grad_norm": 1.1114536875301562, "learning_rate": 1.797151110982506e-07, "loss": 0.2956, "step": 11442 }, { "epoch": 0.73, "grad_norm": 1.5086169328961192, "learning_rate": 1.7963581389084175e-07, "loss": 0.1714, "step": 11443 }, { "epoch": 0.73, "grad_norm": 0.919644543589692, "learning_rate": 1.7955653035081287e-07, "loss": 0.249, "step": 11444 }, { "epoch": 0.73, "grad_norm": 0.42747007523985503, "learning_rate": 1.7947726048154676e-07, "loss": 0.0351, "step": 11445 }, { "epoch": 0.73, "grad_norm": 0.7128928853881998, "learning_rate": 1.793980042864251e-07, "loss": 0.2259, "step": 11446 }, { "epoch": 0.73, "grad_norm": 2.726557080274698, "learning_rate": 1.7931876176882883e-07, "loss": 0.1929, "step": 11447 }, { "epoch": 0.73, "grad_norm": 2.1988796467009433, "learning_rate": 1.79239532932139e-07, "loss": 0.255, "step": 11448 }, { "epoch": 0.73, "grad_norm": 1.0306298807776058, "learning_rate": 1.791603177797354e-07, "loss": 0.3409, "step": 11449 }, { "epoch": 0.73, "grad_norm": 0.8206713277340335, "learning_rate": 1.7908111631499772e-07, "loss": 0.2763, "step": 11450 }, { "epoch": 0.73, "grad_norm": 0.906822347935492, "learning_rate": 1.7900192854130464e-07, "loss": 0.1326, "step": 11451 }, { "epoch": 0.73, "grad_norm": 0.7884778066634541, "learning_rate": 1.789227544620347e-07, "loss": 0.1866, "step": 11452 }, { "epoch": 0.73, "grad_norm": 0.4145810436910572, "learning_rate": 1.7884359408056532e-07, "loss": 0.031, "step": 11453 }, { "epoch": 0.73, "grad_norm": 0.6210063643046614, "learning_rate": 1.7876444740027384e-07, "loss": 0.1273, "step": 11454 }, { "epoch": 0.73, "grad_norm": 0.9570963601069166, "learning_rate": 1.7868531442453677e-07, "loss": 0.23, "step": 11455 }, { "epoch": 0.73, "grad_norm": 0.9909922353503083, "learning_rate": 1.7860619515673032e-07, "loss": 0.3218, "step": 11456 }, { "epoch": 0.73, "grad_norm": 0.436526150111342, "learning_rate": 1.785270896002295e-07, "loss": 0.0065, "step": 11457 }, { "epoch": 0.73, "grad_norm": 1.4145354786040616, "learning_rate": 1.784479977584094e-07, "loss": 0.1989, "step": 11458 }, { "epoch": 0.73, "grad_norm": 1.0745897571282355, "learning_rate": 1.78368919634644e-07, "loss": 0.1913, "step": 11459 }, { "epoch": 0.73, "grad_norm": 2.6166655301826944, "learning_rate": 1.7828985523230722e-07, "loss": 0.3184, "step": 11460 }, { "epoch": 0.73, "grad_norm": 0.5085237659998391, "learning_rate": 1.782108045547719e-07, "loss": 0.0833, "step": 11461 }, { "epoch": 0.73, "grad_norm": 0.3943343845860209, "learning_rate": 1.7813176760541037e-07, "loss": 0.2125, "step": 11462 }, { "epoch": 0.73, "grad_norm": 1.625016401705777, "learning_rate": 1.7805274438759482e-07, "loss": 0.0823, "step": 11463 }, { "epoch": 0.73, "grad_norm": 2.275557332623727, "learning_rate": 1.7797373490469624e-07, "loss": 0.2726, "step": 11464 }, { "epoch": 0.73, "grad_norm": 1.0708090834780841, "learning_rate": 1.7789473916008568e-07, "loss": 0.3473, "step": 11465 }, { "epoch": 0.73, "grad_norm": 1.224580636042428, "learning_rate": 1.7781575715713287e-07, "loss": 0.2007, "step": 11466 }, { "epoch": 0.73, "grad_norm": 0.8447087835760301, "learning_rate": 1.7773678889920774e-07, "loss": 0.0485, "step": 11467 }, { "epoch": 0.73, "grad_norm": 4.765010029835095, "learning_rate": 1.776578343896788e-07, "loss": 0.0837, "step": 11468 }, { "epoch": 0.73, "grad_norm": 0.4329833036547865, "learning_rate": 1.775788936319148e-07, "loss": 0.1843, "step": 11469 }, { "epoch": 0.73, "grad_norm": 0.5821034965963179, "learning_rate": 1.7749996662928317e-07, "loss": 0.1486, "step": 11470 }, { "epoch": 0.73, "grad_norm": 1.621258290149381, "learning_rate": 1.7742105338515135e-07, "loss": 0.3242, "step": 11471 }, { "epoch": 0.73, "grad_norm": 1.129873094277355, "learning_rate": 1.7734215390288598e-07, "loss": 0.2612, "step": 11472 }, { "epoch": 0.73, "grad_norm": 0.49494560339519383, "learning_rate": 1.7726326818585275e-07, "loss": 0.0985, "step": 11473 }, { "epoch": 0.73, "grad_norm": 0.08218488388321017, "learning_rate": 1.771843962374175e-07, "loss": 0.0018, "step": 11474 }, { "epoch": 0.73, "grad_norm": 2.6028904083027404, "learning_rate": 1.7710553806094465e-07, "loss": 0.2327, "step": 11475 }, { "epoch": 0.73, "grad_norm": 12.263165722367507, "learning_rate": 1.770266936597988e-07, "loss": 0.2769, "step": 11476 }, { "epoch": 0.73, "grad_norm": 1.504074391140485, "learning_rate": 1.7694786303734326e-07, "loss": 0.2694, "step": 11477 }, { "epoch": 0.73, "grad_norm": 1.7620545644939543, "learning_rate": 1.7686904619694154e-07, "loss": 0.0712, "step": 11478 }, { "epoch": 0.73, "grad_norm": 0.7622744474898832, "learning_rate": 1.7679024314195567e-07, "loss": 0.2432, "step": 11479 }, { "epoch": 0.73, "grad_norm": 0.9690519051497881, "learning_rate": 1.7671145387574798e-07, "loss": 0.4384, "step": 11480 }, { "epoch": 0.73, "grad_norm": 0.6862667021500114, "learning_rate": 1.7663267840167934e-07, "loss": 0.3638, "step": 11481 }, { "epoch": 0.73, "grad_norm": 0.46122680833365964, "learning_rate": 1.7655391672311088e-07, "loss": 0.1538, "step": 11482 }, { "epoch": 0.73, "grad_norm": 10.401978850357725, "learning_rate": 1.7647516884340235e-07, "loss": 0.2038, "step": 11483 }, { "epoch": 0.73, "grad_norm": 2.1790502517707164, "learning_rate": 1.763964347659137e-07, "loss": 0.267, "step": 11484 }, { "epoch": 0.73, "grad_norm": 0.9168656808320996, "learning_rate": 1.7631771449400346e-07, "loss": 0.2097, "step": 11485 }, { "epoch": 0.73, "grad_norm": 0.5945793788160334, "learning_rate": 1.7623900803103036e-07, "loss": 0.2263, "step": 11486 }, { "epoch": 0.73, "grad_norm": 1.7329442615887327, "learning_rate": 1.7616031538035187e-07, "loss": 0.1219, "step": 11487 }, { "epoch": 0.73, "grad_norm": 0.7730545468778828, "learning_rate": 1.7608163654532548e-07, "loss": 0.3914, "step": 11488 }, { "epoch": 0.73, "grad_norm": 0.5839277464470503, "learning_rate": 1.760029715293075e-07, "loss": 0.0079, "step": 11489 }, { "epoch": 0.73, "grad_norm": 0.5553975994262143, "learning_rate": 1.7592432033565407e-07, "loss": 0.2914, "step": 11490 }, { "epoch": 0.73, "grad_norm": 7.576759306034706, "learning_rate": 1.7584568296772073e-07, "loss": 0.0139, "step": 11491 }, { "epoch": 0.73, "grad_norm": 0.7765750848745309, "learning_rate": 1.7576705942886206e-07, "loss": 0.1938, "step": 11492 }, { "epoch": 0.73, "grad_norm": 0.7595096649079054, "learning_rate": 1.7568844972243257e-07, "loss": 0.0817, "step": 11493 }, { "epoch": 0.73, "grad_norm": 1.6963008825521375, "learning_rate": 1.7560985385178561e-07, "loss": 0.1989, "step": 11494 }, { "epoch": 0.73, "grad_norm": 5.610266027254652, "learning_rate": 1.7553127182027456e-07, "loss": 0.2918, "step": 11495 }, { "epoch": 0.73, "grad_norm": 2.4262986009962106, "learning_rate": 1.7545270363125153e-07, "loss": 0.1154, "step": 11496 }, { "epoch": 0.73, "grad_norm": 1.1687380590174075, "learning_rate": 1.7537414928806876e-07, "loss": 0.299, "step": 11497 }, { "epoch": 0.73, "grad_norm": 0.3245236831780917, "learning_rate": 1.7529560879407718e-07, "loss": 0.1032, "step": 11498 }, { "epoch": 0.73, "grad_norm": 1.3085804959006284, "learning_rate": 1.7521708215262788e-07, "loss": 0.0966, "step": 11499 }, { "epoch": 0.73, "grad_norm": 0.5337805967239032, "learning_rate": 1.751385693670705e-07, "loss": 0.0953, "step": 11500 } ], "logging_steps": 1.0, "max_steps": 15681, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1464249587613696.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }