{ "best_metric": null, "best_model_checkpoint": null, "epoch": 11.594202898550725, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 3.75e-06, "loss": 2.586, "step": 1 }, { "epoch": 0.05, "learning_rate": 7.5e-06, "loss": 2.5846, "step": 2 }, { "epoch": 0.07, "learning_rate": 1.125e-05, "loss": 2.5617, "step": 3 }, { "epoch": 0.09, "learning_rate": 1.5e-05, "loss": 2.5385, "step": 4 }, { "epoch": 0.12, "learning_rate": 1.8750000000000002e-05, "loss": 2.5426, "step": 5 }, { "epoch": 0.14, "learning_rate": 2.25e-05, "loss": 2.5664, "step": 6 }, { "epoch": 0.16, "learning_rate": 2.625e-05, "loss": 2.5914, "step": 7 }, { "epoch": 0.19, "learning_rate": 3e-05, "loss": 2.4594, "step": 8 }, { "epoch": 0.21, "learning_rate": 2.9939024390243903e-05, "loss": 2.5858, "step": 9 }, { "epoch": 0.23, "learning_rate": 2.9878048780487805e-05, "loss": 2.5069, "step": 10 }, { "epoch": 0.26, "learning_rate": 2.9817073170731707e-05, "loss": 2.5146, "step": 11 }, { "epoch": 0.28, "learning_rate": 2.975609756097561e-05, "loss": 2.4025, "step": 12 }, { "epoch": 0.3, "learning_rate": 2.9695121951219515e-05, "loss": 2.4719, "step": 13 }, { "epoch": 0.32, "learning_rate": 2.9634146341463413e-05, "loss": 2.4496, "step": 14 }, { "epoch": 0.35, "learning_rate": 2.9573170731707316e-05, "loss": 2.3463, "step": 15 }, { "epoch": 0.37, "learning_rate": 2.951219512195122e-05, "loss": 2.4185, "step": 16 }, { "epoch": 0.39, "learning_rate": 2.9451219512195123e-05, "loss": 2.3562, "step": 17 }, { "epoch": 0.42, "learning_rate": 2.9390243902439022e-05, "loss": 2.3574, "step": 18 }, { "epoch": 0.44, "learning_rate": 2.9329268292682927e-05, "loss": 2.3392, "step": 19 }, { "epoch": 0.46, "learning_rate": 2.926829268292683e-05, "loss": 2.2794, "step": 20 }, { "epoch": 0.49, "learning_rate": 2.9207317073170735e-05, "loss": 2.2295, "step": 21 }, { "epoch": 0.51, "learning_rate": 2.9146341463414634e-05, "loss": 2.2596, "step": 22 }, { "epoch": 0.53, "learning_rate": 2.9085365853658536e-05, "loss": 2.1936, "step": 23 }, { "epoch": 0.56, "learning_rate": 2.902439024390244e-05, "loss": 2.1734, "step": 24 }, { "epoch": 0.58, "learning_rate": 2.8963414634146343e-05, "loss": 2.1466, "step": 25 }, { "epoch": 0.6, "learning_rate": 2.8902439024390242e-05, "loss": 2.119, "step": 26 }, { "epoch": 0.63, "learning_rate": 2.8841463414634148e-05, "loss": 2.0868, "step": 27 }, { "epoch": 0.65, "learning_rate": 2.878048780487805e-05, "loss": 2.1283, "step": 28 }, { "epoch": 0.67, "learning_rate": 2.8719512195121952e-05, "loss": 2.0504, "step": 29 }, { "epoch": 0.7, "learning_rate": 2.8658536585365854e-05, "loss": 1.9765, "step": 30 }, { "epoch": 0.72, "learning_rate": 2.8597560975609756e-05, "loss": 1.9256, "step": 31 }, { "epoch": 0.74, "learning_rate": 2.8536585365853658e-05, "loss": 2.0544, "step": 32 }, { "epoch": 0.77, "learning_rate": 2.8475609756097564e-05, "loss": 2.0002, "step": 33 }, { "epoch": 0.79, "learning_rate": 2.8414634146341462e-05, "loss": 1.9999, "step": 34 }, { "epoch": 0.81, "learning_rate": 2.8353658536585365e-05, "loss": 2.0487, "step": 35 }, { "epoch": 0.83, "learning_rate": 2.829268292682927e-05, "loss": 2.0628, "step": 36 }, { "epoch": 0.86, "learning_rate": 2.8231707317073172e-05, "loss": 2.0396, "step": 37 }, { "epoch": 0.88, "learning_rate": 2.817073170731707e-05, "loss": 1.9948, "step": 38 }, { "epoch": 0.9, "learning_rate": 2.8109756097560976e-05, "loss": 1.9887, "step": 39 }, { "epoch": 0.93, "learning_rate": 2.804878048780488e-05, "loss": 2.0597, "step": 40 }, { "epoch": 0.95, "learning_rate": 2.7987804878048784e-05, "loss": 2.0228, "step": 41 }, { "epoch": 0.97, "learning_rate": 2.7926829268292683e-05, "loss": 2.0505, "step": 42 }, { "epoch": 1.0, "learning_rate": 2.7865853658536585e-05, "loss": 1.9953, "step": 43 }, { "epoch": 1.02, "learning_rate": 2.780487804878049e-05, "loss": 2.061, "step": 44 }, { "epoch": 1.04, "learning_rate": 2.7743902439024393e-05, "loss": 2.01, "step": 45 }, { "epoch": 1.07, "learning_rate": 2.768292682926829e-05, "loss": 1.9348, "step": 46 }, { "epoch": 1.09, "learning_rate": 2.7621951219512197e-05, "loss": 2.0096, "step": 47 }, { "epoch": 1.11, "learning_rate": 2.75609756097561e-05, "loss": 1.9776, "step": 48 }, { "epoch": 1.14, "learning_rate": 2.75e-05, "loss": 1.9766, "step": 49 }, { "epoch": 1.16, "learning_rate": 2.7439024390243903e-05, "loss": 1.963, "step": 50 }, { "epoch": 1.18, "learning_rate": 2.7378048780487805e-05, "loss": 1.939, "step": 51 }, { "epoch": 1.21, "learning_rate": 2.7317073170731707e-05, "loss": 1.9372, "step": 52 }, { "epoch": 1.23, "learning_rate": 2.7256097560975613e-05, "loss": 1.9914, "step": 53 }, { "epoch": 1.25, "learning_rate": 2.719512195121951e-05, "loss": 1.9472, "step": 54 }, { "epoch": 1.28, "learning_rate": 2.7134146341463414e-05, "loss": 1.9727, "step": 55 }, { "epoch": 1.3, "learning_rate": 2.707317073170732e-05, "loss": 1.9598, "step": 56 }, { "epoch": 1.32, "learning_rate": 2.701219512195122e-05, "loss": 1.913, "step": 57 }, { "epoch": 1.34, "learning_rate": 2.695121951219512e-05, "loss": 1.9835, "step": 58 }, { "epoch": 1.37, "learning_rate": 2.6890243902439026e-05, "loss": 1.9365, "step": 59 }, { "epoch": 1.39, "learning_rate": 2.6829268292682928e-05, "loss": 1.9472, "step": 60 }, { "epoch": 1.41, "learning_rate": 2.6768292682926833e-05, "loss": 1.9362, "step": 61 }, { "epoch": 1.44, "learning_rate": 2.6707317073170732e-05, "loss": 1.9834, "step": 62 }, { "epoch": 1.46, "learning_rate": 2.6646341463414634e-05, "loss": 1.9174, "step": 63 }, { "epoch": 1.48, "learning_rate": 2.658536585365854e-05, "loss": 1.9143, "step": 64 }, { "epoch": 1.51, "learning_rate": 2.652439024390244e-05, "loss": 1.9332, "step": 65 }, { "epoch": 1.53, "learning_rate": 2.646341463414634e-05, "loss": 1.9531, "step": 66 }, { "epoch": 1.55, "learning_rate": 2.6402439024390246e-05, "loss": 1.9712, "step": 67 }, { "epoch": 1.58, "learning_rate": 2.6341463414634148e-05, "loss": 1.8767, "step": 68 }, { "epoch": 1.6, "learning_rate": 2.628048780487805e-05, "loss": 1.9734, "step": 69 }, { "epoch": 1.62, "learning_rate": 2.6219512195121952e-05, "loss": 1.9125, "step": 70 }, { "epoch": 1.65, "learning_rate": 2.6158536585365854e-05, "loss": 1.9498, "step": 71 }, { "epoch": 1.67, "learning_rate": 2.6097560975609756e-05, "loss": 1.9364, "step": 72 }, { "epoch": 1.69, "learning_rate": 2.603658536585366e-05, "loss": 1.9647, "step": 73 }, { "epoch": 1.72, "learning_rate": 2.597560975609756e-05, "loss": 1.9455, "step": 74 }, { "epoch": 1.74, "learning_rate": 2.5914634146341463e-05, "loss": 2.0236, "step": 75 }, { "epoch": 1.76, "learning_rate": 2.5853658536585368e-05, "loss": 1.941, "step": 76 }, { "epoch": 1.79, "learning_rate": 2.5792682926829267e-05, "loss": 1.9002, "step": 77 }, { "epoch": 1.81, "learning_rate": 2.573170731707317e-05, "loss": 1.9697, "step": 78 }, { "epoch": 1.83, "learning_rate": 2.5670731707317075e-05, "loss": 1.8974, "step": 79 }, { "epoch": 1.86, "learning_rate": 2.5609756097560977e-05, "loss": 1.9521, "step": 80 }, { "epoch": 1.88, "learning_rate": 2.554878048780488e-05, "loss": 1.9044, "step": 81 }, { "epoch": 1.9, "learning_rate": 2.548780487804878e-05, "loss": 1.9168, "step": 82 }, { "epoch": 1.92, "learning_rate": 2.5426829268292683e-05, "loss": 1.9546, "step": 83 }, { "epoch": 1.95, "learning_rate": 2.536585365853659e-05, "loss": 1.9659, "step": 84 }, { "epoch": 1.97, "learning_rate": 2.5304878048780487e-05, "loss": 1.9973, "step": 85 }, { "epoch": 1.99, "learning_rate": 2.524390243902439e-05, "loss": 1.9115, "step": 86 }, { "epoch": 2.02, "learning_rate": 2.5182926829268295e-05, "loss": 1.9176, "step": 87 }, { "epoch": 2.04, "learning_rate": 2.5121951219512197e-05, "loss": 1.8841, "step": 88 }, { "epoch": 2.06, "learning_rate": 2.5060975609756096e-05, "loss": 1.9046, "step": 89 }, { "epoch": 2.09, "learning_rate": 2.5e-05, "loss": 1.8913, "step": 90 }, { "epoch": 2.11, "learning_rate": 2.4939024390243903e-05, "loss": 1.8652, "step": 91 }, { "epoch": 2.13, "learning_rate": 2.4878048780487805e-05, "loss": 1.9712, "step": 92 }, { "epoch": 2.16, "learning_rate": 2.4817073170731708e-05, "loss": 1.8976, "step": 93 }, { "epoch": 2.18, "learning_rate": 2.475609756097561e-05, "loss": 1.8578, "step": 94 }, { "epoch": 2.2, "learning_rate": 2.4695121951219512e-05, "loss": 1.892, "step": 95 }, { "epoch": 2.23, "learning_rate": 2.4634146341463417e-05, "loss": 1.8431, "step": 96 }, { "epoch": 2.25, "learning_rate": 2.4573170731707316e-05, "loss": 1.9123, "step": 97 }, { "epoch": 2.27, "learning_rate": 2.4512195121951218e-05, "loss": 1.9426, "step": 98 }, { "epoch": 2.3, "learning_rate": 2.4451219512195124e-05, "loss": 1.9891, "step": 99 }, { "epoch": 2.32, "learning_rate": 2.4390243902439026e-05, "loss": 1.878, "step": 100 }, { "epoch": 2.34, "learning_rate": 2.4329268292682928e-05, "loss": 1.9455, "step": 101 }, { "epoch": 2.37, "learning_rate": 2.426829268292683e-05, "loss": 1.8838, "step": 102 }, { "epoch": 2.39, "learning_rate": 2.4207317073170732e-05, "loss": 1.8222, "step": 103 }, { "epoch": 2.41, "learning_rate": 2.4146341463414638e-05, "loss": 1.9379, "step": 104 }, { "epoch": 2.43, "learning_rate": 2.4085365853658536e-05, "loss": 1.8979, "step": 105 }, { "epoch": 2.46, "learning_rate": 2.402439024390244e-05, "loss": 1.9112, "step": 106 }, { "epoch": 2.48, "learning_rate": 2.3963414634146344e-05, "loss": 1.8726, "step": 107 }, { "epoch": 2.5, "learning_rate": 2.3902439024390246e-05, "loss": 1.952, "step": 108 }, { "epoch": 2.53, "learning_rate": 2.3841463414634145e-05, "loss": 1.8966, "step": 109 }, { "epoch": 2.55, "learning_rate": 2.378048780487805e-05, "loss": 1.951, "step": 110 }, { "epoch": 2.57, "learning_rate": 2.3719512195121952e-05, "loss": 1.9121, "step": 111 }, { "epoch": 2.6, "learning_rate": 2.3658536585365854e-05, "loss": 1.8444, "step": 112 }, { "epoch": 2.62, "learning_rate": 2.3597560975609757e-05, "loss": 1.8815, "step": 113 }, { "epoch": 2.64, "learning_rate": 2.353658536585366e-05, "loss": 1.9449, "step": 114 }, { "epoch": 2.67, "learning_rate": 2.347560975609756e-05, "loss": 1.9857, "step": 115 }, { "epoch": 2.69, "learning_rate": 2.3414634146341466e-05, "loss": 1.8547, "step": 116 }, { "epoch": 2.71, "learning_rate": 2.3353658536585365e-05, "loss": 1.9372, "step": 117 }, { "epoch": 2.74, "learning_rate": 2.3292682926829267e-05, "loss": 1.8957, "step": 118 }, { "epoch": 2.76, "learning_rate": 2.3231707317073173e-05, "loss": 1.8939, "step": 119 }, { "epoch": 2.78, "learning_rate": 2.3170731707317075e-05, "loss": 1.9126, "step": 120 }, { "epoch": 2.81, "learning_rate": 2.3109756097560977e-05, "loss": 1.9055, "step": 121 }, { "epoch": 2.83, "learning_rate": 2.304878048780488e-05, "loss": 1.8628, "step": 122 }, { "epoch": 2.85, "learning_rate": 2.298780487804878e-05, "loss": 1.9196, "step": 123 }, { "epoch": 2.88, "learning_rate": 2.2926829268292687e-05, "loss": 1.9766, "step": 124 }, { "epoch": 2.9, "learning_rate": 2.2865853658536585e-05, "loss": 1.9367, "step": 125 }, { "epoch": 2.92, "learning_rate": 2.2804878048780487e-05, "loss": 1.8564, "step": 126 }, { "epoch": 2.94, "learning_rate": 2.2743902439024393e-05, "loss": 1.9548, "step": 127 }, { "epoch": 2.97, "learning_rate": 2.2682926829268295e-05, "loss": 1.8375, "step": 128 }, { "epoch": 2.99, "learning_rate": 2.2621951219512194e-05, "loss": 1.8987, "step": 129 }, { "epoch": 3.01, "learning_rate": 2.25609756097561e-05, "loss": 1.8881, "step": 130 }, { "epoch": 3.04, "learning_rate": 2.25e-05, "loss": 1.8478, "step": 131 }, { "epoch": 3.06, "learning_rate": 2.2439024390243904e-05, "loss": 1.9343, "step": 132 }, { "epoch": 3.08, "learning_rate": 2.2378048780487806e-05, "loss": 1.9425, "step": 133 }, { "epoch": 3.11, "learning_rate": 2.2317073170731708e-05, "loss": 1.8641, "step": 134 }, { "epoch": 3.13, "learning_rate": 2.225609756097561e-05, "loss": 1.8896, "step": 135 }, { "epoch": 3.15, "learning_rate": 2.2195121951219512e-05, "loss": 1.8852, "step": 136 }, { "epoch": 3.18, "learning_rate": 2.2134146341463414e-05, "loss": 1.8599, "step": 137 }, { "epoch": 3.2, "learning_rate": 2.2073170731707316e-05, "loss": 1.916, "step": 138 }, { "epoch": 3.22, "learning_rate": 2.2012195121951222e-05, "loss": 1.9417, "step": 139 }, { "epoch": 3.25, "learning_rate": 2.195121951219512e-05, "loss": 1.916, "step": 140 }, { "epoch": 3.27, "learning_rate": 2.1890243902439023e-05, "loss": 1.7946, "step": 141 }, { "epoch": 3.29, "learning_rate": 2.1829268292682928e-05, "loss": 1.8801, "step": 142 }, { "epoch": 3.32, "learning_rate": 2.176829268292683e-05, "loss": 1.9045, "step": 143 }, { "epoch": 3.34, "learning_rate": 2.1707317073170732e-05, "loss": 1.9059, "step": 144 }, { "epoch": 3.36, "learning_rate": 2.1646341463414634e-05, "loss": 1.8927, "step": 145 }, { "epoch": 3.39, "learning_rate": 2.1585365853658537e-05, "loss": 2.0249, "step": 146 }, { "epoch": 3.41, "learning_rate": 2.1524390243902442e-05, "loss": 1.8718, "step": 147 }, { "epoch": 3.43, "learning_rate": 2.146341463414634e-05, "loss": 1.9046, "step": 148 }, { "epoch": 3.46, "learning_rate": 2.1402439024390243e-05, "loss": 1.9481, "step": 149 }, { "epoch": 3.48, "learning_rate": 2.134146341463415e-05, "loss": 1.8769, "step": 150 }, { "epoch": 3.5, "learning_rate": 2.128048780487805e-05, "loss": 1.753, "step": 151 }, { "epoch": 3.52, "learning_rate": 2.121951219512195e-05, "loss": 1.9065, "step": 152 }, { "epoch": 3.55, "learning_rate": 2.1158536585365855e-05, "loss": 1.947, "step": 153 }, { "epoch": 3.57, "learning_rate": 2.1097560975609757e-05, "loss": 1.9284, "step": 154 }, { "epoch": 3.59, "learning_rate": 2.103658536585366e-05, "loss": 1.8341, "step": 155 }, { "epoch": 3.62, "learning_rate": 2.097560975609756e-05, "loss": 1.8711, "step": 156 }, { "epoch": 3.64, "learning_rate": 2.0914634146341463e-05, "loss": 1.8183, "step": 157 }, { "epoch": 3.66, "learning_rate": 2.0853658536585365e-05, "loss": 1.8935, "step": 158 }, { "epoch": 3.69, "learning_rate": 2.079268292682927e-05, "loss": 1.8397, "step": 159 }, { "epoch": 3.71, "learning_rate": 2.073170731707317e-05, "loss": 1.8481, "step": 160 }, { "epoch": 3.73, "learning_rate": 2.067073170731707e-05, "loss": 1.8134, "step": 161 }, { "epoch": 3.76, "learning_rate": 2.0609756097560977e-05, "loss": 1.9221, "step": 162 }, { "epoch": 3.78, "learning_rate": 2.054878048780488e-05, "loss": 1.8334, "step": 163 }, { "epoch": 3.8, "learning_rate": 2.048780487804878e-05, "loss": 1.899, "step": 164 }, { "epoch": 3.83, "learning_rate": 2.0426829268292683e-05, "loss": 1.8769, "step": 165 }, { "epoch": 3.85, "learning_rate": 2.0365853658536586e-05, "loss": 1.8427, "step": 166 }, { "epoch": 3.87, "learning_rate": 2.030487804878049e-05, "loss": 1.8423, "step": 167 }, { "epoch": 3.9, "learning_rate": 2.024390243902439e-05, "loss": 1.9441, "step": 168 }, { "epoch": 3.92, "learning_rate": 2.0182926829268292e-05, "loss": 1.7726, "step": 169 }, { "epoch": 3.94, "learning_rate": 2.0121951219512197e-05, "loss": 1.8858, "step": 170 }, { "epoch": 3.97, "learning_rate": 2.00609756097561e-05, "loss": 1.8227, "step": 171 }, { "epoch": 3.99, "learning_rate": 1.9999999999999998e-05, "loss": 1.8622, "step": 172 }, { "epoch": 4.01, "learning_rate": 1.9939024390243904e-05, "loss": 1.8685, "step": 173 }, { "epoch": 4.03, "learning_rate": 1.9878048780487806e-05, "loss": 1.92, "step": 174 }, { "epoch": 4.06, "learning_rate": 1.9817073170731708e-05, "loss": 1.925, "step": 175 }, { "epoch": 4.08, "learning_rate": 1.975609756097561e-05, "loss": 1.8486, "step": 176 }, { "epoch": 4.1, "learning_rate": 1.9695121951219512e-05, "loss": 1.8687, "step": 177 }, { "epoch": 4.13, "learning_rate": 1.9634146341463414e-05, "loss": 1.8679, "step": 178 }, { "epoch": 4.15, "learning_rate": 1.957317073170732e-05, "loss": 1.8379, "step": 179 }, { "epoch": 4.17, "learning_rate": 1.951219512195122e-05, "loss": 1.8829, "step": 180 }, { "epoch": 4.2, "learning_rate": 1.945121951219512e-05, "loss": 1.924, "step": 181 }, { "epoch": 4.22, "learning_rate": 1.9390243902439026e-05, "loss": 1.9461, "step": 182 }, { "epoch": 4.24, "learning_rate": 1.9329268292682928e-05, "loss": 1.8702, "step": 183 }, { "epoch": 4.27, "learning_rate": 1.926829268292683e-05, "loss": 1.8518, "step": 184 }, { "epoch": 4.29, "learning_rate": 1.9207317073170733e-05, "loss": 1.9101, "step": 185 }, { "epoch": 4.31, "learning_rate": 1.9146341463414635e-05, "loss": 1.7935, "step": 186 }, { "epoch": 4.34, "learning_rate": 1.908536585365854e-05, "loss": 1.8736, "step": 187 }, { "epoch": 4.36, "learning_rate": 1.902439024390244e-05, "loss": 1.8667, "step": 188 }, { "epoch": 4.38, "learning_rate": 1.896341463414634e-05, "loss": 1.8707, "step": 189 }, { "epoch": 4.41, "learning_rate": 1.8902439024390246e-05, "loss": 1.9036, "step": 190 }, { "epoch": 4.43, "learning_rate": 1.884146341463415e-05, "loss": 1.7947, "step": 191 }, { "epoch": 4.45, "learning_rate": 1.8780487804878047e-05, "loss": 1.8098, "step": 192 }, { "epoch": 4.48, "learning_rate": 1.8719512195121953e-05, "loss": 1.8323, "step": 193 }, { "epoch": 4.5, "learning_rate": 1.8658536585365855e-05, "loss": 1.8219, "step": 194 }, { "epoch": 4.52, "learning_rate": 1.8597560975609754e-05, "loss": 1.8358, "step": 195 }, { "epoch": 4.54, "learning_rate": 1.853658536585366e-05, "loss": 1.873, "step": 196 }, { "epoch": 4.57, "learning_rate": 1.847560975609756e-05, "loss": 1.8871, "step": 197 }, { "epoch": 4.59, "learning_rate": 1.8414634146341463e-05, "loss": 1.8926, "step": 198 }, { "epoch": 4.61, "learning_rate": 1.8353658536585365e-05, "loss": 1.8976, "step": 199 }, { "epoch": 4.64, "learning_rate": 1.8292682926829268e-05, "loss": 1.8112, "step": 200 }, { "epoch": 4.66, "learning_rate": 1.823170731707317e-05, "loss": 1.8891, "step": 201 }, { "epoch": 4.68, "learning_rate": 1.8170731707317075e-05, "loss": 1.8788, "step": 202 }, { "epoch": 4.71, "learning_rate": 1.8109756097560974e-05, "loss": 1.787, "step": 203 }, { "epoch": 4.73, "learning_rate": 1.804878048780488e-05, "loss": 1.8208, "step": 204 }, { "epoch": 4.75, "learning_rate": 1.798780487804878e-05, "loss": 1.9106, "step": 205 }, { "epoch": 4.78, "learning_rate": 1.7926829268292684e-05, "loss": 1.8922, "step": 206 }, { "epoch": 4.8, "learning_rate": 1.7865853658536586e-05, "loss": 1.9014, "step": 207 }, { "epoch": 4.82, "learning_rate": 1.7804878048780488e-05, "loss": 1.8015, "step": 208 }, { "epoch": 4.85, "learning_rate": 1.774390243902439e-05, "loss": 1.8836, "step": 209 }, { "epoch": 4.87, "learning_rate": 1.7682926829268296e-05, "loss": 1.8423, "step": 210 }, { "epoch": 4.89, "learning_rate": 1.7621951219512194e-05, "loss": 1.8525, "step": 211 }, { "epoch": 4.92, "learning_rate": 1.7560975609756096e-05, "loss": 1.8726, "step": 212 }, { "epoch": 4.94, "learning_rate": 1.7500000000000002e-05, "loss": 1.8582, "step": 213 }, { "epoch": 4.96, "learning_rate": 1.7439024390243904e-05, "loss": 1.8846, "step": 214 }, { "epoch": 4.99, "learning_rate": 1.7378048780487803e-05, "loss": 1.8683, "step": 215 }, { "epoch": 5.01, "learning_rate": 1.7317073170731708e-05, "loss": 1.8713, "step": 216 }, { "epoch": 5.03, "learning_rate": 1.725609756097561e-05, "loss": 1.8359, "step": 217 }, { "epoch": 5.06, "learning_rate": 1.7195121951219512e-05, "loss": 1.8415, "step": 218 }, { "epoch": 5.08, "learning_rate": 1.7134146341463415e-05, "loss": 1.9431, "step": 219 }, { "epoch": 5.1, "learning_rate": 1.7073170731707317e-05, "loss": 1.8503, "step": 220 }, { "epoch": 5.12, "learning_rate": 1.701219512195122e-05, "loss": 1.8799, "step": 221 }, { "epoch": 5.15, "learning_rate": 1.6951219512195124e-05, "loss": 1.8415, "step": 222 }, { "epoch": 5.17, "learning_rate": 1.6890243902439023e-05, "loss": 1.8431, "step": 223 }, { "epoch": 5.19, "learning_rate": 1.682926829268293e-05, "loss": 1.8891, "step": 224 }, { "epoch": 5.22, "learning_rate": 1.676829268292683e-05, "loss": 1.8438, "step": 225 }, { "epoch": 5.24, "learning_rate": 1.6707317073170733e-05, "loss": 1.9094, "step": 226 }, { "epoch": 5.26, "learning_rate": 1.6646341463414635e-05, "loss": 1.8045, "step": 227 }, { "epoch": 5.29, "learning_rate": 1.6585365853658537e-05, "loss": 1.9617, "step": 228 }, { "epoch": 5.31, "learning_rate": 1.652439024390244e-05, "loss": 1.8788, "step": 229 }, { "epoch": 5.33, "learning_rate": 1.6463414634146345e-05, "loss": 1.8641, "step": 230 }, { "epoch": 5.36, "learning_rate": 1.6402439024390243e-05, "loss": 1.8191, "step": 231 }, { "epoch": 5.38, "learning_rate": 1.6341463414634145e-05, "loss": 1.8844, "step": 232 }, { "epoch": 5.4, "learning_rate": 1.628048780487805e-05, "loss": 1.8835, "step": 233 }, { "epoch": 5.43, "learning_rate": 1.6219512195121953e-05, "loss": 1.8899, "step": 234 }, { "epoch": 5.45, "learning_rate": 1.6158536585365852e-05, "loss": 1.7983, "step": 235 }, { "epoch": 5.47, "learning_rate": 1.6097560975609757e-05, "loss": 1.8025, "step": 236 }, { "epoch": 5.5, "learning_rate": 1.603658536585366e-05, "loss": 1.8975, "step": 237 }, { "epoch": 5.52, "learning_rate": 1.597560975609756e-05, "loss": 1.817, "step": 238 }, { "epoch": 5.54, "learning_rate": 1.5914634146341464e-05, "loss": 1.768, "step": 239 }, { "epoch": 5.57, "learning_rate": 1.5853658536585366e-05, "loss": 1.8033, "step": 240 }, { "epoch": 5.59, "learning_rate": 1.5792682926829268e-05, "loss": 1.8734, "step": 241 }, { "epoch": 5.61, "learning_rate": 1.5731707317073173e-05, "loss": 1.8962, "step": 242 }, { "epoch": 5.63, "learning_rate": 1.5670731707317072e-05, "loss": 1.8442, "step": 243 }, { "epoch": 5.66, "learning_rate": 1.5609756097560978e-05, "loss": 1.8266, "step": 244 }, { "epoch": 5.68, "learning_rate": 1.554878048780488e-05, "loss": 1.8717, "step": 245 }, { "epoch": 5.7, "learning_rate": 1.5487804878048782e-05, "loss": 1.9014, "step": 246 }, { "epoch": 5.73, "learning_rate": 1.5426829268292684e-05, "loss": 1.8647, "step": 247 }, { "epoch": 5.75, "learning_rate": 1.5365853658536586e-05, "loss": 1.8806, "step": 248 }, { "epoch": 5.77, "learning_rate": 1.5304878048780488e-05, "loss": 1.8166, "step": 249 }, { "epoch": 5.8, "learning_rate": 1.5243902439024392e-05, "loss": 1.8547, "step": 250 }, { "epoch": 5.82, "learning_rate": 1.5182926829268294e-05, "loss": 1.855, "step": 251 }, { "epoch": 5.84, "learning_rate": 1.5121951219512194e-05, "loss": 1.8207, "step": 252 }, { "epoch": 5.87, "learning_rate": 1.5060975609756098e-05, "loss": 1.8739, "step": 253 }, { "epoch": 5.89, "learning_rate": 1.5e-05, "loss": 1.9057, "step": 254 }, { "epoch": 5.91, "learning_rate": 1.4939024390243902e-05, "loss": 1.8652, "step": 255 }, { "epoch": 5.94, "learning_rate": 1.4878048780487805e-05, "loss": 1.8042, "step": 256 }, { "epoch": 5.96, "learning_rate": 1.4817073170731707e-05, "loss": 1.8646, "step": 257 }, { "epoch": 5.98, "learning_rate": 1.475609756097561e-05, "loss": 1.7727, "step": 258 }, { "epoch": 6.01, "learning_rate": 1.4695121951219511e-05, "loss": 1.8401, "step": 259 }, { "epoch": 6.03, "learning_rate": 1.4634146341463415e-05, "loss": 1.8323, "step": 260 }, { "epoch": 6.05, "learning_rate": 1.4573170731707317e-05, "loss": 1.798, "step": 261 }, { "epoch": 6.08, "learning_rate": 1.451219512195122e-05, "loss": 1.8498, "step": 262 }, { "epoch": 6.1, "learning_rate": 1.4451219512195121e-05, "loss": 1.8613, "step": 263 }, { "epoch": 6.12, "learning_rate": 1.4390243902439025e-05, "loss": 1.8581, "step": 264 }, { "epoch": 6.14, "learning_rate": 1.4329268292682927e-05, "loss": 1.8183, "step": 265 }, { "epoch": 6.17, "learning_rate": 1.4268292682926829e-05, "loss": 1.8341, "step": 266 }, { "epoch": 6.19, "learning_rate": 1.4207317073170731e-05, "loss": 1.8379, "step": 267 }, { "epoch": 6.21, "learning_rate": 1.4146341463414635e-05, "loss": 1.8602, "step": 268 }, { "epoch": 6.24, "learning_rate": 1.4085365853658535e-05, "loss": 1.8839, "step": 269 }, { "epoch": 6.26, "learning_rate": 1.402439024390244e-05, "loss": 1.8391, "step": 270 }, { "epoch": 6.28, "learning_rate": 1.3963414634146341e-05, "loss": 1.8329, "step": 271 }, { "epoch": 6.31, "learning_rate": 1.3902439024390245e-05, "loss": 1.8749, "step": 272 }, { "epoch": 6.33, "learning_rate": 1.3841463414634146e-05, "loss": 1.8137, "step": 273 }, { "epoch": 6.35, "learning_rate": 1.378048780487805e-05, "loss": 1.8471, "step": 274 }, { "epoch": 6.38, "learning_rate": 1.3719512195121952e-05, "loss": 1.8196, "step": 275 }, { "epoch": 6.4, "learning_rate": 1.3658536585365854e-05, "loss": 1.8122, "step": 276 }, { "epoch": 6.42, "learning_rate": 1.3597560975609756e-05, "loss": 1.827, "step": 277 }, { "epoch": 6.45, "learning_rate": 1.353658536585366e-05, "loss": 1.8483, "step": 278 }, { "epoch": 6.47, "learning_rate": 1.347560975609756e-05, "loss": 1.8586, "step": 279 }, { "epoch": 6.49, "learning_rate": 1.3414634146341464e-05, "loss": 1.8376, "step": 280 }, { "epoch": 6.52, "learning_rate": 1.3353658536585366e-05, "loss": 1.9202, "step": 281 }, { "epoch": 6.54, "learning_rate": 1.329268292682927e-05, "loss": 1.8513, "step": 282 }, { "epoch": 6.56, "learning_rate": 1.323170731707317e-05, "loss": 1.8526, "step": 283 }, { "epoch": 6.59, "learning_rate": 1.3170731707317074e-05, "loss": 1.8625, "step": 284 }, { "epoch": 6.61, "learning_rate": 1.3109756097560976e-05, "loss": 1.8891, "step": 285 }, { "epoch": 6.63, "learning_rate": 1.3048780487804878e-05, "loss": 1.8396, "step": 286 }, { "epoch": 6.66, "learning_rate": 1.298780487804878e-05, "loss": 1.8735, "step": 287 }, { "epoch": 6.68, "learning_rate": 1.2926829268292684e-05, "loss": 1.8588, "step": 288 }, { "epoch": 6.7, "learning_rate": 1.2865853658536585e-05, "loss": 1.8354, "step": 289 }, { "epoch": 6.72, "learning_rate": 1.2804878048780488e-05, "loss": 1.7961, "step": 290 }, { "epoch": 6.75, "learning_rate": 1.274390243902439e-05, "loss": 1.8739, "step": 291 }, { "epoch": 6.77, "learning_rate": 1.2682926829268294e-05, "loss": 1.755, "step": 292 }, { "epoch": 6.79, "learning_rate": 1.2621951219512195e-05, "loss": 1.819, "step": 293 }, { "epoch": 6.82, "learning_rate": 1.2560975609756098e-05, "loss": 1.873, "step": 294 }, { "epoch": 6.84, "learning_rate": 1.25e-05, "loss": 1.8856, "step": 295 }, { "epoch": 6.86, "learning_rate": 1.2439024390243903e-05, "loss": 1.7957, "step": 296 }, { "epoch": 6.89, "learning_rate": 1.2378048780487805e-05, "loss": 1.9105, "step": 297 }, { "epoch": 6.91, "learning_rate": 1.2317073170731709e-05, "loss": 1.9567, "step": 298 }, { "epoch": 6.93, "learning_rate": 1.2256097560975609e-05, "loss": 1.8302, "step": 299 }, { "epoch": 6.96, "learning_rate": 1.2195121951219513e-05, "loss": 1.8012, "step": 300 }, { "epoch": 6.98, "learning_rate": 1.2134146341463415e-05, "loss": 1.8738, "step": 301 }, { "epoch": 7.0, "learning_rate": 1.2073170731707319e-05, "loss": 1.8451, "step": 302 }, { "epoch": 7.03, "learning_rate": 1.201219512195122e-05, "loss": 1.8725, "step": 303 }, { "epoch": 7.05, "learning_rate": 1.1951219512195123e-05, "loss": 1.8076, "step": 304 }, { "epoch": 7.07, "learning_rate": 1.1890243902439025e-05, "loss": 1.8252, "step": 305 }, { "epoch": 7.1, "learning_rate": 1.1829268292682927e-05, "loss": 1.8556, "step": 306 }, { "epoch": 7.12, "learning_rate": 1.176829268292683e-05, "loss": 1.9182, "step": 307 }, { "epoch": 7.14, "learning_rate": 1.1707317073170733e-05, "loss": 1.8325, "step": 308 }, { "epoch": 7.17, "learning_rate": 1.1646341463414634e-05, "loss": 1.8132, "step": 309 }, { "epoch": 7.19, "learning_rate": 1.1585365853658537e-05, "loss": 1.8424, "step": 310 }, { "epoch": 7.21, "learning_rate": 1.152439024390244e-05, "loss": 1.8617, "step": 311 }, { "epoch": 7.23, "learning_rate": 1.1463414634146343e-05, "loss": 1.7966, "step": 312 }, { "epoch": 7.26, "learning_rate": 1.1402439024390244e-05, "loss": 1.8628, "step": 313 }, { "epoch": 7.28, "learning_rate": 1.1341463414634148e-05, "loss": 1.8711, "step": 314 }, { "epoch": 7.3, "learning_rate": 1.128048780487805e-05, "loss": 1.8142, "step": 315 }, { "epoch": 7.33, "learning_rate": 1.1219512195121952e-05, "loss": 1.8658, "step": 316 }, { "epoch": 7.35, "learning_rate": 1.1158536585365854e-05, "loss": 1.8233, "step": 317 }, { "epoch": 7.37, "learning_rate": 1.1097560975609756e-05, "loss": 1.8624, "step": 318 }, { "epoch": 7.4, "learning_rate": 1.1036585365853658e-05, "loss": 1.8298, "step": 319 }, { "epoch": 7.42, "learning_rate": 1.097560975609756e-05, "loss": 1.869, "step": 320 }, { "epoch": 7.44, "learning_rate": 1.0914634146341464e-05, "loss": 1.884, "step": 321 }, { "epoch": 7.47, "learning_rate": 1.0853658536585366e-05, "loss": 1.8007, "step": 322 }, { "epoch": 7.49, "learning_rate": 1.0792682926829268e-05, "loss": 1.8457, "step": 323 }, { "epoch": 7.51, "learning_rate": 1.073170731707317e-05, "loss": 1.8589, "step": 324 }, { "epoch": 7.54, "learning_rate": 1.0670731707317074e-05, "loss": 1.8211, "step": 325 }, { "epoch": 7.56, "learning_rate": 1.0609756097560975e-05, "loss": 1.8594, "step": 326 }, { "epoch": 7.58, "learning_rate": 1.0548780487804878e-05, "loss": 1.8588, "step": 327 }, { "epoch": 7.61, "learning_rate": 1.048780487804878e-05, "loss": 1.845, "step": 328 }, { "epoch": 7.63, "learning_rate": 1.0426829268292683e-05, "loss": 1.7974, "step": 329 }, { "epoch": 7.65, "learning_rate": 1.0365853658536585e-05, "loss": 1.8571, "step": 330 }, { "epoch": 7.68, "learning_rate": 1.0304878048780489e-05, "loss": 1.7917, "step": 331 }, { "epoch": 7.7, "learning_rate": 1.024390243902439e-05, "loss": 1.8581, "step": 332 }, { "epoch": 7.72, "learning_rate": 1.0182926829268293e-05, "loss": 1.8404, "step": 333 }, { "epoch": 7.74, "learning_rate": 1.0121951219512195e-05, "loss": 1.8323, "step": 334 }, { "epoch": 7.77, "learning_rate": 1.0060975609756099e-05, "loss": 1.8368, "step": 335 }, { "epoch": 7.79, "learning_rate": 9.999999999999999e-06, "loss": 1.8568, "step": 336 }, { "epoch": 7.81, "learning_rate": 9.939024390243903e-06, "loss": 1.8771, "step": 337 }, { "epoch": 7.84, "learning_rate": 9.878048780487805e-06, "loss": 1.8877, "step": 338 }, { "epoch": 7.86, "learning_rate": 9.817073170731707e-06, "loss": 1.8309, "step": 339 }, { "epoch": 7.88, "learning_rate": 9.75609756097561e-06, "loss": 1.8153, "step": 340 }, { "epoch": 7.91, "learning_rate": 9.695121951219513e-06, "loss": 1.9367, "step": 341 }, { "epoch": 7.93, "learning_rate": 9.634146341463415e-06, "loss": 1.7258, "step": 342 }, { "epoch": 7.95, "learning_rate": 9.573170731707317e-06, "loss": 1.825, "step": 343 }, { "epoch": 7.98, "learning_rate": 9.51219512195122e-06, "loss": 1.8158, "step": 344 }, { "epoch": 8.0, "learning_rate": 9.451219512195123e-06, "loss": 1.8606, "step": 345 }, { "epoch": 8.02, "learning_rate": 9.390243902439024e-06, "loss": 1.881, "step": 346 }, { "epoch": 8.05, "learning_rate": 9.329268292682927e-06, "loss": 1.8339, "step": 347 }, { "epoch": 8.07, "learning_rate": 9.26829268292683e-06, "loss": 1.8034, "step": 348 }, { "epoch": 8.09, "learning_rate": 9.207317073170732e-06, "loss": 1.8334, "step": 349 }, { "epoch": 8.12, "learning_rate": 9.146341463414634e-06, "loss": 1.8411, "step": 350 }, { "epoch": 8.14, "learning_rate": 9.085365853658538e-06, "loss": 1.8157, "step": 351 }, { "epoch": 8.16, "learning_rate": 9.02439024390244e-06, "loss": 1.7685, "step": 352 }, { "epoch": 8.19, "learning_rate": 8.963414634146342e-06, "loss": 1.8318, "step": 353 }, { "epoch": 8.21, "learning_rate": 8.902439024390244e-06, "loss": 1.8124, "step": 354 }, { "epoch": 8.23, "learning_rate": 8.841463414634148e-06, "loss": 1.8656, "step": 355 }, { "epoch": 8.26, "learning_rate": 8.780487804878048e-06, "loss": 1.8696, "step": 356 }, { "epoch": 8.28, "learning_rate": 8.719512195121952e-06, "loss": 1.8821, "step": 357 }, { "epoch": 8.3, "learning_rate": 8.658536585365854e-06, "loss": 1.8023, "step": 358 }, { "epoch": 8.32, "learning_rate": 8.597560975609756e-06, "loss": 1.8643, "step": 359 }, { "epoch": 8.35, "learning_rate": 8.536585365853658e-06, "loss": 1.9178, "step": 360 }, { "epoch": 8.37, "learning_rate": 8.475609756097562e-06, "loss": 1.8703, "step": 361 }, { "epoch": 8.39, "learning_rate": 8.414634146341464e-06, "loss": 1.8673, "step": 362 }, { "epoch": 8.42, "learning_rate": 8.353658536585366e-06, "loss": 1.8394, "step": 363 }, { "epoch": 8.44, "learning_rate": 8.292682926829268e-06, "loss": 1.7786, "step": 364 }, { "epoch": 8.46, "learning_rate": 8.231707317073172e-06, "loss": 1.8853, "step": 365 }, { "epoch": 8.49, "learning_rate": 8.170731707317073e-06, "loss": 1.7903, "step": 366 }, { "epoch": 8.51, "learning_rate": 8.109756097560977e-06, "loss": 1.8189, "step": 367 }, { "epoch": 8.53, "learning_rate": 8.048780487804879e-06, "loss": 1.8252, "step": 368 }, { "epoch": 8.56, "learning_rate": 7.98780487804878e-06, "loss": 1.8291, "step": 369 }, { "epoch": 8.58, "learning_rate": 7.926829268292683e-06, "loss": 1.8915, "step": 370 }, { "epoch": 8.6, "learning_rate": 7.865853658536587e-06, "loss": 1.8891, "step": 371 }, { "epoch": 8.63, "learning_rate": 7.804878048780489e-06, "loss": 1.8618, "step": 372 }, { "epoch": 8.65, "learning_rate": 7.743902439024391e-06, "loss": 1.8479, "step": 373 }, { "epoch": 8.67, "learning_rate": 7.682926829268293e-06, "loss": 1.8519, "step": 374 }, { "epoch": 8.7, "learning_rate": 7.621951219512196e-06, "loss": 1.8327, "step": 375 }, { "epoch": 8.72, "learning_rate": 7.560975609756097e-06, "loss": 1.8108, "step": 376 }, { "epoch": 8.74, "learning_rate": 7.5e-06, "loss": 1.7837, "step": 377 }, { "epoch": 8.77, "learning_rate": 7.439024390243902e-06, "loss": 1.85, "step": 378 }, { "epoch": 8.79, "learning_rate": 7.378048780487805e-06, "loss": 1.8255, "step": 379 }, { "epoch": 8.81, "learning_rate": 7.317073170731707e-06, "loss": 1.8321, "step": 380 }, { "epoch": 8.83, "learning_rate": 7.25609756097561e-06, "loss": 1.8171, "step": 381 }, { "epoch": 8.86, "learning_rate": 7.1951219512195125e-06, "loss": 1.8386, "step": 382 }, { "epoch": 8.88, "learning_rate": 7.1341463414634146e-06, "loss": 1.8255, "step": 383 }, { "epoch": 8.9, "learning_rate": 7.0731707317073175e-06, "loss": 1.8649, "step": 384 }, { "epoch": 8.93, "learning_rate": 7.01219512195122e-06, "loss": 1.83, "step": 385 }, { "epoch": 8.95, "learning_rate": 6.951219512195123e-06, "loss": 1.7527, "step": 386 }, { "epoch": 8.97, "learning_rate": 6.890243902439025e-06, "loss": 1.9175, "step": 387 }, { "epoch": 9.0, "learning_rate": 6.829268292682927e-06, "loss": 1.8951, "step": 388 }, { "epoch": 9.02, "learning_rate": 6.76829268292683e-06, "loss": 1.8545, "step": 389 }, { "epoch": 9.04, "learning_rate": 6.707317073170732e-06, "loss": 1.9206, "step": 390 }, { "epoch": 9.07, "learning_rate": 6.646341463414635e-06, "loss": 1.8398, "step": 391 }, { "epoch": 9.09, "learning_rate": 6.585365853658537e-06, "loss": 1.7943, "step": 392 }, { "epoch": 9.11, "learning_rate": 6.524390243902439e-06, "loss": 1.8321, "step": 393 }, { "epoch": 9.14, "learning_rate": 6.463414634146342e-06, "loss": 1.7706, "step": 394 }, { "epoch": 9.16, "learning_rate": 6.402439024390244e-06, "loss": 1.9105, "step": 395 }, { "epoch": 9.18, "learning_rate": 6.341463414634147e-06, "loss": 1.8658, "step": 396 }, { "epoch": 9.21, "learning_rate": 6.280487804878049e-06, "loss": 1.8315, "step": 397 }, { "epoch": 9.23, "learning_rate": 6.219512195121951e-06, "loss": 1.909, "step": 398 }, { "epoch": 9.25, "learning_rate": 6.158536585365854e-06, "loss": 1.8242, "step": 399 }, { "epoch": 9.28, "learning_rate": 6.0975609756097564e-06, "loss": 1.8862, "step": 400 }, { "epoch": 9.3, "learning_rate": 6.036585365853659e-06, "loss": 1.8389, "step": 401 }, { "epoch": 9.32, "learning_rate": 5.9756097560975615e-06, "loss": 1.8099, "step": 402 }, { "epoch": 9.34, "learning_rate": 5.914634146341464e-06, "loss": 1.8162, "step": 403 }, { "epoch": 9.37, "learning_rate": 5.853658536585367e-06, "loss": 1.8323, "step": 404 }, { "epoch": 9.39, "learning_rate": 5.792682926829269e-06, "loss": 1.8445, "step": 405 }, { "epoch": 9.41, "learning_rate": 5.731707317073172e-06, "loss": 1.7648, "step": 406 }, { "epoch": 9.44, "learning_rate": 5.670731707317074e-06, "loss": 1.862, "step": 407 }, { "epoch": 9.46, "learning_rate": 5.609756097560976e-06, "loss": 1.818, "step": 408 }, { "epoch": 9.48, "learning_rate": 5.548780487804878e-06, "loss": 1.8372, "step": 409 }, { "epoch": 9.51, "learning_rate": 5.48780487804878e-06, "loss": 1.8508, "step": 410 }, { "epoch": 9.53, "learning_rate": 5.426829268292683e-06, "loss": 1.8789, "step": 411 }, { "epoch": 9.55, "learning_rate": 5.365853658536585e-06, "loss": 1.8296, "step": 412 }, { "epoch": 9.58, "learning_rate": 5.304878048780487e-06, "loss": 1.7921, "step": 413 }, { "epoch": 9.6, "learning_rate": 5.24390243902439e-06, "loss": 1.8492, "step": 414 }, { "epoch": 9.62, "learning_rate": 5.182926829268292e-06, "loss": 1.853, "step": 415 }, { "epoch": 9.65, "learning_rate": 5.121951219512195e-06, "loss": 1.883, "step": 416 }, { "epoch": 9.67, "learning_rate": 5.0609756097560974e-06, "loss": 1.7697, "step": 417 }, { "epoch": 9.69, "learning_rate": 4.9999999999999996e-06, "loss": 1.7551, "step": 418 }, { "epoch": 9.72, "learning_rate": 4.9390243902439025e-06, "loss": 1.8302, "step": 419 }, { "epoch": 9.74, "learning_rate": 4.878048780487805e-06, "loss": 1.8384, "step": 420 }, { "epoch": 9.76, "learning_rate": 4.817073170731708e-06, "loss": 1.7868, "step": 421 }, { "epoch": 9.79, "learning_rate": 4.75609756097561e-06, "loss": 1.912, "step": 422 }, { "epoch": 9.81, "learning_rate": 4.695121951219512e-06, "loss": 1.7863, "step": 423 }, { "epoch": 9.83, "learning_rate": 4.634146341463415e-06, "loss": 1.863, "step": 424 }, { "epoch": 9.86, "learning_rate": 4.573170731707317e-06, "loss": 1.8152, "step": 425 }, { "epoch": 9.88, "learning_rate": 4.51219512195122e-06, "loss": 1.8181, "step": 426 }, { "epoch": 9.9, "learning_rate": 4.451219512195122e-06, "loss": 1.8361, "step": 427 }, { "epoch": 9.92, "learning_rate": 4.390243902439024e-06, "loss": 1.9204, "step": 428 }, { "epoch": 9.95, "learning_rate": 4.329268292682927e-06, "loss": 1.8739, "step": 429 }, { "epoch": 9.97, "learning_rate": 4.268292682926829e-06, "loss": 1.8166, "step": 430 }, { "epoch": 9.99, "learning_rate": 4.207317073170732e-06, "loss": 1.8519, "step": 431 }, { "epoch": 10.02, "learning_rate": 4.146341463414634e-06, "loss": 1.7722, "step": 432 }, { "epoch": 10.04, "learning_rate": 4.085365853658536e-06, "loss": 1.8071, "step": 433 }, { "epoch": 10.06, "learning_rate": 4.024390243902439e-06, "loss": 1.8382, "step": 434 }, { "epoch": 10.09, "learning_rate": 3.9634146341463414e-06, "loss": 1.8579, "step": 435 }, { "epoch": 10.11, "learning_rate": 3.902439024390244e-06, "loss": 1.8477, "step": 436 }, { "epoch": 10.13, "learning_rate": 3.8414634146341465e-06, "loss": 1.8037, "step": 437 }, { "epoch": 10.16, "learning_rate": 3.7804878048780486e-06, "loss": 1.8286, "step": 438 }, { "epoch": 10.18, "learning_rate": 3.719512195121951e-06, "loss": 1.8407, "step": 439 }, { "epoch": 10.2, "learning_rate": 3.6585365853658537e-06, "loss": 1.8503, "step": 440 }, { "epoch": 10.23, "learning_rate": 3.5975609756097562e-06, "loss": 1.7788, "step": 441 }, { "epoch": 10.25, "learning_rate": 3.5365853658536588e-06, "loss": 1.8687, "step": 442 }, { "epoch": 10.27, "learning_rate": 3.4756097560975613e-06, "loss": 1.7877, "step": 443 }, { "epoch": 10.3, "learning_rate": 3.4146341463414634e-06, "loss": 1.8842, "step": 444 }, { "epoch": 10.32, "learning_rate": 3.353658536585366e-06, "loss": 1.7731, "step": 445 }, { "epoch": 10.34, "learning_rate": 3.2926829268292685e-06, "loss": 1.9421, "step": 446 }, { "epoch": 10.37, "learning_rate": 3.231707317073171e-06, "loss": 1.893, "step": 447 }, { "epoch": 10.39, "learning_rate": 3.1707317073170736e-06, "loss": 1.8234, "step": 448 }, { "epoch": 10.41, "learning_rate": 3.1097560975609757e-06, "loss": 1.7849, "step": 449 }, { "epoch": 10.43, "learning_rate": 3.0487804878048782e-06, "loss": 1.8656, "step": 450 }, { "epoch": 10.46, "learning_rate": 2.9878048780487808e-06, "loss": 1.9295, "step": 451 }, { "epoch": 10.48, "learning_rate": 2.9268292682926833e-06, "loss": 1.8294, "step": 452 }, { "epoch": 10.5, "learning_rate": 2.865853658536586e-06, "loss": 1.8313, "step": 453 }, { "epoch": 10.53, "learning_rate": 2.804878048780488e-06, "loss": 1.8892, "step": 454 }, { "epoch": 10.55, "learning_rate": 2.74390243902439e-06, "loss": 1.8483, "step": 455 }, { "epoch": 10.57, "learning_rate": 2.6829268292682926e-06, "loss": 1.7787, "step": 456 }, { "epoch": 10.6, "learning_rate": 2.621951219512195e-06, "loss": 1.8363, "step": 457 }, { "epoch": 10.62, "learning_rate": 2.5609756097560977e-06, "loss": 1.8556, "step": 458 }, { "epoch": 10.64, "learning_rate": 2.4999999999999998e-06, "loss": 1.8664, "step": 459 }, { "epoch": 10.67, "learning_rate": 2.4390243902439023e-06, "loss": 1.8107, "step": 460 }, { "epoch": 10.69, "learning_rate": 2.378048780487805e-06, "loss": 1.9115, "step": 461 }, { "epoch": 10.71, "learning_rate": 2.3170731707317074e-06, "loss": 1.8131, "step": 462 }, { "epoch": 10.74, "learning_rate": 2.25609756097561e-06, "loss": 1.8422, "step": 463 }, { "epoch": 10.76, "learning_rate": 2.195121951219512e-06, "loss": 1.7932, "step": 464 }, { "epoch": 10.78, "learning_rate": 2.1341463414634146e-06, "loss": 1.8752, "step": 465 }, { "epoch": 10.81, "learning_rate": 2.073170731707317e-06, "loss": 1.7811, "step": 466 }, { "epoch": 10.83, "learning_rate": 2.0121951219512197e-06, "loss": 1.7859, "step": 467 }, { "epoch": 10.85, "learning_rate": 1.951219512195122e-06, "loss": 1.8503, "step": 468 }, { "epoch": 10.88, "learning_rate": 1.8902439024390243e-06, "loss": 1.8373, "step": 469 }, { "epoch": 10.9, "learning_rate": 1.8292682926829268e-06, "loss": 1.9218, "step": 470 }, { "epoch": 10.92, "learning_rate": 1.7682926829268294e-06, "loss": 1.7876, "step": 471 }, { "epoch": 10.94, "learning_rate": 1.7073170731707317e-06, "loss": 1.859, "step": 472 }, { "epoch": 10.97, "learning_rate": 1.6463414634146342e-06, "loss": 1.8083, "step": 473 }, { "epoch": 10.99, "learning_rate": 1.5853658536585368e-06, "loss": 1.859, "step": 474 }, { "epoch": 11.01, "learning_rate": 1.5243902439024391e-06, "loss": 1.7685, "step": 475 }, { "epoch": 11.04, "learning_rate": 1.4634146341463416e-06, "loss": 1.7692, "step": 476 }, { "epoch": 11.06, "learning_rate": 1.402439024390244e-06, "loss": 1.8202, "step": 477 }, { "epoch": 11.08, "learning_rate": 1.3414634146341463e-06, "loss": 1.8052, "step": 478 }, { "epoch": 11.11, "learning_rate": 1.2804878048780488e-06, "loss": 1.8734, "step": 479 }, { "epoch": 11.13, "learning_rate": 1.2195121951219512e-06, "loss": 1.7973, "step": 480 }, { "epoch": 11.15, "learning_rate": 1.1585365853658537e-06, "loss": 1.8462, "step": 481 }, { "epoch": 11.18, "learning_rate": 1.097560975609756e-06, "loss": 1.8119, "step": 482 }, { "epoch": 11.2, "learning_rate": 1.0365853658536586e-06, "loss": 1.9105, "step": 483 }, { "epoch": 11.22, "learning_rate": 9.75609756097561e-07, "loss": 1.8709, "step": 484 }, { "epoch": 11.25, "learning_rate": 9.146341463414634e-07, "loss": 1.871, "step": 485 }, { "epoch": 11.27, "learning_rate": 8.536585365853659e-07, "loss": 1.7683, "step": 486 }, { "epoch": 11.29, "learning_rate": 7.926829268292684e-07, "loss": 1.8853, "step": 487 }, { "epoch": 11.32, "learning_rate": 7.317073170731708e-07, "loss": 1.7629, "step": 488 }, { "epoch": 11.34, "learning_rate": 6.707317073170731e-07, "loss": 1.7865, "step": 489 }, { "epoch": 11.36, "learning_rate": 6.097560975609756e-07, "loss": 1.9186, "step": 490 }, { "epoch": 11.39, "learning_rate": 5.48780487804878e-07, "loss": 1.8269, "step": 491 }, { "epoch": 11.41, "learning_rate": 4.878048780487805e-07, "loss": 1.8538, "step": 492 }, { "epoch": 11.43, "learning_rate": 4.2682926829268293e-07, "loss": 1.8228, "step": 493 }, { "epoch": 11.46, "learning_rate": 3.658536585365854e-07, "loss": 1.8196, "step": 494 }, { "epoch": 11.48, "learning_rate": 3.048780487804878e-07, "loss": 1.8104, "step": 495 }, { "epoch": 11.5, "learning_rate": 2.439024390243903e-07, "loss": 1.8221, "step": 496 }, { "epoch": 11.52, "learning_rate": 1.829268292682927e-07, "loss": 1.8234, "step": 497 }, { "epoch": 11.55, "learning_rate": 1.2195121951219514e-07, "loss": 1.8229, "step": 498 }, { "epoch": 11.57, "learning_rate": 6.097560975609757e-08, "loss": 1.9636, "step": 499 }, { "epoch": 11.59, "learning_rate": 0.0, "loss": 1.8427, "step": 500 } ], "logging_steps": 1, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 50, "total_flos": 1.3295810584164434e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }